From c2857ccba1871b6da71a38930ace02e5041016e8 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 20 Mar 2019 18:18:03 -0400 Subject: [PATCH 001/603] initial commit --- .gitignore | 4 + License.md | 201 ++++++++++++++++++ dbt/__init__.py | 1 + dbt/adapters/__init__.py | 1 + dbt/adapters/spark/__init__.py | 12 ++ dbt/adapters/spark/connections.py | 188 ++++++++++++++++ dbt/adapters/spark/impl.py | 92 ++++++++ dbt/adapters/spark/relation.py | 46 ++++ dbt/include/__init__.py | 1 + dbt/include/spark/__init__.py | 2 + dbt/include/spark/dbt_project.yml | 5 + dbt/include/spark/macros/adapters.sql | 95 +++++++++ .../macros/materializations/incremental.sql | 83 ++++++++ setup.py | 28 +++ 14 files changed, 759 insertions(+) create mode 100644 .gitignore create mode 100644 License.md create mode 100644 dbt/__init__.py create mode 100644 dbt/adapters/__init__.py create mode 100644 dbt/adapters/spark/__init__.py create mode 100644 dbt/adapters/spark/connections.py create mode 100644 dbt/adapters/spark/impl.py create mode 100644 dbt/adapters/spark/relation.py create mode 100644 dbt/include/__init__.py create mode 100644 dbt/include/spark/__init__.py create mode 100644 dbt/include/spark/dbt_project.yml create mode 100644 dbt/include/spark/macros/adapters.sql create mode 100644 dbt/include/spark/macros/materializations/incremental.sql create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..98d9a8e8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.egg-info +env/ +*.pyc +__pycache__ diff --git a/License.md b/License.md new file mode 100644 index 00000000..8dada3ed --- /dev/null +++ b/License.md @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/dbt/__init__.py b/dbt/__init__.py new file mode 100644 index 00000000..69e3be50 --- /dev/null +++ b/dbt/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/dbt/adapters/__init__.py b/dbt/adapters/__init__.py new file mode 100644 index 00000000..69e3be50 --- /dev/null +++ b/dbt/adapters/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py new file mode 100644 index 00000000..278cf11e --- /dev/null +++ b/dbt/adapters/spark/__init__.py @@ -0,0 +1,12 @@ +from dbt.adapters.spark.connections import SparkConnectionManager +from dbt.adapters.spark.connections import SparkCredentials +from dbt.adapters.spark.relation import SparkRelation +from dbt.adapters.spark.impl import SparkAdapter + +from dbt.adapters.base import AdapterPlugin +from dbt.include import spark + +Plugin = AdapterPlugin( + adapter=SparkAdapter, + credentials=SparkCredentials, + include_path=spark.PACKAGE_PATH) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py new file mode 100644 index 00000000..ae480d8e --- /dev/null +++ b/dbt/adapters/spark/connections.py @@ -0,0 +1,188 @@ +from contextlib import contextmanager + +from dbt.adapters.base import Credentials +from dbt.adapters.sql import SQLConnectionManager +from dbt.logger import GLOBAL_LOGGER as logger +import dbt.exceptions + +import jaydebeapi + +JDBC_CONN_STRING = 'jdbc:spark://{creds.host}:{creds.port}/{creds.schema};{jdbc_conf}' # noqa + +SPARK_CREDENTIALS_CONTRACT = { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'host': { + 'type': 'string' + }, + 'port': { + 'type': 'integer', + 'minimum': 0, + 'maximum': 65535, + }, + 'database': { + 'type': ['string'], + }, + 'schema': { + 'type': 'string', + }, + 'user': { + 'type': 'string' + }, + 'password': { + 'type': 'string' + }, + 'jdbc_driver': { + 'type': 'object', + 'properties': { + 'class': { + 'type': 'string' + }, + 'path': { + 'type': 'string' + }, + }, + 'required': ['class', 'path'] + }, + 'jdbc_config': { + 'type': 'object' + } + }, + 'required': ['host', 'port', 'user', 'password', 'jdbc_driver', + 'jdbc_config', 'database', 'schema'], +} + + +class SparkCredentials(Credentials): + SCHEMA = SPARK_CREDENTIALS_CONTRACT + + def __init__(self, *args, **kwargs): + kwargs.setdefault('database', kwargs.get('schema')) + kwargs.setdefault('jdbc_config', {}) + + super(SparkCredentials, self).__init__(*args, **kwargs) + + @property + def type(self): + return 'spark' + + def _connection_keys(self): + return ('host', 'port', 'schema', 'user', 'jdbc_driver', + 'jdbc_config') + + +class ConnectionWrapper(object): + "Wrap a Spark connection in a way that no-ops transactions" + # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html + def __init__(self, handle): + self.handle = handle + self._cursor = None + self._fetch_result = None + + def cursor(self): + self._cursor = self.handle.cursor() + return self + + def cancel(self): + if self._cursor is not None: + self._cursor.cancel() + + def close(self): + # TODO? 
+ self.handle.close() + + def commit(self, *args, **kwargs): + logger.debug("NotImplemented: commit") + + def rollback(self, *args, **kwargs): + logger.debug("NotImplemented: rollback") + + def start_transaction(self, *args, **kwargs): + logger.debug("NotImplemented: start_transaction") + + def fetchall(self): + return self._cursor.fetchall() + + def execute(self, sql, bindings=None): + if sql.strip().endswith(";"): + sql = sql.strip()[:-1] + + return self._cursor.execute(sql, bindings) + + @property + def description(self): + return self._cursor.description + + +class SparkConnectionManager(SQLConnectionManager): + TYPE = 'spark' + + @contextmanager + def exception_handler(self, sql, connection_name='master'): + try: + yield + # TODO: introspect into `DatabaseError`s and expose `errorName`, + # `errorType`, etc instead of stack traces full of garbage! + except Exception as exc: + logger.debug("Error while running:\n{}".format(sql)) + logger.debug(exc) + raise dbt.exceptions.RuntimeException(exc) + + # No transactions on Spark.... + def add_begin_query(self, *args, **kwargs): + logger.debug("NotImplemented: add_begin_query") + + def add_commit_query(self, *args, **kwargs): + logger.debug("NotImplemented: add_commit_query") + + def commit(self, *args, **kwargs): + logger.debug("NotImplemented: commit") + + def rollback(self, *args, **kwargs): + logger.debug("NotImplemented: rollback") + + @classmethod + def _build_jdbc_url(cls, creds): + jdbc_conf = ";".join( + "{}={}".format(key, val) + for (key, val) in creds.jdbc_config.items() + ) + + return JDBC_CONN_STRING.format(creds=creds, jdbc_conf=jdbc_conf) + + @classmethod + def open(cls, connection): + if connection.state == 'open': + logger.debug('Connection is already open, skipping open.') + return connection + + credentials = connection.credentials + + jdbc_url = cls._build_jdbc_url(credentials) + auth = { + "user": credentials.user, + "password": credentials.password + } + + conn = jaydebeapi.connect( + credentials.jdbc_driver['class'], + jdbc_url, + auth, + credentials.jdbc_driver['path'] + ) + + wrapped = ConnectionWrapper(conn) + + connection.state = 'open' + connection.handle = wrapped + return connection + + @classmethod + def get_status(cls, cursor): + # No status from the cursor... 
+ return 'OK' + + def cancel(self, connection): + import ipdb; ipdb.set_trace() + connection.handle.cancel() diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py new file mode 100644 index 00000000..925fbc98 --- /dev/null +++ b/dbt/adapters/spark/impl.py @@ -0,0 +1,92 @@ +from dbt.adapters.sql import SQLAdapter +from dbt.adapters.spark import SparkRelation +from dbt.adapters.spark import SparkConnectionManager +import dbt.exceptions + +import agate + + +LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' +GET_RELATION_TYPE_MACRO_NAME = 'spark_get_relation_type' +DROP_RELATION_MACRO_NAME = 'drop_relation' + + +class SparkAdapter(SQLAdapter): + ConnectionManager = SparkConnectionManager + Relation = SparkRelation + + @classmethod + def date_function(cls): + return 'CURRENT_TIMESTAMP()' + + @classmethod + def convert_text_type(cls, agate_table, col_idx): + return "STRING" + + @classmethod + def convert_number_type(cls, agate_table, col_idx): + decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) + return "DOUBLE" if decimals else "BIGINT" + + @classmethod + def convert_datetime_type(cls, agate_table, col_idx): + return "TIMESTAMP" + + def create_schema(self, database, schema, model_name=None): + raise dbt.exceptions.NotImplementedException( + 'Schema/Database creation is not supported in the Spark adapter. ' + 'Please create the database "{}" manually'.format(database) + ) + + def drop_schema(self, database, schema, model_name=None): + raise dbt.exceptions.NotImplementedException( + 'Schema/Database deletion is not supported in the Spark adapter. ' + 'Please drop the database "{}" manually'.format(database) + ) + + def get_relation_type(self, relation, model_name=None): + kwargs = {'relation': relation} + return self.execute_macro( + GET_RELATION_TYPE_MACRO_NAME, + kwargs=kwargs, + connection_name=model_name, + release=True + ) + + def list_relations_without_caching(self, information_schema, schema, + model_name=None): + kwargs = {'information_schema': information_schema, 'schema': schema} + results = self.execute_macro( + LIST_RELATIONS_MACRO_NAME, + kwargs=kwargs, + connection_name=model_name, + release=True + ) + + relations = [] + quote_policy = { + 'schema': True, + 'identifier': True + } + for _database, name, _ in results: + rel = self.Relation.create( + database=_database, + schema=_database, + identifier=name, + quote_policy=quote_policy, + ) + rel.type = self.get_relation_type(rel) + relations.append(rel) + return relations + + # Override that doesn't check the type of the relation -- we do it + # dynamically in the macro code + def drop_relation(self, relation, model_name=None): + if dbt.flags.USE_CACHE: + self.cache.drop(relation) + + self.execute_macro( + DROP_RELATION_MACRO_NAME, + kwargs={'relation': relation}, + connection_name=model_name + ) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py new file mode 100644 index 00000000..18adfd4f --- /dev/null +++ b/dbt/adapters/spark/relation.py @@ -0,0 +1,46 @@ + +from dbt.adapters.base.relation import BaseRelation +import dbt.utils + + +class SparkRelation(BaseRelation): + DEFAULTS = { + 'metadata': { + 'type': 'SparkRelation' + }, + 'quote_character': '`', + 'quote_policy': { + 'database': False, + 'schema': False, + 'identifier': False, + }, + 'include_policy': { + 'database': False, + 'schema': True, + 'identifier': True, + } + } + + SCHEMA = { + 'type': 'object', + 'properties': { + 'metadata': { + 'type': 'object', + 'properties': { + 'type': { + 'type': 'string', + 
'const': 'SparkRelation', + }, + }, + }, + 'type': { + 'enum': BaseRelation.RelationTypes + [None] + }, + 'path': BaseRelation.PATH_SCHEMA, + 'include_policy': BaseRelation.POLICY_SCHEMA, + 'quote_policy': BaseRelation.POLICY_SCHEMA, + 'quote_character': {'type': 'string'}, + }, + 'required': ['metadata', 'type', 'path', 'include_policy', + 'quote_policy', 'quote_character'] + } diff --git a/dbt/include/__init__.py b/dbt/include/__init__.py new file mode 100644 index 00000000..69e3be50 --- /dev/null +++ b/dbt/include/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/dbt/include/spark/__init__.py b/dbt/include/spark/__init__.py new file mode 100644 index 00000000..564a3d1e --- /dev/null +++ b/dbt/include/spark/__init__.py @@ -0,0 +1,2 @@ +import os +PACKAGE_PATH = os.path.dirname(__file__) diff --git a/dbt/include/spark/dbt_project.yml b/dbt/include/spark/dbt_project.yml new file mode 100644 index 00000000..2294c23d --- /dev/null +++ b/dbt/include/spark/dbt_project.yml @@ -0,0 +1,5 @@ + +name: dbt_spark +version: 1.0 + +macro-paths: ["macros"] diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql new file mode 100644 index 00000000..ae380403 --- /dev/null +++ b/dbt/include/spark/macros/adapters.sql @@ -0,0 +1,95 @@ + +{#-- We can't use temporary tables with `create ... as ()` syntax #} +{% macro spark_create_temporary_view(relation, sql) -%} + create temporary view {{ relation.include(database=false, schema=false) }} as + {{ sql }} +{% endmacro %} + +{% macro spark__create_table_as(temporary, relation, sql) -%} + {% if temporary -%} + {{ spark_create_temporary_view(relation, sql) }} + {%- else -%} + create table {{ relation }} as + {{ sql }} + {%- endif %} +{%- endmacro -%} + +{% macro spark__create_view_as(relation, sql) -%} + create view {{ relation }} as + {{ sql }} +{% endmacro %} + +{% macro spark__get_columns_in_relation(relation) -%} + {% call statement('get_columns_in_relation', fetch_result=True) %} + describe {{ relation }} + {% endcall %} + + {% set table = load_result('get_columns_in_relation').table %} + {{ return(sql_convert_columns_in_relation(table)) }} + +{% endmacro %} + + +{% macro spark__list_relations_without_caching(information_schema, schema) %} + {% call statement('list_relations_without_caching', fetch_result=True) -%} + show tables in {{ schema }} + {% endcall %} + + {% do return(load_result('list_relations_without_caching').table) %} +{% endmacro %} + + +{% macro spark__list_schemas(database) -%} + {% call statement('list_schemas', fetch_result=True, auto_begin=False) %} + show databases + {% endcall %} + {{ return(load_result('list_schemas').table) }} +{% endmacro %} + + +{% macro spark__check_schema_exists(information_schema, schema) -%} + {# TODO #} + {% call statement('check_schema_exists', fetch_result=True) -%} + show databases + {%- endcall %} + {{ return(load_result('check_schema_exists').table) }} +{%- endmacro %} + +{% macro spark__current_timestamp() -%} + current_timestamp() +{%- endmacro %} + +{% macro spark_get_relation_type(relation) -%} + {% call statement('check_schema_exists', fetch_result=True) -%} + SHOW TBLPROPERTIES {{ relation }} ('view.default.database') + {%- endcall %} + {% set res = load_result('check_schema_exists').table %} + {% if 'does not have property' in res[0][0] %} + {{ return('table') }} + {% else %} + {{ return('view') }} + {% endif %} +{%- endmacro %} + + +{% macro spark__rename_relation(from_relation, to_relation) -%} + {% call 
statement('rename_relation') -%} + {% if not from_relation.type %} + {% do exceptions.raise_database_error("Cannot rename a relation with an unknown type: " ~ from_relation) %} + {% elif from_relation.type == 'table' %} + alter table {{ from_relation }} rename to {{ to_relation }} + {% elif from_relation.type == 'view' %} + alter view {{ from_relation }} rename to {{ to_relation }} + {% else %} + {% do exceptions.raise_database_error("Unknown type '" ~ from_relation.type ~ "' for relation: " ~ from_relation) %} + {% endif %} + {%- endcall %} +{% endmacro %} + + +{% macro spark__drop_relation(relation) -%} + {% set type = relation.type if relation.type is not none else spark_get_relation_type(relation) %} + {% call statement('drop_relation', auto_begin=False) -%} + drop {{ type }} if exists {{ relation }} + {%- endcall %} +{% endmacro %} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql new file mode 100644 index 00000000..f118caf2 --- /dev/null +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -0,0 +1,83 @@ + +{% macro dbt__incremental_delete(target_relation, tmp_relation) -%} + + {%- set unique_key = config.require('unique_key') -%} + + delete + from {{ target_relation }} + where ({{ unique_key }}) in ( + select ({{ unique_key }}) + from {{ tmp_relation.include(schema=False, database=False) }} + ); + +{%- endmacro %} + +{% materialization incremental, default -%} + {%- set sql_where = config.get('sql_where') -%} + {%- set unique_key = config.get('unique_key') -%} + + {%- set identifier = model['alias'] -%} + {%- set tmp_identifier = identifier + '__dbt_incremental_tmp' -%} + + {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} + {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, + schema=schema, + database=database, type='table') -%} + + {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} + {%- set should_drop = full_refresh_mode or (old_relation is not none and not old_relation.is_table -%} + + -- setup + {% if old_relation is none -%} + -- noop + {%- elif should_drop -%} + {{ adapter.drop_relation(old_relation) }} + {%- set old_relation = none -%} + {%- endif %} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + -- `BEGIN` happens here: + {{ run_hooks(pre_hooks, inside_transaction=True) }} + + -- build model + {% if full_refresh_mode or old_relation is none -%} + {%- call statement('main') -%} + {{ create_table_as(False, target_relation, sql) }} + {%- endcall -%} + {%- else -%} + {%- call statement() -%} + {{ dbt.create_table_as(True, tmp_relation, sql) }} + + {%- endcall -%} + + {{ adapter.expand_target_column_types(temp_table=tmp_identifier, + to_relation=target_relation) }} + + {%- call statement('main') -%} + {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} + {% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %} + + {% if unique_key is not none -%} + + {{ dbt__incremental_delete(target_relation, tmp_relation) }} + + {%- endif %} + + insert into {{ target_relation }} ({{ dest_cols_csv }}) + ( + select {{ dest_cols_csv }} + from {{ tmp_relation.include(schema=False, database=False) }} + ); + {% endcall %} + {%- endif %} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + + -- `COMMIT` happens here + {{ adapter.commit() }} 
+ + {{ run_hooks(post_hooks, inside_transaction=False) }} + +{%- endmaterialization %} diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..967f4500 --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +from setuptools import find_packages +from distutils.core import setup + +package_name = "dbt-spark" +package_version = "0.13.0" +description = """The dbt_spark adpter plugin for dbt (data build tool)""" + +setup( + name=package_name, + version=package_version, + description=description, + long_description=description, + author='Drew Banin', + author_email='drew@fishtownanalytics.com', + url='https://github.com/fishtown-analytics/dbt-spark', + packages=find_packages(), + package_data={ + 'dbt': [ + 'include/dbt_spark/dbt_project.yml', + 'include/dbt_spark/macros/*.sql', + ] + }, + install_requires=[ + 'dbt-core=={}'.format(package_version), + 'JayDeBeApi==1.1.1' + ] +) From bd92038313505ee7685103f509312d0f863eb1f8 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 20 Mar 2019 19:41:52 -0400 Subject: [PATCH 002/603] use pyhive+thrift for conn --- dbt/adapters/spark/connections.py | 81 +++++++++---------------------- setup.py | 5 +- 2 files changed, 26 insertions(+), 60 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index ae480d8e..6e0b0327 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -5,9 +5,11 @@ from dbt.logger import GLOBAL_LOGGER as logger import dbt.exceptions -import jaydebeapi +from pyhive import hive +from thrift.transport import THttpClient +import base64 -JDBC_CONN_STRING = 'jdbc:spark://{creds.host}:{creds.port}/{creds.schema};{jdbc_conf}' # noqa +SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/0/{cluster}" SPARK_CREDENTIALS_CONTRACT = { 'type': 'object', @@ -21,36 +23,20 @@ 'minimum': 0, 'maximum': 65535, }, + 'cluster': { + 'type': 'string' + }, 'database': { - 'type': ['string'], + 'type': 'string', }, 'schema': { 'type': 'string', }, - 'user': { - 'type': 'string' - }, - 'password': { - 'type': 'string' - }, - 'jdbc_driver': { - 'type': 'object', - 'properties': { - 'class': { - 'type': 'string' - }, - 'path': { - 'type': 'string' - }, - }, - 'required': ['class', 'path'] + 'token': { + 'type': 'string', }, - 'jdbc_config': { - 'type': 'object' - } }, - 'required': ['host', 'port', 'user', 'password', 'jdbc_driver', - 'jdbc_config', 'database', 'schema'], + 'required': ['host', 'database', 'schema', 'cluster'], } @@ -59,7 +45,6 @@ class SparkCredentials(Credentials): def __init__(self, *args, **kwargs): kwargs.setdefault('database', kwargs.get('schema')) - kwargs.setdefault('jdbc_config', {}) super(SparkCredentials, self).__init__(*args, **kwargs) @@ -68,8 +53,7 @@ def type(self): return 'spark' def _connection_keys(self): - return ('host', 'port', 'schema', 'user', 'jdbc_driver', - 'jdbc_config') + return ('host', 'port', 'cluster', 'schema') class ConnectionWrapper(object): @@ -92,15 +76,9 @@ def close(self): # TODO? 
self.handle.close() - def commit(self, *args, **kwargs): - logger.debug("NotImplemented: commit") - def rollback(self, *args, **kwargs): logger.debug("NotImplemented: rollback") - def start_transaction(self, *args, **kwargs): - logger.debug("NotImplemented: start_transaction") - def fetchall(self): return self._cursor.fetchall() @@ -142,40 +120,27 @@ def commit(self, *args, **kwargs): def rollback(self, *args, **kwargs): logger.debug("NotImplemented: rollback") - @classmethod - def _build_jdbc_url(cls, creds): - jdbc_conf = ";".join( - "{}={}".format(key, val) - for (key, val) in creds.jdbc_config.items() - ) - - return JDBC_CONN_STRING.format(creds=creds, jdbc_conf=jdbc_conf) - @classmethod def open(cls, connection): if connection.state == 'open': logger.debug('Connection is already open, skipping open.') return connection - credentials = connection.credentials - - jdbc_url = cls._build_jdbc_url(credentials) - auth = { - "user": credentials.user, - "password": credentials.password - } + conn_url = SPARK_CONNECTION_URL.format(**connection.credentials) + transport = THttpClient.THttpClient(conn_url) - conn = jaydebeapi.connect( - credentials.jdbc_driver['class'], - jdbc_url, - auth, - credentials.jdbc_driver['path'] - ) + creds = "token:{}".format(connection.credentials['token']).encode() + token = base64.standard_b64encode(creds).decode() + transport.setCustomHeaders({ + 'Authorization': 'Basic {}'.format(token) + }) - wrapped = ConnectionWrapper(conn) + conn = hive.connect(thrift_transport=transport) + #import ipdb; ipdb.set_trace() + #wrapped = ConnectionWrapper(conn) connection.state = 'open' - connection.handle = wrapped + connection.handle = conn # Should we wrap? return connection @classmethod diff --git a/setup.py b/setup.py index 967f4500..9193b07b 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-spark" -package_version = "0.13.0" +package_version = "0.13.0-rc1" description = """The dbt_spark adpter plugin for dbt (data build tool)""" setup( @@ -23,6 +23,7 @@ }, install_requires=[ 'dbt-core=={}'.format(package_version), - 'JayDeBeApi==1.1.1' + 'PyHive>=0.6.0,<0.7.0', + 'thrift>=0.11.0,<0.12.0' ] ) From c13b20a216e4b8e914ddad8c385085b5b980d025 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 20 Mar 2019 23:23:37 -0400 Subject: [PATCH 003/603] implement incremental and ephemeral materializations --- dbt/adapters/spark/connections.py | 21 +++-- dbt/include/spark/macros/adapters.sql | 27 +++++- .../macros/materializations/incremental.sql | 91 +++++++------------ 3 files changed, 72 insertions(+), 67 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 6e0b0327..98ae2b03 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -9,6 +9,7 @@ from thrift.transport import THttpClient import base64 + SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/0/{cluster}" SPARK_CREDENTIALS_CONTRACT = { @@ -73,7 +74,6 @@ def cancel(self): self._cursor.cancel() def close(self): - # TODO? self.handle.close() def rollback(self, *args, **kwargs): @@ -100,12 +100,18 @@ class SparkConnectionManager(SQLConnectionManager): def exception_handler(self, sql, connection_name='master'): try: yield - # TODO: introspect into `DatabaseError`s and expose `errorName`, - # `errorType`, etc instead of stack traces full of garbage! 
except Exception as exc: logger.debug("Error while running:\n{}".format(sql)) logger.debug(exc) - raise dbt.exceptions.RuntimeException(exc) + if len(exc.args) == 0: + raise + + thrift_resp = exc.args[0] + if hasattr(thrift_resp, 'status'): + msg = thrift_resp.status.errorMessage + raise dbt.exceptions.RuntimeException(msg) + else: + raise dbt.exceptions.RuntimeException(str(exc)) # No transactions on Spark.... def add_begin_query(self, *args, **kwargs): @@ -136,16 +142,15 @@ def open(cls, connection): }) conn = hive.connect(thrift_transport=transport) - #import ipdb; ipdb.set_trace() - #wrapped = ConnectionWrapper(conn) + wrapped = ConnectionWrapper(conn) connection.state = 'open' - connection.handle = conn # Should we wrap? + connection.handle = wrapped return connection @classmethod def get_status(cls, cursor): - # No status from the cursor... + #status = cursor._cursor.poll() return 'OK' def cancel(self, connection): diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index ae380403..86579813 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -5,11 +5,36 @@ {{ sql }} {% endmacro %} +{% macro file_format_clause() %} + {%- set file_format = config.get('file_format', validator=validation.any[basestring]) -%} + {%- if file_format is not none %} + using {{ file_format }} + {%- endif %} +{%- endmacro -%} + +{% macro partition_cols(label, required=false) %} + {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%} + {%- if cols is not none %} + {%- if cols is string -%} + {%- set cols = [cols] -%} + {%- endif -%} + {{ label }} ( + {%- for item in cols -%} + {{ item }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor -%} + ) + {%- endif %} +{%- endmacro -%} + {% macro spark__create_table_as(temporary, relation, sql) -%} {% if temporary -%} {{ spark_create_temporary_view(relation, sql) }} {%- else -%} - create table {{ relation }} as + create table {{ relation }} + {{ file_format_clause() }} + {{ partition_cols(label="partitioned by") }} + as {{ sql }} {%- endif %} {%- endmacro -%} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index f118caf2..4313c6e4 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -1,83 +1,58 @@ +{% materialization incremental, adapter='spark' -%} -{% macro dbt__incremental_delete(target_relation, tmp_relation) -%} - - {%- set unique_key = config.require('unique_key') -%} - - delete - from {{ target_relation }} - where ({{ unique_key }}) in ( - select ({{ unique_key }}) - from {{ tmp_relation.include(schema=False, database=False) }} - ); - -{%- endmacro %} - -{% materialization incremental, default -%} - {%- set sql_where = config.get('sql_where') -%} - {%- set unique_key = config.get('unique_key') -%} + {%- set partitions = config.get('partition_by') -%} + {% if not partitions %} + {% do exceptions.raise_compiler_error("Table partitions are required for incremental models on Spark") %} + {% endif %} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = identifier + '__dbt_incremental_tmp' -%} + {%- set tmp_identifier = model['alias'] ~ "__dbt_tmp" -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') 
-%} - {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, - schema=schema, - database=database, type='table') -%} + {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, type='table') -%} - {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} - {%- set should_drop = full_refresh_mode or (old_relation is not none and not old_relation.is_table -%} + {%- set full_refresh = flags.FULL_REFRESH == True and old_relation is not none -%} + {%- set old_relation_is_view = old_relation is not none and old_relation.is_view -%} - -- setup - {% if old_relation is none -%} - -- noop - {%- elif should_drop -%} + {%- if full_refresh or old_relation_is_view -%} {{ adapter.drop_relation(old_relation) }} {%- set old_relation = none -%} {%- endif %} - {{ run_hooks(pre_hooks, inside_transaction=False) }} + {{ run_hooks(pre_hooks) }} - -- `BEGIN` happens here: - {{ run_hooks(pre_hooks, inside_transaction=True) }} + {% call statement() %} + set spark.sql.sources.partitionOverwriteMode = DYNAMIC + {% endcall %} - -- build model - {% if full_refresh_mode or old_relation is none -%} - {%- call statement('main') -%} - {{ create_table_as(False, target_relation, sql) }} - {%- endcall -%} - {%- else -%} - {%- call statement() -%} - {{ dbt.create_table_as(True, tmp_relation, sql) }} + {% call statement() %} + set spark.sql.hive.convertMetastoreParquet = false + {% endcall %} - {%- endcall -%} - {{ adapter.expand_target_column_types(temp_table=tmp_identifier, - to_relation=target_relation) }} + {#-- This is required to make dbt's incremental scheme work #} + {%- if old_relation is none -%} - {%- call statement('main') -%} - {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} - {% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %} + {%- call statement('main') -%} + {{ create_table_as(False, target_relation, sql) }} + {%- endcall %} - {% if unique_key is not none -%} + {%- else -%} - {{ dbt__incremental_delete(target_relation, tmp_relation) }} + {%- call statement('main') -%} + {{ create_table_as(True, tmp_relation, sql) }} + {%- endcall -%} - {%- endif %} + {#-- insert statements don't like CTEs, so support them via a temp view #} + {%- call statement() -%} + insert overwrite table {{ target_relation }} + {{ partition_cols(label="partition") }} + select * from {{ tmp_relation.include(database=false, schema=false) }} + {%- endcall -%} - insert into {{ target_relation }} ({{ dest_cols_csv }}) - ( - select {{ dest_cols_csv }} - from {{ tmp_relation.include(schema=False, database=False) }} - ); - {% endcall %} {%- endif %} - {{ run_hooks(post_hooks, inside_transaction=True) }} - - -- `COMMIT` happens here - {{ adapter.commit() }} - - {{ run_hooks(post_hooks, inside_transaction=False) }} + {{ run_hooks(post_hooks) }} {%- endmaterialization %} From 17da2cae5680831dcc83e64fb159f73d0f246480 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 21 Mar 2019 09:53:33 -0400 Subject: [PATCH 004/603] Create README.md --- README.md | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..0e33f16f --- /dev/null +++ b/README.md @@ -0,0 +1,83 @@ +## dbt-spark + +### Installation +This plugin can be installed via pip: +``` +$ pip install dbt-spark +``` + +### Configuring your profile + +A dbt profile can be configured to run against Spark using the following configuration: + +| Option | Description | Required? 
| Example | +|---------|----------------------------------------------------|-------------------------|--------------------------| +| schema | Specify the schema (database) to build models into | Required | `analytics` | +| host | The hostname to connect to | Required | `yourorg.sparkhost.com` | +| port | The port to connect to the host on | Optional (default: 443) | `443` | +| token | The token to use for authenticating to the cluster | Required | `abc123` | +| cluster | The name of the cluster to connect to | Required | `01234-23423-coffeetime` | + + +**Example profiles.yml entry:** +``` +your_profile_name: + target: dev + outputs: + dev: + type: spark + schema: analytics + host: yourorg.sparkhost.com + port: 443 + token: abc123 + cluster: 01234-23423-coffeetime +``` + +### Usage Notes + +**Model Configuration** + +The following configurations can be supplied to models run with the dbt-spark plugin: + + +| Option | Description | Required? | Example | +|---------|----------------------------------------------------|-------------------------|--------------------------| +| file_format | The file format to use when creating tables | Optional | `parquet` | + + + +**Incremental Models** + +Spark does not natively support `delete`, `update`, or `merge` statements. As such, [incremental models](https://docs.getdbt.com/docs/configuring-incremental-models) +are implemented differently than usual in this plugin. To use incremental models, specify a `partition_by` clause in your model config. +dbt will use an `insert overwrite` query to overwrite the partitions included in your query. Be sure to re-select _all_ of the relevant +data for a partition when using incremental models. + +``` +{{ config( + materialized='incremental', + partition_by=['date_day'], + file_format='parquet' +) }} + +/* + Every partition returned by this query will be overwritten + when this model runs +*/ + +select + date_day, + count(*) as users + +from {{ ref('events') }} +where date_day::date >= '2019-01-01' +group by 1 +``` + +### Reporting bugs and contributing code + +- Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt-spark/issues/new). + +## Code of Conduct + +Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/). 
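To make the `insert overwrite` strategy above concrete: for a model like the example in the README, a non-first incremental run compiles to statements along the lines of the sketch below. The schema, table, and temporary-view names here are illustrative; the exact SQL is generated by the `incremental` materialization macro introduced earlier in this series.

```
-- Session setting applied by the incremental materialization: only the
-- partitions returned by the query are replaced; other partitions are kept.
set spark.sql.sources.partitionOverwriteMode = DYNAMIC;

-- Stage the model query as a temporary view (illustrative names)
create temporary view users_by_day__dbt_tmp as
    select
        count(*) as users,
        date_day  -- partition column listed last, matching the table's column order
    from analytics.events
    where date_day >= '2019-01-01'
    group by date_day;

-- Overwrite only the date_day partitions produced by the staged query
insert overwrite table analytics.users_by_day
partition (date_day)
select * from users_by_day__dbt_tmp;
```

Because `partitionOverwriteMode` is `DYNAMIC`, partitions of the target table that the query does not return are left untouched, which is why each incremental run must re-select all of the data for any partition it writes.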
From afcbd83136e24586a6f849deb6fcbb3cc59d9f2f Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 24 Mar 2019 11:41:30 -0400 Subject: [PATCH 005/603] defer relation type inspection to when it is needed --- dbt/adapters/spark/impl.py | 9 +++++---- setup.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 925fbc98..00dee348 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -53,6 +53,8 @@ def get_relation_type(self, relation, model_name=None): release=True ) + # Override that creates macros without a known type - adapter macros that + # require a type will dynamically check at query-time def list_relations_without_caching(self, information_schema, schema, model_name=None): kwargs = {'information_schema': information_schema, 'schema': schema} @@ -69,14 +71,13 @@ def list_relations_without_caching(self, information_schema, schema, 'identifier': True } for _database, name, _ in results: - rel = self.Relation.create( + relations.append(self.Relation.create( database=_database, schema=_database, identifier=name, quote_policy=quote_policy, - ) - rel.type = self.get_relation_type(rel) - relations.append(rel) + type=None + )) return relations # Override that doesn't check the type of the relation -- we do it diff --git a/setup.py b/setup.py index 9193b07b..d8577ede 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-spark" -package_version = "0.13.0-rc1" +package_version = "0.13.0" description = """The dbt_spark adpter plugin for dbt (data build tool)""" setup( From 848f3efb74125ed94d8928d107d2b5eb26ac9673 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 11 Apr 2019 09:03:57 -0400 Subject: [PATCH 006/603] fix spark dbt project path --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d8577ede..46f8478c 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,8 @@ packages=find_packages(), package_data={ 'dbt': [ - 'include/dbt_spark/dbt_project.yml', - 'include/dbt_spark/macros/*.sql', + 'include/spark/dbt_project.yml', + 'include/spark/macros/*.sql', ] }, install_requires=[ From 8cfe84ca16783c7335e521c8935885622cd8dd70 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Apr 2019 16:13:33 -0400 Subject: [PATCH 007/603] Handle interactive cluster startup, implement docs generation Fixes: #13, #12, #10 --- README.md | 5 ++- dbt/adapters/spark/connections.py | 37 +++++++++++++++++-- dbt/adapters/spark/impl.py | 52 +++++++++++++++++++++++++++ dbt/include/spark/macros/adapters.sql | 13 ++----- 4 files changed, 93 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0e33f16f..cf5c6430 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,8 @@ A dbt profile can be configured to run against Spark using the following configu | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | | port | The port to connect to the host on | Optional (default: 443) | `443` | | token | The token to use for authenticating to the cluster | Required | `abc123` | -| cluster | The name of the cluster to connect to | Required | `01234-23423-coffeetime` | +| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | +| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | **Example profiles.yml entry:** @@ -31,6 +32,8 @@ 
your_profile_name: port: 443 token: abc123 cluster: 01234-23423-coffeetime + connect_retries: 5 + connect_timeout: 60 ``` ### Usage Notes diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 98ae2b03..a3a95daf 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -8,6 +8,7 @@ from pyhive import hive from thrift.transport import THttpClient import base64 +import time SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/0/{cluster}" @@ -36,6 +37,16 @@ 'token': { 'type': 'string', }, + 'connect_timeout': { + 'type': 'integer', + 'minimum': 0, + 'maximum': 60, + }, + 'connect_retries': { + 'type': 'integer', + 'minimum': 0, + 'maximum': 60, + } }, 'required': ['host', 'database', 'schema', 'cluster'], } @@ -141,7 +152,28 @@ def open(cls, connection): 'Authorization': 'Basic {}'.format(token) }) - conn = hive.connect(thrift_transport=transport) + connect_retries = connection.credentials.get('connect_retries', 0) + connect_timeout = connection.credentials.get('connect_timeout', 10) + + exc = None + for i in range(1 + connect_retries): + try: + conn = hive.connect(thrift_transport=transport) + break + except Exception as e: + exc = e + if e.message and 'pending' in (e.message.lower()): + logger.warning("Warning: {}\n\tRetrying in {} seconds " + "({} of {})".format(e.message, + connect_timeout, + i + 1, + connect_retries)) + time.sleep(connect_timeout) + else: + raise + else: + raise exc + wrapped = ConnectionWrapper(conn) connection.state = 'open' @@ -150,9 +182,8 @@ def open(cls, connection): @classmethod def get_status(cls, cursor): - #status = cursor._cursor.poll() + # status = cursor._cursor.poll() return 'OK' def cancel(self, connection): - import ipdb; ipdb.set_trace() connection.handle.cancel() diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 00dee348..e4364ad3 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -3,6 +3,7 @@ from dbt.adapters.spark import SparkConnectionManager import dbt.exceptions +from dbt.logger import GLOBAL_LOGGER as logger import agate @@ -91,3 +92,54 @@ def drop_relation(self, relation, model_name=None): kwargs={'relation': relation}, connection_name=model_name ) + + def get_catalog(self, manifest): + connection = self.connections.get('catalog') + client = connection.handle + + schemas = manifest.get_used_schemas() + + column_names = ( + 'table_database', + 'table_schema', + 'table_name', + 'table_type', + 'table_comment', + 'table_owner', + 'column_name', + 'column_index', + 'column_type', + 'column_comment', + ) + + columns = [] + for (database_name, schema_name) in schemas: + relations = self.list_relations(database_name, schema_name) + for relation in relations: + logger.debug("Getting table schema for relation {}".format(relation)) + table_columns = self.get_columns_in_relation(relation) + rel_type = self.get_relation_type(relation) + + for column_index, column in enumerate(table_columns): + # Fixes for pseudocolumns with no type + if column.name in ('# Partition Information', '# col_name'): + continue + elif column.dtype is None: + continue + + column_data = ( + relation.database, + relation.schema, + relation.name, + rel_type, + None, + None, + column.name, + column_index, + column.data_type, + None, + ) + column_dict = dict(zip(column_names, column_data)) + columns.append(column_dict) + + return dbt.clients.agate_helper.table_from_data(columns, column_names) diff --git a/dbt/include/spark/macros/adapters.sql 
b/dbt/include/spark/macros/adapters.sql index 86579813..c4616ca8 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -72,23 +72,16 @@ {% endmacro %} -{% macro spark__check_schema_exists(information_schema, schema) -%} - {# TODO #} - {% call statement('check_schema_exists', fetch_result=True) -%} - show databases - {%- endcall %} - {{ return(load_result('check_schema_exists').table) }} -{%- endmacro %} - {% macro spark__current_timestamp() -%} current_timestamp() {%- endmacro %} + {% macro spark_get_relation_type(relation) -%} - {% call statement('check_schema_exists', fetch_result=True) -%} + {% call statement('get_relation_type', fetch_result=True) -%} SHOW TBLPROPERTIES {{ relation }} ('view.default.database') {%- endcall %} - {% set res = load_result('check_schema_exists').table %} + {% set res = load_result('get_relation_type').table %} {% if 'does not have property' in res[0][0] %} {{ return('table') }} {% else %} From 52426fd02a159a9aa89006bb438bcbf2d7e0e971 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Apr 2019 16:39:40 -0400 Subject: [PATCH 008/603] fix for exception without a message --- dbt/adapters/spark/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index a3a95daf..fc5f1358 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -162,7 +162,7 @@ def open(cls, connection): break except Exception as e: exc = e - if e.message and 'pending' in (e.message.lower()): + if hasattr(e, 'message') and 'pending' in (e.message.lower()): logger.warning("Warning: {}\n\tRetrying in {} seconds " "({} of {})".format(e.message, connect_timeout, From 4c92eb0ab10057ad75e0644f28fa0867723339ac Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Apr 2019 16:48:33 -0400 Subject: [PATCH 009/603] fix docs deletion --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cf5c6430..b0800de7 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ A dbt profile can be configured to run against Spark using the following configu | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | | port | The port to connect to the host on | Optional (default: 443) | `443` | | token | The token to use for authenticating to the cluster | Required | `abc123` | +| cluster | The name of the cluster to connect to | Required | `01234-23423-coffeetime` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | From 525bbfdc7a013a3b460c3ebe9680eeb38da2eb03 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Apr 2019 17:00:36 -0400 Subject: [PATCH 010/603] fix for initial "temporarily unavailable" state --- dbt/adapters/spark/connections.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index fc5f1358..e74bad85 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -162,12 +162,17 @@ def open(cls, connection): break except Exception as e: exc = e - if hasattr(e, 'message') and 'pending' in (e.message.lower()): - logger.warning("Warning: {}\n\tRetrying in {} seconds " - "({} of {})".format(e.message, - connect_timeout, - i + 1, - 
connect_retries)) + if not hasattr(e, 'message') or if e.message is None: + raise + + message = e.message.lower() + is_pending = 'pending' in message + is_starting = 'temporarily_unavailable' in message + + warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})" + if is_pending or is_starting: + logger.warning(warning.format(e.message, connect_timeout, + i + 1, connect_retries)) time.sleep(connect_timeout) else: raise From e45c9e6196cf928b008b98b84a3ca4012b3c5bfc Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 2 May 2019 11:14:36 -0600 Subject: [PATCH 011/603] fix error check --- dbt/adapters/spark/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index e74bad85..9c43c31d 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -162,7 +162,7 @@ def open(cls, connection): break except Exception as e: exc = e - if not hasattr(e, 'message') or if e.message is None: + if getattr(e, 'message', None) is None: raise message = e.message.lower() From 3e0189d856b9a872ff4f00655e1b5540563a58ad Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 10 May 2019 12:43:46 -0400 Subject: [PATCH 012/603] Table materialization. Drop without renaming --- .../spark/macros/materializations/table.sql | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 dbt/include/spark/macros/materializations/table.sql diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql new file mode 100644 index 00000000..f8748a3a --- /dev/null +++ b/dbt/include/spark/macros/materializations/table.sql @@ -0,0 +1,32 @@ +{% materialization table, adapter = 'spark' %} + {%- set identifier = model['alias'] -%} + + {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} + {% if non_destructive_mode %} + {{ exceptions.raise_compiler_error("--non-destructive mode is not supported on Spark") }} + {% endif %} + + {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set target_relation = api.Relation.create(identifier=identifier, + schema=schema, + database=database, + type='table') -%} + + {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} + {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} + + -- setup: if the target relation already exists, drop it + {% if old_relation -%} + {{ adapter.drop_relation(old_relation) }} + {%- endif %} + + {{ run_hooks(pre_hooks) }} + + -- build model + {% call statement('main') -%} + {{ create_table_as(False, target_relation, sql) }} + {%- endcall %} + + {{ run_hooks(post_hooks) }} + +{% endmaterialization %} From a91859946cba2cb32b7358ffe592805841ae1a2b Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 13 May 2019 16:35:29 -0400 Subject: [PATCH 013/603] Rm superfluous --- dbt/include/spark/macros/materializations/table.sql | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index f8748a3a..82281b79 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -1,20 +1,12 @@ {% materialization table, adapter = 'spark' %} {%- set identifier = model['alias'] -%} - {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} - {% if non_destructive_mode %} - {{ 
exceptions.raise_compiler_error("--non-destructive mode is not supported on Spark") }} - {% endif %} - {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} - {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} - {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} - -- setup: if the target relation already exists, drop it {% if old_relation -%} {{ adapter.drop_relation(old_relation) }} From 741febe9bf4b127134438b564fde4fb0c3709063 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 14 May 2019 18:34:34 -0400 Subject: [PATCH 014/603] Naive view as well --- .../spark/macros/materializations/table.sql | 5 ++-- .../spark/macros/materializations/view.sql | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 dbt/include/spark/macros/materializations/view.sql diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index 82281b79..9dcd477a 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -1,4 +1,5 @@ {% materialization table, adapter = 'spark' %} + {%- set identifier = model['alias'] -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} @@ -7,13 +8,13 @@ database=database, type='table') -%} + {{ run_hooks(pre_hooks) }} + -- setup: if the target relation already exists, drop it {% if old_relation -%} {{ adapter.drop_relation(old_relation) }} {%- endif %} - {{ run_hooks(pre_hooks) }} - -- build model {% call statement('main') -%} {{ create_table_as(False, target_relation, sql) }} diff --git a/dbt/include/spark/macros/materializations/view.sql b/dbt/include/spark/macros/materializations/view.sql new file mode 100644 index 00000000..58202fa8 --- /dev/null +++ b/dbt/include/spark/macros/materializations/view.sql @@ -0,0 +1,25 @@ +{% materialization view, adapter = 'spark' %} + + {%- set identifier = model['alias'] -%} + + {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set target_relation = api.Relation.create(identifier=identifier, + schema=schema, + database=database, + type='view') -%} + + {{ run_hooks(pre_hooks) }} + + -- setup: if the target relation already exists, drop it + {% if old_relation -%} + {{ adapter.drop_relation(old_relation) }} + {%- endif %} + + -- build model + {% call statement('main') -%} + {{ create_view_as(target_relation, sql) }} + {%- endcall %} + + {{ run_hooks(post_hooks) }} + +{%- endmaterialization -%} From bf0ec2a36315196c072a78eff059c01d3b2decbf Mon Sep 17 00:00:00 2001 From: "Housewright, Ross" Date: Fri, 17 May 2019 16:45:33 -0400 Subject: [PATCH 015/603] Adding `thrift` mode Adding a method selector to the profile configuration, to toggle between `http` (existing behavior) and `thrift` (direct conneciton to cluster). --- README.md | 32 ++++++++++++++++++++++++++---- dbt/adapters/spark/connections.py | 33 ++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index b0800de7..b8cba36d 100644 --- a/README.md +++ b/README.md @@ -8,25 +8,32 @@ $ pip install dbt-spark ### Configuring your profile +**Connection Method** + +Connections can be made to Spark in two different modes. 
The `http` mode is used when connecting to a managed service such as Databricks, which provides an HTTP endpoint; the `thrift` mode is used to connect directly to the master node of a cluster (either on-premise or in the cloud). + A dbt profile can be configured to run against Spark using the following configuration: | Option | Description | Required? | Example | |---------|----------------------------------------------------|-------------------------|--------------------------| +| method | Specify the connection method (`thrift` or `http`) | Required | `http` | | schema | Specify the schema (database) to build models into | Required | `analytics` | | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | -| port | The port to connect to the host on | Optional (default: 443) | `443` | -| token | The token to use for authenticating to the cluster | Required | `abc123` | -| cluster | The name of the cluster to connect to | Required | `01234-23423-coffeetime` | +| port | The port to connect to the host on | Optional (default: 443 for `http`, 10000 for `thrift`) | `443` | +| token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | +| cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | +|user | The username to use to connect to the cluster | Optional | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | -**Example profiles.yml entry:** +**Example profiles.yml entries:** ``` your_profile_name: target: dev outputs: dev: + method: http type: spark schema: analytics host: yourorg.sparkhost.com @@ -37,6 +44,23 @@ your_profile_name: connect_timeout: 60 ``` +``` +your_profile_name: + target: dev + outputs: + dev: + method: thrift + type: spark + schema: analytics + host: 127.0.0.1 + port: 10000 + user: hadoop + connect_retries: 5 + connect_timeout: 60 +``` + + + ### Usage Notes **Model Configuration** diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 9c43c31d..63543fa5 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -17,6 +17,9 @@ 'type': 'object', 'additionalProperties': False, 'properties': { + 'method': { + 'enum': ['thrift', 'http'], + }, 'host': { 'type': 'string' }, @@ -25,6 +28,9 @@ 'minimum': 0, 'maximum': 65535, }, + 'user': { + 'type': 'string' + }, 'cluster': { 'type': 'string' }, @@ -48,7 +54,7 @@ 'maximum': 60, } }, - 'required': ['host', 'database', 'schema', 'cluster'], + 'required': ['method', 'host', 'database', 'schema'], } @@ -143,22 +149,27 @@ def open(cls, connection): logger.debug('Connection is already open, skipping open.') return connection - conn_url = SPARK_CONNECTION_URL.format(**connection.credentials) - transport = THttpClient.THttpClient(conn_url) - - creds = "token:{}".format(connection.credentials['token']).encode() - token = base64.standard_b64encode(creds).decode() - transport.setCustomHeaders({ - 'Authorization': 'Basic {}'.format(token) - }) - connect_retries = connection.credentials.get('connect_retries', 0) connect_timeout = connection.credentials.get('connect_timeout', 10) exc = None for i in range(1 + connect_retries): try: - conn = hive.connect(thrift_transport=transport) + if connection.credentials['method'] == 'http': + conn_url = 
SPARK_CONNECTION_URL.format(**connection.credentials) + transport = THttpClient.THttpClient(conn_url) + + creds = "token:{}".format(connection.credentials['token']).encode() + token = base64.standard_b64encode(creds).decode() + transport.setCustomHeaders({ + 'Authorization': 'Basic {}'.format(token) + }) + + conn = hive.connect(thrift_transport=transport) + elif connection.credentials['method'] == 'thrift': + conn = hive.connect(host=connection.credentials['host'], + port=connection.credentials.get('port'), + username=connection.credentials.get('username')) break except Exception as e: exc = e From b282ad6807e7ec2db6ebc3cda4a47b16ed61f482 Mon Sep 17 00:00:00 2001 From: "Housewright, Ross" Date: Tue, 18 Jun 2019 21:30:38 -0400 Subject: [PATCH 016/603] Update README.md Adding details about how to connect to EMR. --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b8cba36d..515b1c5d 100644 --- a/README.md +++ b/README.md @@ -19,13 +19,17 @@ A dbt profile can be configured to run against Spark using the following configu | method | Specify the connection method (`thrift` or `http`) | Required | `http` | | schema | Specify the schema (database) to build models into | Required | `analytics` | | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | -| port | The port to connect to the host on | Optional (default: 443 for `http`, 10000 for `thrift`) | `443` | +| port | The port to connect to the host on | Optional (default: 443 for `http`, 10001 for `thrift`) | `443` | | token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | | cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | |user | The username to use to connect to the cluster | Optional | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | +**Usage with Amazon EMR** + +To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo /usr/lib/spark/sbin/start-thriftserver.sh` on the master node of the cluster to start the Thrift server (see https://aws.amazon.com/premiumsupport/knowledge-center/jdbc-connection-emr/ for further context). You will also need to connect to port `10001`, which will connect to the Spark backend Thrift server; port `10000` will instead connect to a Hive backend, which will not work correctly with dbt. 
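+
+For a quick connectivity check against an EMR cluster, the same Thrift connection the plugin opens can be reproduced directly with PyHive. This is only a sketch: the hostname and username below are placeholders, and the call mirrors the `hive.connect(host=..., port=..., username=...)` invocation used by the `thrift` method.
+
+```
+# Manual smoke test against the Spark Thrift server on an EMR master node.
+# Host and username are placeholders -- substitute your own values.
+from pyhive import hive
+
+conn = hive.connect(
+    host="ec2-xx-xx-xx-xx.compute-1.amazonaws.com",  # EMR master node (placeholder)
+    port=10001,                                      # Spark Thrift server, not Hive (10000)
+    username="hadoop",
+)
+cursor = conn.cursor()
+cursor.execute("show databases")
+print(cursor.fetchall())
+```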
+ **Example profiles.yml entries:** ``` @@ -53,7 +57,7 @@ your_profile_name: type: spark schema: analytics host: 127.0.0.1 - port: 10000 + port: 10001 user: hadoop connect_retries: 5 connect_timeout: 60 From 632c23371b6038eea54ad123c21b3a8abd432311 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 30 Jun 2019 15:14:42 -0400 Subject: [PATCH 017/603] add unit tests; fix username config bug --- .circleci/config.yml | 14 ++++ .gitignore | 1 + dbt/adapters/spark/__init__.py | 4 +- dbt/adapters/spark/connections.py | 110 ++++++++++++++++++++++++++---- dbt/adapters/spark/impl.py | 12 ++-- dbt/adapters/spark/relation.py | 2 - dev_requirements.txt | 10 +++ requirements.txt | 3 + setup.py | 2 +- test/__init__.py | 0 test/unit/__init__.py | 0 test/unit/test_adapter.py | 89 ++++++++++++++++++++++++ test/unit/utils.py | 49 +++++++++++++ tox.ini | 17 +++++ 14 files changed, 287 insertions(+), 26 deletions(-) create mode 100644 .circleci/config.yml create mode 100644 dev_requirements.txt create mode 100644 requirements.txt create mode 100644 test/__init__.py create mode 100644 test/unit/__init__.py create mode 100644 test/unit/test_adapter.py create mode 100644 test/unit/utils.py create mode 100644 tox.ini diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..eb35cf17 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,14 @@ +version: 2 +jobs: + unit: + docker: + - image: fishtownjacob/test-container + steps: + - checkout + - run: tox -e flake8,unit + +workflows: + version: 2 + test-everything: + jobs: + - unit diff --git a/.gitignore b/.gitignore index 98d9a8e8..201bd403 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ env/ *.pyc __pycache__ +.tox/ diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py index 278cf11e..e933ac27 100644 --- a/dbt/adapters/spark/__init__.py +++ b/dbt/adapters/spark/__init__.py @@ -1,6 +1,6 @@ -from dbt.adapters.spark.connections import SparkConnectionManager +from dbt.adapters.spark.connections import SparkConnectionManager # noqa from dbt.adapters.spark.connections import SparkCredentials -from dbt.adapters.spark.relation import SparkRelation +from dbt.adapters.spark.relation import SparkRelation # noqa from dbt.adapters.spark.impl import SparkAdapter from dbt.adapters.base import AdapterPlugin diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 63543fa5..974c557a 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -5,8 +5,10 @@ from dbt.logger import GLOBAL_LOGGER as logger import dbt.exceptions -from pyhive import hive +from TCLIService.ttypes import TOperationState as ThriftState from thrift.transport import THttpClient +from pyhive import hive + import base64 import time @@ -77,6 +79,7 @@ def _connection_keys(self): class ConnectionWrapper(object): "Wrap a Spark connection in a way that no-ops transactions" # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html + def __init__(self, handle): self.handle = handle self._cursor = None @@ -88,10 +91,25 @@ def cursor(self): def cancel(self): if self._cursor is not None: - self._cursor.cancel() + # Handle bad response in the pyhive lib when + # the connection is cancelled + try: + self._cursor.cancel() + except EnvironmentError as exc: + logger.debug( + "Exception while cancelling query: {}".format(exc) + ) def close(self): - self.handle.close() + if self._cursor is not None: + # Handle bad response in the pyhive lib when + # the 
connection is cancelled + try: + self._cursor.close() + except EnvironmentError as exc: + logger.debug( + "Exception while closing cursor: {}".format(exc) + ) def rollback(self, *args, **kwargs): logger.debug("NotImplemented: rollback") @@ -103,7 +121,54 @@ def execute(self, sql, bindings=None): if sql.strip().endswith(";"): sql = sql.strip()[:-1] - return self._cursor.execute(sql, bindings) + # Reaching into the private enumeration here is bad form, + # but there doesn't appear to be any way to determine that + # a query has completed executing from the pyhive public API. + # We need to use an async query + poll here, otherwise our + # request may be dropped after ~5 minutes by the thrift server + STATE_PENDING = [ + ThriftState.INITIALIZED_STATE, + ThriftState.RUNNING_STATE, + ThriftState.PENDING_STATE, + ] + + STATE_SUCCESS = [ + ThriftState.FINISHED_STATE, + ] + + self._cursor.execute(sql, bindings, async_=True) + poll_state = self._cursor.poll() + state = poll_state.operationState + + while state in STATE_PENDING: + logger.debug("Poll status: {}, sleeping".format(state)) + + poll_state = self._cursor.poll() + state = poll_state.operationState + + # If an errorMessage is present, then raise a database exception + # with that exact message. If no errorMessage is present, the + # query did not necessarily succeed: check the state against the + # known successful states, raising an error if the query did not + # complete in a known good state. This can happen when queries are + # cancelled, for instance. The errorMessage will be None, but the + # state of the query will be "cancelled". By raising an exception + # here, we prevent dbt from showing a status of OK when the query + # has in fact failed. + if poll_state.errorMessage: + logger.debug("Poll response: {}".format(poll_state)) + logger.debug("Poll status: {}".format(state)) + dbt.exceptions.raise_database_error(poll_state.errorMessage) + + elif state not in STATE_SUCCESS: + status_type = ThriftState._VALUES_TO_NAMES.get( + state, + 'Unknown<{!r}>'.format(state)) + + dbt.exceptions.raise_database_error( + "Query failed with status: {}".format(status_type)) + + logger.debug("Poll status: {}, query complete".format(state)) @property def description(self): @@ -143,33 +208,49 @@ def commit(self, *args, **kwargs): def rollback(self, *args, **kwargs): logger.debug("NotImplemented: rollback") + @classmethod + def validate_creds(cls, creds, required): + method = creds.method + + for key in required: + if key not in creds: + raise dbt.exceptions.DbtProfileError( + "The config '{}' is required when using the {} method" + " to connect to Spark".format(key, method)) + @classmethod def open(cls, connection): if connection.state == 'open': logger.debug('Connection is already open, skipping open.') return connection - connect_retries = connection.credentials.get('connect_retries', 0) - connect_timeout = connection.credentials.get('connect_timeout', 10) + creds = connection.credentials + connect_retries = creds.get('connect_retries', 0) + connect_timeout = creds.get('connect_timeout', 10) exc = None for i in range(1 + connect_retries): try: - if connection.credentials['method'] == 'http': - conn_url = SPARK_CONNECTION_URL.format(**connection.credentials) + if creds.method == 'http': + cls.validate_creds(creds, ['token', 'host', 'port', + 'cluster']) + + conn_url = SPARK_CONNECTION_URL.format(**creds) transport = THttpClient.THttpClient(conn_url) - creds = "token:{}".format(connection.credentials['token']).encode() - token = 
base64.standard_b64encode(creds).decode() + raw_token = "token:{}".format(creds.token).encode() + token = base64.standard_b64encode(raw_token).decode() transport.setCustomHeaders({ 'Authorization': 'Basic {}'.format(token) }) conn = hive.connect(thrift_transport=transport) - elif connection.credentials['method'] == 'thrift': - conn = hive.connect(host=connection.credentials['host'], - port=connection.credentials.get('port'), - username=connection.credentials.get('username')) + elif creds.method == 'thrift': + cls.validate_creds(creds, ['host']) + + conn = hive.connect(host=creds.host, + port=creds.get('port'), + username=creds.get('user')) break except Exception as e: exc = e @@ -198,7 +279,6 @@ def open(cls, connection): @classmethod def get_status(cls, cursor): - # status = cursor._cursor.poll() return 'OK' def cancel(self, connection): diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index e4364ad3..0af7bd3b 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -81,7 +81,7 @@ def list_relations_without_caching(self, information_schema, schema, )) return relations - # Override that doesn't check the type of the relation -- we do it + # Override that doesn't check the type of the relation -- we do it # dynamically in the macro code def drop_relation(self, relation, model_name=None): if dbt.flags.USE_CACHE: @@ -94,9 +94,6 @@ def drop_relation(self, relation, model_name=None): ) def get_catalog(self, manifest): - connection = self.connections.get('catalog') - client = connection.handle - schemas = manifest.get_used_schemas() column_names = ( @@ -116,13 +113,16 @@ def get_catalog(self, manifest): for (database_name, schema_name) in schemas: relations = self.list_relations(database_name, schema_name) for relation in relations: - logger.debug("Getting table schema for relation {}".format(relation)) + logger.debug("Getting table schema for relation {}".format(relation)) # noqa table_columns = self.get_columns_in_relation(relation) rel_type = self.get_relation_type(relation) for column_index, column in enumerate(table_columns): # Fixes for pseudocolumns with no type - if column.name in ('# Partition Information', '# col_name'): + if column.name in ( + '# Partition Information', + '# col_name' + ): continue elif column.dtype is None: continue diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 18adfd4f..f089fa77 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -1,6 +1,4 @@ - from dbt.adapters.base.relation import BaseRelation -import dbt.utils class SparkRelation(BaseRelation): diff --git a/dev_requirements.txt b/dev_requirements.txt new file mode 100644 index 00000000..866e18b3 --- /dev/null +++ b/dev_requirements.txt @@ -0,0 +1,10 @@ +freezegun==0.3.9 +pytest==4.4.0 +mock>=1.3.0 +flake8>=3.5.0 +pytz==2017.2 +bumpversion==0.5.3 +tox==2.5.0 +ipdb +pytest-xdist>=1.28.0,<2 +flaky>=3.5.3,<4 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..62281993 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +dbt-core==0.13.0 +PyHive>=0.6.0,<0.7.0 +thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 46f8478c..ef3e8af3 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,6 @@ install_requires=[ 'dbt-core=={}'.format(package_version), 'PyHive>=0.6.0,<0.7.0', - 'thrift>=0.11.0,<0.12.0' + 'thrift>=0.11.0,<0.12.0', ] ) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/__init__.py 
b/test/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py new file mode 100644 index 00000000..89857886 --- /dev/null +++ b/test/unit/test_adapter.py @@ -0,0 +1,89 @@ +import mock +import unittest +import dbt.adapters +import dbt.flags as flags +from pyhive import hive +from dbt.adapters.spark import SparkAdapter +import agate + +from .utils import config_from_parts_or_dicts, inject_adapter + + +class TestSparkAdapter(unittest.TestCase): + + def setUp(self): + flags.STRICT_MODE = True + + self.project_cfg = { + 'name': 'X', + 'version': '0.1', + 'profile': 'test', + 'project-root': '/tmp/dbt/does-not-exist', + 'quoting': { + 'identifier': False, + 'schema': False, + } + } + + def get_target_http(self, project): + return config_from_parts_or_dicts(project, { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'http', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 443, + 'token': 'abc123', + 'cluster': '01234-23423-coffeetime', + } + }, + 'target': 'test' + }) + + def get_target_thrift(self, project): + return config_from_parts_or_dicts(project, { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'thrift', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 10001, + 'user': 'dbt' + } + }, + 'target': 'test' + }) + + def test_http_connection(self): + config = self.get_target_http(self.project_cfg) + adapter = SparkAdapter(config) + + + def hive_http_connect(thrift_transport): + self.assertEqual(thrift_transport.scheme, 'https') + self.assertEqual(thrift_transport.port, 443) + self.assertEqual(thrift_transport.host, 'myorg.sparkhost.com') + self.assertEqual(thrift_transport.path, '/sql/protocolv1/o/0/01234-23423-coffeetime') + + with mock.patch.object(hive, 'connect', new=hive_http_connect): + connection = adapter.acquire_connection('dummy') + + self.assertEqual(connection.state, 'open') + self.assertNotEqual(connection.handle, None) + + def test_thrift_connection(self): + config = self.get_target_thrift(self.project_cfg) + adapter = SparkAdapter(config) + + def hive_thrift_connect(host, port, username): + self.assertEqual(host, 'myorg.sparkhost.com') + self.assertEqual(port, 10001) + self.assertEqual(username, 'dbt') + + with mock.patch.object(hive, 'connect', new=hive_thrift_connect): + connection = adapter.acquire_connection('dummy') + + self.assertEqual(connection.state, 'open') + self.assertNotEqual(connection.handle, None) diff --git a/test/unit/utils.py b/test/unit/utils.py new file mode 100644 index 00000000..def039c2 --- /dev/null +++ b/test/unit/utils.py @@ -0,0 +1,49 @@ +"""Unit test utility functions. + +Note that all imports should be inside the functions to avoid import/mocking +issues. 
+""" +import mock + + +class Obj(object): + which = 'blah' + + +def mock_connection(name): + conn = mock.MagicMock() + conn.name = name + return conn + + +def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): + from dbt.config import Project, Profile, RuntimeConfig + from dbt.utils import parse_cli_vars + from copy import deepcopy + if not isinstance(cli_vars, dict): + cli_vars = parse_cli_vars(cli_vars) + if not isinstance(project, Project): + project = Project.from_project_config(deepcopy(project), packages) + if not isinstance(profile, Profile): + profile = Profile.from_raw_profile_info(deepcopy(profile), + project.profile_name, + cli_vars) + args = Obj() + args.vars = repr(cli_vars) + return RuntimeConfig.from_parts( + project=project, + profile=profile, + args=args + ) + + +def inject_adapter(value): + """Inject the given adapter into the adapter factory, so your hand-crafted + artisanal adapter will be available from get_adapter() as if dbt loaded it. + """ + from dbt.adapters import factory + from dbt.adapters.base.connections import BaseConnectionManager + key = value.type() + factory._ADAPTERS[key] = value + factory.ADAPTER_TYPES[key] = type(value) + diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..8eca64a2 --- /dev/null +++ b/tox.ini @@ -0,0 +1,17 @@ +[tox] +skipsdist = True +envlist = unit, flake8 + + +[testenv:flake8] +basepython = python3 +commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 dbt/' +deps = + -r{toxinidir}/dev_requirements.txt + +[testenv:unit] +basepython = python3 +commands = /bin/bash -c '{envpython} -m pytest -v {posargs} test/unit' +deps = + -r{toxinidir}/requirements.txt + -r{toxinidir}/dev_requirements.txt From 708271725acd1a7b79d7a827c6598076f16ec5e9 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 3 Jul 2019 12:18:42 -0400 Subject: [PATCH 018/603] prep release for 0.13.0 --- README.md | 5 ++++- setup.py | 20 ++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 515b1c5d..b1a69eb7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ ## dbt-spark +### Documentation +For more information on using Spark with dbt, consult the [dbt documentation](https://docs.getdbt.com/docs/profile-spark). 
+ ### Installation This plugin can be installed via pip: ``` @@ -22,7 +25,7 @@ A dbt profile can be configured to run against Spark using the following configu | port | The port to connect to the host on | Optional (default: 443 for `http`, 10001 for `thrift`) | `443` | | token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | | cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | -|user | The username to use to connect to the cluster | Optional | `hadoop` | +| user | The username to use to connect to the cluster | Optional | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | diff --git a/setup.py b/setup.py index ef3e8af3..80fb7efb 100644 --- a/setup.py +++ b/setup.py @@ -1,24 +1,36 @@ #!/usr/bin/env python from setuptools import find_packages from distutils.core import setup +import os + + +this_directory = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(this_directory, 'README.md')) as f: + long_description = f.read() + package_name = "dbt-spark" package_version = "0.13.0" -description = """The dbt_spark adpter plugin for dbt (data build tool)""" +description = """The SparkSQL plugin for dbt (data build tool)""" setup( name=package_name, version=package_version, + description=description, - long_description=description, - author='Drew Banin', - author_email='drew@fishtownanalytics.com', + long_description=long_description, + long_description_content_type='text/markdown', + + author='Fishtown Analytics', + author_email='info@fishtownanalytics.com', url='https://github.com/fishtown-analytics/dbt-spark', + packages=find_packages(), package_data={ 'dbt': [ 'include/spark/dbt_project.yml', 'include/spark/macros/*.sql', + 'include/spark/macros/**/*.sql', ] }, install_requires=[ From b4db0548266f260c3b75903ce08ad140805b355a Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 3 Jul 2019 13:58:26 -0400 Subject: [PATCH 019/603] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index b1a69eb7..821b3ce7 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,10 @@ For more information on using Spark with dbt, consult the [dbt documentation](ht ### Installation This plugin can be installed via pip: ``` +# Install prerequisites: +$ pip install pyhive[hive] + +# Install dbt-spark: $ pip install dbt-spark ``` From f2de0636936d5aee7cc3e77ca10295219a0a800a Mon Sep 17 00:00:00 2001 From: Rob Poidomani Date: Sun, 29 Sep 2019 11:48:48 -0700 Subject: [PATCH 020/603] Update connections.py --- dbt/adapters/spark/connections.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 974c557a..4f144e21 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -12,8 +12,8 @@ import base64 import time - -SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/0/{cluster}" +#need to add organization as a parameter, as its required by Azure Databricks and is different per customer. 
+SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" SPARK_CREDENTIALS_CONTRACT = { 'type': 'object', @@ -33,6 +33,9 @@ 'user': { 'type': 'string' }, + 'organization': { + 'type': 'number' + }, 'cluster': { 'type': 'string' }, @@ -65,6 +68,7 @@ class SparkCredentials(Credentials): def __init__(self, *args, **kwargs): kwargs.setdefault('database', kwargs.get('schema')) + kwargs.setdefault('organization', 0) super(SparkCredentials, self).__init__(*args, **kwargs) @@ -73,7 +77,7 @@ def type(self): return 'spark' def _connection_keys(self): - return ('host', 'port', 'cluster', 'schema') + return ('host', 'port', 'cluster', 'schema', 'organization') class ConnectionWrapper(object): @@ -232,8 +236,9 @@ def open(cls, connection): for i in range(1 + connect_retries): try: if creds.method == 'http': + cls.validate_creds(creds, ['token', 'host', 'port', - 'cluster']) + 'cluster', 'organization']) conn_url = SPARK_CONNECTION_URL.format(**creds) transport = THttpClient.THttpClient(conn_url) From 72247c6e9a418e82313cda2d2f1dadb36c2451c0 Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Wed, 2 Oct 2019 08:59:00 -0700 Subject: [PATCH 021/603] Coercing organization to string --- dbt/adapters/spark/connections.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 4f144e21..cdd1701d 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -12,7 +12,7 @@ import base64 import time -#need to add organization as a parameter, as its required by Azure Databricks and is different per customer. +#adding organization as a parameter, as it is required by Azure Databricks and is different per workspace. SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" SPARK_CREDENTIALS_CONTRACT = { @@ -34,7 +34,7 @@ 'type': 'string' }, 'organization': { - 'type': 'number' + 'type': 'string' }, 'cluster': { 'type': 'string' @@ -68,7 +68,12 @@ class SparkCredentials(Credentials): def __init__(self, *args, **kwargs): kwargs.setdefault('database', kwargs.get('schema')) - kwargs.setdefault('organization', 0) + + #coercing org to a string since it is unknown whether Azure Databricks will always keep it numeric + if 'organization' in kwargs: + kwargs['organization'] = str(kwargs['organization']) + else: + kwargs['organization'] = '0' super(SparkCredentials, self).__init__(*args, **kwargs) From 234ab7ae81986d47841c282185df7e48eefcd2ba Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Wed, 2 Oct 2019 09:00:05 -0700 Subject: [PATCH 022/603] comment format --- dbt/adapters/spark/connections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index cdd1701d..eba3b092 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -12,7 +12,7 @@ import base64 import time -#adding organization as a parameter, as it is required by Azure Databricks and is different per workspace. +# adding organization as a parameter, as it is required by Azure Databricks and is different per workspace. 
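
# The effect of the new parameter is easiest to see by rendering the connection
# URL template above by hand. A sketch only -- every value below is a made-up
# example, not a real workspace, cluster, or organization id.
#
#     SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}"
#
#     url = SPARK_CONNECTION_URL.format(
#         host="adb-1234567890123456.7.azuredatabricks.net",  # hypothetical Azure host
#         port=443,
#         organization="1234567890123456",                    # per-workspace org id
#         cluster="0123-456789-coffee123",                    # hypothetical cluster id
#     )
#     # -> https://adb-1234567890123456.7.azuredatabricks.net:443/sql/protocolv1/o/1234567890123456/0123-456789-coffee123
#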
SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" SPARK_CREDENTIALS_CONTRACT = { @@ -69,7 +69,7 @@ class SparkCredentials(Credentials): def __init__(self, *args, **kwargs): kwargs.setdefault('database', kwargs.get('schema')) - #coercing org to a string since it is unknown whether Azure Databricks will always keep it numeric + # coercing org to a string since it is unknown whether Azure Databricks will always keep it numeric if 'organization' in kwargs: kwargs['organization'] = str(kwargs['organization']) else: From 321acfd8ac44dd15f7013c88d47e9552d755552f Mon Sep 17 00:00:00 2001 From: Bruno Filippone Date: Fri, 18 Oct 2019 16:31:24 +0100 Subject: [PATCH 023/603] Allow overriding spark config in pre_hook --- dbt/include/spark/macros/materializations/incremental.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 4313c6e4..5de4993d 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -20,8 +20,6 @@ {%- set old_relation = none -%} {%- endif %} - {{ run_hooks(pre_hooks) }} - {% call statement() %} set spark.sql.sources.partitionOverwriteMode = DYNAMIC {% endcall %} @@ -30,6 +28,7 @@ set spark.sql.hive.convertMetastoreParquet = false {% endcall %} + {{ run_hooks(pre_hooks) }} {#-- This is required to make dbt's incremental scheme work #} {%- if old_relation is none -%} From e4e3e2a35df9dc6f0c468f77792cfe144fe2d91f Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 10 Dec 2019 13:58:50 +0100 Subject: [PATCH 024/603] Add support for creating/dropping schema's --- dbt/include/spark/macros/adapters.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index c4616ca8..199d8227 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -76,6 +76,17 @@ current_timestamp() {%- endmacro %} +{% macro spark__create_schema(database_name, schema_name) -%} + {%- call statement('create_schema') -%} + create schema if not exists {{ schema_name }} + {%- endcall -%} +{% endmacro %} + +{% macro spark__drop_schema(database_name, schema_name) -%} + {%- call statement('drop_schema') -%} + drop schema if exists {{ schema_name }} cascade + {%- endcall -%} +{% endmacro %} {% macro spark_get_relation_type(relation) -%} {% call statement('get_relation_type', fetch_result=True) -%} From 0ca522f97f7890fc632384922155ed99e3522071 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 7 Oct 2019 23:20:59 -0400 Subject: [PATCH 025/603] add CI env for spark tests --- .circleci/config.yml | 25 +++++++++++++++++++++++++ .gitignore | 3 +++ dbt/adapters/spark/connections.py | 16 ++++++++++++++++ dev_requirements.txt | 7 +++++++ test/integration/profiles.yml | 14 ++++++++++++++ tox.ini | 12 +++++++++++- 6 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 test/integration/profiles.yml diff --git a/.circleci/config.yml b/.circleci/config.yml index eb35cf17..8fab9b0c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,8 +7,33 @@ jobs: - checkout - run: tox -e flake8,unit + integration: + docker: + - image: fishtownjacob/test-container + steps: + - checkout + + - run: + name: Checkout test project + command: git clone https://github.com/fishtown-analytics/dbt-integration-tests.git + + - run: + name: "Setup 
credentials" + command: mkdir -p ~/.dbt/ && cp ~/project/test/integration/profiles.yml ~/.dbt/profiles.yml + + - run: + name: Run integration tests + command: tox -e integration + no_output_timeout: 1h + + - store_artifacts: + path: ./logs + workflows: version: 2 test-everything: jobs: - unit + - integration: + requires: + - unit diff --git a/.gitignore b/.gitignore index 201bd403..b8722c5c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ env/ *.pyc __pycache__ .tox/ +build/ +dist/ +dbt-integration-tests diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 974c557a..78e394de 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -3,11 +3,13 @@ from dbt.adapters.base import Credentials from dbt.adapters.sql import SQLConnectionManager from dbt.logger import GLOBAL_LOGGER as logger +from dbt.compat import NUMBERS import dbt.exceptions from TCLIService.ttypes import TOperationState as ThriftState from thrift.transport import THttpClient from pyhive import hive +from datetime import datetime import base64 import time @@ -136,6 +138,9 @@ def execute(self, sql, bindings=None): ThriftState.FINISHED_STATE, ] + if bindings is not None: + bindings = [self._fix_binding(binding) for binding in bindings] + self._cursor.execute(sql, bindings, async_=True) poll_state = self._cursor.poll() state = poll_state.operationState @@ -170,6 +175,17 @@ def execute(self, sql, bindings=None): logger.debug("Poll status: {}, query complete".format(state)) + @classmethod + def _fix_binding(cls, value): + """Convert complex datatypes to primitives that can be loaded by + the Spark driver""" + if isinstance(value, NUMBERS): + return float(value) + elif isinstance(value, datetime): + return value.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] + else: + return value + @property def description(self): return self._cursor.description diff --git a/dev_requirements.txt b/dev_requirements.txt index 866e18b3..b81a05b4 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -8,3 +8,10 @@ tox==2.5.0 ipdb pytest-xdist>=1.28.0,<2 flaky>=3.5.3,<4 + +# Test requirements +behave==1.2.6 +parse==1.8.4 +parse-type==0.4.2 +PyHamcrest==1.9.0 +six==1.11.0 diff --git a/test/integration/profiles.yml b/test/integration/profiles.yml new file mode 100644 index 00000000..8943e2e1 --- /dev/null +++ b/test/integration/profiles.yml @@ -0,0 +1,14 @@ + +spark: + target: ci + outputs: + ci: + type: spark + method: http + schema: 'public_integration_tests' + host: '{{ env_var("SPARK_HOST") }}' + port: 443 + token: '{{ env_var("SPARK_TOKEN") }}' + cluster: '{{ env_var("SPARK_CLUSTER") }}' + connect_timeout: 30 + connect_retries: 10 diff --git a/tox.ini b/tox.ini index 8eca64a2..82b6c9b0 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] skipsdist = True -envlist = unit, flake8 +envlist = unit, flake8, integration [testenv:flake8] @@ -15,3 +15,13 @@ commands = /bin/bash -c '{envpython} -m pytest -v {posargs} test/unit' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + +[testenv:integration] +basepython = python3 +changedir = dbt-integration-tests +commands = /bin/bash -c '{envpython} -m behave -f progress3 -D profile_name=spark' +passenv = SPARK_HOST SPARK_CLUSTER SPARK_TOKEN +deps = + -r{toxinidir}/requirements.txt + -r{toxinidir}/dev_requirements.txt + -e . 
From 2df5282e1e4a4ea000d06c3dc25a6d977fbd65d3 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Fri, 27 Sep 2019 15:10:15 -0700 Subject: [PATCH 026/603] Fix seeds insert bug (#33) (@aaronsteers) --- dbt/include/spark/macros/adapters.sql | 75 +++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index c4616ca8..634b64f1 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -111,3 +111,78 @@ drop {{ type }} if exists {{ relation }} {%- endcall %} {% endmacro %} + +{% macro spark__load_csv_rows(model, batch_size) %} + {% set agate_table = model['agate_table'] %} + {% set cols_sql = ", ".join(agate_table.column_names) %} + {% set bindings = [] %} + + {% set statements = [] %} + + {% for chunk in agate_table.rows | batch(batch_size) %} + {% set bindings = [] %} + + {% for row in chunk %} + {% set _ = bindings.extend(row) %} + {% endfor %} + + {% set sql %} + insert into {{ this.render(False) }} values + {% for row in chunk -%} + ({%- for column in agate_table.column_names -%} + %s + {%- if not loop.last%},{%- endif %} + {%- endfor -%}) + {%- if not loop.last%},{%- endif %} + {%- endfor %} + {% endset %} + + {% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} + + {% if loop.index0 == 0 %} + {% set _ = statements.append(sql) %} + {% endif %} + {% endfor %} + + {# Return SQL so we can render it out into the compiled files #} + {{ return(statements[0]) }} +{% endmacro %} + + +{% macro spark__reset_csv_table(model, full_refresh, old_relation) %} + {% if old_relation %} + {{ adapter.drop_relation(old_relation) }} + {% endif %} + {% set sql = create_csv_table(model) %} + {{ return(sql) }} +{% endmacro %} + + +{% materialization seed, adapter='spark' %} + + {%- set identifier = model['alias'] -%} + {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set csv_table = model["agate_table"] -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + -- `BEGIN` happens here: + {{ run_hooks(pre_hooks, inside_transaction=True) }} + + -- build model + {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation) %} + {% set status = 'CREATE' %} + {% set num_rows = (csv_table.rows | length) %} + {% set sql = load_csv_rows(model) %} + + {% call noop_statement('main', status ~ ' ' ~ num_rows) %} + {{ create_table_sql }}; + -- dbt seed -- + {{ sql }} + {% endcall %} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + -- `COMMIT` happens here + {{ adapter.commit() }} + {{ run_hooks(post_hooks, inside_transaction=False) }} +{% endmaterialization %} From 343a167fe19dff49f1c0f49740ccecc69352d3e1 Mon Sep 17 00:00:00 2001 From: Tamas Szuromi Date: Tue, 1 Oct 2019 14:39:54 +0200 Subject: [PATCH 027/603] incorporate the dbt_created flag into the Relation class (#35) (@tromika) --- dbt/adapters/spark/impl.py | 5 +---- dbt/adapters/spark/relation.py | 7 +++++-- setup.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 0af7bd3b..30c85820 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -50,7 +50,6 @@ def get_relation_type(self, relation, model_name=None): return self.execute_macro( GET_RELATION_TYPE_MACRO_NAME, kwargs=kwargs, - connection_name=model_name, release=True ) @@ -62,7 +61,6 @@ def 
list_relations_without_caching(self, information_schema, schema, results = self.execute_macro( LIST_RELATIONS_MACRO_NAME, kwargs=kwargs, - connection_name=model_name, release=True ) @@ -89,8 +87,7 @@ def drop_relation(self, relation, model_name=None): self.execute_macro( DROP_RELATION_MACRO_NAME, - kwargs={'relation': relation}, - connection_name=model_name + kwargs={'relation': relation} ) def get_catalog(self, manifest): diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index f089fa77..20d61b59 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -16,7 +16,9 @@ class SparkRelation(BaseRelation): 'database': False, 'schema': True, 'identifier': True, - } + }, + 'dbt_created': False, + } SCHEMA = { @@ -38,7 +40,8 @@ class SparkRelation(BaseRelation): 'include_policy': BaseRelation.POLICY_SCHEMA, 'quote_policy': BaseRelation.POLICY_SCHEMA, 'quote_character': {'type': 'string'}, + 'dbt_created': {'type': 'boolean'}, }, 'required': ['metadata', 'type', 'path', 'include_policy', - 'quote_policy', 'quote_character'] + 'quote_policy', 'quote_character', 'dbt_created'] } diff --git a/setup.py b/setup.py index 80fb7efb..129e8dd4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ package_name = "dbt-spark" -package_version = "0.13.0" +package_version = "0.14.2" description = """The SparkSQL plugin for dbt (data build tool)""" setup( From f873d36525ea447680fa3e9e03610f2422852aa5 Mon Sep 17 00:00:00 2001 From: Tamas Szuromi Date: Mon, 21 Oct 2019 16:36:27 +0200 Subject: [PATCH 028/603] bump version in setup.py (#35) (@tromika) --- .circleci/config.yml | 8 +- .gitignore | 1 + dbt/include/spark/macros/adapters.sql | 75 ------------------- .../macros/materializations/incremental.sql | 3 +- .../spark/macros/materializations/seed.sql | 72 ++++++++++++++++++ requirements.txt | 2 +- setup.py | 2 +- test/integration/profiles.yml | 2 +- tox.ini | 6 +- 9 files changed, 84 insertions(+), 87 deletions(-) create mode 100644 dbt/include/spark/macros/materializations/seed.sql diff --git a/.circleci/config.yml b/.circleci/config.yml index 8fab9b0c..66cf86d2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,16 +15,14 @@ jobs: - run: name: Checkout test project - command: git clone https://github.com/fishtown-analytics/dbt-integration-tests.git - - - run: - name: "Setup credentials" - command: mkdir -p ~/.dbt/ && cp ~/project/test/integration/profiles.yml ~/.dbt/profiles.yml + command: git clone --branch spark-support https://github.com/fishtown-analytics/dbt-integration-tests.git - run: name: Run integration tests command: tox -e integration no_output_timeout: 1h + environment: + DBT_PROFILES_DIR: /home/dbt_test_user/project/test/integration/ - store_artifacts: path: ./logs diff --git a/.gitignore b/.gitignore index b8722c5c..a61df07c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ build/ dist/ dbt-integration-tests +test/integration/.user.yml diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 634b64f1..c4616ca8 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -111,78 +111,3 @@ drop {{ type }} if exists {{ relation }} {%- endcall %} {% endmacro %} - -{% macro spark__load_csv_rows(model, batch_size) %} - {% set agate_table = model['agate_table'] %} - {% set cols_sql = ", ".join(agate_table.column_names) %} - {% set bindings = [] %} - - {% set statements = [] %} - - {% for chunk in agate_table.rows | batch(batch_size) %} - {% 
set bindings = [] %} - - {% for row in chunk %} - {% set _ = bindings.extend(row) %} - {% endfor %} - - {% set sql %} - insert into {{ this.render(False) }} values - {% for row in chunk -%} - ({%- for column in agate_table.column_names -%} - %s - {%- if not loop.last%},{%- endif %} - {%- endfor -%}) - {%- if not loop.last%},{%- endif %} - {%- endfor %} - {% endset %} - - {% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} - - {% if loop.index0 == 0 %} - {% set _ = statements.append(sql) %} - {% endif %} - {% endfor %} - - {# Return SQL so we can render it out into the compiled files #} - {{ return(statements[0]) }} -{% endmacro %} - - -{% macro spark__reset_csv_table(model, full_refresh, old_relation) %} - {% if old_relation %} - {{ adapter.drop_relation(old_relation) }} - {% endif %} - {% set sql = create_csv_table(model) %} - {{ return(sql) }} -{% endmacro %} - - -{% materialization seed, adapter='spark' %} - - {%- set identifier = model['alias'] -%} - {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} - {%- set csv_table = model["agate_table"] -%} - - {{ run_hooks(pre_hooks, inside_transaction=False) }} - - -- `BEGIN` happens here: - {{ run_hooks(pre_hooks, inside_transaction=True) }} - - -- build model - {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation) %} - {% set status = 'CREATE' %} - {% set num_rows = (csv_table.rows | length) %} - {% set sql = load_csv_rows(model) %} - - {% call noop_statement('main', status ~ ' ' ~ num_rows) %} - {{ create_table_sql }}; - -- dbt seed -- - {{ sql }} - {% endcall %} - - {{ run_hooks(post_hooks, inside_transaction=True) }} - -- `COMMIT` happens here - {{ adapter.commit() }} - {{ run_hooks(post_hooks, inside_transaction=False) }} -{% endmaterialization %} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 4313c6e4..d4ba806d 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -13,7 +13,8 @@ {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, type='table') -%} {%- set full_refresh = flags.FULL_REFRESH == True and old_relation is not none -%} - {%- set old_relation_is_view = old_relation is not none and old_relation.is_view -%} + {%- set type = spark_get_relation_type(this) if old_relation else none -%} + {%- set old_relation_is_view = old_relation is not none and type == 'view' -%} {%- if full_refresh or old_relation_is_view -%} {{ adapter.drop_relation(old_relation) }} diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql new file mode 100644 index 00000000..a9760cd7 --- /dev/null +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -0,0 +1,72 @@ +{% macro spark__load_csv_rows(model, batch_size=1000) %} + {% set agate_table = model['agate_table'] %} + {% set cols_sql = ", ".join(agate_table.column_names) %} + {% set bindings = [] %} + + {% set statements = [] %} + + {% for chunk in agate_table.rows | batch(batch_size) %} + {% set bindings = [] %} + + {% for row in chunk %} + {% set _ = bindings.extend(row) %} + {% endfor %} + + {% set sql %} + insert into {{ this.render(False) }} values + {% for row in chunk -%} + ({%- for column in agate_table.column_names -%} + %s + {%- if not loop.last%},{%- endif %} + {%- endfor -%}) + {%- if not loop.last%},{%- endif %} + {%- endfor %} + {% endset 
%} + + {% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} + + {% if loop.index0 == 0 %} + {% set _ = statements.append(sql) %} + {% endif %} + {% endfor %} + + {# Return SQL so we can render it out into the compiled files #} + {{ return(statements[0]) }} +{% endmacro %} + +{% macro spark__reset_csv_table(model, full_refresh, old_relation) %} + {% if old_relation %} + {{ adapter.drop_relation(old_relation) }} + {% endif %} + {% set sql = create_csv_table(model) %} + {{ return(sql) }} +{% endmacro %} + +{% materialization seed, adapter='spark' %} + + {%- set identifier = model['alias'] -%} + {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set csv_table = model["agate_table"] -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + -- `BEGIN` happens here: + {{ run_hooks(pre_hooks, inside_transaction=True) }} + + -- build model + {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation) %} + {% set status = 'CREATE' %} + {% set num_rows = (csv_table.rows | length) %} + {% set sql = load_csv_rows(model) %} + + {% call noop_statement('main', status ~ ' ' ~ num_rows) %} + {{ create_table_sql }}; + -- dbt seed -- + {{ sql }} + {% endcall %} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + -- `COMMIT` happens here + {{ adapter.commit() }} + {{ run_hooks(post_hooks, inside_transaction=False) }} +{% endmaterialization %} diff --git a/requirements.txt b/requirements.txt index 62281993..5cdd15fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.13.0 +dbt-core==0.14.3 PyHive>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 129e8dd4..ca70a925 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ package_name = "dbt-spark" -package_version = "0.14.2" +package_version = "0.14.3" description = """The SparkSQL plugin for dbt (data build tool)""" setup( diff --git a/test/integration/profiles.yml b/test/integration/profiles.yml index 8943e2e1..a12acf1d 100644 --- a/test/integration/profiles.yml +++ b/test/integration/profiles.yml @@ -10,5 +10,5 @@ spark: port: 443 token: '{{ env_var("SPARK_TOKEN") }}' cluster: '{{ env_var("SPARK_CLUSTER") }}' - connect_timeout: 30 + connect_timeout: 60 connect_retries: 10 diff --git a/tox.ini b/tox.ini index 82b6c9b0..502b4b49 100644 --- a/tox.ini +++ b/tox.ini @@ -19,9 +19,9 @@ deps = [testenv:integration] basepython = python3 changedir = dbt-integration-tests -commands = /bin/bash -c '{envpython} -m behave -f progress3 -D profile_name=spark' -passenv = SPARK_HOST SPARK_CLUSTER SPARK_TOKEN +commands = /bin/bash -c '{envpython} -m behave -f progress3 --stop -D profile_name=spark' +passenv = SPARK_HOST SPARK_CLUSTER SPARK_TOKEN DBT_PROFILES_DIR deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt - -e . + -e. 
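
The seed materialization above loads CSV rows in batches of 1,000, building one parameterized `insert` per batch and passing the flattened row values as bindings. A standalone sketch of that batching logic, with a hypothetical table name and a smaller batch size for readability:

```
# Sketch only: mirrors how spark__load_csv_rows chunks rows and builds "%s"
# placeholders that the adapter later binds. Table name is hypothetical.
rows = [(1, "a"), (2, "b"), (3, "c")]
batch_size = 2  # the macro defaults to 1000

for start in range(0, len(rows), batch_size):
    chunk = rows[start:start + batch_size]
    bindings = [value for row in chunk for value in row]
    placeholders = ", ".join(
        "(" + ", ".join("%s" for _ in row) + ")" for row in chunk
    )
    print("insert into analytics.my_seed values " + placeholders, bindings)

# insert into analytics.my_seed values (%s, %s), (%s, %s) [1, 'a', 2, 'b']
# insert into analytics.my_seed values (%s, %s) [3, 'c']
```
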
From 09dc029ee039e65d7f5a6dec30ee712cd8bd6fc8 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Sat, 21 Dec 2019 13:26:14 +0100 Subject: [PATCH 029/603] Remove exceptions --- dbt/adapters/spark/impl.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 0af7bd3b..18021151 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -33,18 +33,6 @@ def convert_number_type(cls, agate_table, col_idx): def convert_datetime_type(cls, agate_table, col_idx): return "TIMESTAMP" - def create_schema(self, database, schema, model_name=None): - raise dbt.exceptions.NotImplementedException( - 'Schema/Database creation is not supported in the Spark adapter. ' - 'Please create the database "{}" manually'.format(database) - ) - - def drop_schema(self, database, schema, model_name=None): - raise dbt.exceptions.NotImplementedException( - 'Schema/Database deletion is not supported in the Spark adapter. ' - 'Please drop the database "{}" manually'.format(database) - ) - def get_relation_type(self, relation, model_name=None): kwargs = {'relation': relation} return self.execute_macro( From 56429653dc0a7593a671638ce4e04eb9e573f930 Mon Sep 17 00:00:00 2001 From: niels Date: Sun, 22 Dec 2019 10:48:31 +0100 Subject: [PATCH 030/603] Expose location, clustered_by to dbt-spark --- dbt/include/spark/macros/adapters.sql | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index c4616ca8..91a665bd 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -12,6 +12,13 @@ {%- endif %} {%- endmacro -%} +{% macro location_clause() %} + {%- set path = config.get('location', validator=validation.any[basestring]) -%} + {%- if path is not none %} + location '{{ path }}' + {%- endif %} +{%- endmacro -%} + {% macro partition_cols(label, required=false) %} {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%} {%- if cols is not none %} @@ -27,6 +34,22 @@ {%- endif %} {%- endmacro -%} +{% macro clustered_cols(label, required=false) %} + {%- set cols = config.get('clustered_by', validator=validation.any[list, basestring]) -%} + {%- set buckets = config.get('clustered_by', validator=validation.any[int]) -%} + {%- if (cols is not none) and (buckets is not none) %} + {%- if cols is string -%} + {%- set cols = [cols] -%} + {%- endif -%} + {{ label }} ( + {%- for item in cols -%} + {{ item }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor -%} + ) into {{ buckets }} buckets + {%- endif %} +{%- endmacro -%} + {% macro spark__create_table_as(temporary, relation, sql) -%} {% if temporary -%} {{ spark_create_temporary_view(relation, sql) }} @@ -34,6 +57,8 @@ create table {{ relation }} {{ file_format_clause() }} {{ partition_cols(label="partitioned by") }} + {{ clustered_cols(label="clustered by") }} + {{ location_clause() }} as {{ sql }} {%- endif %} From ad676ea932cbb6e008dfde04767a1f103a7a1fbc Mon Sep 17 00:00:00 2001 From: niels Date: Sun, 22 Dec 2019 10:54:35 +0100 Subject: [PATCH 031/603] Fixup --- dbt/include/spark/macros/adapters.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 91a665bd..159b4baf 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -36,7 +36,7 @@ {% macro clustered_cols(label, required=false) %} {%- 
set cols = config.get('clustered_by', validator=validation.any[list, basestring]) -%} - {%- set buckets = config.get('clustered_by', validator=validation.any[int]) -%} + {%- set buckets = config.get('buckets', validator=validation.any[int]) -%} {%- if (cols is not none) and (buckets is not none) %} {%- if cols is string -%} {%- set cols = [cols] -%} From 0b7d7bd970111a16eb935192c7cf9196452db358 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Thu, 2 Jan 2020 14:09:25 -0800 Subject: [PATCH 032/603] instructions for installing from master branch --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 821b3ce7..f45a45cc 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,12 @@ For more information on using Spark with dbt, consult the [dbt documentation](ht ### Installation This plugin can be installed via pip: + ``` -# Install prerequisites: -$ pip install pyhive[hive] +# Install dbt-spark from github (latest master branch): +$ pip install git+https://github.com/fishtown-analytics/dbt-spark@master -# Install dbt-spark: +# OR Install dbt-spark from PyPi (latest stable version published to PyPi): $ pip install dbt-spark ``` From b7f17182fafb2b5a6779baf9bdbac146cad5ba79 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Thu, 2 Jan 2020 14:14:04 -0800 Subject: [PATCH 033/603] auto-include PyHive 'hive' extras --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ca70a925..2ec738ac 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ }, install_requires=[ 'dbt-core=={}'.format(package_version), - 'PyHive>=0.6.0,<0.7.0', + 'PyHive[hive]>=0.6.0,<0.7.0', 'thrift>=0.11.0,<0.12.0', ] ) From 8b0d1e04b191541425979cdfd661a1fa2b2177e8 Mon Sep 17 00:00:00 2001 From: niels Date: Fri, 3 Jan 2020 09:41:32 +0100 Subject: [PATCH 034/603] Add unit-test for macros --- test/unit/test_macros.py | 108 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 test/unit/test_macros.py diff --git a/test/unit/test_macros.py b/test/unit/test_macros.py new file mode 100644 index 00000000..7228f2da --- /dev/null +++ b/test/unit/test_macros.py @@ -0,0 +1,108 @@ +import mock +import unittest +import re +from collections import defaultdict +from jinja2 import Environment, FileSystemLoader +from dbt.context.common import _add_validation + + +class TestSparkMacros(unittest.TestCase): + + def setUp(self): + self.jinja_env = Environment(loader=FileSystemLoader('dbt/include/spark/macros'), + extensions=['jinja2.ext.do',]) + + self.config = defaultdict(lambda: None) + + self.default_context = {} + self.default_context['validation'] = mock.Mock() + self.default_context['config'] = mock.Mock(return_value='') + self.default_context['config'].get = lambda key, *args, **kwargs: self.config[key] + + + def __get_template(self, template_filename): + return self.jinja_env.get_template(template_filename, globals=self.default_context) + + + def __run_macro(self, template, name, *args): + value = getattr(template.module, name)(*args) + return re.sub(r'\s\s+', ' ', value) + + + def test_macros_load(self): + self.jinja_env.get_template('adapters.sql') + + + def test_macros_create_table_as(self): + template = self.__get_template('adapters.sql') + + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table as select 1") + + + def 
test_macros_create_table_as_file_format(self): + template = self.__get_template('adapters.sql') + + + self.config['file_format'] = 'delta' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table using delta as select 1") + + + def test_macros_create_table_as_partition(self): + template = self.__get_template('adapters.sql') + + + self.config['partition_by'] = 'partition_1' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table partitioned by (partition_1) as select 1") + + + def test_macros_create_table_as_partitions(self): + template = self.__get_template('adapters.sql') + + + self.config['partition_by'] = ['partition_1', 'partition_2'] + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table partitioned by (partition_1,partition_2) as select 1") + + + def test_macros_create_table_as_cluster(self): + template = self.__get_template('adapters.sql') + + + self.config['clustered_by'] = 'cluster_1' + self.config['buckets'] = '1' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table clustered by (cluster_1) into 1 buckets as select 1") + + + def test_macros_create_table_as_clusters(self): + template = self.__get_template('adapters.sql') + + + self.config['clustered_by'] = ['cluster_1', 'cluster_2'] + self.config['buckets'] = '1' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table clustered by (cluster_1,cluster_2) into 1 buckets as select 1") + + + def test_macros_create_table_as_location(self): + template = self.__get_template('adapters.sql') + + + self.config['location'] = '/mnt/root' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table location '/mnt/root' as select 1") + + + def test_macros_create_table_as_all(self): + template = self.__get_template('adapters.sql') + + self.config['file_format'] = 'delta' + self.config['location'] = '/mnt/root' + self.config['partition_by'] = ['partition_1', 'partition_2'] + self.config['clustered_by'] = ['cluster_1', 'cluster_2'] + self.config['buckets'] = '1' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), + "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root' as select 1") From b167e5f12e68646db5d638c21d067d7266556aa7 Mon Sep 17 00:00:00 2001 From: niels Date: Thu, 9 Jan 2020 15:42:51 +0100 Subject: [PATCH 035/603] Support persist_docs --- dbt/include/spark/macros/adapters.sql | 38 ++++++++++++++++++++++++--- test/unit/test_macros.py | 23 +++++++++++++--- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 159b4baf..f8d9a793 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -5,6 +5,7 @@ {{ sql }} {% endmacro %} + {% macro file_format_clause() %} {%- set file_format = config.get('file_format', validator=validation.any[basestring]) -%} {%- if file_format is not none %} @@ -12,6 +13,7 @@ {%- endif %} {%- endmacro -%} + {% macro location_clause() %} {%- set path = config.get('location', validator=validation.any[basestring]) -%} {%- 
if path is not none %} @@ -19,6 +21,20 @@ {%- endif %} {%- endmacro -%} + +{% macro comment_clause() %} + {%- set raw_persist_docs = config.get('persist_docs', {}) -%} + + {%- if raw_persist_docs is mapping -%} + {%- set raw_relation = raw_persist_docs.get('relation', false) -%} + {%- if raw_relation -%} + comment '{{ model.description }}' + {% endif %} + {%- else -%} + {{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. Expected dict but got value: " ~ raw_persist_docs) }} + {% endif %} +{%- endmacro -%} + {% macro partition_cols(label, required=false) %} {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%} {%- if cols is not none %} @@ -34,6 +50,7 @@ {%- endif %} {%- endmacro -%} + {% macro clustered_cols(label, required=false) %} {%- set cols = config.get('clustered_by', validator=validation.any[list, basestring]) -%} {%- set buckets = config.get('buckets', validator=validation.any[int]) -%} @@ -50,6 +67,7 @@ {%- endif %} {%- endmacro -%} + {% macro spark__create_table_as(temporary, relation, sql) -%} {% if temporary -%} {{ spark_create_temporary_view(relation, sql) }} @@ -59,23 +77,37 @@ {{ partition_cols(label="partitioned by") }} {{ clustered_cols(label="clustered by") }} {{ location_clause() }} + {{ comment_clause() }} as {{ sql }} {%- endif %} {%- endmacro -%} + {% macro spark__create_view_as(relation, sql) -%} - create view {{ relation }} as + create view {{ relation }} + {{ comment_clause() }} + as {{ sql }} {% endmacro %} + {% macro spark__get_columns_in_relation(relation) -%} {% call statement('get_columns_in_relation', fetch_result=True) %} describe {{ relation }} {% endcall %} - {% set table = load_result('get_columns_in_relation').table %} - {{ return(sql_convert_columns_in_relation(table)) }} + {% set columns = [] %} + {% set vars = {'before_partition_info': True} %} + {% for row in load_result('get_columns_in_relation').table if vars.before_partition_info %} + {% if row[0].startswith('#') %} + {{ vars.update({'before_partition_info': False}) }} + {% else %} + {{ dbt_utils.log_info(row) }} + {{ columns.append(row) }} + {% endif %} + {% endfor %} + {{ return(sql_convert_columns_in_relation(columns)) }} {% endmacro %} diff --git a/test/unit/test_macros.py b/test/unit/test_macros.py index 7228f2da..c08cecb3 100644 --- a/test/unit/test_macros.py +++ b/test/unit/test_macros.py @@ -12,12 +12,14 @@ def setUp(self): self.jinja_env = Environment(loader=FileSystemLoader('dbt/include/spark/macros'), extensions=['jinja2.ext.do',]) - self.config = defaultdict(lambda: None) + self.config = {} self.default_context = {} self.default_context['validation'] = mock.Mock() - self.default_context['config'] = mock.Mock(return_value='') - self.default_context['config'].get = lambda key, *args, **kwargs: self.config[key] + self.default_context['model'] = mock.Mock() + self.default_context['exceptions'] = mock.Mock() + self.default_context['config'] = mock.Mock() + self.default_context['config'].get = lambda key, default=None, **kwargs: self.config.get(key, default) def __get_template(self, template_filename): @@ -96,6 +98,16 @@ def test_macros_create_table_as_location(self): "create table my_table location '/mnt/root' as select 1") + def test_macros_create_table_as_comment(self): + template = self.__get_template('adapters.sql') + + + self.config['persist_docs'] = {'relation': True} + self.default_context['model'].description = 'Description Test' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 
'select 1'), + "create table my_table comment 'Description Test' as select 1") + + def test_macros_create_table_as_all(self): template = self.__get_template('adapters.sql') @@ -104,5 +116,8 @@ def test_macros_create_table_as_all(self): self.config['partition_by'] = ['partition_1', 'partition_2'] self.config['clustered_by'] = ['cluster_1', 'cluster_2'] self.config['buckets'] = '1' + self.config['persist_docs'] = {'relation': True} + self.default_context['model'].description = 'Description Test' + self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root' as select 1") + "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root' comment 'Description Test' as select 1") From e2f5cb501ce6db0cbc0a6ba44ecaa533c652fef1 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Tue, 14 Jan 2020 17:28:57 +1000 Subject: [PATCH 036/603] upgrade to version 0.15.0 --- .gitignore | 1 + README.md | 36 +++- dbt/adapters/spark/__init__.py | 1 + dbt/adapters/spark/column.py | 41 +++++ dbt/adapters/spark/connections.py | 157 ++++++++-------- dbt/adapters/spark/impl.py | 172 +++++++++++------- dbt/adapters/spark/relation.py | 61 ++----- dbt/include/spark/macros/adapters.sql | 140 ++++++++++---- .../macros/materializations/incremental.sql | 140 ++++++++++---- .../spark/macros/materializations/table.sql | 55 +++++- .../spark/macros/materializations/view.sql | 65 +++++-- requirements.txt | 5 +- setup.py | 10 +- test/unit/test_adapter.py | 57 +++--- test/unit/utils.py | 55 +++++- 15 files changed, 660 insertions(+), 336 deletions(-) create mode 100644 dbt/adapters/spark/column.py diff --git a/.gitignore b/.gitignore index 201bd403..6865fcfc 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ env/ *.pyc __pycache__ .tox/ +.idea/ \ No newline at end of file diff --git a/README.md b/README.md index 821b3ce7..3722408d 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,6 @@ For more information on using Spark with dbt, consult the [dbt documentation](ht ### Installation This plugin can be installed via pip: ``` -# Install prerequisites: -$ pip install pyhive[hive] - # Install dbt-spark: $ pip install dbt-spark ``` @@ -79,18 +76,20 @@ your_profile_name: The following configurations can be supplied to models run with the dbt-spark plugin: -| Option | Description | Required? | Example | -|---------|----------------------------------------------------|-------------------------|--------------------------| -| file_format | The file format to use when creating tables | Optional | `parquet` | +| Option | Description | Required? | Example | +|-------------|----------------------------------------------------|-------------------------|------------------------| +| file_format | The file format to use when creating tables (`parquet`, `delta`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | `delta` | +| incremental_strategy | The strategy to use for incremental models (`insert_overwrite` or `merge`). Note `merge` requires `file_format` = `delta` and `unique_key` to be specified. | Optional (default: `insert_overwrite`) | `merge` | +| partition_by | Partition the created table by the specified columns. A directory is created for each partition. 
| Required | `['date_day']` | +| cluster_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. This is typically used with partitioning to read and shuffle less data. | Optional | `['name', 'address']` | +| num_buckets | Used in conjunction with `cluster_by`. | Optional (required if `cluster_by` is specified) | `3` | **Incremental Models** -Spark does not natively support `delete`, `update`, or `merge` statements. As such, [incremental models](https://docs.getdbt.com/docs/configuring-incremental-models) -are implemented differently than usual in this plugin. To use incremental models, specify a `partition_by` clause in your model config. -dbt will use an `insert overwrite` query to overwrite the partitions included in your query. Be sure to re-select _all_ of the relevant -data for a partition when using incremental models. +To use incremental models, specify a `partition_by` clause in your model config. The default incremental strategy used is `insert_overwrite`, which will overwrite the partitions included in your query. Be sure to re-select _all_ of the relevant +data for a partition when using the `insert_overwrite` strategy. ``` {{ config( @@ -113,6 +112,23 @@ where date_day::date >= '2019-01-01' group by 1 ``` +The `merge` strategy is only supported when using file_format `delta` (supported in Databricks). It also requires you to specify a `unique key` to match existing records. + +``` +{{ config( + materialized='incremental', + incremental_strategy='merge', + partition_by=['date_day'], + file_format='delta' +) }} + +select * +from {{ ref('events') }} +{% if is_incremental() %} + where date_day > (select max(date_day) from {{ this }}) +{% endif %} +``` + ### Reporting bugs and contributing code - Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt-spark/issues/new). 
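As a rough illustration of how the table configs documented above are rendered, the macro unit tests added earlier in this series (`test/unit/test_macros.py`) expect `spark__create_table_as` to produce DDL along these lines (a sketch only: the statement is reformatted onto multiple lines for readability, the table, column, and path names are the placeholder values used in those tests, and option names such as `clustered_by`/`buckets` vs. `cluster_by`/`num_buckets` shift slightly between patches in this series):

```sql
create table my_table
using delta
partitioned by (partition_1, partition_2)
clustered by (cluster_1, cluster_2) into 1 buckets
location '/mnt/root'
comment 'Description Test'
as select 1
```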
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py index e933ac27..469e202b 100644 --- a/dbt/adapters/spark/__init__.py +++ b/dbt/adapters/spark/__init__.py @@ -1,6 +1,7 @@ from dbt.adapters.spark.connections import SparkConnectionManager # noqa from dbt.adapters.spark.connections import SparkCredentials from dbt.adapters.spark.relation import SparkRelation # noqa +from dbt.adapters.spark.column import SparkColumn # noqa from dbt.adapters.spark.impl import SparkAdapter from dbt.adapters.base import AdapterPlugin diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py new file mode 100644 index 00000000..4b1a36d2 --- /dev/null +++ b/dbt/adapters/spark/column.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass +from typing import TypeVar + +from dbt.adapters.base.column import Column + +Self = TypeVar('Self', bound='SparkColumn') + + +@dataclass(init=False) +class SparkColumn(Column): + column: str + dtype: str + comment: str + + def __init__( + self, + column: str, + dtype: str, + comment: str = None + ) -> None: + super().__init__(column, dtype) + + self.comment = comment + + @classmethod + def translate_type(cls, dtype: str) -> str: + return dtype + + def can_expand_to(self: Self, other_column: Self) -> bool: + """returns True if both columns are strings""" + return self.is_string() and other_column.is_string() + + def literal(self, value): + return "cast({} as {})".format(value, self.dtype) + + @property + def data_type(self) -> str: + return self.dtype + + def __repr__(self) -> str: + return "".format(self.name, self.data_type) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 974c557a..8c15b9eb 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -3,94 +3,66 @@ from dbt.adapters.base import Credentials from dbt.adapters.sql import SQLConnectionManager from dbt.logger import GLOBAL_LOGGER as logger -import dbt.exceptions from TCLIService.ttypes import TOperationState as ThriftState from thrift.transport import THttpClient from pyhive import hive +from hologram.helpers import StrEnum +from dataclasses import dataclass +from typing import Optional + +import dbt.exceptions +import decimal import base64 import time -SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/0/{cluster}" - -SPARK_CREDENTIALS_CONTRACT = { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'method': { - 'enum': ['thrift', 'http'], - }, - 'host': { - 'type': 'string' - }, - 'port': { - 'type': 'integer', - 'minimum': 0, - 'maximum': 65535, - }, - 'user': { - 'type': 'string' - }, - 'cluster': { - 'type': 'string' - }, - 'database': { - 'type': 'string', - }, - 'schema': { - 'type': 'string', - }, - 'token': { - 'type': 'string', - }, - 'connect_timeout': { - 'type': 'integer', - 'minimum': 0, - 'maximum': 60, - }, - 'connect_retries': { - 'type': 'integer', - 'minimum': 0, - 'maximum': 60, - } - }, - 'required': ['method', 'host', 'database', 'schema'], -} +class SparkConnectionMethod(StrEnum): + THRIFT = 'thrift' + HTTP = 'http' +@dataclass class SparkCredentials(Credentials): - SCHEMA = SPARK_CREDENTIALS_CONTRACT - - def __init__(self, *args, **kwargs): - kwargs.setdefault('database', kwargs.get('schema')) - - super(SparkCredentials, self).__init__(*args, **kwargs) + host: str + method: SparkConnectionMethod + schema: str + cluster: Optional[str] + token: Optional[str] + user: Optional[str] + database: Optional[str] + port: int = 443 + organization: str = '0' + 
connect_retries: int = 0 + connect_timeout: int = 10 + + def __post_init__(self): + # spark classifies database and schema as the same thing + self.database = self.schema @property def type(self): return 'spark' def _connection_keys(self): - return ('host', 'port', 'cluster', 'schema') + return 'host', 'port', 'schema', 'organization' class ConnectionWrapper(object): - "Wrap a Spark connection in a way that no-ops transactions" + """Wrap a Spark connection in a way that no-ops transactions""" # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html def __init__(self, handle): self.handle = handle self._cursor = None - self._fetch_result = None def cursor(self): self._cursor = self.handle.cursor() return self def cancel(self): - if self._cursor is not None: + if self._cursor: # Handle bad response in the pyhive lib when # the connection is cancelled try: @@ -101,7 +73,7 @@ def cancel(self): ) def close(self): - if self._cursor is not None: + if self._cursor: # Handle bad response in the pyhive lib when # the connection is cancelled try: @@ -136,6 +108,10 @@ def execute(self, sql, bindings=None): ThriftState.FINISHED_STATE, ] + # Convert decimal.Decimal to float as PyHive doesn't work with decimals + if bindings: + bindings = [float(x) if isinstance(x, decimal.Decimal) else x for x in bindings] + self._cursor.execute(sql, bindings, async_=True) poll_state = self._cursor.poll() state = poll_state.operationState @@ -158,14 +134,14 @@ def execute(self, sql, bindings=None): if poll_state.errorMessage: logger.debug("Poll response: {}".format(poll_state)) logger.debug("Poll status: {}".format(state)) - dbt.exceptions.raise_database_error(poll_state.errorMessage) + raise dbt.exceptions.raise_database_error(poll_state.errorMessage) elif state not in STATE_SUCCESS: status_type = ThriftState._VALUES_TO_NAMES.get( state, 'Unknown<{!r}>'.format(state)) - dbt.exceptions.raise_database_error( + raise dbt.exceptions.raise_database_error( "Query failed with status: {}".format(status_type)) logger.debug("Poll status: {}, query complete".format(state)) @@ -178,10 +154,13 @@ def description(self): class SparkConnectionManager(SQLConnectionManager): TYPE = 'spark' + SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" + @contextmanager - def exception_handler(self, sql, connection_name='master'): + def exception_handler(self, sql): try: yield + except Exception as exc: logger.debug("Error while running:\n{}".format(sql)) logger.debug(exc) @@ -195,6 +174,13 @@ def exception_handler(self, sql, connection_name='master'): else: raise dbt.exceptions.RuntimeException(str(exc)) + def cancel(self, connection): + connection.handle.cancel() + + @classmethod + def get_status(cls, cursor): + return 'OK' + # No transactions on Spark.... 
def add_begin_query(self, *args, **kwargs): logger.debug("NotImplemented: add_begin_query") @@ -213,7 +199,7 @@ def validate_creds(cls, creds, required): method = creds.method for key in required: - if key not in creds: + if not hasattr(creds, key): raise dbt.exceptions.DbtProfileError( "The config '{}' is required when using the {} method" " to connect to Spark".format(key, method)) @@ -225,17 +211,22 @@ def open(cls, connection): return connection creds = connection.credentials - connect_retries = creds.get('connect_retries', 0) - connect_timeout = creds.get('connect_timeout', 10) - exc = None - for i in range(1 + connect_retries): + + for i in range(1 + creds.connect_retries): try: if creds.method == 'http': - cls.validate_creds(creds, ['token', 'host', 'port', - 'cluster']) + cls.validate_creds(creds, ['host', 'port', 'cluster', 'organization', 'token', 'schema']) + + conn_url = cls.SPARK_CONNECTION_URL.format( + host=creds.host, + port=creds.port, + organization=creds.organization, + cluster=creds.cluster + ) + + logger.debug("connection url: {}".format(conn_url)) - conn_url = SPARK_CONNECTION_URL.format(**creds) transport = THttpClient.THttpClient(conn_url) raw_token = "token:{}".format(creds.token).encode() @@ -246,16 +237,18 @@ def open(cls, connection): conn = hive.connect(thrift_transport=transport) elif creds.method == 'thrift': - cls.validate_creds(creds, ['host']) + cls.validate_creds(creds, ['host', 'port', 'user', 'schema']) conn = hive.connect(host=creds.host, - port=creds.get('port'), - username=creds.get('user')) + port=creds.port, + username=creds.user) + else: + raise dbt.exceptions.DbtProfileError("invalid credential method: {}".format(creds.method)) break except Exception as e: exc = e if getattr(e, 'message', None) is None: - raise + raise dbt.exceptions.FailedToConnectException(str(e)) message = e.message.lower() is_pending = 'pending' in message @@ -263,23 +256,15 @@ def open(cls, connection): warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})" if is_pending or is_starting: - logger.warning(warning.format(e.message, connect_timeout, - i + 1, connect_retries)) - time.sleep(connect_timeout) + logger.warning(warning.format(e.message, creds.connect_timeout, + i, creds.connect_retries)) + time.sleep(creds.connect_timeout) else: - raise + raise dbt.exceptions.FailedToConnectException(str(e)) else: raise exc - wrapped = ConnectionWrapper(conn) - + handle = ConnectionWrapper(conn) + connection.handle = handle connection.state = 'open' - connection.handle = wrapped return connection - - @classmethod - def get_status(cls, cursor): - return 'OK' - - def cancel(self, connection): - connection.handle.cancel() diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 0af7bd3b..55b04755 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,99 +1,112 @@ +from typing import Optional, List + from dbt.adapters.sql import SQLAdapter -from dbt.adapters.spark import SparkRelation +from dbt.contracts.graph.manifest import Manifest + +from dbt.adapters.spark import SparkColumn from dbt.adapters.spark import SparkConnectionManager -import dbt.exceptions +from dbt.adapters.spark.relation import SparkRelation +from dbt.adapters.base import RelationType, BaseRelation +from dbt.clients.agate_helper import table_from_data from dbt.logger import GLOBAL_LOGGER as logger -import agate +import agate LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' -GET_RELATION_TYPE_MACRO_NAME = 'spark_get_relation_type' -DROP_RELATION_MACRO_NAME = 
'drop_relation' +LIST_EXTENDED_PROPERTIES_MACRO_NAME = 'list_extended_properties' +GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation' +LIST_SCHEMAS_MACRO_NAME = 'list_schemas' class SparkAdapter(SQLAdapter): - ConnectionManager = SparkConnectionManager + + RELATION_TYPES = { + 'MANAGED': RelationType.Table, + 'VIEW': RelationType.View, + 'EXTERNAL': RelationType.External + } + Relation = SparkRelation + Column = SparkColumn + ConnectionManager = SparkConnectionManager + + AdapterSpecificConfigs = frozenset({"file_format", "partition_by", "cluster_by", "num_buckets", "location"}) @classmethod - def date_function(cls): - return 'CURRENT_TIMESTAMP()' + def date_function(cls) -> str: + return 'current_timestamp()' @classmethod def convert_text_type(cls, agate_table, col_idx): - return "STRING" + return "string" @classmethod def convert_number_type(cls, agate_table, col_idx): decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) - return "DOUBLE" if decimals else "BIGINT" + return "double" if decimals else "bigint" + + @classmethod + def convert_date_type(cls, agate_table, col_idx): + return "date" + + @classmethod + def convert_time_type(cls, agate_table, col_idx): + return "time" @classmethod def convert_datetime_type(cls, agate_table, col_idx): - return "TIMESTAMP" + return "timestamp" - def create_schema(self, database, schema, model_name=None): - raise dbt.exceptions.NotImplementedException( - 'Schema/Database creation is not supported in the Spark adapter. ' - 'Please create the database "{}" manually'.format(database) - ) + def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]: + if not self.Relation.include_policy.database: + database = None - def drop_schema(self, database, schema, model_name=None): - raise dbt.exceptions.NotImplementedException( - 'Schema/Database deletion is not supported in the Spark adapter. 
' - 'Please drop the database "{}" manually'.format(database) - ) + return super().get_relation(database, schema, identifier) - def get_relation_type(self, relation, model_name=None): - kwargs = {'relation': relation} - return self.execute_macro( - GET_RELATION_TYPE_MACRO_NAME, - kwargs=kwargs, - connection_name=model_name, - release=True + def list_extended_properties(self, schema, identifier): + results = self.execute_macro( + LIST_EXTENDED_PROPERTIES_MACRO_NAME, + kwargs={'schema': schema, 'identifier': identifier} ) - # Override that creates macros without a known type - adapter macros that - # require a type will dynamically check at query-time - def list_relations_without_caching(self, information_schema, schema, - model_name=None): + detail_idx = 0 + for idx, row in enumerate(results.rows): + if row['col_name'] == '# Detailed Table Information': + detail_idx = idx + continue + + detail_idx += 1 + return results[detail_idx:] if detail_idx != 0 else results + + def list_relations_without_caching(self, information_schema, schema): kwargs = {'information_schema': information_schema, 'schema': schema} + results = self.execute_macro( LIST_RELATIONS_MACRO_NAME, - kwargs=kwargs, - connection_name=model_name, - release=True + kwargs=kwargs ) relations = [] - quote_policy = { - 'schema': True, - 'identifier': True - } - for _database, name, _ in results: + for _schema, name, _ in results: + # get extended properties foreach table + details = self.list_extended_properties(_schema, name) + + _type = None + for col_name, data_type, _ in details: + if col_name == 'Type': + _type = self.RELATION_TYPES.get(data_type, None) + continue + relations.append(self.Relation.create( - database=_database, - schema=_database, + schema=_schema, identifier=name, - quote_policy=quote_policy, - type=None + type=_type )) - return relations - # Override that doesn't check the type of the relation -- we do it - # dynamically in the macro code - def drop_relation(self, relation, model_name=None): - if dbt.flags.USE_CACHE: - self.cache.drop(relation) - - self.execute_macro( - DROP_RELATION_MACRO_NAME, - kwargs={'relation': relation}, - connection_name=model_name - ) + return relations - def get_catalog(self, manifest): + def get_catalog(self, manifest: Manifest) -> agate.Table: schemas = manifest.get_used_schemas() column_names = ( @@ -110,28 +123,26 @@ def get_catalog(self, manifest): ) columns = [] - for (database_name, schema_name) in schemas: - relations = self.list_relations(database_name, schema_name) + for database, schema in schemas: + relations = self.list_relations(database, schema) for relation in relations: - logger.debug("Getting table schema for relation {}".format(relation)) # noqa table_columns = self.get_columns_in_relation(relation) - rel_type = self.get_relation_type(relation) for column_index, column in enumerate(table_columns): - # Fixes for pseudocolumns with no type + # Fixes for pseudo-columns with no type if column.name in ( '# Partition Information', '# col_name' ): continue - elif column.dtype is None: + elif column.data_type is None: continue column_data = ( relation.database, relation.schema, relation.name, - rel_type, + relation.type, None, None, column.name, @@ -142,4 +153,35 @@ def get_catalog(self, manifest): column_dict = dict(zip(column_names, column_data)) columns.append(column_dict) - return dbt.clients.agate_helper.table_from_data(columns, column_names) + return table_from_data(columns, column_names) + + def get_columns_in_relation(self, relation) -> List[SparkColumn]: + table = 
self.execute_macro( + GET_COLUMNS_IN_RELATION_MACRO_NAME, + kwargs={'relation': relation} + ) + + columns = [] + for col in table: + # Fixes for pseudo-columns with no type + if col.name in ( + '# Partition Information', + '# col_name' + ): + continue + elif col.data_type is None: + continue + + column = self.Column(col.name, col.data_type, col.comment) + columns.append(column) + + return columns + + def check_schema_exists(self, database, schema): + results = self.execute_macro( + LIST_SCHEMAS_MACRO_NAME, + kwargs={'database': database} + ) + + exists = True if schema in [row[0] for row in results] else False + return exists diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index f089fa77..27067d34 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -1,44 +1,23 @@ -from dbt.adapters.base.relation import BaseRelation +from dataclasses import dataclass +from dbt.adapters.base.relation import BaseRelation, Policy + + +@dataclass +class SparkQuotePolicy(Policy): + database: bool = False + schema: bool = False + identifier: bool = False -class SparkRelation(BaseRelation): - DEFAULTS = { - 'metadata': { - 'type': 'SparkRelation' - }, - 'quote_character': '`', - 'quote_policy': { - 'database': False, - 'schema': False, - 'identifier': False, - }, - 'include_policy': { - 'database': False, - 'schema': True, - 'identifier': True, - } - } - SCHEMA = { - 'type': 'object', - 'properties': { - 'metadata': { - 'type': 'object', - 'properties': { - 'type': { - 'type': 'string', - 'const': 'SparkRelation', - }, - }, - }, - 'type': { - 'enum': BaseRelation.RelationTypes + [None] - }, - 'path': BaseRelation.PATH_SCHEMA, - 'include_policy': BaseRelation.POLICY_SCHEMA, - 'quote_policy': BaseRelation.POLICY_SCHEMA, - 'quote_character': {'type': 'string'}, - }, - 'required': ['metadata', 'type', 'path', 'include_policy', - 'quote_policy', 'quote_character'] - } +@dataclass +class SparkIncludePolicy(Policy): + database: bool = False + schema: bool = True + identifier: bool = True + + +@dataclass(frozen=True, eq=False, repr=False) +class SparkRelation(BaseRelation): + quote_policy: SparkQuotePolicy = SparkQuotePolicy() + include_policy: SparkIncludePolicy = SparkIncludePolicy() diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index c4616ca8..05ad3a57 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -1,10 +1,3 @@ - -{#-- We can't use temporary tables with `create ... 
as ()` syntax #} -{% macro spark_create_temporary_view(relation, sql) -%} - create temporary view {{ relation.include(database=false, schema=false) }} as - {{ sql }} -{% endmacro %} - {% macro file_format_clause() %} {%- set file_format = config.get('file_format', validator=validation.any[basestring]) -%} {%- if file_format is not none %} @@ -12,6 +5,13 @@ {%- endif %} {%- endmacro -%} +{% macro location_clause(label, required=false) %} + {%- set location = config.get('location', validator=validation.any[basestring]) -%} + {%- if location is not none %} + {{ label }} "{{ location }}" + {%- endif %} +{%- endmacro -%} + {% macro partition_cols(label, required=false) %} {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%} {%- if cols is not none %} @@ -27,33 +27,105 @@ {%- endif %} {%- endmacro -%} +{% macro clustered_cols(label, required=false) %} + {%- set cols = config.get('cluster_by', validator=validation.any[list, basestring]) -%} + {%- set num_buckets = config.get('num_buckets', validator=validation.any[int]) -%} + {%- if (cols is not none) and (buckets is not none) %} + {%- if cols is string -%} + {%- set cols = [cols] -%} + {%- endif -%} + {{ label }} ( + {%- for item in cols -%} + {{ item }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor -%} + ) into {{ num_buckets }} buckets + {%- endif %} +{%- endmacro -%} + +{% macro spark__load_csv_rows(model, agate_table) %} + {% set cols_sql = ", ".join(agate_table.column_names) %} + {% set bindings = [] %} + + {% set statements = [] %} + + {% for chunk in agate_table.rows | batch(10000) %} + {% set bindings = [] %} + + {% for row in chunk %} + {% set _ = bindings.extend(row) %} + {% endfor %} + + {% set sql %} + insert into {{ this.render() }} values + {% for row in chunk -%} + ({%- for column in agate_table.column_names -%} + %s + {%- if not loop.last%},{%- endif %} + {%- endfor -%}) + {%- if not loop.last%},{%- endif %} + {%- endfor %} + {% endset %} + + {% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} + + {% if loop.index0 == 0 %} + {% set _ = statements.append(sql) %} + {% endif %} + {% endfor %} + + {# Return SQL so we can render it out into the compiled files #} + {{ return(statements[0]) }} +{% endmacro %} + +{#-- We can't use temporary tables with `create ... 
as ()` syntax #} +{% macro create_temporary_view(relation, sql) -%} + create temporary view {{ relation.include(schema=false) }} as + {{ sql }} +{% endmacro %} + {% macro spark__create_table_as(temporary, relation, sql) -%} {% if temporary -%} - {{ spark_create_temporary_view(relation, sql) }} + {{ create_temporary_view(relation, sql) }} {%- else -%} create table {{ relation }} - {{ file_format_clause() }} - {{ partition_cols(label="partitioned by") }} + {{ file_format_clause() }} + {{ location_clause(label="location") }} + {{ partition_cols(label="partitioned by") }} + {{ clustered_cols(label="clustered by") }} as {{ sql }} {%- endif %} {%- endmacro -%} -{% macro spark__create_view_as(relation, sql) -%} - create view {{ relation }} as - {{ sql }} +{% macro spark__create_schema(database_name, schema_name) -%} + {%- call statement('create_schema') -%} + create schema if not exists {{schema_name}} + {% endcall %} +{% endmacro %} + +{% macro list_extended_properties(schema, identifier) %} + {% call statement('list_extended_properties', fetch_result=True) -%} + describe extended {{ schema }}.{{ identifier }} + {% endcall %} + + {% do return(load_result('list_extended_properties').table) %} {% endmacro %} {% macro spark__get_columns_in_relation(relation) -%} {% call statement('get_columns_in_relation', fetch_result=True) %} - describe {{ relation }} + describe {{ relation }} {% endcall %} {% set table = load_result('get_columns_in_relation').table %} - {{ return(sql_convert_columns_in_relation(table)) }} -{% endmacro %} + {% set columns = [] %} + {% for row in table %} + {% do columns.append(api.Column(*row)) %} + {% endfor %} + {{ return(columns) }} +{% endmacro %} {% macro spark__list_relations_without_caching(information_schema, schema) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} @@ -63,7 +135,6 @@ {% do return(load_result('list_relations_without_caching').table) %} {% endmacro %} - {% macro spark__list_schemas(database) -%} {% call statement('list_schemas', fetch_result=True, auto_begin=False) %} show databases @@ -71,43 +142,34 @@ {{ return(load_result('list_schemas').table) }} {% endmacro %} - {% macro spark__current_timestamp() -%} current_timestamp() {%- endmacro %} - -{% macro spark_get_relation_type(relation) -%} - {% call statement('get_relation_type', fetch_result=True) -%} - SHOW TBLPROPERTIES {{ relation }} ('view.default.database') - {%- endcall %} - {% set res = load_result('get_relation_type').table %} - {% if 'does not have property' in res[0][0] %} - {{ return('table') }} - {% else %} - {{ return('view') }} - {% endif %} -{%- endmacro %} - - {% macro spark__rename_relation(from_relation, to_relation) -%} {% call statement('rename_relation') -%} {% if not from_relation.type %} - {% do exceptions.raise_database_error("Cannot rename a relation with an unknown type: " ~ from_relation) %} - {% elif from_relation.type == 'table' %} + {% do exceptions.raise_database_error("Cannot drop a relation with a blank type: " ~ from_relation.identifier) %} + {% elif from_relation.type in ('table', 'external') %} alter table {{ from_relation }} rename to {{ to_relation }} {% elif from_relation.type == 'view' %} alter view {{ from_relation }} rename to {{ to_relation }} {% else %} - {% do exceptions.raise_database_error("Unknown type '" ~ from_relation.type ~ "' for relation: " ~ from_relation) %} + {% do exceptions.raise_database_error("Unknown type '" ~ from_relation.type ~ "' for relation: " ~ from_relation.identifier) %} {% endif %} {%- endcall %} {% 
endmacro %} - {% macro spark__drop_relation(relation) -%} - {% set type = relation.type if relation.type is not none else spark_get_relation_type(relation) %} {% call statement('drop_relation', auto_begin=False) -%} - drop {{ type }} if exists {{ relation }} + {% if not relation.type %} + {% do exceptions.raise_database_error("Cannot drop a relation with a blank type: " ~ relation.identifier) %} + {% elif relation.type in ('table', 'external') %} + drop table if exists {{ relation }} + {% elif relation.type == 'view' %} + drop view if exists {{ relation }} + {% else %} + {% do exceptions.raise_database_error("Unknown type '" ~ relation.type ~ "' for relation: " ~ relation.identifier) %} + {% endif %} {%- endcall %} -{% endmacro %} +{% endmacro %} \ No newline at end of file diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 4313c6e4..fff6d157 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -1,24 +1,96 @@ -{% materialization incremental, adapter='spark' -%} +{% macro dbt_spark_validate_get_file_format() %} + {#-- Find and validate the file format #} + {%- set file_format = config.get("file_format", default="parquet") -%} - {%- set partitions = config.get('partition_by') -%} - {% if not partitions %} - {% do exceptions.raise_compiler_error("Table partitions are required for incremental models on Spark") %} + {% set invalid_file_format_msg -%} + Invalid file format provided: {{ file_format }} + Expected one of: 'text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm' + {%- endset %} + + {% if file_format not in ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm'] %} + {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} {% endif %} - {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['alias'] ~ "__dbt_tmp" -%} + {% do return(file_format) %} +{% endmacro %} + +{% macro dbt_spark_validate_get_incremental_strategy(file_format) %} + {#-- Find and validate the incremental strategy #} + {%- set strategy = config.get("incremental_strategy", default="insert_overwrite") -%} + + {% set invalid_strategy_msg -%} + Invalid incremental strategy provided: {{ strategy }} + Expected one of: 'merge', 'insert_overwrite' + {%- endset %} + + {% set invalid_merge_msg -%} + Invalid incremental strategy provided: {{ strategy }} + You can only choose this strategy when file_format is set to 'delta' + {%- endset %} + + {% if strategy not in ['merge', 'insert_overwrite'] %} + {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} + {%-else %} + {% if strategy == 'merge' and file_format != 'delta' %} + {% do exceptions.raise_compiler_error(invalid_merge_msg) %} + {% endif %} + {% endif %} + + {% do return(strategy) %} +{% endmacro %} + +{% macro dbt_spark_validate_merge(file_format) %} + {% set invalid_file_format_msg -%} + You can only choose the 'merge' incremental_strategy when file_format is set to 'delta' + {%- endset %} + + {% if file_format != 'delta' %} + {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} + {% endif %} - {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} - {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} - {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, type='table') -%} +{% 
endmacro %} - {%- set full_refresh = flags.FULL_REFRESH == True and old_relation is not none -%} - {%- set old_relation_is_view = old_relation is not none and old_relation.is_view -%} - {%- if full_refresh or old_relation_is_view -%} - {{ adapter.drop_relation(old_relation) }} - {%- set old_relation = none -%} - {%- endif %} +{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} + {%- if strategy == 'insert_overwrite' -%} + {#-- insert statements don't like CTEs, so support them via a temp view #} + insert overwrite table {{ target }} + {{ partition_cols(label="partition") }} + select * from {{ source.include(schema=false) }} + {%- else -%} + {#-- merge all columns with databricks delta - schema changes are handled for us #} + merge into {{ target }} as DBT_INTERNAL_DEST + using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE + on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} + when matched then update set * + when not matched then insert * + + {%- endif -%} + +{% endmacro %} + + +{% materialization incremental, adapter='spark' -%} + {#-- Validate early so we don't run SQL if the file_format is invalid --#} + {% set file_format = dbt_spark_validate_get_file_format() -%} + {#-- Validate early so we don't run SQL if the strategy is invalid --#} + {% set strategy = dbt_spark_validate_get_incremental_strategy(file_format) -%} + + {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} + + {% set target_relation = this %} + {% set existing_relation = load_relation(this) %} + {% set tmp_relation = make_temp_relation(this) %} + + {% if strategy == 'merge' %} + {%- set unique_key = config.require('unique_key') -%} + {% do dbt_spark_validate_merge(file_format) %} + {% endif %} + + {%- set partitions = config.get('partition_by', validator=validation.any[list, basestring]) -%} + {% if not partitions %} + {% do exceptions.raise_compiler_error("Table partitions are required for incremental models on Spark") %} + {% endif %} {{ run_hooks(pre_hooks) }} @@ -30,29 +102,25 @@ set spark.sql.hive.convertMetastoreParquet = false {% endcall %} + {% if existing_relation is none %} + {% set build_sql = create_table_as(False, target_relation, sql) %} + {% elif existing_relation.is_view %} + {#-- Can't overwrite a view with a table - we must drop --#} + {% do adapter.drop_relation(existing_relation) %} + {% set build_sql = create_table_as(False, target_relation, sql) %} + {% elif full_refresh_mode %} + {% set build_sql = create_table_as(False, target_relation, sql) %} + {% else %} + {% do run_query(create_table_as(True, tmp_relation, sql)) %} + {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} + {% endif %} - {#-- This is required to make dbt's incremental scheme work #} - {%- if old_relation is none -%} - - {%- call statement('main') -%} - {{ create_table_as(False, target_relation, sql) }} - {%- endcall %} - - {%- else -%} - - {%- call statement('main') -%} - {{ create_table_as(True, tmp_relation, sql) }} - {%- endcall -%} - - {#-- insert statements don't like CTEs, so support them via a temp view #} - {%- call statement() -%} - insert overwrite table {{ target_relation }} - {{ partition_cols(label="partition") }} - select * from {{ tmp_relation.include(database=false, schema=false) }} - {%- endcall -%} - - {%- endif %} + {%- call statement('main') -%} + {{ build_sql }} + {%- endcall -%} {{ run_hooks(post_hooks) }} + {{ return({'relations': [target_relation]}) }} + {%- endmaterialization %} 
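For reference, `dbt_spark_get_incremental_sql` above builds one of two statements against the temporary view created by `create_table_as(True, tmp_relation, sql)`. Sketched with placeholder names (`analytics.my_model` for the target, `my_model__dbt_tmp` for the temp relation, `date_day` for the partition column, and `id` for the `unique_key`), the two strategies render approximately as:

```sql
-- incremental_strategy: insert_overwrite
insert overwrite table analytics.my_model
partition (date_day)
select * from my_model__dbt_tmp

-- incremental_strategy: merge (requires file_format = delta and a unique_key)
merge into analytics.my_model as DBT_INTERNAL_DEST
using my_model__dbt_tmp as DBT_INTERNAL_SOURCE
on DBT_INTERNAL_SOURCE.id = DBT_INTERNAL_DEST.id
when matched then update set *
when not matched then insert *
```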
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index 9dcd477a..c13f9917 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -1,25 +1,64 @@ -{% materialization table, adapter = 'spark' %} - +{% materialization table, adapter='spark' %} {%- set identifier = model['alias'] -%} + {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} + {%- set backup_identifier = model['name'] + '__dbt_backup' -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} + {%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier, + schema=schema, + database=database, + type='table') -%} - {{ run_hooks(pre_hooks) }} + /* + See ../view/view.sql for more information about this relation. + */ + {%- set backup_relation_type = 'table' if old_relation is none else old_relation.type -%} + {%- set backup_relation = api.Relation.create(identifier=backup_identifier, + schema=schema, + database=database, + type=backup_relation_type) -%} + + {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} - -- setup: if the target relation already exists, drop it - {% if old_relation -%} + -- drop the temp relations if they exist for some reason + {{ adapter.drop_relation(intermediate_relation) }} + {{ adapter.drop_relation(backup_relation) }} + + {% if exists_as_view -%} {{ adapter.drop_relation(old_relation) }} + {%- set old_relation = none -%} {%- endif %} + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + -- `BEGIN` happens here: + {{ run_hooks(pre_hooks, inside_transaction=True) }} + -- build model {% call statement('main') -%} - {{ create_table_as(False, target_relation, sql) }} + {{ create_table_as(False, intermediate_relation, sql) }} {%- endcall %} - {{ run_hooks(post_hooks) }} + -- cleanup + {% if old_relation is not none %} + {{ adapter.rename_relation(target_relation, backup_relation) }} + {% endif %} + + {{ adapter.rename_relation(intermediate_relation, target_relation) }} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + + -- `COMMIT` happens here + {{ adapter.commit() }} + + -- finally, drop the existing/backup relation after the commit + {{ drop_relation_if_exists(backup_relation) }} + + {{ run_hooks(post_hooks, inside_transaction=False) }} -{% endmaterialization %} + {{ return({'relations': [target_relation]}) }} +{% endmaterialization %} \ No newline at end of file diff --git a/dbt/include/spark/macros/materializations/view.sql b/dbt/include/spark/macros/materializations/view.sql index 58202fa8..d295d787 100644 --- a/dbt/include/spark/macros/materializations/view.sql +++ b/dbt/include/spark/macros/materializations/view.sql @@ -1,25 +1,68 @@ -{% materialization view, adapter = 'spark' %} - +{%- materialization view, adapter='spark' -%} + {%- set identifier = model['alias'] -%} + {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} + {%- set backup_identifier = model['name'] + '__dbt_backup' -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} - {%- set target_relation = api.Relation.create(identifier=identifier, - schema=schema, - database=database, + {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='view') -%} + {%- set 
intermediate_relation = api.Relation.create(identifier=tmp_identifier, + schema=schema, database=database, type='view') -%} + + /* + This relation (probably) doesn't exist yet. If it does exist, it's a leftover from + a previous run, and we're going to try to drop it immediately. At the end of this + materialization, we're going to rename the "old_relation" to this identifier, + and then we're going to drop it. In order to make sure we run the correct one of: + - drop view ... + - drop table ... + We need to set the type of this relation to be the type of the old_relation, if it exists, + or else "view" as a sane default if it does not. Note that if the old_relation does not + exist, then there is nothing to move out of the way and subsequentally drop. In that case, + this relation will be effectively unused. + */ + {%- set backup_relation_type = 'view' if old_relation is none else old_relation.type -%} + {%- set backup_relation = api.Relation.create(identifier=backup_identifier, + schema=schema, database=database, + type=backup_relation_type) -%} + + {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} - {{ run_hooks(pre_hooks) }} + {{ run_hooks(pre_hooks, inside_transaction=False) }} - -- setup: if the target relation already exists, drop it - {% if old_relation -%} + -- drop the temp relations if they exists for some reason + {{ adapter.drop_relation(intermediate_relation) }} + {{ adapter.drop_relation(backup_relation) }} + + {% if exists_as_table -%} {{ adapter.drop_relation(old_relation) }} + {%- set old_relation = none -%} {%- endif %} + -- `BEGIN` happens here: + {{ run_hooks(pre_hooks, inside_transaction=True) }} + -- build model {% call statement('main') -%} - {{ create_view_as(target_relation, sql) }} + {{ create_view_as(intermediate_relation, sql) }} {%- endcall %} - {{ run_hooks(post_hooks) }} + -- cleanup + -- move the existing view out of the way + {% if old_relation is not none %} + {{ adapter.rename_relation(target_relation, backup_relation) }} + {% endif %} + {{ adapter.rename_relation(intermediate_relation, target_relation) }} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + + {{ adapter.commit() }} + + {{ drop_relation_if_exists(backup_relation) }} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [target_relation]}) }} -{%- endmaterialization -%} +{%- endmaterialization -%} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 62281993..55b227ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -dbt-core==0.13.0 -PyHive>=0.6.0,<0.7.0 -thrift>=0.11.0,<0.12.0 +dbt-core==0.15.0 +PyHive[hive]>=0.6.0,<0.7.0 diff --git a/setup.py b/setup.py index 80fb7efb..d4556a37 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -from setuptools import find_packages -from distutils.core import setup +from setuptools import find_packages, setup import os @@ -10,7 +9,7 @@ package_name = "dbt-spark" -package_version = "0.13.0" +package_version = "0.15.0" description = """The SparkSQL plugin for dbt (data build tool)""" setup( @@ -30,12 +29,11 @@ 'dbt': [ 'include/spark/dbt_project.yml', 'include/spark/macros/*.sql', - 'include/spark/macros/**/*.sql', + 'include/spark/macros/**/*.sql' ] }, install_requires=[ 'dbt-core=={}'.format(package_version), - 'PyHive>=0.6.0,<0.7.0', - 'thrift>=0.11.0,<0.12.0', + 'PyHive[hive]>=0.6.0,<0.7.0' ] ) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 89857886..d62da48b 100644 --- 
a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -1,18 +1,20 @@ -import mock import unittest -import dbt.adapters +from unittest import mock + import dbt.flags as flags from pyhive import hive + from dbt.adapters.spark import SparkAdapter -import agate +from .utils import config_from_parts_or_dicts + -from .utils import config_from_parts_or_dicts, inject_adapter +# from spark import connector as spark_connector class TestSparkAdapter(unittest.TestCase): def setUp(self): - flags.STRICT_MODE = True + flags.STRICT_MODE = False self.project_cfg = { 'name': 'X', @@ -25,56 +27,60 @@ def setUp(self): } } - def get_target_http(self, project): + def _get_target_http(self, project): return config_from_parts_or_dicts(project, { 'outputs': { 'test': { - 'type': 'spark', - 'method': 'http', - 'schema': 'analytics', - 'host': 'myorg.sparkhost.com', - 'port': 443, - 'token': 'abc123', - 'cluster': '01234-23423-coffeetime', + 'type': 'spark', + 'method': 'http', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 443, + 'token': 'abc123', + 'organization': '0123456789', + 'cluster': '01234-23423-coffeetime', } }, 'target': 'test' }) - def get_target_thrift(self, project): + def _get_target_thrift(self, project): return config_from_parts_or_dicts(project, { 'outputs': { 'test': { - 'type': 'spark', - 'method': 'thrift', - 'schema': 'analytics', - 'host': 'myorg.sparkhost.com', - 'port': 10001, - 'user': 'dbt' + 'type': 'spark', + 'method': 'thrift', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 10001, + 'user': 'dbt' } }, 'target': 'test' }) def test_http_connection(self): - config = self.get_target_http(self.project_cfg) + config = self._get_target_http(self.project_cfg) adapter = SparkAdapter(config) - def hive_http_connect(thrift_transport): self.assertEqual(thrift_transport.scheme, 'https') self.assertEqual(thrift_transport.port, 443) self.assertEqual(thrift_transport.host, 'myorg.sparkhost.com') - self.assertEqual(thrift_transport.path, '/sql/protocolv1/o/0/01234-23423-coffeetime') + self.assertEqual(thrift_transport.path, '/sql/protocolv1/o/0123456789/01234-23423-coffeetime') with mock.patch.object(hive, 'connect', new=hive_http_connect): connection = adapter.acquire_connection('dummy') self.assertEqual(connection.state, 'open') self.assertNotEqual(connection.handle, None) + self.assertEqual(connection.credentials.cluster, '01234-23423-coffeetime') + self.assertEqual(connection.credentials.token, 'abc123') + self.assertEqual(connection.credentials.schema, 'analytics') + self.assertEqual(connection.credentials.database, 'analytics') def test_thrift_connection(self): - config = self.get_target_thrift(self.project_cfg) + config = self._get_target_thrift(self.project_cfg) adapter = SparkAdapter(config) def hive_thrift_connect(host, port, username): @@ -87,3 +93,6 @@ def hive_thrift_connect(host, port, username): self.assertEqual(connection.state, 'open') self.assertNotEqual(connection.handle, None) + self.assertEqual(connection.credentials.schema, 'analytics') + self.assertEqual(connection.credentials.database, 'analytics') + diff --git a/test/unit/utils.py b/test/unit/utils.py index def039c2..5869eca3 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -1,12 +1,27 @@ """Unit test utility functions. - Note that all imports should be inside the functions to avoid import/mocking issues. 
""" -import mock +import os +from unittest import mock +from unittest import TestCase + +from hologram import ValidationError + + +def normalize(path): + """On windows, neither is enough on its own: + >>> normcase('C:\\documents/ALL CAPS/subdir\\..') + 'c:\\documents\\all caps\\subdir\\..' + >>> normpath('C:\\documents/ALL CAPS/subdir\\..') + 'C:\\documents\\ALL CAPS' + >>> normpath(normcase('C:\\documents/ALL CAPS/subdir\\..')) + 'c:\\documents\\all caps' + """ + return os.path.normcase(os.path.normpath(path)) -class Obj(object): +class Obj: which = 'blah' @@ -30,6 +45,7 @@ def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): cli_vars) args = Obj() args.vars = repr(cli_vars) + args.profile_dir = '/dev/null' return RuntimeConfig.from_parts( project=project, profile=profile, @@ -41,9 +57,34 @@ def inject_adapter(value): """Inject the given adapter into the adapter factory, so your hand-crafted artisanal adapter will be available from get_adapter() as if dbt loaded it. """ - from dbt.adapters import factory - from dbt.adapters.base.connections import BaseConnectionManager + from dbt.adapters.factory import FACTORY key = value.type() - factory._ADAPTERS[key] = value - factory.ADAPTER_TYPES[key] = type(value) + FACTORY.adapters[key] = value + FACTORY.adapter_types[key] = type(value) + + +class ContractTestCase(TestCase): + ContractType = None + + def setUp(self): + self.maxDiff = None + super().setUp() + + def assert_to_dict(self, obj, dct): + self.assertEqual(obj.to_dict(), dct) + + def assert_from_dict(self, obj, dct, cls=None): + if cls is None: + cls = self.ContractType + self.assertEqual(cls.from_dict(dct), obj) + + def assert_symmetric(self, obj, dct, cls=None): + self.assert_to_dict(obj, dct) + self.assert_from_dict(obj, dct, cls) + + def assert_fails_validation(self, dct, cls=None): + if cls is None: + cls = self.ContractType + with self.assertRaises(ValidationError): + cls.from_dict(dct) From 02809a3353eb677bd51f3cf7e052d1cc8b849268 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Tue, 14 Jan 2020 20:05:46 +1000 Subject: [PATCH 037/603] exclude build and dist folders --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6865fcfc..194336b0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ env/ *.pyc __pycache__ .tox/ -.idea/ \ No newline at end of file +.idea/ +build/ +dist/ \ No newline at end of file From 601a840272ce37718fadbec0efa58faee0e9ba37 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Tue, 14 Jan 2020 20:06:02 +1000 Subject: [PATCH 038/603] include drop_schema macro --- dbt/include/spark/macros/adapters.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 05ad3a57..55ba5c86 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -104,6 +104,12 @@ {% endcall %} {% endmacro %} +{% macro spark__drop_schema(database_name, schema_name) -%} + {%- call statement('drop_schema') -%} + drop schema if exists {{ schema_name }} cascade + {%- endcall -%} +{% endmacro %} + {% macro list_extended_properties(schema, identifier) %} {% call statement('list_extended_properties', fetch_result=True) -%} describe extended {{ schema }}.{{ identifier }} From beb2b91fddf7e4bceccdfa9f88185f3d9c24b2b5 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Wed, 15 Jan 2020 16:33:43 +1000 Subject: [PATCH 039/603] separate spark__load_csv_rows macro into seed.sql file --- 
.../spark/macros/materializations/seed.sql | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 dbt/include/spark/macros/materializations/seed.sql diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql new file mode 100644 index 00000000..b2d3d2cb --- /dev/null +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -0,0 +1,34 @@ +{% macro spark__load_csv_rows(model, agate_table) %} + {% set cols_sql = ", ".join(agate_table.column_names) %} + {% set bindings = [] %} + + {% set statements = [] %} + + {% for chunk in agate_table.rows | batch(10000) %} + {% set bindings = [] %} + + {% for row in chunk %} + {% set _ = bindings.extend(row) %} + {% endfor %} + + {% set sql %} + insert into {{ this.render() }} values + {% for row in chunk -%} + ({%- for column in agate_table.column_names -%} + %s + {%- if not loop.last%},{%- endif %} + {%- endfor -%}) + {%- if not loop.last%},{%- endif %} + {%- endfor %} + {% endset %} + + {% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} + + {% if loop.index0 == 0 %} + {% set _ = statements.append(sql) %} + {% endif %} + {% endfor %} + + {# Return SQL so we can render it out into the compiled files #} + {{ return(statements[0]) }} +{% endmacro %} \ No newline at end of file From a32e29cb48e84c0e5976b02d5cc54354884bf437 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Wed, 15 Jan 2020 16:33:59 +1000 Subject: [PATCH 040/603] ignore dbt-integration-tests --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 194336b0..0e3e5dab 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ __pycache__ .tox/ .idea/ build/ -dist/ \ No newline at end of file +dist/ +dbt-integration-tests \ No newline at end of file From 7e62f1350522371f62f3a1e3cc40b61d430bbb18 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Wed, 15 Jan 2020 17:57:36 +1000 Subject: [PATCH 041/603] support for extracting statistics cache schemas and relations --- dbt/adapters/spark/impl.py | 233 +++++++++++++++----------- dbt/include/spark/macros/adapters.sql | 82 +++------ test/unit/test_adapter.py | 160 +++++++++++++++++- 3 files changed, 317 insertions(+), 158 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 55b04755..68f1f9aa 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,12 +1,13 @@ -from typing import Optional, List +from typing import Optional, List, Dict +import dbt from dbt.adapters.sql import SQLAdapter from dbt.contracts.graph.manifest import Manifest from dbt.adapters.spark import SparkColumn from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark.relation import SparkRelation -from dbt.adapters.base import RelationType, BaseRelation +from dbt.adapters.base import BaseRelation from dbt.clients.agate_helper import table_from_data from dbt.logger import GLOBAL_LOGGER as logger @@ -14,18 +15,34 @@ import agate LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' -LIST_EXTENDED_PROPERTIES_MACRO_NAME = 'list_extended_properties' -GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation' +GET_RELATION_TYPE_MACRO_NAME = 'get_relation_type' LIST_SCHEMAS_MACRO_NAME = 'list_schemas' +FETCH_TBL_PROPERTIES_MACRO_NAME = 'fetch_tbl_properties' class SparkAdapter(SQLAdapter): - - RELATION_TYPES = { - 'MANAGED': RelationType.Table, - 'VIEW': RelationType.View, - 'EXTERNAL': RelationType.External - } + COLUMN_NAMES = ( + 'table_database', + 
'table_schema', + 'table_name', + 'table_type', + 'table_comment', + 'table_owner', + 'column_name', + 'column_index', + 'column_type', + 'column_comment', + + 'stats:bytes:label', + 'stats:bytes:value', + 'stats:bytes:description', + 'stats:bytes:include', + + 'stats:rows:label', + 'stats:rows:value', + 'stats:rows:description', + 'stats:rows:include', + ) Relation = SparkRelation Column = SparkColumn @@ -64,22 +81,27 @@ def get_relation(self, database: str, schema: str, identifier: str) -> Optional[ return super().get_relation(database, schema, identifier) - def list_extended_properties(self, schema, identifier): - results = self.execute_macro( - LIST_EXTENDED_PROPERTIES_MACRO_NAME, - kwargs={'schema': schema, 'identifier': identifier} + def get_relation_type(self, relation): + kwargs = {'relation': relation} + return self.execute_macro( + GET_RELATION_TYPE_MACRO_NAME, + kwargs=kwargs, + release=True ) - detail_idx = 0 - for idx, row in enumerate(results.rows): - if row['col_name'] == '# Detailed Table Information': - detail_idx = idx - continue - - detail_idx += 1 - return results[detail_idx:] if detail_idx != 0 else results - - def list_relations_without_caching(self, information_schema, schema): + def add_schema_to_cache(self, schema) -> str: + """Cache a new schema in dbt. It will show up in `list relations`.""" + if schema is None: + name = self.nice_connection_name() + dbt.exceptions.raise_compiler_error( + 'Attempted to cache a null schema for {}'.format(name) + ) + if dbt.flags.USE_CACHE: + self.cache.add_schema(None, schema) + # so jinja doesn't render things + return '' + + def list_relations_without_caching(self, information_schema, schema) -> List[SparkRelation]: kwargs = {'information_schema': information_schema, 'schema': schema} results = self.execute_macro( @@ -89,94 +111,111 @@ def list_relations_without_caching(self, information_schema, schema): relations = [] for _schema, name, _ in results: - # get extended properties foreach table - details = self.list_extended_properties(_schema, name) - - _type = None - for col_name, data_type, _ in details: - if col_name == 'Type': - _type = self.RELATION_TYPES.get(data_type, None) - continue - - relations.append(self.Relation.create( + relation = self.Relation.create( schema=_schema, - identifier=name, - type=_type - )) + identifier=name + ) + self.cache_added(relation) + relations.append(relation) return relations - def get_catalog(self, manifest: Manifest) -> agate.Table: - schemas = manifest.get_used_schemas() - - column_names = ( - 'table_database', - 'table_schema', - 'table_name', - 'table_type', - 'table_comment', - 'table_owner', - 'column_name', - 'column_index', - 'column_type', - 'column_comment', - ) + @staticmethod + def _parse_relation(relation: Relation, + table_columns: List[Column], + rel_type: str, + properties: Dict[str, str] = None) -> List[dict]: + properties = properties or {} + statistics = {} + table_owner_key = 'Owner' + + # First check if it is present in the properties + table_owner = properties.get(table_owner_key) + + found_detailed_table_marker = False + for column in table_columns: + if column.name == '# Detailed Table Information': + found_detailed_table_marker = True + + # In case there is another column with the name Owner + if not found_detailed_table_marker: + continue - columns = [] - for database, schema in schemas: - relations = self.list_relations(database, schema) - for relation in relations: - table_columns = self.get_columns_in_relation(relation) + if not table_owner and column.name == 
table_owner_key: + table_owner = column.data_type - for column_index, column in enumerate(table_columns): - # Fixes for pseudo-columns with no type - if column.name in ( - '# Partition Information', - '# col_name' - ): - continue - elif column.data_type is None: - continue - - column_data = ( - relation.database, - relation.schema, - relation.name, - relation.type, - None, - None, - column.name, - column_index, - column.data_type, - None, - ) - column_dict = dict(zip(column_names, column_data)) - columns.append(column_dict) - - return table_from_data(columns, column_names) - - def get_columns_in_relation(self, relation) -> List[SparkColumn]: - table = self.execute_macro( - GET_COLUMNS_IN_RELATION_MACRO_NAME, - kwargs={'relation': relation} - ) + if column.name == 'Statistics': + # format: 1109049927 bytes, 14093476 rows + statistics = {stats.split(" ")[1]: int(stats.split(" ")[0]) for + stats in column.data_type.split(', ')} columns = [] - for col in table: + for column_index, column in enumerate(table_columns): # Fixes for pseudo-columns with no type - if col.name in ( - '# Partition Information', - '# col_name' - ): + if column.name in { + '# Partition Information', + '# col_name', + '' + }: continue - elif col.data_type is None: + elif column.name == '# Detailed Table Information': + # Loop until the detailed table information + break + elif column.data_type is None: continue - column = self.Column(col.name, col.data_type, col.comment) - columns.append(column) + column_data = ( + relation.database, + relation.schema, + relation.name, + rel_type, + None, + table_owner, + column.name, + column_index, + column.data_type, + None, + + # Table level stats + 'Table size', + statistics.get("bytes"), + "The size of the table in bytes", + statistics.get("bytes") is not None, + + # Column level stats + 'Number of rows', + statistics.get("rows"), + "The number of rows in the table", + statistics.get("rows") is not None + ) + + column_dict = dict(zip(SparkAdapter.COLUMN_NAMES, column_data)) + columns.append(column_dict) return columns + def get_properties(self, relation: Relation) -> Dict[str, str]: + properties = self.execute_macro( + FETCH_TBL_PROPERTIES_MACRO_NAME, + kwargs={'relation': relation} + ) + return {key: value for (key, value) in properties} + + def get_catalog(self, manifest: Manifest) -> agate.Table: + schemas = manifest.get_used_schemas() + + columns = [] + for database, schema in schemas: + relations = self.list_relations(database, schema) + for relation in relations: + properties = self.get_properties(relation) + logger.debug("Getting table schema for relation {}".format(relation)) # noqa + table_columns = self.get_columns_in_relation(relation) + rel_type = self.get_relation_type(relation) + columns += self._parse_relation(relation, table_columns, rel_type, properties) + + return table_from_data(columns, SparkAdapter.COLUMN_NAMES) + def check_schema_exists(self, database, schema): results = self.execute_macro( LIST_SCHEMAS_MACRO_NAME, diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 55ba5c86..a95b00aa 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -43,40 +43,24 @@ {%- endif %} {%- endmacro -%} -{% macro spark__load_csv_rows(model, agate_table) %} - {% set cols_sql = ", ".join(agate_table.column_names) %} - {% set bindings = [] %} - - {% set statements = [] %} - - {% for chunk in agate_table.rows | batch(10000) %} - {% set bindings = [] %} - - {% for row in chunk %} - {% set _ = 
bindings.extend(row) %} - {% endfor %} - - {% set sql %} - insert into {{ this.render() }} values - {% for row in chunk -%} - ({%- for column in agate_table.column_names -%} - %s - {%- if not loop.last%},{%- endif %} - {%- endfor -%}) - {%- if not loop.last%},{%- endif %} - {%- endfor %} - {% endset %} - - {% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} - - {% if loop.index0 == 0 %} - {% set _ = statements.append(sql) %} - {% endif %} - {% endfor %} - - {# Return SQL so we can render it out into the compiled files #} - {{ return(statements[0]) }} -{% endmacro %} +{% macro fetch_tbl_properties(relation) -%} + {% call statement('list_properties', fetch_result=True) -%} + SHOW TBLPROPERTIES {{ relation }} + {% endcall %} + {% do return(load_result('list_properties').table) %} +{%- endmacro %} + +{% macro get_relation_type(relation) -%} + {% call statement('get_relation_type', fetch_result=True) -%} + SHOW TBLPROPERTIES {{ relation }} ('view.default.database') + {%- endcall %} + {% set res = load_result('get_relation_type').table %} + {% if 'does not have property' in res[0][0] %} + {{ return('table') }} + {% else %} + {{ return('view') }} + {% endif %} +{%- endmacro %} {#-- We can't use temporary tables with `create ... as ()` syntax #} {% macro create_temporary_view(relation, sql) -%} @@ -110,26 +94,13 @@ {%- endcall -%} {% endmacro %} -{% macro list_extended_properties(schema, identifier) %} - {% call statement('list_extended_properties', fetch_result=True) -%} - describe extended {{ schema }}.{{ identifier }} - {% endcall %} - - {% do return(load_result('list_extended_properties').table) %} -{% endmacro %} - {% macro spark__get_columns_in_relation(relation) -%} {% call statement('get_columns_in_relation', fetch_result=True) %} - describe {{ relation }} + describe extended {{ relation }} {% endcall %} {% set table = load_result('get_columns_in_relation').table %} - - {% set columns = [] %} - {% for row in table %} - {% do columns.append(api.Column(*row)) %} - {% endfor %} - {{ return(columns) }} + {{ return(sql_convert_columns_in_relation(table)) }} {% endmacro %} @@ -156,7 +127,7 @@ {% call statement('rename_relation') -%} {% if not from_relation.type %} {% do exceptions.raise_database_error("Cannot drop a relation with a blank type: " ~ from_relation.identifier) %} - {% elif from_relation.type in ('table', 'external') %} + {% elif from_relation.type in ('table') %} alter table {{ from_relation }} rename to {{ to_relation }} {% elif from_relation.type == 'view' %} alter view {{ from_relation }} rename to {{ to_relation }} @@ -167,15 +138,8 @@ {% endmacro %} {% macro spark__drop_relation(relation) -%} + {% set type = relation.type if relation.type is not none else get_relation_type(relation) %} {% call statement('drop_relation', auto_begin=False) -%} - {% if not relation.type %} - {% do exceptions.raise_database_error("Cannot drop a relation with a blank type: " ~ relation.identifier) %} - {% elif relation.type in ('table', 'external') %} - drop table if exists {{ relation }} - {% elif relation.type == 'view' %} - drop view if exists {{ relation }} - {% else %} - {% do exceptions.raise_database_error("Unknown type '" ~ relation.type ~ "' for relation: " ~ relation.identifier) %} - {% endif %} + drop {{ type }} if exists {{ relation }} {%- endcall %} {% endmacro %} \ No newline at end of file diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index d62da48b..9c64350d 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -3,8 
+3,8 @@ import dbt.flags as flags from pyhive import hive - -from dbt.adapters.spark import SparkAdapter +from agate import Column, MappedSequence +from dbt.adapters.spark import SparkAdapter, SparkRelation from .utils import config_from_parts_or_dicts @@ -96,3 +96,159 @@ def hive_thrift_connect(host, port, username): self.assertEqual(connection.credentials.schema, 'analytics') self.assertEqual(connection.credentials.database, 'analytics') + def test_parse_relation(self): + rel_type = SparkRelation.RelationType.Table + + relation = SparkRelation.create( + database='default_database', + schema='default_schema', + identifier='mytable', + type=rel_type + ) + + # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED + plain_rows = [ + ('col1', 'decimal(22,0)'), + ('col2', 'string',), + ('# Partition Information', 'data_type'), + ('# col_name', 'data_type'), + ('dt', 'date'), + ('', ''), + ('# Detailed Table Information', ''), + ('Database', relation.database), + ('Owner', 'root'), + ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'), + ('Last Access', 'Wed May 20 19:25:00 UTC 1925'), + ('Type', 'MANAGED'), + ('Provider', 'delta'), + ('Location', '/mnt/vo'), + ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'), + ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'), + ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'), + ('Partition Provider', 'Catalog') + ] + + input_cols = [Column(index=None, name=r[0], data_type=r[1], rows=MappedSequence( + keys=['col_name', 'data_type'], + values=r + )) for r in plain_rows] + + rows = SparkAdapter._parse_relation(relation, input_cols, rel_type) + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0], { + 'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': relation.type, + 'stats:bytes:description': 'The size of the table in bytes', + 'stats:bytes:include': False, + 'stats:bytes:label': 'Table size', + 'stats:bytes:value': None, + 'stats:rows:description': 'The number of rows in the table', + 'stats:rows:include': False, + 'stats:rows:label': 'Number of rows', + 'stats:rows:value': None, + 'table_comment': None, + 'table_owner': 'root', + 'column_name': 'col1', + 'column_index': 0, + 'column_type': 'decimal(22,0)', + 'column_comment': None + }) + + self.assertEqual(rows[1], { + 'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': relation.type, + 'stats:bytes:description': 'The size of the table in bytes', + 'stats:bytes:include': False, + 'stats:bytes:label': 'Table size', + 'stats:bytes:value': None, + 'stats:rows:description': 'The number of rows in the table', + 'stats:rows:include': False, + 'stats:rows:label': 'Number of rows', + 'stats:rows:value': None, + 'table_comment': None, + 'table_owner': 'root', + 'column_name': 'col2', + 'column_index': 1, + 'column_type': 'string', + 'column_comment': None + }) + + self.assertEqual(rows[2], { + 'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': relation.type, + 'stats:bytes:description': 'The size of the table in bytes', + 'stats:bytes:include': False, + 'stats:bytes:label': 'Table size', + 'stats:bytes:value': None, + 'stats:rows:description': 'The number of rows in the table', + 'stats:rows:include': False, + 'stats:rows:label': 'Number of rows', + 'stats:rows:value': None, + 'table_comment': None, + 'table_owner': 'root', + 'column_name': 
'dt', + 'column_index': 4, + 'column_type': 'date', + 'column_comment': None + }) + + def test_parse_relation_with_properties(self): + rel_type = SparkRelation.RelationType.Table + + relation = SparkRelation.create( + database='default_database', + schema='default_schema', + identifier='mytable', + type=rel_type + ) + + # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED + plain_rows = [ + ('col1', 'decimal(19,25)'), + ('', ''), + ('# Detailed Table Information', ''), + ('Database', relation.database), + ('Owner', 'root'), + ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'), + ('Last Access', 'Wed May 20 19:25:00 UTC 1925'), + ('Type', 'MANAGED'), + ('Provider', 'delta'), + ('Location', '/mnt/vo'), + ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'), + ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'), + ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'), + ('Partition Provider', 'Catalog') + ] + + input_cols = [Column(index=None, name=r[0], data_type=r[1], rows=MappedSequence( + keys=['col_name', 'data_type'], + values=r + )) for r in plain_rows] + + rows = SparkAdapter._parse_relation(relation, input_cols, rel_type, {'Owner': 'Fokko'}) + self.assertEqual(rows[0], { + 'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'stats:bytes:description': 'The size of the table in bytes', + 'stats:bytes:include': False, + 'stats:bytes:label': 'Table size', + 'stats:bytes:value': None, + 'stats:rows:description': 'The number of rows in the table', + 'stats:rows:include': False, + 'stats:rows:label': 'Number of rows', + 'stats:rows:value': None, + 'table_comment': None, + 'table_owner': 'Fokko', + 'column_name': 'col1', + 'column_index': 0, + 'column_type': 'decimal(19,25)', + 'column_comment': None + }) From 4f600313060a2c2a6d57d0e569f9742edf68a853 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Sun, 19 Jan 2020 21:31:26 +1000 Subject: [PATCH 042/603] fixes for full-refresh --- dbt/adapters/spark/impl.py | 12 ++++++++++++ dbt/include/spark/macros/adapters.sql | 2 +- .../spark/macros/materializations/incremental.sql | 5 +---- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 68f1f9aa..637b0cdf 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -17,6 +17,7 @@ LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' GET_RELATION_TYPE_MACRO_NAME = 'get_relation_type' LIST_SCHEMAS_MACRO_NAME = 'list_schemas' +DROP_RELATION_MACRO_NAME = 'drop_relation' FETCH_TBL_PROPERTIES_MACRO_NAME = 'fetch_tbl_properties' @@ -216,6 +217,17 @@ def get_catalog(self, manifest: Manifest) -> agate.Table: return table_from_data(columns, SparkAdapter.COLUMN_NAMES) + # Override that doesn't check the type of the relation -- we do it + # dynamically in the macro code + def drop_relation(self, relation, model_name=None): + if dbt.flags.USE_CACHE: + self.cache.drop(relation) + + self.execute_macro( + DROP_RELATION_MACRO_NAME, + kwargs={'relation': relation} + ) + def check_schema_exists(self, database, schema): results = self.execute_macro( LIST_SCHEMAS_MACRO_NAME, diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index a95b00aa..0af4ca51 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -126,7 +126,7 @@ {% macro spark__rename_relation(from_relation, to_relation) -%} {% call 
statement('rename_relation') -%} {% if not from_relation.type %} - {% do exceptions.raise_database_error("Cannot drop a relation with a blank type: " ~ from_relation.identifier) %} + {% do exceptions.raise_database_error("Cannot rename a relation with a blank type: " ~ from_relation.identifier) %} {% elif from_relation.type in ('table') %} alter table {{ from_relation }} rename to {{ to_relation }} {% elif from_relation.type == 'view' %} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index fff6d157..bb3d1bdc 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -104,12 +104,9 @@ {% if existing_relation is none %} {% set build_sql = create_table_as(False, target_relation, sql) %} - {% elif existing_relation.is_view %} - {#-- Can't overwrite a view with a table - we must drop --#} + {% elif existing_relation.is_view or full_refresh_mode %} {% do adapter.drop_relation(existing_relation) %} {% set build_sql = create_table_as(False, target_relation, sql) %} - {% elif full_refresh_mode %} - {% set build_sql = create_table_as(False, target_relation, sql) %} {% else %} {% do run_query(create_table_as(True, tmp_relation, sql)) %} {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} From 3c93fa5c970e638090a3d26c181bc1cd61e739d4 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Sun, 19 Jan 2020 21:31:50 +1000 Subject: [PATCH 043/603] ensure relation type is set for views and tables --- dbt/include/spark/macros/materializations/table.sql | 2 +- dbt/include/spark/macros/materializations/view.sql | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index c13f9917..48dfe7ff 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -16,7 +16,7 @@ /* See ../view/view.sql for more information about this relation. */ - {%- set backup_relation_type = 'table' if old_relation is none else old_relation.type -%} + {%- set backup_relation_type = 'table' if old_relation is none else get_relation_type(old_relation) -%} {%- set backup_relation = api.Relation.create(identifier=backup_identifier, schema=schema, database=database, diff --git a/dbt/include/spark/macros/materializations/view.sql b/dbt/include/spark/macros/materializations/view.sql index d295d787..c6751222 100644 --- a/dbt/include/spark/macros/materializations/view.sql +++ b/dbt/include/spark/macros/materializations/view.sql @@ -22,7 +22,8 @@ exist, then there is nothing to move out of the way and subsequentally drop. In that case, this relation will be effectively unused. 
*/ - {%- set backup_relation_type = 'view' if old_relation is none else old_relation.type -%} + + {%- set backup_relation_type = 'view' if old_relation is none else get_relation_type(old_relation) -%} {%- set backup_relation = api.Relation.create(identifier=backup_identifier, schema=schema, database=database, type=backup_relation_type) -%} From 0e154e8cfe2192cffc2bd14176d743678770c6d8 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Mon, 27 Jan 2020 18:29:05 -0800 Subject: [PATCH 044/603] Update README.md Co-Authored-By: Jeremy Cohen --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f45a45cc..21606a89 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This plugin can be installed via pip: # Install dbt-spark from github (latest master branch): $ pip install git+https://github.com/fishtown-analytics/dbt-spark@master -# OR Install dbt-spark from PyPi (latest stable version published to PyPi): +# Install dbt-spark from PyPi: $ pip install dbt-spark ``` From a0ee3e32b7b7761613ce064de7a7250bd297333e Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Mon, 27 Jan 2020 18:30:29 -0800 Subject: [PATCH 045/603] removing reference to pip from master --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 21606a89..bd84fd10 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,6 @@ For more information on using Spark with dbt, consult the [dbt documentation](ht This plugin can be installed via pip: ``` -# Install dbt-spark from github (latest master branch): -$ pip install git+https://github.com/fishtown-analytics/dbt-spark@master - # Install dbt-spark from PyPi: $ pip install dbt-spark ``` From 289b6fe859d3a97fb399013657cafbde4b1ef964 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 28 Jan 2020 22:00:18 -0500 Subject: [PATCH 046/603] Use show table extended to get all rel types --- dbt/adapters/spark/impl.py | 19 ++++++++++++------- dbt/include/spark/macros/adapters.sql | 2 +- .../macros/materializations/incremental.sql | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 30c85820..c08ca17a 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -58,24 +58,29 @@ def get_relation_type(self, relation, model_name=None): def list_relations_without_caching(self, information_schema, schema, model_name=None): kwargs = {'information_schema': information_schema, 'schema': schema} - results = self.execute_macro( - LIST_RELATIONS_MACRO_NAME, - kwargs=kwargs, - release=True - ) + try: + results = self.execute_macro( + LIST_RELATIONS_MACRO_NAME, + kwargs=kwargs, + release=True + ) + except dbt.exceptions.RuntimeException as e: + if hasattr(e, 'msg') and f"Database '{schema}' not found" in e.msg: + return [] relations = [] quote_policy = { 'schema': True, 'identifier': True } - for _database, name, _ in results: + for _database, name, _, information in results: + rel_type = ('view' if 'Type: VIEW' in information else 'table') relations.append(self.Relation.create( database=_database, schema=_database, identifier=name, quote_policy=quote_policy, - type=None + type=rel_type )) return relations diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index c4616ca8..9fd0fb9c 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -57,7 +57,7 @@ {% 
macro spark__list_relations_without_caching(information_schema, schema) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} - show tables in {{ schema }} + show table extended in {{ schema }} like '*' {% endcall %} {% do return(load_result('list_relations_without_caching').table) %} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 82fba58e..73e6af7c 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -13,7 +13,7 @@ {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, type='table') -%} {%- set full_refresh = flags.FULL_REFRESH == True and old_relation is not none -%} - {%- set type = spark_get_relation_type(this) if old_relation else none -%} + {%- set type = old_relation.type if old_relation.type is not none else spark_get_relation_type(this) -%} {%- set old_relation_is_view = old_relation is not none and type == 'view' -%} {%- if full_refresh or old_relation_is_view -%} From a32906d4a5f971c29d8e38761e465973430b0680 Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Tue, 28 Jan 2020 19:27:06 -0800 Subject: [PATCH 047/603] Update README.md --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index bd84fd10..4c5523db 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ A dbt profile can be configured to run against Spark using the following configu | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | | port | The port to connect to the host on | Optional (default: 443 for `http`, 10001 for `thrift`) | `443` | | token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | +| organiztion | The id of the Azure Databricks workspace being used; only for Azure Databricks | Optional (default: 0, for Azure Databricks it is REQUIRED to be set) | 1234567891234567 | | cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | | user | The username to use to connect to the cluster | Optional | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | @@ -37,6 +38,25 @@ To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo **Example profiles.yml entries:** + +**AWS Databricks, via http** +``` +your_profile_name: + target: dev + outputs: + dev: + method: http + type: spark + schema: analytics + host: yourorg.sparkhost.com + port: 443 + token: abc123 + cluster: 01234-23423-coffeetime + connect_retries: 5 + connect_timeout: 60 +``` + +**Azure Databricks, via http** ``` your_profile_name: target: dev @@ -48,6 +68,7 @@ your_profile_name: host: yourorg.sparkhost.com port: 443 token: abc123 + organization: 1234567891234567 cluster: 01234-23423-coffeetime connect_retries: 5 connect_timeout: 60 From a388ad784606f0cd2d2850bcd00037503dffd175 Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Tue, 28 Jan 2020 19:38:14 -0800 Subject: [PATCH 048/603] Update README.md --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4c5523db..4f5f76b1 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,18 @@ A dbt profile can be configured to run against Spark using the following configu | host | 
The hostname to connect to | Required | `yourorg.sparkhost.com` | | port | The port to connect to the host on | Optional (default: 443 for `http`, 10001 for `thrift`) | `443` | | token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | -| organiztion | The id of the Azure Databricks workspace being used; only for Azure Databricks | Optional (default: 0, for Azure Databricks it is REQUIRED to be set) | 1234567891234567 | +| organiztion | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Azure Databricks Note | 1234567891234567 | | cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | | user | The username to use to connect to the cluster | Optional | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | +**Azure Databricks Note** + +To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. When connecting to Azure Databricks, the organization tag is required to be set in the profiles.yml connection file, as it will be defaulted to 0 otherwise, and will not connect to Azure. This connection method follows the databricks-connect package's semantics for connecting to Azure Databricks. + +dbt-spark has been tested against AWS Databricks, and it does not appear to be have the same way as Azure. It appears to default this value in connection URLs to 0, so dbt-spark does the same for AWS' connections (i.e. simply leave organization-id out when connecting to the AWS version). Note the missing reference to organization here: https://docs.databricks.com/dev-tools/databricks-connect.html#step-2-configure-connection-properties + **Usage with Amazon EMR** To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo /usr/lib/spark/sbin/start-thriftserver.sh` on the master node of the cluster to start the Thrift server (see https://aws.amazon.com/premiumsupport/knowledge-center/jdbc-connection-emr/ for further context). You will also need to connect to port `10001`, which will connect to the Spark backend Thrift server; port `10000` will instead connect to a Hive backend, which will not work correctly with dbt. @@ -39,7 +45,7 @@ To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo **Example profiles.yml entries:** -**AWS Databricks, via http** +**http, e.g. 
AWS Databricks** ``` your_profile_name: target: dev @@ -74,6 +80,7 @@ your_profile_name: connect_timeout: 60 ``` +**Thrift connection** ``` your_profile_name: target: dev From f2e977d1d11c40dcb4b75210f06fc741b6889b09 Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Tue, 28 Jan 2020 19:40:57 -0800 Subject: [PATCH 049/603] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4f5f76b1..b1009d3c 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,9 @@ A dbt profile can be configured to run against Spark using the following configu To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. When connecting to Azure Databricks, the organization tag is required to be set in the profiles.yml connection file, as it will be defaulted to 0 otherwise, and will not connect to Azure. This connection method follows the databricks-connect package's semantics for connecting to Azure Databricks. -dbt-spark has been tested against AWS Databricks, and it does not appear to be have the same way as Azure. It appears to default this value in connection URLs to 0, so dbt-spark does the same for AWS' connections (i.e. simply leave organization-id out when connecting to the AWS version). Note the missing reference to organization here: https://docs.databricks.com/dev-tools/databricks-connect.html#step-2-configure-connection-properties +dbt-spark has been tested against AWS Databricks, and it does not appear to behave the same way as Azure. It appears to default this value in connection URLs to 0, so dbt-spark does the same for AWS' connections (i.e. simply leave organization-id out when connecting to the AWS version). Note the missing reference to organization here: https://docs.databricks.com/dev-tools/databricks-connect.html#step-2-configure-connection-properties. + +Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. 
**Usage with Amazon EMR** From 7aa749f1dab3fa211918658974d8eccbf85a81ad Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Tue, 28 Jan 2020 19:50:45 -0800 Subject: [PATCH 050/603] Update README.md --- README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b1009d3c..39fe215f 100644 --- a/README.md +++ b/README.md @@ -26,20 +26,26 @@ A dbt profile can be configured to run against Spark using the following configu | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | | port | The port to connect to the host on | Optional (default: 443 for `http`, 10001 for `thrift`) | `443` | | token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | -| organiztion | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Azure Databricks Note | 1234567891234567 | +| organization | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Databricks Note | 1234567891234567 | | cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | | user | The username to use to connect to the cluster | Optional | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | -**Azure Databricks Note** +**Databricks Note** -To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. When connecting to Azure Databricks, the organization tag is required to be set in the profiles.yml connection file, as it will be defaulted to 0 otherwise, and will not connect to Azure. This connection method follows the databricks-connect package's semantics for connecting to Azure Databricks. +AWS and Azure Databricks have differences in their connections, likely due to differences in how their URLs are generated between the two services. -dbt-spark has been tested against AWS Databricks, and it does not appear to behave the same way as Azure. It appears to default this value in connection URLs to 0, so dbt-spark does the same for AWS' connections (i.e. simply leave organization-id out when connecting to the AWS version). Note the missing reference to organization here: https://docs.databricks.com/dev-tools/databricks-connect.html#step-2-configure-connection-properties. +To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. When connecting to Azure Databricks, the organization tag is required to be set in the profiles.yml connection file, as it will be defaulted to 0 otherwise, and will not connect to Azure. This connection method follows the databricks-connect package's semantics for connecting to Databricks. + +dbt-spark has also been tested against AWS Databricks, and it has some differences in the URLs used. 
It appears to default the positional value where organization lives in AWS connection URLs to 0, so dbt-spark does the same for AWS connections (i.e. simply leave organization-id out when connecting to the AWS version and dbt-spark will construct the correct AWS URL for you). Note the missing reference to organization here: https://docs.databricks.com/dev-tools/databricks-connect.html#step-2-configure-connection-properties. Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. +Lastly, the host field for Databricks can be found at the start of your workspace or cluster url: .azuredatabricks.net for Azure, or .cloud.databricks.com for AWS. + + + **Usage with Amazon EMR** To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo /usr/lib/spark/sbin/start-thriftserver.sh` on the master node of the cluster to start the Thrift server (see https://aws.amazon.com/premiumsupport/knowledge-center/jdbc-connection-emr/ for further context). You will also need to connect to port `10001`, which will connect to the Spark backend Thrift server; port `10000` will instead connect to a Hive backend, which will not work correctly with dbt. From 9ab1da326c48d9d674d31f8c0623bc6ec4bb0fdb Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Tue, 28 Jan 2020 19:54:08 -0800 Subject: [PATCH 051/603] Update Readme for new organization tag Updating doc for more Databricks detail --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 39fe215f..4a0ce73d 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,8 @@ AWS and Azure Databricks have differences in their connections, likely due to di To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. When connecting to Azure Databricks, the organization tag is required to be set in the profiles.yml connection file, as it will be defaulted to 0 otherwise, and will not connect to Azure. This connection method follows the databricks-connect package's semantics for connecting to Databricks. +Of special note is the fact that organization ID is treated as a string by dbt-spark, as opposed to a large number. While all examples to date have contained numeric digits, it is unknown how long that may continue, and what the upper limit of this number is. If you do have a leading zero, please include it in the organization tag and dbt-spark will pass that along. + dbt-spark has also been tested against AWS Databricks, and it has some differences in the URLs used. It appears to default the positional value where organization lives in AWS connection URLs to 0, so dbt-spark does the same for AWS connections (i.e. simply leave organization-id out when connecting to the AWS version and dbt-spark will construct the correct AWS URL for you). Note the missing reference to organization here: https://docs.databricks.com/dev-tools/databricks-connect.html#step-2-configure-connection-properties. Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. 
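To make the organization handling described above concrete, here is a minimal illustrative sketch; it is not part of any patch in this series. The helper name `build_connection_url` is assumed for illustration only, while the URL template and the behaviour it encodes (Azure Databricks workspaces supply an explicit organization ID, AWS connections fall back to `0`) come from the connection code and README text in these patches.

```
# Illustrative sketch only; not part of any patch in this series.
# The helper name is hypothetical. The URL template and the
# "organization defaults to 0 on AWS" behaviour come from the
# dbt-spark connection code and README guidance above.
SPARK_CONNECTION_URL = (
    "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}"
)


def build_connection_url(host, cluster, port=443, organization=None):
    # Azure Databricks requires an explicit workspace (organization) id;
    # AWS Databricks URLs use 0 in that position, so default accordingly.
    org = str(organization) if organization is not None else "0"
    return SPARK_CONNECTION_URL.format(
        host=host, port=port, organization=org, cluster=cluster
    )


# AWS Databricks: leave organization out and it defaults to 0
print(build_connection_url("myorg.sparkhost.com", "01234-23423-coffeetime"))
# https://myorg.sparkhost.com:443/sql/protocolv1/o/0/01234-23423-coffeetime

# Azure Databricks: pass the workspace's organization id
print(build_connection_url(
    "myregion.azuredatabricks.net",
    "01234-23423-coffeetime",
    organization="1234567891234567",
))
# https://myregion.azuredatabricks.net:443/sql/protocolv1/o/1234567891234567/01234-23423-coffeetime
```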
From 70e50b734afbed9418c2436459544aa69d296fa4 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Thu, 2 Jan 2020 14:09:25 -0800 Subject: [PATCH 052/603] instructions for installing from master branch --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 821b3ce7..f45a45cc 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,12 @@ For more information on using Spark with dbt, consult the [dbt documentation](ht ### Installation This plugin can be installed via pip: + ``` -# Install prerequisites: -$ pip install pyhive[hive] +# Install dbt-spark from github (latest master branch): +$ pip install git+https://github.com/fishtown-analytics/dbt-spark@master -# Install dbt-spark: +# OR Install dbt-spark from PyPi (latest stable version published to PyPi): $ pip install dbt-spark ``` From d4c82af3f316731643d1af6e052290cfd81e3d2c Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Thu, 2 Jan 2020 14:14:04 -0800 Subject: [PATCH 053/603] auto-include PyHive 'hive' extras --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ca70a925..2ec738ac 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ }, install_requires=[ 'dbt-core=={}'.format(package_version), - 'PyHive>=0.6.0,<0.7.0', + 'PyHive[hive]>=0.6.0,<0.7.0', 'thrift>=0.11.0,<0.12.0', ] ) From 7e189db26638484b46ffd6b8b3c4f199304c1bed Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Mon, 27 Jan 2020 18:29:05 -0800 Subject: [PATCH 054/603] Update README.md Co-Authored-By: Jeremy Cohen --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f45a45cc..21606a89 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This plugin can be installed via pip: # Install dbt-spark from github (latest master branch): $ pip install git+https://github.com/fishtown-analytics/dbt-spark@master -# OR Install dbt-spark from PyPi (latest stable version published to PyPi): +# Install dbt-spark from PyPi: $ pip install dbt-spark ``` From 85b671ada2f2d84ce3870d7cdba8aad70ef89f61 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Mon, 27 Jan 2020 18:30:29 -0800 Subject: [PATCH 055/603] removing reference to pip from master --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 21606a89..bd84fd10 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,6 @@ For more information on using Spark with dbt, consult the [dbt documentation](ht This plugin can be installed via pip: ``` -# Install dbt-spark from github (latest master branch): -$ pip install git+https://github.com/fishtown-analytics/dbt-spark@master - # Install dbt-spark from PyPi: $ pip install dbt-spark ``` From caf9426dc61987cdd2c69f870e0a3034dc2af744 Mon Sep 17 00:00:00 2001 From: Rob Poidomani <13532007+poidra02@users.noreply.github.com> Date: Sat, 1 Feb 2020 08:16:23 -0800 Subject: [PATCH 056/603] Clarify Databricks hostname in readme Plus organization format tweak in table to match other fields --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4a0ce73d..6a7d168a 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ A dbt profile can be configured to run against Spark using the following configu | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | 
| port | The port to connect to the host on | Optional (default: 443 for `http`, 10001 for `thrift`) | `443` | | token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | -| organization | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Databricks Note | 1234567891234567 | +| organization | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Databricks Note | `1234567891234567` | | cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | | user | The username to use to connect to the cluster | Optional | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | @@ -44,7 +44,7 @@ dbt-spark has also been tested against AWS Databricks, and it has some differenc Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. -Lastly, the host field for Databricks can be found at the start of your workspace or cluster url: .azuredatabricks.net for Azure, or .cloud.databricks.com for AWS. +Lastly, the host field for Databricks can be found at the start of your workspace or cluster url (but don't include https://): region.azuredatabricks.net for Azure, or account.cloud.databricks.com for AWS. From ef8a96ce9b3ac24863739e199b6342755bf58eae Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 3 Feb 2020 21:49:09 -0500 Subject: [PATCH 057/603] Reimplement get_relation to handle custom schemas --- dbt/adapters/spark/impl.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index bfa3d103..c104a4c7 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -67,6 +67,27 @@ def list_relations_without_caching(self, information_schema, schema, )) return relations + def get_relation(self, database, schema, identifier): + relations_list = self.list_relations(schema, schema) + + matches = self._make_match(relations_list=relations_list, + database=None, schema=schema, + identifier=identifier) + + if len(matches) > 1: + kwargs = { + 'identifier': identifier, + 'schema': schema + } + dbt.exceptions.get_relation_returned_multiple_results( + kwargs, matches + ) + + elif matches: + return matches[0] + + return None + # Override that doesn't check the type of the relation -- we do it # dynamically in the macro code def drop_relation(self, relation, model_name=None): From 29615d45e40863adba24968d17d328627e4f064f Mon Sep 17 00:00:00 2001 From: niels Date: Tue, 4 Feb 2020 14:17:59 +0100 Subject: [PATCH 058/603] Fixup, removed log statement --- dbt/include/spark/macros/adapters.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index f8d9a793..22166216 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -103,7 +103,6 @@ {% if row[0].startswith('#') %} {{ vars.update({'before_partition_info': False}) }} {% else %} - {{ dbt_utils.log_info(row) }} {{ columns.append(row) }} {% endif %} {% endfor %} From f23901c40151599573769092745550baba434edc Mon Sep 17 00:00:00 2001 From: niels Date: Tue, 4 Feb 2020 14:32:21 +0100 Subject: [PATCH 059/603] Fixup, readme + adapter specific config --- README.md | 5 ++++- dbt/adapters/spark/impl.py | 3 +++ 2 files changed, 7 insertions(+), 1 
deletion(-) diff --git a/README.md b/README.md index 821b3ce7..28f1f415 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,10 @@ The following configurations can be supplied to models run with the dbt-spark pl | Option | Description | Required? | Example | |---------|----------------------------------------------------|-------------------------|--------------------------| | file_format | The file format to use when creating tables | Optional | `parquet` | - +| location | The created table uses the specified directory to store its data. | Optional | `/mnt/root` | +| partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `partition_1` | +| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | +| buckets | The number of buckets to create while clustering | Optional | `8` | **Incremental Models** diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 30c85820..4d12f2a1 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -15,6 +15,9 @@ class SparkAdapter(SQLAdapter): ConnectionManager = SparkConnectionManager Relation = SparkRelation + + AdapterSpecificConfigs = frozenset({"file_format", "location", "partition_by", + "clustered_by", "buckets"}) @classmethod def date_function(cls): From db598507e91f6d9e3ad6f8b565fb077391678c11 Mon Sep 17 00:00:00 2001 From: niels Date: Tue, 4 Feb 2020 14:34:41 +0100 Subject: [PATCH 060/603] Make explicit that buckets is required if clustered_by is specified --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 28f1f415..0e08de2c 100644 --- a/README.md +++ b/README.md @@ -84,8 +84,8 @@ The following configurations can be supplied to models run with the dbt-spark pl | file_format | The file format to use when creating tables | Optional | `parquet` | | location | The created table uses the specified directory to store its data. | Optional | `/mnt/root` | | partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `partition_1` | -| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | -| buckets | The number of buckets to create while clustering | Optional | `8` | +| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | +| buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | **Incremental Models** From 0171a25adb37c76449cafc9e07bfbf4e3f98970a Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 4 Feb 2020 10:13:44 -0500 Subject: [PATCH 061/603] Fix flake8 errors --- dbt/adapters/spark/connections.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index f47cb0fc..dfd2277a 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -14,8 +14,11 @@ import base64 import time -# adding organization as a parameter, as it is required by Azure Databricks and is different per workspace. 
-SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" +# adding organization as a parameter, as it is required by Azure Databricks +# and is different per workspace. +SPARK_CONNECTION_URL = ''' + https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster} + ''' SPARK_CREDENTIALS_CONTRACT = { 'type': 'object', @@ -70,8 +73,9 @@ class SparkCredentials(Credentials): def __init__(self, *args, **kwargs): kwargs.setdefault('database', kwargs.get('schema')) - - # coercing org to a string since it is unknown whether Azure Databricks will always keep it numeric + + # coercing org to a string since it is unknown whether + # Azure Databricks will always keep it numeric if 'organization' in kwargs: kwargs['organization'] = str(kwargs['organization']) else: @@ -257,7 +261,7 @@ def open(cls, connection): for i in range(1 + connect_retries): try: if creds.method == 'http': - + cls.validate_creds(creds, ['token', 'host', 'port', 'cluster', 'organization']) From c07b9d70a3123d5b4f4fc41e00bf9553ebbecabf Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 4 Feb 2020 12:55:59 -0500 Subject: [PATCH 062/603] Fix unit test --- dbt/adapters/spark/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index dfd2277a..cbc14b34 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -18,7 +18,7 @@ # and is different per workspace. SPARK_CONNECTION_URL = ''' https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster} - ''' + '''.strip() SPARK_CREDENTIALS_CONTRACT = { 'type': 'object', From c814994df2aa5feb2a416acb64f8dd8a3e06e3b6 Mon Sep 17 00:00:00 2001 From: niels Date: Thu, 6 Feb 2020 17:02:54 +0100 Subject: [PATCH 063/603] Revert spark__get_columns_in_relation --- dbt/include/spark/macros/adapters.sql | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 22166216..389bd4ed 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -97,16 +97,8 @@ describe {{ relation }} {% endcall %} - {% set columns = [] %} - {% set vars = {'before_partition_info': True} %} - {% for row in load_result('get_columns_in_relation').table if vars.before_partition_info %} - {% if row[0].startswith('#') %} - {{ vars.update({'before_partition_info': False}) }} - {% else %} - {{ columns.append(row) }} - {% endif %} - {% endfor %} - {{ return(sql_convert_columns_in_relation(columns)) }} + {% set table = load_result('get_columns_in_relation').table %} + {{ return(sql_convert_columns_in_relation(table)) }} {% endmacro %} From 50e13780f65d7989c5217afdbe7b2a9ed80d5635 Mon Sep 17 00:00:00 2001 From: niels Date: Thu, 6 Feb 2020 17:13:07 +0100 Subject: [PATCH 064/603] Switch to location_root --- dbt/include/spark/macros/adapters.sql | 7 ++++--- test/unit/test_macros.py | 13 +++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 389bd4ed..ebf32808 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -15,9 +15,10 @@ {% macro location_clause() %} - {%- set path = config.get('location', validator=validation.any[basestring]) -%} - {%- if path is not none %} - location '{{ path }}' + {%- set location_root = config.get('location_root', validator=validation.any[basestring]) -%} + {%- 
set identifier = model['alias'] -%} + {%- if location_root is not none %} + location '{{ location_root }}/{{ identifier }}' {%- endif %} {%- endmacro -%} diff --git a/test/unit/test_macros.py b/test/unit/test_macros.py index c08cecb3..eb8852ed 100644 --- a/test/unit/test_macros.py +++ b/test/unit/test_macros.py @@ -26,8 +26,9 @@ def __get_template(self, template_filename): return self.jinja_env.get_template(template_filename, globals=self.default_context) - def __run_macro(self, template, name, *args): - value = getattr(template.module, name)(*args) + def __run_macro(self, template, name, temporary, relation, sql): + self.default_context['model'].alias = relation + value = getattr(template.module, name)(temporary, relation, sql) return re.sub(r'\s\s+', ' ', value) @@ -93,9 +94,9 @@ def test_macros_create_table_as_location(self): template = self.__get_template('adapters.sql') - self.config['location'] = '/mnt/root' + self.config['location_root'] = '/mnt/root' self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table location '/mnt/root' as select 1") + "create table my_table location '/mnt/root/my_table' as select 1") def test_macros_create_table_as_comment(self): @@ -112,7 +113,7 @@ def test_macros_create_table_as_all(self): template = self.__get_template('adapters.sql') self.config['file_format'] = 'delta' - self.config['location'] = '/mnt/root' + self.config['location_root'] = '/mnt/root' self.config['partition_by'] = ['partition_1', 'partition_2'] self.config['clustered_by'] = ['cluster_1', 'cluster_2'] self.config['buckets'] = '1' @@ -120,4 +121,4 @@ def test_macros_create_table_as_all(self): self.default_context['model'].description = 'Description Test' self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root' comment 'Description Test' as select 1") + "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1") From 81c0ef05b734e55d5428d2c8a75c002e8aa7dd5b Mon Sep 17 00:00:00 2001 From: niels Date: Thu, 6 Feb 2020 20:59:12 +0100 Subject: [PATCH 065/603] Fixup, location_root --- README.md | 2 +- dbt/adapters/spark/impl.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0e08de2c..a3339b2f 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ The following configurations can be supplied to models run with the dbt-spark pl | Option | Description | Required? | Example | |---------|----------------------------------------------------|-------------------------|--------------------------| | file_format | The file format to use when creating tables | Optional | `parquet` | -| location | The created table uses the specified directory to store its data. | Optional | `/mnt/root` | +| location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | `/mnt/root` | | partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `partition_1` | | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. 
| Optional | `cluster_1` | | buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 4d12f2a1..80140bb0 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -16,7 +16,7 @@ class SparkAdapter(SQLAdapter): ConnectionManager = SparkConnectionManager Relation = SparkRelation - AdapterSpecificConfigs = frozenset({"file_format", "location", "partition_by", + AdapterSpecificConfigs = frozenset({"file_format", "location_root", "partition_by", "clustered_by", "buckets"}) @classmethod From f7f8d843a6e1c777a396a6b68732a30034d0215c Mon Sep 17 00:00:00 2001 From: niels Date: Thu, 6 Feb 2020 21:02:17 +0100 Subject: [PATCH 066/603] Fixup, pep8 --- dbt/adapters/spark/impl.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 80140bb0..ba452796 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -15,9 +15,10 @@ class SparkAdapter(SQLAdapter): ConnectionManager = SparkConnectionManager Relation = SparkRelation - - AdapterSpecificConfigs = frozenset({"file_format", "location_root", "partition_by", - "clustered_by", "buckets"}) + + AdapterSpecificConfigs = frozenset({"file_format", "location_root", + "partition_by", "clustered_by", + "buckets"}) @classmethod def date_function(cls): From 87590de0d620b189dc4b4c65584208cef7c7a611 Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Fri, 7 Feb 2020 16:34:13 +1000 Subject: [PATCH 067/603] updated connection keys to include cluster when logging --- dbt/adapters/spark/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 8c15b9eb..81a27fb2 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -46,7 +46,7 @@ def type(self): return 'spark' def _connection_keys(self): - return 'host', 'port', 'schema', 'organization' + return 'host', 'port', 'cluster', 'schema', 'organization' class ConnectionWrapper(object): From 7ed8129458f5d9a4b0b20ad1818d7656cbaefc5e Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Fri, 7 Feb 2020 16:35:06 +1000 Subject: [PATCH 068/603] change comment to Optional[str] --- dbt/adapters/spark/column.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index 4b1a36d2..7059b1de 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import TypeVar +from typing import TypeVar, Optional from dbt.adapters.base.column import Column @@ -16,7 +16,7 @@ def __init__( self, column: str, dtype: str, - comment: str = None + comment: Optional[str] ) -> None: super().__init__(column, dtype) From f0ae5ff4f46d700a9033f7e99e8203dd40fb873e Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 7 Feb 2020 13:46:53 -0500 Subject: [PATCH 069/603] Simpler logic for old_relation.type --- dbt/include/spark/macros/materializations/incremental.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 73e6af7c..e6b2ee90 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -13,8 +13,7 @@ {%- set tmp_relation = 
api.Relation.create(identifier=tmp_identifier, type='table') -%} {%- set full_refresh = flags.FULL_REFRESH == True and old_relation is not none -%} - {%- set type = old_relation.type if old_relation.type is not none else spark_get_relation_type(this) -%} - {%- set old_relation_is_view = old_relation is not none and type == 'view' -%} + {%- set old_relation_is_view = old_relation is not none and old_relation.type == 'view' -%} {%- if full_refresh or old_relation_is_view -%} {{ adapter.drop_relation(old_relation) }} From d8906d6d157c6fe3833e018ad84ca411fa895cf8 Mon Sep 17 00:00:00 2001 From: Dandandan Date: Thu, 13 Feb 2020 20:10:07 +0100 Subject: [PATCH 070/603] Return without results when list relations fails --- dbt/adapters/spark/impl.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index bfd43f02..40330452 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -59,6 +59,9 @@ def list_relations_without_caching(self, information_schema, schema, except dbt.exceptions.RuntimeException as e: if hasattr(e, 'msg') and f"Database '{schema}' not found" in e.msg: return [] + else: + logger.debug(f"Error while retrieving information about {schema}: {e.msg}") + return [] relations = [] quote_policy = { From 93bece57ebe808b3221ab3225fd092896add2a40 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Wed, 26 Feb 2020 01:12:21 -0800 Subject: [PATCH 071/603] freeze jinja2 version to resolve `._compat` issue --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 2ec738ac..0a55d720 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ }, install_requires=[ 'dbt-core=={}'.format(package_version), + 'jinja<3.0.0', # until dbt-core reaches 0.15.2: https://github.com/fishtown-analytics/dbt/issues/2147 'PyHive[hive]>=0.6.0,<0.7.0', 'thrift>=0.11.0,<0.12.0', ] From a109fffb31fba6bda9385fea7b307f822e62a9f6 Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Wed, 26 Feb 2020 01:14:36 -0800 Subject: [PATCH 072/603] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0a55d720..af4b31a5 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ }, install_requires=[ 'dbt-core=={}'.format(package_version), - 'jinja<3.0.0', # until dbt-core reaches 0.15.2: https://github.com/fishtown-analytics/dbt/issues/2147 + 'jinja2<3.0.0', # until dbt-core reaches 0.15.2: https://github.com/fishtown-analytics/dbt/issues/2147 'PyHive[hive]>=0.6.0,<0.7.0', 'thrift>=0.11.0,<0.12.0', ] From ebee1374f57eefd77736624573538b37b5b8fe5a Mon Sep 17 00:00:00 2001 From: Sam Kosky Date: Wed, 26 Feb 2020 20:35:06 +1000 Subject: [PATCH 073/603] implement quoting on columns --- dbt/adapters/spark/column.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index 7059b1de..88a7e01f 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -33,6 +33,10 @@ def can_expand_to(self: Self, other_column: Self) -> bool: def literal(self, value): return "cast({} as {})".format(value, self.dtype) + @property + def quoted(self) -> str: + return '`{}`'.format(self.column) + @property def data_type(self) -> str: return self.dtype From 3c86ce018e60d0a9172e5c82e85a9436497bf06c Mon Sep 17 00:00:00 2001 From: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Date: Thu, 27 Feb 2020 15:38:27 -0800 
Subject: [PATCH 074/603] Update setup.py Thanks! Co-Authored-By: Jeremy Cohen --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index af4b31a5..b5925b41 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ }, install_requires=[ 'dbt-core=={}'.format(package_version), - 'jinja2<3.0.0', # until dbt-core reaches 0.15.2: https://github.com/fishtown-analytics/dbt/issues/2147 + 'jinja2<3.0.0', # until dbt-core reaches 0.16.0: https://github.com/fishtown-analytics/dbt/issues/2147 'PyHive[hive]>=0.6.0,<0.7.0', 'thrift>=0.11.0,<0.12.0', ] From 35d4a50177887bc1f2c236a564d1d51616970cf3 Mon Sep 17 00:00:00 2001 From: Tony Qiu Date: Mon, 2 Mar 2020 16:06:31 -0500 Subject: [PATCH 075/603] - add c.name None check - making snapshot working with delta lake --- dbt/adapters/spark/impl.py | 48 +++- .../macros/materializations/snapshot.sql | 236 ++++++++++++++++++ 2 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 dbt/include/spark/macros/materializations/snapshot.sql diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 637b0cdf..b40fe5a0 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -7,7 +7,7 @@ from dbt.adapters.spark import SparkColumn from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark.relation import SparkRelation -from dbt.adapters.base import BaseRelation +from dbt.adapters.base import BaseRelation, RelationType from dbt.clients.agate_helper import table_from_data from dbt.logger import GLOBAL_LOGGER as logger @@ -114,7 +114,9 @@ def list_relations_without_caching(self, information_schema, schema) -> List[Spa for _schema, name, _ in results: relation = self.Relation.create( schema=_schema, - identifier=name + identifier=name, + #TODO: append relation type view/table + ## type=RelationType.Table ) self.cache_added(relation) relations.append(relation) @@ -236,3 +238,45 @@ def check_schema_exists(self, database, schema): exists = True if schema in [row[0] for row in results] else False return exists + + + def valid_snapshot_target(self, relation: BaseRelation) -> None: + """Ensure that the target relation is valid, by making sure it has the + expected columns. + + :param Relation relation: The relation to check + :raises dbt.exceptions.CompilationException: If the columns are + incorrect. + """ + if not isinstance(relation, self.Relation): + dbt.exceptions.invalid_type_error( + method_name='valid_snapshot_target', + arg_name='relation', + got_value=relation, + expected_type=self.Relation) + + columns = self.get_columns_in_relation(relation) + names = set(c.name.lower() for c in columns if c.name) + expanded_keys = ('scd_id', 'valid_from', 'valid_to') + extra = [] + missing = [] + for legacy in expanded_keys: + desired = 'dbt_' + legacy + if desired not in names: + missing.append(desired) + if legacy in names: + extra.append(legacy) + + if missing: + if extra: + msg = ( + 'Snapshot target has ("{}") but not ("{}") - is it an ' + 'unmigrated previous version archive?' 
+ .format('", "'.join(extra), '", "'.join(missing)) + ) + else: + msg = ( + 'Snapshot target is not a snapshot table (missing "{}")' + .format('", "'.join(missing)) + ) + dbt.exceptions.raise_compiler_error(msg) \ No newline at end of file diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql new file mode 100644 index 00000000..a23b8a73 --- /dev/null +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -0,0 +1,236 @@ +{% macro spark__snapshot_hash_arguments(args) -%} + md5({%- for arg in args -%} + coalesce(cast({{ arg }} as string ), '') + {% if not loop.last %} || '|' || {% endif %} + {%- endfor -%}) +{%- endmacro %} + +{% macro build_snapshot_table(strategy, sql) %} + + select *, + {{ strategy.scd_id }} as dbt_scd_id, + {{ strategy.unique_key }} as dbt_unique_key, + {{ strategy.updated_at }} as dbt_updated_at, + {{ strategy.updated_at }} as dbt_valid_from, + nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to + from ( + {{ sql }} + ) sbq + +{% endmacro %} + +{% macro snapshot_staging_table_inserts(strategy, source_sql, target_relation) -%} + + with snapshot_query as ( + + {{ source_sql }} + + ), + + snapshotted_data as ( + + select * + + from {{ target_relation }} + + ), + + source_data as ( + + select *, + {{ strategy.scd_id }} as dbt_scd_id, + {{ strategy.unique_key }} as dbt_unique_key, + {{ strategy.updated_at }} as dbt_updated_at, + {{ strategy.updated_at }} as dbt_valid_from, + nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to + + from snapshot_query + ), + + insertions as ( + + select + source_data.* + + from source_data + left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key + where snapshotted_data.dbt_unique_key is null + or ( + snapshotted_data.dbt_unique_key is not null + and snapshotted_data.dbt_valid_to is null + and ( + {{ strategy.row_changed }} + ) + ) + + ) + + select * from insertions + +{%- endmacro %} + + +{% macro snapshot_staging_table_updates(strategy, source_sql, target_relation) -%} + + with snapshot_query as ( + + {{ source_sql }} + + ), + + snapshotted_data as ( + + select * + + from {{ target_relation }} + + ), + + source_data as ( + + select + *, + {{ strategy.scd_id }} as dbt_scd_id, + {{ strategy.unique_key }} as dbt_unique_key, + {{ strategy.updated_at }} as dbt_updated_at, + {{ strategy.updated_at }} as dbt_valid_from + + from snapshot_query + ), + + updates as ( + + select + 'update' as dbt_change_type, + snapshotted_data.dbt_scd_id, + source_data.dbt_valid_from as dbt_valid_to + + from source_data + join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key + where snapshotted_data.dbt_valid_to is null + and ( + {{ strategy.row_changed }} + ) + + ) + + select * from updates + +{%- endmacro %} + +{% macro build_snapshot_staging_table_updates(strategy, sql, target_relation) %} + {% set tmp_update_relation = make_temp_relation(target_relation, '__dbt_tmp_update') %} + + {% set update_select = snapshot_staging_table_updates(strategy, sql, target_relation) %} + + {% call statement('build_snapshot_staging_relation_updates') %} + {{ create_table_as(True, tmp_update_relation, update_select) }} + {% endcall %} + + {% do return(tmp_update_relation) %} +{% endmacro %} + +{% macro build_snapshot_staging_table_insert(strategy, sql, target_relation) %} + {% set tmp_insert_relation = make_temp_relation(target_relation, '__dbt_tmp_insert') %} + + {% set 
inserts_select = snapshot_staging_table_inserts(strategy, sql, target_relation) %} + + {% call statement('build_snapshot_staging_relation_inserts') %} + {{ create_table_as(True, tmp_insert_relation, inserts_select) }} + {% endcall %} + + + {% do return(tmp_insert_relation) %} +{% endmacro %} + +{% macro spark__snapshot_merge_update_sql(target, source) -%} + + merge into {{ target }} as DBT_INTERNAL_DEST + using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE + on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id + when matched then update set DBT_INTERNAL_DEST.dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to + ; +{% endmacro %} + + +{% macro spark__snapshot_merge_insert_sql(target, source) -%} + + merge into {{ target }} as DBT_INTERNAL_DEST + using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE + on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id + when not matched then insert * + ; +{% endmacro %} + + +{% materialization snapshot, adapter='spark' %} + {%- set config = model['config'] -%} + + {%- set target_table = model.get('alias', model.get('name')) -%} + + {%- set strategy_name = config.get('strategy') -%} + {%- set unique_key = config.get('unique_key') %} + + {% if not adapter.check_schema_exists(model.database, model.schema) %} + {% do create_schema(model.database, model.schema) %} + {% endif %} + + {% set target_relation_exists, target_relation = get_or_create_relation( + database=model.database, + schema=model.schema, + identifier=target_table, + type='table') -%} + + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + {{ run_hooks(pre_hooks, inside_transaction=True) }} + + {% set strategy_macro = strategy_dispatch(strategy_name) %} + {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %} + + {% if not target_relation_exists %} + + {% set build_sql = build_snapshot_table(strategy, model['injected_sql']) %} + {% call statement('main') -%} + {{ create_table_as(False, target_relation, build_sql) }} + {% endcall %} + + {% else %} + + {{ adapter.valid_snapshot_target(target_relation) }} + + {% set staging_insert_table = build_snapshot_staging_table_insert(strategy, sql, target_relation) %} + + {% call statement('main') %} + {{ spark__snapshot_merge_insert_sql( + target = target_relation, + source = staging_insert_table + ) + }} + {% endcall %} + + {% set staging_update_table = build_snapshot_staging_table_updates(strategy, sql, target_relation) %} + + {% call statement('main-2') %} + {{ spark__snapshot_merge_update_sql( + target = target_relation, + source = staging_update_table + ) + }} + {% endcall %} + {% endif %} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + + {{ adapter.commit() }} + + {% if staging_table is defined %} + {% do post_snapshot(staging_table) %} + {% endif %} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [target_relation]}) }} + +{% endmaterialization %} \ No newline at end of file From 32cdb40886699416a015ea48ebfd15edae49c7e7 Mon Sep 17 00:00:00 2001 From: Tony Qiu Date: Tue, 3 Mar 2020 09:58:26 -0500 Subject: [PATCH 076/603] Fix the seed bug --- .../spark/macros/materializations/seed.sql | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index b2d3d2cb..2c208d25 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ 
b/dbt/include/spark/macros/materializations/seed.sql @@ -31,4 +31,55 @@ {# Return SQL so we can render it out into the compiled files #} {{ return(statements[0]) }} -{% endmacro %} \ No newline at end of file +{% endmacro %} + + +{% materialization seed, adapter='spark' %} + + {%- set identifier = model['alias'] -%} + {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} + + {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + + {%- set exists_as_table = (old_relation is not none and get_relation_type(old_relation) == 'table') -%} + {%- set exists_as_view = (old_relation is not none and get_relation_type(old_relation) == 'view') -%} + + {%- set agate_table = load_agate_table() -%} + {%- do store_result('agate_table', status='OK', agate_table=agate_table) -%} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + + -- `BEGIN` happens here: + {{ run_hooks(pre_hooks, inside_transaction=True) }} + + -- build model + {% set create_table_sql = "" %} + {% if exists_as_view %} + {{ exceptions.raise_compiler_error("Cannot seed to '{}', it is a view".format(old_relation)) }} + {% elif exists_as_table %} + {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %} + {% else %} + {% set create_table_sql = create_csv_table(model, agate_table) %} + {% endif %} + + {% set status = 'CREATE' if full_refresh_mode else 'INSERT' %} + {% set num_rows = (agate_table.rows | length) %} + {% set sql = load_csv_rows(model, agate_table) %} + + {% call noop_statement('main', status ~ ' ' ~ num_rows) %} + {{ create_table_sql }}; + -- dbt seed -- + {{ sql }} + {% endcall %} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + + -- `COMMIT` happens here + {{ adapter.commit() }} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + + {% set target_relation = this.incorporate(type='table') %} + {{ return({'relations': [target_relation]}) }} + +{% endmaterialization %} \ No newline at end of file From 0619f9b67ed41d62f0b23871ef686c0ba162b579 Mon Sep 17 00:00:00 2001 From: Dandandan Date: Thu, 5 Mar 2020 15:03:34 +0100 Subject: [PATCH 077/603] Pep8 fix --- dbt/adapters/spark/impl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 40330452..59b8d9b1 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -60,7 +60,8 @@ def list_relations_without_caching(self, information_schema, schema, if hasattr(e, 'msg') and f"Database '{schema}' not found" in e.msg: return [] else: - logger.debug(f"Error while retrieving information about {schema}: {e.msg}") + description = "Error while retrieving information about" + logger.debug(f"{description} {schema}: {e.msg}") return [] relations = [] From 955e81698ef002193b9b88681a4d05022ed86db6 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 16 Mar 2020 21:22:48 +0100 Subject: [PATCH 078/603] Pull the owner from the DESCRIBE EXTENDED (#39) * Pull the owner from the DESCRIBE EXTENDED * Switch the order * We also want to look inside of the property * Update * First version * A bit of a cleanup * Make Flake8 happy * Fix the failing test * Introduced the SparkColumn * Less is more * Fix small issues * Fix the test * Apply comments * Fix the unit test :) --- .gitignore | 1 + dbt/adapters/spark/impl.py | 121 ++++++++++++++----------- dbt/adapters/spark/relation.py | 30 ++++++- dbt/include/spark/macros/adapters.sql | 13 +-- test/unit/test_adapter.py | 122 
++++++++++++++++++++++---- 5 files changed, 213 insertions(+), 74 deletions(-) diff --git a/.gitignore b/.gitignore index a61df07c..ced8ccd6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ env/ *.pyc __pycache__ .tox/ +.idea/ build/ dist/ dbt-integration-tests diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 59b8d9b1..af7ad954 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,15 +1,22 @@ -from dbt.adapters.sql import SQLAdapter -from dbt.adapters.spark import SparkRelation -from dbt.adapters.spark import SparkConnectionManager -import dbt.exceptions +from typing import List, Dict -from dbt.logger import GLOBAL_LOGGER as logger import agate +import dbt.exceptions +from dbt.adapters.sql import SQLAdapter +from dbt.contracts.graph.manifest import Manifest +from dbt.logger import GLOBAL_LOGGER as logger +from dbt.adapters.spark import SparkConnectionManager +from dbt.adapters.spark import SparkRelation +from dbt.adapters.spark.relation import SparkColumn LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' GET_RELATION_TYPE_MACRO_NAME = 'spark_get_relation_type' DROP_RELATION_MACRO_NAME = 'drop_relation' +FETCH_TBLPROPERTIES_MACRO_NAME = 'spark_fetch_tblproperties' +GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation' + +KEY_TABLE_OWNER = 'Owner' class SparkAdapter(SQLAdapter): @@ -48,7 +55,7 @@ def get_relation_type(self, relation, model_name=None): # Override that creates macros without a known type - adapter macros that # require a type will dynamically check at query-time def list_relations_without_caching(self, information_schema, schema, - model_name=None): + model_name=None) -> List: kwargs = {'information_schema': information_schema, 'schema': schema} try: results = self.execute_macro( @@ -112,53 +119,69 @@ def drop_relation(self, relation, model_name=None): kwargs={'relation': relation} ) - def get_catalog(self, manifest): + @staticmethod + def find_table_information_separator(rows: List[dict]) -> int: + pos = 0 + for row in rows: + if not row['col_name'] or row['col_name'].startswith('#'): + break + pos += 1 + return pos + + def parse_describe_extended( + self, + relation: Relation, + raw_rows: List[agate.Row] + ) -> List[SparkColumn]: + # Convert the Row to a dict + dict_rows = [dict(zip(row._keys, row._values)) for row in raw_rows] + # Find the separator between the rows and the metadata provided + # by the DESCRIBE TABLE EXTENDED statement + pos = SparkAdapter.find_table_information_separator(dict_rows) + + # Remove rows that start with a hash, they are comments + rows = [ + row for row in raw_rows[0:pos] + if not row['col_name'].startswith('#') + ] + metadata = { + col['col_name']: col['data_type'] for col in raw_rows[pos + 1:] + } + return [SparkColumn( + relation.database, + relation.schema, + relation.name, + relation.type, + metadata.get(KEY_TABLE_OWNER), + column['col_name'], + idx, + column['data_type'] + ) for idx, column in enumerate(rows)] + + def get_columns_in_relation(self, + relation: Relation) -> List[SparkColumn]: + rows: List[agate.Row] = super().get_columns_in_relation(relation) + return self.parse_describe_extended(relation, rows) + + def get_properties(self, relation: Relation) -> Dict[str, str]: + properties = self.execute_macro( + FETCH_TBLPROPERTIES_MACRO_NAME, + kwargs={'relation': relation} + ) + return {key: value for (key, value) in properties} + + def get_catalog(self, manifest: Manifest) -> agate.Table: schemas = manifest.get_used_schemas() - column_names = ( - 
'table_database', - 'table_schema', - 'table_name', - 'table_type', - 'table_comment', - 'table_owner', - 'column_name', - 'column_index', - 'column_type', - 'column_comment', - ) + def to_dict(d: any) -> Dict: + return d.__dict__ columns = [] for (database_name, schema_name) in schemas: relations = self.list_relations(database_name, schema_name) for relation in relations: - logger.debug("Getting table schema for relation {}".format(relation)) # noqa - table_columns = self.get_columns_in_relation(relation) - rel_type = self.get_relation_type(relation) - - for column_index, column in enumerate(table_columns): - # Fixes for pseudocolumns with no type - if column.name in ( - '# Partition Information', - '# col_name' - ): - continue - elif column.dtype is None: - continue - - column_data = ( - relation.database, - relation.schema, - relation.name, - rel_type, - None, - None, - column.name, - column_index, - column.data_type, - None, - ) - column_dict = dict(zip(column_names, column_data)) - columns.append(column_dict) - - return dbt.clients.agate_helper.table_from_data(columns, column_names) + logger.debug("Getting table schema for relation {}", relation) + columns += list( + map(to_dict, self.get_columns_in_relation(relation)) + ) + return agate.Table.from_object(columns) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 20d61b59..9edbff51 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -1,4 +1,4 @@ -from dbt.adapters.base.relation import BaseRelation +from dbt.adapters.base.relation import BaseRelation, Column class SparkRelation(BaseRelation): @@ -45,3 +45,31 @@ class SparkRelation(BaseRelation): 'required': ['metadata', 'type', 'path', 'include_policy', 'quote_policy', 'quote_character', 'dbt_created'] } + + +class SparkColumn(Column): + + def __init__(self, + table_database: str, + table_schema: str, + table_name: str, + table_type: str, + table_owner: str, + column_name: str, + column_index: int, + column_type: str): + super(SparkColumn, self).__init__(column_name, column_type) + self.table_database = table_database + self.table_schema = table_schema + self.table_name = table_name + self.table_type = table_type + self.table_owner = table_owner + self.column_name = column_name + self.column_index = column_index + + @property + def quoted(self): + return '`{}`'.format(self.column) + + def __repr__(self): + return "".format(self.name, self.data_type) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 9fb942a9..97e072ee 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -95,12 +95,9 @@ {% macro spark__get_columns_in_relation(relation) -%} {% call statement('get_columns_in_relation', fetch_result=True) %} - describe {{ relation }} + describe extended {{ relation }} {% endcall %} - - {% set table = load_result('get_columns_in_relation').table %} - {{ return(sql_convert_columns_in_relation(table)) }} - + {% do return(load_result('get_columns_in_relation').table) %} {% endmacro %} @@ -149,6 +146,12 @@ {% endif %} {%- endmacro %} +{% macro spark_fetch_tblproperties(relation) -%} + {% call statement('list_properties', fetch_result=True) -%} + SHOW TBLPROPERTIES {{ relation }} + {% endcall %} + {% do return(load_result('list_properties').table) %} +{%- endmacro %} {% macro spark__rename_relation(from_relation, to_relation) -%} {% call statement('rename_relation') -%} diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 
89857886..4ddd99c8 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -1,12 +1,13 @@ -import mock import unittest -import dbt.adapters + import dbt.flags as flags +import mock +from agate import Row +from dbt.adapters.base import BaseRelation from pyhive import hive -from dbt.adapters.spark import SparkAdapter -import agate -from .utils import config_from_parts_or_dicts, inject_adapter +from dbt.adapters.spark import SparkAdapter +from .utils import config_from_parts_or_dicts class TestSparkAdapter(unittest.TestCase): @@ -29,13 +30,13 @@ def get_target_http(self, project): return config_from_parts_or_dicts(project, { 'outputs': { 'test': { - 'type': 'spark', - 'method': 'http', - 'schema': 'analytics', - 'host': 'myorg.sparkhost.com', - 'port': 443, - 'token': 'abc123', - 'cluster': '01234-23423-coffeetime', + 'type': 'spark', + 'method': 'http', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 443, + 'token': 'abc123', + 'cluster': '01234-23423-coffeetime', } }, 'target': 'test' @@ -45,12 +46,12 @@ def get_target_thrift(self, project): return config_from_parts_or_dicts(project, { 'outputs': { 'test': { - 'type': 'spark', - 'method': 'thrift', - 'schema': 'analytics', - 'host': 'myorg.sparkhost.com', - 'port': 10001, - 'user': 'dbt' + 'type': 'spark', + 'method': 'thrift', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 10001, + 'user': 'dbt' } }, 'target': 'test' @@ -60,7 +61,6 @@ def test_http_connection(self): config = self.get_target_http(self.project_cfg) adapter = SparkAdapter(config) - def hive_http_connect(thrift_transport): self.assertEqual(thrift_transport.scheme, 'https') self.assertEqual(thrift_transport.port, 443) @@ -87,3 +87,87 @@ def hive_thrift_connect(host, port, username): self.assertEqual(connection.state, 'open') self.assertNotEqual(connection.handle, None) + + def test_parse_relation(self): + self.maxDiff = None + rel_type = 'table' + + relation = BaseRelation.create( + database='default_database', + schema='default_schema', + identifier='mytable', + type=rel_type + ) + + # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED + plain_rows = [ + ('col1', 'decimal(22,0)'), + ('col2', 'string',), + ('dt', 'date'), + ('# Partition Information', 'data_type'), + ('# col_name', 'data_type'), + ('dt', 'date'), + (None, None), + ('# Detailed Table Information', None), + ('Database', relation.database), + ('Owner', 'root'), + ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'), + ('Last Access', 'Wed May 20 19:25:00 UTC 1925'), + ('Type', 'MANAGED'), + ('Provider', 'delta'), + ('Location', '/mnt/vo'), + ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'), + ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'), + ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'), + ('Partition Provider', 'Catalog') + ] + + input_cols = [Row(keys=['col_name', 'data_type'], values=r) for r in plain_rows] + + config = self.get_target_http(self.project_cfg) + rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0].__dict__, { + 'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'col1', + 'column_name': 'col1', + 'column_index': 0, + 'dtype': 'decimal(22,0)', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None + }) + + self.assertEqual(rows[1].__dict__, { + 
'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'col2', + 'column_name': 'col2', + 'column_index': 1, + 'dtype': 'string', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None + }) + + self.assertEqual(rows[2].__dict__, { + 'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'dt', + 'column_name': 'dt', + 'column_index': 2, + 'dtype': 'date', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None + }) From 8a5168ab7c9442e57e31d878f29c99a9d9642ca9 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 16 Mar 2020 17:25:02 -0400 Subject: [PATCH 079/603] Fix: column order for incremental insert overwrite (#60) Use dest_cols_csv for correct order --- .../macros/materializations/incremental.sql | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index e6b2ee90..c26d2e0f 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -1,3 +1,13 @@ +{% macro get_insert_overwrite_sql(source_relation, target_relation, partitions) %} + + {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} + {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} + insert overwrite table {{ target_relation }} + {{ partition_cols(label="partition") }} + select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} + +{% endmacro %} + {% materialization incremental, adapter='spark' -%} {%- set partitions = config.get('partition_by') -%} @@ -44,10 +54,8 @@ {%- endcall -%} {#-- insert statements don't like CTEs, so support them via a temp view #} - {%- call statement() -%} - insert overwrite table {{ target_relation }} - {{ partition_cols(label="partition") }} - select * from {{ tmp_relation.include(database=false, schema=false) }} + {%- call statement('main') -%} + {{ get_insert_overwrite_sql(tmp_relation, target_relation, partitions) }} {%- endcall -%} {%- endif %} From 55b236c76e68158e9f422640c391a44a134efc41 Mon Sep 17 00:00:00 2001 From: Daniel Mateus Pires Date: Tue, 17 Mar 2020 14:19:11 +0000 Subject: [PATCH 080/603] Add a docker-compose environment for local/integration testing (#58) * adds docker image * fix jinja breakage * add venv * added env vars * remove docker-in-docker * updates per feedback * trim down pip installs * Re-organizing Dockerfile * Simplify bootstrap, change to same image as other dbt repos * Add more to .dockerignore * Multi-stage build * Rename spark/docker to docker/ * Add docker-compose * Remove DBT from image, separate into thrift directory * Add consistent spark-warehouse * Add README instructions to run locally * Add how to reset environment * Remove unused conf file * Add hadoop user for integration tests * Remove unrelated requirements.txt change * Remove accidently committed Pipfiles * Cleanup ARG and ENV usage Swap Spark instructions and Hadoop instructions Re-use base image to share ENV Remove some ARGs * Change default schema to 'analytics' to match README * Add reference to SQL/JDBC endpoint Co-Authored-By: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> * Specifications in README Co-Authored-By: Fokko 
Driesprong * Remove some logic to use godatadriven's docker image * Disambiguate names in docker-compose * Update wrong port Co-Authored-By: Jacob Beck * Use dbt user instead of hadoop Co-Authored-By: Jacob Beck Co-authored-by: Aaron Steers <18150651+aaronsteers@users.noreply.github.com> Co-authored-by: Fokko Driesprong Co-authored-by: Jacob Beck --- .gitignore | 2 ++ README.md | 40 +++++++++++++++++++++++++++++++++++++++- docker-compose.yml | 27 +++++++++++++++++++++++++++ docker/hive-site.xml | 42 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 docker-compose.yml create mode 100644 docker/hive-site.xml diff --git a/.gitignore b/.gitignore index ced8ccd6..9070f37c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.hive-metastore/ +.spark-warehouse/ *.egg-info env/ *.pyc diff --git a/README.md b/README.md index 2d71dbc7..942e08d6 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ dbt-spark has also been tested against AWS Databricks, and it has some differenc Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. -Lastly, the host field for Databricks can be found at the start of your workspace or cluster url (but don't include https://): region.azuredatabricks.net for Azure, or account.cloud.databricks.com for AWS. +Lastly, the host field for Databricks can be found at the start of your workspace or cluster url (but don't include https://): region.azuredatabricks.net for Azure, or account.cloud.databricks.com for AWS. @@ -152,6 +152,44 @@ where date_day::date >= '2019-01-01' group by 1 ``` +### Running locally + +A `docker-compose` environment starts a Spark Thrift server and a Postgres database as a Hive Metastore backend. + +``` +docker-compose up +``` + +Your profile should look like this: + +``` +your_profile_name: + target: local + outputs: + local: + method: thrift + type: spark + schema: analytics + host: 127.0.0.1 + port: 10000 + user: dbt + connect_retries: 5 + connect_timeout: 60 +``` + +Connecting to the local spark instance: + +* The Spark UI should be available at [http://localhost:4040/sqlserver/](http://localhost:4040/sqlserver/) +* The endpoint for SQL-based testing is at `http://localhost:10000` and can be referenced with the Hive or Spark JDBC drivers using connection string `jdbc:hive2://localhost:10000` and default credentials `dbt`:`dbt` + +Note that the Hive metastore data is persisted under `./.hive-metastore/`, and the Spark-produced data under `./.spark-warehouse/`. To completely reset you environment run the following: + +``` +docker-compose down +rm -rf ./.hive-metastore/ +rm -rf ./.spark-warehouse/ +``` + ### Reporting bugs and contributing code - Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt-spark/issues/new). 
diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..3bc54fdf --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,27 @@ +version: "3.7" +services: + + dbt-spark245-thrift: + image: godatadriven/spark:2.4.5 + ports: + - "10000:10000" + - "4040:4040" + depends_on: + - dbt-hive-metastore + command: > + --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 + --name Thrift JDBC/ODBC Server + volumes: + - ./.spark-warehouse/:/spark-warehouse/ + - ./docker/hive-site.xml:/usr/spark/conf/hive-site.xml + environment: + - WAIT_FOR=dbt-hive-metastore:5432 + + dbt-hive-metastore: + image: postgres:9.6.17-alpine + volumes: + - ./.hive-metastore/:/var/lib/postgresql/data + environment: + - POSTGRES_USER=dbt + - POSTGRES_PASSWORD=dbt + - POSTGRES_DB=metastore diff --git a/docker/hive-site.xml b/docker/hive-site.xml new file mode 100644 index 00000000..a92e87b7 --- /dev/null +++ b/docker/hive-site.xml @@ -0,0 +1,42 @@ + + + + + + + + javax.jdo.option.ConnectionURL + jdbc:postgresql://dbt-hive-metastore/metastore + + + + javax.jdo.option.ConnectionDriverName + org.postgresql.Driver + + + + javax.jdo.option.ConnectionUserName + dbt + + + + javax.jdo.option.ConnectionPassword + dbt + + + From 95bd1cf7b6d6bda3665e0d3c035eb96aee2ede64 Mon Sep 17 00:00:00 2001 From: Daniel Mateus Pires Date: Thu, 19 Mar 2020 20:33:13 +0000 Subject: [PATCH 081/603] Integration tests without cluster (#62) * Integration tests working without cluster, with custom Docker images * Remove unnecessary custom spark image * Use spark:2, use new fishtown hosted test image, remove unused Dockerfile * Trigger CI * Wait for Spark-Thrift to be ready * Change default integration --- .circleci/config.yml | 45 ++++++++++++++++++++++++++++++----- dev_requirements.txt | 4 +++- docker-compose.yml | 4 ++-- test/integration/profiles.yml | 15 ++++++++++-- tox.ini | 16 ++++++++++--- 5 files changed, 70 insertions(+), 14 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 66cf86d2..f9fdd379 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,28 +1,61 @@ version: 2 + jobs: unit: docker: - - image: fishtownjacob/test-container + - image: fishtownanalytics/test-container:5 + environment: + DBT_INVOCATION_ENV: circle steps: - checkout - run: tox -e flake8,unit - integration: + integration-spark2: docker: - - image: fishtownjacob/test-container + + - image: fishtownanalytics/test-container:5 + environment: + DBT_INVOCATION_ENV: circle + + - image: godatadriven/spark:2 + environment: + WAIT_FOR: localhost:5432 + command: > + --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 + --name Thrift JDBC/ODBC Server + --conf spark.hadoop.javax.jdo.option.ConnectionURL=jdbc:postgresql://localhost/metastore + --conf spark.hadoop.javax.jdo.option.ConnectionUserName=dbt + --conf spark.hadoop.javax.jdo.option.ConnectionPassword=dbt + --conf spark.hadoop.javax.jdo.option.ConnectionDriverName=org.postgresql.Driver + + - image: postgres:9.6.17-alpine + environment: + POSTGRES_USER: dbt + POSTGRES_PASSWORD: dbt + POSTGRES_DB: metastore + steps: - checkout + - run: + name: install dockerize + command: curl -LO https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz && tar -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz && rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz + environment: + DOCKERIZE_VERSION: v0.6.1 + - run: + name: Wait for Spark-Thrift + command: ./dockerize -wait tcp://localhost:10000 -timeout 
15m -wait-retry-interval 5s + - run: name: Checkout test project command: git clone --branch spark-support https://github.com/fishtown-analytics/dbt-integration-tests.git - - run: name: Run integration tests - command: tox -e integration + command: tox -e integration-spark-thrift no_output_timeout: 1h environment: DBT_PROFILES_DIR: /home/dbt_test_user/project/test/integration/ + SPARK_HOST: localhost - store_artifacts: path: ./logs @@ -32,6 +65,6 @@ workflows: test-everything: jobs: - unit - - integration: + - integration-spark2: requires: - unit diff --git a/dev_requirements.txt b/dev_requirements.txt index b81a05b4..096a5469 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -14,4 +14,6 @@ behave==1.2.6 parse==1.8.4 parse-type==0.4.2 PyHamcrest==1.9.0 -six==1.11.0 +six>=1.14.0 +sasl==0.2.1 +thrift_sasl==0.4.1 diff --git a/docker-compose.yml b/docker-compose.yml index 3bc54fdf..8ef97a44 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,8 @@ version: "3.7" services: - dbt-spark245-thrift: - image: godatadriven/spark:2.4.5 + dbt-spark2-thrift: + image: godatadriven/spark:2 ports: - "10000:10000" - "4040:4040" diff --git a/test/integration/profiles.yml b/test/integration/profiles.yml index a12acf1d..dca5d36f 100644 --- a/test/integration/profiles.yml +++ b/test/integration/profiles.yml @@ -1,5 +1,4 @@ - -spark: +spark-http: target: ci outputs: ci: @@ -12,3 +11,15 @@ spark: cluster: '{{ env_var("SPARK_CLUSTER") }}' connect_timeout: 60 connect_retries: 10 +spark-thrift: + target: ci + outputs: + ci: + method: thrift + type: spark + schema: analytics + host: localhost + port: 10000 + user: dbt + connect_retries: 5 + connect_timeout: 60 diff --git a/tox.ini b/tox.ini index 502b4b49..f3faa119 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] skipsdist = True -envlist = unit, flake8, integration +envlist = unit, flake8, integration-spark-thrift [testenv:flake8] @@ -16,12 +16,22 @@ deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt -[testenv:integration] +[testenv:integration-spark-http] basepython = python3 changedir = dbt-integration-tests -commands = /bin/bash -c '{envpython} -m behave -f progress3 --stop -D profile_name=spark' +commands = /bin/bash -c '{envpython} -m behave -f progress3 --stop -D profile_name=spark-http' passenv = SPARK_HOST SPARK_CLUSTER SPARK_TOKEN DBT_PROFILES_DIR deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt -e. + +[testenv:integration-spark-thrift] +basepython = python3 +changedir = dbt-integration-tests +commands = /bin/bash -c '{envpython} -m behave -f progress3 --stop -D profile_name=spark-thrift' +passenv = DBT_PROFILES_DIR +deps = + -r{toxinidir}/requirements.txt + -r{toxinidir}/dev_requirements.txt + -e. 
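The `spark-http` profile in the integration `profiles.yml` above connects over the HTTP method, whose endpoint is built from the host, port, organization and cluster values by the `SPARK_CONNECTION_URL` template that the earlier flake8 and unit-test fixes reworked into a stripped triple-quoted string. The sketch below shows roughly how those values are assembled into the Thrift endpoint; the host, organization and cluster values are placeholders borrowed from the unit tests, not real endpoints.

```
# Rough sketch of how http-method profile values become the Thrift endpoint
# (mirrors SPARK_CONNECTION_URL in dbt/adapters/spark/connections.py).
SPARK_CONNECTION_URL = '''
    https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}
    '''.strip()

url = SPARK_CONNECTION_URL.format(
    host="myorg.sparkhost.com",        # placeholder workspace host
    port=443,
    organization="0123456789",         # placeholder Azure Databricks workspace id
    cluster="01234-23423-coffeetime",  # placeholder cluster id
)
print(url)
# https://myorg.sparkhost.com:443/sql/protocolv1/o/0123456789/01234-23423-coffeetime
```

The template was split across lines as part of the flake8 cleanup, and the `.strip()` was added in the follow-up unit-test fix so it still formats down to a single URL.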
From 5e414e744f743094df5a9b651b4a3d16a292ed3e Mon Sep 17 00:00:00 2001 From: Daniel Mateus Pires Date: Thu, 19 Mar 2020 20:54:42 +0000 Subject: [PATCH 082/603] Use image with baked-in dockerize --- .circleci/config.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f9fdd379..b632cba4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,7 +13,7 @@ jobs: integration-spark2: docker: - - image: fishtownanalytics/test-container:5 + - image: dmateusp/test-container:dockerize # TODO: change me before merging environment: DBT_INVOCATION_ENV: circle @@ -37,14 +37,9 @@ jobs: steps: - checkout - - run: - name: install dockerize - command: curl -LO https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz && tar -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz && rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz - environment: - DOCKERIZE_VERSION: v0.6.1 - run: name: Wait for Spark-Thrift - command: ./dockerize -wait tcp://localhost:10000 -timeout 15m -wait-retry-interval 5s + command: dockerize -wait tcp://localhost:10000 -timeout 15m -wait-retry-interval 5s - run: name: Checkout test project From 73eab3838e4b89586562dc72501b475d2ceef843 Mon Sep 17 00:00:00 2001 From: Daniel Mateus Pires Date: Thu, 19 Mar 2020 20:55:16 +0000 Subject: [PATCH 083/603] Remove SPARK_HOST variable (not needed) --- .circleci/config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b632cba4..3416b1cc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -50,7 +50,6 @@ jobs: no_output_timeout: 1h environment: DBT_PROFILES_DIR: /home/dbt_test_user/project/test/integration/ - SPARK_HOST: localhost - store_artifacts: path: ./logs From e6610ae61ea9732159f39ff8f22a69fda2a5a745 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 20 Mar 2020 01:31:11 +0100 Subject: [PATCH 084/603] Add support for extracting statistics (#41) * Add support for table statistics * Fix --- dbt/adapters/spark/impl.py | 13 +++---- dbt/adapters/spark/relation.py | 21 +++++++++++ test/unit/test_adapter.py | 67 ++++++++++++++++++++++++++++++++-- test/unit/test_macros.py | 4 +- 4 files changed, 92 insertions(+), 13 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index af7ad954..be0f053d 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -17,6 +17,7 @@ GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation' KEY_TABLE_OWNER = 'Owner' +KEY_TABLE_STATISTICS = 'Statistics' class SparkAdapter(SQLAdapter): @@ -153,6 +154,7 @@ def parse_describe_extended( relation.name, relation.type, metadata.get(KEY_TABLE_OWNER), + metadata.get(KEY_TABLE_STATISTICS), column['col_name'], idx, column['data_type'] @@ -172,16 +174,13 @@ def get_properties(self, relation: Relation) -> Dict[str, str]: def get_catalog(self, manifest: Manifest) -> agate.Table: schemas = manifest.get_used_schemas() - - def to_dict(d: any) -> Dict: - return d.__dict__ - columns = [] for (database_name, schema_name) in schemas: relations = self.list_relations(database_name, schema_name) for relation in relations: logger.debug("Getting table schema for relation {}", relation) - columns += list( - map(to_dict, self.get_columns_in_relation(relation)) - ) + columns += [ + col.to_dict() for col in + self.get_columns_in_relation(relation) + ] return agate.Table.from_object(columns) diff --git 
a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 9edbff51..1f8f8676 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -55,6 +55,7 @@ def __init__(self, table_name: str, table_type: str, table_owner: str, + table_stats: str, column_name: str, column_index: int, column_type: str): @@ -66,6 +67,18 @@ def __init__(self, self.table_owner = table_owner self.column_name = column_name self.column_index = column_index + self.table_stats = {} + if table_stats: + # format: 1109049927 bytes, 14093476 rows + stats = { + stats.split(" ")[1]: int(stats.split(" ")[0]) + for stats in table_stats.split(', ') + } + for key, val in stats.items(): + self.table_stats[f'stats:{key}:label'] = key + self.table_stats[f'stats:{key}:value'] = val + self.table_stats[f'stats:{key}:description'] = '' + self.table_stats[f'stats:{key}:include'] = True @property def quoted(self): @@ -73,3 +86,11 @@ def quoted(self): def __repr__(self): return "".format(self.name, self.data_type) + + def to_dict(self): + original_dict = self.__dict__.copy() + # If there are stats, merge them into the root of the dict + if self.table_stats: + original_dict.update(self.table_stats) + del original_dict['table_stats'] + return original_dict diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 4ddd99c8..ef86dd19 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -127,7 +127,7 @@ def test_parse_relation(self): config = self.get_target_http(self.project_cfg) rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) self.assertEqual(len(rows), 3) - self.assertEqual(rows[0].__dict__, { + self.assertEqual(rows[0].to_dict(), { 'table_database': relation.database, 'table_schema': relation.schema, 'table_name': relation.name, @@ -142,7 +142,7 @@ def test_parse_relation(self): 'char_size': None }) - self.assertEqual(rows[1].__dict__, { + self.assertEqual(rows[1].to_dict(), { 'table_database': relation.database, 'table_schema': relation.schema, 'table_name': relation.name, @@ -157,7 +157,7 @@ def test_parse_relation(self): 'char_size': None }) - self.assertEqual(rows[2].__dict__, { + self.assertEqual(rows[2].to_dict(), { 'table_database': relation.database, 'table_schema': relation.schema, 'table_name': relation.name, @@ -171,3 +171,64 @@ def test_parse_relation(self): 'numeric_precision': None, 'char_size': None }) + + def test_parse_relation_with_statistics(self): + self.maxDiff = None + rel_type = 'table' + + relation = BaseRelation.create( + database='default_database', + schema='default_schema', + identifier='mytable', + type=rel_type + ) + + # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED + plain_rows = [ + ('col1', 'decimal(22,0)'), + ('# Partition Information', 'data_type'), + (None, None), + ('# Detailed Table Information', None), + ('Database', relation.database), + ('Owner', 'root'), + ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'), + ('Last Access', 'Wed May 20 19:25:00 UTC 1925'), + ('Statistics', '1109049927 bytes, 14093476 rows'), + ('Type', 'MANAGED'), + ('Provider', 'delta'), + ('Location', '/mnt/vo'), + ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'), + ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'), + ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'), + ('Partition Provider', 'Catalog') + ] + + input_cols = [Row(keys=['col_name', 'data_type'], values=r) for r in plain_rows] + + config = self.get_target_http(self.project_cfg) + rows = 
SparkAdapter(config).parse_describe_extended(relation, input_cols) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].to_dict(), { + 'table_database': relation.database, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'col1', + 'column_name': 'col1', + 'column_index': 0, + 'dtype': 'decimal(22,0)', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None, + + 'stats:bytes:description': '', + 'stats:bytes:include': True, + 'stats:bytes:label': 'bytes', + 'stats:bytes:value': 1109049927, + + 'stats:rows:description': '', + 'stats:rows:include': True, + 'stats:rows:label': 'rows', + 'stats:rows:value': 14093476, + }) diff --git a/test/unit/test_macros.py b/test/unit/test_macros.py index eb8852ed..bb51214f 100644 --- a/test/unit/test_macros.py +++ b/test/unit/test_macros.py @@ -1,9 +1,7 @@ import mock -import unittest import re -from collections import defaultdict +import unittest from jinja2 import Environment, FileSystemLoader -from dbt.context.common import _add_validation class TestSparkMacros(unittest.TestCase): From 97ec1b8f9b73c343360e9a1bee6548f70dca6378 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 19 Mar 2020 20:57:50 -0600 Subject: [PATCH 085/603] upgrade to 0.15.3 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 167b6857..3b0792da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.15.0 +dbt-core==0.15.3 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 2a770f31..0746b262 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ package_name = "dbt-spark" -package_version = "0.15.0" +package_version = "0.15.3" description = """The SparkSQL plugin for dbt (data build tool)""" setup( From ab5b1825a805ee70762d91c13ad569844b3f6e7f Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 20 Mar 2020 00:05:14 -0600 Subject: [PATCH 086/603] fix tests/issues with my merge --- dbt/adapters/spark/connections.py | 5 +++-- dbt/adapters/spark/impl.py | 2 ++ dbt/include/spark/macros/materializations/seed.sql | 10 +++++----- test/unit/test_adapter.py | 5 ++++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 21ac6d37..bbec36e9 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -3,6 +3,7 @@ import dbt.exceptions from dbt.adapters.base import Credentials from dbt.adapters.sql import SQLConnectionManager +from dbt.contracts.connection import ConnectionState from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import DECIMALS @@ -219,7 +220,7 @@ def validate_creds(cls, creds, required): @classmethod def open(cls, connection): - if connection.state == 'open': + if connection.state == ConnectionState.OPEN: logger.debug('Connection is already open, skipping open.') return connection @@ -280,5 +281,5 @@ def open(cls, connection): handle = ConnectionWrapper(conn) connection.handle = handle - connection.state = 'open' + connection.state = ConnectionState.OPEN return connection diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index dd674548..a49ebe73 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -144,6 +144,8 @@ def list_relations_without_caching(self, information_schema, schema) -> List[Spa self.cache_added(relation) relations.append(relation) + return 
relations + def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]: if not self.Relation.include_policy.database: database = None diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index 2ad31c79..8e5a0f90 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -1,5 +1,4 @@ {% macro spark__load_csv_rows(model, agate_table) %} - {% set agate_table = model['agate_table'] %} {% set batch_size = 1000 %} {% set cols_sql = ", ".join(agate_table.column_names) %} {% set bindings = [] %} @@ -47,7 +46,8 @@ {%- set identifier = model['alias'] -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} - {%- set csv_table = model["agate_table"] -%} + {%- set agate_table = load_agate_table() -%} + {%- do store_result('agate_table', status='OK', agate_table=agate_table) -%} {{ run_hooks(pre_hooks, inside_transaction=False) }} @@ -55,10 +55,10 @@ {{ run_hooks(pre_hooks, inside_transaction=True) }} -- build model - {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation) %} + {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %} {% set status = 'CREATE' %} - {% set num_rows = (csv_table.rows | length) %} - {% set sql = load_csv_rows(model) %} + {% set num_rows = (agate_table.rows | length) %} + {% set sql = load_csv_rows(model, agate_table) %} {% call noop_statement('main', status ~ ' ' ~ num_rows) %} {{ create_table_sql }}; diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 0707f9c8..9fba6bcd 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -66,8 +66,10 @@ def hive_http_connect(thrift_transport): self.assertEqual(thrift_transport.host, 'myorg.sparkhost.com') self.assertEqual(thrift_transport.path, '/sql/protocolv1/o/0123456789/01234-23423-coffeetime') - with mock.patch.object(hive, 'connect', new=hive_http_connect): + # with mock.patch.object(hive, 'connect', new=hive_http_connect): + with mock.patch('dbt.adapters.spark.connections.hive.connect', new=hive_http_connect): connection = adapter.acquire_connection('dummy') + connection.handle # trigger lazy-load self.assertEqual(connection.state, 'open') self.assertNotEqual(connection.handle, None) @@ -87,6 +89,7 @@ def hive_thrift_connect(host, port, username): with mock.patch.object(hive, 'connect', new=hive_thrift_connect): connection = adapter.acquire_connection('dummy') + connection.handle # trigger lazy-load self.assertEqual(connection.state, 'open') self.assertNotEqual(connection.handle, None) From 580fc529579dd0eb2464cae1a0fb19069cf29def Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 19 Mar 2020 21:06:15 -0600 Subject: [PATCH 087/603] PR feedback --- dbt/adapters/spark/relation.py | 1 + .../macros/materializations/incremental.sql | 22 +++--- .../spark/macros/materializations/table.sql | 55 +++----------- .../spark/macros/materializations/view.sql | 72 +------------------ 4 files changed, 22 insertions(+), 128 deletions(-) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 27067d34..92473d55 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -21,3 +21,4 @@ class SparkIncludePolicy(Policy): class SparkRelation(BaseRelation): quote_policy: SparkQuotePolicy = SparkQuotePolicy() include_policy: SparkIncludePolicy = SparkIncludePolicy() + quote_character: 
str = '`' diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index fc26b1d8..b07d2dcd 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -1,4 +1,4 @@ -{% macro get_insert_overwrite_sql(source_relation, target_relation, partitions) %} +{% macro get_insert_overwrite_sql(source_relation, target_relation) %} {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} @@ -60,18 +60,21 @@ {% endmacro %} -{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} - {%- if strategy == 'insert_overwrite' -%} - {#-- insert statements don't like CTEs, so support them via a temp view #} - {{ get_insert_overwrite_sql() }} - {%- else -%} - {#-- merge all columns with databricks delta - schema changes are handled for us #} +{% macro get_merge_sql(source, target, unique_key) %} merge into {{ target }} as DBT_INTERNAL_DEST using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} when matched then update set * when not matched then insert * +{% endmacro %} +{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} + {%- if strategy == 'insert_overwrite' -%} + {#-- insert statements don't like CTEs, so support them via a temp view #} + {{ get_insert_overwrite_sql(source, target) }} + {%- else -%} + {#-- merge all columns with databricks delta - schema changes are handled for us #} + {{ get_merge_sql(source, target, unique_key) }} {%- endif -%} {% endmacro %} @@ -94,11 +97,6 @@ {% do dbt_spark_validate_merge(file_format) %} {% endif %} - {%- set partitions = config.get('partition_by') -%} - {% if not partitions %} - {% do exceptions.raise_compiler_error("Table partitions are required for incremental models on Spark") %} - {% endif %} - {% call statement() %} set spark.sql.sources.partitionOverwriteMode = DYNAMIC {% endcall %} diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index 48dfe7ff..2bd73d6c 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -1,64 +1,25 @@ -{% materialization table, adapter='spark' %} +{% materialization table, adapter = 'spark' %} + {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} - {%- set backup_identifier = model['name'] + '__dbt_backup' -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} - {%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier, - schema=schema, - database=database, - type='table') -%} - - /* - See ../view/view.sql for more information about this relation. 
- */ - {%- set backup_relation_type = 'table' if old_relation is none else get_relation_type(old_relation) -%} - {%- set backup_relation = api.Relation.create(identifier=backup_identifier, - schema=schema, - database=database, - type=backup_relation_type) -%} - - {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} - -- drop the temp relations if they exist for some reason - {{ adapter.drop_relation(intermediate_relation) }} - {{ adapter.drop_relation(backup_relation) }} + {{ run_hooks(pre_hooks) }} - {% if exists_as_view -%} + -- setup: if the target relation already exists, drop it + {% if old_relation -%} {{ adapter.drop_relation(old_relation) }} - {%- set old_relation = none -%} {%- endif %} - {{ run_hooks(pre_hooks, inside_transaction=False) }} - - -- `BEGIN` happens here: - {{ run_hooks(pre_hooks, inside_transaction=True) }} - -- build model {% call statement('main') -%} - {{ create_table_as(False, intermediate_relation, sql) }} + {{ create_table_as(False, target_relation, sql) }} {%- endcall %} - -- cleanup - {% if old_relation is not none %} - {{ adapter.rename_relation(target_relation, backup_relation) }} - {% endif %} - - {{ adapter.rename_relation(intermediate_relation, target_relation) }} - - {{ run_hooks(post_hooks, inside_transaction=True) }} - - -- `COMMIT` happens here - {{ adapter.commit() }} - - -- finally, drop the existing/backup relation after the commit - {{ drop_relation_if_exists(backup_relation) }} - - {{ run_hooks(post_hooks, inside_transaction=False) }} + {{ run_hooks(post_hooks) }} - {{ return({'relations': [target_relation]}) }} -{% endmaterialization %} \ No newline at end of file +{% endmaterialization %} diff --git a/dbt/include/spark/macros/materializations/view.sql b/dbt/include/spark/macros/materializations/view.sql index c6751222..a68d78c1 100644 --- a/dbt/include/spark/macros/materializations/view.sql +++ b/dbt/include/spark/macros/materializations/view.sql @@ -1,69 +1,3 @@ -{%- materialization view, adapter='spark' -%} - - {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} - {%- set backup_identifier = model['name'] + '__dbt_backup' -%} - - {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} - {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, - type='view') -%} - {%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier, - schema=schema, database=database, type='view') -%} - - /* - This relation (probably) doesn't exist yet. If it does exist, it's a leftover from - a previous run, and we're going to try to drop it immediately. At the end of this - materialization, we're going to rename the "old_relation" to this identifier, - and then we're going to drop it. In order to make sure we run the correct one of: - - drop view ... - - drop table ... - We need to set the type of this relation to be the type of the old_relation, if it exists, - or else "view" as a sane default if it does not. Note that if the old_relation does not - exist, then there is nothing to move out of the way and subsequentally drop. In that case, - this relation will be effectively unused. 
- */ - - {%- set backup_relation_type = 'view' if old_relation is none else get_relation_type(old_relation) -%} - {%- set backup_relation = api.Relation.create(identifier=backup_identifier, - schema=schema, database=database, - type=backup_relation_type) -%} - - {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} - - {{ run_hooks(pre_hooks, inside_transaction=False) }} - - -- drop the temp relations if they exists for some reason - {{ adapter.drop_relation(intermediate_relation) }} - {{ adapter.drop_relation(backup_relation) }} - - {% if exists_as_table -%} - {{ adapter.drop_relation(old_relation) }} - {%- set old_relation = none -%} - {%- endif %} - - -- `BEGIN` happens here: - {{ run_hooks(pre_hooks, inside_transaction=True) }} - - -- build model - {% call statement('main') -%} - {{ create_view_as(intermediate_relation, sql) }} - {%- endcall %} - - -- cleanup - -- move the existing view out of the way - {% if old_relation is not none %} - {{ adapter.rename_relation(target_relation, backup_relation) }} - {% endif %} - {{ adapter.rename_relation(intermediate_relation, target_relation) }} - - {{ run_hooks(post_hooks, inside_transaction=True) }} - - {{ adapter.commit() }} - - {{ drop_relation_if_exists(backup_relation) }} - - {{ run_hooks(post_hooks, inside_transaction=False) }} - - {{ return({'relations': [target_relation]}) }} - -{%- endmaterialization -%} \ No newline at end of file +{% materialization view, adapter='spark' -%} + {{ return(create_or_replace_view()) }} +{%- endmaterialization %} From ba641bb98eaf5cb62fe699a8ebcb420dbfb9c9cd Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 20 Mar 2020 08:22:46 -0600 Subject: [PATCH 088/603] flake8 --- dbt/adapters/spark/connections.py | 18 ++++++++++++------ dbt/adapters/spark/impl.py | 16 ++++++---------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index bbec36e9..0a3110bd 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -55,7 +55,7 @@ def _connection_keys(self): class ConnectionWrapper(object): """Wrap a Spark connection in a way that no-ops transactions""" - # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html + # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html # noqa def __init__(self, handle): self.handle = handle @@ -168,7 +168,9 @@ def description(self): class SparkConnectionManager(SQLConnectionManager): TYPE = 'spark' - SPARK_CONNECTION_URL = "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" + SPARK_CONNECTION_URL = ( + "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" + ) @contextmanager def exception_handler(self, sql): @@ -252,13 +254,16 @@ def open(cls, connection): conn = hive.connect(thrift_transport=transport) elif creds.method == 'thrift': - cls.validate_creds(creds, ['host', 'port', 'user', 'schema']) + cls.validate_creds(creds, + ['host', 'port', 'user', 'schema']) conn = hive.connect(host=creds.host, port=creds.port, username=creds.user) else: - raise dbt.exceptions.DbtProfileError("invalid credential method: {}".format(creds.method)) + raise dbt.exceptions.DbtProfileError( + f"invalid credential method: {creds.method}" + ) break except Exception as e: exc = e @@ -271,8 +276,9 @@ def open(cls, connection): warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})" if is_pending or is_starting: - 
logger.warning(warning.format(e.message, creds.connect_timeout, - i, creds.connect_retries)) + msg = warning.format(e.message, creds.connect_timeout, + i, creds.connect_retries) + logger.warning(msg) time.sleep(creds.connect_timeout) else: raise dbt.exceptions.FailedToConnectException(str(e)) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 80e8582c..b3362895 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -52,8 +52,6 @@ class SparkAdapter(SQLAdapter): Column = SparkColumn ConnectionManager = SparkConnectionManager - AdapterSpecificConfigs = frozenset({"file_format", "partition_by", "cluster_by", "num_buckets", "location"}) - AdapterSpecificConfigs = frozenset({"file_format", "location_root", "partition_by", "clustered_by", "buckets"}) @@ -83,12 +81,6 @@ def convert_time_type(cls, agate_table, col_idx): def convert_datetime_type(cls, agate_table, col_idx): return "timestamp" - def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]: - if not self.Relation.include_policy.database: - database = None - - return super().get_relation(database, schema, identifier) - def add_schema_to_cache(self, schema) -> str: """Cache a new schema in dbt. It will show up in `list relations`.""" if schema is None: @@ -101,7 +93,9 @@ def add_schema_to_cache(self, schema) -> str: # so jinja doesn't render things return '' - def list_relations_without_caching(self, information_schema, schema) -> List[SparkRelation]: + def list_relations_without_caching( + self, information_schema, schema + ) -> List[SparkRelation]: kwargs = {'information_schema': information_schema, 'schema': schema} try: results = self.execute_macro( @@ -130,7 +124,9 @@ def list_relations_without_caching(self, information_schema, schema) -> List[Spa return relations - def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]: + def get_relation( + self, database: str, schema: str, identifier: str + ) -> Optional[BaseRelation]: if not self.Relation.include_policy.database: database = None From 9c685c8b7d5cdec1be90f604b5653aa0e2394350 Mon Sep 17 00:00:00 2001 From: Daniel Mateus Pires Date: Fri, 20 Mar 2020 18:02:24 +0000 Subject: [PATCH 089/603] Use new container --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3416b1cc..61afed92 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,7 @@ version: 2 jobs: unit: docker: - - image: fishtownanalytics/test-container:5 + - image: fishtownanalytics/test-container:6 environment: DBT_INVOCATION_ENV: circle steps: @@ -13,7 +13,7 @@ jobs: integration-spark2: docker: - - image: dmateusp/test-container:dockerize # TODO: change me before merging + - image: fishtownanalytics/test-container:6 environment: DBT_INVOCATION_ENV: circle From 452c2cbc7f5f6b1ef61f29cdb95e33de4b0caae7 Mon Sep 17 00:00:00 2001 From: Tony Qiu Date: Fri, 20 Mar 2020 19:45:23 -0400 Subject: [PATCH 090/603] remove unused function --- dbt/adapters/spark/impl.py | 42 -------------------------------------- 1 file changed, 42 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index a50c4e22..b3362895 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -286,45 +286,3 @@ def check_schema_exists(self, database, schema): exists = True if schema in [row[0] for row in results] else False return exists - - - def valid_snapshot_target(self, relation: 
BaseRelation) -> None: - """Ensure that the target relation is valid, by making sure it has the - expected columns. - - :param Relation relation: The relation to check - :raises dbt.exceptions.CompilationException: If the columns are - incorrect. - """ - if not isinstance(relation, self.Relation): - dbt.exceptions.invalid_type_error( - method_name='valid_snapshot_target', - arg_name='relation', - got_value=relation, - expected_type=self.Relation) - - columns = self.get_columns_in_relation(relation) - names = set(c.name.lower() for c in columns if c.name) - expanded_keys = ('scd_id', 'valid_from', 'valid_to') - extra = [] - missing = [] - for legacy in expanded_keys: - desired = 'dbt_' + legacy - if desired not in names: - missing.append(desired) - if legacy in names: - extra.append(legacy) - - if missing: - if extra: - msg = ( - 'Snapshot target has ("{}") but not ("{}") - is it an ' - 'unmigrated previous version archive?' - .format('", "'.join(extra), '", "'.join(missing)) - ) - else: - msg = ( - 'Snapshot target is not a snapshot table (missing "{}")' - .format('", "'.join(missing)) - ) - dbt.exceptions.raise_compiler_error(msg) \ No newline at end of file From 9b13d128e0a7eb0589ef09f24a263c88acb29192 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 23 Mar 2020 08:42:17 -0600 Subject: [PATCH 091/603] PR feedback: return relations created in materializations --- dbt/include/spark/macros/materializations/seed.sql | 5 +++++ dbt/include/spark/macros/materializations/table.sql | 2 ++ 2 files changed, 7 insertions(+) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index 8e5a0f90..66bcf41f 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -46,6 +46,8 @@ {%- set identifier = model['alias'] -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + {%- set target_relation = api.Relation.create(database=database, schema=schema, identifier=identifier, + type='table') -%} {%- set agate_table = load_agate_table() -%} {%- do store_result('agate_table', status='OK', agate_table=agate_table) -%} @@ -70,4 +72,7 @@ -- `COMMIT` happens here {{ adapter.commit() }} {{ run_hooks(post_hooks, inside_transaction=False) }} + + {{ return({'relations': [target_relation]}) }} + {% endmaterialization %} diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index 2bd73d6c..d772a554 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -22,4 +22,6 @@ {{ run_hooks(post_hooks) }} + {{ return({'relations': [target_relation]})}} + {% endmaterialization %} From ef65a2d9fc5b38994189ee9c1abb7ae3babec64b Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 23 Mar 2020 16:19:23 -0600 Subject: [PATCH 092/603] dbt-spark 0.16.0 - Remove duplicate create/drop schema implementations - Use the get_merge_sql pattern from core --- dbt/include/spark/macros/adapters.sql | 12 ------------ .../spark/macros/materializations/incremental.sql | 14 +++++++------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 40a5e5be..3e6ce236 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -134,18 +134,6 @@ current_timestamp() {%- endmacro %} -{% macro 
spark__create_schema(database_name, schema_name) -%} - {%- call statement('create_schema') -%} - create schema if not exists {{ schema_name }} - {%- endcall -%} -{% endmacro %} - -{% macro spark__drop_schema(database_name, schema_name) -%} - {%- call statement('drop_schema') -%} - drop schema if exists {{ schema_name }} cascade - {%- endcall -%} -{% endmacro %} - {% macro spark__rename_relation(from_relation, to_relation) -%} {% call statement('rename_relation') -%} {% if not from_relation.type %} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index b07d2dcd..630d6988 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -60,21 +60,21 @@ {% endmacro %} -{% macro get_merge_sql(source, target, unique_key) %} - merge into {{ target }} as DBT_INTERNAL_DEST - using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE - on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} - when matched then update set * - when not matched then insert * + +{% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=[]) %} + {% set merge_source = source.include(schema=false) %} + {% do return(default__get_merge_sql(target, source, unique_key, dest_columns, predicates)) %} {% endmacro %} + {% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} {%- if strategy == 'insert_overwrite' -%} {#-- insert statements don't like CTEs, so support them via a temp view #} {{ get_insert_overwrite_sql(source, target) }} {%- else -%} {#-- merge all columns with databricks delta - schema changes are handled for us #} - {{ get_merge_sql(source, target, unique_key) }} + {% set dest_columns = adapter.get_columns_in_relation(target) %} + {{ get_merge_sql(target, source, unique_key, dest_columns, predicates=none) }} {%- endif -%} {% endmacro %} From 59fcf8754ae011d86605671393516ace7e037a0e Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 24 Mar 2020 08:25:12 -0600 Subject: [PATCH 093/603] =?UTF-8?q?Bump=20version:=200.15.3=20=E2=86=92=20?= =?UTF-8?q?0.16.0a1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added bumpversion Changed restrictions on dbt-core vs dbt-spark versions Fix '>==' in setup.py to be '==' --- .bumpversion.cfg | 24 ++++++++++++++++++++++++ setup.py | 17 ++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 .bumpversion.cfg diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 00000000..1906574c --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,24 @@ +[bumpversion] +current_version = 0.16.0a1 +parse = (?P\d+) + \.(?P\d+) + \.(?P\d+) + ((?P[a-z]+)(?P\d+))? +serialize = + {major}.{minor}.{patch}{prerelease}{num} + {major}.{minor}.{patch} +commit = False +tag = False + +[bumpversion:part:prerelease] +first_value = a +values = + a + b + rc + +[bumpversion:part:num] +first_value = 1 + +[bumpversion:file:setup.py] + diff --git a/setup.py b/setup.py index 0b388f63..f0c45968 100644 --- a/setup.py +++ b/setup.py @@ -9,9 +9,21 @@ package_name = "dbt-spark" -package_version = "0.15.3" +package_version = "0.16.0a1" description = """The SparkSQL plugin for dbt (data build tool)""" +# evade bumpversion with this fun trick +DBT_VERSION = (0, 16, 0) +dbt_version = '.'.join(map(str, DBT_VERSION)) +# the package version should be the dbt version, with maybe some things on the +# ends of it. 
(0.16.0 vs 0.16.0a1, 0.16.0.1, ...) +if not package_version.startswith(dbt_version): + raise ValueError( + f'Invalid setup.py: package_version={package_version} must start with ' + f'dbt_version={dbt_version} (from {DBT_VERSION})' + ) + + setup( name=package_name, version=package_version, @@ -33,8 +45,7 @@ ] }, install_requires=[ - f'dbt-core>=={package_version}', - 'jinja2<3.0.0', # until dbt-core reaches 0.16.0: https://github.com/fishtown-analytics/dbt/issues/2147 + f'dbt-core=={dbt_version}', 'PyHive[hive]>=0.6.0,<0.7.0', 'thrift>=0.11.0,<0.12.0', ] From 806cfd75092c247fa93eefcb4d005798a43e53fb Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 25 Mar 2020 08:12:22 -0600 Subject: [PATCH 094/603] remove cache_added call to avoid the deadlock --- dbt/adapters/spark/impl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index b3362895..0b66e0fe 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -119,7 +119,6 @@ def list_relations_without_caching( identifier=name, type=rel_type ) - self.cache_added(relation) relations.append(relation) return relations From 0052c367a6b0407e12ca8e62dd852b6394689af1 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 25 Mar 2020 10:14:04 -0600 Subject: [PATCH 095/603] PR feedback Add adapter.quote Use '*' in the merge sql instead of collecting the columns --- dbt/adapters/spark/impl.py | 3 +++ .../spark/macros/materializations/incremental.sql | 11 +++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 0b66e0fe..b732d233 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -81,6 +81,9 @@ def convert_time_type(cls, agate_table, col_idx): def convert_datetime_type(cls, agate_table, col_idx): return "timestamp" + def quote(self, identifier): + return '`{}`'.format(identifier) + def add_schema_to_cache(self, schema) -> str: """Cache a new schema in dbt. 
It will show up in `list relations`.""" if schema is None: diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 630d6988..f31aaf41 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -62,8 +62,12 @@ {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=[]) %} - {% set merge_source = source.include(schema=false) %} - {% do return(default__get_merge_sql(target, source, unique_key, dest_columns, predicates)) %} + {# ignore dest_columns - we will just use `*` #} + merge into {{ target }} as DBT_INTERNAL_DEST + using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE + on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} + when matched then update set * + when not matched then insert * {% endmacro %} @@ -73,8 +77,7 @@ {{ get_insert_overwrite_sql(source, target) }} {%- else -%} {#-- merge all columns with databricks delta - schema changes are handled for us #} - {% set dest_columns = adapter.get_columns_in_relation(target) %} - {{ get_merge_sql(target, source, unique_key, dest_columns, predicates=none) }} + {{ get_merge_sql(target, source, unique_key, dest_columns=none, predicates=none) }} {%- endif -%} {% endmacro %} From 643b84c870976c363075677b2c0b474bfd3add07 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 25 Mar 2020 12:55:57 -0600 Subject: [PATCH 096/603] pr feedback: [] -> none --- dbt/include/spark/macros/materializations/incremental.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index f31aaf41..f5d7335f 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -61,7 +61,7 @@ {% endmacro %} -{% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=[]) %} +{% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} {# ignore dest_columns - we will just use `*` #} merge into {{ target }} as DBT_INTERNAL_DEST using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE From 04b2a42e0b4f46d61f04feaefe75e3af78e006d9 Mon Sep 17 00:00:00 2001 From: Danny Pallotta Date: Fri, 27 Mar 2020 22:05:05 +1030 Subject: [PATCH 097/603] Table formatting of Usage Notes --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 59bbda18..a13db188 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,8 @@ The following configurations can be supplied to models run with the dbt-spark pl | Option | Description | Required? | Example | -| file_format | The file format to use when creating tables (`parquet`, `delta`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | `parquet` | +|---------|----------------------------------------------------|-------------------------|--------------------------| +| file_format | The file format to use when creating tables (`parquet`, `delta`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | `parquet`| | location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | `/mnt/root` | | partition_by | Partition the created table by the specified columns. A directory is created for each partition. 
| Optional | `partition_1` | | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | From 090a99a07875f9675f3ed724c3b6b7e5c063481d Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 27 Mar 2020 13:46:25 -0600 Subject: [PATCH 098/603] Move version info around, clean up some bumpversion stuff --- .bumpversion-dbt.cfg | 23 +++++++++++++++++++++++ .bumpversion.cfg | 11 ++++++++--- dbt/adapters/spark/__version__.py | 1 + setup.py | 24 +++++++++++++++++++----- 4 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 .bumpversion-dbt.cfg create mode 100644 dbt/adapters/spark/__version__.py diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg new file mode 100644 index 00000000..7de84051 --- /dev/null +++ b/.bumpversion-dbt.cfg @@ -0,0 +1,23 @@ +[bumpversion] +current_version = 0.16.0 +parse = (?P\d+) + \.(?P\d+) + \.(?P\d+) + ((?P[a-z]+)(?P\d+))? +serialize = + {major}.{minor}.{patch}{prerelease}{num} + {major}.{minor}.{patch} +commit = False +tag = False + +[bumpversion:part:prerelease] +first_value = a +values = + a + b + rc + +[bumpversion:part:num] +first_value = 1 + +[bumpversion:file:setup.py] diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 1906574c..b2084a03 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -3,16 +3,19 @@ current_version = 0.16.0a1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) + (\.(?P\d+))? ((?P[a-z]+)(?P\d+))? -serialize = +serialize = + {major}.{minor}.{patch}.{pluginpatch}{prerelease}{num} {major}.{minor}.{patch}{prerelease}{num} + {major}.{minor}.{patch}.{pluginpatch} {major}.{minor}.{patch} commit = False tag = False [bumpversion:part:prerelease] first_value = a -values = +values = a b rc @@ -20,5 +23,7 @@ values = [bumpversion:part:num] first_value = 1 -[bumpversion:file:setup.py] +[bumpversion:part:pluginpatch] +first_value = 1 +[bumpversion:file:dbt/adapters/spark/__version__.py] diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py new file mode 100644 index 00000000..e5f9d0eb --- /dev/null +++ b/dbt/adapters/spark/__version__.py @@ -0,0 +1 @@ +version = "0.16.0a1" diff --git a/setup.py b/setup.py index f0c45968..344a767a 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from setuptools import find_packages, setup import os +import re this_directory = os.path.abspath(os.path.dirname(__file__)) @@ -9,18 +10,31 @@ package_name = "dbt-spark" -package_version = "0.16.0a1" + + +# get this from a separate file +def _dbt_spark_version(): + _version_path = os.path.join( + this_directory, 'dbt', 'adapters', 'spark', '__version__.py' + ) + _version_pattern = r'''version\s*=\s*["'](.+)["']''' + with open(_version_path) as f: + match = re.search(_version_pattern, f.read().strip()) + if match is None: + raise ValueError(f'invalid version at {_version_path}') + return match.group(1) + + +package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -# evade bumpversion with this fun trick -DBT_VERSION = (0, 16, 0) -dbt_version = '.'.join(map(str, DBT_VERSION)) +dbt_version = '0.16.0' # the package version should be the dbt version, with maybe some things on the # ends of it. (0.16.0 vs 0.16.0a1, 0.16.0.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' - f'dbt_version={dbt_version} (from {DBT_VERSION})' + f'dbt_version={dbt_version}' ) From 936aaf5fd20b8bcf7d73ef0fc292ce89961bda97 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 27 Mar 2020 15:33:59 -0600 Subject: [PATCH 099/603] add requirements.txt to dbt bumpversion --- .bumpversion-dbt.cfg | 2 ++ requirements.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 7de84051..7a9cc503 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -21,3 +21,5 @@ values = first_value = 1 [bumpversion:file:setup.py] + +[bumpversion:file:requirements.txt] diff --git a/requirements.txt b/requirements.txt index 3b0792da..e0ffbd43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.15.3 +dbt-core==0.16.0 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 From bb9b56b19be8e7d9386370945959053c66e39258 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 30 Mar 2020 12:47:10 -0600 Subject: [PATCH 100/603] bring unit tests up to 0.16.0 support Set the circleci config go to v2.1 --- .circleci/config.yml | 2 +- test/unit/test_adapter.py | 4 +- test/unit/utils.py | 93 +++++++++++++++++++++++++++++++++++---- 3 files changed, 87 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 61afed92..f89aa0a4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,4 @@ -version: 2 +version: 2.1 jobs: unit: diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 15394eae..72e78ea5 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -98,7 +98,7 @@ def hive_thrift_connect(host, port, username): def test_parse_relation(self): self.maxDiff = None - rel_type = SparkRelation.RelationType.Table + rel_type = SparkRelation.get_relation_type.Table relation = SparkRelation.create( database='default_database', @@ -179,7 +179,7 @@ def test_parse_relation(self): def test_parse_relation_with_statistics(self): self.maxDiff = None - rel_type = SparkRelation.RelationType.Table + rel_type = SparkRelation.get_relation_type.Table relation = SparkRelation.create( database='default_database', diff --git a/test/unit/utils.py b/test/unit/utils.py index 5869eca3..affb6c37 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -1,4 +1,5 @@ """Unit test utility functions. + Note that all imports should be inside the functions to avoid import/mocking issues. """ @@ -11,6 +12,7 @@ def normalize(path): """On windows, neither is enough on its own: + >>> normcase('C:\\documents/ALL CAPS/subdir\\..') 'c:\\documents\\all caps\\subdir\\..' 
>>> normpath('C:\\documents/ALL CAPS/subdir\\..') @@ -23,6 +25,7 @@ def normalize(path): class Obj: which = 'blah' + single_threaded = False def mock_connection(name): @@ -31,20 +34,63 @@ def mock_connection(name): return conn -def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): - from dbt.config import Project, Profile, RuntimeConfig +def profile_from_dict(profile, profile_name, cli_vars='{}'): + from dbt.config import Profile, ConfigRenderer + from dbt.context.base import generate_base_context from dbt.utils import parse_cli_vars - from copy import deepcopy if not isinstance(cli_vars, dict): cli_vars = parse_cli_vars(cli_vars) - if not isinstance(project, Project): - project = Project.from_project_config(deepcopy(project), packages) + + renderer = ConfigRenderer(generate_base_context(cli_vars)) + return Profile.from_raw_profile_info( + profile, + profile_name, + renderer, + ) + + +def project_from_dict(project, profile, packages=None, cli_vars='{}'): + from dbt.context.target import generate_target_context + from dbt.config import Project, ConfigRenderer + from dbt.utils import parse_cli_vars + if not isinstance(cli_vars, dict): + cli_vars = parse_cli_vars(cli_vars) + + renderer = ConfigRenderer(generate_target_context(profile, cli_vars)) + + project_root = project.pop('project-root', os.getcwd()) + + return Project.render_from_dict( + project_root, project, packages, renderer + ) + + +def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): + from dbt.config import Project, Profile, RuntimeConfig + from copy import deepcopy + + if isinstance(project, Project): + profile_name = project.profile_name + else: + profile_name = project.get('profile') + if not isinstance(profile, Profile): - profile = Profile.from_raw_profile_info(deepcopy(profile), - project.profile_name, - cli_vars) + profile = profile_from_dict( + deepcopy(profile), + profile_name, + cli_vars, + ) + + if not isinstance(project, Project): + project = project_from_dict( + deepcopy(project), + profile, + packages, + cli_vars, + ) + args = Obj() - args.vars = repr(cli_vars) + args.vars = cli_vars args.profile_dir = '/dev/null' return RuntimeConfig.from_parts( project=project, @@ -88,3 +134,32 @@ def assert_fails_validation(self, dct, cls=None): with self.assertRaises(ValidationError): cls.from_dict(dct) + + +def generate_name_macros(package): + from dbt.contracts.graph.parsed import ParsedMacro + from dbt.node_types import NodeType + name_sql = {} + for component in ('database', 'schema', 'alias'): + if component == 'alias': + source = 'node.name' + else: + source = f'target.{component}' + name = f'generate_{component}_name' + sql = f'{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}' + name_sql[name] = sql + + all_sql = '\n'.join(name_sql.values()) + for name, sql in name_sql.items(): + pm = ParsedMacro( + name=name, + resource_type=NodeType.Macro, + unique_id=f'macro.{package}.{name}', + package_name=package, + original_file_path=normalize('macros/macro.sql'), + root_path='./dbt_modules/root', + path=normalize('macros/macro.sql'), + raw_sql=all_sql, + macro_sql=sql, + ) + yield pm From 8f8b451561b2c249714d0fc596ec8c6843e14cb4 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 2 Apr 2020 21:36:48 -0400 Subject: [PATCH 101/603] Update README with latest changes, docs --- README.md | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/README.md 
b/README.md index a13db188..ee059eef 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ ## dbt-spark ### Documentation -For more information on using Spark with dbt, consult the [dbt documentation](https://docs.getdbt.com/docs/profile-spark). +For more information on using Spark with dbt, consult the dbt documentation: +- [Spark profile](https://docs.getdbt.com/docs/profile-spark) +- [Spark specific configs](https://docs.getdbt.com/docs/spark-configs) ### Installation This plugin can be installed via pip: @@ -36,17 +38,11 @@ A dbt profile can be configured to run against Spark using the following configu AWS and Azure Databricks have differences in their connections, likely due to differences in how their URLs are generated between the two services. -To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. When connecting to Azure Databricks, the organization tag is required to be set in the profiles.yml connection file, as it will be defaulted to 0 otherwise, and will not connect to Azure. This connection method follows the databricks-connect package's semantics for connecting to Databricks. - -Of special note is the fact that organization ID is treated as a string by dbt-spark, as opposed to a large number. While all examples to date have contained numeric digits, it is unknown how long that may continue, and what the upper limit of this number is. If you do have a leading zero, please include it in the organization tag and dbt-spark will pass that along. - -dbt-spark has also been tested against AWS Databricks, and it has some differences in the URLs used. It appears to default the positional value where organization lives in AWS connection URLs to 0, so dbt-spark does the same for AWS connections (i.e. simply leave organization-id out when connecting to the AWS version and dbt-spark will construct the correct AWS URL for you). Note the missing reference to organization here: https://docs.databricks.com/dev-tools/databricks-connect.html#step-2-configure-connection-properties. - -Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. - -Lastly, the host field for Databricks can be found at the start of your workspace or cluster url (but don't include https://): region.azuredatabricks.net for Azure, or account.cloud.databricks.com for AWS. +**Organization:** To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. This is a string field; if there is a leading zero, be sure to include it. +**Port:** Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. +**Host:** The host field for Databricks can be found at the start of your workspace or cluster url: `region.azuredatabricks.net` for Azure, or `account.cloud.databricks.com` for AWS. Do not include `https://`. 
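The `http` method assembles the host, port, organization, and cluster values above into a single Databricks endpoint. As a minimal sketch (the URL template mirrors `SPARK_CONNECTION_URL` in `dbt/adapters/spark/connections.py`; the field values are the placeholders from the example profile below, not real credentials):

```python
# Sketch: how the http connection method builds the Databricks endpoint.
# organization defaults to "0", which is what AWS Databricks expects.
SPARK_CONNECTION_URL = (
    "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}"
)

url = SPARK_CONNECTION_URL.format(
    host="yourorg.sparkhost.com",
    port=443,
    organization="1234567891234567",  # Azure Databricks only; leave at "0" for AWS
    cluster="01234-23423-coffeetime",
)
print(url)
# https://yourorg.sparkhost.com:443/sql/protocolv1/o/1234567891234567/01234-23423-coffeetime
```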
**Usage with Amazon EMR** @@ -55,24 +51,7 @@ To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo **Example profiles.yml entries:** -**http, e.g. AWS Databricks** -``` -your_profile_name: - target: dev - outputs: - dev: - method: http - type: spark - schema: analytics - host: yourorg.sparkhost.com - port: 443 - token: abc123 - cluster: 01234-23423-coffeetime - connect_retries: 5 - connect_timeout: 60 -``` - -**Azure Databricks, via http** +**http, e.g. Databricks** ``` your_profile_name: target: dev @@ -82,9 +61,9 @@ your_profile_name: type: spark schema: analytics host: yourorg.sparkhost.com + organization: 1234567891234567 # Azure Databricks ONLY port: 443 token: abc123 - organization: 1234567891234567 cluster: 01234-23423-coffeetime connect_retries: 5 connect_timeout: 60 @@ -123,6 +102,7 @@ The following configurations can be supplied to models run with the dbt-spark pl | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | | buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | | incremental_strategy | The strategy to use for incremental models (`insert_overwrite` or `merge`). Note `merge` requires `file_format` = `delta` and `unique_key` to be specified. | Optional (default: `insert_overwrite`) | `merge` | +| persist_docs | Whether dbt should include the model description as a table `comment` | Optional | `{'relation': true}` | **Incremental Models** From c1f7212c0e359098f43b71d388c72ea41fbd7045 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 8 Apr 2020 10:37:41 -0600 Subject: [PATCH 102/603] =?UTF-8?q?Bump=20version:=200.16.0a1=20=E2=86=92?= =?UTF-8?q?=200.16.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 7 ++++--- dbt/adapters/spark/__version__.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b2084a03..690bd117 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,11 +1,11 @@ [bumpversion] -current_version = 0.16.0a1 +current_version = 0.16.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) (\.(?P\d+))? ((?P[a-z]+)(?P\d+))? 
-serialize = +serialize = {major}.{minor}.{patch}.{pluginpatch}{prerelease}{num} {major}.{minor}.{patch}{prerelease}{num} {major}.{minor}.{patch}.{pluginpatch} @@ -15,7 +15,7 @@ tag = False [bumpversion:part:prerelease] first_value = a -values = +values = a b rc @@ -27,3 +27,4 @@ first_value = 1 first_value = 1 [bumpversion:file:dbt/adapters/spark/__version__.py] + diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index e5f9d0eb..3f74e1d8 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.16.0a1" +version = "0.16.0" From e583919cfd4ab837e73601c5bdde5b5278a97b7c Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 21 Apr 2020 13:00:16 -0400 Subject: [PATCH 103/603] Add + rename snapshot macros --- .../spark/macros/materializations/snapshot.sql | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index a23b8a73..9eeaf2e2 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -5,7 +5,12 @@ {%- endfor -%}) {%- endmacro %} -{% macro build_snapshot_table(strategy, sql) %} +{% macro spark__snapshot_string_as_time(timestamp) -%} + {%- set result = "to_timestamp('" ~ timestamp ~ "')" -%} + {{ return(result) }} +{%- endmacro %} + +{% macro spark_build_snapshot_table(strategy, sql) %} select *, {{ strategy.scd_id }} as dbt_scd_id, @@ -19,7 +24,7 @@ {% endmacro %} -{% macro snapshot_staging_table_inserts(strategy, source_sql, target_relation) -%} +{% macro spark_snapshot_staging_table_inserts(strategy, source_sql, target_relation) -%} with snapshot_query as ( @@ -70,7 +75,7 @@ {%- endmacro %} -{% macro snapshot_staging_table_updates(strategy, source_sql, target_relation) -%} +{% macro spark_snapshot_staging_table_updates(strategy, source_sql, target_relation) -%} with snapshot_query as ( From 3c0f99a375927db4991fa56700bd1a2e0328df19 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 21 Apr 2020 16:40:59 -0600 Subject: [PATCH 104/603] fix catalog generation --- dbt/adapters/spark/impl.py | 51 ++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index b732d233..6140be65 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -3,8 +3,9 @@ import agate import dbt.exceptions import dbt +from dbt.adapters.base.relation import SchemaSearchMap from dbt.adapters.sql import SQLAdapter -from dbt.contracts.graph.manifest import Manifest +from dbt.node_types import NodeType from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark import SparkRelation @@ -267,12 +268,10 @@ def _massage_column_for_catalog( dct['table_database'] = dct['table_schema'] return dct - def get_catalog(self, manifest: Manifest) -> agate.Table: - schemas = manifest.get_used_schemas() - columns = [] - for database, schema in schemas: - relations = self.list_relations(database, schema) - for relation in relations: + def _get_catalog_for_relations(self, database: str, schema: str): + with self.connection_named(f'{database}.{schema}'): + columns = [] + for relation in self.list_relations(database, schema): logger.debug("Getting table schema for relation {}", relation) columns.extend( self._massage_column_for_catalog(col) @@ -280,6 +279,44 @@ def get_catalog(self, manifest: Manifest) -> 
agate.Table: ) return agate.Table.from_object(columns) + def _get_cache_schemas(self, manifest, exec_only=False): + info_schema_name_map = SchemaSearchMap() + for node in manifest.nodes.values(): + if exec_only and node.resource_type not in NodeType.executable(): + continue + relation = self.Relation.create( + database=node.database, + schema=node.schema, + identifier='information_schema', + quote_policy=self.config.quoting, + ) + key = relation.information_schema_only() + info_schema_name_map[key] = {node.schema} + return info_schema_name_map + + def _get_one_catalog( + self, information_schema, schemas, manifest, + ) -> agate.Table: + name = f'{information_schema.database}.information_schema' + + if len(schemas) != 1: + dbt.exceptions.raise_compiler_error( + 'Expected only one schema in spark _get_one_catalog' + ) + + database = information_schema.database + schema = list(schemas)[0] + + with self.connection_named(name): + columns = [] + for relation in self.list_relations(database, schema): + logger.debug("Getting table schema for relation {}", relation) + columns.extend( + self._massage_column_for_catalog(col) + for col in self.get_columns_in_relation(relation) + ) + return agate.Table.from_object(columns) + def check_schema_exists(self, database, schema): results = self.execute_macro( LIST_SCHEMAS_MACRO_NAME, From e68a1b9b23a76c68b8e51c0f798f8139f2b3acc9 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 27 Apr 2020 15:49:35 -0400 Subject: [PATCH 105/603] Bump version 0.16.1 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 7a9cc503..62907f23 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.16.0 +current_version = 0.16.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 690bd117..e21d4d97 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.16.0 +current_version = 0.16.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 3f74e1d8..bbbab11e 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.16.0" +version = "0.16.1" diff --git a/requirements.txt b/requirements.txt index e0ffbd43..2f2c177d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.16.0 +dbt-core==0.16.1 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 344a767a..e227c02d 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.16.0' +dbt_version = '0.16.1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.16.0 vs 0.16.0a1, 0.16.0.1, ...) +# ends of it. (0.16.1 vs 0.16.1a1, 0.16.1.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 2400f530a397260e73e9c42e9b7d762a7fea6895 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 27 Apr 2020 16:11:36 -0600 Subject: [PATCH 106/603] use find_namespace_packages and PEP 420 --- dbt/.DS_Store | Bin 0 -> 6148 bytes dbt/__init__.py | 1 - dbt/adapters/__init__.py | 1 - dbt/include/__init__.py | 1 - setup.py | 4 ++-- 5 files changed, 2 insertions(+), 5 deletions(-) create mode 100644 dbt/.DS_Store delete mode 100644 dbt/__init__.py delete mode 100644 dbt/adapters/__init__.py delete mode 100644 dbt/include/__init__.py diff --git a/dbt/.DS_Store b/dbt/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3246446b21042644cbd92d0b4ce9086acdca977b GIT binary patch literal 6148 zcmeHK%Sr=55Ue%<1G(hrael!+7((&|`2iu3AcPf!c;1uW<)>NwKnxo%LM~Db-8IwG zHOtmvdmDhQ&xc1~1zmo?*J;X10VyB_q<|EV0)JG%doOLVOjMKtQa}oPE8yRUMtAImV`6+d7-9q<&Y2G5 zI%WxC^8~RMj)~0BEUCn#T8$W%bmm*t^};bR>986;te$K&p;$bf_qQmA^+ZJ}AO$WJ znB{im{eMgUW&Xb;X(t7wz`s(!=9}GS%~z`4I(a$owT=En_nJ?-8`nW$h;~ejcFc{p e Date: Tue, 28 Apr 2020 12:21:55 -0600 Subject: [PATCH 107/603] remove junk --- .gitignore | 1 + dbt/.DS_Store | Bin 6148 -> 0 bytes 2 files changed, 1 insertion(+) delete mode 100644 dbt/.DS_Store diff --git a/.gitignore b/.gitignore index 9070f37c..85c98e1c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ build/ dist/ dbt-integration-tests test/integration/.user.yml +.DS_Store diff --git a/dbt/.DS_Store b/dbt/.DS_Store deleted file mode 100644 index 3246446b21042644cbd92d0b4ce9086acdca977b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%Sr=55Ue%<1G(hrael!+7((&|`2iu3AcPf!c;1uW<)>NwKnxo%LM~Db-8IwG zHOtmvdmDhQ&xc1~1zmo?*J;X10VyB_q<|EV0)JG%doOLVOjMKtQa}oPE8yRUMtAImV`6+d7-9q<&Y2G5 zI%WxC^8~RMj)~0BEUCn#T8$W%bmm*t^};bR>986;te$K&p;$bf_qQmA^+ZJ}AO$WJ znB{im{eMgUW&Xb;X(t7wz`s(!=9}GS%~z`4I(a$owT=En_nJ?-8`nW$h;~ejcFc{p e Date: Wed, 13 May 2020 10:06:48 -0600 Subject: [PATCH 108/603] =?UTF-8?q?Bump=20dbt=20version:=200.16.1=20?= =?UTF-8?q?=E2=86=92=200.17.0rc1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 7 ++++--- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 62907f23..6979f061 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,10 +1,10 @@ [bumpversion] -current_version = 0.16.1 +current_version = 0.17.0rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) ((?P[a-z]+)(?P\d+))? 
-serialize = +serialize = {major}.{minor}.{patch}{prerelease}{num} {major}.{minor}.{patch} commit = False @@ -12,7 +12,7 @@ tag = False [bumpversion:part:prerelease] first_value = a -values = +values = a b rc @@ -23,3 +23,4 @@ first_value = 1 [bumpversion:file:setup.py] [bumpversion:file:requirements.txt] + diff --git a/.bumpversion.cfg b/.bumpversion.cfg index e21d4d97..c2f4281e 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.16.1 +current_version = 0.17.0rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index bbbab11e..65d02f6c 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.16.1" +version = "0.17.0rc1" diff --git a/requirements.txt b/requirements.txt index 2f2c177d..073b3db0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.16.1 +dbt-core==0.17.0rc1 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 9eb520f6..a5125ab7 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.16.1' +dbt_version = '0.17.0rc1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.16.1 vs 0.16.1a1, 0.16.1.1, ...) +# ends of it. (0.17.0rc1 vs 0.17.0rc1a1, 0.17.0rc1.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From ba5dc41d39b2aa1fdbba75a395842fe45dd15c42 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 13 May 2020 20:29:09 +0200 Subject: [PATCH 109/603] Add header and badge Every project needs a badge --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ee059eef..35d7f9d5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,16 @@ -## dbt-spark +
+  dbt logo
+
+  CircleCI
+  Slack
+
+ +# dbt-spark ### Documentation For more information on using Spark with dbt, consult the dbt documentation: From aff77b1d72a39d57ff90af437523281bcff2a9c5 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 13 May 2020 14:42:30 -0600 Subject: [PATCH 110/603] make 0.17.0 work Database is not allowed to be set basically anywhere --- dbt/adapters/spark/connections.py | 14 +++++++-- dbt/adapters/spark/impl.py | 44 +++++++++++---------------- dbt/adapters/spark/relation.py | 18 +++++++++++ dbt/include/spark/dbt_project.yml | 2 +- dbt/include/spark/macros/adapters.sql | 14 ++++----- test/unit/test_adapter.py | 33 ++++++++++++++++++-- test/unit/utils.py | 10 +++--- 7 files changed, 92 insertions(+), 43 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 0a3110bd..a43b659d 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -32,10 +32,10 @@ class SparkCredentials(Credentials): host: str method: SparkConnectionMethod schema: str - cluster: Optional[str] - token: Optional[str] - user: Optional[str] database: Optional[str] + cluster: Optional[str] = None + token: Optional[str] = None + user: Optional[str] = None port: int = 443 organization: str = '0' connect_retries: int = 0 @@ -43,6 +43,14 @@ class SparkCredentials(Credentials): def __post_init__(self): # spark classifies database and schema as the same thing + if ( + self.database is not None and + self.database != self.schema + ): + raise dbt.exceptions.RuntimeException( + f'In credentials: got database={self.database} but ' + f'schema={self.schema} - on spark, both most be the same value' + ) self.database = self.schema @property diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 6140be65..95ba4484 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,11 +1,11 @@ -from typing import Optional, List, Dict, Any +from dataclasses import dataclass +from typing import Optional, List, Dict, Any, Union import agate import dbt.exceptions import dbt -from dbt.adapters.base.relation import SchemaSearchMap +from dbt.adapters.base import AdapterConfig from dbt.adapters.sql import SQLAdapter -from dbt.node_types import NodeType from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark import SparkRelation @@ -25,6 +25,15 @@ KEY_TABLE_STATISTICS = 'Statistics' +@dataclass +class SparkConfig(AdapterConfig): + file_format: str = 'parquet' + location_root: Optional[str] = None + partition_by: Optional[Union[List[str], str]] = None + clustered_by: Optional[Union[List[str], str]] = None + buckets: Optional[int] = None + + class SparkAdapter(SQLAdapter): COLUMN_NAMES = ( 'table_database', @@ -52,10 +61,7 @@ class SparkAdapter(SQLAdapter): Relation = SparkRelation Column = SparkColumn ConnectionManager = SparkConnectionManager - - AdapterSpecificConfigs = frozenset({"file_format", "location_root", - "partition_by", "clustered_by", - "buckets"}) + AdapterSpecificConfigs = SparkConfig @classmethod def date_function(cls) -> str: @@ -98,9 +104,9 @@ def add_schema_to_cache(self, schema) -> str: return '' def list_relations_without_caching( - self, information_schema, schema + self, schema_relation: SparkRelation ) -> List[SparkRelation]: - kwargs = {'information_schema': information_schema, 'schema': schema} + kwargs = {'schema_relation': schema_relation} try: results = self.execute_macro( LIST_RELATIONS_MACRO_NAME, @@ -108,11 +114,12 @@ def list_relations_without_caching( release=True ) except 
dbt.exceptions.RuntimeException as e: - if hasattr(e, 'msg') and f"Database '{schema}' not found" in e.msg: + errmsg = getattr(e, 'msg', '') + if f"Database '{schema_relation}' not found" in errmsg: return [] else: description = "Error while retrieving information about" - logger.debug(f"{description} {schema}: {e.msg}") + logger.debug(f"{description} {schema_relation}: {e.msg}") return [] relations = [] @@ -279,21 +286,6 @@ def _get_catalog_for_relations(self, database: str, schema: str): ) return agate.Table.from_object(columns) - def _get_cache_schemas(self, manifest, exec_only=False): - info_schema_name_map = SchemaSearchMap() - for node in manifest.nodes.values(): - if exec_only and node.resource_type not in NodeType.executable(): - continue - relation = self.Relation.create( - database=node.database, - schema=node.schema, - identifier='information_schema', - quote_policy=self.config.quoting, - ) - key = relation.information_schema_only() - info_schema_name_map[key] = {node.schema} - return info_schema_name_map - def _get_one_catalog( self, information_schema, schemas, manifest, ) -> agate.Table: diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 92473d55..4a7f4dc3 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from dbt.adapters.base.relation import BaseRelation, Policy +from dbt.exceptions import RuntimeException @dataclass @@ -22,3 +23,20 @@ class SparkRelation(BaseRelation): quote_policy: SparkQuotePolicy = SparkQuotePolicy() include_policy: SparkIncludePolicy = SparkIncludePolicy() quote_character: str = '`' + + def __post_init__(self): + # some core things set database='', which we should ignore. + if self.database and self.database != self.schema: + raise RuntimeException( + f'In relation with identifier={self.identifier}, ' + f'schema={self.schema}: got database={self.database}, but it ' + f'should not be set' + ) + + def render(self): + if self.include_policy.database and self.include_policy.schema: + raise RuntimeException( + 'Got a spark relation with schema and database set to ' + 'include, but only one can be set' + ) + return super().render() diff --git a/dbt/include/spark/dbt_project.yml b/dbt/include/spark/dbt_project.yml index 2294c23d..36d69b41 100644 --- a/dbt/include/spark/dbt_project.yml +++ b/dbt/include/spark/dbt_project.yml @@ -1,5 +1,5 @@ - name: dbt_spark version: 1.0 +config-version: 2 macro-paths: ["macros"] diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 3e6ce236..d0e11fca 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -20,7 +20,7 @@ {%- if raw_persist_docs is mapping -%} {%- set raw_relation = raw_persist_docs.get('relation', false) -%} {%- if raw_relation -%} - comment '{{ model.description }}' + comment '{{ model.description | replace("'", "\\'") }}' {% endif %} {%- else -%} {{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. 
Expected dict but got value: " ~ raw_persist_docs) }} @@ -96,15 +96,15 @@ {{ sql }} {% endmacro %} -{% macro spark__create_schema(database_name, schema_name) -%} +{% macro spark__create_schema(relation) -%} {%- call statement('create_schema') -%} - create schema if not exists {{schema_name}} + create schema if not exists {{relation}} {% endcall %} {% endmacro %} -{% macro spark__drop_schema(database_name, schema_name) -%} +{% macro spark__drop_schema(relation) -%} {%- call statement('drop_schema') -%} - drop schema if exists {{ schema_name }} cascade + drop schema if exists {{ relation }} cascade {%- endcall -%} {% endmacro %} @@ -115,9 +115,9 @@ {% do return(load_result('get_columns_in_relation').table) %} {% endmacro %} -{% macro spark__list_relations_without_caching(information_schema, schema) %} +{% macro spark__list_relations_without_caching(relation) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} - show table extended in {{ schema }} like '*' + show table extended in {{ relation }} like '*' {% endcall %} {% do return(load_result('list_relations_without_caching').table) %} diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 72e78ea5..e453c12b 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -2,6 +2,7 @@ from unittest import mock import dbt.flags as flags +from dbt.exceptions import RuntimeException from agate import Row from pyhive import hive from dbt.adapters.spark import SparkAdapter, SparkRelation @@ -101,7 +102,6 @@ def test_parse_relation(self): rel_type = SparkRelation.get_relation_type.Table relation = SparkRelation.create( - database='default_database', schema='default_schema', identifier='mytable', type=rel_type @@ -182,7 +182,6 @@ def test_parse_relation_with_statistics(self): rel_type = SparkRelation.get_relation_type.Table relation = SparkRelation.create( - database='default_database', schema='default_schema', identifier='mytable', type=rel_type @@ -236,3 +235,33 @@ def test_parse_relation_with_statistics(self): 'stats:rows:label': 'rows', 'stats:rows:value': 14093476, }) + + def test_relation_with_database(self): + config = self._get_target_http(self.project_cfg) + adapter = SparkAdapter(config) + # fine + adapter.Relation.create(schema='different', identifier='table') + with self.assertRaises(RuntimeException): + # not fine - database set + adapter.Relation.create(database='something', schema='different', identifier='table') + + def test_profile_with_database(self): + profile = { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'http', + # not allowed + 'database': 'analytics2', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 443, + 'token': 'abc123', + 'organization': '0123456789', + 'cluster': '01234-23423-coffeetime', + } + }, + 'target': 'test' + } + with self.assertRaises(RuntimeException): + config_from_parts_or_dicts(self.project_cfg, profile) diff --git a/test/unit/utils.py b/test/unit/utils.py index affb6c37..53630bba 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -35,13 +35,14 @@ def mock_connection(name): def profile_from_dict(profile, profile_name, cli_vars='{}'): - from dbt.config import Profile, ConfigRenderer + from dbt.config import Profile + from dbt.config.renderer import ProfileRenderer from dbt.context.base import generate_base_context from dbt.utils import parse_cli_vars if not isinstance(cli_vars, dict): cli_vars = parse_cli_vars(cli_vars) - renderer = ConfigRenderer(generate_base_context(cli_vars)) + renderer = 
ProfileRenderer(generate_base_context(cli_vars)) return Profile.from_raw_profile_info( profile, profile_name, @@ -51,12 +52,13 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'): def project_from_dict(project, profile, packages=None, cli_vars='{}'): from dbt.context.target import generate_target_context - from dbt.config import Project, ConfigRenderer + from dbt.config import Project + from dbt.config.renderer import DbtProjectYamlRenderer from dbt.utils import parse_cli_vars if not isinstance(cli_vars, dict): cli_vars = parse_cli_vars(cli_vars) - renderer = ConfigRenderer(generate_target_context(profile, cli_vars)) + renderer = DbtProjectYamlRenderer(generate_target_context(profile, cli_vars)) project_root = project.pop('project-root', os.getcwd()) From 06199a0bd3647672a7a13f171b78324215a4de64 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 14 May 2020 09:36:18 -0600 Subject: [PATCH 111/603] Update dbt/adapters/spark/connections.py Co-authored-by: Jeremy Cohen --- dbt/adapters/spark/connections.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index a43b659d..62b4db77 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -48,8 +48,10 @@ def __post_init__(self): self.database != self.schema ): raise dbt.exceptions.RuntimeException( - f'In credentials: got database={self.database} but ' - f'schema={self.schema} - on spark, both most be the same value' + f' schema: {self.schema} \n' + f' database: {self.database} \n' + f'On Spark, database must be omitted or have the same value as' + f' schema.' ) self.database = self.schema From fbd8d84e28afc39cb59e4b7053db6a2819bd039a Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 14 May 2020 09:36:30 -0600 Subject: [PATCH 112/603] Update dbt/adapters/spark/relation.py Co-authored-by: Jeremy Cohen --- dbt/adapters/spark/relation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 4a7f4dc3..2106e5cb 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -28,9 +28,12 @@ def __post_init__(self): # some core things set database='', which we should ignore. if self.database and self.database != self.schema: raise RuntimeException( - f'In relation with identifier={self.identifier}, ' - f'schema={self.schema}: got database={self.database}, but it ' - f'should not be set' + f'Error while parsing relation {self.name}: \n' + f' identifier: {self.identifier} \n' + f' schema: {self.schema} \n' + f' database: {self.database} \n' + f'On Spark, database should not be set. Use the schema ' + f'config to set a custom schema/database for this relation.' 
) def render(self): From 908a5ca87788892e5d6758309de458a1220af0ee Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 14 May 2020 10:32:30 -0600 Subject: [PATCH 113/603] clean up the error handling a bit Handle EOFError as a "try again later" message --- dbt/adapters/spark/connections.py | 36 ++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 62b4db77..07df03b9 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -277,21 +277,19 @@ def open(cls, connection): break except Exception as e: exc = e - if getattr(e, 'message', None) is None: - raise dbt.exceptions.FailedToConnectException(str(e)) - - message = e.message.lower() - is_pending = 'pending' in message - is_starting = 'temporarily_unavailable' in message - - warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})" - if is_pending or is_starting: - msg = warning.format(e.message, creds.connect_timeout, - i, creds.connect_retries) + retryable_message = _is_retryable_error(e) + if retryable_message: + msg = ( + f"Warning: {retryable_message}\n\tRetrying in " + f"{creds.connect_timeout} seconds " + f"({i} of {creds.connect_retries})" + ) logger.warning(msg) time.sleep(creds.connect_timeout) else: - raise dbt.exceptions.FailedToConnectException(str(e)) + raise dbt.exceptions.FailedToConnectException( + 'failed to connect' + ) from e else: raise exc @@ -299,3 +297,17 @@ def open(cls, connection): connection.handle = handle connection.state = ConnectionState.OPEN return connection + + +def _is_retryable_error(exc: Exception) -> Optional[str]: + if isinstance(exc, EOFError): + return 'EOFError' + message = getattr(exc, 'message', None) + if message is None: + return None + message = message.lower() + if 'pending' in message: + return exc.message + if 'temporarily_unavailable' in message: + return exc.message + return None From 1815489a7eb8d164eac5adc0331688c825f7dee9 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 14 May 2020 10:32:30 -0600 Subject: [PATCH 114/603] clean up the error handling a bit Handle EOFError as a fatal error, and point out token expiration if there is a token --- dbt/adapters/spark/connections.py | 41 ++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 62b4db77..2c152fc8 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -277,21 +277,26 @@ def open(cls, connection): break except Exception as e: exc = e - if getattr(e, 'message', None) is None: - raise dbt.exceptions.FailedToConnectException(str(e)) - - message = e.message.lower() - is_pending = 'pending' in message - is_starting = 'temporarily_unavailable' in message - - warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})" - if is_pending or is_starting: - msg = warning.format(e.message, creds.connect_timeout, - i, creds.connect_retries) + if isinstance(e, EOFError): + # The user almost certainly has invalid credentials. + # Perhaps a token expired, or something + msg = 'Failed to connect' + if creds.token is not None: + msg += ', is your token valid?' 
+ raise dbt.exceptions.FailedToConnectException(msg) from e + retryable_message = _is_retryable_error(e) + if retryable_message: + msg = ( + f"Warning: {retryable_message}\n\tRetrying in " + f"{creds.connect_timeout} seconds " + f"({i} of {creds.connect_retries})" + ) logger.warning(msg) time.sleep(creds.connect_timeout) else: - raise dbt.exceptions.FailedToConnectException(str(e)) + raise dbt.exceptions.FailedToConnectException( + 'failed to connect' + ) from e else: raise exc @@ -299,3 +304,15 @@ def open(cls, connection): connection.handle = handle connection.state = ConnectionState.OPEN return connection + + +def _is_retryable_error(exc: Exception) -> Optional[str]: + message = getattr(exc, 'message', None) + if message is None: + return None + message = message.lower() + if 'pending' in message: + return exc.message + if 'temporarily_unavailable' in message: + return exc.message + return None From ad37420ea9dea7110a01878ae366823181c9198a Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 14 May 2020 14:55:24 -0400 Subject: [PATCH 115/603] Update for 0.17.0 snapshot changes --- .../macros/materializations/snapshot.sql | 245 ++++++------------ 1 file changed, 82 insertions(+), 163 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 9eeaf2e2..4661fc4d 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -5,166 +5,63 @@ {%- endfor -%}) {%- endmacro %} + {% macro spark__snapshot_string_as_time(timestamp) -%} {%- set result = "to_timestamp('" ~ timestamp ~ "')" -%} {{ return(result) }} {%- endmacro %} -{% macro spark_build_snapshot_table(strategy, sql) %} - select *, - {{ strategy.scd_id }} as dbt_scd_id, - {{ strategy.unique_key }} as dbt_unique_key, - {{ strategy.updated_at }} as dbt_updated_at, - {{ strategy.updated_at }} as dbt_valid_from, - nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to - from ( - {{ sql }} - ) sbq +{% macro spark__snapshot_merge_sql(target, source, insert_cols) -%} + merge into {{ target }} as DBT_INTERNAL_DEST + using {{ source }} as DBT_INTERNAL_SOURCE + on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id + when matched + and DBT_INTERNAL_DEST.dbt_valid_to is null + and DBT_INTERNAL_SOURCE.dbt_change_type = 'update' + then update + set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to + + when not matched + and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert' + then insert * + ; {% endmacro %} -{% macro spark_snapshot_staging_table_inserts(strategy, source_sql, target_relation) -%} - - with snapshot_query as ( - - {{ source_sql }} - - ), - - snapshotted_data as ( - - select * - - from {{ target_relation }} - - ), - - source_data as ( - - select *, - {{ strategy.scd_id }} as dbt_scd_id, - {{ strategy.unique_key }} as dbt_unique_key, - {{ strategy.updated_at }} as dbt_updated_at, - {{ strategy.updated_at }} as dbt_valid_from, - nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to - - from snapshot_query - ), - - insertions as ( - - select - source_data.* - - from source_data - left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key - where snapshotted_data.dbt_unique_key is null - or ( - snapshotted_data.dbt_unique_key is not null - and snapshotted_data.dbt_valid_to is null - and ( - {{ strategy.row_changed }} - ) - ) - - ) - - select * from insertions - -{%- endmacro %} - - -{% macro 
spark_snapshot_staging_table_updates(strategy, source_sql, target_relation) -%} - - with snapshot_query as ( - - {{ source_sql }} - - ), - - snapshotted_data as ( - - select * - - from {{ target_relation }} - - ), - - source_data as ( - - select - *, - {{ strategy.scd_id }} as dbt_scd_id, - {{ strategy.unique_key }} as dbt_unique_key, - {{ strategy.updated_at }} as dbt_updated_at, - {{ strategy.updated_at }} as dbt_valid_from - from snapshot_query - ), +{% macro spark_build_snapshot_staging_table(strategy, sql, target_relation) %} + {% set tmp_identifier = target_relation.identifier ~ '__dbt_tmp' %} + + {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, + schema=target_relation.schema, + database=target_relation.database, + type='view') -%} - updates as ( + {% set select = snapshot_staging_table(strategy, sql, target_relation) %} - select - 'update' as dbt_change_type, - snapshotted_data.dbt_scd_id, - source_data.dbt_valid_from as dbt_valid_to - - from source_data - join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key - where snapshotted_data.dbt_valid_to is null - and ( - {{ strategy.row_changed }} - ) - - ) - - select * from updates - -{%- endmacro %} - -{% macro build_snapshot_staging_table_updates(strategy, sql, target_relation) %} - {% set tmp_update_relation = make_temp_relation(target_relation, '__dbt_tmp_update') %} - - {% set update_select = snapshot_staging_table_updates(strategy, sql, target_relation) %} - - {% call statement('build_snapshot_staging_relation_updates') %} - {{ create_table_as(True, tmp_update_relation, update_select) }} + {# needs to be a non-temp view so that its columns can be ascertained via `describe` #} + {% call statement('build_snapshot_staging_relation') %} + {{ create_view_as(tmp_relation, select) }} {% endcall %} - {% do return(tmp_update_relation) %} + {% do return(tmp_relation) %} {% endmacro %} -{% macro build_snapshot_staging_table_insert(strategy, sql, target_relation) %} - {% set tmp_insert_relation = make_temp_relation(target_relation, '__dbt_tmp_insert') %} - {% set inserts_select = snapshot_staging_table_inserts(strategy, sql, target_relation) %} - - {% call statement('build_snapshot_staging_relation_inserts') %} - {{ create_table_as(True, tmp_insert_relation, inserts_select) }} - {% endcall %} - - - {% do return(tmp_insert_relation) %} -{% endmacro %} - -{% macro spark__snapshot_merge_update_sql(target, source) -%} - - merge into {{ target }} as DBT_INTERNAL_DEST - using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE - on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id - when matched then update set DBT_INTERNAL_DEST.dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to - ; +{% macro spark__post_snapshot(staging_relation) %} + {% do adapter.drop_relation(staging_relation) %} {% endmacro %} -{% macro spark__snapshot_merge_insert_sql(target, source) -%} - - merge into {{ target }} as DBT_INTERNAL_DEST - using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE - on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id - when not matched then insert * - ; +{% macro spark__create_columns(relation, columns) %} + {% call statement() %} + alter table {{ relation }} add columns ( + {% for column in columns %} + `{{ column.name }}` {{ column.data_type }} {{- ',' if not loop.last -}} + {% endfor %} + ); + {% endcall %} {% endmacro %} @@ -186,6 +83,9 @@ identifier=target_table, type='table') -%} + {%- if not target_relation.is_table -%} + {% do 
exceptions.relation_wrong_type(target_relation, 'table') %} + {%- endif -%} {{ run_hooks(pre_hooks, inside_transaction=False) }} @@ -197,35 +97,54 @@ {% if not target_relation_exists %} {% set build_sql = build_snapshot_table(strategy, model['injected_sql']) %} - {% call statement('main') -%} - {{ create_table_as(False, target_relation, build_sql) }} - {% endcall %} + {% set final_sql = create_table_as(False, target_relation, build_sql) %} {% else %} {{ adapter.valid_snapshot_target(target_relation) }} - {% set staging_insert_table = build_snapshot_staging_table_insert(strategy, sql, target_relation) %} - - {% call statement('main') %} - {{ spark__snapshot_merge_insert_sql( - target = target_relation, - source = staging_insert_table - ) - }} - {% endcall %} - - {% set staging_update_table = build_snapshot_staging_table_updates(strategy, sql, target_relation) %} - - {% call statement('main-2') %} - {{ spark__snapshot_merge_update_sql( - target = target_relation, - source = staging_update_table - ) - }} - {% endcall %} + {% set staging_table = spark_build_snapshot_staging_table(strategy, sql, target_relation) %} + + -- this may no-op if the database does not require column expansion + {% do adapter.expand_target_column_types(from_relation=staging_table, + to_relation=target_relation) %} + + {% set missing_columns = adapter.get_missing_columns(staging_table, target_relation) + | rejectattr('name', 'equalto', 'dbt_change_type') + | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE') + | rejectattr('name', 'equalto', 'dbt_unique_key') + | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY') + | list %} + + {% do create_columns(target_relation, missing_columns) %} + + {% set source_columns = adapter.get_columns_in_relation(staging_table) + | rejectattr('name', 'equalto', 'dbt_change_type') + | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE') + | rejectattr('name', 'equalto', 'dbt_unique_key') + | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY') + | list %} + + {% set quoted_source_columns = [] %} + {% for column in source_columns %} + {% do quoted_source_columns.append(adapter.quote(column.name)) %} + {% endfor %} + + {% set final_sql = snapshot_merge_sql( + target = target_relation, + source = staging_table, + insert_cols = quoted_source_columns + ) + %} + {% endif %} + {% call statement('main') %} + {{ final_sql }} + {% endcall %} + + {% do persist_docs(target_relation, model) %} + {{ run_hooks(post_hooks, inside_transaction=True) }} {{ adapter.commit() }} @@ -238,4 +157,4 @@ {{ return({'relations': [target_relation]}) }} -{% endmaterialization %} \ No newline at end of file +{% endmaterialization %} From d1f7137534638374402516ba4fcd03ee74c6e145 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 14 May 2020 15:06:43 -0400 Subject: [PATCH 116/603] Add file_format exception --- .../spark/macros/materializations/snapshot.sql | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 4661fc4d..105c13fd 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -55,6 +55,7 @@ {% macro spark__create_columns(relation, columns) %} + {% if columns|length > 0 %} {% call statement() %} alter table {{ relation }} add columns ( {% for column in columns %} @@ -62,6 +63,7 @@ {% endfor %} ); {% endcall %} + {% endif %} {% endmacro %} @@ -72,6 +74,16 @@ {%- set strategy_name = config.get('strategy') -%} {%- set unique_key 
= config.get('unique_key') %} + {%- set file_format = config.get('file_format', 'parquet') -%} + + {% set invalid_format_msg -%} + Invalid file format: {{ file_format }} + Snapshot functionality requires file_format be set to 'delta' + {%- endset %} + + {%- if file_format != 'delta' -%} + {% do exceptions.raise_compiler_error(invalid_format_msg) %} + {% endif %} {% if not adapter.check_schema_exists(model.database, model.schema) %} {% do create_schema(model.database, model.schema) %} From 07f9bf884f493aeaa52533de3d3f8e19681013be Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 22 May 2020 14:22:33 -0600 Subject: [PATCH 117/603] =?UTF-8?q?Bump=20dbt=20version:=200.17.0rc1=20?= =?UTF-8?q?=E2=86=92=200.17.0rc2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 6979f061..a169d817 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0rc1 +current_version = 0.17.0rc2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c2f4281e..5c3ccfa2 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0rc1 +current_version = 0.17.0rc2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 65d02f6c..ef664b00 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.17.0rc1" +version = "0.17.0rc2" diff --git a/requirements.txt b/requirements.txt index 073b3db0..219e52ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.17.0rc1 +dbt-core==0.17.0rc2 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index a5125ab7..00c26e51 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.17.0rc1' +dbt_version = '0.17.0rc2' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.17.0rc1 vs 0.17.0rc1a1, 0.17.0rc1.1, ...) +# ends of it. (0.17.0rc2 vs 0.17.0rc2a1, 0.17.0rc2.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From ae0c71f011a82fad3070c640ebe02a8313dcd18d Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 22 May 2020 17:13:51 -0400 Subject: [PATCH 118/603] Raise exception. Add README note --- README.md | 7 +++++++ dbt/adapters/spark/impl.py | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 35d7f9d5..edec2f43 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,13 @@ # dbt-spark +This plugin ports [dbt](https://getdbt.com) functionality to Spark. It supports +running dbt against Spark clusters that are hosted via Databricks (AWS + Azure), +Amazon EMR, or Docker. + +We have not tested extensively against older versions of Apache Spark. The +plugin uses syntax that requires version 2.2.0 or newer. 
+ ### Documentation For more information on using Spark with dbt, consult the dbt documentation: - [Spark profile](https://docs.getdbt.com/docs/profile-spark) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 95ba4484..7e1e3910 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -123,7 +123,13 @@ def list_relations_without_caching( return [] relations = [] - for _schema, name, _, information in results: + for row in results: + if len(row) != 4: + raise dbt.exceptions.RuntimeException( + f'Invalid value from "show table extended ...", ' + f'got {len(row)} values, expected 4' + ) + _schema, name, _, information = row rel_type = ('view' if 'Type: VIEW' in information else 'table') relation = self.Relation.create( schema=_schema, From 3216cd47e4f0e438682ea9f69fe7134a2089ca6a Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 26 May 2020 14:21:14 -0600 Subject: [PATCH 119/603] Fix the catalog, making use of rc3 fixes --- dbt/adapters/spark/impl.py | 144 +++++++------------------- dbt/include/spark/macros/adapters.sql | 5 + 2 files changed, 40 insertions(+), 109 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 7e1e3910..2e94c106 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,25 +1,25 @@ from dataclasses import dataclass -from typing import Optional, List, Dict, Any, Union +from typing import Optional, List, Dict, Any, Union, Iterable import agate -import dbt.exceptions + import dbt +import dbt.exceptions + from dbt.adapters.base import AdapterConfig from dbt.adapters.sql import SQLAdapter - from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark import SparkRelation from dbt.adapters.spark import SparkColumn - - from dbt.adapters.base import BaseRelation - +from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER from dbt.logger import GLOBAL_LOGGER as logger GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation' LIST_SCHEMAS_MACRO_NAME = 'list_schemas' LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' DROP_RELATION_MACRO_NAME = 'drop_relation' +FETCH_TBL_PROPERTIES_MACRO_NAME = 'fetch_tbl_properties' KEY_TABLE_OWNER = 'Owner' KEY_TABLE_STATISTICS = 'Statistics' @@ -171,7 +171,7 @@ def parse_describe_extended( raw_table_stats = metadata.get(KEY_TABLE_STATISTICS) table_stats = SparkColumn.convert_table_stats(raw_table_stats) return [SparkColumn( - table_database=relation.database, + table_database=None, table_schema=relation.schema, table_name=relation.name, table_type=relation.type, @@ -195,102 +195,29 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: rows: List[agate.Row] = super().get_columns_in_relation(relation) return self.parse_describe_extended(relation, rows) - @staticmethod - def _parse_relation(relation: Relation, - table_columns: List[Column], - rel_type: str, - properties: Dict[str, str] = None) -> List[dict]: - properties = properties or {} - statistics = {} - table_owner_key = 'Owner' - - # First check if it is present in the properties - table_owner = properties.get(table_owner_key) - - found_detailed_table_marker = False - for column in table_columns: - if column.name == '# Detailed Table Information': - found_detailed_table_marker = True - - # In case there is another column with the name Owner - if not found_detailed_table_marker: - continue - - if not table_owner and column.name == table_owner_key: - table_owner = column.data_type - - if column.name == 'Statistics': - # format: 1109049927 
bytes, 14093476 rows - statistics = {stats.split(" ")[1]: int(stats.split(" ")[0]) for - stats in column.data_type.split(', ')} - - columns = [] - for column_index, column in enumerate(table_columns): - # Fixes for pseudo-columns with no type - if column.name in { - '# Partition Information', - '# col_name', - '' - }: - continue - elif column.name == '# Detailed Table Information': - # Loop until the detailed table information - break - elif column.data_type is None: - continue - - column_data = ( - relation.database, - relation.schema, - relation.name, - rel_type, - None, - table_owner, - column.name, - column_index, - column.data_type, - None, - - # Table level stats - 'Table size', - statistics.get("bytes"), - "The size of the table in bytes", - statistics.get("bytes") is not None, - - # Column level stats - 'Number of rows', - statistics.get("rows"), - "The number of rows in the table", - statistics.get("rows") is not None - ) - - column_dict = dict(zip(SparkAdapter.COLUMN_NAMES, column_data)) - columns.append(column_dict) - - return columns - - def _massage_column_for_catalog( - self, column: SparkColumn - ) -> Dict[str, Any]: - dct = column.to_dict() - # different expectations here - Column.column is the name - dct['column_name'] = dct.pop('column') - dct['column_type'] = dct.pop('dtype') - # table_database can't be None in core. - if dct['table_database'] is None: - dct['table_database'] = dct['table_schema'] - return dct - - def _get_catalog_for_relations(self, database: str, schema: str): - with self.connection_named(f'{database}.{schema}'): - columns = [] - for relation in self.list_relations(database, schema): - logger.debug("Getting table schema for relation {}", relation) - columns.extend( - self._massage_column_for_catalog(col) - for col in self.get_columns_in_relation(relation) - ) - return agate.Table.from_object(columns) + def _get_columns_for_catalog( + self, relation: SparkRelation + ) -> Iterable[Dict[str, Any]]: + properties = self.get_properties(relation) + columns = self.get_columns_in_relation(relation) + owner = properties.get(KEY_TABLE_OWNER) + + for column in columns: + if owner: + column.table_owner = owner + # convert SparkColumns into catalog dicts + as_dict = column.to_dict() + as_dict['column_name'] = as_dict.pop('column', None) + as_dict['column_type'] = as_dict.pop('dtype') + as_dict['table_database'] = None + yield as_dict + + def get_properties(self, relation: Relation) -> Dict[str, str]: + properties = self.execute_macro( + FETCH_TBL_PROPERTIES_MACRO_NAME, + kwargs={'relation': relation} + ) + return dict(properties) def _get_one_catalog( self, information_schema, schemas, manifest, @@ -306,14 +233,13 @@ def _get_one_catalog( schema = list(schemas)[0] with self.connection_named(name): - columns = [] + columns: List[Dict[str, Any]] = [] for relation in self.list_relations(database, schema): logger.debug("Getting table schema for relation {}", relation) - columns.extend( - self._massage_column_for_catalog(col) - for col in self.get_columns_in_relation(relation) - ) - return agate.Table.from_object(columns) + columns.extend(self._get_columns_for_catalog(relation)) + return agate.Table.from_object( + columns, column_types=DEFAULT_TYPE_TESTER + ) def check_schema_exists(self, database, schema): results = self.execute_macro( diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index d0e11fca..02253fe5 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -153,3 +153,8 @@ drop {{ 
relation.type }} if exists {{ relation }} {%- endcall %} {% endmacro %} + + +{% macro spark__generate_database_name(custom_database_name=none, node=none) -%} + {% do return(None) %} +{%- endmacro %} From 78f2ae176669e914bbb79fcf157b0dec69439df7 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 26 May 2020 14:53:24 -0600 Subject: [PATCH 120/603] =?UTF-8?q?Bump=20dbt=20version:=200.17.0rc2=20?= =?UTF-8?q?=E2=86=92=200.17.0rc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index a169d817..d9b0cdd5 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0rc2 +current_version = 0.17.0rc3 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5c3ccfa2..fcce6914 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0rc2 +current_version = 0.17.0rc3 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index ef664b00..ef097f1b 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.17.0rc2" +version = "0.17.0rc3" diff --git a/requirements.txt b/requirements.txt index 219e52ff..c14a957d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.17.0rc2 +dbt-core==0.17.0rc3 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 00c26e51..e067e75b 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.17.0rc2' +dbt_version = '0.17.0rc3' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.17.0rc2 vs 0.17.0rc2a1, 0.17.0rc2.1, ...) +# ends of it. (0.17.0rc3 vs 0.17.0rc3a1, 0.17.0rc3.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 6eb854facffc4de88bb0823b37cb04e831d04440 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 27 May 2020 08:02:15 -0600 Subject: [PATCH 121/603] Stop monkeying around with database --- dbt/adapters/spark/relation.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 2106e5cb..4aa06f82 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -25,16 +25,8 @@ class SparkRelation(BaseRelation): quote_character: str = '`' def __post_init__(self): - # some core things set database='', which we should ignore. - if self.database and self.database != self.schema: - raise RuntimeException( - f'Error while parsing relation {self.name}: \n' - f' identifier: {self.identifier} \n' - f' schema: {self.schema} \n' - f' database: {self.database} \n' - f'On Spark, database should not be set. Use the schema ' - f'config to set a custom schema/database for this relation.' 
- ) + if self.database != self.schema and self.database: + raise RuntimeException('Cannot set database in spark!') def render(self): if self.include_policy.database and self.include_policy.schema: From 76c23dbae8b4f200bea4e07eab893378f7f1848e Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 29 May 2020 13:38:20 -0600 Subject: [PATCH 122/603] Default the database to None instead of schema in credentials Fix catalog generation to properly handle 1 database (None!) with many schemas Update unit tests --- dbt/adapters/spark/connections.py | 2 +- dbt/adapters/spark/impl.py | 25 +++++++++++++++++++++++-- test/unit/test_adapter.py | 22 ++++++++++++---------- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 2c152fc8..4b2d89c3 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -53,7 +53,7 @@ def __post_init__(self): f'On Spark, database must be omitted or have the same value as' f' schema.' ) - self.database = self.schema + self.database = None @property def type(self): diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 2e94c106..5e2b3447 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,12 +1,13 @@ +from concurrent.futures import Future from dataclasses import dataclass from typing import Optional, List, Dict, Any, Union, Iterable - import agate import dbt import dbt.exceptions from dbt.adapters.base import AdapterConfig +from dbt.adapters.base.impl import catch_as_completed from dbt.adapters.sql import SQLAdapter from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark import SparkRelation @@ -14,6 +15,7 @@ from dbt.adapters.base import BaseRelation from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER from dbt.logger import GLOBAL_LOGGER as logger +from dbt.utils import executor GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation' LIST_SCHEMAS_MACRO_NAME = 'list_schemas' @@ -219,6 +221,24 @@ def get_properties(self, relation: Relation) -> Dict[str, str]: ) return dict(properties) + def get_catalog(self, manifest): + schema_map = self._get_catalog_schemas(manifest) + if len(schema_map) != 1: + dbt.exceptions.raise_compiler_error( + f'Expected only one database in get_catalog, found ' + f'{list(schema_map)}' + ) + + with executor(self.config) as tpe: + futures: List[Future[agate.Table]] = [] + for info, schemas in schema_map.items(): + for schema in schemas: + futures.append(tpe.submit( + self._get_one_catalog, info, [schema], manifest + )) + catalogs, exceptions = catch_as_completed(futures) + return catalogs, exceptions + def _get_one_catalog( self, information_schema, schemas, manifest, ) -> agate.Table: @@ -226,7 +246,8 @@ def _get_one_catalog( if len(schemas) != 1: dbt.exceptions.raise_compiler_error( - 'Expected only one schema in spark _get_one_catalog' + f'Expected only one schema in spark _get_one_catalog, found ' + f'{schemas}' ) database = information_schema.database diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index e453c12b..74f9790a 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -73,11 +73,11 @@ def hive_http_connect(thrift_transport): connection.handle # trigger lazy-load self.assertEqual(connection.state, 'open') - self.assertNotEqual(connection.handle, None) + self.assertIsNotNone(connection.handle) self.assertEqual(connection.credentials.cluster, '01234-23423-coffeetime') 
self.assertEqual(connection.credentials.token, 'abc123') self.assertEqual(connection.credentials.schema, 'analytics') - self.assertEqual(connection.credentials.database, 'analytics') + self.assertIsNone(connection.credentials.database) def test_thrift_connection(self): config = self._get_target_thrift(self.project_cfg) @@ -93,9 +93,9 @@ def hive_thrift_connect(host, port, username): connection.handle # trigger lazy-load self.assertEqual(connection.state, 'open') - self.assertNotEqual(connection.handle, None) + self.assertIsNotNone(connection.handle) self.assertEqual(connection.credentials.schema, 'analytics') - self.assertEqual(connection.credentials.database, 'analytics') + self.assertIsNone(connection.credentials.database) def test_parse_relation(self): self.maxDiff = None @@ -106,6 +106,7 @@ def test_parse_relation(self): identifier='mytable', type=rel_type ) + assert relation.database is None # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED plain_rows = [ @@ -117,7 +118,7 @@ def test_parse_relation(self): ('dt', 'date'), (None, None), ('# Detailed Table Information', None), - ('Database', relation.database), + ('Database', None), ('Owner', 'root'), ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'), ('Last Access', 'Wed May 20 19:25:00 UTC 1925'), @@ -136,7 +137,7 @@ def test_parse_relation(self): rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) self.assertEqual(len(rows), 3) self.assertEqual(rows[0].to_dict(omit_none=False), { - 'table_database': relation.database, + 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, 'table_type': rel_type, @@ -150,7 +151,7 @@ def test_parse_relation(self): }) self.assertEqual(rows[1].to_dict(omit_none=False), { - 'table_database': relation.database, + 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, 'table_type': rel_type, @@ -164,7 +165,7 @@ def test_parse_relation(self): }) self.assertEqual(rows[2].to_dict(omit_none=False), { - 'table_database': relation.database, + 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, 'table_type': rel_type, @@ -186,6 +187,7 @@ def test_parse_relation_with_statistics(self): identifier='mytable', type=rel_type ) + assert relation.database is None # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED plain_rows = [ @@ -193,7 +195,7 @@ def test_parse_relation_with_statistics(self): ('# Partition Information', 'data_type'), (None, None), ('# Detailed Table Information', None), - ('Database', relation.database), + ('Database', None), ('Owner', 'root'), ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'), ('Last Access', 'Wed May 20 19:25:00 UTC 1925'), @@ -213,7 +215,7 @@ def test_parse_relation_with_statistics(self): rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) self.assertEqual(len(rows), 1) self.assertEqual(rows[0].to_dict(omit_none=False), { - 'table_database': relation.database, + 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, 'table_type': rel_type, From 682ee8997eacbe3f5b4577c425b8ab554c9ec7b6 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 10 Jun 2020 07:26:51 -0600 Subject: [PATCH 123/603] =?UTF-8?q?Bump=20dbt=20version:=200.17.0rc3=20?= =?UTF-8?q?=E2=86=92=200.17.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files 
changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index d9b0cdd5..0434dbd2 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0rc3 +current_version = 0.17.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index fcce6914..c63c0486 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0rc3 +current_version = 0.17.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index ef097f1b..8d361f89 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.17.0rc3" +version = "0.17.0" diff --git a/requirements.txt b/requirements.txt index c14a957d..db1f1b66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.17.0rc3 +dbt-core==0.17.0 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index e067e75b..f75e80b1 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.17.0rc3' +dbt_version = '0.17.0' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.17.0rc3 vs 0.17.0rc3a1, 0.17.0rc3.1, ...) +# ends of it. (0.17.0 vs 0.17.0a1, 0.17.0.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 4fed73013a4d7d690edcd1637a8fc68db24ecac3 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 10 Jun 2020 15:46:17 -0400 Subject: [PATCH 124/603] Snapshots: create relations w/ db=none --- dbt/include/spark/macros/materializations/snapshot.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 105c13fd..78214641 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -35,7 +35,7 @@ {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, schema=target_relation.schema, - database=target_relation.database, + database=none, type='view') -%} {% set select = snapshot_staging_table(strategy, sql, target_relation) %} @@ -90,7 +90,7 @@ {% endif %} {% set target_relation_exists, target_relation = get_or_create_relation( - database=model.database, + database=none, schema=model.schema, identifier=target_table, type='table') -%} From e2f92fa7f733c1935b710dfc7efd8d5c3599bb89 Mon Sep 17 00:00:00 2001 From: Claire Carroll Date: Mon, 22 Jun 2020 08:51:02 -0400 Subject: [PATCH 125/603] Update logo --- README.md | 4 ++-- etc/dbt-logo-full.svg | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 etc/dbt-logo-full.svg diff --git a/README.md b/README.md index edec2f43..d01494fe 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- dbt logo
+ dbt logo
@@ -13,7 +13,7 @@ # dbt-spark This plugin ports [dbt](https://getdbt.com) functionality to Spark. It supports -running dbt against Spark clusters that are hosted via Databricks (AWS + Azure), +running dbt against Spark clusters that are hosted via Databricks (AWS + Azure), Amazon EMR, or Docker. We have not tested extensively against older versions of Apache Spark. The diff --git a/etc/dbt-logo-full.svg b/etc/dbt-logo-full.svg new file mode 100644 index 00000000..88f84b70 --- /dev/null +++ b/etc/dbt-logo-full.svg @@ -0,0 +1 @@ + \ No newline at end of file From 43003d1091d9a086f96305946cad0b0f03d341a0 Mon Sep 17 00:00:00 2001 From: Bruno Murino Date: Thu, 25 Jun 2020 23:43:57 +0100 Subject: [PATCH 126/603] added sample_profiles.yml --- dbt/include/spark/sample_profiles.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dbt/include/spark/sample_profiles.yml diff --git a/dbt/include/spark/sample_profiles.yml b/dbt/include/spark/sample_profiles.yml new file mode 100644 index 00000000..91279912 --- /dev/null +++ b/dbt/include/spark/sample_profiles.yml @@ -0,0 +1,14 @@ +your_profile_name: + target: dev + outputs: + dev: + method: http + type: spark + schema: analytics + host: yourorg.sparkhost.com + organization: 1234567891234567 # Azure Databricks ONLY + port: 443 + token: abc123 + cluster: 01234-23423-coffeetime + connect_retries: 5 + connect_timeout: 60 From cfc593ab44bd87d77add6b65b650e87bbcf016c6 Mon Sep 17 00:00:00 2001 From: Bruno Murino Date: Fri, 26 Jun 2020 18:13:29 +0100 Subject: [PATCH 127/603] updated setup.py to include sample_profiles.yml --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f75e80b1..7baea132 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ def _dbt_spark_version(): package_data={ 'dbt': [ 'include/spark/dbt_project.yml', + 'include/spark/sample_profiles.yml', 'include/spark/macros/*.sql', 'include/spark/macros/**/*.sql', ] From 67b20171bae404a94ab1ed7832fb850de8256daf Mon Sep 17 00:00:00 2001 From: Bruno Murino Date: Fri, 26 Jun 2020 20:06:15 +0100 Subject: [PATCH 128/603] added thrift profile in sample_profiles.yml --- dbt/include/spark/sample_profiles.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/dbt/include/spark/sample_profiles.yml b/dbt/include/spark/sample_profiles.yml index 91279912..1a7bc944 100644 --- a/dbt/include/spark/sample_profiles.yml +++ b/dbt/include/spark/sample_profiles.yml @@ -1,9 +1,10 @@ your_profile_name: - target: dev outputs: - dev: - method: http + + # Use this if connecting to a hosted spark (e.g. 
Databricks) + dev_http: type: spark + method: http schema: analytics host: yourorg.sparkhost.com organization: 1234567891234567 # Azure Databricks ONLY @@ -12,3 +13,16 @@ your_profile_name: cluster: 01234-23423-coffeetime connect_retries: 5 connect_timeout: 60 + + # Use this if connecting to Dockerized spark + dev_thrift: + type: spark + method: thrift + schema: analytics + host: 127.0.0.1 + port: 10001 + user: hadoop + connect_retries: 5 + connect_timeout: 60 + + target: dev From 0dc7764bc1d43be43e76f95be0bbef7d29a3708e Mon Sep 17 00:00:00 2001 From: Bruno Murino Date: Sat, 4 Jul 2020 21:42:33 +0100 Subject: [PATCH 129/603] udpated sample profile --- dbt/include/spark/sample_profiles.yml | 30 ++++++++++++--------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/dbt/include/spark/sample_profiles.yml b/dbt/include/spark/sample_profiles.yml index 1a7bc944..52d07891 100644 --- a/dbt/include/spark/sample_profiles.yml +++ b/dbt/include/spark/sample_profiles.yml @@ -1,28 +1,24 @@ -your_profile_name: +default: outputs: # Use this if connecting to a hosted spark (e.g. Databricks) - dev_http: + dev: type: spark method: http - schema: analytics - host: yourorg.sparkhost.com - organization: 1234567891234567 # Azure Databricks ONLY - port: 443 - token: abc123 - cluster: 01234-23423-coffeetime - connect_retries: 5 - connect_timeout: 60 + schema: [dev_schema] + host: [host] + organization: [organization id] # Azure Databricks ONLY + port: [port] + token: [token] + cluster: [cluster id] # Use this if connecting to Dockerized spark - dev_thrift: + prod: type: spark method: thrift - schema: analytics - host: 127.0.0.1 - port: 10001 - user: hadoop - connect_retries: 5 - connect_timeout: 60 + schema: [dev_schema] + host: [host] + port: [port] + user: [prod_user] target: dev From d3343f15df68ecc18a3becdea76c0104eeb02699 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 20 Jul 2020 13:48:48 -0400 Subject: [PATCH 130/603] Bump v0.17.1 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 3 +-- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 0434dbd2..88bab2f0 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0 +current_version = 0.17.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c63c0486..6fde57ad 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.0 +current_version = 0.17.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) @@ -27,4 +27,3 @@ first_value = 1 first_value = 1 [bumpversion:file:dbt/adapters/spark/__version__.py] - diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 8d361f89..9f81cce4 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.17.0" +version = "0.17.1" diff --git a/requirements.txt b/requirements.txt index db1f1b66..53c1a531 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.17.0 +dbt-core==0.17.1 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index f75e80b1..ab989a0b 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.17.0' +dbt_version = '0.17.1' # the package 
version should be the dbt version, with maybe some things on the -# ends of it. (0.17.0 vs 0.17.0a1, 0.17.0.1, ...) +# ends of it. (0.17.1 vs 0.17.1a1, 0.17.1.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 87c58d2724bc612affde241c78bcb074deab0fc0 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 29 Jul 2020 14:21:43 -0400 Subject: [PATCH 131/603] Bump v0.17.2rc1 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- dbt/adapters/spark/impl.py | 3 +-- requirements.txt | 2 +- setup.py | 4 ++-- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 88bab2f0..afdd1cd9 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.1 +current_version = 0.17.2rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 6fde57ad..5584ebbc 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.1 +current_version = 0.17.2rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 9f81cce4..b3ac9e85 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.17.1" +version = "0.17.2rc1" diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 5e2b3447..dead6a0f 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -112,8 +112,7 @@ def list_relations_without_caching( try: results = self.execute_macro( LIST_RELATIONS_MACRO_NAME, - kwargs=kwargs, - release=True + kwargs=kwargs ) except dbt.exceptions.RuntimeException as e: errmsg = getattr(e, 'msg', '') diff --git a/requirements.txt b/requirements.txt index 53c1a531..e31c9ad8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.17.1 +dbt-core==0.17.2rc1 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index ab989a0b..2d90406f 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.17.1' +dbt_version = '0.17.2rc1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.17.1 vs 0.17.1a1, 0.17.1.1, ...) +# ends of it. (0.17.2rc1 vs 0.17.2rc1a1, 0.17.2rc1.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 492c58babfdecbc337b6e37a3dd71cc17a1f8a61 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 5 Aug 2020 14:32:53 -0400 Subject: [PATCH 132/603] Bump 0.17.2 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index afdd1cd9..3940ac1d 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.2rc1 +current_version = 0.17.2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5584ebbc..7770a613 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.2rc1 +current_version = 0.17.2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index b3ac9e85..0c0c3cfa 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.17.2rc1" +version = "0.17.2" diff --git a/requirements.txt b/requirements.txt index e31c9ad8..49efc4ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.17.2rc1 +dbt-core==0.17.2 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 2d90406f..33d1b273 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.17.2rc1' +dbt_version = '0.17.2' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.17.2rc1 vs 0.17.2rc1a1, 0.17.2rc1.1, ...) +# ends of it. (0.17.2 vs 0.17.2a1, 0.17.2.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 475c44fed95ec82ac2712d1d28c44c37b3c58631 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 20 Aug 2020 13:24:21 -0600 Subject: [PATCH 133/603] add support for timestamps that works ok on databricks --- dbt/include/spark/macros/materializations/seed.sql | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index 66bcf41f..fefc7b78 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -1,7 +1,5 @@ {% macro spark__load_csv_rows(model, agate_table) %} {% set batch_size = 1000 %} - {% set cols_sql = ", ".join(agate_table.column_names) %} - {% set bindings = [] %} {% set statements = [] %} @@ -9,24 +7,28 @@ {% set bindings = [] %} {% for row in chunk %} - {% set _ = bindings.extend(row) %} + {% do bindings.extend(row) %} {% endfor %} {% set sql %} insert into {{ this.render() }} values {% for row in chunk -%} - ({%- for column in agate_table.column_names -%} + ({%- for column in agate_table.columns -%} + {%- if 'ISODate' in (column.data_type | string) -%} + cast(%s as timestamp) + {%- else -%} %s + {%- endif -%} {%- if not loop.last%},{%- endif %} {%- endfor -%}) {%- if not loop.last%},{%- endif %} {%- endfor %} {% endset %} - {% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} + {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} {% if loop.index0 == 0 %} - {% set _ = statements.append(sql) %} + {% do statements.append(sql) %} {% endif %} {% endfor %} From 10a5c7f47e4530d16c42e268199c4957c1516de7 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 21 Aug 2020 07:52:48 -0600 Subject: [PATCH 134/603] handle dbt docs generate more gracefully if dbt has not run --- dbt/adapters/spark/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index dead6a0f..4cbfb61c 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -222,7 +222,7 @@ def get_properties(self, relation: Relation) -> Dict[str, str]: def get_catalog(self, manifest): schema_map = self._get_catalog_schemas(manifest) - if len(schema_map) != 1: + if len(schema_map) > 1: dbt.exceptions.raise_compiler_error( f'Expected only one database in get_catalog, found ' f'{list(schema_map)}' From d436c8195ac2c84d866ca68e298d12ef360032b6 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 21 Aug 2020 07:53:03 -0600 Subject: [PATCH 135/603] allow seeds to supply options (I want delta format seeds for testing) --- .../spark/macros/materializations/seed.sql | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index fefc7b78..1f088e83 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -44,6 +44,35 @@ {{ return(sql) }} {% endmacro %} + +{% macro spark__create_csv_table(model, agate_table) %} + {%- set column_override = model['config'].get('column_types', {}) -%} + {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} + + {% set sql %} + create table {{ this.render() }} ( + {%- for col_name in agate_table.column_names -%} + {%- set inferred_type = 
adapter.convert_type(agate_table, loop.index0) -%} + {%- set type = column_override.get(col_name, inferred_type) -%} + {%- set column_name = (col_name | string) -%} + {{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%} + {%- endfor -%} + ) + {{ file_format_clause() }} + {{ partition_cols(label="partitioned by") }} + {{ clustered_cols(label="clustered by") }} + {{ location_clause() }} + {{ comment_clause() }} + {% endset %} + + {% call statement('_') -%} + {{ sql }} + {%- endcall %} + + {{ return(sql) }} +{% endmacro %} + + {% materialization seed, adapter='spark' %} {%- set identifier = model['alias'] -%} From 6dc72ba720f12a6cf7bf2cfab5d9efa8936680f2 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 23 Jul 2020 07:16:59 -0600 Subject: [PATCH 136/603] test support --- dbt/adapters/spark/impl.py | 63 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 4cbfb61c..73e66558 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -269,3 +269,66 @@ def check_schema_exists(self, database, schema): exists = True if schema in [row[0] for row in results] else False return exists + + def get_rows_different_sql( + self, + relation_a: BaseRelation, + relation_b: BaseRelation, + column_names: Optional[List[str]] = None, + except_operator: str = 'EXCEPT', + ) -> str: + """Generate SQL for a query that returns a single row with a two + columns: the number of rows that are different between the two + relations and the number of mismatched rows. + """ + # This method only really exists for test reasons. + names: List[str] + if column_names is None: + columns = self.get_columns_in_relation(relation_a) + names = sorted((self.quote(c.name) for c in columns)) + else: + names = sorted((self.quote(n) for n in column_names)) + columns_csv = ', '.join(names) + + sql = COLUMNS_EQUAL_SQL.format( + columns=columns_csv, + relation_a=str(relation_a), + relation_b=str(relation_b), + ) + + return sql + + +# spark does something interesting with joins when both tables have the same +# static values for the join condition and complains that the join condition is +# "trivial". Which is true, though it seems like an unreasonable cause for +# failure! It also doesn't like the `from foo, bar` syntax as opposed to +# `from foo cross join bar`. 
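Before the query constant below, a brief illustration of the quirk described in the comment above — a hypothetical PySpark session, not part of this patch: with spark.sql.crossJoin.enabled left at its Spark 2.x default, a join condition that folds to a constant may be rejected, while the explicit cross join form is accepted.

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    spark.sql("select 1 as id").createOrReplaceTempView("foo")
    spark.sql("select 1 as id").createOrReplaceTempView("bar")

    # Both sides hold the same constant, so the condition folds to a trivial
    # predicate and Spark may raise AnalysisException ("implicit cartesian
    # product ... join condition is missing or trivial"):
    #   spark.sql("select * from foo join bar on foo.id = bar.id")
    # The explicit syntax used by the comparison query below is accepted:
    spark.sql("select * from foo cross join bar").show()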
+COLUMNS_EQUAL_SQL = ''' +with diff_count as ( + SELECT + 1 as id, + COUNT(*) as num_missing FROM ( + (SELECT {columns} FROM {relation_a} EXCEPT + SELECT {columns} FROM {relation_b}) + UNION ALL + (SELECT {columns} FROM {relation_b} EXCEPT + SELECT {columns} FROM {relation_a}) + ) as a +), table_a as ( + SELECT COUNT(*) as num_rows FROM {relation_a} +), table_b as ( + SELECT COUNT(*) as num_rows FROM {relation_b} +), row_count_diff as ( + select + 1 as id, + table_a.num_rows - table_b.num_rows as difference + from table_a + cross join table_b +) +select + row_count_diff.difference as row_count_difference, + diff_count.num_missing as num_mismatched +from row_count_diff +cross join diff_count +'''.strip() From 690a6597a9c9a214b752b13ba2b372c1618852a1 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 21 Aug 2020 13:12:12 -0600 Subject: [PATCH 137/603] =?UTF-8?q?Bump=20dbt=20version:=200.17.2=20?= =?UTF-8?q?=E2=86=92=200.18.0rc1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 3 ++- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 3940ac1d..6b130e5b 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.2 +current_version = 0.18.0rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 7770a613..b01d9f52 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.17.2 +current_version = 0.18.0rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) @@ -27,3 +27,4 @@ first_value = 1 first_value = 1 [bumpversion:file:dbt/adapters/spark/__version__.py] + diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 0c0c3cfa..2997a2ad 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.17.2" +version = "0.18.0rc1" diff --git a/requirements.txt b/requirements.txt index 49efc4ca..1e815fae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.17.2 +dbt-core==0.18.0rc1 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 33d1b273..3f759682 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.17.2' +dbt_version = '0.18.0rc1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.17.2 vs 0.17.2a1, 0.17.2.1, ...) +# ends of it. (0.18.0rc1 vs 0.18.0rc1a1, 0.18.0rc1.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 43bae7f7aebd0931defc4951b00c633a24f7ce7c Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 21 Aug 2020 13:19:52 -0600 Subject: [PATCH 138/603] unit test fixes --- test/unit/utils.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/test/unit/utils.py b/test/unit/utils.py index 53630bba..8c9a19a0 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -38,7 +38,7 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'): from dbt.config import Profile from dbt.config.renderer import ProfileRenderer from dbt.context.base import generate_base_context - from dbt.utils import parse_cli_vars + from dbt.config.utils import parse_cli_vars if not isinstance(cli_vars, dict): cli_vars = parse_cli_vars(cli_vars) @@ -50,11 +50,11 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'): ) -def project_from_dict(project, profile, packages=None, cli_vars='{}'): +def project_from_dict(project, profile, packages=None, selectors=None, cli_vars='{}'): from dbt.context.target import generate_target_context from dbt.config import Project from dbt.config.renderer import DbtProjectYamlRenderer - from dbt.utils import parse_cli_vars + from dbt.config.utils import parse_cli_vars if not isinstance(cli_vars, dict): cli_vars = parse_cli_vars(cli_vars) @@ -63,11 +63,11 @@ def project_from_dict(project, profile, packages=None, cli_vars='{}'): project_root = project.pop('project-root', os.getcwd()) return Project.render_from_dict( - project_root, project, packages, renderer + project_root, project, packages, selectors, renderer ) -def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): +def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars='{}'): from dbt.config import Project, Profile, RuntimeConfig from copy import deepcopy @@ -88,6 +88,7 @@ def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): deepcopy(project), profile, packages, + selectors, cli_vars, ) @@ -101,14 +102,20 @@ def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): ) -def inject_adapter(value): +def inject_plugin(plugin): + from dbt.adapters.factory import FACTORY + key = plugin.adapter.type() + FACTORY.plugins[key] = plugin + + +def inject_adapter(value, plugin): """Inject the given adapter into the adapter factory, so your hand-crafted artisanal adapter will be available from get_adapter() as if dbt loaded it. 
""" + inject_plugin(plugin) from dbt.adapters.factory import FACTORY key = value.type() FACTORY.adapters[key] = value - FACTORY.adapter_types[key] = type(value) class ContractTestCase(TestCase): From 9dc261587c1cecac8710a0d5444f543b40fe97e9 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 21 Aug 2020 15:47:06 -0600 Subject: [PATCH 139/603] use submit_connected for catalog generation, like core --- dbt/adapters/spark/impl.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 73e66558..15f89f7d 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -232,7 +232,8 @@ def get_catalog(self, manifest): futures: List[Future[agate.Table]] = [] for info, schemas in schema_map.items(): for schema in schemas: - futures.append(tpe.submit( + futures.append(tpe.submit_connected( + self, schema, self._get_one_catalog, info, [schema], manifest )) catalogs, exceptions = catch_as_completed(futures) @@ -241,8 +242,6 @@ def get_catalog(self, manifest): def _get_one_catalog( self, information_schema, schemas, manifest, ) -> agate.Table: - name = f'{information_schema.database}.information_schema' - if len(schemas) != 1: dbt.exceptions.raise_compiler_error( f'Expected only one schema in spark _get_one_catalog, found ' @@ -252,14 +251,13 @@ def _get_one_catalog( database = information_schema.database schema = list(schemas)[0] - with self.connection_named(name): - columns: List[Dict[str, Any]] = [] - for relation in self.list_relations(database, schema): - logger.debug("Getting table schema for relation {}", relation) - columns.extend(self._get_columns_for_catalog(relation)) - return agate.Table.from_object( - columns, column_types=DEFAULT_TYPE_TESTER - ) + columns: List[Dict[str, Any]] = [] + for relation in self.list_relations(database, schema): + logger.debug("Getting table schema for relation {}", relation) + columns.extend(self._get_columns_for_catalog(relation)) + return agate.Table.from_object( + columns, column_types=DEFAULT_TYPE_TESTER + ) def check_schema_exists(self, database, schema): results = self.execute_macro( From dc90c10cecee5264d3fdc5e9a64b34ee2b45318c Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 14 Sep 2020 14:49:25 -0600 Subject: [PATCH 140/603] =?UTF-8?q?Bump=20dbt=20version:=200.18.0rc1=20?= =?UTF-8?q?=E2=86=92=200.18.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 6b130e5b..ddf6a8de 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.18.0rc1 +current_version = 0.18.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b01d9f52..b9e31fd7 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.18.0rc1 +current_version = 0.18.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 2997a2ad..561d56e8 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.18.0rc1" +version = "0.18.0" diff --git a/requirements.txt b/requirements.txt index 1e815fae..b1d6e5c2 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -1,3 +1,3 @@ -dbt-core==0.18.0rc1 +dbt-core==0.18.0 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index 3f759682..7247c618 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.18.0rc1' +dbt_version = '0.18.0' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.18.0rc1 vs 0.18.0rc1a1, 0.18.0rc1.1, ...) +# ends of it. (0.18.0 vs 0.18.0a1, 0.18.0.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From b94e71e8899f84bb859ff5ff06e1936a4707b32e Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 14 Sep 2020 15:33:27 -0600 Subject: [PATCH 141/603] Make circleci tests work --- .circleci/config.yml | 17 ++++----- dev_requirements.txt | 12 +++---- test/integration/profiles.yml | 25 ------------- test/integration/spark-databricks.dbtspec | 43 +++++++++++++++++++++++ test/integration/spark.dbtspec | 33 +++++++++++++++++ tox.ini | 14 ++++---- 6 files changed, 93 insertions(+), 51 deletions(-) delete mode 100644 test/integration/profiles.yml create mode 100644 test/integration/spark-databricks.dbtspec create mode 100644 test/integration/spark.dbtspec diff --git a/.circleci/config.yml b/.circleci/config.yml index f89aa0a4..2c7624f7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,21 +2,19 @@ version: 2.1 jobs: unit: + environment: + DBT_INVOCATION_ENV: circle docker: - - image: fishtownanalytics/test-container:6 - environment: - DBT_INVOCATION_ENV: circle + - image: fishtownanalytics/test-container:9 steps: - checkout - run: tox -e flake8,unit integration-spark2: + environment: + DBT_INVOCATION_ENV: circle docker: - - - image: fishtownanalytics/test-container:6 - environment: - DBT_INVOCATION_ENV: circle - + - image: fishtownanalytics/test-container:9 - image: godatadriven/spark:2 environment: WAIT_FOR: localhost:5432 @@ -48,9 +46,6 @@ jobs: name: Run integration tests command: tox -e integration-spark-thrift no_output_timeout: 1h - environment: - DBT_PROFILES_DIR: /home/dbt_test_user/project/test/integration/ - - store_artifacts: path: ./logs diff --git a/dev_requirements.txt b/dev_requirements.txt index 096a5469..1eadc8f5 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,19 +1,15 @@ freezegun==0.3.9 -pytest==4.4.0 +pytest==6.0.2 mock>=1.3.0 flake8>=3.5.0 pytz==2017.2 bumpversion==0.5.3 -tox==2.5.0 +tox==3.2.0 ipdb -pytest-xdist>=1.28.0,<2 +pytest-xdist>=2.1.0,<3 flaky>=3.5.3,<4 # Test requirements -behave==1.2.6 -parse==1.8.4 -parse-type==0.4.2 -PyHamcrest==1.9.0 -six>=1.14.0 +pytest-dbt-adapter==0.2.0 sasl==0.2.1 thrift_sasl==0.4.1 diff --git a/test/integration/profiles.yml b/test/integration/profiles.yml deleted file mode 100644 index dca5d36f..00000000 --- a/test/integration/profiles.yml +++ /dev/null @@ -1,25 +0,0 @@ -spark-http: - target: ci - outputs: - ci: - type: spark - method: http - schema: 'public_integration_tests' - host: '{{ env_var("SPARK_HOST") }}' - port: 443 - token: '{{ env_var("SPARK_TOKEN") }}' - cluster: '{{ env_var("SPARK_CLUSTER") }}' - connect_timeout: 60 - connect_retries: 10 -spark-thrift: - target: ci - outputs: - ci: - method: thrift - type: spark - schema: analytics - host: localhost - port: 10000 - user: dbt - connect_retries: 5 - connect_timeout: 60 diff --git 
a/test/integration/spark-databricks.dbtspec b/test/integration/spark-databricks.dbtspec new file mode 100644 index 00000000..72c36df7 --- /dev/null +++ b/test/integration/spark-databricks.dbtspec @@ -0,0 +1,43 @@ +target: + type: spark + host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}" + cluster: "{{ env_var('DBT_DATABRICKS_CLUSTER_NAME') }}" + token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}" + method: http + port: 443 + schema: "analytics_{{ var('_dbt_random_suffix') }}" + connect_retries: 5 + connect_timeout: 60 +projects: + - overrides: incremental + paths: + "models/incremental.sql": + materialized: incremental + body: "select * from {{ source('raw', 'seed') }}" + facts: + base: + rowcount: 10 + extended: + rowcount: 20 + - overrides: snapshot_strategy_check_cols + dbt_project_yml: &file_format_delta + # we're going to UPDATE the seed tables as part of testing, so we must make them delta format + seeds: + dbt_test_project: + file_format: delta + snapshots: + dbt_test_project: + file_format: delta + - overrides: snapshot_strategy_timestamp + dbt_project_yml: *file_format_delta +sequences: + test_dbt_empty: empty + test_dbt_base: base + test_dbt_ephemeral: ephemeral + test_dbt_incremental: incremental + test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp + test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols + test_dbt_data_test: data_test + test_dbt_ephemeral_data_tests: data_test_ephemeral_models + test_dbt_schema_test: schema_test + diff --git a/test/integration/spark.dbtspec b/test/integration/spark.dbtspec new file mode 100644 index 00000000..68b3c919 --- /dev/null +++ b/test/integration/spark.dbtspec @@ -0,0 +1,33 @@ +target: + type: spark + host: localhost + user: dbt + method: thrift + port: 10000 + connect_retries: 5 + connect_timeout: 60 + schema: "analytics_{{ var('_dbt_random_suffix') }}" +projects: + - overrides: incremental + paths: + "models/incremental.sql": + materialized: incremental + body: "select * from {{ source('raw', 'seed') }}" + facts: + base: + rowcount: 10 + extended: + rowcount: 20 +sequences: + test_dbt_empty: empty + test_dbt_base: base + test_dbt_ephemeral: ephemeral + test_dbt_incremental: incremental + # snapshots require delta format + # test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp + # test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols + test_dbt_data_test: data_test + test_dbt_schema_test: schema_test + # the local cluster currently tests on spark 2.x, which does not support this + # if we upgrade it to 3.x, we can enable this test + # test_dbt_ephemeral_data_tests: data_test_ephemeral_models diff --git a/tox.ini b/tox.ini index f3faa119..b5d01762 100644 --- a/tox.ini +++ b/tox.ini @@ -6,21 +6,22 @@ envlist = unit, flake8, integration-spark-thrift [testenv:flake8] basepython = python3 commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 dbt/' +passenv = DBT_INVOCATION_ENV deps = -r{toxinidir}/dev_requirements.txt [testenv:unit] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} test/unit' +passenv = DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt -[testenv:integration-spark-http] +[testenv:integration-spark-databricks] basepython = python3 -changedir = dbt-integration-tests -commands = /bin/bash -c '{envpython} -m behave -f progress3 --stop -D profile_name=spark-http' -passenv = SPARK_HOST SPARK_CLUSTER SPARK_TOKEN DBT_PROFILES_DIR +commands = /bin/bash -c '{envpython} -m pytest 
test/integration/spark-databricks.dbtspec' +passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -28,9 +29,8 @@ deps = [testenv:integration-spark-thrift] basepython = python3 -changedir = dbt-integration-tests -commands = /bin/bash -c '{envpython} -m behave -f progress3 --stop -D profile_name=spark-thrift' -passenv = DBT_PROFILES_DIR +commands = /bin/bash -c '{envpython} -m pytest test/integration/spark.dbtspec' +passenv = DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt From b3b5ea3a175176190affe5b646d2a45c85b672d9 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 15 Sep 2020 15:42:20 -0600 Subject: [PATCH 142/603] add databricks test, remove useless CI step --- .circleci/config.yml | 24 +++++++++++++++++++----- tox.ini | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2c7624f7..200c8e0f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,7 +10,7 @@ jobs: - checkout - run: tox -e flake8,unit - integration-spark2: + integration-spark-thrift: environment: DBT_INVOCATION_ENV: circle docker: @@ -39,9 +39,6 @@ jobs: name: Wait for Spark-Thrift command: dockerize -wait tcp://localhost:10000 -timeout 15m -wait-retry-interval 5s - - run: - name: Checkout test project - command: git clone --branch spark-support https://github.com/fishtown-analytics/dbt-integration-tests.git - run: name: Run integration tests command: tox -e integration-spark-thrift @@ -49,11 +46,28 @@ jobs: - store_artifacts: path: ./logs + integration-spark-databricks: + environment: + DBT_INVOCATION_ENV: circle + docker: + - image: fishtownanalytics/test-container:9 + steps: + - checkout + - run: + name: Run integration tests + command: tox -e integration-spark-databricks + no_output_timeout: 1h + - store_artifacts: + path: ./logs + workflows: version: 2 test-everything: jobs: - unit - - integration-spark2: + - integration-spark-thrift: + requires: + - unit + - integration-spark-databricks: requires: - unit diff --git a/tox.ini b/tox.ini index b5d01762..c3745707 100644 --- a/tox.ini +++ b/tox.ini @@ -20,7 +20,7 @@ deps = [testenv:integration-spark-databricks] basepython = python3 -commands = /bin/bash -c '{envpython} -m pytest test/integration/spark-databricks.dbtspec' +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks.dbtspec' passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt @@ -29,7 +29,7 @@ deps = [testenv:integration-spark-thrift] basepython = python3 -commands = /bin/bash -c '{envpython} -m pytest test/integration/spark.dbtspec' +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark.dbtspec' passenv = DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt From 2867310f0bc82198ad13f82a5f3bfb97b3ab34ff Mon Sep 17 00:00:00 2001 From: Daniel Stahl Date: Tue, 22 Sep 2020 06:51:37 -0500 Subject: [PATCH 143/603] allow kerberors --- dbt/adapters/spark/connections.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 4b2d89c3..9e261de8 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -37,6 +37,8 @@ class SparkCredentials(Credentials): token: Optional[str] = None user: Optional[str] = None port: int 
= 443 + auth: Optional[str]=None + kerberos_service_name: Optional[str]=None organization: str = '0' connect_retries: int = 0 connect_timeout: int = 10 @@ -269,7 +271,9 @@ def open(cls, connection): conn = hive.connect(host=creds.host, port=creds.port, - username=creds.user) + username=creds.user, + auth=creds.auth, + kerberos_service_name=creds.kerberos_service_name) else: raise dbt.exceptions.DbtProfileError( f"invalid credential method: {creds.method}" From 9d03250d08b8de3e50617b8f0734a681420d4b58 Mon Sep 17 00:00:00 2001 From: Daniel Stahl Date: Tue, 22 Sep 2020 10:58:32 -0500 Subject: [PATCH 144/603] added unit tests --- test/unit/test_adapter.py | 40 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 74f9790a..70bc955f 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -57,6 +57,22 @@ def _get_target_thrift(self, project): 'target': 'test' }) + def _get_target_thrift_kerberos(self, project): + return config_from_parts_or_dicts(project, { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'thrift', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 10001, + 'user': 'dbt', + 'auth': 'KERBEROS', + 'kerberos_service_name': 'hive' + } + }, + 'target': 'test' + }) def test_http_connection(self): config = self._get_target_http(self.project_cfg) adapter = SparkAdapter(config) @@ -83,10 +99,32 @@ def test_thrift_connection(self): config = self._get_target_thrift(self.project_cfg) adapter = SparkAdapter(config) - def hive_thrift_connect(host, port, username): + def hive_thrift_connect(host, port, username, auth, kerberos_service_name): + self.assertEqual(host, 'myorg.sparkhost.com') + self.assertEqual(port, 10001) + self.assertEqual(username, 'dbt') + self.assertIsNone(auth) + self.assertIsNone(kerberos_service_name) + + with mock.patch.object(hive, 'connect', new=hive_thrift_connect): + connection = adapter.acquire_connection('dummy') + connection.handle # trigger lazy-load + + self.assertEqual(connection.state, 'open') + self.assertIsNotNone(connection.handle) + self.assertEqual(connection.credentials.schema, 'analytics') + self.assertIsNone(connection.credentials.database) + + def test_thrift_connection_kerberos(self): + config = self._get_target_thrift_kerberos(self.project_cfg) + adapter = SparkAdapter(config) + + def hive_thrift_connect(host, port, username, auth, kerberos_service_name): self.assertEqual(host, 'myorg.sparkhost.com') self.assertEqual(port, 10001) self.assertEqual(username, 'dbt') + self.assertEqual(auth, 'KERBEROS') + self.assertEqual(kerberos_service_name, 'hive') with mock.patch.object(hive, 'connect', new=hive_thrift_connect): connection = adapter.acquire_connection('dummy') From b723f1e48fd3dc93fe7a28e91a2a3b25155a8e59 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 30 Sep 2020 10:05:58 -0400 Subject: [PATCH 145/603] Fix flake8 errors --- dbt/adapters/spark/connections.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 9e261de8..3ec6abf4 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -37,8 +37,8 @@ class SparkCredentials(Credentials): token: Optional[str] = None user: Optional[str] = None port: int = 443 - auth: Optional[str]=None - kerberos_service_name: Optional[str]=None + auth: Optional[str] = None + kerberos_service_name: Optional[str] = None organization: str = 
'0' connect_retries: int = 0 connect_timeout: int = 10 @@ -273,7 +273,7 @@ def open(cls, connection): port=creds.port, username=creds.user, auth=creds.auth, - kerberos_service_name=creds.kerberos_service_name) + kerberos_service_name=creds.kerberos_service_name) # noqa else: raise dbt.exceptions.DbtProfileError( f"invalid credential method: {creds.method}" From 7ca029f78e6119ef5bba716ff9dbc8bc9601188b Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 22 Oct 2020 15:48:44 -0400 Subject: [PATCH 146/603] add odbc connection type with all-purpose/virtual cluster support --- .gitignore | 1 + dbt/adapters/spark/connections.py | 66 +++++++++++++++++++++++++++++-- requirements.txt | 1 + setup.py | 1 + 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 85c98e1c..d6f5c9d0 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ dist/ dbt-integration-tests test/integration/.user.yml .DS_Store +.vscode diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 3ec6abf4..3d8aeefe 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -10,6 +10,7 @@ from TCLIService.ttypes import TOperationState as ThriftState from thrift.transport import THttpClient from pyhive import hive +import pyodbc from datetime import datetime from hologram.helpers import StrEnum @@ -25,6 +26,12 @@ class SparkConnectionMethod(StrEnum): THRIFT = 'thrift' HTTP = 'http' + ODBC = 'odbc' + + +class SparkClusterType(StrEnum): + ALL_PURPOSE = "all-purpose" + VIRTUAL = "virtual" @dataclass @@ -33,6 +40,8 @@ class SparkCredentials(Credentials): method: SparkConnectionMethod schema: str database: Optional[str] + driver: Optional[str] = None + cluster_type: Optional[SparkClusterType] = SparkClusterType.ALL_PURPOSE cluster: Optional[str] = None token: Optional[str] = None user: Optional[str] = None @@ -62,10 +71,10 @@ def type(self): return 'spark' def _connection_keys(self): - return 'host', 'port', 'cluster', 'schema', 'organization' + return 'host', 'port', 'cluster', 'cluster_type', 'schema', 'organization' -class ConnectionWrapper(object): +class PyhiveConnectionWrapper(object): """Wrap a Spark connection in a way that no-ops transactions""" # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html # noqa @@ -177,11 +186,27 @@ def description(self): return self._cursor.description +class PyodbcConnectionWrapper(PyhiveConnectionWrapper): + + def execute(self, sql, bindings=None): + if sql.strip().endswith(";"): + sql = sql.strip()[:-1] + + # pyodbc does not handle a None type binding! 
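A small aside on the branch that follows: pyodbc binds whatever it is handed as parameters, so forwarding bindings=None would likely be treated as a single parameter to bind rather than as "no parameters". A rough sketch of the two call shapes the wrapper chooses between — the connection string is hypothetical:

    import pyodbc

    conn = pyodbc.connect("DSN=MySparkDSN", autocommit=True)   # hypothetical DSN
    cursor = conn.cursor()
    cursor.execute("select 1")                  # no bindings: pass only the SQL
    cursor.execute("select ?", ["some value"])  # bindings present: pass them along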
+ if bindings is None: + self._cursor.execute(sql) + else: + + self._cursor.execute(sql, bindings) + + class SparkConnectionManager(SQLConnectionManager): TYPE = 'spark' + SPARK_CLUSTER_HTTP_PATH = "sql/protocolv1/o/{organization}/{cluster}" + SPARK_VIRTUAL_CLUSTER_HTTP_PATH = "/sql/1.0/endpoints/{cluster}" SPARK_CONNECTION_URL = ( - "https://{host}:{port}/sql/protocolv1/o/{organization}/{cluster}" + "https://{host}:{port}/" + SPARK_CLUSTER_HTTP_PATH ) @contextmanager @@ -265,6 +290,7 @@ def open(cls, connection): }) conn = hive.connect(thrift_transport=transport) + handle = PyhiveConnectionWrapper(conn) elif creds.method == 'thrift': cls.validate_creds(creds, ['host', 'port', 'user', 'schema']) @@ -274,6 +300,39 @@ def open(cls, connection): username=creds.user, auth=creds.auth, kerberos_service_name=creds.kerberos_service_name) # noqa + handle = PyhiveConnectionWrapper(conn) + elif creds.method == 'odbc': + required_fields = ['driver', 'host', 'port', 'token', + 'organization', 'cluster', 'cluster_type'] # noqa + cls.validate_creds(creds, required_fields) + + http_path = None + + if creds.cluster_type == SparkClusterType.ALL_PURPOSE: + http_path = cls.SPARK_CLUSTER_HTTP_PATH.format( + organization=creds.organization, + cluster=creds.cluster + ) + elif creds.cluster_type == SparkClusterType.VIRTUAL: + http_path = cls.SPARK_VIRTUAL_CLUSTER_HTTP_PATH.format( + cluster=creds.cluster + ) + + connection_params = [] + connection_params.append(f"DRIVER={creds.driver}") + connection_params.append(f"Host={creds.host}") + connection_params.append(f"PORT={creds.port}") + connection_params.append("UID=token") + connection_params.append(f"PWD={creds.token}") + connection_params.append(f"HTTPPath={http_path}") + connection_params.append("AuthMech=3") + connection_params.append("ThriftTransport=2") + connection_params.append("SSL=1") + + connection_str = ";".join(connection_params) + + conn = pyodbc.connect(connection_str, autocommit=True) + handle = PyodbcConnectionWrapper(conn) else: raise dbt.exceptions.DbtProfileError( f"invalid credential method: {creds.method}" @@ -304,7 +363,6 @@ def open(cls, connection): else: raise exc - handle = ConnectionWrapper(conn) connection.handle = handle connection.state = ConnectionState.OPEN return connection diff --git a/requirements.txt b/requirements.txt index b1d6e5c2..c3770d21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ dbt-core==0.18.0 PyHive[hive]>=0.6.0,<0.7.0 thrift>=0.11.0,<0.12.0 +pyodbc>=4.0.30 diff --git a/setup.py b/setup.py index 5738c57a..a6ef60f4 100644 --- a/setup.py +++ b/setup.py @@ -63,5 +63,6 @@ def _dbt_spark_version(): f'dbt-core=={dbt_version}', 'PyHive[hive]>=0.6.0,<0.7.0', 'thrift>=0.11.0,<0.12.0', + 'pyodbc>=4.0.30', ] ) From e72d42c6adf775a7f36a358a1816b5eced5163d7 Mon Sep 17 00:00:00 2001 From: collinprather Date: Fri, 23 Oct 2020 06:50:08 -0400 Subject: [PATCH 147/603] fixing links to docs in readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d01494fe..4b5dc4e9 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ plugin uses syntax that requires version 2.2.0 or newer. 
### Documentation For more information on using Spark with dbt, consult the dbt documentation: -- [Spark profile](https://docs.getdbt.com/docs/profile-spark) -- [Spark specific configs](https://docs.getdbt.com/docs/spark-configs) +- [Spark profile](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile/) +- [Spark specific configs](https://docs.getdbt.com/reference/resource-configs/spark-configs/) ### Installation This plugin can be installed via pip: From 25bfc61a3eaf5e6cabe3d51db5de235b741cd06b Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 26 Oct 2020 13:05:20 -0400 Subject: [PATCH 148/603] fix SQL parameter style for pyodbc --- dbt/adapters/spark/connections.py | 46 +++++++++++++------ requirements.txt | 3 +- setup.py | 3 +- .../integration/spark-databricks-odbc.dbtspec | 35 ++++++++++++++ tox.ini | 10 ++++ 5 files changed, 80 insertions(+), 17 deletions(-) create mode 100644 test/integration/spark-databricks-odbc.dbtspec diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 3d8aeefe..fc9751a5 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -6,12 +6,14 @@ from dbt.contracts.connection import ConnectionState from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import DECIMALS +from dbt.adapters.spark import __version__ from TCLIService.ttypes import TOperationState as ThriftState from thrift.transport import THttpClient from pyhive import hive import pyodbc from datetime import datetime +import sqlparams from hologram.helpers import StrEnum from dataclasses import dataclass @@ -23,6 +25,10 @@ NUMBERS = DECIMALS + (int, float) +def _build_odbc_connnection_string(**kwargs) -> str: + return ";".join([f"{k}={v}" for k, v in kwargs.items()]) + + class SparkConnectionMethod(StrEnum): THRIFT = 'thrift' HTTP = 'http' @@ -71,7 +77,8 @@ def type(self): return 'spark' def _connection_keys(self): - return 'host', 'port', 'cluster', 'cluster_type', 'schema', 'organization' + return ('host', 'port', 'cluster', + 'cluster_type', 'schema', 'organization') class PyhiveConnectionWrapper(object): @@ -192,12 +199,14 @@ def execute(self, sql, bindings=None): if sql.strip().endswith(";"): sql = sql.strip()[:-1] + query = sqlparams.SQLParams('format', 'qmark') # pyodbc does not handle a None type binding! 
if bindings is None: + sql, bindings = query.format(sql, []) self._cursor.execute(sql) else: - - self._cursor.execute(sql, bindings) + sql, bindings = query.format(sql, bindings) + self._cursor.execute(sql, *bindings) class SparkConnectionManager(SQLConnectionManager): @@ -317,19 +326,26 @@ def open(cls, connection): http_path = cls.SPARK_VIRTUAL_CLUSTER_HTTP_PATH.format( cluster=creds.cluster ) + else: + raise dbt.exceptions.DbtProfileError( + f"invalid custer type: {creds.cluster_type}" + ) - connection_params = [] - connection_params.append(f"DRIVER={creds.driver}") - connection_params.append(f"Host={creds.host}") - connection_params.append(f"PORT={creds.port}") - connection_params.append("UID=token") - connection_params.append(f"PWD={creds.token}") - connection_params.append(f"HTTPPath={http_path}") - connection_params.append("AuthMech=3") - connection_params.append("ThriftTransport=2") - connection_params.append("SSL=1") - - connection_str = ";".join(connection_params) + dbt_spark_version = __version__.version + user_agent_entry = f"fishtown-analytics-dbt-spark/{dbt_spark_version} (Databricks)" # noqa + + connection_str = _build_odbc_connnection_string( + DRIVER=creds.driver, + HOST=creds.host, + PORT=creds.port, + UID="token", + PWD=creds.token, + HTTPPath=http_path, + AuthMech=3, + ThriftTransport=2, + SSL=1, + UserAgentEntry=user_agent_entry, + ) conn = pyodbc.connect(connection_str, autocommit=True) handle = PyodbcConnectionWrapper(conn) diff --git a/requirements.txt b/requirements.txt index c3770d21..f60496bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ dbt-core==0.18.0 PyHive[hive]>=0.6.0,<0.7.0 -thrift>=0.11.0,<0.12.0 pyodbc>=4.0.30 +sqlparams>=3.0.0 +thrift>=0.11.0,<0.12.0 diff --git a/setup.py b/setup.py index a6ef60f4..b4104baa 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,8 @@ def _dbt_spark_version(): install_requires=[ f'dbt-core=={dbt_version}', 'PyHive[hive]>=0.6.0,<0.7.0', - 'thrift>=0.11.0,<0.12.0', 'pyodbc>=4.0.30', + 'sqlparams>=3.0.0', + 'thrift>=0.11.0,<0.12.0' ] ) diff --git a/test/integration/spark-databricks-odbc.dbtspec b/test/integration/spark-databricks-odbc.dbtspec new file mode 100644 index 00000000..df77dc3f --- /dev/null +++ b/test/integration/spark-databricks-odbc.dbtspec @@ -0,0 +1,35 @@ +target: + type: spark + host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}" + cluster: "{{ env_var('DBT_DATABRICKS_CLUSTER_NAME') }}" + token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}" + method: odbc + driver: /Library/simba/spark/lib/libsparkodbc_sbu.dylib + port: 443 + schema: "analytics_{{ var('_dbt_random_suffix') }}" + connect_retries: 5 + connect_timeout: 60 +projects: + - overrides: incremental + paths: + "models/incremental.sql": + materialized: incremental + body: "select * from {{ source('raw', 'seed') }}" + facts: + base: + rowcount: 10 + extended: + rowcount: 20 + - overrides: snapshot_strategy_check_cols + dbt_project_yml: &file_format_delta + # we're going to UPDATE the seed tables as part of testing, so we must make them delta format + seeds: + dbt_test_project: + file_format: delta + snapshots: + dbt_test_project: + file_format: delta + - overrides: snapshot_strategy_timestamp + dbt_project_yml: *file_format_delta +sequences: + test_dbt_incremental: incremental diff --git a/tox.ini b/tox.ini index c3745707..f631d060 100644 --- a/tox.ini +++ b/tox.ini @@ -27,6 +27,16 @@ deps = -r{toxinidir}/dev_requirements.txt -e. 
+[testenv:integration-spark-databricks-odbc] +basepython = python3 +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc.dbtspec' +passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV +deps = + -r{toxinidir}/requirements.txt + -r{toxinidir}/dev_requirements.txt + -e. + + [testenv:integration-spark-thrift] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark.dbtspec' From 9366d2e4b978cbf146dc73c58c98dded97594f70 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 29 Oct 2020 11:42:19 -0400 Subject: [PATCH 149/603] testing --- test/integration/spark-databricks-odbc.dbtspec | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/spark-databricks-odbc.dbtspec b/test/integration/spark-databricks-odbc.dbtspec index df77dc3f..abed9e7b 100644 --- a/test/integration/spark-databricks-odbc.dbtspec +++ b/test/integration/spark-databricks-odbc.dbtspec @@ -4,7 +4,7 @@ target: cluster: "{{ env_var('DBT_DATABRICKS_CLUSTER_NAME') }}" token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}" method: odbc - driver: /Library/simba/spark/lib/libsparkodbc_sbu.dylib + driver: "{{ env_var('ODBC_DRIVER') }}" port: 443 schema: "analytics_{{ var('_dbt_random_suffix') }}" connect_retries: 5 diff --git a/tox.ini b/tox.ini index f631d060..06735a68 100644 --- a/tox.ini +++ b/tox.ini @@ -30,7 +30,7 @@ deps = [testenv:integration-spark-databricks-odbc] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc.dbtspec' -passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV +passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt From c2dc0fd43a6c6d50b35879cb41f0ff8f89127208 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 29 Oct 2020 12:07:27 -0400 Subject: [PATCH 150/603] update credentials schema and integration tests --- .circleci/config.yml | 19 ++++++- dbt/adapters/spark/connections.py | 51 ++++++++++--------- dev_requirements.txt | 2 +- ....dbtspec => spark-databricks-http.dbtspec} | 2 +- ... 
=> spark-databricks-odbc-cluster.dbtspec} | 10 +++- ...spark-databricks-odbc-sql-endpoint.dbtspec | 44 ++++++++++++++++ .../{spark.dbtspec => spark-thrift.dbtspec} | 2 +- tox.ini | 19 +++++-- 8 files changed, 115 insertions(+), 34 deletions(-) rename test/integration/{spark-databricks.dbtspec => spark-databricks-http.dbtspec} (98%) rename test/integration/{spark-databricks-odbc.dbtspec => spark-databricks-odbc-cluster.dbtspec} (74%) create mode 100644 test/integration/spark-databricks-odbc-sql-endpoint.dbtspec rename test/integration/{spark.dbtspec => spark-thrift.dbtspec} (98%) diff --git a/.circleci/config.yml b/.circleci/config.yml index 200c8e0f..fe73dae8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -55,11 +55,25 @@ jobs: - checkout - run: name: Run integration tests - command: tox -e integration-spark-databricks + command: tox -e integration-spark-databricks-http no_output_timeout: 1h - store_artifacts: path: ./logs + # integration-spark-databricks-odbc: + # environment: + # DBT_INVOCATION_ENV: circle + # docker: + # - image: kwigley/spark-test-container:1 + # steps: + # - checkout + # - run: + # name: Run integration tests + # command: ODBC_DRIVER=Simba tox -e integration-spark-databricks-odbc-cluster,integration-spark-databricks-odbc-sql-endpoint + # no_output_timeout: 1h + # - store_artifacts: + # path: ./logs + workflows: version: 2 test-everything: @@ -71,3 +85,6 @@ workflows: - integration-spark-databricks: requires: - unit + # - integration-spark-databricks-odbc: + # requires: + # - unit diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index fc9751a5..bd0c4462 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -35,11 +35,6 @@ class SparkConnectionMethod(StrEnum): ODBC = 'odbc' -class SparkClusterType(StrEnum): - ALL_PURPOSE = "all-purpose" - VIRTUAL = "virtual" - - @dataclass class SparkCredentials(Credentials): host: str @@ -47,8 +42,8 @@ class SparkCredentials(Credentials): schema: str database: Optional[str] driver: Optional[str] = None - cluster_type: Optional[SparkClusterType] = SparkClusterType.ALL_PURPOSE cluster: Optional[str] = None + endpoint: Optional[str] = None token: Optional[str] = None user: Optional[str] = None port: int = 443 @@ -78,7 +73,7 @@ def type(self): def _connection_keys(self): return ('host', 'port', 'cluster', - 'cluster_type', 'schema', 'organization') + 'endpoint', 'schema', 'organization') class PyhiveConnectionWrapper(object): @@ -198,13 +193,12 @@ class PyodbcConnectionWrapper(PyhiveConnectionWrapper): def execute(self, sql, bindings=None): if sql.strip().endswith(";"): sql = sql.strip()[:-1] - - query = sqlparams.SQLParams('format', 'qmark') # pyodbc does not handle a None type binding! if bindings is None: - sql, bindings = query.format(sql, []) self._cursor.execute(sql) else: + # pyodbc only supports `qmark` sql params! 
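For context on the line that follows: dbt supplies format-style placeholders (%s), while pyodbc only understands qmark-style placeholders (?), so sqlparams rewrites the statement and its bindings before execution. A minimal sketch of what that conversion produces — the table name and values are illustrative:

    import sqlparams

    convert = sqlparams.SQLParams('format', 'qmark')
    sql, params = convert.format('insert into t values (%s, %s)', ['a', 1])
    # sql    -> 'insert into t values (?, ?)'
    # params -> ['a', 1]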
+ query = sqlparams.SQLParams('format', 'qmark') sql, bindings = query.format(sql, bindings) self._cursor.execute(sql, *bindings) @@ -213,7 +207,7 @@ class SparkConnectionManager(SQLConnectionManager): TYPE = 'spark' SPARK_CLUSTER_HTTP_PATH = "sql/protocolv1/o/{organization}/{cluster}" - SPARK_VIRTUAL_CLUSTER_HTTP_PATH = "/sql/1.0/endpoints/{cluster}" + SPARK_SQL_ENDPOINT_HTTP_PATH = "/sql/1.0/endpoints/{endpoint}" SPARK_CONNECTION_URL = ( "https://{host}:{port}/" + SPARK_CLUSTER_HTTP_PATH ) @@ -277,7 +271,7 @@ def open(cls, connection): for i in range(1 + creds.connect_retries): try: - if creds.method == 'http': + if creds.method == SparkConnectionMethod.HTTP: cls.validate_creds(creds, ['token', 'host', 'port', 'cluster', 'organization']) @@ -300,7 +294,7 @@ def open(cls, connection): conn = hive.connect(thrift_transport=transport) handle = PyhiveConnectionWrapper(conn) - elif creds.method == 'thrift': + elif creds.method == SparkConnectionMethod.THRIFT: cls.validate_creds(creds, ['host', 'port', 'user', 'schema']) @@ -310,30 +304,38 @@ def open(cls, connection): auth=creds.auth, kerberos_service_name=creds.kerberos_service_name) # noqa handle = PyhiveConnectionWrapper(conn) - elif creds.method == 'odbc': - required_fields = ['driver', 'host', 'port', 'token', - 'organization', 'cluster', 'cluster_type'] # noqa - cls.validate_creds(creds, required_fields) - + elif creds.method == SparkConnectionMethod.ODBC: http_path = None - - if creds.cluster_type == SparkClusterType.ALL_PURPOSE: + if creds.cluster and creds.endpoint: + raise dbt.exceptions.DbtProfileError( + "`cluster` and `endpoint` cannot both be set when" + " using the odbc method to connect to Spark" + ) + elif creds.cluster is not None: + required_fields = ['driver', 'host', 'port', 'token', + 'organization', 'cluster'] http_path = cls.SPARK_CLUSTER_HTTP_PATH.format( organization=creds.organization, cluster=creds.cluster ) - elif creds.cluster_type == SparkClusterType.VIRTUAL: - http_path = cls.SPARK_VIRTUAL_CLUSTER_HTTP_PATH.format( - cluster=creds.cluster + elif creds.endpoint is not None: + required_fields = ['driver', 'host', 'port', 'token', + 'endpoint'] + http_path = cls.SPARK_SQL_ENDPOINT_HTTP_PATH.format( + endpoint=creds.endpoint ) else: raise dbt.exceptions.DbtProfileError( - f"invalid custer type: {creds.cluster_type}" + "Either `cluster` or `endpoint` must set when" + " using the odbc method to connect to Spark" ) + cls.validate_creds(creds, required_fields) + dbt_spark_version = __version__.version user_agent_entry = f"fishtown-analytics-dbt-spark/{dbt_spark_version} (Databricks)" # noqa + # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm connection_str = _build_odbc_connnection_string( DRIVER=creds.driver, HOST=creds.host, @@ -342,6 +344,7 @@ def open(cls, connection): PWD=creds.token, HTTPPath=http_path, AuthMech=3, + SparkServerType=3, ThriftTransport=2, SSL=1, UserAgentEntry=user_agent_entry, diff --git a/dev_requirements.txt b/dev_requirements.txt index 1eadc8f5..c657c54f 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -10,6 +10,6 @@ pytest-xdist>=2.1.0,<3 flaky>=3.5.3,<4 # Test requirements -pytest-dbt-adapter==0.2.0 +pytest-dbt-adapter==0.3.0 sasl==0.2.1 thrift_sasl==0.4.1 diff --git a/test/integration/spark-databricks.dbtspec b/test/integration/spark-databricks-http.dbtspec similarity index 98% rename from test/integration/spark-databricks.dbtspec rename to test/integration/spark-databricks-http.dbtspec index 72c36df7..c20e4242 100644 --- 
a/test/integration/spark-databricks.dbtspec +++ b/test/integration/spark-databricks-http.dbtspec @@ -17,7 +17,7 @@ projects: facts: base: rowcount: 10 - extended: + added: rowcount: 20 - overrides: snapshot_strategy_check_cols dbt_project_yml: &file_format_delta diff --git a/test/integration/spark-databricks-odbc.dbtspec b/test/integration/spark-databricks-odbc-cluster.dbtspec similarity index 74% rename from test/integration/spark-databricks-odbc.dbtspec rename to test/integration/spark-databricks-odbc-cluster.dbtspec index abed9e7b..8dc4975e 100644 --- a/test/integration/spark-databricks-odbc.dbtspec +++ b/test/integration/spark-databricks-odbc-cluster.dbtspec @@ -18,7 +18,7 @@ projects: facts: base: rowcount: 10 - extended: + added: rowcount: 20 - overrides: snapshot_strategy_check_cols dbt_project_yml: &file_format_delta @@ -32,4 +32,12 @@ projects: - overrides: snapshot_strategy_timestamp dbt_project_yml: *file_format_delta sequences: + test_dbt_empty: empty + test_dbt_base: base + test_dbt_ephemeral: ephemeral test_dbt_incremental: incremental + test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp + test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols + test_dbt_data_test: data_test + test_dbt_ephemeral_data_tests: data_test_ephemeral_models + test_dbt_schema_test: schema_test diff --git a/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec b/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec new file mode 100644 index 00000000..d0e23dab --- /dev/null +++ b/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec @@ -0,0 +1,44 @@ +target: + type: spark + host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}" + endpoint: "{{ env_var('DBT_DATABRICKS_ENDPOINT') }}" + token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}" + method: odbc + driver: "{{ env_var('ODBC_DRIVER') }}" + port: 443 + schema: "analytics_{{ var('_dbt_random_suffix') }}" + connect_retries: 5 + connect_timeout: 60 +projects: + - overrides: incremental + paths: + "models/incremental.sql": + materialized: incremental + body: "select * from {{ source('raw', 'seed') }}" + facts: + base: + rowcount: 10 + added: + rowcount: 20 + - overrides: snapshot_strategy_check_cols + dbt_project_yml: &file_format_delta + # we're going to UPDATE the seed tables as part of testing, so we must make them delta format + seeds: + dbt_test_project: + file_format: delta + snapshots: + dbt_test_project: + file_format: delta + - overrides: snapshot_strategy_timestamp + dbt_project_yml: *file_format_delta +sequences: + test_dbt_empty: empty + test_dbt_base: base + test_dbt_ephemeral: ephemeral + # The SQL Endpoint does not support `create temporary view` + # test_dbt_incremental: incremental + test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp + test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols + test_dbt_data_test: data_test + test_dbt_ephemeral_data_tests: data_test_ephemeral_models + test_dbt_schema_test: schema_test diff --git a/test/integration/spark.dbtspec b/test/integration/spark-thrift.dbtspec similarity index 98% rename from test/integration/spark.dbtspec rename to test/integration/spark-thrift.dbtspec index 68b3c919..58f5a906 100644 --- a/test/integration/spark.dbtspec +++ b/test/integration/spark-thrift.dbtspec @@ -16,7 +16,7 @@ projects: facts: base: rowcount: 10 - extended: + added: rowcount: 20 sequences: test_dbt_empty: empty diff --git a/tox.ini b/tox.ini index 06735a68..f865309f 100644 --- a/tox.ini +++ b/tox.ini @@ -18,28 +18,37 @@ deps = 
-r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt -[testenv:integration-spark-databricks] +[testenv:integration-spark-databricks-http] basepython = python3 -commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks.dbtspec' +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-http.dbtspec' passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt -e. -[testenv:integration-spark-databricks-odbc] +[testenv:integration-spark-databricks-odbc-cluster] basepython = python3 -commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc.dbtspec' +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-cluster.dbtspec' passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt -e. +[testenv:integration-spark-databricks-odbc-sql-endpoint] +basepython = python3 +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-sql-endpoint.dbtspec' +passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_ENDPOINT DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER +deps = + -r{toxinidir}/requirements.txt + -r{toxinidir}/dev_requirements.txt + -e. + [testenv:integration-spark-thrift] basepython = python3 -commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark.dbtspec' +commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-thrift.dbtspec' passenv = DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt From 087a158bab5413929030ec7bd3360cf67f30f6c3 Mon Sep 17 00:00:00 2001 From: Charlotte van der Scheun Date: Fri, 30 Oct 2020 10:59:15 +0100 Subject: [PATCH 151/603] replace partitionOverwriteMode inside merge strategy --- dbt/include/spark/macros/materializations/incremental.sql | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index f5d7335f..387c9ae4 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -97,13 +97,12 @@ {% if strategy == 'merge' %} {%- set unique_key = config.require('unique_key') -%} + {% call statement() %} + set spark.sql.sources.partitionOverwriteMode = DYNAMIC + {% endcall %} {% do dbt_spark_validate_merge(file_format) %} {% endif %} - {% call statement() %} - set spark.sql.sources.partitionOverwriteMode = DYNAMIC - {% endcall %} - {% call statement() %} set spark.sql.hive.convertMetastoreParquet = false {% endcall %} From a18be085900f8c3f0c19943d8606c49e5f7ed63c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 30 Oct 2020 13:43:48 -0400 Subject: [PATCH 152/603] set up integration tests --- .circleci/config.yml | 51 ++++++++++++++++++------------- dbt/adapters/spark/connections.py | 13 +++++++- setup.py | 6 ++-- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index fe73dae8..4f79c707 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ jobs: environment: DBT_INVOCATION_ENV: circle docker: - - image: fishtownanalytics/test-container:9 + - image: fishtownanalytics/test-container:10 steps: - checkout - run: tox -e flake8,unit @@ -14,7 +14,7 @@ 
jobs: environment: DBT_INVOCATION_ENV: circle docker: - - image: fishtownanalytics/test-container:9 + - image: fishtownanalytics/test-container:10 - image: godatadriven/spark:2 environment: WAIT_FOR: localhost:5432 @@ -46,11 +46,11 @@ jobs: - store_artifacts: path: ./logs - integration-spark-databricks: + integration-spark-databricks-http: environment: DBT_INVOCATION_ENV: circle docker: - - image: fishtownanalytics/test-container:9 + - image: fishtownanalytics/test-container:10 steps: - checkout - run: @@ -60,19 +60,28 @@ jobs: - store_artifacts: path: ./logs - # integration-spark-databricks-odbc: - # environment: - # DBT_INVOCATION_ENV: circle - # docker: - # - image: kwigley/spark-test-container:1 - # steps: - # - checkout - # - run: - # name: Run integration tests - # command: ODBC_DRIVER=Simba tox -e integration-spark-databricks-odbc-cluster,integration-spark-databricks-odbc-sql-endpoint - # no_output_timeout: 1h - # - store_artifacts: - # path: ./logs + integration-spark-databricks-odbc: + environment: + DBT_INVOCATION_ENV: circle + ODBC_DRIVER: Simba + ODBC_DRIVER_URL: https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip + docker: + - image: fishtownanalytics/test-container:10 + command: | + curl -L $ODBC_DRIVER_URL > /tmp/simba_odbc.zip + unzip /tmp/simba_odbc.zip -d /tmp/ + dpkg -i /tmp/SimbaSparkODBC-*/*.deb + echo "[$ODBC_DRIVER]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini + rm /tmp/simba_odbc.zip + rm -rf /tmp/SimbaSparkODBC* + steps: + - checkout + - run: + name: Run integration tests + command: tox -e integration-spark-databricks-odbc-cluster,integration-spark-databricks-odbc-sql-endpoint + no_output_timeout: 1h + - store_artifacts: + path: ./logs workflows: version: 2 @@ -82,9 +91,9 @@ workflows: - integration-spark-thrift: requires: - unit - - integration-spark-databricks: + - integration-spark-databricks-http: + requires: + - unit + - integration-spark-databricks-odbc: requires: - unit - # - integration-spark-databricks-odbc: - # requires: - # - unit diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index bd0c4462..3dc06470 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -11,7 +11,10 @@ from TCLIService.ttypes import TOperationState as ThriftState from thrift.transport import THttpClient from pyhive import hive -import pyodbc +try: + import pyodbc +except ImportError: + pyodbc = None from datetime import datetime import sqlparams @@ -67,6 +70,14 @@ def __post_init__(self): ) self.database = None + if self.method == SparkConnectionMethod.ODBC and pyodbc is None: + raise dbt.exceptions.RuntimeException( + f"{self.method} connection method requires " + "additional dependencies. 
\n" + "Install the additional required dependencies with " + "`pip install dbt-spark[ODBC]`" + ) + @property def type(self): return 'spark' diff --git a/setup.py b/setup.py index b4104baa..152a7b69 100644 --- a/setup.py +++ b/setup.py @@ -62,8 +62,10 @@ def _dbt_spark_version(): install_requires=[ f'dbt-core=={dbt_version}', 'PyHive[hive]>=0.6.0,<0.7.0', - 'pyodbc>=4.0.30', 'sqlparams>=3.0.0', 'thrift>=0.11.0,<0.12.0' - ] + ], + extra_requires={ + "ODBC": ['pyodbc>=4.0.30'], + } ) From 30b22229623fccdb847c46c913f170d53aa0e5d2 Mon Sep 17 00:00:00 2001 From: Charlotte van der Scheun Date: Mon, 2 Nov 2020 07:50:40 +0100 Subject: [PATCH 153/603] dynamic overwrite when partition || unique key --- dbt/include/spark/macros/materializations/incremental.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 387c9ae4..037941be 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -62,6 +62,7 @@ {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} + {# ignore dest_columns - we will just use `*` #} merge into {{ target }} as DBT_INTERNAL_DEST using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE @@ -97,10 +98,13 @@ {% if strategy == 'merge' %} {%- set unique_key = config.require('unique_key') -%} + {% do dbt_spark_validate_merge(file_format) %} + {% endif %} + + {% if unique_key or config.get('partition_by') %} {% call statement() %} set spark.sql.sources.partitionOverwriteMode = DYNAMIC {% endcall %} - {% do dbt_spark_validate_merge(file_format) %} {% endif %} {% call statement() %} From f240857d4924eb545d41d6aefd41ae8295eed44d Mon Sep 17 00:00:00 2001 From: charlottevdscheun <65390869+charlottevdscheun@users.noreply.github.com> Date: Mon, 2 Nov 2020 10:05:33 +0100 Subject: [PATCH 154/603] Update dbt/include/spark/macros/materializations/incremental.sql Fokko's suggestion to remove unique key from the if statement Co-authored-by: Fokko Driesprong --- dbt/include/spark/macros/materializations/incremental.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 037941be..76c6e490 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -101,7 +101,7 @@ {% do dbt_spark_validate_merge(file_format) %} {% endif %} - {% if unique_key or config.get('partition_by') %} + {% if config.get('partition_by') %} {% call statement() %} set spark.sql.sources.partitionOverwriteMode = DYNAMIC {% endcall %} From f6d54d7fb41988e18412676e24b297868aa10620 Mon Sep 17 00:00:00 2001 From: Charlotte van der Scheun Date: Mon, 2 Nov 2020 15:19:56 +0100 Subject: [PATCH 155/603] add documentation about insert_overwrite when no partition --- README.md | 2 +- dbt/include/spark/macros/materializations/incremental.sql | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 4b5dc4e9..890c5cc4 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ The following configurations can be supplied to models run with the dbt-spark pl **Incremental Models** To use incremental models, specify a `partition_by` clause in your model config. 
The default incremental strategy used is `insert_overwrite`, which will overwrite the partitions included in your query. Be sure to re-select _all_ of the relevant -data for a partition when using the `insert_overwrite` strategy. +data for a partition when using the `insert_overwrite` strategy. If a `partition_by` config is not specified, dbt will overwrite the entire table as an atomic operation, replacing it with new data of the same schema. This is analogous to `truncate` + `insert`. ``` {{ config( diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 037941be..b16acc9c 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -62,7 +62,6 @@ {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} - {# ignore dest_columns - we will just use `*` #} merge into {{ target }} as DBT_INTERNAL_DEST using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE From 165d83b6adb18c282b6f4ce1ce4371e023e16e53 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 2 Nov 2020 11:46:37 -0500 Subject: [PATCH 156/603] add unit test and update integration test image for ODBC tests --- .circleci/config.yml | 15 ++-- dbt/adapters/spark/connections.py | 21 +++-- test/unit/test_adapter.py | 127 ++++++++++++++++++++++++++++-- 3 files changed, 138 insertions(+), 25 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4f79c707..508eb0ab 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -63,17 +63,12 @@ jobs: integration-spark-databricks-odbc: environment: DBT_INVOCATION_ENV: circle - ODBC_DRIVER: Simba - ODBC_DRIVER_URL: https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip + ODBC_DRIVER: Simba # TODO: move to env var to test image docker: - - image: fishtownanalytics/test-container:10 - command: | - curl -L $ODBC_DRIVER_URL > /tmp/simba_odbc.zip - unzip /tmp/simba_odbc.zip -d /tmp/ - dpkg -i /tmp/SimbaSparkODBC-*/*.deb - echo "[$ODBC_DRIVER]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini - rm /tmp/simba_odbc.zip - rm -rf /tmp/SimbaSparkODBC* + - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest + aws_auth: + aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING + aws_secret_access_key: $AWS_SECRET_ACCESS_KEY_STAGING steps: - checkout - run: diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 3dc06470..27c999cd 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -78,6 +78,16 @@ def __post_init__(self): "`pip install dbt-spark[ODBC]`" ) + if ( + self.method == SparkConnectionMethod.ODBC and + self.cluster and + self.endpoint + ): + raise dbt.exceptions.RuntimeException( + "`cluster` and `endpoint` cannot both be set when" + f" using {self.method} method to connect to Spark" + ) + @property def type(self): return 'spark' @@ -217,10 +227,10 @@ def execute(self, sql, bindings=None): class SparkConnectionManager(SQLConnectionManager): TYPE = 'spark' - SPARK_CLUSTER_HTTP_PATH = "sql/protocolv1/o/{organization}/{cluster}" + SPARK_CLUSTER_HTTP_PATH = "/sql/protocolv1/o/{organization}/{cluster}" SPARK_SQL_ENDPOINT_HTTP_PATH = "/sql/1.0/endpoints/{endpoint}" SPARK_CONNECTION_URL = ( - "https://{host}:{port}/" + SPARK_CLUSTER_HTTP_PATH + "https://{host}:{port}" + SPARK_CLUSTER_HTTP_PATH ) @contextmanager @@ 
-317,12 +327,7 @@ def open(cls, connection): handle = PyhiveConnectionWrapper(conn) elif creds.method == SparkConnectionMethod.ODBC: http_path = None - if creds.cluster and creds.endpoint: - raise dbt.exceptions.DbtProfileError( - "`cluster` and `endpoint` cannot both be set when" - " using the odbc method to connect to Spark" - ) - elif creds.cluster is not None: + if creds.cluster is not None: required_fields = ['driver', 'host', 'port', 'token', 'organization', 'cluster'] http_path = cls.SPARK_CLUSTER_HTTP_PATH.format( diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 70bc955f..2cbd7810 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -73,6 +73,42 @@ def _get_target_thrift_kerberos(self, project): }, 'target': 'test' }) + + def _get_target_odbc_cluster(self, project): + return config_from_parts_or_dicts(project, { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'odbc', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 443, + 'token': 'abc123', + 'organization': '0123456789', + 'cluster': '01234-23423-coffeetime', + 'driver': 'Simba', + } + }, + 'target': 'test' + }) + + def _get_target_odbc_sql_endpoint(self, project): + return config_from_parts_or_dicts(project, { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'odbc', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 443, + 'token': 'abc123', + 'endpoint': '012342342393920a', + 'driver': 'Simba', + } + }, + 'target': 'test' + }) + def test_http_connection(self): config = self._get_target_http(self.project_cfg) adapter = SparkAdapter(config) @@ -81,7 +117,8 @@ def hive_http_connect(thrift_transport): self.assertEqual(thrift_transport.scheme, 'https') self.assertEqual(thrift_transport.port, 443) self.assertEqual(thrift_transport.host, 'myorg.sparkhost.com') - self.assertEqual(thrift_transport.path, '/sql/protocolv1/o/0123456789/01234-23423-coffeetime') + self.assertEqual( + thrift_transport.path, '/sql/protocolv1/o/0123456789/01234-23423-coffeetime') # with mock.patch.object(hive, 'connect', new=hive_http_connect): with mock.patch('dbt.adapters.spark.connections.hive.connect', new=hive_http_connect): @@ -90,7 +127,8 @@ def hive_http_connect(thrift_transport): self.assertEqual(connection.state, 'open') self.assertIsNotNone(connection.handle) - self.assertEqual(connection.credentials.cluster, '01234-23423-coffeetime') + self.assertEqual(connection.credentials.cluster, + '01234-23423-coffeetime') self.assertEqual(connection.credentials.token, 'abc123') self.assertEqual(connection.credentials.schema, 'analytics') self.assertIsNone(connection.credentials.database) @@ -135,6 +173,56 @@ def hive_thrift_connect(host, port, username, auth, kerberos_service_name): self.assertEqual(connection.credentials.schema, 'analytics') self.assertIsNone(connection.credentials.database) + def test_odbc_cluster_connection(self): + config = self._get_target_odbc_cluster(self.project_cfg) + adapter = SparkAdapter(config) + + def pyodbc_connect(connection_str, autocommit): + self.assertTrue(autocommit) + self.assertIn('driver=simba;', connection_str.lower()) + self.assertIn('port=443;', connection_str.lower()) + self.assertIn('host=myorg.sparkhost.com;', + connection_str.lower()) + self.assertIn( + 'httppath=/sql/protocolv1/o/0123456789/01234-23423-coffeetime;', connection_str.lower()) # noqa + + with mock.patch('dbt.adapters.spark.connections.pyodbc.connect', new=pyodbc_connect): # noqa + connection = adapter.acquire_connection('dummy') + connection.handle # 
trigger lazy-load + + self.assertEqual(connection.state, 'open') + self.assertIsNotNone(connection.handle) + self.assertEqual(connection.credentials.cluster, + '01234-23423-coffeetime') + self.assertEqual(connection.credentials.token, 'abc123') + self.assertEqual(connection.credentials.schema, 'analytics') + self.assertIsNone(connection.credentials.database) + + def test_odbc_endpoint_connection(self): + config = self._get_target_odbc_sql_endpoint(self.project_cfg) + adapter = SparkAdapter(config) + + def pyodbc_connect(connection_str, autocommit): + self.assertTrue(autocommit) + self.assertIn('driver=simba;', connection_str.lower()) + self.assertIn('port=443;', connection_str.lower()) + self.assertIn('host=myorg.sparkhost.com;', + connection_str.lower()) + self.assertIn( + 'httppath=/sql/1.0/endpoints/012342342393920a;', connection_str.lower()) # noqa + + with mock.patch('dbt.adapters.spark.connections.pyodbc.connect', new=pyodbc_connect): # noqa + connection = adapter.acquire_connection('dummy') + connection.handle # trigger lazy-load + + self.assertEqual(connection.state, 'open') + self.assertIsNotNone(connection.handle) + self.assertEqual(connection.credentials.endpoint, + '012342342393920a') + self.assertEqual(connection.credentials.token, 'abc123') + self.assertEqual(connection.credentials.schema, 'analytics') + self.assertIsNone(connection.credentials.database) + def test_parse_relation(self): self.maxDiff = None rel_type = SparkRelation.get_relation_type.Table @@ -169,10 +257,12 @@ def test_parse_relation(self): ('Partition Provider', 'Catalog') ] - input_cols = [Row(keys=['col_name', 'data_type'], values=r) for r in plain_rows] + input_cols = [Row(keys=['col_name', 'data_type'], values=r) + for r in plain_rows] config = self._get_target_http(self.project_cfg) - rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) + rows = SparkAdapter(config).parse_describe_extended( + relation, input_cols) self.assertEqual(len(rows), 3) self.assertEqual(rows[0].to_dict(omit_none=False), { 'table_database': None, @@ -247,10 +337,12 @@ def test_parse_relation_with_statistics(self): ('Partition Provider', 'Catalog') ] - input_cols = [Row(keys=['col_name', 'data_type'], values=r) for r in plain_rows] + input_cols = [Row(keys=['col_name', 'data_type'], values=r) + for r in plain_rows] config = self._get_target_http(self.project_cfg) - rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) + rows = SparkAdapter(config).parse_describe_extended( + relation, input_cols) self.assertEqual(len(rows), 1) self.assertEqual(rows[0].to_dict(omit_none=False), { 'table_database': None, @@ -283,7 +375,8 @@ def test_relation_with_database(self): adapter.Relation.create(schema='different', identifier='table') with self.assertRaises(RuntimeException): # not fine - database set - adapter.Relation.create(database='something', schema='different', identifier='table') + adapter.Relation.create( + database='something', schema='different', identifier='table') def test_profile_with_database(self): profile = { @@ -305,3 +398,23 @@ def test_profile_with_database(self): } with self.assertRaises(RuntimeException): config_from_parts_or_dicts(self.project_cfg, profile) + + def test_profile_with_cluster_and_sql_endpoint(self): + profile = { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'odbc', + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 443, + 'token': 'abc123', + 'organization': '0123456789', + 'cluster': '01234-23423-coffeetime', + 'endpoint': 
'0123412341234e', + } + }, + 'target': 'test' + } + with self.assertRaises(RuntimeException): + config_from_parts_or_dicts(self.project_cfg, profile) From 9db1e6d2fce4c37fbc0e027f9ce857a24fda5be3 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 2 Nov 2020 13:22:40 -0500 Subject: [PATCH 157/603] update docs --- .circleci/config.yml | 1 + README.md | 74 +++++++++++++++++++++++++++++++------------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 508eb0ab..bcbeceef 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,6 +65,7 @@ jobs: DBT_INVOCATION_ENV: circle ODBC_DRIVER: Simba # TODO: move to env var to test image docker: + # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest aws_auth: aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING diff --git a/README.md b/README.md index d01494fe..8c70a09c 100644 --- a/README.md +++ b/README.md @@ -27,11 +27,21 @@ For more information on using Spark with dbt, consult the dbt documentation: ### Installation This plugin can be installed via pip: -``` +```bash # Install dbt-spark from PyPi: $ pip install dbt-spark ``` +dbt-spark also supports connections via ODBC driver, but it requires [`pyodbc`](https://github.com/mkleehammer/pyodbc). You can install it seperately or via pip as well: + +```bash +# Install dbt-spark from PyPi: +$ pip install "dbt-spark[ODBC]" +``` + +See https://github.com/mkleehammer/pyodbc/wiki/Install for more info about installing `pyodbc`. + + ### Configuring your profile **Connection Method** @@ -40,18 +50,20 @@ Connections can be made to Spark in two different modes. The `http` mode is used A dbt profile can be configured to run against Spark using the following configuration: -| Option | Description | Required? | Example | -|---------|----------------------------------------------------|-------------------------|--------------------------| -| method | Specify the connection method (`thrift` or `http`) | Required | `http` | -| schema | Specify the schema (database) to build models into | Required | `analytics` | -| host | The hostname to connect to | Required | `yourorg.sparkhost.com` | -| port | The port to connect to the host on | Optional (default: 443 for `http`, 10001 for `thrift`) | `443` | -| token | The token to use for authenticating to the cluster | Required for `http` | `abc123` | -| organization | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Databricks Note | `1234567891234567` | -| cluster | The name of the cluster to connect to | Required for `http` | `01234-23423-coffeetime` | -| user | The username to use to connect to the cluster | Optional | `hadoop` | -| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | -| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | +| Option | Description | Required? 
| Example | +| --------------- | ----------------------------------------------------------------------------------- | ------------------------------------------------------------------ | ---------------------------------------------- | +| method | Specify the connection method (`thrift` or `http` or `odbc`) | Required | `http` | +| schema | Specify the schema (database) to build models into | Required | `analytics` | +| host | The hostname to connect to | Required | `yourorg.sparkhost.com` | +| port | The port to connect to the host on | Optional (default: 443 for `http` and `odbc`, 10001 for `thrift`) | `443` | +| token | The token to use for authenticating to the cluster | Required for `http` and `odbc` | `abc123` | +| organization | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Databricks Note | `1234567891234567` | +| cluster | The name of the cluster to connect to | Required for `http` and `odbc` if connecting to a specific cluster | `01234-23423-coffeetime` | +| endpoint | The ID of the SQL endpoint to connect to | Required for `odbc` if connecting to SQL endpoint | `1234567891234a` | +| driver | Path of ODBC driver installed or name of ODBC DSN configured | Required for `odbc` | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | +| user | The username to use to connect to the cluster | Optional | `hadoop` | +| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | +| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | **Databricks Note** @@ -104,6 +116,24 @@ your_profile_name: connect_timeout: 60 ``` +**ODBC connection** +``` +your_profile_name: + target: dev + outputs: + dev: + method: odbc + type: spark + schema: analytics + host: yourorg.sparkhost.com + organization: 1234567891234567 # Azure Databricks ONLY + port: 443 + token: abc123 + cluster: 01234-23423-coffeetime + driver: path/to/driver + connect_retries: 5 + connect_timeout: 60 +``` ### Usage Notes @@ -113,15 +143,15 @@ your_profile_name: The following configurations can be supplied to models run with the dbt-spark plugin: -| Option | Description | Required? | Example | -|---------|----------------------------------------------------|-------------------------|--------------------------| -| file_format | The file format to use when creating tables (`parquet`, `delta`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | `parquet`| -| location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | `/mnt/root` | -| partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `partition_1` | -| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | -| buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | -| incremental_strategy | The strategy to use for incremental models (`insert_overwrite` or `merge`). Note `merge` requires `file_format` = `delta` and `unique_key` to be specified. | Optional (default: `insert_overwrite`) | `merge` | -| persist_docs | Whether dbt should include the model description as a table `comment` | Optional | `{'relation': true}` | +| Option | Description | Required? 
| Example | +| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | -------------------- | +| file_format | The file format to use when creating tables (`parquet`, `delta`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | `parquet` | +| location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | `/mnt/root` | +| partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `partition_1` | +| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | +| buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | +| incremental_strategy | The strategy to use for incremental models (`insert_overwrite` or `merge`). Note `merge` requires `file_format` = `delta` and `unique_key` to be specified. | Optional (default: `insert_overwrite`) | `merge` | +| persist_docs | Whether dbt should include the model description as a table `comment` | Optional | `{'relation': true}` | **Incremental Models** From 95e0d72ffccfe3ea92b38f3ccbae36510b6dbe59 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 2 Nov 2020 13:26:11 -0500 Subject: [PATCH 158/603] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8c70a09c..e72ea13b 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ $ pip install dbt-spark dbt-spark also supports connections via ODBC driver, but it requires [`pyodbc`](https://github.com/mkleehammer/pyodbc). 
You can install it seperately or via pip as well: ```bash -# Install dbt-spark from PyPi: +# Install dbt-spark w/ pyodbc from PyPi: $ pip install "dbt-spark[ODBC]" ``` From 534486e3237b4a62e63eab712a267eff67ecc053 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Wed, 4 Nov 2020 09:21:26 -0500 Subject: [PATCH 159/603] Update .circleci/config.yml --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bcbeceef..a0d9ac49 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -63,7 +63,7 @@ jobs: integration-spark-databricks-odbc: environment: DBT_INVOCATION_ENV: circle - ODBC_DRIVER: Simba # TODO: move to env var to test image + ODBC_DRIVER: Simba # TODO: move env var to Docker image docker: # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest From f882e157814d68901efab06430bf3429d6403d0c Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 5 Nov 2020 15:38:36 -0500 Subject: [PATCH 160/603] Apply suggestions from code review Co-authored-by: Jeremy Cohen --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e72ea13b..e253b7ef 100644 --- a/README.md +++ b/README.md @@ -129,10 +129,14 @@ your_profile_name: organization: 1234567891234567 # Azure Databricks ONLY port: 443 token: abc123 + + # one of: cluster: 01234-23423-coffeetime + endpoint: coffee01234time + driver: path/to/driver - connect_retries: 5 - connect_timeout: 60 + connect_retries: 5 # cluster only + connect_timeout: 60 # cluster only ``` From a4d66c1816905ab57c973e58ebea1075397b0453 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 6 Nov 2020 15:27:11 -0500 Subject: [PATCH 161/603] Bump to 0.18.1 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index ddf6a8de..8678c9f8 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.18.0 +current_version = 0.18.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b9e31fd7..65f16345 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.18.0 +current_version = 0.18.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 561d56e8..f3aff899 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.18.0" +version = "0.18.1" diff --git a/requirements.txt b/requirements.txt index f60496bc..619fc530 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.18.0 +dbt-core==0.18.1 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index 152a7b69..a5a2bb0b 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.18.0' +dbt_version = '0.18.1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.18.0 vs 0.18.0a1, 0.18.0.1, ...) +# ends of it. (0.18.1 vs 0.18.1a1, 0.18.1.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 824dd9fc6532395587a9dbe5aebbb48fa53d555d Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 9 Nov 2020 09:12:38 -0500 Subject: [PATCH 162/603] Add changelog, issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 32 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 23 ++++++++++++++++ .github/pull_request_template.md | 23 ++++++++++++++++ CHANGELOG.md | 19 ++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/pull_request_template.md create mode 100644 CHANGELOG.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..974c36af --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,32 @@ +--- +name: Bug report +about: Report a bug or an issue you've found with dbt-spark +title: '' +labels: bug, triage +assignees: '' + +--- + +### Describe the bug +A clear and concise description of what the bug is. What command did you run? What happened? + +### Steps To Reproduce +In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example model code, etc is all very helpful here. + +### Expected behavior +A clear and concise description of what you expected to happen. + +### Screenshots and log output +If applicable, add screenshots or log output to help explain your problem. + +**The output of `dbt --version`:** +``` + +``` + +**The operating system you're using:** + +**The output of `python --version`:** + +### Additional context +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..5edc9f6c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,23 @@ +--- +name: Feature request +about: Suggest an idea for dbt-spark +title: '' +labels: enhancement, triage +assignees: '' + +--- + +### Describe the feature +A clear and concise description of what you want to happen. + +### Describe alternatives you've considered +A clear and concise description of any alternative solutions or features you've considered. + +### Additional context +Please include any other relevant context here. + +### Who will this benefit? +What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly. + +### Are you interested in contributing this feature? +Let us know if you want to write some code, and how we can help. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..45a73f84 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,23 @@ +resolves # + + + + +### Description + + + + +### Checklist + - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) + - [ ] I have run this code in development and it appears to resolve the stated issue + - [ ] This PR includes tests, or tests are not required/relevant for this PR + - [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt next" section. 
+ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..6ffcc870 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +## dbt-spark 0.19.0 (Release TBD) +- Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120)) + +## dbt-spark 0.18.1 (November 6, 2020) +- Allows users to specify `auth` and `kerberos_service_name` +- Updated README links ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115)) +- Add support for ODBC driver connections to Databricks clusters and endpoints ([#116](https://github.com/fishtown-analytics/dbt-spark/pull/116)) +- Support complete atomic overwrite of non-partitioned incremental models ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117)) +- Update to support dbt-core 0.18.1 ([#110](https://github.com/fishtown-analytics/dbt-spark/pull/110), [#118](https://github.com/fishtown-analytics/dbt-spark/pull/118)) + +Contributors: +- [@danielhstahl](https://github.com/danielhstahl) ([#107](https://github.com/fishtown-analytics/dbt-spark/pull/107)) +- [@collinprather](https://github.com/collinprather) ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115)) +- [@charlottevdscheun](https://github.com/charlottevdscheun) ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/117)) +- [@Fokko](https://github.com/Fokko) ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/117)) + +## dbt-spark 0.18.0 (September 18, 2020) +- Make a number of changes to support dbt-adapter-tests ([#103](https://github.com/fishtown-analytics/dbt-spark/pull/103)) +- Update to support dbt-core 0.18.0. Run CI tests against local Spark, Databricks ([#105](https://github.com/fishtown-analytics/dbt-spark/pull/105)) From 8a2fd8fc428e3f5c3964a27a5974df0968721d8f Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 9 Nov 2020 09:27:15 -0500 Subject: [PATCH 163/603] Edit changelog --- CHANGELOG.md | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ffcc870..f91b285d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,19 +1,27 @@ ## dbt-spark 0.19.0 (Release TBD) + +### Under the hood - Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120)) ## dbt-spark 0.18.1 (November 6, 2020) -- Allows users to specify `auth` and `kerberos_service_name` -- Updated README links ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115)) + +### Features +- Allows users to specify `auth` and `kerberos_service_name` ([#107](https://github.com/fishtown-analytics/dbt-spark/pull/107)) - Add support for ODBC driver connections to Databricks clusters and endpoints ([#116](https://github.com/fishtown-analytics/dbt-spark/pull/116)) + +### Under the hood +- Updated README links ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115)) - Support complete atomic overwrite of non-partitioned incremental models ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117)) - Update to support dbt-core 0.18.1 ([#110](https://github.com/fishtown-analytics/dbt-spark/pull/110), [#118](https://github.com/fishtown-analytics/dbt-spark/pull/118)) -Contributors: +### Contributors - [@danielhstahl](https://github.com/danielhstahl) ([#107](https://github.com/fishtown-analytics/dbt-spark/pull/107)) - [@collinprather](https://github.com/collinprather) 
([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115)) -- [@charlottevdscheun](https://github.com/charlottevdscheun) ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/117)) -- [@Fokko](https://github.com/Fokko) ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/117)) +- [@charlottevdscheun](https://github.com/charlottevdscheun) ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117)) +- [@Fokko](https://github.com/Fokko) ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117)) ## dbt-spark 0.18.0 (September 18, 2020) + +### Under the hood - Make a number of changes to support dbt-adapter-tests ([#103](https://github.com/fishtown-analytics/dbt-spark/pull/103)) - Update to support dbt-core 0.18.0. Run CI tests against local Spark, Databricks ([#105](https://github.com/fishtown-analytics/dbt-spark/pull/105)) From a92a41db67f4e73fe25984bf4046882afe7f1304 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Wed, 11 Nov 2020 19:08:12 -0500 Subject: [PATCH 164/603] fix typo in in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a5a2bb0b..5dc6309b 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ def _dbt_spark_version(): 'sqlparams>=3.0.0', 'thrift>=0.11.0,<0.12.0' ], - extra_requires={ + extras_require={ "ODBC": ['pyodbc>=4.0.30'], } ) From 60c34f977ad7350fca52bda551c83b7cc09a56e2 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 13 Nov 2020 08:19:54 -0500 Subject: [PATCH 165/603] =?UTF-8?q?Bump=20version:=200.18.1=20=E2=86=92=20?= =?UTF-8?q?0.18.1.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 65f16345..edc25af5 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.18.1 +current_version = 0.18.1.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index f3aff899..ff9d7d6e 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.18.1" +version = "0.18.1.1" From 1cd639afc086f312ba5d34536e0f2a575f4924c8 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 17 Nov 2020 11:23:17 -0500 Subject: [PATCH 166/603] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f91b285d..9e3b1bdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ ### Under the hood - Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120)) +## dbt-spark 0.18.1.1 (November 13, 2020) + +### Fixes +- Fix `extras_require` typo to enable `pip install dbt-spark[ODBC]` (([#121](https://github.com/fishtown-analytics/dbt-spark/pull/121)), ([#122](https://github.com/fishtown-analytics/dbt-spark/pull/122))) + ## dbt-spark 0.18.1 (November 6, 2020) ### Features From d99ed623bc3bec816812e3fe294309404b192329 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 18 Nov 2020 16:59:59 +0100 Subject: [PATCH 167/603] Enable create or replace sql syntax With Delta we can do an atomic operation to replace the current version of the table, with a new version using the create or replace syntax. 
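As a rough illustration of what this enables (relation and column names below are placeholders, not taken from this repo), the table materialization can now emit a single statement of this shape for Delta tables, instead of a drop followed by a create:

```sql
-- Sketch only: replace the table contents atomically, so readers of the
-- existing Delta table are never left without a table mid-rebuild.
create or replace table analytics.my_model
using delta
as
select id, updated_at
from analytics.stg_events
```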
--- dbt/adapters/spark/column.py | 3 ++- dbt/adapters/spark/impl.py | 7 +++++-- dbt/adapters/spark/relation.py | 3 +++ dbt/include/spark/macros/adapters.sql | 6 +++++- dbt/include/spark/macros/materializations/table.sql | 4 +++- 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index 2f5e851d..6512efed 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -2,6 +2,7 @@ from typing import TypeVar, Optional, Dict, Any from dbt.adapters.base.column import Column +from hologram import JsonDict Self = TypeVar('Self', bound='SparkColumn') @@ -54,7 +55,7 @@ def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]: table_stats[f'stats:{key}:include'] = True return table_stats - def to_dict(self, omit_none=False): + def to_dict(self, omit_none: bool = True, validate: bool = False) -> JsonDict: original_dict = super().to_dict(omit_none=omit_none) # If there are stats, merge them into the root of the dict original_stats = original_dict.pop('table_stats') diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 15f89f7d..86955eb7 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import Optional, List, Dict, Any, Union, Iterable import agate +from dbt.contracts.relation import RelationType import dbt import dbt.exceptions @@ -131,11 +132,13 @@ def list_relations_without_caching( f'got {len(row)} values, expected 4' ) _schema, name, _, information = row - rel_type = ('view' if 'Type: VIEW' in information else 'table') + rel_type = (RelationType.View if 'Type: VIEW' in information else RelationType.Table) + is_delta = 'Provider: delta' in information relation = self.Relation.create( schema=_schema, identifier=name, - type=rel_type + type=rel_type, + is_delta=is_delta ) relations.append(relation) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 4aa06f82..507f51d3 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -1,3 +1,5 @@ +from typing import Optional + from dataclasses import dataclass from dbt.adapters.base.relation import BaseRelation, Policy @@ -23,6 +25,7 @@ class SparkRelation(BaseRelation): quote_policy: SparkQuotePolicy = SparkQuotePolicy() include_policy: SparkIncludePolicy = SparkIncludePolicy() quote_character: str = '`' + is_delta: Optional[bool] = None def __post_init__(self): if self.database != self.schema and self.database: diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 02253fe5..a45b0d1a 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -77,7 +77,11 @@ {% if temporary -%} {{ create_temporary_view(relation, sql) }} {%- else -%} - create table {{ relation }} + {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %} + create or replace table {{ relation }} + {% else %} + create table {{ relation }} + {% endif %} {{ file_format_clause() }} {{ partition_cols(label="partitioned by") }} {{ clustered_cols(label="clustered by") }} diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index d772a554..adfdb7a3 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -11,7 +11,9 @@ {{ run_hooks(pre_hooks) }} -- setup: if the target relation already exists, drop it - {% 
if old_relation -%} + -- in case if the existing and future table is delta, we want to do a + -- create or replace table instead of dropping, so we don't have the table unavailable + {% if old_relation and not (old_relation.is_delta and config.get('file_format', validator=validation.any[basestring]) == 'delta') -%} {{ adapter.drop_relation(old_relation) }} {%- endif %} From 880db896cf6f4759ead578c655e2956053162892 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 18 Nov 2020 19:54:49 +0100 Subject: [PATCH 168/603] Trim the whitespace --- test/unit/test_macros.py | 71 ++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/test/unit/test_macros.py b/test/unit/test_macros.py index 325e80b6..e8d51e70 100644 --- a/test/unit/test_macros.py +++ b/test/unit/test_macros.py @@ -8,104 +8,86 @@ class TestSparkMacros(unittest.TestCase): def setUp(self): self.jinja_env = Environment(loader=FileSystemLoader('dbt/include/spark/macros'), - extensions=['jinja2.ext.do',]) + extensions=['jinja2.ext.do', ]) self.config = {} - - self.default_context = {} - self.default_context['validation'] = mock.Mock() - self.default_context['model'] = mock.Mock() - self.default_context['exceptions'] = mock.Mock() - self.default_context['config'] = mock.Mock() + self.default_context = { + 'validation': mock.Mock(), + 'model': mock.Mock(), + 'exceptions': mock.Mock(), + 'config': mock.Mock() + } self.default_context['config'].get = lambda key, default=None, **kwargs: self.config.get(key, default) - def __get_template(self, template_filename): return self.jinja_env.get_template(template_filename, globals=self.default_context) - def __run_macro(self, template, name, temporary, relation, sql): self.default_context['model'].alias = relation value = getattr(template.module, name)(temporary, relation, sql) return re.sub(r'\s\s+', ' ', value) - def test_macros_load(self): self.jinja_env.get_template('adapters.sql') - def test_macros_create_table_as(self): template = self.__get_template('adapters.sql') + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table as select 1") - + self.assertEqual(sql, "create table my_table as select 1") def test_macros_create_table_as_file_format(self): template = self.__get_template('adapters.sql') - self.config['file_format'] = 'delta' - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table using delta as select 1") - + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, "create table my_table using delta as select 1") def test_macros_create_table_as_partition(self): template = self.__get_template('adapters.sql') - self.config['partition_by'] = 'partition_1' - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table partitioned by (partition_1) as select 1") - + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, "create table my_table partitioned by (partition_1) as select 1") def test_macros_create_table_as_partitions(self): template = self.__get_template('adapters.sql') - self.config['partition_by'] = ['partition_1', 'partition_2'] - self.assertEqual(self.__run_macro(template, 
'spark__create_table_as', False, 'my_table', 'select 1'), + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, "create table my_table partitioned by (partition_1,partition_2) as select 1") - def test_macros_create_table_as_cluster(self): template = self.__get_template('adapters.sql') - self.config['clustered_by'] = 'cluster_1' self.config['buckets'] = '1' - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table clustered by (cluster_1) into 1 buckets as select 1") - + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, "create table my_table clustered by (cluster_1) into 1 buckets as select 1") def test_macros_create_table_as_clusters(self): template = self.__get_template('adapters.sql') - self.config['clustered_by'] = ['cluster_1', 'cluster_2'] self.config['buckets'] = '1' - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table clustered by (cluster_1,cluster_2) into 1 buckets as select 1") - + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, "create table my_table clustered by (cluster_1,cluster_2) into 1 buckets as select 1") def test_macros_create_table_as_location(self): template = self.__get_template('adapters.sql') - self.config['location_root'] = '/mnt/root' - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table location '/mnt/root/my_table' as select 1") - + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, "create table my_table location '/mnt/root/my_table' as select 1") def test_macros_create_table_as_comment(self): template = self.__get_template('adapters.sql') - self.config['persist_docs'] = {'relation': True} self.default_context['model'].description = 'Description Test' - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table comment 'Description Test' as select 1") - + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, "create table my_table comment 'Description Test' as select 1") def test_macros_create_table_as_all(self): template = self.__get_template('adapters.sql') @@ -118,5 +100,8 @@ def test_macros_create_table_as_all(self): self.config['persist_docs'] = {'relation': True} self.default_context['model'].description = 'Description Test' - self.assertEqual(self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1'), - "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1") + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual( + sql, + "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1" + ) From b5b69366f1906b7cccc9c14f6afd8446cd04e54b Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 18 Nov 2020 20:06:16 +0100 Subject: [PATCH 169/603] Fix the test to create 
or replace --- test/unit/test_macros.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/test_macros.py b/test/unit/test_macros.py index e8d51e70..5c5e3f8c 100644 --- a/test/unit/test_macros.py +++ b/test/unit/test_macros.py @@ -41,7 +41,7 @@ def test_macros_create_table_as_file_format(self): self.config['file_format'] = 'delta' sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() - self.assertEqual(sql, "create table my_table using delta as select 1") + self.assertEqual(sql, "create or replace table my_table using delta as select 1") def test_macros_create_table_as_partition(self): template = self.__get_template('adapters.sql') @@ -103,5 +103,5 @@ def test_macros_create_table_as_all(self): sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() self.assertEqual( sql, - "create table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1" + "create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1" ) From 8cb6be56174edf6cc8d4c5c720b5d05a468de165 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 18 Nov 2020 20:22:00 +0100 Subject: [PATCH 170/603] Make flake8 happy --- dbt/adapters/spark/column.py | 4 +++- dbt/adapters/spark/impl.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index 6512efed..d8292f6e 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -55,7 +55,9 @@ def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]: table_stats[f'stats:{key}:include'] = True return table_stats - def to_dict(self, omit_none: bool = True, validate: bool = False) -> JsonDict: + def to_dict( + self, omit_none: bool = True, validate: bool = False + ) -> JsonDict: original_dict = super().to_dict(omit_none=omit_none) # If there are stats, merge them into the root of the dict original_stats = original_dict.pop('table_stats') diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 86955eb7..0e8ec7b7 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -132,7 +132,8 @@ def list_relations_without_caching( f'got {len(row)} values, expected 4' ) _schema, name, _, information = row - rel_type = (RelationType.View if 'Type: VIEW' in information else RelationType.Table) + rel_type = RelationType.View \ + if 'Type: VIEW' in information else RelationType.Table is_delta = 'Provider: delta' in information relation = self.Relation.create( schema=_schema, From 15d5760b6269c8d44c2181a086c2d1fb3ee71f22 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 24 Nov 2020 19:33:46 -0500 Subject: [PATCH 171/603] Move PyHive reqs to extra --- dbt/adapters/spark/connections.py | 24 +++++++++++++++++++++--- setup.py | 6 ++++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 27c999cd..d3155b30 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -8,9 +8,14 @@ from dbt.utils import DECIMALS from dbt.adapters.spark import __version__ -from TCLIService.ttypes import TOperationState as ThriftState -from thrift.transport import THttpClient -from pyhive 
import hive +try: + from TCLIService.ttypes import TOperationState as ThriftState + from thrift.transport import THttpClient + from pyhive import hive +except ImportError: + ThriftState = None + THttpClient = None + hive = None try: import pyodbc except ImportError: @@ -88,6 +93,19 @@ def __post_init__(self): f" using {self.method} method to connect to Spark" ) + if ( + self.method == SparkConnectionMethod.HTTP or + self.method == SparkConnectionMethod.THRIFT + ) and not ( + ThriftState and THttpClient and hive + ): + raise dbt.exceptions.RuntimeException( + f"{self.method} connection method requires " + "additional dependencies. \n" + "Install the additional required dependencies with " + "`pip install dbt-spark[PyHive]`" + ) + @property def type(self): return 'spark' diff --git a/setup.py b/setup.py index 5dc6309b..d6be6a9f 100644 --- a/setup.py +++ b/setup.py @@ -61,11 +61,13 @@ def _dbt_spark_version(): }, install_requires=[ f'dbt-core=={dbt_version}', - 'PyHive[hive]>=0.6.0,<0.7.0', 'sqlparams>=3.0.0', - 'thrift>=0.11.0,<0.12.0' ], extras_require={ "ODBC": ['pyodbc>=4.0.30'], + "PyHive": [ + 'PyHive[hive]>=0.6.0,<0.7.0', + 'thrift>=0.11.0,<0.12.0', + ], } ) From bb11843b885d802925b2d5c46d7630d3da3d60d5 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 24 Nov 2020 19:41:24 -0500 Subject: [PATCH 172/603] Add changelog note --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e3b1bdf..f0ecd95a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## dbt-spark 0.19.0 (Release TBD) +### Breaking changes +- Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126)) + ### Under the hood - Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120)) From fe7c9587d71ee64c2eedc4554c46bbc11611e149 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 30 Nov 2020 17:47:57 -0500 Subject: [PATCH 173/603] Update installation, preferred methods --- README.md | 73 +++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 009955ae..c9ce5d7d 100644 --- a/README.md +++ b/README.md @@ -25,34 +25,39 @@ For more information on using Spark with dbt, consult the dbt documentation: - [Spark specific configs](https://docs.getdbt.com/reference/resource-configs/spark-configs/) ### Installation -This plugin can be installed via pip: +This plugin can be installed via pip. Depending on your connection method, you need to specify an extra package. + +If connecting to Databricks via ODBC driver, it requires [`pyodbc`](https://github.com/mkleehammer/pyodbc). Depending on your system1, you can install it seperately or via pip: ```bash -# Install dbt-spark from PyPi: -$ pip install dbt-spark +# Install dbt-spark from PyPi for odbc connections: +$ pip install "dbt-spark[ODBC]" ``` -dbt-spark also supports connections via ODBC driver, but it requires [`pyodbc`](https://github.com/mkleehammer/pyodbc). 
You can install it seperately or via pip as well: +If connecting to a Spark cluster via the generic `thrift` or `http` methods, it requires [`PyHive`](https://github.com/dropbox/PyHive): ```bash -# Install dbt-spark w/ pyodbc from PyPi: -$ pip install "dbt-spark[ODBC]" +# Install dbt-spark from PyPi for thrift or http connections: +$ pip install "dbt-spark[PyHive]" ``` -See https://github.com/mkleehammer/pyodbc/wiki/Install for more info about installing `pyodbc`. +1See the [`pyodbc` wiki](https://github.com/mkleehammer/pyodbc/wiki/Install) for OS-specific installation details. ### Configuring your profile **Connection Method** -Connections can be made to Spark in two different modes. The `http` mode is used when connecting to a managed service such as Databricks, which provides an HTTP endpoint; the `thrift` mode is used to connect directly to the master node of a cluster (either on-premise or in the cloud). +Connections can be made to Spark in three different modes: +- `odbc` is the preferred mode when connecting to Databricks. It supports connecting to a SQL Endpoint or an all-purpose interactive cluster. +- `http` is a more generic mode for connecting to a managed service that provides an HTTP endpoint. Currently, this includes connections to a Databricks interactive cluster. +- `thrift` connects directly to the lead node of a cluster, either locally hosted / on premise or in the cloud (e.g. Amazon EMR). -A dbt profile can be configured to run against Spark using the following configuration: +A dbt profile for Spark connections support the following configurations: | Option | Description | Required? | Example | | --------------- | ----------------------------------------------------------------------------------- | ------------------------------------------------------------------ | ---------------------------------------------- | -| method | Specify the connection method (`thrift` or `http` or `odbc`) | Required | `http` | +| method | Specify the connection method (`odbc` or `thrift` or `http`) | Required | `odbc` | | schema | Specify the schema (database) to build models into | Required | `analytics` | | host | The hostname to connect to | Required | `yourorg.sparkhost.com` | | port | The port to connect to the host on | Optional (default: 443 for `http` and `odbc`, 10001 for `thrift`) | `443` | @@ -62,45 +67,43 @@ A dbt profile can be configured to run against Spark using the following configu | endpoint | The ID of the SQL endpoint to connect to | Required for `odbc` if connecting to SQL endpoint | `1234567891234a` | | driver | Path of ODBC driver installed or name of ODBC DSN configured | Required for `odbc` | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | | user | The username to use to connect to the cluster | Optional | `hadoop` | -| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional (default: 10) | `60` | -| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional (default: 0) | `5` | - -**Databricks Note** +| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional, relevant for `thrift` + `http` (default: 10) | `60` | +| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional, relevant for `thrift` + `http` (default: 0) | `5` | -AWS and Azure Databricks have differences in their connections, likely due to differences in how their URLs are generated 
between the two services. +**Databricks** connections differ based on the cloud provider, likely due to differences in how their URLs are generated between the two services. -**Organization:** To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. This is a string field; if there is a leading zero, be sure to include it. +- **Organization:** To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. This is a string field; if there is a leading zero, be sure to include it. -**Port:** Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. +- **Port:** Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. -**Host:** The host field for Databricks can be found at the start of your workspace or cluster url: `region.azuredatabricks.net` for Azure, or `account.cloud.databricks.com` for AWS. Do not include `https://`. +- **Host:** The host field for Databricks can be found at the start of your workspace or cluster url: `region.azuredatabricks.net` for Azure, or `account.cloud.databricks.com` for AWS. Do not include `https://`. -**Usage with Amazon EMR** - -To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo /usr/lib/spark/sbin/start-thriftserver.sh` on the master node of the cluster to start the Thrift server (see https://aws.amazon.com/premiumsupport/knowledge-center/jdbc-connection-emr/ for further context). You will also need to connect to port `10001`, which will connect to the Spark backend Thrift server; port `10000` will instead connect to a Hive backend, which will not work correctly with dbt. +**Amazon EMR**: To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo /usr/lib/spark/sbin/start-thriftserver.sh` on the master node of the cluster to start the Thrift server (see https://aws.amazon.com/premiumsupport/knowledge-center/jdbc-connection-emr/ for further context). You will also need to connect to port `10001`, which will connect to the Spark backend Thrift server; port `10000` will instead connect to a Hive backend, which will not work correctly with dbt. **Example profiles.yml entries:** -**http, e.g. 
Databricks** +**odbc** ``` your_profile_name: target: dev outputs: dev: - method: http type: spark - schema: analytics - host: yourorg.sparkhost.com + method: odbc + driver: path/to/driver + host: yourorg.databricks.com organization: 1234567891234567 # Azure Databricks ONLY port: 443 token: abc123 + schema: analytics + + # one of: cluster: 01234-23423-coffeetime - connect_retries: 5 - connect_timeout: 60 + endpoint: coffee01234time ``` -**Thrift connection** +**thrift** ``` your_profile_name: target: dev @@ -116,27 +119,23 @@ your_profile_name: connect_timeout: 60 ``` -**ODBC connection** + +**http** ``` your_profile_name: target: dev outputs: dev: - method: odbc + method: http type: spark schema: analytics host: yourorg.sparkhost.com organization: 1234567891234567 # Azure Databricks ONLY port: 443 token: abc123 - - # one of: cluster: 01234-23423-coffeetime - endpoint: coffee01234time - - driver: path/to/driver - connect_retries: 5 # cluster only - connect_timeout: 60 # cluster only + connect_retries: 5 + connect_timeout: 60 ``` From 2aab3b415899c0968c0d82cabb2b8cc44e3c26b2 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 30 Nov 2020 18:09:29 -0500 Subject: [PATCH 174/603] Rework table. Add kerberos --- README.md | 61 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index c9ce5d7d..8e3dc22d 100644 --- a/README.md +++ b/README.md @@ -55,20 +55,27 @@ Connections can be made to Spark in three different modes: A dbt profile for Spark connections support the following configurations: -| Option | Description | Required? | Example | -| --------------- | ----------------------------------------------------------------------------------- | ------------------------------------------------------------------ | ---------------------------------------------- | -| method | Specify the connection method (`odbc` or `thrift` or `http`) | Required | `odbc` | -| schema | Specify the schema (database) to build models into | Required | `analytics` | -| host | The hostname to connect to | Required | `yourorg.sparkhost.com` | -| port | The port to connect to the host on | Optional (default: 443 for `http` and `odbc`, 10001 for `thrift`) | `443` | -| token | The token to use for authenticating to the cluster | Required for `http` and `odbc` | `abc123` | -| organization | The id of the Azure Databricks workspace being used; only for Azure Databricks | See Databricks Note | `1234567891234567` | -| cluster | The name of the cluster to connect to | Required for `http` and `odbc` if connecting to a specific cluster | `01234-23423-coffeetime` | -| endpoint | The ID of the SQL endpoint to connect to | Required for `odbc` if connecting to SQL endpoint | `1234567891234a` | -| driver | Path of ODBC driver installed or name of ODBC DSN configured | Required for `odbc` | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | -| user | The username to use to connect to the cluster | Optional | `hadoop` | -| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | Optional, relevant for `thrift` + `http` (default: 10) | `60` | -| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | Optional, relevant for `thrift` + `http` (default: 0) | `5` | +**Key**: +- ✅ Required +- ❔ Optional +- ❌ Not used + +| Option | Description | ODBC | Thrift | HTTP | Example | +|-|-|-|-|-|-| +| method | Specify the connection method (`odbc` or `thrift` or `http`) | ✅ 
| ✅ | ✅ | `odbc` | +| schema | Specify the schema (database) to build models into | ✅ | ✅ | ✅ | `analytics` | +| host | The hostname to connect to | ✅ | ✅ | ✅ | `yourorg.sparkhost.com` | +| port | The port to connect to the host on | ❔ (default: 443) | ❔ (default: 443) | ❔ (default: 10001) | `443` | +| token | The token to use for authenticating to the cluster | ✅ | ❌ | ✅ | `abc123` | +| auth | The value of `hive.server2.authentication` | ❌ | ❔ | ❌ | `KERBEROS` | +| kerberos_service_name | Use with `auth='KERBEROS'` | ❌ | ❔ | ❌ | `hive` | +| organization | The id of the Azure Databricks workspace being used | See note | ❌ | See note | `1234567891234567` | +| cluster | The name of the cluster to connect to | One of `cluster` or `endpoint` is ✅ | ❌ | ✅ | `01234-23423-coffeetime` | +| endpoint | The ID of the SQL endpoint to connect to | One of `cluster` or `endpoint` is ✅ | ❌ | ❌ | `1234567891234a` | +| driver | Path of ODBC driver installed or name of ODBC DSN configured | ✅ | ❌ | ❌ | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | +| user | The username to use to connect to the cluster | ❔ | ❔ | ❔ | `hadoop` | +| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | ❌ | ❔ (default: 10) | ❔ (default: 10) | `60` | +| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | ❌ | ❔ (default: 0) | ❔ (default: 0) | `5` | **Databricks** connections differ based on the cloud provider, likely due to differences in how their URLs are generated between the two services. @@ -83,7 +90,7 @@ A dbt profile for Spark connections support the following configurations: **Example profiles.yml entries:** -**odbc** +**ODBC** ``` your_profile_name: target: dev @@ -94,7 +101,7 @@ your_profile_name: driver: path/to/driver host: yourorg.databricks.com organization: 1234567891234567 # Azure Databricks ONLY - port: 443 + port: 443 # default token: abc123 schema: analytics @@ -103,37 +110,43 @@ your_profile_name: endpoint: coffee01234time ``` -**thrift** +**Thrift** ``` your_profile_name: target: dev outputs: dev: - method: thrift type: spark - schema: analytics + method: thrift host: 127.0.0.1 - port: 10001 + port: 10001 # default + schema: analytics + + # optional user: hadoop + auth: KERBEROS + kerberos_service_name: hive connect_retries: 5 connect_timeout: 60 ``` -**http** +**HTTP** ``` your_profile_name: target: dev outputs: dev: - method: http type: spark - schema: analytics + method: http host: yourorg.sparkhost.com organization: 1234567891234567 # Azure Databricks ONLY - port: 443 + port: 443 # default token: abc123 + schema: analytics cluster: 01234-23423-coffeetime + + # optional connect_retries: 5 connect_timeout: 60 ``` From 637a388f740f75cff2cf2613ea2bfa3bd58d016e Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 30 Nov 2020 18:18:21 -0500 Subject: [PATCH 175/603] Even better table --- README.md | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 8e3dc22d..7d309ece 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,9 @@ # dbt-spark -This plugin ports [dbt](https://getdbt.com) functionality to Spark. It supports -running dbt against Spark clusters that are hosted via Databricks (AWS + Azure), -Amazon EMR, or Docker. +This plugin ports [dbt](https://getdbt.com) functionality to Spark. It supports running dbt against Spark clusters that are hosted via Databricks (AWS + Azure), Amazon EMR, or Docker. 
-We have not tested extensively against older versions of Apache Spark. The -plugin uses syntax that requires version 2.2.0 or newer. +We have not tested extensively against older versions of Apache Spark. The plugin uses syntax that requires version 2.2.0 or newer. Some features require Spark 3.0 and/or Delta Lake. ### Documentation For more information on using Spark with dbt, consult the dbt documentation: @@ -25,7 +22,7 @@ For more information on using Spark with dbt, consult the dbt documentation: - [Spark specific configs](https://docs.getdbt.com/reference/resource-configs/spark-configs/) ### Installation -This plugin can be installed via pip. Depending on your connection method, you need to specify an extra package. +This plugin can be installed via pip. Depending on your connection method, you need to specify an extra requirement. If connecting to Databricks via ODBC driver, it requires [`pyodbc`](https://github.com/mkleehammer/pyodbc). Depending on your system1, you can install it seperately or via pip: @@ -57,32 +54,30 @@ A dbt profile for Spark connections support the following configurations: **Key**: - ✅ Required -- ❔ Optional - ❌ Not used +- ❔ Optional (followed by `default value` in parentheses) | Option | Description | ODBC | Thrift | HTTP | Example | |-|-|-|-|-|-| | method | Specify the connection method (`odbc` or `thrift` or `http`) | ✅ | ✅ | ✅ | `odbc` | | schema | Specify the schema (database) to build models into | ✅ | ✅ | ✅ | `analytics` | | host | The hostname to connect to | ✅ | ✅ | ✅ | `yourorg.sparkhost.com` | -| port | The port to connect to the host on | ❔ (default: 443) | ❔ (default: 443) | ❔ (default: 10001) | `443` | +| port | The port to connect to the host on | ❔ (`443`) | ❔ (`443`) | ❔ (`10001`) | `443` | | token | The token to use for authenticating to the cluster | ✅ | ❌ | ✅ | `abc123` | | auth | The value of `hive.server2.authentication` | ❌ | ❔ | ❌ | `KERBEROS` | | kerberos_service_name | Use with `auth='KERBEROS'` | ❌ | ❔ | ❌ | `hive` | -| organization | The id of the Azure Databricks workspace being used | See note | ❌ | See note | `1234567891234567` | -| cluster | The name of the cluster to connect to | One of `cluster` or `endpoint` is ✅ | ❌ | ✅ | `01234-23423-coffeetime` | -| endpoint | The ID of the SQL endpoint to connect to | One of `cluster` or `endpoint` is ✅ | ❌ | ❌ | `1234567891234a` | +| organization | Azure Databricks workspace ID (see note) | ❔ | ❌ | ❔ | `1234567891234567` | +| cluster | The name of the cluster to connect to | ✅ (unless `endpoint`) | ❌ | ✅ | `01234-23423-coffeetime` | +| endpoint | The ID of the SQL endpoint to connect to | ✅ (unless `cluster`) | ❌ | ❌ | `1234567891234a` | | driver | Path of ODBC driver installed or name of ODBC DSN configured | ✅ | ❌ | ❌ | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | | user | The username to use to connect to the cluster | ❔ | ❔ | ❔ | `hadoop` | -| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | ❌ | ❔ (default: 10) | ❔ (default: 10) | `60` | -| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | ❌ | ❔ (default: 0) | ❔ (default: 0) | `5` | +| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | ❌ | ❔ (`10`) | ❔ (`10`) | `60` | +| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | ❌ | ❔ (`0`) | ❔ (`0`) | `5` | -**Databricks** connections differ based on the cloud provider, likely 
due to differences in how their URLs are generated between the two services. +**Databricks** connections differ based on the cloud provider: - **Organization:** To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. This is a string field; if there is a leading zero, be sure to include it. -- **Port:** Please ignore all references to port 15001 in the databricks-connect docs as that is specific to that tool; port 443 is used for dbt-spark's https connection. - - **Host:** The host field for Databricks can be found at the start of your workspace or cluster url: `region.azuredatabricks.net` for Azure, or `account.cloud.databricks.com` for AWS. Do not include `https://`. **Amazon EMR**: To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo /usr/lib/spark/sbin/start-thriftserver.sh` on the master node of the cluster to start the Thrift server (see https://aws.amazon.com/premiumsupport/knowledge-center/jdbc-connection-emr/ for further context). You will also need to connect to port `10001`, which will connect to the Spark backend Thrift server; port `10000` will instead connect to a Hive backend, which will not work correctly with dbt. @@ -100,7 +95,7 @@ your_profile_name: method: odbc driver: path/to/driver host: yourorg.databricks.com - organization: 1234567891234567 # Azure Databricks ONLY + organization: 1234567891234567 # Azure Databricks only port: 443 # default token: abc123 schema: analytics @@ -140,7 +135,7 @@ your_profile_name: type: spark method: http host: yourorg.sparkhost.com - organization: 1234567891234567 # Azure Databricks ONLY + organization: 1234567891234567 # Azure Databricks only port: 443 # default token: abc123 schema: analytics @@ -221,19 +216,19 @@ A `docker-compose` environment starts a Spark Thrift server and a Postgres datab docker-compose up ``` -Your profile should look like this: +Create a profile like this one: ``` -your_profile_name: +spark-testing: target: local outputs: local: - method: thrift type: spark - schema: analytics + method: thrift host: 127.0.0.1 port: 10000 user: dbt + schema: analytics connect_retries: 5 connect_timeout: 60 ``` From 8794c896937e2ac6bf46b9c3d5252d13d06bbf4a Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 11 Dec 2020 11:12:30 +0100 Subject: [PATCH 176/603] Use is_delta instead of file_format == 'delta' --- .../macros/materializations/incremental.sql | 21 +++++++++---------- .../macros/materializations/snapshot.sql | 14 ++++++------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 000659a8..2fd4f9d3 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -24,7 +24,7 @@ {% do return(file_format) %} {% endmacro %} -{% macro dbt_spark_validate_get_incremental_strategy(file_format) %} +{% macro dbt_spark_validate_get_incremental_strategy(relation) %} {#-- Find and validate the incremental strategy #} {%- set strategy = config.get("incremental_strategy", default="insert_overwrite") -%} @@ -41,7 +41,7 @@ {% if strategy not in ['merge', 'insert_overwrite'] %} {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} 
{%-else %} - {% if strategy == 'merge' and file_format != 'delta' %} + {% if strategy == 'merge' and not relation.is_delta %} {% do exceptions.raise_compiler_error(invalid_merge_msg) %} {% endif %} {% endif %} @@ -49,15 +49,14 @@ {% do return(strategy) %} {% endmacro %} -{% macro dbt_spark_validate_merge(file_format) %} +{% macro dbt_spark_validate_merge(relation) %} {% set invalid_file_format_msg -%} You can only choose the 'merge' incremental_strategy when file_format is set to 'delta' {%- endset %} - {% if file_format != 'delta' %} + {% if not relation.is_delta %} {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} {% endif %} - {% endmacro %} @@ -84,20 +83,20 @@ {% materialization incremental, adapter='spark' -%} + {% set target_relation = this %} + {% set existing_relation = load_relation(this) %} + {% set tmp_relation = make_temp_relation(this) %} + {#-- Validate early so we don't run SQL if the file_format is invalid --#} {% set file_format = dbt_spark_validate_get_file_format() -%} {#-- Validate early so we don't run SQL if the strategy is invalid --#} - {% set strategy = dbt_spark_validate_get_incremental_strategy(file_format) -%} + {% set strategy = dbt_spark_validate_get_incremental_strategy(target_relation) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} - {% set target_relation = this %} - {% set existing_relation = load_relation(this) %} - {% set tmp_relation = make_temp_relation(this) %} - {% if strategy == 'merge' %} {%- set unique_key = config.require('unique_key') -%} - {% do dbt_spark_validate_merge(file_format) %} + {% do dbt_spark_validate_merge(target_relation) %} {% endif %} {% if config.get('partition_by') %} diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 78214641..fc91329f 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -80,8 +80,14 @@ Invalid file format: {{ file_format }} Snapshot functionality requires file_format be set to 'delta' {%- endset %} + + {% set target_relation_exists, target_relation = get_or_create_relation( + database=none, + schema=model.schema, + identifier=target_table, + type='table') -%} - {%- if file_format != 'delta' -%} + {%- if not target_relation_exists.is_delta -%} {% do exceptions.raise_compiler_error(invalid_format_msg) %} {% endif %} @@ -89,12 +95,6 @@ {% do create_schema(model.database, model.schema) %} {% endif %} - {% set target_relation_exists, target_relation = get_or_create_relation( - database=none, - schema=model.schema, - identifier=target_table, - type='table') -%} - {%- if not target_relation.is_table -%} {% do exceptions.relation_wrong_type(target_relation, 'table') %} {%- endif -%} From f1af6110280779c555f89ea8fff832404bfa9dd4 Mon Sep 17 00:00:00 2001 From: Daniel van der Ende Date: Wed, 23 Dec 2020 10:05:16 +0100 Subject: [PATCH 177/603] Handle case of 0 connect retries better With a default of 0 connect_retries, confusing log messages show up. The logs suggest that a retry is going to be carried out, but it isn't. Also, a sleep is carried out, even if no retries are to happen. This check prevents this from happening. 
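For illustration, a minimal sketch of the guarded retry loop (this is not the code in `dbt/adapters/spark/connections.py`; the helper name `open_with_retries` and the `print`-based warning are assumptions for the example — the actual change is the one-line condition in the diff below):

```python
import time


def open_with_retries(connect, connect_retries=0, connect_timeout=10):
    """Call `connect()` up to 1 + connect_retries times before giving up."""
    last_exc = None
    for attempt in range(1 + connect_retries):
        try:
            return connect()
        except Exception as exc:  # dbt-spark narrows this to retryable errors
            last_exc = exc
            # Guard: with connect_retries == 0 there is nothing to retry, so skip
            # the misleading "Retrying in N seconds" warning and the sleep.
            if connect_retries > 0 and attempt < connect_retries:
                print(
                    f"Warning: {exc}\n\tRetrying in {connect_timeout} seconds "
                    f"({attempt + 1} of {connect_retries})"
                )
                time.sleep(connect_timeout)
    raise last_exc
```

With the default `connect_retries: 0`, the loop makes a single attempt and raises immediately, with no spurious retry warning or sleep.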
--- CHANGELOG.md | 6 ++++++ dbt/adapters/spark/connections.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0ecd95a..6f1db87a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ ### Under the hood - Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120)) +### Fixes +- Handle case of 0 retries better for HTTP Spark Connections ([#132](https://github.com/fishtown-analytics/dbt-spark/pull/132)) + +### Contributors +- [@danielvdende](https://github.com/danielvdende) ([#132](https://github.com/fishtown-analytics/dbt-spark/pull/132)) + ## dbt-spark 0.18.1.1 (November 13, 2020) ### Fixes diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index d3155b30..11592d66 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -401,7 +401,7 @@ def open(cls, connection): msg += ', is your token valid?' raise dbt.exceptions.FailedToConnectException(msg) from e retryable_message = _is_retryable_error(e) - if retryable_message: + if retryable_message and creds.connect_retries > 0: msg = ( f"Warning: {retryable_message}\n\tRetrying in " f"{creds.connect_timeout} seconds " From a9478eb4831d68662f1eca25ce2a76ce9975bba6 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 23 Dec 2020 20:09:40 +0100 Subject: [PATCH 178/603] Update dbt/include/spark/macros/materializations/snapshot.sql Co-authored-by: Jeremy Cohen --- dbt/include/spark/macros/materializations/snapshot.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index fc91329f..d2c72d78 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -87,7 +87,10 @@ identifier=target_table, type='table') -%} - {%- if not target_relation_exists.is_delta -%} +{%- if not (target_relation.is_delta or ( + not target_relation_exists and + config.get('file_format', validator=validation.any[basestring]) == 'delta' +)) -%} {% do exceptions.raise_compiler_error(invalid_format_msg) %} {% endif %} From 589afedfa951360a5b6b4cd74a9515e99fcfc4cb Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 23 Dec 2020 21:39:02 +0100 Subject: [PATCH 179/603] Clean up error messages when doing snapshots --- .../macros/materializations/snapshot.sql | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index d2c72d78..cd43253e 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -75,25 +75,30 @@ {%- set strategy_name = config.get('strategy') -%} {%- set unique_key = config.get('unique_key') %} {%- set file_format = config.get('file_format', 'parquet') -%} - - {% set invalid_format_msg -%} - Invalid file format: {{ file_format }} - Snapshot functionality requires file_format be set to 'delta' - {%- endset %} {% set target_relation_exists, target_relation = get_or_create_relation( database=none, schema=model.schema, identifier=target_table, type='table') -%} - -{%- if not (target_relation.is_delta or ( - not target_relation_exists and - config.get('file_format', validator=validation.any[basestring]) == 'delta' -)) -%} + + 
{%- if file_format != 'delta' -%} + {% set invalid_format_msg -%} + Invalid file format: {{ file_format }} + Snapshot functionality requires file_format be set to 'delta' + {%- endset %} {% do exceptions.raise_compiler_error(invalid_format_msg) %} {% endif %} + {%- if target_relation_exists -%} + {%- if not target_relation.is_delta -%} + {% set invalid_format_msg -%} + The existing table {{ model.schema }}.{{ target_table }} is in another format than 'delta' + {%- endset %} + {% do exceptions.raise_compiler_error(invalid_format_msg) %} + {% endif %} + {% endif %} + {% if not adapter.check_schema_exists(model.database, model.schema) %} {% do create_schema(model.database, model.schema) %} {% endif %} From 1f8febd3babfa74ed48e23b071b03c40b297bf76 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 23 Dec 2020 21:43:18 +0100 Subject: [PATCH 180/603] Revert incremental for now --- .../macros/materializations/incremental.sql | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 2fd4f9d3..000659a8 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -24,7 +24,7 @@ {% do return(file_format) %} {% endmacro %} -{% macro dbt_spark_validate_get_incremental_strategy(relation) %} +{% macro dbt_spark_validate_get_incremental_strategy(file_format) %} {#-- Find and validate the incremental strategy #} {%- set strategy = config.get("incremental_strategy", default="insert_overwrite") -%} @@ -41,7 +41,7 @@ {% if strategy not in ['merge', 'insert_overwrite'] %} {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} {%-else %} - {% if strategy == 'merge' and not relation.is_delta %} + {% if strategy == 'merge' and file_format != 'delta' %} {% do exceptions.raise_compiler_error(invalid_merge_msg) %} {% endif %} {% endif %} @@ -49,14 +49,15 @@ {% do return(strategy) %} {% endmacro %} -{% macro dbt_spark_validate_merge(relation) %} +{% macro dbt_spark_validate_merge(file_format) %} {% set invalid_file_format_msg -%} You can only choose the 'merge' incremental_strategy when file_format is set to 'delta' {%- endset %} - {% if not relation.is_delta %} + {% if file_format != 'delta' %} {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} {% endif %} + {% endmacro %} @@ -83,20 +84,20 @@ {% materialization incremental, adapter='spark' -%} - {% set target_relation = this %} - {% set existing_relation = load_relation(this) %} - {% set tmp_relation = make_temp_relation(this) %} - {#-- Validate early so we don't run SQL if the file_format is invalid --#} {% set file_format = dbt_spark_validate_get_file_format() -%} {#-- Validate early so we don't run SQL if the strategy is invalid --#} - {% set strategy = dbt_spark_validate_get_incremental_strategy(target_relation) -%} + {% set strategy = dbt_spark_validate_get_incremental_strategy(file_format) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} + {% set target_relation = this %} + {% set existing_relation = load_relation(this) %} + {% set tmp_relation = make_temp_relation(this) %} + {% if strategy == 'merge' %} {%- set unique_key = config.require('unique_key') -%} - {% do dbt_spark_validate_merge(target_relation) %} + {% do dbt_spark_validate_merge(file_format) %} {% endif %} {% if config.get('partition_by') %} From 93cc5b3894a02df210871a47ccedecacb0cd1677 Mon Sep 17 00:00:00 2001 From: Jeremy 
Cohen Date: Thu, 31 Dec 2020 12:20:33 +0100 Subject: [PATCH 181/603] Disable test failing on endpoint --- test/integration/spark-databricks-odbc-sql-endpoint.dbtspec | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec b/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec index d0e23dab..0251cb31 100644 --- a/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec +++ b/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec @@ -33,7 +33,8 @@ projects: dbt_project_yml: *file_format_delta sequences: test_dbt_empty: empty - test_dbt_base: base + # The SQL Endpoint no longer supports `set` ?? + # test_dbt_base: base test_dbt_ephemeral: ephemeral # The SQL Endpoint does not support `create temporary view` # test_dbt_incremental: incremental From 3ebb8709b7c580724a24962b30d30749aa2f1b23 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Thu, 31 Dec 2020 13:17:35 +0100 Subject: [PATCH 182/603] Add me to the contributors list --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0ecd95a..af33934d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,12 @@ - Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126)) ### Under the hood +- Enable `CREATE OR REPLACE` support when using Delta. Instead of dropping and recreating the table, it will keep the existing table, and add a new version as supported by Delta. This will ensure that the table stays available when running the pipeline, and you can track the history. - Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120)) +### Contributors +- [@Fokko](https://github.com/Fokko) ([#125](https://github.com/fishtown-analytics/dbt-spark/pull/125)) + ## dbt-spark 0.18.1.1 (November 13, 2020) ### Fixes From 23cdfd8975ad3fa8c7d32d9ff91818635d33f342 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 4 Jan 2021 10:47:14 -0500 Subject: [PATCH 183/603] bump dbt --- .bumpversion-dbt.cfg | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 8678c9f8..be9371e6 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.18.1 +current_version = 0.19.0rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/requirements.txt b/requirements.txt index 619fc530..b323e209 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.18.1 +dbt-core==0.19.0rc1 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index d6be6a9f..af036146 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.18.1' +dbt_version = '0.19.0rc1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.18.1 vs 0.18.1a1, 0.18.1.1, ...) +# ends of it. (0.19.0rc1 vs 0.19.0rc1a1, 0.19.0rc1.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 0b914d7158dd2a21beb7e4157e9ed952c5fa569a Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 4 Jan 2021 12:05:02 -0500 Subject: [PATCH 184/603] dbt 0.19.0rc1 updates --- dbt/adapters/spark/connections.py | 2 +- test/unit/test_adapter.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 11592d66..76e574df 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -273,7 +273,7 @@ def cancel(self, connection): connection.handle.cancel() @classmethod - def get_status(cls, cursor): + def get_response(cls, cursor): return 'OK' # No transactions on Spark.... diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 2cbd7810..30f00821 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -22,7 +22,8 @@ def setUp(self): 'quoting': { 'identifier': False, 'schema': False, - } + }, + 'config-version': 2 } def _get_target_http(self, project): From 0a900d0ce28e447295af71194bfcf55a3b1f7153 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 7 Jan 2021 00:10:50 -0500 Subject: [PATCH 185/603] update integration tests --- dbt/adapters/spark/__version__.py | 2 +- dbt/include/spark/macros/materializations/seed.sql | 2 +- dbt/include/spark/macros/materializations/snapshot.sql | 2 +- dev_requirements.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index ff9d7d6e..1b44f861 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.18.1.1" +version = "0.19.0rc1" diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index 1f088e83..c857f013 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -80,7 +80,7 @@ {%- set target_relation = api.Relation.create(database=database, schema=schema, identifier=identifier, type='table') -%} {%- set agate_table = load_agate_table() -%} - {%- do store_result('agate_table', status='OK', agate_table=agate_table) -%} + {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%} {{ run_hooks(pre_hooks, inside_transaction=False) }} diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index cd43253e..04ee1f48 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -116,7 +116,7 @@ {% if not target_relation_exists %} - {% set build_sql = build_snapshot_table(strategy, model['injected_sql']) %} + {% set build_sql = build_snapshot_table(strategy, model['compiled_sql']) %} {% set final_sql = create_table_as(False, target_relation, build_sql) %} {% else %} diff --git a/dev_requirements.txt b/dev_requirements.txt index c657c54f..d662a12b 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -10,6 +10,6 @@ pytest-xdist>=2.1.0,<3 flaky>=3.5.3,<4 # Test requirements -pytest-dbt-adapter==0.3.0 +pytest-dbt-adapter==0.4.0 sasl==0.2.1 thrift_sasl==0.4.1 From 8ba1fc1b7ae1dc25746bc2b31451cc9c06382899 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 12 Jan 2021 13:28:25 +0100 Subject: [PATCH 186/603] 1. 
partitionOverwriteMode = DYNAMIC iff insert_overwrite + partition_by --- .../spark/macros/materializations/incremental.sql | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 000659a8..921c165d 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -100,16 +100,10 @@ {% do dbt_spark_validate_merge(file_format) %} {% endif %} - {% if config.get('partition_by') %} - {% call statement() %} - set spark.sql.sources.partitionOverwriteMode = DYNAMIC - {% endcall %} + {% if strategy == 'insert_overwrite' and config.get('partition_by') %} + set spark.sql.sources.partitionOverwriteMode = DYNAMIC {% endif %} - {% call statement() %} - set spark.sql.hive.convertMetastoreParquet = false - {% endcall %} - {{ run_hooks(pre_hooks) }} {% if existing_relation is none %} From 6de3d070d4e77487ecde6d7bb14fe2056beb6fb4 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 12 Jan 2021 13:31:04 +0100 Subject: [PATCH 187/603] 2. Error if insert_overwrite + delta --- .../macros/materializations/incremental.sql | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index 921c165d..b824c482 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -37,6 +37,18 @@ Invalid incremental strategy provided: {{ strategy }} You can only choose this strategy when file_format is set to 'delta' {%- endset %} + + {% set invalid_insert_overwrite_delta_msg -%} + Invalid incremental strategy provided: {{ strategy }} + You cannot use this strategy when file_format is set to 'delta' + Use the `merge` strategy instead + {%- endset %} + + {% set invalid_insert_overwrite_endpoint_msg -%} + Invalid incremental strategy provided: {{ strategy }} + You cannot use this strategy when connecting via endpoint + Use `incremental_strategy: merge` with `file_format: delta` instead + {%- endset %} {% if strategy not in ['merge', 'insert_overwrite'] %} {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} @@ -44,6 +56,12 @@ {% if strategy == 'merge' and file_format != 'delta' %} {% do exceptions.raise_compiler_error(invalid_merge_msg) %} {% endif %} + {% if strategy == 'insert_overwrite' and file_format == 'delta' %} + {% do exceptions.raise_compiler_error(invalid_insert_overwrite_delta_msg) %} + {% endif %} + {% if strategy == 'insert_overwrite' and target.endpoint %} + {% do exceptions.raise_compiler_error(invalid_insert_overwrite_endpoint_msg) %} + {% endif %} {% endif %} {% do return(strategy) %} From fb6a4bb6096bd0512771c6b7c9db01d758f4f7b3 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 12 Jan 2021 13:47:59 +0100 Subject: [PATCH 188/603] 3. Append-only insert if no partitions --- README.md | 3 ++- .../spark/macros/materializations/incremental.sql | 5 ++++- test/integration/spark-thrift.dbtspec | 11 ----------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 7d309ece..9297c983 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,8 @@ The following configurations can be supplied to models run with the dbt-spark pl **Incremental Models** To use incremental models, specify a `partition_by` clause in your model config. 
The default incremental strategy used is `insert_overwrite`, which will overwrite the partitions included in your query. Be sure to re-select _all_ of the relevant -data for a partition when using the `insert_overwrite` strategy. If a `partition_by` config is not specified, dbt will overwrite the entire table as an atomic operation, replacing it with new data of the same schema. This is analogous to `truncate` + `insert`. +data for a partition when using the `insert_overwrite` strategy. If a `partition_by` config is not specified, dbt will simply +append new data to the model, without overwriting any existing data. ``` {{ config( diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index b824c482..dd159cf1 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -1,8 +1,11 @@ {% macro get_insert_overwrite_sql(source_relation, target_relation) %} + {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%} + {%- set insert = 'insert overwrite' if cols is not none else 'insert into' -%} + {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} - insert overwrite table {{ target_relation }} + {{ insert }} table {{ target_relation }} {{ partition_cols(label="partition") }} select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} diff --git a/test/integration/spark-thrift.dbtspec b/test/integration/spark-thrift.dbtspec index 58f5a906..85b843f3 100644 --- a/test/integration/spark-thrift.dbtspec +++ b/test/integration/spark-thrift.dbtspec @@ -7,17 +7,6 @@ target: connect_retries: 5 connect_timeout: 60 schema: "analytics_{{ var('_dbt_random_suffix') }}" -projects: - - overrides: incremental - paths: - "models/incremental.sql": - materialized: incremental - body: "select * from {{ source('raw', 'seed') }}" - facts: - base: - rowcount: 10 - added: - rowcount: 20 sequences: test_dbt_empty: empty test_dbt_base: base From 6f7e1f2f4241e1890c5f236f8338469392a6d7f1 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 12 Jan 2021 14:08:05 +0100 Subject: [PATCH 189/603] 4. Append-only merge if no unique_key --- README.md | 3 +- .../macros/materializations/incremental.sql | 32 ++++++++----------- .../integration/spark-databricks-http.dbtspec | 11 ------- .../spark-databricks-odbc-cluster.dbtspec | 10 ------ ...spark-databricks-odbc-sql-endpoint.dbtspec | 10 ------ 5 files changed, 15 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 9297c983..e9cee6ad 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,8 @@ where date_day::date >= '2019-01-01' group by 1 ``` -The `merge` strategy is only supported when using file_format `delta` (supported in Databricks). It also requires you to specify a `unique key` to match existing records. +The `merge` strategy is only supported when using file_format `delta` (supported in Databricks). If a `unique key` is specified, the statement will match existing records and overwrite them with new values. If a `unique key` config is not specified, dbt will simply +append new data to the model, without overwriting any existing data. (For atomic replacement of an entire Delta table, use the `'table'` materialization instead.) 
``` {{ config( diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index dd159cf1..dde4bdd0 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -70,23 +70,21 @@ {% do return(strategy) %} {% endmacro %} -{% macro dbt_spark_validate_merge(file_format) %} - {% set invalid_file_format_msg -%} - You can only choose the 'merge' incremental_strategy when file_format is set to 'delta' - {%- endset %} - - {% if file_format != 'delta' %} - {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} - {% endif %} - -{% endmacro %} - {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} {# ignore dest_columns - we will just use `*` #} + + {% set merge_condition %} + {% if unique_key %} + on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} + {% else %} + on false + {% endif %} + {% endset %} + merge into {{ target }} as DBT_INTERNAL_DEST using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE - on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} + {{ merge_condition }} when matched then update set * when not matched then insert * {% endmacro %} @@ -106,9 +104,10 @@ {% materialization incremental, adapter='spark' -%} {#-- Validate early so we don't run SQL if the file_format is invalid --#} - {% set file_format = dbt_spark_validate_get_file_format() -%} + {%- set file_format = dbt_spark_validate_get_file_format() -%} {#-- Validate early so we don't run SQL if the strategy is invalid --#} - {% set strategy = dbt_spark_validate_get_incremental_strategy(file_format) -%} + {%- set strategy = dbt_spark_validate_get_incremental_strategy(file_format) -%} + {%- set unique_key = config.get('unique_key', none) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} @@ -116,11 +115,6 @@ {% set existing_relation = load_relation(this) %} {% set tmp_relation = make_temp_relation(this) %} - {% if strategy == 'merge' %} - {%- set unique_key = config.require('unique_key') -%} - {% do dbt_spark_validate_merge(file_format) %} - {% endif %} - {% if strategy == 'insert_overwrite' and config.get('partition_by') %} set spark.sql.sources.partitionOverwriteMode = DYNAMIC {% endif %} diff --git a/test/integration/spark-databricks-http.dbtspec b/test/integration/spark-databricks-http.dbtspec index c20e4242..67342da3 100644 --- a/test/integration/spark-databricks-http.dbtspec +++ b/test/integration/spark-databricks-http.dbtspec @@ -9,16 +9,6 @@ target: connect_retries: 5 connect_timeout: 60 projects: - - overrides: incremental - paths: - "models/incremental.sql": - materialized: incremental - body: "select * from {{ source('raw', 'seed') }}" - facts: - base: - rowcount: 10 - added: - rowcount: 20 - overrides: snapshot_strategy_check_cols dbt_project_yml: &file_format_delta # we're going to UPDATE the seed tables as part of testing, so we must make them delta format @@ -40,4 +30,3 @@ sequences: test_dbt_data_test: data_test test_dbt_ephemeral_data_tests: data_test_ephemeral_models test_dbt_schema_test: schema_test - diff --git a/test/integration/spark-databricks-odbc-cluster.dbtspec b/test/integration/spark-databricks-odbc-cluster.dbtspec index 8dc4975e..b320dc3a 100644 --- a/test/integration/spark-databricks-odbc-cluster.dbtspec +++ b/test/integration/spark-databricks-odbc-cluster.dbtspec @@ -10,16 +10,6 @@ target: connect_retries: 5 connect_timeout: 60 
projects: - - overrides: incremental - paths: - "models/incremental.sql": - materialized: incremental - body: "select * from {{ source('raw', 'seed') }}" - facts: - base: - rowcount: 10 - added: - rowcount: 20 - overrides: snapshot_strategy_check_cols dbt_project_yml: &file_format_delta # we're going to UPDATE the seed tables as part of testing, so we must make them delta format diff --git a/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec b/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec index 0251cb31..0aa7be76 100644 --- a/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec +++ b/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec @@ -10,16 +10,6 @@ target: connect_retries: 5 connect_timeout: 60 projects: - - overrides: incremental - paths: - "models/incremental.sql": - materialized: incremental - body: "select * from {{ source('raw', 'seed') }}" - facts: - base: - rowcount: 10 - added: - rowcount: 20 - overrides: snapshot_strategy_check_cols dbt_project_yml: &file_format_delta # we're going to UPDATE the seed tables as part of testing, so we must make them delta format From b60d7c2712ba18425b1e1bbfc69c2ff520261938 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 13 Jan 2021 17:30:00 +0100 Subject: [PATCH 190/603] Revert 3. Full table insert_overwrite if no partitions --- dbt/include/spark/macros/materializations/incremental.sql | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index dde4bdd0..d912a45f 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -1,11 +1,8 @@ {% macro get_insert_overwrite_sql(source_relation, target_relation) %} - - {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%} - {%- set insert = 'insert overwrite' if cols is not none else 'insert into' -%} {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} - {{ insert }} table {{ target_relation }} + insert overwrite table {{ target_relation }} {{ partition_cols(label="partition") }} select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} From c8e3770e077e8c54026156b14e61133ef59fa7ff Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 13 Jan 2021 17:30:14 +0100 Subject: [PATCH 191/603] Introduce 'append' strategy as default --- CHANGELOG.md | 4 ++ README.md | 49 +++++++++++++------ .../macros/materializations/incremental.sql | 31 +++++++++--- 3 files changed, 60 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6e1560c..31499aa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ### Breaking changes - Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126)) +- Incremental models have `incremental_strategy: append` by default. This strategy adds new records +without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending +on the file format, connection method, and attributes of your underlying data. 
dbt will try to raise a helpful error +if you configure a strategy that is not supported for a given file format or connection. ([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141)) ### Under the hood - Enable `CREATE OR REPLACE` support when using Delta. Instead of dropping and recreating the table, it will keep the existing table, and add a new version as supported by Delta. This will ensure that the table stays available when running the pipeline, and you can track the history. diff --git a/README.md b/README.md index e9cee6ad..38a1e8ec 100644 --- a/README.md +++ b/README.md @@ -161,50 +161,67 @@ The following configurations can be supplied to models run with the dbt-spark pl | partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `partition_1` | | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | | buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | -| incremental_strategy | The strategy to use for incremental models (`insert_overwrite` or `merge`). Note `merge` requires `file_format` = `delta` and `unique_key` to be specified. | Optional (default: `insert_overwrite`) | `merge` | +| incremental_strategy | The strategy to use for incremental models (`append`, `insert_overwrite`, or `merge`). | Optional (default: `append`) | `merge` | | persist_docs | Whether dbt should include the model description as a table `comment` | Optional | `{'relation': true}` | **Incremental Models** -To use incremental models, specify a `partition_by` clause in your model config. The default incremental strategy used is `insert_overwrite`, which will overwrite the partitions included in your query. Be sure to re-select _all_ of the relevant -data for a partition when using the `insert_overwrite` strategy. If a `partition_by` config is not specified, dbt will simply -append new data to the model, without overwriting any existing data. +dbt has a number of ways to build models incrementally, called "incremental strategies." Some strategies depend on certain file formats, connection types, and other model configurations: +- `append` (default): Insert new records without updating or overwriting any existing data. +- `insert_overwrite`: If `partition_by` is specified, overwrite partitions in the table with new data. (Be sure to re-select _all_ of the relevant data for a partition.) If no `partition_by` is specified, overwrite the entire table with new data. [Cannot be used with `file_format: delta`. Not available on Databricks SQL Endpoints. For atomic replacement of Delta tables, use the `table` materialization.] +- `merge`: Match records based on a `unique_key`; update old records, insert new ones. (If no `unique_key` is specified, all new data is inserted, similar to `append`.) [Requires `file_format: delta`. Available only on Databricks Runtime.] 
+Examples: + +```sql +{{ config( + materialized='incremental', + incremental_strategy='append' +) }} + + +-- All rows returned by this query will be appended to the existing table + +select * from {{ ref('events') }} +{% if is_incremental() %} + where event_ts > (select max(event_ts) from {{ this }}) +{% endif %} ``` + +```sql {{ config( materialized='incremental', + incremental_strategy='merge', partition_by=['date_day'], file_format='parquet' ) }} -/* - Every partition returned by this query will be overwritten - when this model runs -*/ +-- Every partition returned by this query will overwrite existing partitions select date_day, count(*) as users from {{ ref('events') }} -where date_day::date >= '2019-01-01' +{% if is_incremental() %} + where date_day > (select max(date_day) from {{ this }}) +{% endif %} group by 1 ``` -The `merge` strategy is only supported when using file_format `delta` (supported in Databricks). If a `unique key` is specified, the statement will match existing records and overwrite them with new values. If a `unique key` config is not specified, dbt will simply -append new data to the model, without overwriting any existing data. (For atomic replacement of an entire Delta table, use the `'table'` materialization instead.) - -``` +```sql {{ config( materialized='incremental', incremental_strategy='merge', - partition_by=['date_day'], + unique_key='event_id', file_format='delta' ) }} -select * -from {{ ref('events') }} +-- Existing events, matched on `event_id`, will be updated +-- New events will be appended + +select * from {{ ref('events') }} {% if is_incremental() %} where date_day > (select max(date_day) from {{ this }}) {% endif %} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql index d912a45f..bb6d5dc0 100644 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental.sql @@ -8,6 +8,17 @@ {% endmacro %} + +{% macro get_insert_into_sql(source_relation, target_relation) %} + + {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} + {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} + insert into table {{ target_relation }} + select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} + +{% endmacro %} + + {% macro dbt_spark_validate_get_file_format() %} {#-- Find and validate the file format #} {%- set file_format = config.get("file_format", default="parquet") -%} @@ -24,13 +35,14 @@ {% do return(file_format) %} {% endmacro %} + {% macro dbt_spark_validate_get_incremental_strategy(file_format) %} {#-- Find and validate the incremental strategy #} - {%- set strategy = config.get("incremental_strategy", default="insert_overwrite") -%} + {%- set strategy = config.get("incremental_strategy", default="append") -%} {% set invalid_strategy_msg -%} Invalid incremental strategy provided: {{ strategy }} - Expected one of: 'merge', 'insert_overwrite' + Expected one of: 'append', 'merge', 'insert_overwrite' {%- endset %} {% set invalid_merge_msg -%} @@ -41,16 +53,16 @@ {% set invalid_insert_overwrite_delta_msg -%} Invalid incremental strategy provided: {{ strategy }} You cannot use this strategy when file_format is set to 'delta' - Use the `merge` strategy instead + Use the 'append' or 'merge' strategy instead {%- endset %} {% set invalid_insert_overwrite_endpoint_msg -%} Invalid incremental strategy provided: {{ strategy }} You cannot use this 
strategy when connecting via endpoint - Use `incremental_strategy: merge` with `file_format: delta` instead + Use the 'append' or 'merge' strategy instead {%- endset %} - {% if strategy not in ['merge', 'insert_overwrite'] %} + {% if strategy not in ['append', 'merge', 'insert_overwrite'] %} {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} {%-else %} {% if strategy == 'merge' and file_format != 'delta' %} @@ -88,11 +100,14 @@ {% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} - {%- if strategy == 'insert_overwrite' -%} + {%- if strategy == 'append' -%} + {#-- insert new records into existing table, without updating or overwriting #} + {{ get_insert_into_sql(source, target) }} + {%- elif strategy == 'insert_overwrite' -%} {#-- insert statements don't like CTEs, so support them via a temp view #} {{ get_insert_overwrite_sql(source, target) }} - {%- else -%} - {#-- merge all columns with databricks delta - schema changes are handled for us #} + {%- elif strategy == 'merge' -%} + {#-- merge all columns with databricks delta - schema changes are handled for us #} {{ get_merge_sql(target, source, unique_key, dest_columns=none, predicates=none) }} {%- endif -%} From bc18f8022e23c2fcdba84253d39be8f071c526e2 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 14 Jan 2021 13:26:47 +0100 Subject: [PATCH 192/603] PR feedback. Reorganize macros --- CHANGELOG.md | 5 +- README.md | 4 +- .../macros/materializations/incremental.sql | 154 ------------------ .../incremental/incremental.sql | 45 +++++ .../incremental/strategies.sql | 58 +++++++ .../materializations/incremental/validate.sql | 59 +++++++ 6 files changed, 165 insertions(+), 160 deletions(-) delete mode 100644 dbt/include/spark/macros/materializations/incremental.sql create mode 100644 dbt/include/spark/macros/materializations/incremental/incremental.sql create mode 100644 dbt/include/spark/macros/materializations/incremental/strategies.sql create mode 100644 dbt/include/spark/macros/materializations/incremental/validate.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 31499aa9..485b88e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,7 @@ ### Breaking changes - Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126)) -- Incremental models have `incremental_strategy: append` by default. This strategy adds new records -without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending -on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error -if you configure a strategy that is not supported for a given file format or connection. ([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141)) +- Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. 
([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141))

### Under the hood
- Enable `CREATE OR REPLACE` support when using Delta. Instead of dropping and recreating the table, it will keep the existing table, and add a new version as supported by Delta. This will ensure that the table stays available when running the pipeline, and you can track the history.
diff --git a/README.md b/README.md
index 38a1e8ec..09e716b5 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,7 @@ The following configurations can be supplied to models run with the dbt-spark pl
 dbt has a number of ways to build models incrementally, called "incremental strategies." Some strategies depend on certain file formats, connection types, and other model configurations:
 - `append` (default): Insert new records without updating or overwriting any existing data.
-- `insert_overwrite`: If `partition_by` is specified, overwrite partitions in the table with new data. (Be sure to re-select _all_ of the relevant data for a partition.) If no `partition_by` is specified, overwrite the entire table with new data. [Cannot be used with `file_format: delta`. Not available on Databricks SQL Endpoints. For atomic replacement of Delta tables, use the `table` materialization.]
+- `insert_overwrite`: If `partition_by` is specified, overwrite partitions in the table with new data. (Be sure to re-select _all_ of the relevant data for a partition.) If no `partition_by` is specified, overwrite the entire table with new data. [Cannot be used with `file_format: delta` or when connecting via Databricks SQL Endpoints. For atomic replacement of Delta tables, use the `table` materialization instead.]
 - `merge`: Match records based on a `unique_key`; update old records, insert new ones. (If no `unique_key` is specified, all new data is inserted, similar to `append`.)  [Requires `file_format: delta`. Available only on Databricks Runtime.]
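As a rough illustration of what the `merge` strategy compiles to (see the `spark__get_merge_sql` macro later in this series), here is a hedged sketch of the generated statement for a model configured with `unique_key='event_id'`; the relation names `analytics.events` and `events__dbt_tmp` are illustrative only, not taken from this repo:

```sql
-- Hedged sketch only: roughly the statement dbt issues after staging the
-- model's query in a temp relation; names here are illustrative.
merge into analytics.events as DBT_INTERNAL_DEST
using events__dbt_tmp as DBT_INTERNAL_SOURCE
    on DBT_INTERNAL_SOURCE.event_id = DBT_INTERNAL_DEST.event_id
when matched then update set *
when not matched then insert *
```

If no `unique_key` is configured, the macro's join condition falls back to `on false`, so every staged row reaches `when not matched then insert *` — effectively the same result as `append`.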
Examples: @@ -177,7 +177,7 @@ Examples: ```sql {{ config( materialized='incremental', - incremental_strategy='append' + incremental_strategy='append', ) }} diff --git a/dbt/include/spark/macros/materializations/incremental.sql b/dbt/include/spark/macros/materializations/incremental.sql deleted file mode 100644 index bb6d5dc0..00000000 --- a/dbt/include/spark/macros/materializations/incremental.sql +++ /dev/null @@ -1,154 +0,0 @@ -{% macro get_insert_overwrite_sql(source_relation, target_relation) %} - - {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} - {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} - insert overwrite table {{ target_relation }} - {{ partition_cols(label="partition") }} - select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} - -{% endmacro %} - - -{% macro get_insert_into_sql(source_relation, target_relation) %} - - {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} - {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} - insert into table {{ target_relation }} - select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} - -{% endmacro %} - - -{% macro dbt_spark_validate_get_file_format() %} - {#-- Find and validate the file format #} - {%- set file_format = config.get("file_format", default="parquet") -%} - - {% set invalid_file_format_msg -%} - Invalid file format provided: {{ file_format }} - Expected one of: 'text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm' - {%- endset %} - - {% if file_format not in ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm'] %} - {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} - {% endif %} - - {% do return(file_format) %} -{% endmacro %} - - -{% macro dbt_spark_validate_get_incremental_strategy(file_format) %} - {#-- Find and validate the incremental strategy #} - {%- set strategy = config.get("incremental_strategy", default="append") -%} - - {% set invalid_strategy_msg -%} - Invalid incremental strategy provided: {{ strategy }} - Expected one of: 'append', 'merge', 'insert_overwrite' - {%- endset %} - - {% set invalid_merge_msg -%} - Invalid incremental strategy provided: {{ strategy }} - You can only choose this strategy when file_format is set to 'delta' - {%- endset %} - - {% set invalid_insert_overwrite_delta_msg -%} - Invalid incremental strategy provided: {{ strategy }} - You cannot use this strategy when file_format is set to 'delta' - Use the 'append' or 'merge' strategy instead - {%- endset %} - - {% set invalid_insert_overwrite_endpoint_msg -%} - Invalid incremental strategy provided: {{ strategy }} - You cannot use this strategy when connecting via endpoint - Use the 'append' or 'merge' strategy instead - {%- endset %} - - {% if strategy not in ['append', 'merge', 'insert_overwrite'] %} - {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} - {%-else %} - {% if strategy == 'merge' and file_format != 'delta' %} - {% do exceptions.raise_compiler_error(invalid_merge_msg) %} - {% endif %} - {% if strategy == 'insert_overwrite' and file_format == 'delta' %} - {% do exceptions.raise_compiler_error(invalid_insert_overwrite_delta_msg) %} - {% endif %} - {% if strategy == 'insert_overwrite' and target.endpoint %} - {% do exceptions.raise_compiler_error(invalid_insert_overwrite_endpoint_msg) %} - {% endif %} - {% endif %} - - {% do return(strategy) %} -{% endmacro %} - - -{% macro 
spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} - {# ignore dest_columns - we will just use `*` #} - - {% set merge_condition %} - {% if unique_key %} - on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} - {% else %} - on false - {% endif %} - {% endset %} - - merge into {{ target }} as DBT_INTERNAL_DEST - using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE - {{ merge_condition }} - when matched then update set * - when not matched then insert * -{% endmacro %} - - -{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} - {%- if strategy == 'append' -%} - {#-- insert new records into existing table, without updating or overwriting #} - {{ get_insert_into_sql(source, target) }} - {%- elif strategy == 'insert_overwrite' -%} - {#-- insert statements don't like CTEs, so support them via a temp view #} - {{ get_insert_overwrite_sql(source, target) }} - {%- elif strategy == 'merge' -%} - {#-- merge all columns with databricks delta - schema changes are handled for us #} - {{ get_merge_sql(target, source, unique_key, dest_columns=none, predicates=none) }} - {%- endif -%} - -{% endmacro %} - - -{% materialization incremental, adapter='spark' -%} - {#-- Validate early so we don't run SQL if the file_format is invalid --#} - {%- set file_format = dbt_spark_validate_get_file_format() -%} - {#-- Validate early so we don't run SQL if the strategy is invalid --#} - {%- set strategy = dbt_spark_validate_get_incremental_strategy(file_format) -%} - {%- set unique_key = config.get('unique_key', none) -%} - - {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} - - {% set target_relation = this %} - {% set existing_relation = load_relation(this) %} - {% set tmp_relation = make_temp_relation(this) %} - - {% if strategy == 'insert_overwrite' and config.get('partition_by') %} - set spark.sql.sources.partitionOverwriteMode = DYNAMIC - {% endif %} - - {{ run_hooks(pre_hooks) }} - - {% if existing_relation is none %} - {% set build_sql = create_table_as(False, target_relation, sql) %} - {% elif existing_relation.is_view or full_refresh_mode %} - {% do adapter.drop_relation(existing_relation) %} - {% set build_sql = create_table_as(False, target_relation, sql) %} - {% else %} - {% do run_query(create_table_as(True, tmp_relation, sql)) %} - {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} - {% endif %} - - {%- call statement('main') -%} - {{ build_sql }} - {%- endcall -%} - - {{ run_hooks(post_hooks) }} - - {{ return({'relations': [target_relation]}) }} - -{%- endmaterialization %} diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql new file mode 100644 index 00000000..b11990b3 --- /dev/null +++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql @@ -0,0 +1,45 @@ +{% materialization incremental, adapter='spark' -%} + + {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#} + {%- set raw_file_format = config.get('file_format', default='parquet') -%} + {%- set raw_strategy = config.get('incremental_strategy', default='append') -%} + + {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%} + {%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%} + + {%- set unique_key = config.get('unique_key', none) -%} + {%- set partition_by = 
config.get('partition_by', none) -%} + + {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} + + {% set target_relation = this %} + {% set existing_relation = load_relation(this) %} + {% set tmp_relation = make_temp_relation(this) %} + + {% if strategy == 'insert_overwrite' and partition_by %} + {% call statement() %} + set spark.sql.sources.partitionOverwriteMode = DYNAMIC + {% endcall %} + {% endif %} + + {{ run_hooks(pre_hooks) }} + + {% if existing_relation is none %} + {% set build_sql = create_table_as(False, target_relation, sql) %} + {% elif existing_relation.is_view or full_refresh_mode %} + {% do adapter.drop_relation(existing_relation) %} + {% set build_sql = create_table_as(False, target_relation, sql) %} + {% else %} + {% do run_query(create_table_as(True, tmp_relation, sql)) %} + {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} + {% endif %} + + {%- call statement('main') -%} + {{ build_sql }} + {%- endcall -%} + + {{ run_hooks(post_hooks) }} + + {{ return({'relations': [target_relation]}) }} + +{%- endmaterialization %} diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql new file mode 100644 index 00000000..d3ffafc1 --- /dev/null +++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql @@ -0,0 +1,58 @@ +{% macro get_insert_overwrite_sql(source_relation, target_relation) %} + + {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} + {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} + insert overwrite table {{ target_relation }} + {{ partition_cols(label="partition") }} + select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} + +{% endmacro %} + + +{% macro get_insert_into_sql(source_relation, target_relation) %} + + {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} + {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} + insert into table {{ target_relation }} + select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} + +{% endmacro %} + + +{% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} + {# ignore dest_columns - we will just use `*` #} + + {% set merge_condition %} + {% if unique_key %} + on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} + {% else %} + on false + {% endif %} + {% endset %} + + merge into {{ target }} as DBT_INTERNAL_DEST + using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE + {{ merge_condition }} + when matched then update set * + when not matched then insert * +{% endmacro %} + + +{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} + {%- if strategy == 'append' -%} + {#-- insert new records into existing table, without updating or overwriting #} + {{ get_insert_into_sql(source, target) }} + {%- elif strategy == 'insert_overwrite' -%} + {#-- insert statements don't like CTEs, so support them via a temp view #} + {{ get_insert_overwrite_sql(source, target) }} + {%- elif strategy == 'merge' -%} + {#-- merge all columns with databricks delta - schema changes are handled for us #} + {{ get_merge_sql(target, source, unique_key, dest_columns=none, predicates=none) }} + {%- else -%} + {% set no_sql_for_strategy_msg -%} + No known SQL for the incremental strategy provided: {{ strategy }} + {%- endset %} + 
{%- do exceptions.raise_compiler_error(no_sql_for_strategy_msg) -%} + {%- endif -%} + +{% endmacro %} diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql new file mode 100644 index 00000000..400a2eee --- /dev/null +++ b/dbt/include/spark/macros/materializations/incremental/validate.sql @@ -0,0 +1,59 @@ +{% macro dbt_spark_validate_get_file_format(raw_file_format) %} + {#-- Validate the file format #} + + {% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm'] %} + + {% set invalid_file_format_msg -%} + Invalid file format provided: {{ raw_file_format }} + Expected one of: {{ accepted_formats | join(', ') }} + {%- endset %} + + {% if raw_file_format not in accepted_formats %} + {% do exceptions.raise_compiler_error(invalid_file_format_msg) %} + {% endif %} + + {% do return(raw_file_format) %} +{% endmacro %} + + +{% macro dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) %} + {#-- Validate the incremental strategy #} + + {% set invalid_strategy_msg -%} + Invalid incremental strategy provided: {{ raw_strategy }} + Expected one of: 'append', 'merge', 'insert_overwrite' + {%- endset %} + + {% set invalid_merge_msg -%} + Invalid incremental strategy provided: {{ raw_strategy }} + You can only choose this strategy when file_format is set to 'delta' + {%- endset %} + + {% set invalid_insert_overwrite_delta_msg -%} + Invalid incremental strategy provided: {{ raw_strategy }} + You cannot use this strategy when file_format is set to 'delta' + Use the 'append' or 'merge' strategy instead + {%- endset %} + + {% set invalid_insert_overwrite_endpoint_msg -%} + Invalid incremental strategy provided: {{ raw_strategy }} + You cannot use this strategy when connecting via endpoint + Use the 'append' or 'merge' strategy instead + {%- endset %} + + {% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %} + {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} + {%-else %} + {% if raw_strategy == 'merge' and file_format != 'delta' %} + {% do exceptions.raise_compiler_error(invalid_merge_msg) %} + {% endif %} + {% if raw_strategy == 'insert_overwrite' and file_format == 'delta' %} + {% do exceptions.raise_compiler_error(invalid_insert_overwrite_delta_msg) %} + {% endif %} + {% if raw_strategy == 'insert_overwrite' and target.endpoint %} + {% do exceptions.raise_compiler_error(invalid_insert_overwrite_endpoint_msg) %} + {% endif %} + {% endif %} + + {% do return(raw_strategy) %} +{% endmacro %} From cccdd4455b30336d4909b652e4cb4ad1d9c3d9e0 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 15 Jan 2021 17:04:40 +0100 Subject: [PATCH 193/603] Capture hard deleted records --- dbt/include/spark/macros/materializations/snapshot.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 04ee1f48..6dad51a0 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -19,7 +19,7 @@ on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id when matched and DBT_INTERNAL_DEST.dbt_valid_to is null - and DBT_INTERNAL_SOURCE.dbt_change_type = 'update' + and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete') then update set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to From 90492319adce4345199a59527fc9787228bb25dc Mon Sep 17 
00:00:00 2001 From: Kyle Wigley Date: Thu, 28 Jan 2021 17:35:41 -0500 Subject: [PATCH 194/603] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7d309ece..59bdc106 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ A dbt profile for Spark connections support the following configurations: | organization | Azure Databricks workspace ID (see note) | ❔ | ❌ | ❔ | `1234567891234567` | | cluster | The name of the cluster to connect to | ✅ (unless `endpoint`) | ❌ | ✅ | `01234-23423-coffeetime` | | endpoint | The ID of the SQL endpoint to connect to | ✅ (unless `cluster`) | ❌ | ❌ | `1234567891234a` | -| driver | Path of ODBC driver installed or name of ODBC DSN configured | ✅ | ❌ | ❌ | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | +| driver | Path of ODBC driver installed or name of the ODBC driver configured | ✅ | ❌ | ❌ | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | | user | The username to use to connect to the cluster | ❔ | ❔ | ❔ | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | ❌ | ❔ (`10`) | ❔ (`10`) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | ❌ | ❔ (`0`) | ❔ (`0`) | `5` | From af77fd83babacd3b29400783e97c5fbfc13fcc86 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 3 Feb 2021 14:31:41 +0100 Subject: [PATCH 195/603] Try adding custom integration tests --- dev_requirements.txt | 2 +- test/custom/__init__.py | 0 test/custom/base.py | 67 ++++++++++++++ .../data/expected_append.csv | 5 ++ .../data/expected_overwrite.csv | 3 + .../data/expected_upsert.csv | 4 + .../models/default_append.sql | 17 ++++ .../models_bad/bad_file_format.sql | 18 ++++ .../models_bad/bad_insert_overwrite_delta.sql | 19 ++++ .../models_bad/bad_merge_not_delta.sql | 18 ++++ .../models_bad/bad_strategy.sql | 18 ++++ .../models_delta/append_delta.sql | 19 ++++ .../models_delta/merge_no_key.sql | 19 ++++ .../models_delta/merge_unique_key.sql | 20 +++++ .../insert_overwrite_no_partitions.sql | 18 ++++ .../insert_overwrite_partitions.sql | 20 +++++ .../test_incremental_strategies.py | 87 +++++++++++++++++++ 17 files changed, 353 insertions(+), 1 deletion(-) create mode 100644 test/custom/__init__.py create mode 100644 test/custom/base.py create mode 100644 test/custom/incremental_strategies/data/expected_append.csv create mode 100644 test/custom/incremental_strategies/data/expected_overwrite.csv create mode 100644 test/custom/incremental_strategies/data/expected_upsert.csv create mode 100644 test/custom/incremental_strategies/models/default_append.sql create mode 100644 test/custom/incremental_strategies/models_bad/bad_file_format.sql create mode 100644 test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql create mode 100644 test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql create mode 100644 test/custom/incremental_strategies/models_bad/bad_strategy.sql create mode 100644 test/custom/incremental_strategies/models_delta/append_delta.sql create mode 100644 test/custom/incremental_strategies/models_delta/merge_no_key.sql create mode 100644 test/custom/incremental_strategies/models_delta/merge_unique_key.sql create mode 100644 test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql create mode 100644 test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql create mode 100644 
test/custom/incremental_strategies/test_incremental_strategies.py diff --git a/dev_requirements.txt b/dev_requirements.txt index d662a12b..6180d33c 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -10,6 +10,6 @@ pytest-xdist>=2.1.0,<3 flaky>=3.5.3,<4 # Test requirements -pytest-dbt-adapter==0.4.0 +git+https://github.com/fishtown-analytics/dbt-adapter-tests.git@feature/add-integration-test-tools sasl==0.2.1 thrift_sasl==0.4.1 diff --git a/test/custom/__init__.py b/test/custom/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/custom/base.py b/test/custom/base.py new file mode 100644 index 00000000..ed34e878 --- /dev/null +++ b/test/custom/base.py @@ -0,0 +1,67 @@ +from dbt_adapter_tests import DBTIntegrationTestBase, use_profile + +class DBTSparkIntegrationTest(DBTIntegrationTestBase): + + def apache_spark_profile(self): + return { + 'config': { + 'send_anonymous_usage_stats': False + }, + 'test': { + 'outputs': { + 'default2': { + 'type': 'spark', + 'host': 'localhost', + 'user': 'dbt', + 'method': 'thrift', + 'port': '10000', + 'connect_retries': '5', + 'connect_timeout': '60', + 'schema': self.unique_schema() + }, + 'target': 'default2' + } + } + } + + def databricks_cluster_profile(self): + return { + 'config': { + 'send_anonymous_usage_stats': False + }, + 'test': { + 'outputs': { + 'odbc': { + 'type': 'spark', + 'method': 'odbc', + 'host': os.getenv('DBT_DATABRICKS_HOST_NAME'), + 'cluster': os.getenv('DBT_DATABRICKS_CLUSTER_NAME'), + 'token': os.getenv('DBT_DATABRICKS_TOKEN'), + 'port': 443, + 'schema': self.unique_schema() + }, + 'target': 'odbc' + } + } + } + + def databricks_sql_endpoint_profile(self): + return { + 'config': { + 'send_anonymous_usage_stats': False + }, + 'test': { + 'outputs': { + 'default2': { + 'type': 'spark', + 'method': 'odbc', + 'host': os.getenv('DBT_DATABRICKS_HOST_NAME'), + 'endpoint': os.getenv('DBT_DATABRICKS_ENDPOINT'), + 'token': os.getenv('DBT_DATABRICKS_TOKEN'), + 'port': 443, + 'schema': self.unique_schema() + }, + 'target': 'default2' + } + } + } diff --git a/test/custom/incremental_strategies/data/expected_append.csv b/test/custom/incremental_strategies/data/expected_append.csv new file mode 100644 index 00000000..c96e569b --- /dev/null +++ b/test/custom/incremental_strategies/data/expected_append.csv @@ -0,0 +1,5 @@ +id,msg +1,hello +2,goodbye +2,yo +3,anyway \ No newline at end of file diff --git a/test/custom/incremental_strategies/data/expected_overwrite.csv b/test/custom/incremental_strategies/data/expected_overwrite.csv new file mode 100644 index 00000000..46d8f605 --- /dev/null +++ b/test/custom/incremental_strategies/data/expected_overwrite.csv @@ -0,0 +1,3 @@ +id,msg +2,yo +3,anyway \ No newline at end of file diff --git a/test/custom/incremental_strategies/data/expected_upsert.csv b/test/custom/incremental_strategies/data/expected_upsert.csv new file mode 100644 index 00000000..71805dfc --- /dev/null +++ b/test/custom/incremental_strategies/data/expected_upsert.csv @@ -0,0 +1,4 @@ +id,msg +1,hello +2,yo +3,anyway \ No newline at end of file diff --git a/test/custom/incremental_strategies/models/default_append.sql b/test/custom/incremental_strategies/models/default_append.sql new file mode 100644 index 00000000..cef0d5ac --- /dev/null +++ b/test/custom/incremental_strategies/models/default_append.sql @@ -0,0 +1,17 @@ +{{ config( + materialized = 'incremental', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 
2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_file_format.sql b/test/custom/incremental_strategies/models_bad/bad_file_format.sql new file mode 100644 index 00000000..b0b6abd4 --- /dev/null +++ b/test/custom/incremental_strategies/models_bad/bad_file_format.sql @@ -0,0 +1,18 @@ +{{ config( + materialized = 'incremental', + file_format = 'something_else', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql b/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql new file mode 100644 index 00000000..f0d87b37 --- /dev/null +++ b/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql @@ -0,0 +1,19 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'insert_overwrite', + file_format = 'delta', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql b/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql new file mode 100644 index 00000000..e07674c3 --- /dev/null +++ b/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql @@ -0,0 +1,18 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'merge', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_strategy.sql b/test/custom/incremental_strategies/models_bad/bad_strategy.sql new file mode 100644 index 00000000..256f3484 --- /dev/null +++ b/test/custom/incremental_strategies/models_bad/bad_strategy.sql @@ -0,0 +1,18 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'something_else', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_delta/append_delta.sql b/test/custom/incremental_strategies/models_delta/append_delta.sql new file mode 100644 index 00000000..517c2418 --- /dev/null +++ b/test/custom/incremental_strategies/models_delta/append_delta.sql @@ -0,0 +1,19 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'append', + file_format = 'delta', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_delta/merge_no_key.sql b/test/custom/incremental_strategies/models_delta/merge_no_key.sql new file mode 100644 index 00000000..3039f74e --- /dev/null +++ b/test/custom/incremental_strategies/models_delta/merge_no_key.sql @@ -0,0 +1,19 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'merge', + file_format = 
'delta', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_delta/merge_unique_key.sql b/test/custom/incremental_strategies/models_delta/merge_unique_key.sql new file mode 100644 index 00000000..30f5bc06 --- /dev/null +++ b/test/custom/incremental_strategies/models_delta/merge_unique_key.sql @@ -0,0 +1,20 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'merge', + file_format = 'delta', + unique_key = 'id', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql new file mode 100644 index 00000000..ebe6f2f0 --- /dev/null +++ b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql @@ -0,0 +1,18 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'insert_overwrite', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql new file mode 100644 index 00000000..c0663d97 --- /dev/null +++ b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql @@ -0,0 +1,20 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'insert_overwrite', + partition_by = 'id', + file_format = 'parquet', +) }} + +{% if not is_incremental() %} + +select 1 as id, 'hello' as msg +union all +select 2 as id, 'goodbye' as msg + +{% else %} + +select 2 as id, 'yo' as msg +union all +select 3 as id, 'anyway' as msg + +{% endif %} diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/test/custom/incremental_strategies/test_incremental_strategies.py new file mode 100644 index 00000000..5880d2fc --- /dev/null +++ b/test/custom/incremental_strategies/test_incremental_strategies.py @@ -0,0 +1,87 @@ +from test.custom.base import DBTSparkIntegrationTest + +class TestIncrementalStrategies(DBTSparkIntegrationTest): + @property + def schema(self): + return "incremental_strategies" + + @property + def models(self): + return "models" + + def run_and_test(self): + self.run_dbt(["seed"]) + self.run_dbt(["run"]) + self.assertTablesEqual("default_append", "expected_append") + +class TestDefaultAppend(TestIncrementalStrategies): + @use_profile("apache_spark") + def test_default_append_apache_spark(self): + self.run_and_test() + + @use_profile("databricks_cluster") + def test_default_append_databricks(self): + self.run_and_test() + +class TestInsertOverwrite(TestIncrementalStrategies): + @property + def models(self): + return "models_insert_overwrite" + + def run_and_test(self): + self.run_dbt(["seed"]) + self.run_dbt(["run"]) + self.assertTablesEqual("insert_overwrite_no_partitions", "expected_overwrite") + 
self.assertTablesEqual("insert_overwrite_partitions", "expected_upsert") + + @use_profile("apache_spark") + def test_insert_overwrite_apache_spark(self): + self.run_and_test() + + @use_profile("databricks_cluster") + def test_insert_overwrite_databricks(self): + self.run_and_test() + +class TestDeltaStrategies(TestIncrementalStrategies): + @property + def models(self): + return "models_delta" + + def run_and_test(self): + self.run_dbt(["seed"]) + self.run_dbt(["run"]) + self.assertTablesEqual("append_delta", "expected_append") + self.assertTablesEqual("merge_no_key", "expected_append") + self.assertTablesEqual("merge_unique_key", "expected_upsert") + + @use_profile("databricks_cluster") + def test_delta_strategies_databricks(self): + self.run_and_test() + +class TestBadStrategies(TestIncrementalStrategies): + @property + def models(self): + return "models_insert_overwrite" + + def run_and_test(self): + with self.assertRaises(dbt.exceptions.Exception) as exc: + self.run_dbt(["compile"]) + message = str(exc.exception) + self.assertIn("Invalid file format provided", message) + self.assertIn("Invalid incremental strategy provided", message) + + @use_profile("apache_spark") + def test_bad_strategies_apache_spark(self): + self.run_and_test() + + @use_profile("databricks_cluster") + def test_bad_strategies_databricks(self): + self.run_and_test() + +class TestBadStrategyWithEndpoint(TestInsertOverwrite): + @use_profile("databricks_sql_endpoint") + def run_and_test(self): + with self.assertRaises(dbt.exceptions.Exception) as exc: + self.run_dbt(["compile"], "--target", "odbc-sql-endpoint") + message = str(exc.exception) + self.assertIn("Invalid incremental strategy provided", message) From e08a23e4773f850c2a5847f088fd904882161679 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 3 Feb 2021 14:32:07 +0100 Subject: [PATCH 196/603] Try updating tox, circle (WIP) --- .circleci/config.yml | 20 +++++++++++++++++--- tox.ini | 3 +++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a0d9ac49..a44a14ab 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -60,7 +60,7 @@ jobs: - store_artifacts: path: ./logs - integration-spark-databricks-odbc: + integration-spark-databricks-odbc-cluster: &databricks-odbc environment: DBT_INVOCATION_ENV: circle ODBC_DRIVER: Simba # TODO: move env var to Docker image @@ -74,7 +74,18 @@ jobs: - checkout - run: name: Run integration tests - command: tox -e integration-spark-databricks-odbc-cluster,integration-spark-databricks-odbc-sql-endpoint + command: tox -e integration-spark-databricks-odbc-cluster + no_output_timeout: 1h + - store_artifacts: + path: ./logs + + integration-spark-databricks-odbc-endpoint: + <<: *databricks-odbc + steps: + - checkout + - run: + name: Run integration tests + command: tox -e integration-spark-databricks-odbc-sql-endpoint no_output_timeout: 1h - store_artifacts: path: ./logs @@ -90,6 +101,9 @@ workflows: - integration-spark-databricks-http: requires: - unit - - integration-spark-databricks-odbc: + - integration-spark-databricks-odbc-cluster: + requires: + - unit + - integration-spark-databricks-odbc-endpoint: requires: - unit diff --git a/tox.ini b/tox.ini index f865309f..76b34f6d 100644 --- a/tox.ini +++ b/tox.ini @@ -30,6 +30,7 @@ deps = [testenv:integration-spark-databricks-odbc-cluster] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-cluster.dbtspec' + /bin/bash -c '{envpython} -m pytest -v -m 
profile_databricks_cluster {posargs} -n4 test/custom/*' passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER deps = -r{toxinidir}/requirements.txt @@ -39,6 +40,7 @@ deps = [testenv:integration-spark-databricks-odbc-sql-endpoint] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-sql-endpoint.dbtspec' + /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 test/custom/*' passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_ENDPOINT DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER deps = -r{toxinidir}/requirements.txt @@ -49,6 +51,7 @@ deps = [testenv:integration-spark-thrift] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-thrift.dbtspec' + /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 test/custom/*' passenv = DBT_INVOCATION_ENV deps = -r{toxinidir}/requirements.txt From f63d13ec709a745063f3e0d7d76f08beabe82dd4 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 4 Feb 2021 11:22:37 +0100 Subject: [PATCH 197/603] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6e1560c..625ac44d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## dbt-spark 0.19.0 (Release TBD) +### Fixes +- Capture hard-deleted records in snapshot merge, when `invalidate_hard_deletes` config is set ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/143), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/144)) + +## dbt-spark 0.19.0rc1 (January 8, 2021) + ### Breaking changes - Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126)) From 9bbc61b1f2fb8ec7b7da0a17768c17c5d389b790 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Wed, 17 Feb 2021 09:38:51 -0500 Subject: [PATCH 198/603] get tests working --- dev_requirements.txt | 2 +- test/custom/base.py | 107 ++++++++++++++++-- test/custom/conftest.py | 10 ++ .../test_incremental_strategies.py | 45 +++++--- 4 files changed, 136 insertions(+), 28 deletions(-) create mode 100644 test/custom/conftest.py diff --git a/dev_requirements.txt b/dev_requirements.txt index 6180d33c..bbcdc9d6 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -10,6 +10,6 @@ pytest-xdist>=2.1.0,<3 flaky>=3.5.3,<4 # Test requirements -git+https://github.com/fishtown-analytics/dbt-adapter-tests.git@feature/add-integration-test-tools +git+https://github.com/fishtown-analytics/dbt-adapter-tests.git@33872d1cc0f936677dae091c3e0b49771c280514 sasl==0.2.1 thrift_sasl==0.4.1 diff --git a/test/custom/base.py b/test/custom/base.py index ed34e878..d0426aed 100644 --- a/test/custom/base.py +++ b/test/custom/base.py @@ -1,7 +1,69 @@ -from dbt_adapter_tests import DBTIntegrationTestBase, use_profile +import pytest +from functools import wraps +import os +from dbt_adapter_tests import DBTIntegrationTestBase + class DBTSparkIntegrationTest(DBTIntegrationTestBase): - + + def get_profile(self, adapter_type): + if adapter_type == 'apache_spark': + return self.apache_spark_profile() + elif adapter_type == 'databricks_cluster': + return self.databricks_cluster_profile() + elif adapter_type == 'databricks_sql_endpoint': + return self.databricks_sql_endpoint_profile() + else: + raise 
ValueError('invalid adapter type {}'.format(adapter_type)) + + @staticmethod + def _profile_from_test_name(test_name): + adapter_names = ('apache_spark', 'databricks_cluster', + 'databricks_sql_endpoint') + adapters_in_name = sum(x in test_name for x in adapter_names) + if adapters_in_name != 1: + raise ValueError( + 'test names must have exactly 1 profile choice embedded, {} has {}' + .format(test_name, adapters_in_name) + ) + + for adapter_name in adapter_names: + if adapter_name in test_name: + return adapter_name + + raise ValueError( + 'could not find adapter name in test name {}'.format(test_name) + ) + + def run_sql(self, query, fetch='None', kwargs=None, connection_name=None): + if connection_name is None: + connection_name = '__test' + + if query.strip() == "": + return + + sql = self.transform_sql(query, kwargs=kwargs) + + with self.get_connection(connection_name) as conn: + cursor = conn.handle.cursor() + try: + cursor.execute(sql) + if fetch == 'one': + return cursor.fetchall()[0] + elif fetch == 'all': + return cursor.fetchall() + else: + # we have to fetch. + cursor.fetchall() + except Exception as e: + conn.handle.rollback() + conn.transaction_open = False + print(sql) + print(e) + raise + else: + conn.transaction_open = False + def apache_spark_profile(self): return { 'config': { @@ -14,13 +76,13 @@ def apache_spark_profile(self): 'host': 'localhost', 'user': 'dbt', 'method': 'thrift', - 'port': '10000', - 'connect_retries': '5', - 'connect_timeout': '60', + 'port': 10000, + 'connect_retries': 5, + 'connect_timeout': 60, 'schema': self.unique_schema() }, + }, 'target': 'default2' - } } } @@ -40,11 +102,11 @@ def databricks_cluster_profile(self): 'port': 443, 'schema': self.unique_schema() }, + }, 'target': 'odbc' - } } } - + def databricks_sql_endpoint_profile(self): return { 'config': { @@ -61,7 +123,34 @@ def databricks_sql_endpoint_profile(self): 'port': 443, 'schema': self.unique_schema() }, + }, 'target': 'default2' - } } } + + +def use_profile(profile_name): + """A decorator to declare a test method as using a particular profile. + Handles both setting the nose attr and calling self.use_profile. 
+ + Use like this: + + class TestSomething(DBIntegrationTest): + @use_profile('postgres') + def test_postgres_thing(self): + self.assertEqual(self.adapter_type, 'postgres') + + @use_profile('snowflake') + def test_snowflake_thing(self): + self.assertEqual(self.adapter_type, 'snowflake') + """ + def outer(wrapped): + @getattr(pytest.mark, 'profile_'+profile_name) + @wraps(wrapped) + def func(self, *args, **kwargs): + return wrapped(self, *args, **kwargs) + # sanity check at import time + assert DBTSparkIntegrationTest._profile_from_test_name( + wrapped.__name__) == profile_name + return func + return outer diff --git a/test/custom/conftest.py b/test/custom/conftest.py new file mode 100644 index 00000000..02248bae --- /dev/null +++ b/test/custom/conftest.py @@ -0,0 +1,10 @@ +def pytest_configure(config): + config.addinivalue_line( + "markers", "profile_databricks_cluster" + ) + config.addinivalue_line( + "markers", "profile_databricks_sql_endpoint" + ) + config.addinivalue_line( + "markers", "profile_apache_spark" + ) diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/test/custom/incremental_strategies/test_incremental_strategies.py index 5880d2fc..5ad7a3f7 100644 --- a/test/custom/incremental_strategies/test_incremental_strategies.py +++ b/test/custom/incremental_strategies/test_incremental_strategies.py @@ -1,4 +1,6 @@ -from test.custom.base import DBTSparkIntegrationTest +from test.custom.base import DBTSparkIntegrationTest, use_profile +import dbt.exceptions + class TestIncrementalStrategies(DBTSparkIntegrationTest): @property @@ -14,73 +16,80 @@ def run_and_test(self): self.run_dbt(["run"]) self.assertTablesEqual("default_append", "expected_append") + class TestDefaultAppend(TestIncrementalStrategies): @use_profile("apache_spark") def test_default_append_apache_spark(self): self.run_and_test() - + @use_profile("databricks_cluster") - def test_default_append_databricks(self): + def test_default_append_databricks_cluster(self): self.run_and_test() + class TestInsertOverwrite(TestIncrementalStrategies): @property def models(self): return "models_insert_overwrite" - + def run_and_test(self): self.run_dbt(["seed"]) self.run_dbt(["run"]) - self.assertTablesEqual("insert_overwrite_no_partitions", "expected_overwrite") - self.assertTablesEqual("insert_overwrite_partitions", "expected_upsert") - + self.assertTablesEqual( + "insert_overwrite_no_partitions", "expected_overwrite") + self.assertTablesEqual( + "insert_overwrite_partitions", "expected_upsert") + @use_profile("apache_spark") def test_insert_overwrite_apache_spark(self): self.run_and_test() - + @use_profile("databricks_cluster") - def test_insert_overwrite_databricks(self): + def test_insert_overwrite_databricks_cluster(self): self.run_and_test() + class TestDeltaStrategies(TestIncrementalStrategies): @property def models(self): return "models_delta" - + def run_and_test(self): self.run_dbt(["seed"]) self.run_dbt(["run"]) self.assertTablesEqual("append_delta", "expected_append") self.assertTablesEqual("merge_no_key", "expected_append") self.assertTablesEqual("merge_unique_key", "expected_upsert") - + @use_profile("databricks_cluster") - def test_delta_strategies_databricks(self): + def test_delta_strategies_databricks_cluster(self): self.run_and_test() + class TestBadStrategies(TestIncrementalStrategies): @property def models(self): return "models_insert_overwrite" - + def run_and_test(self): with self.assertRaises(dbt.exceptions.Exception) as exc: self.run_dbt(["compile"]) message = str(exc.exception) 
self.assertIn("Invalid file format provided", message) self.assertIn("Invalid incremental strategy provided", message) - + @use_profile("apache_spark") def test_bad_strategies_apache_spark(self): self.run_and_test() - + @use_profile("databricks_cluster") - def test_bad_strategies_databricks(self): + def test_bad_strategies_databricks_cluster(self): self.run_and_test() - + + class TestBadStrategyWithEndpoint(TestInsertOverwrite): @use_profile("databricks_sql_endpoint") - def run_and_test(self): + def test_bad_strategies_databricks_sql_endpoint(self): with self.assertRaises(dbt.exceptions.Exception) as exc: self.run_dbt(["compile"], "--target", "odbc-sql-endpoint") message = str(exc.exception) From ab57d625063a7004b10f26695cf56f5337ed63db Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 18 Feb 2021 15:29:59 -0500 Subject: [PATCH 199/603] update integration tests --- .gitignore | 1 + test/custom/base.py | 7 ++ .../models/default_append.sql | 8 +- .../models_bad/bad_file_format.sql | 8 +- .../models_bad/bad_insert_overwrite_delta.sql | 8 +- .../models_bad/bad_merge_not_delta.sql | 8 +- .../models_bad/bad_strategy.sql | 8 +- .../models_delta/append_delta.sql | 8 +- .../models_delta/merge_no_key.sql | 8 +- .../models_delta/merge_unique_key.sql | 8 +- .../insert_overwrite_no_partitions.sql | 8 +- .../insert_overwrite_partitions.sql | 8 +- .../test_incremental_strategies.py | 78 ++++++++++++++----- 13 files changed, 108 insertions(+), 58 deletions(-) diff --git a/.gitignore b/.gitignore index d6f5c9d0..9caf202a 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ dbt-integration-tests test/integration/.user.yml .DS_Store .vscode +*.log diff --git a/test/custom/base.py b/test/custom/base.py index d0426aed..d2dc6dd7 100644 --- a/test/custom/base.py +++ b/test/custom/base.py @@ -2,6 +2,7 @@ from functools import wraps import os from dbt_adapter_tests import DBTIntegrationTestBase +import pyodbc class DBTSparkIntegrationTest(DBTIntegrationTestBase): @@ -55,6 +56,10 @@ def run_sql(self, query, fetch='None', kwargs=None, connection_name=None): else: # we have to fetch. cursor.fetchall() + except pyodbc.ProgrammingError as e: + # hacks for dropping schema + if "No results. Previous SQL was not a query." 
not in str(e): + raise e except Exception as e: conn.handle.rollback() conn.transaction_open = False @@ -99,6 +104,7 @@ def databricks_cluster_profile(self): 'host': os.getenv('DBT_DATABRICKS_HOST_NAME'), 'cluster': os.getenv('DBT_DATABRICKS_CLUSTER_NAME'), 'token': os.getenv('DBT_DATABRICKS_TOKEN'), + 'driver': os.getenv('ODBC_DRIVER'), 'port': 443, 'schema': self.unique_schema() }, @@ -120,6 +126,7 @@ def databricks_sql_endpoint_profile(self): 'host': os.getenv('DBT_DATABRICKS_HOST_NAME'), 'endpoint': os.getenv('DBT_DATABRICKS_ENDPOINT'), 'token': os.getenv('DBT_DATABRICKS_TOKEN'), + 'driver': os.getenv('ODBC_DRIVER'), 'port': 443, 'schema': self.unique_schema() }, diff --git a/test/custom/incremental_strategies/models/default_append.sql b/test/custom/incremental_strategies/models/default_append.sql index cef0d5ac..e2a10393 100644 --- a/test/custom/incremental_strategies/models/default_append.sql +++ b/test/custom/incremental_strategies/models/default_append.sql @@ -4,14 +4,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_file_format.sql b/test/custom/incremental_strategies/models_bad/bad_file_format.sql index b0b6abd4..911ccbb8 100644 --- a/test/custom/incremental_strategies/models_bad/bad_file_format.sql +++ b/test/custom/incremental_strategies/models_bad/bad_file_format.sql @@ -5,14 +5,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql b/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql index f0d87b37..b7186e1b 100644 --- a/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql +++ b/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql @@ -6,14 +6,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql b/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql index e07674c3..79a95111 100644 --- a/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql +++ b/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql @@ -5,14 +5,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union 
all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_bad/bad_strategy.sql b/test/custom/incremental_strategies/models_bad/bad_strategy.sql index 256f3484..72912f50 100644 --- a/test/custom/incremental_strategies/models_bad/bad_strategy.sql +++ b/test/custom/incremental_strategies/models_bad/bad_strategy.sql @@ -5,14 +5,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_delta/append_delta.sql b/test/custom/incremental_strategies/models_delta/append_delta.sql index 517c2418..bfbd2512 100644 --- a/test/custom/incremental_strategies/models_delta/append_delta.sql +++ b/test/custom/incremental_strategies/models_delta/append_delta.sql @@ -6,14 +6,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_delta/merge_no_key.sql b/test/custom/incremental_strategies/models_delta/merge_no_key.sql index 3039f74e..35a71b1a 100644 --- a/test/custom/incremental_strategies/models_delta/merge_no_key.sql +++ b/test/custom/incremental_strategies/models_delta/merge_no_key.sql @@ -6,14 +6,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_delta/merge_unique_key.sql b/test/custom/incremental_strategies/models_delta/merge_unique_key.sql index 30f5bc06..e8dfd37b 100644 --- a/test/custom/incremental_strategies/models_delta/merge_unique_key.sql +++ b/test/custom/incremental_strategies/models_delta/merge_unique_key.sql @@ -7,14 +7,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql index ebe6f2f0..fcc142bd 100644 --- a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql +++ b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql @@ -5,14 +5,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select 
cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql index c0663d97..cfe235ad 100644 --- a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql +++ b/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql @@ -7,14 +7,14 @@ {% if not is_incremental() %} -select 1 as id, 'hello' as msg +select cast(1 as bigint) as id, 'hello' as msg union all -select 2 as id, 'goodbye' as msg +select cast(2 as bigint) as id, 'goodbye' as msg {% else %} -select 2 as id, 'yo' as msg +select cast(2 as bigint) as id, 'yo' as msg union all -select 3 as id, 'anyway' as msg +select cast(3 as bigint) as id, 'anyway' as msg {% endif %} diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/test/custom/incremental_strategies/test_incremental_strategies.py index 5ad7a3f7..8be088fb 100644 --- a/test/custom/incremental_strategies/test_incremental_strategies.py +++ b/test/custom/incremental_strategies/test_incremental_strategies.py @@ -1,3 +1,4 @@ +from cProfile import run from test.custom.base import DBTSparkIntegrationTest, use_profile import dbt.exceptions @@ -11,13 +12,20 @@ def schema(self): def models(self): return "models" + @property + def project_config(self): + return { + 'seeds': { + 'quote_columns': False, + }, + } + def run_and_test(self): self.run_dbt(["seed"]) self.run_dbt(["run"]) + self.run_dbt(["run"]) self.assertTablesEqual("default_append", "expected_append") - -class TestDefaultAppend(TestIncrementalStrategies): @use_profile("apache_spark") def test_default_append_apache_spark(self): self.run_and_test() @@ -27,18 +35,31 @@ def test_default_append_databricks_cluster(self): self.run_and_test() -class TestInsertOverwrite(TestIncrementalStrategies): +class TestInsertOverwrite(DBTSparkIntegrationTest): + @property + def schema(self): + return "incremental_strategies" + @property def models(self): return "models_insert_overwrite" + @property + def project_config(self): + return { + 'seeds': { + 'quote_columns': False, + }, + } + def run_and_test(self): self.run_dbt(["seed"]) self.run_dbt(["run"]) + self.run_dbt(["run"]) self.assertTablesEqual( "insert_overwrite_no_partitions", "expected_overwrite") self.assertTablesEqual( - "insert_overwrite_partitions", "expected_upsert") + "insert_overwrite_partitions", "expected_overwrite") @use_profile("apache_spark") def test_insert_overwrite_apache_spark(self): @@ -49,14 +70,27 @@ def test_insert_overwrite_databricks_cluster(self): self.run_and_test() -class TestDeltaStrategies(TestIncrementalStrategies): +class TestDeltaStrategies(DBTSparkIntegrationTest): + @property + def schema(self): + return "incremental_strategies" + @property def models(self): return "models_delta" + @property + def project_config(self): + return { + 'seeds': { + 'quote_columns': False, + }, + } + def run_and_test(self): self.run_dbt(["seed"]) self.run_dbt(["run"]) + self.run_dbt(["run"]) self.assertTablesEqual("append_delta", "expected_append") self.assertTablesEqual("merge_no_key", "expected_append") 
self.assertTablesEqual("merge_unique_key", "expected_upsert") @@ -66,17 +100,30 @@ def test_delta_strategies_databricks_cluster(self): self.run_and_test() -class TestBadStrategies(TestIncrementalStrategies): +class TestBadStrategies(DBTSparkIntegrationTest): + @property + def schema(self): + return "incremental_strategies" + + @property + def project_config(self): + return { + 'seeds': { + 'quote_columns': False, + }, + } + @property def models(self): - return "models_insert_overwrite" + return "models_bad" def run_and_test(self): - with self.assertRaises(dbt.exceptions.Exception) as exc: - self.run_dbt(["compile"]) - message = str(exc.exception) - self.assertIn("Invalid file format provided", message) - self.assertIn("Invalid incremental strategy provided", message) + self.run_dbt(["seed"]) + results = self.run_dbt(["run"], expect_pass=False) + # assert all models fail with co + for result in results: + self.assertEqual("error", result.status) + self.assertIn("Compilation Error in model", result.message) @use_profile("apache_spark") def test_bad_strategies_apache_spark(self): @@ -86,11 +133,6 @@ def test_bad_strategies_apache_spark(self): def test_bad_strategies_databricks_cluster(self): self.run_and_test() - -class TestBadStrategyWithEndpoint(TestInsertOverwrite): @use_profile("databricks_sql_endpoint") def test_bad_strategies_databricks_sql_endpoint(self): - with self.assertRaises(dbt.exceptions.Exception) as exc: - self.run_dbt(["compile"], "--target", "odbc-sql-endpoint") - message = str(exc.exception) - self.assertIn("Invalid incremental strategy provided", message) + self.run_and_test() From a12f74d47ae0937d14873ff45d1bed29a9c03e29 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 19 Feb 2021 12:06:53 +0100 Subject: [PATCH 200/603] Fix assertTablesEqual mismatch --- .../incremental_strategies/test_incremental_strategies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/test/custom/incremental_strategies/test_incremental_strategies.py index 8be088fb..730320cd 100644 --- a/test/custom/incremental_strategies/test_incremental_strategies.py +++ b/test/custom/incremental_strategies/test_incremental_strategies.py @@ -59,7 +59,7 @@ def run_and_test(self): self.assertTablesEqual( "insert_overwrite_no_partitions", "expected_overwrite") self.assertTablesEqual( - "insert_overwrite_partitions", "expected_overwrite") + "insert_overwrite_partitions", "expected_upsert") @use_profile("apache_spark") def test_insert_overwrite_apache_spark(self): @@ -120,7 +120,7 @@ def models(self): def run_and_test(self): self.run_dbt(["seed"]) results = self.run_dbt(["run"], expect_pass=False) - # assert all models fail with co + # assert all models fail with compilation errors for result in results: self.assertEqual("error", result.status) self.assertIn("Compilation Error in model", result.message) From 43ab587ba08e8f288c86196a88efd08bc454a5e2 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 19 Feb 2021 12:19:58 +0100 Subject: [PATCH 201/603] Concise test defs. 
Local tests first --- .circleci/config.yml | 3 + .../test_incremental_strategies.py | 65 ++++--------------- 2 files changed, 17 insertions(+), 51 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a44a14ab..f00d3d7d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -101,9 +101,12 @@ workflows: - integration-spark-databricks-http: requires: - unit + - integration-spark-thrift - integration-spark-databricks-odbc-cluster: requires: - unit + - integration-spark-thrift - integration-spark-databricks-odbc-endpoint: requires: - unit + - integration-spark-thrift diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/test/custom/incremental_strategies/test_incremental_strategies.py index 730320cd..0de1bd05 100644 --- a/test/custom/incremental_strategies/test_incremental_strategies.py +++ b/test/custom/incremental_strategies/test_incremental_strategies.py @@ -8,10 +8,6 @@ class TestIncrementalStrategies(DBTSparkIntegrationTest): def schema(self): return "incremental_strategies" - @property - def models(self): - return "models" - @property def project_config(self): return { @@ -20,10 +16,18 @@ def project_config(self): }, } - def run_and_test(self): + def seed_and_run_twice(self): self.run_dbt(["seed"]) self.run_dbt(["run"]) self.run_dbt(["run"]) + +class TestDefaultAppend(TestIncrementalStrategies): + @property + def models(self): + return "models" + + def run_and_test(self): + self.seed_and_run_twice() self.assertTablesEqual("default_append", "expected_append") @use_profile("apache_spark") @@ -35,27 +39,13 @@ def test_default_append_databricks_cluster(self): self.run_and_test() -class TestInsertOverwrite(DBTSparkIntegrationTest): - @property - def schema(self): - return "incremental_strategies" - +class TestInsertOverwrite(TestIncrementalStrategies): @property def models(self): return "models_insert_overwrite" - @property - def project_config(self): - return { - 'seeds': { - 'quote_columns': False, - }, - } - def run_and_test(self): - self.run_dbt(["seed"]) - self.run_dbt(["run"]) - self.run_dbt(["run"]) + self.seed_and_run_twice() self.assertTablesEqual( "insert_overwrite_no_partitions", "expected_overwrite") self.assertTablesEqual( @@ -70,27 +60,13 @@ def test_insert_overwrite_databricks_cluster(self): self.run_and_test() -class TestDeltaStrategies(DBTSparkIntegrationTest): - @property - def schema(self): - return "incremental_strategies" - +class TestDeltaStrategies(TestIncrementalStrategies): @property def models(self): return "models_delta" - @property - def project_config(self): - return { - 'seeds': { - 'quote_columns': False, - }, - } - def run_and_test(self): - self.run_dbt(["seed"]) - self.run_dbt(["run"]) - self.run_dbt(["run"]) + self.seed_and_run_twice() self.assertTablesEqual("append_delta", "expected_append") self.assertTablesEqual("merge_no_key", "expected_append") self.assertTablesEqual("merge_unique_key", "expected_upsert") @@ -100,25 +76,12 @@ def test_delta_strategies_databricks_cluster(self): self.run_and_test() -class TestBadStrategies(DBTSparkIntegrationTest): - @property - def schema(self): - return "incremental_strategies" - - @property - def project_config(self): - return { - 'seeds': { - 'quote_columns': False, - }, - } - +class TestBadStrategies(TestIncrementalStrategies): @property def models(self): return "models_bad" def run_and_test(self): - self.run_dbt(["seed"]) results = self.run_dbt(["run"], expect_pass=False) # assert all models fail with compilation errors for result in results: From 
ac35028a7a1668a958fca701ffd20bd9db24e022 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 19 Feb 2021 14:26:05 +0100 Subject: [PATCH 202/603] Fixups, document spark config setting --- .circleci/config.yml | 3 --- README.md | 2 +- .../incremental_strategies/test_incremental_strategies.py | 3 ++- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f00d3d7d..99154fb6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -100,13 +100,10 @@ workflows: - unit - integration-spark-databricks-http: requires: - - unit - integration-spark-thrift - integration-spark-databricks-odbc-cluster: requires: - - unit - integration-spark-thrift - integration-spark-databricks-odbc-endpoint: requires: - - unit - integration-spark-thrift diff --git a/README.md b/README.md index 09e716b5..8841100f 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ The following configurations can be supplied to models run with the dbt-spark pl dbt has a number of ways to build models incrementally, called "incremental strategies." Some strategies depend on certain file formats, connection types, and other model configurations: - `append` (default): Insert new records without updating or overwriting any existing data. -- `insert_overwrite`: If `partition_by` is specified, overwrite partitions in the table with new data. (Be sure to re-select _all_ of the relevant data for a partition.) If no `partition_by` is specified, overwrite the entire table with new data. [Cannot be used with `file_format: delta` or when connectinng via Databricks SQL Endpoints. For atomic replacement of Delta tables, use the `table` materialization instead.] +- `insert_overwrite`: If `partition_by` is specified, overwrite partitions in the table with new data. (Be sure to re-select _all_ of the relevant data for a partition.) If no `partition_by` is specified, overwrite the entire table with new data. [Cannot be used with `file_format: delta` or when connecting via Databricks SQL Endpoints. For dynamic partition replacement with `method: odbc` + Databricks `cluster`, you **must** include `set spark.sql.sources.partitionOverwriteMode DYNAMIC` in the [cluster SparkConfig](https://docs.databricks.com/clusters/configure.html#spark-config). For atomic replacement of Delta tables, use the `table` materialization instead.] - `merge`: Match records based on a `unique_key`; update old records, insert new ones. (If no `unique_key` is specified, all new data is inserted, similar to `append`.) [Requires `file_format: delta`. Available only on Databricks Runtime.]
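As a rough sketch of how the strategies described in the README hunk above are typically wired into a model: the model, source, and column names below are illustrative assumptions (not taken from this repo), while the config keys (`materialized`, `incremental_strategy`, `partition_by`, `file_format`) and the `is_incremental()` macro are the ones exercised by the test models in these patches.

```sql
-- models/events_by_day.sql (hypothetical model name)
{{ config(
    materialized='incremental',
    incremental_strategy='insert_overwrite',
    partition_by=['dt'],
    file_format='parquet'
) }}

select id, msg, dt
from {{ source('raw', 'events') }}  -- hypothetical source

{% if is_incremental() %}
  -- on incremental runs, only rebuild recent partitions;
  -- with insert_overwrite, every partition selected here is fully replaced
  where dt >= date_sub(current_date(), 3)
{% endif %}
```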
Examples: diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/test/custom/incremental_strategies/test_incremental_strategies.py index 0de1bd05..4d13a770 100644 --- a/test/custom/incremental_strategies/test_incremental_strategies.py +++ b/test/custom/incremental_strategies/test_incremental_strategies.py @@ -20,7 +20,8 @@ def seed_and_run_twice(self): self.run_dbt(["seed"]) self.run_dbt(["run"]) self.run_dbt(["run"]) - + + class TestDefaultAppend(TestIncrementalStrategies): @property def models(self): From b8c7d77ab8d8f758d97d33d3c344f8fb5afb95c3 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 19 Feb 2021 14:54:09 +0100 Subject: [PATCH 203/603] Update README, Changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b7e79c9..500dd4f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## dbt-spark 0.19.0 (Release TBD) +### Breaking changes +- Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. ([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141)) + ### Fixes - Capture hard-deleted records in snapshot merge, when `invalidate_hard_deletes` config is set ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/143), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/144)) @@ -7,7 +10,6 @@ ### Breaking changes - Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126)) -- Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. ([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141)) ### Under the hood - Enable `CREATE OR REPLACE` support when using Delta. Instead of dropping and recreating the table, it will keep the existing table, and add a new version as supported by Delta. This will ensure that the table stays available when running the pipeline, and you can track the history. 
From e2aa2c9ae90f12a082e0b76f18ab279b2e54c046 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Feb 2021 08:27:23 -0500 Subject: [PATCH 204/603] =?UTF-8?q?Bump=20version:=200.19.0rc1=20=E2=86=92?= =?UTF-8?q?=200.19.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index be9371e6..34ea6a48 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.0rc1 +current_version = 0.19.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/requirements.txt b/requirements.txt index b323e209..7878d77e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.19.0rc1 +dbt-core==0.19.0 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index af036146..07abbc37 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.19.0rc1' +dbt_version = '0.19.0' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.19.0rc1 vs 0.19.0rc1a1, 0.19.0rc1.1, ...) +# ends of it. (0.19.0 vs 0.19.0a1, 0.19.0.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From fd527cea2b1938158f02d271df636b7284098f65 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Feb 2021 08:36:19 -0500 Subject: [PATCH 205/603] bump version to 0.19.0 --- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index edc25af5..7ea683ef 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.18.1.1 +current_version = 0.19.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 1b44f861..307a47ae 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.19.0rc1" +version = "0.19.0" From afcde1e7f7dab2c986898709f26d848e7e0bc04e Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Feb 2021 09:08:44 -0500 Subject: [PATCH 206/603] =?UTF-8?q?Bump=20version:=200.19.0=20=E2=86=92=20?= =?UTF-8?q?0.19.1b2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 34ea6a48..6b9679f1 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.0 +current_version = 0.19.1b2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/requirements.txt b/requirements.txt index 7878d77e..3566b2f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.19.0 +dbt-core==0.19.1b2 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index 07abbc37..c04c975d 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version 
= '0.19.0' +dbt_version = '0.19.1b2' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.19.0 vs 0.19.0a1, 0.19.0.1, ...) +# ends of it. (0.19.1b2 vs 0.19.1b2a1, 0.19.1b2.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From e3d5b26edbbe37728c9e3e4c5f4e83c203fe924d Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Feb 2021 09:09:35 -0500 Subject: [PATCH 207/603] bump version to 0.19.1b2 --- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 7ea683ef..ade1090b 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.0 +current_version = 0.19.1b2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 307a47ae..9f26800b 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.19.0" +version = "0.19.1b2" From 0f8fbabe612d4aa5413475819a8d53598fdeab74 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Feb 2021 10:43:48 -0500 Subject: [PATCH 208/603] fix unit tests related to new serialization api --- dbt/adapters/spark/column.py | 11 ++++++----- dbt/adapters/spark/connections.py | 8 +++++++- dbt/adapters/spark/impl.py | 2 +- test/unit/test_adapter.py | 8 ++++---- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index d8292f6e..722054f8 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -2,13 +2,14 @@ from typing import TypeVar, Optional, Dict, Any from dbt.adapters.base.column import Column +from dbt.dataclass_schema import dbtClassMixin from hologram import JsonDict Self = TypeVar('Self', bound='SparkColumn') @dataclass -class SparkColumn(Column): +class SparkColumn(dbtClassMixin, Column): table_database: Optional[str] = None table_schema: Optional[str] = None table_name: Optional[str] = None @@ -55,12 +56,12 @@ def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]: table_stats[f'stats:{key}:include'] = True return table_stats - def to_dict( - self, omit_none: bool = True, validate: bool = False + def to_column_dict( + self, keep_none: bool = False, validate: bool = False ) -> JsonDict: - original_dict = super().to_dict(omit_none=omit_none) + original_dict = self.to_dict(options={'keep_none': keep_none}) # If there are stats, merge them into the root of the dict - original_stats = original_dict.pop('table_stats') + original_stats = original_dict.pop('table_stats', None) if original_stats: original_dict.update(original_stats) return original_dict diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 76e574df..7a8bda7d 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -47,7 +47,6 @@ class SparkConnectionMethod(StrEnum): class SparkCredentials(Credentials): host: str method: SparkConnectionMethod - schema: str database: Optional[str] driver: Optional[str] = None cluster: Optional[str] = None @@ -61,6 +60,13 @@ class SparkCredentials(Credentials): connect_retries: int = 0 connect_timeout: int = 10 + @classmethod + def __pre_deserialize__(cls, data, options=None): + data = super().__pre_deserialize__(data, options=options) + if 'database' not in data: + data['database'] 
= None + return data + def __post_init__(self): # spark classifies database and schema as the same thing if ( diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 0e8ec7b7..d28ad71a 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -211,7 +211,7 @@ def _get_columns_for_catalog( if owner: column.table_owner = owner # convert SparkColumns into catalog dicts - as_dict = column.to_dict() + as_dict = column.to_column_dict() as_dict['column_name'] = as_dict.pop('column', None) as_dict['column_type'] = as_dict.pop('dtype') as_dict['table_database'] = None diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 30f00821..b97c2018 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -265,7 +265,7 @@ def test_parse_relation(self): rows = SparkAdapter(config).parse_describe_extended( relation, input_cols) self.assertEqual(len(rows), 3) - self.assertEqual(rows[0].to_dict(omit_none=False), { + self.assertEqual(rows[0].to_column_dict(keep_none=True), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, @@ -279,7 +279,7 @@ def test_parse_relation(self): 'char_size': None }) - self.assertEqual(rows[1].to_dict(omit_none=False), { + self.assertEqual(rows[1].to_column_dict(keep_none=True), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, @@ -293,7 +293,7 @@ def test_parse_relation(self): 'char_size': None }) - self.assertEqual(rows[2].to_dict(omit_none=False), { + self.assertEqual(rows[2].to_column_dict(keep_none=True), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, @@ -345,7 +345,7 @@ def test_parse_relation_with_statistics(self): rows = SparkAdapter(config).parse_describe_extended( relation, input_cols) self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].to_dict(omit_none=False), { + self.assertEqual(rows[0].to_column_dict(keep_none=True), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, From fb89367d5c3539ab1633d476d8427a37c1068ecb Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Feb 2021 10:59:45 -0500 Subject: [PATCH 209/603] update changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 500dd4f1..d215142c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,10 @@ -## dbt-spark 0.19.0 (Release TBD) +## dbt-spark 0.19.1b2 (Release TBD) + +### Under the hood +- update serialization calls to use new API in dbt-core `0.19.1b2` ([#150](https://github.com/fishtown-analytics/dbt-spark/pull/150)) + + +## dbt-spark 0.19.0 (February 22, 2021) ### Breaking changes - Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. 
([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141)) From bd92dae8dc8b201316d1378976d294f42c1d38b9 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 25 Feb 2021 22:51:47 -0500 Subject: [PATCH 210/603] update package data path --- setup.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 07abbc37..e5d91465 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,12 @@ def _dbt_spark_version(): f'dbt_version={dbt_version}' ) +odbc_extras = ['pyodbc>=4.0.30'] +pyhive_extras = [ + 'PyHive[hive]>=0.6.0,<0.7.0', + 'thrift>=0.11.0,<0.12.0', +] +all_extras = odbc_extras + pyhive_extras setup( name=package_name, @@ -56,7 +62,8 @@ def _dbt_spark_version(): 'include/spark/dbt_project.yml', 'include/spark/sample_profiles.yml', 'include/spark/macros/*.sql', - 'include/spark/macros/**/*.sql', + 'include/spark/macros/*/*.sql', + 'include/spark/macros/*/*/*.sql', ] }, install_requires=[ @@ -64,10 +71,8 @@ def _dbt_spark_version(): 'sqlparams>=3.0.0', ], extras_require={ - "ODBC": ['pyodbc>=4.0.30'], - "PyHive": [ - 'PyHive[hive]>=0.6.0,<0.7.0', - 'thrift>=0.11.0,<0.12.0', - ], + "ODBC": odbc_extras, + "PyHive": pyhive_extras, + "all": all_extras } ) From 85a061bc867ce8986412f65afcf2a45f41e1d140 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 25 Feb 2021 23:16:08 -0500 Subject: [PATCH 211/603] this is much easier, include everything --- MANIFEST.in | 1 + setup.py | 10 +--------- 2 files changed, 2 insertions(+), 9 deletions(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..78412d5b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include dbt/include *.sql *.yml *.md \ No newline at end of file diff --git a/setup.py b/setup.py index e5d91465..091667bb 100644 --- a/setup.py +++ b/setup.py @@ -57,15 +57,7 @@ def _dbt_spark_version(): url='https://github.com/fishtown-analytics/dbt-spark', packages=find_namespace_packages(include=['dbt', 'dbt.*']), - package_data={ - 'dbt': [ - 'include/spark/dbt_project.yml', - 'include/spark/sample_profiles.yml', - 'include/spark/macros/*.sql', - 'include/spark/macros/*/*.sql', - 'include/spark/macros/*/*/*.sql', - ] - }, + include_package_data=True, install_requires=[ f'dbt-core=={dbt_version}', 'sqlparams>=3.0.0', From f7b1f597895c55521e624ade4248dcc1e3491695 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Thu, 25 Feb 2021 23:32:42 -0500 Subject: [PATCH 212/603] update changelog --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 500dd4f1..098ad96e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ -## dbt-spark 0.19.0 (Release TBD) +## dbt-spark 0.19.0.1 (Release TBD) + +### Fixes +- Fix package distribution to include incremental model materializations ([#151](https://github.com/fishtown-analytics/dbt-spark/pull/151)) + +## dbt-spark 0.19.0 (February 21, 2021) ### Breaking changes - Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. 
([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141)) From bd016f99fd19e4c2252ce14ec6e1fdd8e244dbcd Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 26 Feb 2021 09:09:36 -0500 Subject: [PATCH 213/603] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 098ad96e..0f5a5aa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ ## dbt-spark 0.19.0.1 (Release TBD) ### Fixes -- Fix package distribution to include incremental model materializations ([#151](https://github.com/fishtown-analytics/dbt-spark/pull/151)) +- Fix package distribution to include incremental model materializations ([#151](https://github.com/fishtown-analytics/dbt-spark/pull/151), [#152](https://github.com/fishtown-analytics/dbt-spark/issues/152)) ## dbt-spark 0.19.0 (February 21, 2021) From 130ef709414b5cced1157fa434b505bf8cd2f376 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 26 Feb 2021 09:09:44 -0500 Subject: [PATCH 214/603] =?UTF-8?q?Bump=20version:=200.19.0=20=E2=86=92=20?= =?UTF-8?q?0.19.0.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 7ea683ef..d6c40a28 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.0 +current_version = 0.19.0.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 307a47ae..7efe7ff1 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.19.0" +version = "0.19.0.1" From 8d8059d5d546c85b4c949081dcff90f7429c6ea8 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 26 Feb 2021 10:44:00 -0500 Subject: [PATCH 215/603] update changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26f301c8..d51b5125 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ -## dbt-spark 0.19.1b2 (Release TBD) +## dbt-spark 0.19.1 (Release TBD) + +## dbt-spark 0.19.1b2 (February 26, 2021) ### Under the hood - update serialization calls to use new API in dbt-core `0.19.1b2` ([#150](https://github.com/fishtown-analytics/dbt-spark/pull/150)) From 67193f41a757a4582717ffbd36db1990b9da7ccd Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 8 Mar 2021 23:14:58 +0100 Subject: [PATCH 216/603] Update to the latest version The odbc endpoint has been added, but this isn't reflected in the sample --- dbt/include/spark/sample_profiles.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dbt/include/spark/sample_profiles.yml b/dbt/include/spark/sample_profiles.yml index 52d07891..b1cff273 100644 --- a/dbt/include/spark/sample_profiles.yml +++ b/dbt/include/spark/sample_profiles.yml @@ -4,13 +4,20 @@ default: # Use this if connecting to a hosted spark (e.g. 
Databricks) dev: type: spark - method: http - schema: [dev_schema] - host: [host] - organization: [organization id] # Azure Databricks ONLY - port: [port] - token: [token] + method: odbc + driver: [path/to/driver] + schema: [schema_name] + host: [yourorg.sparkhost.com] + organization: [organization id] # Azure Databricks only + token: [abc123] + + # one of: + endpoint: [endpoint id] cluster: [cluster id] + + # optional + port: [port] # default 443 + user: [user] # Use this if connecting to Dockerized spark prod: From c1813904cb8ca45f5253841f600c0e1b79458712 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Mar 2021 17:53:58 -0400 Subject: [PATCH 217/603] Bump version to 0.19.1rc1 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 6b9679f1..83b10e81 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.1b2 +current_version = 0.19.1rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index ade1090b..e55af6de 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.1b2 +current_version = 0.19.1rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 9f26800b..2e0151b0 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.19.1b2" +version = "0.19.1rc1" diff --git a/requirements.txt b/requirements.txt index 3566b2f6..9600932d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.19.1b2 +dbt-core==0.19.1rc1 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index 7a2183ee..7a12c701 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.19.1b2' +dbt_version = '0.19.1rc1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.19.1b2 vs 0.19.1b2a1, 0.19.1b2.1, ...) +# ends of it. (0.19.1rc1 vs 0.19.1rc1a1, 0.19.1rc1.1, ...) 
if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From e07021333af9ed88028de57c1f724a0224bfb799 Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Mon, 22 Mar 2021 18:48:39 -0400 Subject: [PATCH 218/603] revert serialization api changes --- dbt/adapters/spark/column.py | 4 ++-- dbt/adapters/spark/connections.py | 4 ++-- test/unit/test_adapter.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index 722054f8..fd377ad1 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -57,9 +57,9 @@ def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]: return table_stats def to_column_dict( - self, keep_none: bool = False, validate: bool = False + self, omit_none: bool = True, validate: bool = False ) -> JsonDict: - original_dict = self.to_dict(options={'keep_none': keep_none}) + original_dict = self.to_dict(omit_none=omit_none) # If there are stats, merge them into the root of the dict original_stats = original_dict.pop('table_stats', None) if original_stats: diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 7a8bda7d..457a0d84 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -61,8 +61,8 @@ class SparkCredentials(Credentials): connect_timeout: int = 10 @classmethod - def __pre_deserialize__(cls, data, options=None): - data = super().__pre_deserialize__(data, options=options) + def __pre_deserialize__(cls, data): + data = super().__pre_deserialize__(data) if 'database' not in data: data['database'] = None return data diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index b97c2018..0092e131 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -265,7 +265,7 @@ def test_parse_relation(self): rows = SparkAdapter(config).parse_describe_extended( relation, input_cols) self.assertEqual(len(rows), 3) - self.assertEqual(rows[0].to_column_dict(keep_none=True), { + self.assertEqual(rows[0].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, @@ -279,7 +279,7 @@ def test_parse_relation(self): 'char_size': None }) - self.assertEqual(rows[1].to_column_dict(keep_none=True), { + self.assertEqual(rows[1].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, @@ -293,7 +293,7 @@ def test_parse_relation(self): 'char_size': None }) - self.assertEqual(rows[2].to_column_dict(keep_none=True), { + self.assertEqual(rows[2].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, @@ -345,7 +345,7 @@ def test_parse_relation_with_statistics(self): rows = SparkAdapter(config).parse_describe_extended( relation, input_cols) self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].to_column_dict(keep_none=True), { + self.assertEqual(rows[0].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, 'table_name': relation.name, From 6ad164b315748fef7c0ae0b87ff6b8292632f35e Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 2 Apr 2021 10:14:30 -0400 Subject: [PATCH 219/603] =?UTF-8?q?Bump=20version:=200.19.1rc1=20=E2=86=92?= =?UTF-8?q?=200.19.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion-dbt.cfg | 2 +- 
.bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 83b10e81..24e65209 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.1rc1 +current_version = 0.19.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index e55af6de..8fbe0a5f 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.1rc1 +current_version = 0.19.1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 2e0151b0..3f7d7377 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.19.1rc1" +version = "0.19.1" diff --git a/requirements.txt b/requirements.txt index 9600932d..dab0af33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.19.1rc1 +dbt-core==0.19.1 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index 7a12c701..25c7ff3e 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.19.1rc1' +dbt_version = '0.19.1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.19.1rc1 vs 0.19.1rc1a1, 0.19.1rc1.1, ...) +# ends of it. (0.19.1 vs 0.19.1a1, 0.19.1.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 572cdfaac5768f2ef58413ad73cc857943122bef Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:02:26 +1000 Subject: [PATCH 220/603] Cast table_owner to str Having an integer table_owner causes problems with dbt docs generate --- dbt/adapters/spark/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index d28ad71a..a12e7a94 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -180,7 +180,7 @@ def parse_describe_extended( table_schema=relation.schema, table_name=relation.name, table_type=relation.type, - table_owner=metadata.get(KEY_TABLE_OWNER), + table_owner=str(metadata.get(KEY_TABLE_OWNER)), table_stats=table_stats, column=column['col_name'], column_index=idx, From bc801b9e0315144a8d7647c090d380cba5d7a016 Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:15:41 +1000 Subject: [PATCH 221/603] Add fix to Changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d51b5125..bae4e0c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## dbt next + +### Fixes + +- Cast `table_owner` to string to avoid errors generating docs ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) + ## dbt-spark 0.19.1 (Release TBD) ## dbt-spark 0.19.1b2 (February 26, 2021) From bcfbcbc7c01d201bb51c71ea0575d1adced4c7b1 Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:42:05 +1000 Subject: [PATCH 222/603] Test conversion to string for owner --- test/unit/test_adapter.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 
0092e131..2a720af7 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -307,6 +307,35 @@ def test_parse_relation(self): 'char_size': None }) + def test_parse_relation_with_integer_owner(self): + self.maxDiff = None + rel_type = SparkRelation.get_relation_type.Table + + relation = SparkRelation.create( + schema='default_schema', + identifier='mytable', + type=rel_type + ) + assert relation.database is None + + # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED + plain_rows = [ + ('col1', 'decimal(22,0)'), + ('# Detailed Table Information', None), + ('Owner', 1234), + ] + + input_cols = [Row(keys=['col_name', 'data_type'], values=r) + for r in plain_rows] + + config = self._get_target_http(self.project_cfg) + rows = SparkAdapter(config).parse_describe_extended( + relation, input_cols) + + self.assertEqual(rows[0].to_column_dict()['table_owner'], '1234') + self.assertEqual(rows[1].to_column_dict()['table_owner'], '1234') + self.assertEqual(rows[2].to_column_dict()['table_owner'], '1234') + def test_parse_relation_with_statistics(self): self.maxDiff = None rel_type = SparkRelation.get_relation_type.Table From 00874ce4b777dae18b9c4dff3a8d2a9199e9e45c Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:42:38 +1000 Subject: [PATCH 223/603] Remove trailing comma --- test/unit/test_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 2a720af7..54831671 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -322,7 +322,7 @@ def test_parse_relation_with_integer_owner(self): plain_rows = [ ('col1', 'decimal(22,0)'), ('# Detailed Table Information', None), - ('Owner', 1234), + ('Owner', 1234) ] input_cols = [Row(keys=['col_name', 'data_type'], values=r) From 818d43c6d6bdbcbc428dc1bffb9efbaeebda2da0 Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:46:35 +1000 Subject: [PATCH 224/603] Use git and limit rows checked --- test/unit/test_adapter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 54831671..5e50e310 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -332,9 +332,7 @@ def test_parse_relation_with_integer_owner(self): rows = SparkAdapter(config).parse_describe_extended( relation, input_cols) - self.assertEqual(rows[0].to_column_dict()['table_owner'], '1234') - self.assertEqual(rows[1].to_column_dict()['table_owner'], '1234') - self.assertEqual(rows[2].to_column_dict()['table_owner'], '1234') + self.assertEqual(rows[0].to_column_dict().get('table_owner'), '1234') def test_parse_relation_with_statistics(self): self.maxDiff = None From 40db36435f8938b5c45dc0f2277e7fa504ea8b6c Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 12 Apr 2021 09:23:22 -0400 Subject: [PATCH 225/603] Update changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bae4e0c5..135df3fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,10 @@ ### Fixes -- Cast `table_owner` to string to avoid errors generating docs ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) +- Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) + +### Contributors +- [@friendofasquid](https://github.com/friendofasquid) 
([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) ## dbt-spark 0.19.1 (Release TBD) From 07e4c5549896ddb9c3c82b938d29a9d11ff6cda2 Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Tue, 13 Apr 2021 23:28:26 +0200 Subject: [PATCH 226/603] Parse information returned by show table extended --- dbt/adapters/spark/impl.py | 44 +++++++++++++++++++++++++++------- dbt/adapters/spark/relation.py | 1 + 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index a12e7a94..76ab27a0 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,3 +1,4 @@ +import re from concurrent.futures import Future from dataclasses import dataclass from typing import Optional, List, Dict, Any, Union, Iterable @@ -60,6 +61,8 @@ class SparkAdapter(SQLAdapter): 'stats:rows:description', 'stats:rows:include', ) + INFORMATION_COLUMNS_REGEX = re.compile(r"\|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) + INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE) Relation = SparkRelation Column = SparkColumn @@ -139,7 +142,8 @@ def list_relations_without_caching( schema=_schema, identifier=name, type=rel_type, - is_delta=is_delta + information=information, + is_delta=is_delta, ) relations.append(relation) @@ -197,19 +201,43 @@ def find_table_information_separator(rows: List[dict]) -> int: return pos def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: - rows: List[agate.Row] = super().get_columns_in_relation(relation) - return self.parse_describe_extended(relation, rows) + cached_relations = self.cache.get_relations(relation.database, relation.schema) + cached_relation = next((cached_relation + for cached_relation in cached_relations + if str(cached_relation) == str(relation)), None) + if cached_relations is None: + rows: List[agate.Row] = super().get_columns_in_relation(relation) + columns = self.parse_describe_extended(relation, rows) + else: + columns = self.get_columns_from_information(cached_relation) + return columns + + def get_columns_from_information(self, relation: SparkRelation) -> List[SparkColumn]: + owner_match = re.findall(self.INFORMATION_OWNER_REGEX, relation.information) + owner = owner_match[0] if owner_match else None + matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, relation.information) + columns = [] + for match_num, match in enumerate(matches, start=1): + column_name, column_type, nullable = match.groups() + column = SparkColumn( + table_database=None, + table_schema=relation.schema, + table_name=relation.table, + table_type=relation.type, + column_index=match_num, + table_owner=owner, + column=column_name, + dtype=column_type + ) + columns.append(column) + return columns def _get_columns_for_catalog( self, relation: SparkRelation ) -> Iterable[Dict[str, Any]]: - properties = self.get_properties(relation) - columns = self.get_columns_in_relation(relation) - owner = properties.get(KEY_TABLE_OWNER) + columns = self.get_columns_from_information(relation) for column in columns: - if owner: - column.table_owner = owner # convert SparkColumns into catalog dicts as_dict = column.to_column_dict() as_dict['column_name'] = as_dict.pop('column', None) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 507f51d3..5fc09655 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -26,6 +26,7 @@ class SparkRelation(BaseRelation): include_policy: SparkIncludePolicy = SparkIncludePolicy() quote_character: str = '`' 
is_delta: Optional[bool] = None + information: str = None def __post_init__(self): if self.database != self.schema and self.database: From d98588d768ca97f9df339ccb48679c19afca2e66 Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Tue, 13 Apr 2021 23:42:17 +0200 Subject: [PATCH 227/603] Fix linter errors --- dbt/adapters/spark/impl.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 76ab27a0..433dbbbb 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -61,7 +61,8 @@ class SparkAdapter(SQLAdapter): 'stats:rows:description', 'stats:rows:include', ) - INFORMATION_COLUMNS_REGEX = re.compile(r"\|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) + INFORMATION_COLUMNS_REGEX = re.compile( + r"\|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE) Relation = SparkRelation @@ -201,10 +202,12 @@ def find_table_information_separator(rows: List[dict]) -> int: return pos def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: - cached_relations = self.cache.get_relations(relation.database, relation.schema) + cached_relations = self.cache.get_relations( + relation.database, relation.schema) cached_relation = next((cached_relation for cached_relation in cached_relations - if str(cached_relation) == str(relation)), None) + if str(cached_relation) == str(relation)), + None) if cached_relations is None: rows: List[agate.Row] = super().get_columns_in_relation(relation) columns = self.parse_describe_extended(relation, rows) @@ -212,10 +215,14 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: columns = self.get_columns_from_information(cached_relation) return columns - def get_columns_from_information(self, relation: SparkRelation) -> List[SparkColumn]: - owner_match = re.findall(self.INFORMATION_OWNER_REGEX, relation.information) + def get_columns_from_information( + self, relation: SparkRelation + ) -> List[SparkColumn]: + owner_match = re.findall( + self.INFORMATION_OWNER_REGEX, relation.information) owner = owner_match[0] if owner_match else None - matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, relation.information) + matches = re.finditer( + self.INFORMATION_COLUMNS_REGEX, relation.information) columns = [] for match_num, match in enumerate(matches, start=1): column_name, column_type, nullable = match.groups() From fe50b05a12f5f7da02c4b00343410252786c06e7 Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Wed, 14 Apr 2021 00:13:34 +0200 Subject: [PATCH 228/603] Add logic when relation is None --- dbt/adapters/spark/impl.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 433dbbbb..5a386214 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -242,11 +242,19 @@ def get_columns_from_information( def _get_columns_for_catalog( self, relation: SparkRelation ) -> Iterable[Dict[str, Any]]: - columns = self.get_columns_from_information(relation) + if relation and relation.information is not None: + columns = self.get_columns_from_information(relation) + owner = None + else: + properties = self.get_properties(relation) + columns = self.get_columns_in_relation(relation) + owner = properties.get(KEY_TABLE_OWNER) for column in columns: # convert SparkColumns into catalog dicts as_dict = column.to_column_dict() + if owner: + column.table_owner = owner 
as_dict['column_name'] = as_dict.pop('column', None) as_dict['column_type'] = as_dict.pop('dtype') as_dict['table_database'] = None From 2e307f6dee5568a7f7a5c952e7746d1023b11b10 Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Wed, 14 Apr 2021 00:18:31 +0200 Subject: [PATCH 229/603] Revert previous commit and fix bug --- dbt/adapters/spark/impl.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 5a386214..aff6e5f4 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -208,7 +208,7 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: for cached_relation in cached_relations if str(cached_relation) == str(relation)), None) - if cached_relations is None: + if cached_relation is None: rows: List[agate.Row] = super().get_columns_in_relation(relation) columns = self.parse_describe_extended(relation, rows) else: @@ -242,19 +242,11 @@ def get_columns_from_information( def _get_columns_for_catalog( self, relation: SparkRelation ) -> Iterable[Dict[str, Any]]: - if relation and relation.information is not None: - columns = self.get_columns_from_information(relation) - owner = None - else: - properties = self.get_properties(relation) - columns = self.get_columns_in_relation(relation) - owner = properties.get(KEY_TABLE_OWNER) + columns = self.get_columns_from_information(relation) for column in columns: # convert SparkColumns into catalog dicts as_dict = column.to_column_dict() - if owner: - column.table_owner = owner as_dict['column_name'] = as_dict.pop('column', None) as_dict['column_type'] = as_dict.pop('dtype') as_dict['table_database'] = None From 3b54482267399355e322ebcefe6326418c415920 Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Wed, 14 Apr 2021 16:00:16 +0200 Subject: [PATCH 230/603] Rename method and add unit test --- dbt/adapters/spark/impl.py | 6 ++--- test/unit/test_adapter.py | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index aff6e5f4..a0dc624d 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -212,10 +212,10 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: rows: List[agate.Row] = super().get_columns_in_relation(relation) columns = self.parse_describe_extended(relation, rows) else: - columns = self.get_columns_from_information(cached_relation) + columns = self.parse_columns_from_information(cached_relation) return columns - def get_columns_from_information( + def parse_columns_from_information( self, relation: SparkRelation ) -> List[SparkColumn]: owner_match = re.findall( @@ -242,7 +242,7 @@ def get_columns_from_information( def _get_columns_for_catalog( self, relation: SparkRelation ) -> Iterable[Dict[str, Any]]: - columns = self.get_columns_from_information(relation) + columns = self.parse_columns_from_information(relation) for column in columns: # convert SparkColumns into catalog dicts diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 5e50e310..f0aa06b3 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -446,3 +446,55 @@ def test_profile_with_cluster_and_sql_endpoint(self): } with self.assertRaises(RuntimeException): config_from_parts_or_dicts(self.project_cfg, profile) + + def test_parse_columns_from_information_with_table_type(self): + self.maxDiff = None + rel_type = SparkRelation.get_relation_type.Table + + # Mimics the 
output of Spark in the information column + information = ( + "Database: default_schema\n" + "Table: mytable\n" + "Owner: root\n" + "Created Time: Wed Feb 04 18:15:00 UTC 1815\n" + "Last Access: Wed May 20 19:25:00 UTC 1925\n" + "Created By: Spark 3.0.1\n" + "Type: MANAGED\n" + "Provider: delta\n" + "Statistics: 123456789 bytes\n" + "Location: /mnt/vo\n" + "Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + "InputFormat: org.apache.hadoop.mapred.SequenceFileInputFormat\n" + "OutputFormat: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat\n" + "Partition Provider: Catalog\n" + "Partition Columns: [`dt`]\n" + "Schema: root\n" + " |-- col1: decimal(22,0) (nullable = true)\n" + " |-- col2: string (nullable = true)\n" + " |-- dt: date (nullable = true)\n" + ) + relation = SparkRelation.create( + schema='default_schema', + identifier='mytable', + type=rel_type, + information=information + ) + + config = self._get_target_http(self.project_cfg) + columns = SparkAdapter(config).parse_columns_from_information( + relation) + self.assertEqual(len(columns), 3) + self.assertEqual(columns[0].to_column_dict(omit_none=False), { + 'table_database': None, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'col1', + 'column_index': 0, + 'dtype': 'decimal(22,0)', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None + }) + From e1bf65437a81d2823dcdae2178c4ec1b30396721 Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Wed, 14 Apr 2021 16:04:26 +0200 Subject: [PATCH 231/603] Fix bug in column_index --- dbt/adapters/spark/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index a0dc624d..950fc676 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -224,7 +224,7 @@ def parse_columns_from_information( matches = re.finditer( self.INFORMATION_COLUMNS_REGEX, relation.information) columns = [] - for match_num, match in enumerate(matches, start=1): + for match_num, match in enumerate(matches): column_name, column_type, nullable = match.groups() column = SparkColumn( table_database=None, From 64870e9f35c1de6ade4e612b38609d0855d9359a Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Wed, 14 Apr 2021 16:16:09 +0200 Subject: [PATCH 232/603] Add test with view --- test/unit/test_adapter.py | 59 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index f0aa06b3..523a5447 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -498,3 +498,62 @@ def test_parse_columns_from_information_with_table_type(self): 'char_size': None }) + def test_parse_columns_from_information_with_view_type(self): + self.maxDiff = None + rel_type = SparkRelation.get_relation_type.View + information = ( + "Database: default_schema\n" + "Table: myview\n" + "Owner: root\n" + "Created Time: Wed Feb 04 18:15:00 UTC 1815\n" + "Last Access: UNKNOWN\n" + "Created By: Spark 3.0.1\n" + "Type: VIEW\n" + "View Text: WITH base (\n" + " SELECT * FROM source_table\n" + ")\n" + "SELECT col1, col2, dt FROM base\n" + "View Original Text: WITH base (\n" + " SELECT * FROM source_table\n" + ")\n" + "SELECT col1, col2, dt FROM base\n" + "View Catalog and Namespace: spark_catalog.default\n" + "View Query Output Columns: [col1, col2, dt]\n" + "Table Properties: [view.query.out.col.1=col1, view.query.out.col.2=col2, " + 
"transient_lastDdlTime=1618324324, view.query.out.col.3=dt, " + "view.catalogAndNamespace.part.0=spark_catalog, " + "view.catalogAndNamespace.part.1=default]\n" + "Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + "InputFormat: org.apache.hadoop.mapred.SequenceFileInputFormat\n" + "OutputFormat: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat\n" + "Storage Properties: [serialization.format=1]\n" + "Schema: root\n" + " |-- col1: decimal(22,0) (nullable = true)\n" + " |-- col2: string (nullable = true)\n" + " |-- dt: date (nullable = true)\n" + ) + relation = SparkRelation.create( + schema='default_schema', + identifier='myview', + type=rel_type, + information=information + ) + + config = self._get_target_http(self.project_cfg) + columns = SparkAdapter(config).parse_columns_from_information( + relation) + self.assertEqual(len(columns), 3) + self.assertEqual(columns[1].to_column_dict(omit_none=False), { + 'table_database': None, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'col2', + 'column_index': 1, + 'dtype': 'string', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None + }) + From 2c6a5d871874616d4ba1badfc12083163b1f060c Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Wed, 14 Apr 2021 16:32:55 +0200 Subject: [PATCH 233/603] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 135df3fb..9075551c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,14 @@ - Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) +### Under the hood + +- Parse information returned by `list_relations_without_caching` macro to speed up catalog generation ([#93](https://github.com/fishtown-analytics/dbt-spark/issues/93), [#160](https://github.com/fishtown-analytics/dbt-spark/pull/160)) + ### Contributors - [@friendofasquid](https://github.com/friendofasquid) ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) +- [@franloza](https://github.com/franloza) ([#160](https://github.com/fishtown-analytics/dbt-spark/pull/160)) + ## dbt-spark 0.19.1 (Release TBD) From 7612fcb89c801732ccd4e958fb7d9fbd2908662a Mon Sep 17 00:00:00 2001 From: Fran Lozano Date: Thu, 15 Apr 2021 20:05:42 +0200 Subject: [PATCH 234/603] Parse statistics --- dbt/adapters/spark/impl.py | 8 ++++- test/unit/test_adapter.py | 67 ++++++++++++++++++++++++++++++++++++-- test/unit/test_column.py | 38 +++++++++++++++++++++ 3 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 test/unit/test_column.py diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 950fc676..661fc162 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -64,6 +64,7 @@ class SparkAdapter(SQLAdapter): INFORMATION_COLUMNS_REGEX = re.compile( r"\|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE) + INFORMATION_STATISTICS_REGEX = re.compile(r"^Statistics: (.*)$", re.MULTILINE) Relation = SparkRelation Column = SparkColumn @@ -224,6 +225,10 @@ def parse_columns_from_information( matches = re.finditer( self.INFORMATION_COLUMNS_REGEX, relation.information) columns = [] + stats_match = re.findall( + self.INFORMATION_STATISTICS_REGEX, relation.information) + raw_table_stats = stats_match[0] if stats_match 
else None + table_stats = SparkColumn.convert_table_stats(raw_table_stats) for match_num, match in enumerate(matches): column_name, column_type, nullable = match.groups() column = SparkColumn( @@ -234,7 +239,8 @@ def parse_columns_from_information( column_index=match_num, table_owner=owner, column=column_name, - dtype=column_type + dtype=column_type, + table_stats=table_stats ) columns.append(column) return columns diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 523a5447..d886ddee 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -447,7 +447,7 @@ def test_profile_with_cluster_and_sql_endpoint(self): with self.assertRaises(RuntimeException): config_from_parts_or_dicts(self.project_cfg, profile) - def test_parse_columns_from_information_with_table_type(self): + def test_parse_columns_from_information_with_table_type_and_delta_provider(self): self.maxDiff = None rel_type = SparkRelation.get_relation_type.Table @@ -495,7 +495,12 @@ def test_parse_columns_from_information_with_table_type(self): 'dtype': 'decimal(22,0)', 'numeric_scale': None, 'numeric_precision': None, - 'char_size': None + 'char_size': None, + + 'stats:bytes:description': '', + 'stats:bytes:include': True, + 'stats:bytes:label': 'bytes', + 'stats:bytes:value': 123456789, }) def test_parse_columns_from_information_with_view_type(self): @@ -557,3 +562,61 @@ def test_parse_columns_from_information_with_view_type(self): 'char_size': None }) + def test_parse_columns_from_information_with_table_type_and_parquet_provider(self): + self.maxDiff = None + rel_type = SparkRelation.get_relation_type.Table + + information = ( + "Database: default_schema\n" + "Table: mytable\n" + "Owner: root\n" + "Created Time: Wed Feb 04 18:15:00 UTC 1815\n" + "Last Access: Wed May 20 19:25:00 UTC 1925\n" + "Created By: Spark 3.0.1\n" + "Type: MANAGED\n" + "Provider: parquet\n" + "Statistics: 1234567890 bytes, 12345678 rows\n" + "Location: /mnt/vo\n" + "Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe\n" + "InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat\n" + "OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat\n" + "Schema: root\n" + " |-- col1: decimal(22,0) (nullable = true)\n" + " |-- col2: string (nullable = true)\n" + " |-- dt: date (nullable = true)\n" + ) + relation = SparkRelation.create( + schema='default_schema', + identifier='mytable', + type=rel_type, + information=information + ) + + config = self._get_target_http(self.project_cfg) + columns = SparkAdapter(config).parse_columns_from_information( + relation) + self.assertEqual(len(columns), 3) + self.assertEqual(columns[2].to_column_dict(omit_none=False), { + 'table_database': None, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'dt', + 'column_index': 2, + 'dtype': 'date', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None, + + 'stats:bytes:description': '', + 'stats:bytes:include': True, + 'stats:bytes:label': 'bytes', + 'stats:bytes:value': 1234567890, + + 'stats:rows:description': '', + 'stats:rows:include': True, + 'stats:rows:label': 'rows', + 'stats:rows:value': 12345678 + }) + diff --git a/test/unit/test_column.py b/test/unit/test_column.py new file mode 100644 index 00000000..f7f8d877 --- /dev/null +++ b/test/unit/test_column.py @@ -0,0 +1,38 @@ +import unittest + +from dbt.adapters.spark import SparkColumn + + +class TestSparkColumn(unittest.TestCase): + + 
def test_convert_table_stats_with_no_statistics(self):
+        self.assertDictEqual(
+            SparkColumn.convert_table_stats(None),
+            {}
+        )
+
+    def test_convert_table_stats_with_bytes(self):
+        self.assertDictEqual(
+            SparkColumn.convert_table_stats("123456789 bytes"),
+            {
+                'stats:bytes:description': '',
+                'stats:bytes:include': True,
+                'stats:bytes:label': 'bytes',
+                'stats:bytes:value': 123456789
+            }
+        )
+
+    def test_convert_table_stats_with_bytes_and_rows(self):
+        self.assertDictEqual(
+            SparkColumn.convert_table_stats("1234567890 bytes, 12345678 rows"),
+            {
+                'stats:bytes:description': '',
+                'stats:bytes:include': True,
+                'stats:bytes:label': 'bytes',
+                'stats:bytes:value': 1234567890,
+                'stats:rows:description': '',
+                'stats:rows:include': True,
+                'stats:rows:label': 'rows',
+                'stats:rows:value': 12345678
+            }
+        )

From 36367e6ccef3afcb76056dc02c95b8e248ec6ff9 Mon Sep 17 00:00:00 2001
From: Fran Lozano
Date: Thu, 15 Apr 2021 22:38:14 +0200
Subject: [PATCH 235/603] Fix linter errors

---
 dbt/adapters/spark/impl.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 661fc162..93cfba68 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -64,7 +64,8 @@ class SparkAdapter(SQLAdapter):
     INFORMATION_COLUMNS_REGEX = re.compile(
         r"\|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE)
     INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE)
-    INFORMATION_STATISTICS_REGEX = re.compile(r"^Statistics: (.*)$", re.MULTILINE)
+    INFORMATION_STATISTICS_REGEX = re.compile(
+        r"^Statistics: (.*)$", re.MULTILINE)

     Relation = SparkRelation
     Column = SparkColumn

From dff1b613ddf87e4e72e8a47475bcfd1d55796a5c Mon Sep 17 00:00:00 2001
From: Fokko Driesprong
Date: Thu, 6 May 2021 23:32:07 +0200
Subject: [PATCH 236/603] Make the https:// optional (#165)

---
 CHANGELOG.md                      |  3 ++-
 dbt/adapters/spark/connections.py | 10 +++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9075551c..002a0a1e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,11 +7,12 @@

 ### Under the hood
 - Parse information returned by `list_relations_without_caching` macro to speed up catalog generation ([#93](https://github.com/fishtown-analytics/dbt-spark/issues/93), [#160](https://github.com/fishtown-analytics/dbt-spark/pull/160))
+- More flexible host passing, https:// can be omitted ([#153](https://github.com/fishtown-analytics/dbt-spark/issues/153))

 ### Contributors
 - [@friendofasquid](https://github.com/friendofasquid) ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159))
 - [@franloza](https://github.com/franloza) ([#160](https://github.com/fishtown-analytics/dbt-spark/pull/160))
-
+- [@Fokko](https://github.com/Fokko) ([#165](https://github.com/fishtown-analytics/dbt-spark/pull/165))

 ## dbt-spark 0.19.1 (Release TBD)

diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 457a0d84..bd26f6ef 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -254,7 +254,7 @@ class SparkConnectionManager(SQLConnectionManager):
     SPARK_CLUSTER_HTTP_PATH = "/sql/protocolv1/o/{organization}/{cluster}"
     SPARK_SQL_ENDPOINT_HTTP_PATH = "/sql/1.0/endpoints/{endpoint}"
     SPARK_CONNECTION_URL = (
-        "https://{host}:{port}" + SPARK_CLUSTER_HTTP_PATH
+        "{host}:{port}" + SPARK_CLUSTER_HTTP_PATH
     )

     @contextmanager
@@ -320,8 +320,13 @@ def open(cls, connection):
             cls.validate_creds(creds, ['token', 'host', 'port',
                                        'cluster', 'organization'])

+            # 
Prepend https:// if it is missing + host = creds.host + if not host.startswith('https://'): + host = 'https://' + creds.host + conn_url = cls.SPARK_CONNECTION_URL.format( - host=creds.host, + host=host, port=creds.port, organization=creds.organization, cluster=creds.cluster @@ -350,7 +355,6 @@ def open(cls, connection): kerberos_service_name=creds.kerberos_service_name) # noqa handle = PyhiveConnectionWrapper(conn) elif creds.method == SparkConnectionMethod.ODBC: - http_path = None if creds.cluster is not None: required_fields = ['driver', 'host', 'port', 'token', 'organization', 'cluster'] From c13f1dda0a6c04f8c0884962c27682af536ec562 Mon Sep 17 00:00:00 2001 From: Cor Date: Wed, 2 Jun 2021 16:50:42 +0000 Subject: [PATCH 237/603] Add options clause to create table macro (#171) * Add option clause macro * Add option clause to create table macro * Add test for options clause * Add change log entry * Add file format delta to test * Change order of table expression * Make options lower case * Change order of table definitions * Add options to spark config --- CHANGELOG.md | 4 ++++ dbt/adapters/spark/impl.py | 1 + dbt/include/spark/macros/adapters.sql | 11 +++++++++++ test/unit/test_macros.py | 8 ++++++++ 4 files changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 002a0a1e..5a2a966b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## dbt next +### Features +- Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171)) + ### Fixes - Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) @@ -13,6 +16,7 @@ - [@friendofasquid](https://github.com/friendofasquid) ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) - [@franloza](https://github.com/franloza) ([#160](https://github.com/fishtown-analytics/dbt-spark/pull/160)) - [@Fokko](https://github.com/Fokko) ([#165](https://github.com/fishtown-analytics/dbt-spark/pull/165)) +- [@JCZuurmond](https://github.com/JCZuurmond) ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171)) ## dbt-spark 0.19.1 (Release TBD) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 93cfba68..b2060f8c 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -36,6 +36,7 @@ class SparkConfig(AdapterConfig): partition_by: Optional[Union[List[str], str]] = None clustered_by: Optional[Union[List[str], str]] = None buckets: Optional[int] = None + options: Optional[Dict[str, str]] = None class SparkAdapter(SQLAdapter): diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index a45b0d1a..8d095e9c 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -13,6 +13,16 @@ {%- endif %} {%- endmacro -%} +{% macro options_clause() -%} + {%- set options = config.get('options') -%} + {%- if options is not none %} + options ( + {%- for option in options -%} + {{ option }} "{{ options[option] }}" {% if not loop.last %}, {% endif %} + {%- endfor %} + ) + {%- endif %} +{%- endmacro -%} {% macro comment_clause() %} {%- set raw_persist_docs = config.get('persist_docs', {}) -%} @@ -83,6 +93,7 @@ create table {{ relation }} {% endif %} {{ file_format_clause() }} + {{ options_clause() }} {{ partition_cols(label="partitioned by") }} {{ clustered_cols(label="clustered by") }} {{ location_clause() }} diff --git 
a/test/unit/test_macros.py b/test/unit/test_macros.py index 5c5e3f8c..151631e0 100644 --- a/test/unit/test_macros.py +++ b/test/unit/test_macros.py @@ -43,6 +43,14 @@ def test_macros_create_table_as_file_format(self): sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() self.assertEqual(sql, "create or replace table my_table using delta as select 1") + def test_macros_create_table_as_options(self): + template = self.__get_template('adapters.sql') + + self.config['file_format'] = 'delta' + self.config['options'] = {"compression": "gzip"} + sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip() + self.assertEqual(sql, 'create or replace table my_table using delta options (compression "gzip" ) as select 1') + def test_macros_create_table_as_partition(self): template = self.__get_template('adapters.sql') From 458cdcc64a1394b80219944a8940c5698679c339 Mon Sep 17 00:00:00 2001 From: Cristiano Perez Date: Sun, 6 Jun 2021 16:11:20 -0300 Subject: [PATCH 238/603] Support persist_docs for column descriptions (#170) * feat: spark adapter to change column comment * Update readme * feat: convert statement to lowercase * feat: update changelog --- CHANGELOG.md | 4 ++++ README.md | 2 +- dbt/include/spark/macros/adapters.sql | 13 +++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a2a966b..57878f7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### Features - Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171)) +- Add support for column comment ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170)) + ### Fixes @@ -17,6 +19,8 @@ - [@franloza](https://github.com/franloza) ([#160](https://github.com/fishtown-analytics/dbt-spark/pull/160)) - [@Fokko](https://github.com/Fokko) ([#165](https://github.com/fishtown-analytics/dbt-spark/pull/165)) - [@JCZuurmond](https://github.com/JCZuurmond) ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171)) +- [@cristianoperez](https://github.com/cristianoperez) ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170)) + ## dbt-spark 0.19.1 (Release TBD) diff --git a/README.md b/README.md index d33fe69d..9e57e607 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ The following configurations can be supplied to models run with the dbt-spark pl | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | | buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | | incremental_strategy | The strategy to use for incremental models (`append`, `insert_overwrite`, or `merge`). 
| Optional (default: `append`) | `merge` | -| persist_docs | Whether dbt should include the model description as a table `comment` | Optional | `{'relation': true}` | +| persist_docs | Whether dbt should include the model description as a table or column `comment` | Optional | `{'relation': true, 'columns': true}` | **Incremental Models** diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index 8d095e9c..ac1ec92b 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -173,3 +173,16 @@ {% macro spark__generate_database_name(custom_database_name=none, node=none) -%} {% do return(None) %} {%- endmacro %} + +{% macro spark__alter_column_comment(relation, column_dict) %} + {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %} + {% for column_name in column_dict %} + {% set comment = column_dict[column_name]['description'] %} + {% set comment_query %} + alter table {{ relation }} change column {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} comment '{{ comment }}'; + {% endset %} + {% do run_query(comment_query) %} + {% endfor %} + {% endif %} +{% endmacro %} + From b1e5f77ac334968cf3ea30d2435c183e6c20bac0 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Sun, 6 Jun 2021 15:17:25 -0400 Subject: [PATCH 239/603] Explicitly cast column types when inserting into seeds (#166) * Explicitly cast column types when inserting into seeds * Add changelog entry --- CHANGELOG.md | 3 +- .../spark/macros/materializations/seed.sql | 13 ++++--- .../seed_column_types/data/payments.csv | 11 ++++++ .../test_seed_column_types.py | 36 +++++++++++++++++++ 4 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 test/custom/seed_column_types/data/payments.csv create mode 100644 test/custom/seed_column_types/test_seed_column_types.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 57878f7f..fd2cf9b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,10 @@ ### Fixes - - Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) +- Explicitly cast column types when inserting seeds ([#139](https://github.com/fishtown-analytics/dbt-spark/pull/139), [#166](https://github.com/fishtown-analytics/dbt-spark/pull/166)) ### Under the hood - - Parse information returned by `list_relations_without_caching` macro to speed up catalog generation ([#93](https://github.com/fishtown-analytics/dbt-spark/issues/93), [#160](https://github.com/fishtown-analytics/dbt-spark/pull/160)) - More flexible host passing, https:// can be omitted ([#153](https://github.com/fishtown-analytics/dbt-spark/issues/153)) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index c857f013..795f4932 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -1,6 +1,7 @@ {% macro spark__load_csv_rows(model, agate_table) %} {% set batch_size = 1000 %} - + {% set column_override = model['config'].get('column_types', {}) %} + {% set statements = [] %} {% for chunk in agate_table.rows | batch(batch_size) %} @@ -13,12 +14,10 @@ {% set sql %} insert into {{ this.render() }} values {% for row in chunk -%} - ({%- for column in agate_table.columns -%} - {%- if 'ISODate' in (column.data_type | string) -%} - cast(%s as timestamp) - {%- else -%} - %s 
- {%- endif -%} + ({%- for col_name in agate_table.column_names -%} + {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%} + {%- set type = column_override.get(col_name, inferred_type) -%} + cast(%s as {{type}}) {%- if not loop.last%},{%- endif %} {%- endfor -%}) {%- if not loop.last%},{%- endif %} diff --git a/test/custom/seed_column_types/data/payments.csv b/test/custom/seed_column_types/data/payments.csv new file mode 100644 index 00000000..3f49d788 --- /dev/null +++ b/test/custom/seed_column_types/data/payments.csv @@ -0,0 +1,11 @@ +ID,ORDERID,PAYMENTMETHOD,STATUS,AMOUNT,AMOUNT_USD,CREATED +1,1,credit_card,success,1000,10.00,2018-01-01 +2,2,credit_card,success,2000,20.00,2018-01-02 +3,3,coupon,success,100,1.00,2018-01-04 +4,4,coupon,success,2500,25.00,2018-01-05 +5,5,bank_transfer,fail,1700,17.00,2018-01-05 +6,5,bank_transfer,success,1700,17.00,2018-01-05 +7,6,credit_card,success,600,6.00,2018-01-07 +8,7,credit_card,success,1600,16.00,2018-01-09 +9,8,credit_card,success,2300,23.00,2018-01-11 +10,9,gift_card,success,2300,23.00,2018-01-12 diff --git a/test/custom/seed_column_types/test_seed_column_types.py b/test/custom/seed_column_types/test_seed_column_types.py new file mode 100644 index 00000000..e1fc3278 --- /dev/null +++ b/test/custom/seed_column_types/test_seed_column_types.py @@ -0,0 +1,36 @@ +from cProfile import run +from test.custom.base import DBTSparkIntegrationTest, use_profile +import dbt.exceptions + + +class TestSeedColumnTypeCast(DBTSparkIntegrationTest): + @property + def schema(self): + return "seed_column_types" + + @property + def models(self): + return "models" + + @property + def project_config(self): + return { + 'seeds': { + 'quote_columns': False, + }, + } + + # runs on Spark v2.0 + @use_profile("apache_spark") + def test_seed_column_types_apache_spark(self): + self.run_dbt(["seed"]) + + # runs on Spark v3.0 + @use_profile("databricks_cluster") + def test_seed_column_types_databricks_cluster(self): + self.run_dbt(["seed"]) + + # runs on Spark v3.0 + @use_profile("databricks_sql_endpoint") + def test_seed_column_types_databricks_sql_endpoint(self): + self.run_dbt(["seed"]) From 2ab55238451ef2a4c6c4dec226e844d52148e4b2 Mon Sep 17 00:00:00 2001 From: rahulgoyal2987 Date: Mon, 7 Jun 2021 17:58:05 +0530 Subject: [PATCH 240/603] Feature/ssl support dbtspark (#169) * Added support for hive ssl * Added support for hive ssl * Updated code to remove pure-trasport dependency * Fixed issues * Updated test cases * fixed test cases * Fixed flake8 issues * Update README.md * Update README.md * Update CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update README.md * Update README.md * Update README.md * Added import except case * Update README.md * Fixed minor issue * Fixed minor issue --- CHANGELOG.md | 3 ++ README.md | 1 + dbt/adapters/spark/connections.py | 75 ++++++++++++++++++++++++++++--- test/unit/test_adapter.py | 35 +++++++++++++++ 4 files changed, 109 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd2cf9b1..948a64a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ ## dbt next ### Features + +- Allow user to specify `use_ssl` ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169)) - Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171)) - Add support for column comment ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170)) @@ -17,6 +19,7 @@ - [@friendofasquid](https://github.com/friendofasquid) 
([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) - [@franloza](https://github.com/franloza) ([#160](https://github.com/fishtown-analytics/dbt-spark/pull/160)) - [@Fokko](https://github.com/Fokko) ([#165](https://github.com/fishtown-analytics/dbt-spark/pull/165)) +- [@rahulgoyal2987](https://github.com/rahulgoyal2987) ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169)) - [@JCZuurmond](https://github.com/JCZuurmond) ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171)) - [@cristianoperez](https://github.com/cristianoperez) ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170)) diff --git a/README.md b/README.md index 9e57e607..71ec7cdf 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ A dbt profile for Spark connections support the following configurations: | user | The username to use to connect to the cluster | ❔ | ❔ | ❔ | `hadoop` | | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | ❌ | ❔ (`10`) | ❔ (`10`) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | ❌ | ❔ (`0`) | ❔ (`0`) | `5` | +| use_ssl | The value of `hive.server2.use.SSL` (`True` or `False`). Default ssl store (ssl.get_default_verify_paths()) is the valid location for SSL certificate | ❌ | ❔ (`False`) | ❌ | `True` | **Databricks** connections differ based on the cloud provider: diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index bd26f6ef..1bc8d80b 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -26,6 +26,18 @@ from hologram.helpers import StrEnum from dataclasses import dataclass from typing import Optional +try: + from thrift.transport.TSSLSocket import TSSLSocket + import thrift + import ssl + import sasl + import thrift_sasl +except ImportError: + TSSLSocket = None + thrift = None + ssl = None + sasl = None + thrift_sasl = None import base64 import time @@ -59,6 +71,7 @@ class SparkCredentials(Credentials): organization: str = '0' connect_retries: int = 0 connect_timeout: int = 10 + use_ssl: bool = False @classmethod def __pre_deserialize__(cls, data): @@ -348,11 +361,20 @@ def open(cls, connection): cls.validate_creds(creds, ['host', 'port', 'user', 'schema']) - conn = hive.connect(host=creds.host, - port=creds.port, - username=creds.user, - auth=creds.auth, - kerberos_service_name=creds.kerberos_service_name) # noqa + if creds.use_ssl: + transport = build_ssl_transport( + host=creds.host, + port=creds.port, + username=creds.user, + auth=creds.auth, + kerberos_service_name=creds.kerberos_service_name) + conn = hive.connect(thrift_transport=transport) + else: + conn = hive.connect(host=creds.host, + port=creds.port, + username=creds.user, + auth=creds.auth, + kerberos_service_name=creds.kerberos_service_name) # noqa handle = PyhiveConnectionWrapper(conn) elif creds.method == SparkConnectionMethod.ODBC: if creds.cluster is not None: @@ -431,6 +453,49 @@ def open(cls, connection): return connection +def build_ssl_transport(host, port, username, auth, + kerberos_service_name, password=None): + transport = None + if port is None: + port = 10000 + if auth is None: + auth = 'NONE' + socket = TSSLSocket(host, port, cert_reqs=ssl.CERT_NONE) + if auth == 'NOSASL': + # NOSASL corresponds to hive.server2.authentication=NOSASL + # in hive-site.xml + transport = thrift.transport.TTransport.TBufferedTransport(socket) + elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'): + # Defer 
import so package dependency is optional + if auth == 'KERBEROS': + # KERBEROS mode in hive.server2.authentication is GSSAPI + # in sasl library + sasl_auth = 'GSSAPI' + else: + sasl_auth = 'PLAIN' + if password is None: + # Password doesn't matter in NONE mode, just needs + # to be nonempty. + password = 'x' + + def sasl_factory(): + sasl_client = sasl.Client() + sasl_client.setAttr('host', host) + if sasl_auth == 'GSSAPI': + sasl_client.setAttr('service', kerberos_service_name) + elif sasl_auth == 'PLAIN': + sasl_client.setAttr('username', username) + sasl_client.setAttr('password', password) + else: + raise AssertionError + sasl_client.init() + return sasl_client + + transport = thrift_sasl.TSaslClientTransport(sasl_factory, + sasl_auth, socket) + return transport + + def _is_retryable_error(exc: Exception) -> Optional[str]: message = getattr(exc, 'message', None) if message is None: diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index d886ddee..ddfbeddb 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -75,6 +75,22 @@ def _get_target_thrift_kerberos(self, project): 'target': 'test' }) + def _get_target_use_ssl_thrift(self, project): + return config_from_parts_or_dicts(project, { + 'outputs': { + 'test': { + 'type': 'spark', + 'method': 'thrift', + 'use_ssl': True, + 'schema': 'analytics', + 'host': 'myorg.sparkhost.com', + 'port': 10001, + 'user': 'dbt' + } + }, + 'target': 'test' + }) + def _get_target_odbc_cluster(self, project): return config_from_parts_or_dicts(project, { 'outputs': { @@ -154,6 +170,25 @@ def hive_thrift_connect(host, port, username, auth, kerberos_service_name): self.assertEqual(connection.credentials.schema, 'analytics') self.assertIsNone(connection.credentials.database) + def test_thrift_ssl_connection(self): + config = self._get_target_use_ssl_thrift(self.project_cfg) + adapter = SparkAdapter(config) + + def hive_thrift_connect(thrift_transport): + self.assertIsNotNone(thrift_transport) + transport = thrift_transport._trans + self.assertEqual(transport.host, 'myorg.sparkhost.com') + self.assertEqual(transport.port, 10001) + + with mock.patch.object(hive, 'connect', new=hive_thrift_connect): + connection = adapter.acquire_connection('dummy') + connection.handle # trigger lazy-load + + self.assertEqual(connection.state, 'open') + self.assertIsNotNone(connection.handle) + self.assertEqual(connection.credentials.schema, 'analytics') + self.assertIsNone(connection.credentials.database) + def test_thrift_connection_kerberos(self): config = self._get_target_thrift_kerberos(self.project_cfg) adapter = SparkAdapter(config) From a8a85c54d10920af1c5efcbb4d2a51eb7cfcad11 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 15 Jun 2021 12:40:38 -0400 Subject: [PATCH 241/603] Fix persist_docs for columns (#180) * Fix persist_docs for columns * Disable parquet model on endpoint * Rm parquet model, not worth the fuss * Update changelog [skip ci] --- CHANGELOG.md | 13 ++-- dbt/include/spark/macros/adapters.sql | 12 +++- .../spark/macros/materializations/seed.sql | 12 ++-- .../spark/macros/materializations/table.sql | 2 + test/custom/base.py | 12 ++-- test/custom/persist_docs/data/seed.csv | 3 + test/custom/persist_docs/data/seeds.yml | 26 +++++++ .../custom/persist_docs/models/my_fun_docs.md | 10 +++ .../persist_docs/models/no_docs_model.sql | 1 + test/custom/persist_docs/models/schema.yml | 71 +++++++++++++++++++ .../persist_docs/models/table_delta_model.sql | 2 + .../custom/persist_docs/models/view_model.sql | 2 + 
test/custom/persist_docs/test_persist_docs.py | 68 ++++++++++++++++++ 13 files changed, 214 insertions(+), 20 deletions(-) create mode 100644 test/custom/persist_docs/data/seed.csv create mode 100644 test/custom/persist_docs/data/seeds.yml create mode 100644 test/custom/persist_docs/models/my_fun_docs.md create mode 100644 test/custom/persist_docs/models/no_docs_model.sql create mode 100644 test/custom/persist_docs/models/schema.yml create mode 100644 test/custom/persist_docs/models/table_delta_model.sql create mode 100644 test/custom/persist_docs/models/view_model.sql create mode 100644 test/custom/persist_docs/test_persist_docs.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 948a64a8..339f7f5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,16 @@ -## dbt next +## dbt-spark 0.20.0 (Release TBD) + +### Fixes + +- Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180)) + +## dbt-spark 0.20.0rc1 (June 8, 2021) ### Features - Allow user to specify `use_ssl` ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169)) - Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171)) -- Add support for column comment ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170)) - +- Add support for column-level `persist_docs` on Delta tables ([#84](https://github.com/fishtown-analytics/dbt-spark/pull/84), [#170](https://github.com/fishtown-analytics/dbt-spark/pull/170)) ### Fixes - Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) @@ -24,7 +29,7 @@ - [@cristianoperez](https://github.com/cristianoperez) ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170)) -## dbt-spark 0.19.1 (Release TBD) +## dbt-spark 0.19.1 (April 2, 2021) ## dbt-spark 0.19.1b2 (February 26, 2021) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index ac1ec92b..fcdc46c6 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -174,15 +174,23 @@ {% do return(None) %} {%- endmacro %} +{% macro spark__persist_docs(relation, model, for_relation, for_columns) -%} + {% if for_columns and config.persist_column_docs() and model.columns %} + {% do alter_column_comment(relation, model.columns) %} + {% endif %} +{% endmacro %} + {% macro spark__alter_column_comment(relation, column_dict) %} {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %} {% for column_name in column_dict %} {% set comment = column_dict[column_name]['description'] %} + {% set escaped_comment = comment | replace('\'', '\\\'') %} {% set comment_query %} - alter table {{ relation }} change column {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} comment '{{ comment }}'; + alter table {{ relation }} change column + {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} + comment '{{ escaped_comment }}'; {% endset %} {% do run_query(comment_query) %} {% endfor %} {% endif %} {% endmacro %} - diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index 795f4932..536e6447 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -81,10 +81,7 @@ {%- set agate_table = 
load_agate_table() -%} {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%} - {{ run_hooks(pre_hooks, inside_transaction=False) }} - - -- `BEGIN` happens here: - {{ run_hooks(pre_hooks, inside_transaction=True) }} + {{ run_hooks(pre_hooks) }} -- build model {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %} @@ -98,10 +95,9 @@ {{ sql }} {% endcall %} - {{ run_hooks(post_hooks, inside_transaction=True) }} - -- `COMMIT` happens here - {{ adapter.commit() }} - {{ run_hooks(post_hooks, inside_transaction=False) }} + {% do persist_docs(target_relation, model) %} + + {{ run_hooks(post_hooks) }} {{ return({'relations': [target_relation]}) }} diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index adfdb7a3..3ae2df97 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -21,6 +21,8 @@ {% call statement('main') -%} {{ create_table_as(False, target_relation, sql) }} {%- endcall %} + + {% do persist_docs(target_relation, model) %} {{ run_hooks(post_hooks) }} diff --git a/test/custom/base.py b/test/custom/base.py index d2dc6dd7..28fcad3c 100644 --- a/test/custom/base.py +++ b/test/custom/base.py @@ -76,7 +76,7 @@ def apache_spark_profile(self): }, 'test': { 'outputs': { - 'default2': { + 'thrift': { 'type': 'spark', 'host': 'localhost', 'user': 'dbt', @@ -87,7 +87,7 @@ def apache_spark_profile(self): 'schema': self.unique_schema() }, }, - 'target': 'default2' + 'target': 'thrift' } } @@ -98,7 +98,7 @@ def databricks_cluster_profile(self): }, 'test': { 'outputs': { - 'odbc': { + 'cluster': { 'type': 'spark', 'method': 'odbc', 'host': os.getenv('DBT_DATABRICKS_HOST_NAME'), @@ -109,7 +109,7 @@ def databricks_cluster_profile(self): 'schema': self.unique_schema() }, }, - 'target': 'odbc' + 'target': 'cluster' } } @@ -120,7 +120,7 @@ def databricks_sql_endpoint_profile(self): }, 'test': { 'outputs': { - 'default2': { + 'endpoint': { 'type': 'spark', 'method': 'odbc', 'host': os.getenv('DBT_DATABRICKS_HOST_NAME'), @@ -131,7 +131,7 @@ def databricks_sql_endpoint_profile(self): 'schema': self.unique_schema() }, }, - 'target': 'default2' + 'target': 'endpoint' } } diff --git a/test/custom/persist_docs/data/seed.csv b/test/custom/persist_docs/data/seed.csv new file mode 100644 index 00000000..4a295177 --- /dev/null +++ b/test/custom/persist_docs/data/seed.csv @@ -0,0 +1,3 @@ +id,name +1,Alice +2,Bob \ No newline at end of file diff --git a/test/custom/persist_docs/data/seeds.yml b/test/custom/persist_docs/data/seeds.yml new file mode 100644 index 00000000..7ab82fa6 --- /dev/null +++ b/test/custom/persist_docs/data/seeds.yml @@ -0,0 +1,26 @@ +version: 2 + +seeds: + - name: seed + description: | + Seed model description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + columns: + - name: id + description: | + id Column description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + - name: name + description: | + Some stuff here and then a call to + {{ doc('my_fun_doc')}} diff --git a/test/custom/persist_docs/models/my_fun_docs.md b/test/custom/persist_docs/models/my_fun_docs.md new file mode 100644 index 00000000..f3c0fbf5 --- 
/dev/null +++ b/test/custom/persist_docs/models/my_fun_docs.md @@ -0,0 +1,10 @@ +{% docs my_fun_doc %} +name Column description "with double quotes" +and with 'single quotes' as welll as other; +'''abc123''' +reserved -- characters +-- +/* comment */ +Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + +{% enddocs %} diff --git a/test/custom/persist_docs/models/no_docs_model.sql b/test/custom/persist_docs/models/no_docs_model.sql new file mode 100644 index 00000000..e39a7a15 --- /dev/null +++ b/test/custom/persist_docs/models/no_docs_model.sql @@ -0,0 +1 @@ +select 1 as id, 'Alice' as name diff --git a/test/custom/persist_docs/models/schema.yml b/test/custom/persist_docs/models/schema.yml new file mode 100644 index 00000000..78dcda79 --- /dev/null +++ b/test/custom/persist_docs/models/schema.yml @@ -0,0 +1,71 @@ +version: 2 + +models: + + - name: table_parquet_model + description: | + Table model description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + columns: + - name: id + description: | + id Column description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + - name: name + description: | + Some stuff here and then a call to + {{ doc('my_fun_doc')}} + + - name: table_delta_model + description: | + Table model description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + columns: + - name: id + description: | + id Column description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + - name: name + description: | + Some stuff here and then a call to + {{ doc('my_fun_doc')}} + + - name: view_model + description: | + View model description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + columns: + - name: id + description: | + id Column description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting diff --git a/test/custom/persist_docs/models/table_delta_model.sql b/test/custom/persist_docs/models/table_delta_model.sql new file mode 100644 index 00000000..06e40e0c --- /dev/null +++ b/test/custom/persist_docs/models/table_delta_model.sql @@ -0,0 +1,2 @@ +{{ config(materialized='table', file_format='delta') }} +select 1 as id, 'Joe' as name diff --git a/test/custom/persist_docs/models/view_model.sql b/test/custom/persist_docs/models/view_model.sql new file mode 100644 index 00000000..a6f96a16 --- /dev/null +++ b/test/custom/persist_docs/models/view_model.sql @@ -0,0 +1,2 @@ +{{ config(materialized='view') }} +select 2 as id, 'Bob' as name diff --git a/test/custom/persist_docs/test_persist_docs.py b/test/custom/persist_docs/test_persist_docs.py new file mode 100644 index 00000000..d9acf70d --- /dev/null +++ b/test/custom/persist_docs/test_persist_docs.py @@ -0,0 +1,68 @@ +from cProfile import run +from test.custom.base import 
DBTSparkIntegrationTest, use_profile +import dbt.exceptions + +import json + + +class TestPersistDocsDelta(DBTSparkIntegrationTest): + @property + def schema(self): + return "persist_docs_columns" + + @property + def models(self): + return "models" + + @property + def project_config(self): + return { + 'config-version': 2, + 'models': { + 'test': { + '+persist_docs': { + "relation": True, + "columns": True, + }, + } + }, + 'seeds': { + 'test': { + '+persist_docs': { + "relation": True, + "columns": True, + }, + '+file_format': 'delta', + '+quote_columns': True + } + }, + } + + def test_delta_comments(self): + self.run_dbt(['seed']) + self.run_dbt(['run']) + + for table in ['table_delta_model', 'seed']: + results = self.run_sql( + 'describe extended {schema}.{table}'.format(schema=self.unique_schema(), table=table), + fetch='all' + ) + + for result in results: + if result[0] == 'Comment': + whatis = 'Seed' if table == 'seed' else 'Table' + assert result[1].startswith(f'{whatis} model description') + if result[0] == 'id': + assert result[2].startswith('id Column description') + if result[0] == 'name': + assert result[2].startswith('Some stuff here and then a call to') + + # runs on Spark v3.0 + @use_profile("databricks_cluster") + def test_delta_comments_databricks_cluster(self): + self.test_delta_comments() + + # runs on Spark v3.0 + @use_profile("databricks_sql_endpoint") + def test_delta_comments_databricks_sql_endpoint(self): + self.test_delta_comments() From a0e8fde4107384a23a20b14529c2b09e318296ac Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 7 Jun 2021 14:13:38 -0400 Subject: [PATCH 242/603] change dockerized Spark to Spark3 --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8ef97a44..e228e307 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: "3.7" services: dbt-spark2-thrift: - image: godatadriven/spark:2 + image: godatadriven/spark:3 ports: - "10000:10000" - "4040:4040" From 8104ef0e9fe795572532a7df5a35ce5966efa072 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 8 Jun 2021 13:01:14 -0400 Subject: [PATCH 243/603] bumpversion to 0.20.0rc1 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- dev_requirements.txt | 2 +- docker-compose.yml | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 24e65209..86b21d72 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.1 +current_version = 0.20.0rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8fbe0a5f..b4313248 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.19.1 +current_version = 0.20.0rc1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 3f7d7377..7cb09608 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.19.1" +version = "0.20.0rc1" diff --git a/dev_requirements.txt b/dev_requirements.txt index bbcdc9d6..95e4df5e 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -10,6 +10,6 @@ pytest-xdist>=2.1.0,<3 flaky>=3.5.3,<4 # Test requirements -git+https://github.com/fishtown-analytics/dbt-adapter-tests.git@33872d1cc0f936677dae091c3e0b49771c280514 
+pytest-dbt-adapter==0.5.1 sasl==0.2.1 thrift_sasl==0.4.1 diff --git a/docker-compose.yml b/docker-compose.yml index e228e307..869e4ecd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: "3.7" services: dbt-spark2-thrift: - image: godatadriven/spark:3 + image: godatadriven/spark:3.0 ports: - "10000:10000" - "4040:4040" diff --git a/requirements.txt b/requirements.txt index dab0af33..b91b4804 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.19.1 +dbt-core==0.20.0rc1 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index 25c7ff3e..26020960 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.19.1' +dbt_version = '0.20.0rc1' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.19.1 vs 0.19.1a1, 0.19.1.1, ...) +# ends of it. (0.20.0rc1 vs 0.20.0rc1a1, 0.20.0rc1.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From e84bd4f7b12279005b9401d8c2bdc769a5ff04b6 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Wed, 23 Jun 2021 14:18:40 -0400 Subject: [PATCH 244/603] Add merge_update_columns (#184) --- CHANGELOG.md | 4 ++++ dbt/adapters/spark/impl.py | 1 + .../incremental/strategies.sql | 13 +++++++++-- .../data/expected_partial_upsert.csv | 4 ++++ .../models_delta/merge_update_columns.sql | 22 +++++++++++++++++++ .../test_incremental_strategies.py | 1 + 6 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 test/custom/incremental_strategies/data/expected_partial_upsert.csv create mode 100644 test/custom/incremental_strategies/models_delta/merge_update_columns.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 339f7f5c..d0b8344f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## dbt-spark 0.20.0 (Release TBD) +### Features + +- Add support for `merge_update_columns` config in `merge`-strategy incremental models ([#183](https://github.com/fishtown-analytics/dbt-spark/pull/183), ([#184](https://github.com/fishtown-analytics/dbt-spark/pull/184)) + ### Fixes - Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180)) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index b2060f8c..9f4ae514 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -37,6 +37,7 @@ class SparkConfig(AdapterConfig): clustered_by: Optional[Union[List[str], str]] = None buckets: Optional[int] = None options: Optional[Dict[str, str]] = None + merge_update_columns: Optional[str] = None class SparkAdapter(SQLAdapter): diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql index d3ffafc1..ec5dad67 100644 --- a/dbt/include/spark/macros/materializations/incremental/strategies.sql +++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql @@ -20,7 +20,8 @@ {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} - {# ignore dest_columns - we will just use `*` #} + {# skip dest_columns, use merge_update_columns config if provided, otherwise use "*" #} + {%- set update_columns = config.get("merge_update_columns") -%} {% set merge_condition %} {% if unique_key %} @@ 
-32,8 +33,16 @@ merge into {{ target }} as DBT_INTERNAL_DEST using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE + {{ merge_condition }} - when matched then update set * + + when matched then update set + {% if update_columns -%}{%- for column_name in update_columns %} + {{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }} + {%- if not loop.last %}, {%- endif %} + {%- endfor %} + {%- else %} * {% endif %} + when not matched then insert * {% endmacro %} diff --git a/test/custom/incremental_strategies/data/expected_partial_upsert.csv b/test/custom/incremental_strategies/data/expected_partial_upsert.csv new file mode 100644 index 00000000..bc922cde --- /dev/null +++ b/test/custom/incremental_strategies/data/expected_partial_upsert.csv @@ -0,0 +1,4 @@ +id,msg,color +1,hello,blue +2,yo,red +3,anyway,purple \ No newline at end of file diff --git a/test/custom/incremental_strategies/models_delta/merge_update_columns.sql b/test/custom/incremental_strategies/models_delta/merge_update_columns.sql new file mode 100644 index 00000000..d934b299 --- /dev/null +++ b/test/custom/incremental_strategies/models_delta/merge_update_columns.sql @@ -0,0 +1,22 @@ +{{ config( + materialized = 'incremental', + incremental_strategy = 'merge', + file_format = 'delta', + unique_key = 'id', + merge_update_columns = ['msg'], +) }} + +{% if not is_incremental() %} + +select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color +union all +select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color + +{% else %} + +-- msg will be updated, color will be ignored +select cast(2 as bigint) as id, 'yo' as msg, 'green' as color +union all +select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color + +{% endif %} diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/test/custom/incremental_strategies/test_incremental_strategies.py index 4d13a770..64966ece 100644 --- a/test/custom/incremental_strategies/test_incremental_strategies.py +++ b/test/custom/incremental_strategies/test_incremental_strategies.py @@ -71,6 +71,7 @@ def run_and_test(self): self.assertTablesEqual("append_delta", "expected_append") self.assertTablesEqual("merge_no_key", "expected_append") self.assertTablesEqual("merge_unique_key", "expected_upsert") + self.assertTablesEqual("merge_update_columns", "expected_partial_upsert") @use_profile("databricks_cluster") def test_delta_strategies_databricks_cluster(self): From 5f4441563e9a37dab703e994fbbf5197dc779426 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Wed, 7 Jul 2021 12:40:26 -0400 Subject: [PATCH 245/603] Adding release workflow --- .github/workflows/release.yml | 116 ++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..a160039b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,116 @@ +# Builds the spark plugin and releases it to GitHub and Pypi +name: Build and Release + +on: + workflow_dispatch: + +# Release version number that must be updated for each release +env: + version_number: '0.20.0rc2' + +jobs: + Test: + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v2.2.2 + with: + python-version: '3.8' + + - uses: actions/checkout@v2 + + - name: Test release + run: | + python3 -m venv env + source env/bin/activate + pip install -r dev_requirements.txt + pip install twine wheel setuptools 
+ python setup.py sdist bdist_wheel + pip install dist/dbt-spark-*.tar.gz + pip install dist/dbt_spark-*-py3-none-any.whl + twine check dist/dbt_spark-*-py3-none-any.whl dist/dbt-spark-*.tar.gz + + GitHubRelease: + name: GitHub release + runs-on: ubuntu-latest + needs: Test + steps: + - name: Setup Python + uses: actions/setup-python@v2.2.2 + with: + python-version: '3.8' + + - uses: actions/checkout@v2 + + - name: Bumping version + run: | + python3 -m venv env + source env/bin/activate + pip install -r dev_requirements.txt + bumpversion --config-file .bumpversion-dbt.cfg patch --new-version ${{env.version_number}} + bumpversion --config-file .bumpversion.cfg patch --new-version ${{env.version_number}} --allow-dirty + git status + + - name: Commit version bump and tag + uses: EndBug/add-and-commit@v7 + with: + author_name: 'Leah Antkiewicz' + author_email: 'leah.antkiewicz@dbtlabs.com' + message: 'Bumping version to ${{env.version_number}}' + tag: v${{env.version_number}} + + # Need to set an output variable because env variables can't be taken as input + # This is needed for the next step with releasing to GitHub + - name: Find release type + id: release_type + env: + IS_PRERELEASE: ${{ contains(env.version_number, 'rc') || contains(env.version_number, 'b') }} + run: | + echo ::set-output name=isPrerelease::$IS_PRERELEASE + + - name: Create GitHub release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token + with: + tag_name: v${{env.version_number}} + release_name: dbt-spark v${{env.version_number}} + prerelease: ${{ steps.release_type.outputs.isPrerelease }} + body: | + Tracking [dbt-core v${{env.version_number}}](https://github.com/dbt-labs/dbt/releases/tag/v${{env.version_number}}). 
+ + ```sh + $ pip install dbt-spark==${{env.version_number}} + # or + $ pip install "dbt-spark[ODBC]==${{env.version_number}}" + # or + $ pip install "dbt-spark[PyHive]==${{env.version_number}}" + ``` + + PypiRelease: + name: Pypi release + runs-on: ubuntu-latest + needs: GitHubRelease + environment: PypiProd + steps: + - name: Setup Python + uses: actions/setup-python@v2.2.2 + with: + python-version: '3.8' + + - uses: actions/checkout@v2 + with: + ref: v${{env.version_number}} + + - name: Release to pypi + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python3 -m venv env + source env/bin/activate + pip install -r dev_requirements.txt + pip install twine wheel setuptools + python setup.py sdist bdist_wheel + twine upload --non-interactive dist/dbt_spark-${{env.version_number}}-py3-none-any.whl dist/dbt-spark-${{env.version_number}}.tar.gz + From d3e1daf282a98700f79760884eaa0966d14b0b73 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Wed, 7 Jul 2021 13:16:44 -0400 Subject: [PATCH 246/603] Update release.yml --- .github/workflows/release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a160039b..21950978 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,6 +23,7 @@ jobs: run: | python3 -m venv env source env/bin/activate + sudo apt-get install libsasl2-dev pip install -r dev_requirements.txt pip install twine wheel setuptools python setup.py sdist bdist_wheel From 6a8c4fbf396d5a9f4d50979b668e28c875b23442 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Wed, 7 Jul 2021 13:21:52 -0400 Subject: [PATCH 247/603] Update release.yml --- .github/workflows/release.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 21950978..b25ea884 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -47,6 +47,7 @@ jobs: run: | python3 -m venv env source env/bin/activate + sudo apt-get install libsasl2-dev pip install -r dev_requirements.txt bumpversion --config-file .bumpversion-dbt.cfg patch --new-version ${{env.version_number}} bumpversion --config-file .bumpversion.cfg patch --new-version ${{env.version_number}} --allow-dirty @@ -110,6 +111,7 @@ jobs: run: | python3 -m venv env source env/bin/activate + sudo apt-get install libsasl2-dev pip install -r dev_requirements.txt pip install twine wheel setuptools python setup.py sdist bdist_wheel From 071da7d026bb364d27839ed362f3f94274a35978 Mon Sep 17 00:00:00 2001 From: Leah Antkiewicz Date: Wed, 7 Jul 2021 17:24:01 +0000 Subject: [PATCH 248/603] Bumping version to 0.20.0rc2 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 86b21d72..954cdbe3 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.20.0rc1 +current_version = 0.20.0rc2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b4313248..0b072dbd 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.20.0rc1 +current_version = 0.20.0rc2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py 
b/dbt/adapters/spark/__version__.py index 7cb09608..586d2c55 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.20.0rc1" +version = "0.20.0rc2" diff --git a/requirements.txt b/requirements.txt index b91b4804..cc07c7bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.20.0rc1 +dbt-core==0.20.0rc2 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index 26020960..e2fbf050 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.20.0rc1' +dbt_version = '0.20.0rc2' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.20.0rc1 vs 0.20.0rc1a1, 0.20.0rc1.1, ...) +# ends of it. (0.20.0rc2 vs 0.20.0rc2a1, 0.20.0rc2.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 97bfd6c684f884f5540b528d863d9da47936b2e5 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Wed, 7 Jul 2021 13:25:53 -0400 Subject: [PATCH 249/603] Adding dependency to workflow --- .github/workflows/release.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a160039b..b25ea884 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,6 +23,7 @@ jobs: run: | python3 -m venv env source env/bin/activate + sudo apt-get install libsasl2-dev pip install -r dev_requirements.txt pip install twine wheel setuptools python setup.py sdist bdist_wheel @@ -46,6 +47,7 @@ jobs: run: | python3 -m venv env source env/bin/activate + sudo apt-get install libsasl2-dev pip install -r dev_requirements.txt bumpversion --config-file .bumpversion-dbt.cfg patch --new-version ${{env.version_number}} bumpversion --config-file .bumpversion.cfg patch --new-version ${{env.version_number}} --allow-dirty @@ -109,6 +111,7 @@ jobs: run: | python3 -m venv env source env/bin/activate + sudo apt-get install libsasl2-dev pip install -r dev_requirements.txt pip install twine wheel setuptools python setup.py sdist bdist_wheel From 6db7a203bf07a2231e82f4cb041142779d96501e Mon Sep 17 00:00:00 2001 From: Leah Antkiewicz Date: Wed, 7 Jul 2021 17:27:56 +0000 Subject: [PATCH 250/603] Bumping version to 0.20.0rc2 --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 86b21d72..954cdbe3 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.20.0rc1 +current_version = 0.20.0rc2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b4313248..0b072dbd 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.20.0rc1 +current_version = 0.20.0rc2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 7cb09608..586d2c55 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.20.0rc1" +version = "0.20.0rc2" diff --git a/requirements.txt b/requirements.txt index b91b4804..cc07c7bd 
100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.20.0rc1 +dbt-core==0.20.0rc2 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index 26020960..e2fbf050 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.20.0rc1' +dbt_version = '0.20.0rc2' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.20.0rc1 vs 0.20.0rc1a1, 0.20.0rc1.1, ...) +# ends of it. (0.20.0rc2 vs 0.20.0rc2a1, 0.20.0rc2.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From b315008c039d8d30c0fed0f858c38fa77c135c9b Mon Sep 17 00:00:00 2001 From: Ali Teeney Date: Wed, 28 Jul 2021 03:19:27 +0100 Subject: [PATCH 251/603] Fix get columns in relation (#197) * Add test for failing get_columns_in_relation Specifically, it fails when it's called on a model that was created in the same run. At the start of the run, the cache is populated, setting Relation.information to be the string output of a DESCRIBE EXTENDED query, which allows columns and metadata to be parsed. However, when models are created, a Relation with information=None is saved in the cache (since columns and metadata aren't returned from a CREATE TABLE / VIEW statement). This means that an `expected string or bytes-like object` error is raised when attempting to regex-parse None. * Only parse cols from cache if there's information If the `information` attribute is not yet set, we fall back on the non-cached version to find column information. We could _also_ cache the output of that query, but given that it wasn't cached originally, I leave it as it is. 
* Add get_columns_in_relation fix to CHANGELOG --- CHANGELOG.md | 1 + dbt/adapters/spark/impl.py | 2 +- .../get_columns_in_relation/models/child.sql | 1 + .../models/get_columns_from_child.sql | 6 +++++ .../test_get_columns_in_relation.py | 27 +++++++++++++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 test/custom/get_columns_in_relation/models/child.sql create mode 100644 test/custom/get_columns_in_relation/models/get_columns_from_child.sql create mode 100644 test/custom/get_columns_in_relation/test_get_columns_in_relation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d0b8344f..0f8705ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ### Fixes - Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180)) +- Fix `get_columns_in_relation` when called on models created in the same run ([#197](https://github.com/dbt-labs/dbt-spark/pull/197)) ## dbt-spark 0.20.0rc1 (June 8, 2021) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 9f4ae514..f8e72449 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -212,7 +212,7 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: for cached_relation in cached_relations if str(cached_relation) == str(relation)), None) - if cached_relation is None: + if cached_relation is None or cached_relation.information is None: rows: List[agate.Row] = super().get_columns_in_relation(relation) columns = self.parse_describe_extended(relation, rows) else: diff --git a/test/custom/get_columns_in_relation/models/child.sql b/test/custom/get_columns_in_relation/models/child.sql new file mode 100644 index 00000000..2e3761f7 --- /dev/null +++ b/test/custom/get_columns_in_relation/models/child.sql @@ -0,0 +1 @@ +SELECT 1 diff --git a/test/custom/get_columns_in_relation/models/get_columns_from_child.sql b/test/custom/get_columns_in_relation/models/get_columns_from_child.sql new file mode 100644 index 00000000..5118ae03 --- /dev/null +++ b/test/custom/get_columns_in_relation/models/get_columns_from_child.sql @@ -0,0 +1,6 @@ +SELECT + {% set cols = adapter.get_columns_in_relation(ref('child')) %} + {% for col in cols %} + {{ adapter.quote(col.column) }}{%- if not loop.last %},{{ '\n ' }}{% endif %} + {% endfor %} +FROM {{ ref('child') }} diff --git a/test/custom/get_columns_in_relation/test_get_columns_in_relation.py b/test/custom/get_columns_in_relation/test_get_columns_in_relation.py new file mode 100644 index 00000000..e2c1d7a4 --- /dev/null +++ b/test/custom/get_columns_in_relation/test_get_columns_in_relation.py @@ -0,0 +1,27 @@ +from test.custom.base import DBTSparkIntegrationTest, use_profile + + +class TestGetColumnInRelationInSameRun(DBTSparkIntegrationTest): + @property + def schema(self): + return "get_columns_in_relation" + + @property + def models(self): + return "models" + + def run_and_test(self): + self.run_dbt(["run"]) + self.assertTablesEqual("child", "get_columns_from_child") + + @use_profile("apache_spark") + def test_get_columns_in_relation_in_same_run_apache_spark(self): + self.run_and_test() + + @use_profile("databricks_cluster") + def test_get_columns_in_relation_in_same_run_databricks_cluster(self): + self.run_and_test() + + @use_profile("databricks_sql_endpoint") + def test_get_columns_in_relation_in_same_run_databricks_sql_endpoint(self): + self.run_and_test() From 5cafd59ee6691a42f98dc651b64ce77e107b8698 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 27 Jul 2021 
22:25:55 -0400 Subject: [PATCH 252/603] Update changelog --- CHANGELOG.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f8705ad..8df37fad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,12 @@ -## dbt-spark 0.20.0 (Release TBD) +## dbt-spark 0.20.1 (Release TBD) + +### Fixes +- Fix `get_columns_in_relation` when called on models created in the same run ([#196](https://github.com/dbt-labs/dbt-spark/pull/196), [#197](https://github.com/dbt-labs/dbt-spark/pull/197)) + +### Contributors +- [@ali-tny](https://github.com/ali-tny) ([#197](https://github.com/fishtown-analytics/dbt-spark/pull/197)) + +## dbt-spark 0.20.0 (July 12, 2021) ### Features @@ -7,7 +15,6 @@ ### Fixes - Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180)) -- Fix `get_columns_in_relation` when called on models created in the same run ([#197](https://github.com/dbt-labs/dbt-spark/pull/197)) ## dbt-spark 0.20.0rc1 (June 8, 2021) From 1a5c2602212800116bd9995545900fc404a22f9b Mon Sep 17 00:00:00 2001 From: Cor Date: Tue, 10 Aug 2021 01:41:27 +0000 Subject: [PATCH 253/603] Show more detailed feedback when pyodbc import fails (#192) * Use exception chaining to get more detailed feedback when pyodbc is not installed * Remove pyodbc referenced before assignment * Set back try except * Add flake ignore * Add error message to RunTimeException Error chaining does not show the message in `dbt debug`. Therefore we explicitly add the message to the dbt.exceptions.RunTimeException * Update change log Add to change log that we print the import error when pyodbc can not be imported * Fix parenthesis in change log --- CHANGELOG.md | 3 ++- dbt/adapters/spark/connections.py | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8df37fad..6b4fe958 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ ### Features -- Add support for `merge_update_columns` config in `merge`-strategy incremental models ([#183](https://github.com/fishtown-analytics/dbt-spark/pull/183), ([#184](https://github.com/fishtown-analytics/dbt-spark/pull/184)) +- Add support for `merge_update_columns` config in `merge`-strategy incremental models ([#183](https://github.com/fishtown-analytics/dbt-spark/pull/183), [#184](https://github.com/fishtown-analytics/dbt-spark/pull/184)) +- Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) ### Fixes diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 1bc8d80b..d478f374 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -94,13 +94,17 @@ def __post_init__(self): ) self.database = None - if self.method == SparkConnectionMethod.ODBC and pyodbc is None: - raise dbt.exceptions.RuntimeException( - f"{self.method} connection method requires " - "additional dependencies. \n" - "Install the additional required dependencies with " - "`pip install dbt-spark[ODBC]`" - ) + if self.method == SparkConnectionMethod.ODBC: + try: + import pyodbc # noqa: F401 + except ImportError as e: + raise dbt.exceptions.RuntimeException( + f"{self.method} connection method requires " + "additional dependencies. 
\n" + "Install the additional required dependencies with " + "`pip install dbt-spark[ODBC]`\n\n" + f"ImportError({e.msg})" + ) from e if ( self.method == SparkConnectionMethod.ODBC and From adc137a680920cc8241c56699f83b45c00c4e348 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 9 Aug 2021 21:54:25 -0400 Subject: [PATCH 254/603] Update changelog [skip ci] --- CHANGELOG.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b4fe958..f0618ec4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ ## dbt-spark 0.20.1 (Release TBD) +### Fixes +- Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) + +### Contributors +- [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192)) + +## dbt-spark 0.21.0b1 (August 3, 2021) + +## dbt-spark 0.20.1 (Au gust 2, 2021) + +## dbt-spark 0.20.1rc1 (August 2, 2021) + ### Fixes - Fix `get_columns_in_relation` when called on models created in the same run ([#196](https://github.com/dbt-labs/dbt-spark/pull/196), [#197](https://github.com/dbt-labs/dbt-spark/pull/197)) @@ -8,10 +20,11 @@ ## dbt-spark 0.20.0 (July 12, 2021) +## dbt-spark 0.20.0rc2 (July 7, 2021) + ### Features - Add support for `merge_update_columns` config in `merge`-strategy incremental models ([#183](https://github.com/fishtown-analytics/dbt-spark/pull/183), [#184](https://github.com/fishtown-analytics/dbt-spark/pull/184)) -- Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) ### Fixes From 4b23fd256fec113b30156f2c16c3520926a26705 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 9 Aug 2021 21:55:16 -0400 Subject: [PATCH 255/603] Update changelog [skip ci] --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0618ec4..776f8357 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ ## dbt-spark 0.21.0b1 (August 3, 2021) -## dbt-spark 0.20.1 (Au gust 2, 2021) +## dbt-spark 0.20.1 (August 2, 2021) ## dbt-spark 0.20.1rc1 (August 2, 2021) From da358d22a5587a4a4e070dce6d0046ad94dbb41e Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 9 Aug 2021 21:58:07 -0400 Subject: [PATCH 256/603] Update changelog [skip ci] --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 776f8357..f3c56093 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## dbt-spark 0.20.1 (Release TBD) +## dbt-spark 0.21.0 (Release TBD) ### Fixes - Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) From 3a6628582dd3896855f6fd2cb4e6d686d95f2b13 Mon Sep 17 00:00:00 2001 From: Jethro Nederhof Date: Wed, 11 Aug 2021 23:18:58 +1000 Subject: [PATCH 257/603] Add support for ODBC Server Side Parameters (#201) * Add support for ODBC Server Side Parameters * Update CHANGELOG --- CHANGELOG.md | 2 ++ dbt/adapters/spark/connections.py | 13 +++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3c56093..60d85d50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,11 @@ ### Fixes - Add pyodbc import error message to 
dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) +- Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201)) ### Contributors - [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192)) +- [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201)) ## dbt-spark 0.21.0b1 (August 3, 2021) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index d478f374..e802c444 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -24,8 +24,8 @@ import sqlparams from hologram.helpers import StrEnum -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, field +from typing import Any, Dict, Optional try: from thrift.transport.TSSLSocket import TSSLSocket import thrift @@ -72,6 +72,7 @@ class SparkCredentials(Credentials): connect_retries: int = 0 connect_timeout: int = 10 use_ssl: bool = False + server_side_parameters: Dict[str, Any] = field(default_factory=dict) @classmethod def __pre_deserialize__(cls, data): @@ -405,6 +406,12 @@ def open(cls, connection): dbt_spark_version = __version__.version user_agent_entry = f"fishtown-analytics-dbt-spark/{dbt_spark_version} (Databricks)" # noqa + # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm + ssp = { + f"SSP_{k}": f"{{{v}}}" + for k, v in creds.server_side_parameters.items() + } + # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm connection_str = _build_odbc_connnection_string( DRIVER=creds.driver, @@ -418,6 +425,8 @@ def open(cls, connection): ThriftTransport=2, SSL=1, UserAgentEntry=user_agent_entry, + LCaseSspKeyName=0 if ssp else 1, + **ssp, ) conn = pyodbc.connect(connection_str, autocommit=True) From c6817cf6ca4c02202ff102c51cd325f9755595f1 Mon Sep 17 00:00:00 2001 From: gregingenii <80900458+gregingenii@users.noreply.github.com> Date: Fri, 13 Aug 2021 18:50:24 +0100 Subject: [PATCH 258/603] Feature/able to retry all connections (#194) * Code changes * README changes * Improve error message default * Changelog * Changelog corrections * Restore accidental deletion * Update dbt/adapters/spark/connections.py Co-authored-by: Jeremy Cohen * Add myself to Contributors Co-authored-by: Jeremy Cohen --- CHANGELOG.md | 4 +++- README.md | 4 ++++ dbt/adapters/spark/connections.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60d85d50..011a8f2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,12 @@ ### Fixes - Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) - Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201)) +- Add `retry_all` configuration setting to retry all connection issues, not just when the `_is_retryable_error` function determines ([#194](https://github.com/dbt-labs/dbt-spark/pull/194)) ### Contributors - [@JCZuurmond](https://github.com/JCZuurmond) 
([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192)) - [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201)) +- [@gregingenii](https://github.com/gregingenii) ([#194](https://github.com/dbt-labs/dbt-spark/pull/194)) ## dbt-spark 0.21.0b1 (August 3, 2021) @@ -62,7 +64,7 @@ ## dbt-spark 0.19.1b2 (February 26, 2021) ### Under the hood -- update serialization calls to use new API in dbt-core `0.19.1b2` ([#150](https://github.com/fishtown-analytics/dbt-spark/pull/150)) +- Update serialization calls to use new API in dbt-core `0.19.1b2` ([#150](https://github.com/fishtown-analytics/dbt-spark/pull/150)) ## dbt-spark 0.19.0.1 (February 26, 2021) diff --git a/README.md b/README.md index 71ec7cdf..c330afde 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,7 @@ A dbt profile for Spark connections support the following configurations: | connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | ❌ | ❔ (`10`) | ❔ (`10`) | `60` | | connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | ❌ | ❔ (`0`) | ❔ (`0`) | `5` | | use_ssl | The value of `hive.server2.use.SSL` (`True` or `False`). Default ssl store (ssl.get_default_verify_paths()) is the valid location for SSL certificate | ❌ | ❔ (`False`) | ❌ | `True` | +| retry_all | Whether to retry all failed connections, and not just 'retryable' ones | ❌ | ❔ (`false`) | ❔ (`false`) | `false` | **Databricks** connections differ based on the cloud provider: @@ -124,6 +125,7 @@ your_profile_name: kerberos_service_name: hive connect_retries: 5 connect_timeout: 60 + retry_all: true ``` @@ -145,6 +147,7 @@ your_profile_name: # optional connect_retries: 5 connect_timeout: 60 + retry_all: true ``` @@ -251,6 +254,7 @@ spark-testing: schema: analytics connect_retries: 5 connect_timeout: 60 + retry_all: true ``` Connecting to the local spark instance: diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index e802c444..7bf6d159 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -73,6 +73,7 @@ class SparkCredentials(Credentials): connect_timeout: int = 10 use_ssl: bool = False server_side_parameters: Dict[str, Any] = field(default_factory=dict) + retry_all: bool = False @classmethod def __pre_deserialize__(cls, data): @@ -454,6 +455,16 @@ def open(cls, connection): ) logger.warning(msg) time.sleep(creds.connect_timeout) + elif creds.retry_all and creds.connect_retries > 0: + msg = ( + f"Warning: {getattr(exc, 'message', 'No message')}, " + f"retrying due to 'retry_all' configuration " + f"set to true.\n\tRetrying in " + f"{creds.connect_timeout} seconds " + f"({i} of {creds.connect_retries})" + ) + logger.warning(msg) + time.sleep(creds.connect_timeout) else: raise dbt.exceptions.FailedToConnectException( 'failed to connect' From 087158e0f2feb6c221ce1903bd51184e4507f6b7 Mon Sep 17 00:00:00 2001 From: Hariharan Banukumar Date: Mon, 23 Aug 2021 07:13:54 -0400 Subject: [PATCH 259/603] fixed get_columns_in_relation for open source delta table (#207) * fixed get_columns_in_relation for open source delta table * fixed E501 linting error and added change log --- CHANGELOG.md | 2 ++ dbt/adapters/spark/impl.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 011a8f2e..5b93a5b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,13 @@ ## dbt-spark 0.21.0 (Release TBD) ### Fixes +- Enhanced 
get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) - Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201)) - Add `retry_all` configuration setting to retry all connection issues, not just when the `_is_retryable_error` function determines ([#194](https://github.com/dbt-labs/dbt-spark/pull/194)) ### Contributors +- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192)) - [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201)) - [@gregingenii](https://github.com/gregingenii) ([#194](https://github.com/dbt-labs/dbt-spark/pull/194)) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index f8e72449..03fba9fa 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -212,11 +212,16 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: for cached_relation in cached_relations if str(cached_relation) == str(relation)), None) - if cached_relation is None or cached_relation.information is None: + columns = [] + if cached_relation and cached_relation.information: + columns = self.parse_columns_from_information(cached_relation) + if not columns: + # in open source delta 'show table extended' query output doesnt + # return relation's schema. if columns are empty from cache, + # use get_columns_in_relation spark macro + # which would execute 'describe extended tablename' query rows: List[agate.Row] = super().get_columns_in_relation(relation) columns = self.parse_describe_extended(relation, rows) - else: - columns = self.parse_columns_from_information(cached_relation) return columns def parse_columns_from_information( From 3980e5ce4bce11e96dccf47b89f48e4ba9c2de1d Mon Sep 17 00:00:00 2001 From: Sergio Date: Mon, 23 Aug 2021 14:10:29 +0200 Subject: [PATCH 260/603] fix issue parsing structs (#204) * fix issue parsing structs * include contributor in changelog * better error explanation Co-authored-by: Jeremy Cohen --- CHANGELOG.md | 11 ++++- dbt/adapters/spark/impl.py | 2 +- test/unit/test_adapter.py | 86 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 93 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b93a5b9..c6f8c272 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,20 @@ ### Fixes - Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. 
This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) +- Parse properly columns when there are struct fields to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202)) + +### Contributors +- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) +- [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204)) + +## dbt-spark 0.21.0b2 (August 20, 2021) + +### Fixes - Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192)) - Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201)) - Add `retry_all` configuration setting to retry all connection issues, not just when the `_is_retryable_error` function determines ([#194](https://github.com/dbt-labs/dbt-spark/pull/194)) ### Contributors -- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192)) - [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201)) - [@gregingenii](https://github.com/gregingenii) ([#194](https://github.com/dbt-labs/dbt-spark/pull/194)) @@ -24,6 +32,7 @@ ### Contributors - [@ali-tny](https://github.com/ali-tny) ([#197](https://github.com/fishtown-analytics/dbt-spark/pull/197)) + ## dbt-spark 0.20.0 (July 12, 2021) ## dbt-spark 0.20.0rc2 (July 7, 2021) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 03fba9fa..6acbcd44 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -64,7 +64,7 @@ class SparkAdapter(SQLAdapter): 'stats:rows:include', ) INFORMATION_COLUMNS_REGEX = re.compile( - r"\|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) + r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE) INFORMATION_STATISTICS_REGEX = re.compile( r"^Statistics: (.*)$", re.MULTILINE) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index ddfbeddb..f87a89b2 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -275,6 +275,7 @@ def test_parse_relation(self): ('col1', 'decimal(22,0)'), ('col2', 'string',), ('dt', 'date'), + ('struct_col', 'struct'), ('# Partition Information', 'data_type'), ('# col_name', 'data_type'), ('dt', 'date'), @@ -299,7 +300,7 @@ def test_parse_relation(self): config = self._get_target_http(self.project_cfg) rows = SparkAdapter(config).parse_describe_extended( relation, input_cols) - self.assertEqual(len(rows), 3) + self.assertEqual(len(rows), 4) self.assertEqual(rows[0].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, @@ -342,6 +343,20 @@ def test_parse_relation(self): 'char_size': None }) + self.assertEqual(rows[3].to_column_dict(omit_none=False), { + 'table_database': None, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'struct_col', + 'column_index': 3, + 'dtype': 'struct', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None + }) + def test_parse_relation_with_integer_owner(self): 
self.maxDiff = None rel_type = SparkRelation.get_relation_type.Table @@ -507,6 +522,8 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self) " |-- col1: decimal(22,0) (nullable = true)\n" " |-- col2: string (nullable = true)\n" " |-- dt: date (nullable = true)\n" + " |-- struct_col: struct (nullable = true)\n" + " | |-- struct_inner_col: string (nullable = true)\n" ) relation = SparkRelation.create( schema='default_schema', @@ -518,7 +535,7 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self) config = self._get_target_http(self.project_cfg) columns = SparkAdapter(config).parse_columns_from_information( relation) - self.assertEqual(len(columns), 3) + self.assertEqual(len(columns), 4) self.assertEqual(columns[0].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, @@ -538,6 +555,25 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self) 'stats:bytes:value': 123456789, }) + self.assertEqual(columns[3].to_column_dict(omit_none=False), { + 'table_database': None, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'struct_col', + 'column_index': 3, + 'dtype': 'struct', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None, + + 'stats:bytes:description': '', + 'stats:bytes:include': True, + 'stats:bytes:label': 'bytes', + 'stats:bytes:value': 123456789, + }) + def test_parse_columns_from_information_with_view_type(self): self.maxDiff = None rel_type = SparkRelation.get_relation_type.View @@ -571,6 +607,8 @@ def test_parse_columns_from_information_with_view_type(self): " |-- col1: decimal(22,0) (nullable = true)\n" " |-- col2: string (nullable = true)\n" " |-- dt: date (nullable = true)\n" + " |-- struct_col: struct (nullable = true)\n" + " | |-- struct_inner_col: string (nullable = true)\n" ) relation = SparkRelation.create( schema='default_schema', @@ -582,7 +620,7 @@ def test_parse_columns_from_information_with_view_type(self): config = self._get_target_http(self.project_cfg) columns = SparkAdapter(config).parse_columns_from_information( relation) - self.assertEqual(len(columns), 3) + self.assertEqual(len(columns), 4) self.assertEqual(columns[1].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, @@ -597,6 +635,20 @@ def test_parse_columns_from_information_with_view_type(self): 'char_size': None }) + self.assertEqual(columns[3].to_column_dict(omit_none=False), { + 'table_database': None, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'struct_col', + 'column_index': 3, + 'dtype': 'struct', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None + }) + def test_parse_columns_from_information_with_table_type_and_parquet_provider(self): self.maxDiff = None rel_type = SparkRelation.get_relation_type.Table @@ -619,6 +671,8 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel " |-- col1: decimal(22,0) (nullable = true)\n" " |-- col2: string (nullable = true)\n" " |-- dt: date (nullable = true)\n" + " |-- struct_col: struct (nullable = true)\n" + " | |-- struct_inner_col: string (nullable = true)\n" ) relation = SparkRelation.create( schema='default_schema', @@ -630,7 +684,7 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel config = self._get_target_http(self.project_cfg) columns 
= SparkAdapter(config).parse_columns_from_information( relation) - self.assertEqual(len(columns), 3) + self.assertEqual(len(columns), 4) self.assertEqual(columns[2].to_column_dict(omit_none=False), { 'table_database': None, 'table_schema': relation.schema, @@ -655,3 +709,27 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel 'stats:rows:value': 12345678 }) + self.assertEqual(columns[3].to_column_dict(omit_none=False), { + 'table_database': None, + 'table_schema': relation.schema, + 'table_name': relation.name, + 'table_type': rel_type, + 'table_owner': 'root', + 'column': 'struct_col', + 'column_index': 3, + 'dtype': 'struct', + 'numeric_scale': None, + 'numeric_precision': None, + 'char_size': None, + + 'stats:bytes:description': '', + 'stats:bytes:include': True, + 'stats:bytes:label': 'bytes', + 'stats:bytes:value': 1234567890, + + 'stats:rows:description': '', + 'stats:rows:include': True, + 'stats:rows:label': 'rows', + 'stats:rows:value': 12345678 + }) + From ec7f15f936520b2c18348d66d401fedae7dd1391 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 14 Sep 2021 13:02:17 +0200 Subject: [PATCH 261/603] Add adapter unique_field (#211) * Add adapter unique_field * Fix flake8. Add changelog entry --- CHANGELOG.md | 3 +++ dbt/adapters/spark/connections.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6f8c272..7405e9e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ - Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - Parse properly columns when there are struct fields to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202)) +### Under the hood +- Add `unique_field` to better understand adapter adoption in anonymous usage tracking ([#211](https://github.com/dbt-labs/dbt-spark/pull/211)) + ### Contributors - [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204)) diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 7bf6d159..41413f35 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -135,6 +135,10 @@ def __post_init__(self): def type(self): return 'spark' + @property + def unique_field(self): + return self.host + def _connection_keys(self): return ('host', 'port', 'cluster', 'endpoint', 'schema', 'organization') From f39169efa8175d861a23e9fc6227190f39900119 Mon Sep 17 00:00:00 2001 From: Snyk bot Date: Wed, 15 Sep 2021 18:52:05 +0200 Subject: [PATCH 262/603] [Snyk] Fix for 2 vulnerabilities (#214) * fix: requirements.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive dependencies: - https://snyk.io/vuln/SNYK-PYTHON-SQLPARSE-1584201 - https://snyk.io/vuln/SNYK-PYTHON-THRIFT-474615 * Removing Thrift conflict with versions over 12 Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com> --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cc07c7bd..6e306e56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ dbt-core==0.20.0rc2 
PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 -thrift>=0.11.0,<0.12.0 +thrift>=0.13.0 +sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability From b0d05f9293b258be7ee3046243dcf2ccf0ac170a Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 15 Oct 2021 15:26:09 +0200 Subject: [PATCH 263/603] Bumping version to 0.21.0 (#231) --- .bumpversion-dbt.cfg | 2 +- .bumpversion.cfg | 2 +- dbt/adapters/spark/__version__.py | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg index 954cdbe3..f8ad313f 100644 --- a/.bumpversion-dbt.cfg +++ b/.bumpversion-dbt.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.20.0rc2 +current_version = 0.21.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 0b072dbd..6a532e05 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.20.0rc2 +current_version = 0.21.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 586d2c55..025ca235 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "0.20.0rc2" +version = "0.21.0" diff --git a/requirements.txt b/requirements.txt index 6e306e56..88cd8291 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -dbt-core==0.20.0rc2 +dbt-core==0.21.0 PyHive[hive]>=0.6.0,<0.7.0 pyodbc>=4.0.30 sqlparams>=3.0.0 diff --git a/setup.py b/setup.py index e2fbf050..f0f098fd 100644 --- a/setup.py +++ b/setup.py @@ -28,9 +28,9 @@ def _dbt_spark_version(): package_version = _dbt_spark_version() description = """The SparkSQL plugin for dbt (data build tool)""" -dbt_version = '0.20.0rc2' +dbt_version = '0.21.0' # the package version should be the dbt version, with maybe some things on the -# ends of it. (0.20.0rc2 vs 0.20.0rc2a1, 0.20.0rc2.1, ...) +# ends of it. (0.21.0 vs 0.21.0a1, 0.21.0.1, ...) if not package_version.startswith(dbt_version): raise ValueError( f'Invalid setup.py: package_version={package_version} must start with ' From 9be7bbb2866d3e4257cd94d0e6879a22bfacc74a Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 15 Oct 2021 17:03:30 +0200 Subject: [PATCH 264/603] Fix --store-failures (#233) * Fix --store-failures * Add chnagelog entry * Maybe this? 
* Fix indentation * Class inheritance, silly --- CHANGELOG.md | 7 ++- dbt/include/spark/macros/adapters.sql | 2 +- test/custom/store_failures/models/schema.yml | 9 ++++ .../store_failures/models/view_model.sql | 5 ++ .../store_failures/test_store_failures.py | 51 +++++++++++++++++++ 5 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 test/custom/store_failures/models/schema.yml create mode 100644 test/custom/store_failures/models/view_model.sql create mode 100644 test/custom/store_failures/test_store_failures.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7405e9e0..240c6810 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ -## dbt-spark 0.21.0 (Release TBD) +## dbt-spark 0.21.1 (Release TBD) + +### Fixes +- Fix `--store-failures` for tests, by suppressing irrelevant error in `comment_clause()` macro ([#232](https://github.com/dbt-labs/dbt-spark/issues/232), [#233](https://github.com/dbt-labs/dbt-spark/pull/233)) + +## dbt-spark 0.21.0 (October 4, 2021) ### Fixes - Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index fcdc46c6..b966e9aa 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -32,7 +32,7 @@ {%- if raw_relation -%} comment '{{ model.description | replace("'", "\\'") }}' {% endif %} - {%- else -%} + {%- elif raw_persist_docs -%} {{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. Expected dict but got value: " ~ raw_persist_docs) }} {% endif %} {%- endmacro -%} diff --git a/test/custom/store_failures/models/schema.yml b/test/custom/store_failures/models/schema.yml new file mode 100644 index 00000000..be559b20 --- /dev/null +++ b/test/custom/store_failures/models/schema.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: view_model + columns: + - name: id + tests: + - unique + - not_null diff --git a/test/custom/store_failures/models/view_model.sql b/test/custom/store_failures/models/view_model.sql new file mode 100644 index 00000000..2ff36b4e --- /dev/null +++ b/test/custom/store_failures/models/view_model.sql @@ -0,0 +1,5 @@ +select 1 as id +union all +select 1 as id +union all +select null as id diff --git a/test/custom/store_failures/test_store_failures.py b/test/custom/store_failures/test_store_failures.py new file mode 100644 index 00000000..7a4aae7d --- /dev/null +++ b/test/custom/store_failures/test_store_failures.py @@ -0,0 +1,51 @@ +from test.custom.base import DBTSparkIntegrationTest, use_profile + +class TestStoreFailures(DBTSparkIntegrationTest): + @property + def schema(self): + return "store_failures" + + @property + def models(self): + return "models" + + @property + def project_config(self): + return { + 'config-version': 2, + 'tests': { + '+store_failures': True, + '+severity': 'warn', + } + } + + def test_store_failures(self): + self.run_dbt(['run']) + results = self.run_dbt(['test', '--store-failures'], strict = False) + +class TestStoreFailuresApacheSpark(TestStoreFailures): + + @use_profile("apache_spark") + def test_store_failures_apache_spark(self): + self.test_store_failures() + +class TestStoreFailuresDelta(TestStoreFailures): + + @property + def project_config(self): + return { + 'config-version': 2, + 'tests': 
{ + '+store_failures': True, + '+severity': 'warn', + '+file_format': 'delta', + } + } + + @use_profile("databricks_cluster") + def test_store_failures_databricks_cluster(self): + self.test_store_failures() + + @use_profile("databricks_sql_endpoint") + def test_store_failures_databricks_sql_endpoint(self): + self.test_store_failures() From 8869b7a4ee9490ad29e6201c0bd7669635f4179c Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 18 Oct 2021 10:14:27 +0200 Subject: [PATCH 265/603] Add support for on_schema_change (#229) * Add support for on_schema_change * Add changelog note --- .gitignore | 1 + CHANGELOG.md | 1 + dbt/include/spark/macros/adapters.sql | 47 +++++- .../incremental/incremental.sql | 3 + .../models/incremental_append_new_columns.sql | 28 ++++ .../incremental_append_new_columns_target.sql | 19 +++ .../models/incremental_fail.sql | 18 ++ .../models/incremental_ignore.sql | 18 ++ .../models/incremental_ignore_target.sql | 15 ++ .../models/incremental_sync_all_columns.sql | 30 ++++ .../incremental_sync_all_columns_target.sql | 20 +++ .../models/model_a.sql | 22 +++ .../test_incremental_on_schema_change.py | 157 ++++++++++++++++++ 13 files changed, 378 insertions(+), 1 deletion(-) create mode 100644 test/custom/incremental_on_schema_change/models/incremental_append_new_columns.sql create mode 100644 test/custom/incremental_on_schema_change/models/incremental_append_new_columns_target.sql create mode 100644 test/custom/incremental_on_schema_change/models/incremental_fail.sql create mode 100644 test/custom/incremental_on_schema_change/models/incremental_ignore.sql create mode 100644 test/custom/incremental_on_schema_change/models/incremental_ignore_target.sql create mode 100644 test/custom/incremental_on_schema_change/models/incremental_sync_all_columns.sql create mode 100644 test/custom/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql create mode 100644 test/custom/incremental_on_schema_change/models/model_a.sql create mode 100644 test/custom/incremental_on_schema_change/test_incremental_on_schema_change.py diff --git a/.gitignore b/.gitignore index 9caf202a..4c05634f 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ test/integration/.user.yml .DS_Store .vscode *.log +logs/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 240c6810..d821c39c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Fixes - Fix `--store-failures` for tests, by suppressing irrelevant error in `comment_clause()` macro ([#232](https://github.com/dbt-labs/dbt-spark/issues/232), [#233](https://github.com/dbt-labs/dbt-spark/pull/233)) +- Add support for `on_schema_change` config in incremental models: `ignore`, `fail`, `append_new_columns`. 
For `sync_all_columns`, removing columns is not supported by Apache Spark or Delta Lake ([#198](https://github.com/dbt-labs/dbt-spark/issues/198), [#226](https://github.com/dbt-labs/dbt-spark/issues/226), [#229](https://github.com/dbt-labs/dbt-spark/pull/229)) ## dbt-spark 0.21.0 (October 4, 2021) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index b966e9aa..ee59b813 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -125,7 +125,7 @@ {% macro spark__get_columns_in_relation(relation) -%} {% call statement('get_columns_in_relation', fetch_result=True) %} - describe extended {{ relation }} + describe extended {{ relation.include(schema=(schema is not none)) }} {% endcall %} {% do return(load_result('get_columns_in_relation').table) %} {% endmacro %} @@ -194,3 +194,48 @@ {% endfor %} {% endif %} {% endmacro %} + + +{% macro spark__make_temp_relation(base_relation, suffix) %} + {% set tmp_identifier = base_relation.identifier ~ suffix %} + {% set tmp_relation = base_relation.incorporate(path = { + "identifier": tmp_identifier, + "schema": None + }) -%} + + {% do return(tmp_relation) %} +{% endmacro %} + + +{% macro spark__alter_column_type(relation, column_name, new_column_type) -%} + {% call statement('alter_column_type') %} + alter table {{ relation }} alter column {{ column_name }} type {{ new_column_type }}; + {% endcall %} +{% endmacro %} + + +{% macro spark__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %} + + {% if remove_columns %} + {% set platform_name = 'Delta Lake' if relation.is_delta else 'Apache Spark' %} + {{ exceptions.raise_compiler_error(platform_name + ' does not support dropping columns from tables') }} + {% endif %} + + {% if add_columns is none %} + {% set add_columns = [] %} + {% endif %} + + {% set sql -%} + + alter {{ relation.type }} {{ relation }} + + {% if add_columns %} add columns {% endif %} + {% for column in add_columns %} + {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }} + {% endfor %} + + {%- endset -%} + + {% do run_query(sql) %} + +{% endmacro %} diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql index b11990b3..55bd3174 100644 --- a/dbt/include/spark/macros/materializations/incremental/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql @@ -11,6 +11,8 @@ {%- set partition_by = config.get('partition_by', none) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} + + {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %} {% set target_relation = this %} {% set existing_relation = load_relation(this) %} @@ -31,6 +33,7 @@ {% set build_sql = create_table_as(False, target_relation, sql) %} {% else %} {% do run_query(create_table_as(True, tmp_relation, sql)) %} + {% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %} {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} {% endif %} diff --git a/test/custom/incremental_on_schema_change/models/incremental_append_new_columns.sql b/test/custom/incremental_on_schema_change/models/incremental_append_new_columns.sql new file mode 100644 index 00000000..86f6c7c4 --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/incremental_append_new_columns.sql @@ -0,0 +1,28 @@ +{{ + config( 
+ materialized='incremental', + on_schema_change='append_new_columns' + ) +}} + +{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %} + +WITH source_data AS (SELECT * FROM {{ ref('model_a') }} ) + +{% if is_incremental() %} + +SELECT id, + cast(field1 as {{string_type}}) as field1, + cast(field2 as {{string_type}}) as field2, + cast(field3 as {{string_type}}) as field3, + cast(field4 as {{string_type}}) as field4 +FROM source_data WHERE id NOT IN (SELECT id from {{ this }} ) + +{% else %} + +SELECT id, + cast(field1 as {{string_type}}) as field1, + cast(field2 as {{string_type}}) as field2 +FROM source_data where id <= 3 + +{% endif %} \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/models/incremental_append_new_columns_target.sql b/test/custom/incremental_on_schema_change/models/incremental_append_new_columns_target.sql new file mode 100644 index 00000000..55ed7b2c --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/incremental_append_new_columns_target.sql @@ -0,0 +1,19 @@ +{{ + config(materialized='table') +}} + +{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %} + +with source_data as ( + + select * from {{ ref('model_a') }} + +) + +select id + ,cast(field1 as {{string_type}}) as field1 + ,cast(field2 as {{string_type}}) as field2 + ,cast(CASE WHEN id <= 3 THEN NULL ELSE field3 END as {{string_type}}) AS field3 + ,cast(CASE WHEN id <= 3 THEN NULL ELSE field4 END as {{string_type}}) AS field4 + +from source_data \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/models/incremental_fail.sql b/test/custom/incremental_on_schema_change/models/incremental_fail.sql new file mode 100644 index 00000000..939fc20c --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/incremental_fail.sql @@ -0,0 +1,18 @@ +{{ + config( + materialized='incremental', + on_schema_change='fail' + ) +}} + +WITH source_data AS (SELECT * FROM {{ ref('model_a') }} ) + +{% if is_incremental() %} + +SELECT id, field1, field2 FROM source_data + +{% else %} + +SELECT id, field1, field3 FROm source_data + +{% endif %} \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/models/incremental_ignore.sql b/test/custom/incremental_on_schema_change/models/incremental_ignore.sql new file mode 100644 index 00000000..98f0a74a --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/incremental_ignore.sql @@ -0,0 +1,18 @@ +{{ + config( + materialized='incremental', + on_schema_change='ignore' + ) +}} + +WITH source_data AS (SELECT * FROM {{ ref('model_a') }} ) + +{% if is_incremental() %} + +SELECT id, field1, field2, field3, field4 FROM source_data WHERE id NOT IN (SELECT id from {{ this }} ) + +{% else %} + +SELECT id, field1, field2 FROM source_data LIMIT 3 + +{% endif %} \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/models/incremental_ignore_target.sql b/test/custom/incremental_on_schema_change/models/incremental_ignore_target.sql new file mode 100644 index 00000000..92d4564e --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/incremental_ignore_target.sql @@ -0,0 +1,15 @@ +{{ + config(materialized='table') +}} + +with source_data as ( + + select * from {{ ref('model_a') }} + +) + +select id + ,field1 + ,field2 + +from source_data \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns.sql 
b/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns.sql new file mode 100644 index 00000000..2c5a461e --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns.sql @@ -0,0 +1,30 @@ +{{ + config( + materialized='incremental', + on_schema_change='sync_all_columns' + + ) +}} + +WITH source_data AS (SELECT * FROM {{ ref('model_a') }} ) + +{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %} + +{% if is_incremental() %} + +SELECT id, + cast(field1 as {{string_type}}) as field1, + cast(field3 as {{string_type}}) as field3, -- to validate new fields + cast(field4 as {{string_type}}) AS field4 -- to validate new fields + +FROM source_data WHERE id NOT IN (SELECT id from {{ this }} ) + +{% else %} + +select id, + cast(field1 as {{string_type}}) as field1, + cast(field2 as {{string_type}}) as field2 + +from source_data where id <= 3 + +{% endif %} \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql b/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql new file mode 100644 index 00000000..56591eb2 --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql @@ -0,0 +1,20 @@ +{{ + config(materialized='table') +}} + +with source_data as ( + + select * from {{ ref('model_a') }} + +) + +{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %} + +select id + ,cast(field1 as {{string_type}}) as field1 + --,field2 + ,cast(case when id <= 3 then null else field3 end as {{string_type}}) as field3 + ,cast(case when id <= 3 then null else field4 end as {{string_type}}) as field4 + +from source_data +order by id \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/models/model_a.sql b/test/custom/incremental_on_schema_change/models/model_a.sql new file mode 100644 index 00000000..2a0b2dda --- /dev/null +++ b/test/custom/incremental_on_schema_change/models/model_a.sql @@ -0,0 +1,22 @@ +{{ + config(materialized='table') +}} + +with source_data as ( + + select 1 as id, 'aaa' as field1, 'bbb' as field2, 111 as field3, 'TTT' as field4 + union all select 2 as id, 'ccc' as field1, 'ddd' as field2, 222 as field3, 'UUU' as field4 + union all select 3 as id, 'eee' as field1, 'fff' as field2, 333 as field3, 'VVV' as field4 + union all select 4 as id, 'ggg' as field1, 'hhh' as field2, 444 as field3, 'WWW' as field4 + union all select 5 as id, 'iii' as field1, 'jjj' as field2, 555 as field3, 'XXX' as field4 + union all select 6 as id, 'kkk' as field1, 'lll' as field2, 666 as field3, 'YYY' as field4 + +) + +select id + ,field1 + ,field2 + ,field3 + ,field4 + +from source_data \ No newline at end of file diff --git a/test/custom/incremental_on_schema_change/test_incremental_on_schema_change.py b/test/custom/incremental_on_schema_change/test_incremental_on_schema_change.py new file mode 100644 index 00000000..e259e5c9 --- /dev/null +++ b/test/custom/incremental_on_schema_change/test_incremental_on_schema_change.py @@ -0,0 +1,157 @@ +from cProfile import run +from test.custom.base import DBTSparkIntegrationTest, use_profile +import dbt.exceptions + + +class TestIncrementalOnSchemaChange(DBTSparkIntegrationTest): + @property + def schema(self): + return "incremental_on_schema_change" + + @property + def models(self): + return "models" + + @property + def project_config(self): + return { + "config-version": 2, + "test-paths": 
["tests"] + } + + def run_twice_and_assert( + self, include, compare_source, compare_target + ): + + # dbt run (twice) + run_args = ['run'] + if include: + run_args.extend(('--models', include)) + results_one = self.run_dbt(run_args) + results_two = self.run_dbt(run_args) + + self.assertEqual(len(results_one), 3) + self.assertEqual(len(results_two), 3) + + self.assertTablesEqual(compare_source, compare_target) + + def run_incremental_ignore(self): + select = 'model_a incremental_ignore incremental_ignore_target' + compare_source = 'incremental_ignore' + compare_target = 'incremental_ignore_target' + self.run_twice_and_assert(select, compare_source, compare_target) + + def run_incremental_append_new_columns(self): + select = 'model_a incremental_append_new_columns incremental_append_new_columns_target' + compare_source = 'incremental_append_new_columns' + compare_target = 'incremental_append_new_columns_target' + self.run_twice_and_assert(select, compare_source, compare_target) + + def run_incremental_fail_on_schema_change(self): + select = 'model_a incremental_fail' + results_one = self.run_dbt(['run', '--models', select, '--full-refresh']) + results_two = self.run_dbt(['run', '--models', select], expect_pass = False) + self.assertIn('Compilation Error', results_two[1].message) + + def run_incremental_sync_all_columns(self): + # this doesn't work on Delta today + select = 'model_a incremental_sync_all_columns incremental_sync_all_columns_target' + compare_source = 'incremental_sync_all_columns' + compare_target = 'incremental_sync_all_columns_target' + results_one = self.run_dbt(['run', '--models', select, '--full-refresh']) + results_two = self.run_dbt(['run', '--models', select], expect_pass = False) + self.assertIn('Compilation Error', results_two[1].message) + + +class TestApacheSparkAppend(TestIncrementalOnSchemaChange): + + @property + def project_config(self): + return { + "config-version": 2, + "test-paths": ["tests"], + "models": { + "+incremental_strategy": "append", + } + } + + # only 'ignore' and 'fail' are supported + + @use_profile('apache_spark') + def test__apache_spark__run_incremental_ignore(self): + self.run_incremental_ignore() + + @use_profile('apache_spark') + def test__apache_spark__run_incremental_fail_on_schema_change(self): + self.run_incremental_fail_on_schema_change() + + +class TestApacheSparkInsertOverwrite(TestIncrementalOnSchemaChange): + + @property + def project_config(self): + return { + "config-version": 2, + "test-paths": ["tests"], + "models": { + "+file_format": "parquet", + "+partition_by": "id", + "+incremental_strategy": "insert_overwrite", + } + } + + # only 'ignore' and 'fail' are supported + + @use_profile('apache_spark') + def test__apache_spark__run_incremental_ignore(self): + self.run_incremental_ignore() + + @use_profile('apache_spark') + def test__apache_spark__run_incremental_fail_on_schema_change(self): + self.run_incremental_fail_on_schema_change() + + +class TestDeltaOnSchemaChange(TestIncrementalOnSchemaChange): + @property + def project_config(self): + return { + "config-version": 2, + "test-paths": ["tests"], + "models": { + "+file_format": "delta", + "+incremental_strategy": "merge", + "+unique_key": "id", + } + } + + @use_profile('databricks_cluster') + def test__databricks_cluster__run_incremental_ignore(self): + self.run_incremental_ignore() + + @use_profile('databricks_cluster') + def test__databricks_cluster__run_incremental_fail_on_schema_change(self): + self.run_incremental_fail_on_schema_change() + + 
@use_profile('databricks_cluster') + def test__databricks_cluster__run_incremental_append_new_columns(self): + self.run_incremental_append_new_columns() + + @use_profile('databricks_cluster') + def test__databricks_cluster__run_incremental_sync_all_columns(self): + self.run_incremental_sync_all_columns() + + @use_profile('databricks_sql_endpoint') + def test__databricks_sql_endpoint__run_incremental_ignore(self): + self.run_incremental_ignore() + + @use_profile('databricks_sql_endpoint') + def test__databricks_sql_endpoint__run_incremental_fail_on_schema_change(self): + self.run_incremental_fail_on_schema_change() + + @use_profile('databricks_sql_endpoint') + def test__databricks_sql_endpoint__run_incremental_append_new_columns(self): + self.run_incremental_append_new_columns() + + @use_profile('databricks_sql_endpoint') + def test__databricks_sql_endpoint__run_incremental_sync_all_columns(self): + self.run_incremental_sync_all_columns() From 6485ffae56d7bfb66328216cbbebaf53b75962b9 Mon Sep 17 00:00:00 2001 From: Binh Pham Date: Mon, 18 Oct 2021 01:15:29 -0700 Subject: [PATCH 266/603] add persist_doc call to incremental model with tests (#234) * add persist_doc call to incremental model with tests * update changelog * add whatis to test for incremental --- CHANGELOG.md | 2 ++ .../incremental/incremental.sql | 2 ++ .../models/incremental_delta_model.sql | 2 ++ test/custom/persist_docs/models/schema.yml | 24 +++++++++++++++++++ test/custom/persist_docs/test_persist_docs.py | 7 ++++-- 5 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 test/custom/persist_docs/models/incremental_delta_model.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index d821c39c..a2bfa10d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ ### Fixes - Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. 
This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - Parse properly columns when there are struct fields to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202)) +- Add `persist_docs` call to incremental model ([#224](https://github.com/dbt-labs/dbt-spark/issues/224)) ### Under the hood - Add `unique_field` to better understand adapter adoption in anonymous usage tracking ([#211](https://github.com/dbt-labs/dbt-spark/pull/211)) @@ -16,6 +17,7 @@ ### Contributors - [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204)) +- [@binhnefits](https://github.com/binhnefits) ([#224](https://github.com/dbt-labs/dbt-spark/issues/224)) ## dbt-spark 0.21.0b2 (August 20, 2021) diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql index 55bd3174..72b4d251 100644 --- a/dbt/include/spark/macros/materializations/incremental/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql @@ -41,6 +41,8 @@ {{ build_sql }} {%- endcall -%} + {% do persist_docs(target_relation, model) %} + {{ run_hooks(post_hooks) }} {{ return({'relations': [target_relation]}) }} diff --git a/test/custom/persist_docs/models/incremental_delta_model.sql b/test/custom/persist_docs/models/incremental_delta_model.sql new file mode 100644 index 00000000..c3f325ea --- /dev/null +++ b/test/custom/persist_docs/models/incremental_delta_model.sql @@ -0,0 +1,2 @@ +{{ config(materialized='incremental', file_format='delta') }} +select 1 as id, 'Joe' as name diff --git a/test/custom/persist_docs/models/schema.yml b/test/custom/persist_docs/models/schema.yml index 78dcda79..2639037b 100644 --- a/test/custom/persist_docs/models/schema.yml +++ b/test/custom/persist_docs/models/schema.yml @@ -69,3 +69,27 @@ models: -- /* comment */ Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + + - name: incremental_delta_model + description: | + Incremental model description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + columns: + - name: id + description: | + id Column description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting + - name: name + description: | + Some stuff here and then a call to + {{ doc('my_fun_doc')}} diff --git a/test/custom/persist_docs/test_persist_docs.py b/test/custom/persist_docs/test_persist_docs.py index d9acf70d..64c54085 100644 --- a/test/custom/persist_docs/test_persist_docs.py +++ b/test/custom/persist_docs/test_persist_docs.py @@ -42,7 +42,11 @@ def test_delta_comments(self): self.run_dbt(['seed']) self.run_dbt(['run']) - for table in ['table_delta_model', 'seed']: + for table, whatis in [ + ('table_delta_model', 'Table'), + ('seed', 'Seed'), + ('incremental_delta_model', 'Incremental') + ]: results = self.run_sql( 'describe extended {schema}.{table}'.format(schema=self.unique_schema(), table=table), fetch='all' @@ -50,7 +54,6 @@ def test_delta_comments(self): for result in results: if result[0] == 'Comment': - whatis = 'Seed' if table == 'seed' else 
'Table' assert result[1].startswith(f'{whatis} model description') if result[0] == 'id': assert result[2].startswith('id Column description') From 8c48093e12ce1b928037207e22112b0d1b411752 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 18 Oct 2021 10:17:12 +0200 Subject: [PATCH 267/603] Reorg changelog --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2bfa10d..c336aa50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,16 @@ ### Fixes - Fix `--store-failures` for tests, by suppressing irrelevant error in `comment_clause()` macro ([#232](https://github.com/dbt-labs/dbt-spark/issues/232), [#233](https://github.com/dbt-labs/dbt-spark/pull/233)) - Add support for `on_schema_change` config in incremental models: `ignore`, `fail`, `append_new_columns`. For `sync_all_columns`, removing columns is not supported by Apache Spark or Delta Lake ([#198](https://github.com/dbt-labs/dbt-spark/issues/198), [#226](https://github.com/dbt-labs/dbt-spark/issues/226), [#229](https://github.com/dbt-labs/dbt-spark/pull/229)) +- Add `persist_docs` call to incremental model ([#224](https://github.com/dbt-labs/dbt-spark/issues/224), [#234](https://github.com/dbt-labs/dbt-spark/pull/234)) + +### Contributors +- [@binhnefits](https://github.com/binhnefits) ([#234](https://github.com/dbt-labs/dbt-spark/pull/234)) ## dbt-spark 0.21.0 (October 4, 2021) ### Fixes - Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - Parse properly columns when there are struct fields to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202)) -- Add `persist_docs` call to incremental model ([#224](https://github.com/dbt-labs/dbt-spark/issues/224)) ### Under the hood - Add `unique_field` to better understand adapter adoption in anonymous usage tracking ([#211](https://github.com/dbt-labs/dbt-spark/pull/211)) @@ -17,7 +20,6 @@ ### Contributors - [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207)) - [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204)) -- [@binhnefits](https://github.com/binhnefits) ([#224](https://github.com/dbt-labs/dbt-spark/issues/224)) ## dbt-spark 0.21.0b2 (August 20, 2021) From 27e19f65aa1dfdc234822862a8db9e55bdd3d388 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Fri, 22 Oct 2021 11:36:58 -0400 Subject: [PATCH 268/603] Moving unit testing to Actions (#237) --- .github/workflows/main.yml | 201 +++++++++++++++++++++++++++++++++++++ dev_requirements.txt | 1 + scripts/build-dist.sh | 20 ++++ setup.py | 2 +- tox.ini | 12 +-- 5 files changed, 229 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/main.yml create mode 100755 scripts/build-dist.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..f5144b15 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,201 @@ +# **what?** +# Runs code quality checks, unit tests, and verifies python build on +# all code commited to the repository. This workflow should not +# require any secrets since it runs for PRs from forked repos. 
+# By default, secrets are not passed to workflows running from +# a forked repo. + +# **why?** +# Ensure code for dbt meets a certain quality standard. + +# **when?** +# This will run for all PRs, when code is pushed to a release +# branch, and when manually triggered. + +name: Tests and Code Checks + +on: + push: + branches: + - "main" + - "develop" + - "*.latest" + - "releases/*" + pull_request: + workflow_dispatch: + +permissions: read-all + +# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +jobs: + code-quality: + name: ${{ matrix.toxenv }} + + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + toxenv: [flake8] + + env: + TOXENV: ${{ matrix.toxenv }} + PYTEST_ADDOPTS: "-v --color=yes" + + steps: + - name: Check out the repository + uses: actions/checkout@v2 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.8' + + - name: Install python dependencies + run: | + sudo apt-get install libsasl2-dev + pip install --user --upgrade pip + pip install tox + pip --version + tox --version + - name: Run tox + run: tox + + unit: + name: unit test / python ${{ matrix.python-version }} + + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: [3.6, 3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689) + + env: + TOXENV: "unit" + PYTEST_ADDOPTS: "-v --color=yes --csv unit_results.csv" + + steps: + - name: Check out the repository + uses: actions/checkout@v2 + with: + persist-credentials: false + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install python dependencies + run: | + sudo apt-get install libsasl2-dev + pip install --user --upgrade pip + pip install tox + pip --version + tox --version + - name: Run tox + run: tox + + - name: Get current date + if: always() + id: date + run: echo "::set-output name=date::$(date +'%Y-%m-%dT%H_%M_%S')" #no colons allowed for artifacts + + - uses: actions/upload-artifact@v2 + if: always() + with: + name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv + path: unit_results.csv + + build: + name: build packages + + runs-on: ubuntu-latest + + steps: + - name: Check out the repository + uses: actions/checkout@v2 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install python dependencies + run: | + pip install --user --upgrade pip + pip install --upgrade setuptools wheel twine check-wheel-contents + pip --version + - name: Build distributions + run: ./scripts/build-dist.sh + + - name: Show distributions + run: ls -lh dist/ + + - name: Check distribution descriptions + run: | + twine check dist/* + - name: Check wheel contents + run: | + check-wheel-contents dist/*.whl --ignore W007,W008 + - uses: actions/upload-artifact@v2 + with: + name: dist + path: dist/ + + test-build: + name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }} + + needs: build + + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, 
windows-latest] + python-version: [3.6, 3.7, 3.8, 3.9] + + steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install python dependencies + run: | + pip install --user --upgrade pip + pip install --upgrade wheel + pip --version + - uses: actions/download-artifact@v2 + with: + name: dist + path: dist/ + + - name: Show distributions + run: ls -lh dist/ + + - name: Install wheel distributions + run: | + find ./dist/*.whl -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/ + - name: Check wheel distributions + run: | + dbt --version + - name: Install source distributions + run: | + find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/ + - name: Check source distributions + run: | + dbt --version diff --git a/dev_requirements.txt b/dev_requirements.txt index 95e4df5e..08f3c37b 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -8,6 +8,7 @@ tox==3.2.0 ipdb pytest-xdist>=2.1.0,<3 flaky>=3.5.3,<4 +pytest-csv # Test requirements pytest-dbt-adapter==0.5.1 diff --git a/scripts/build-dist.sh b/scripts/build-dist.sh new file mode 100755 index 00000000..65e6dbc9 --- /dev/null +++ b/scripts/build-dist.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -eo pipefail + +DBT_PATH="$( cd "$(dirname "$0")/.." ; pwd -P )" + +PYTHON_BIN=${PYTHON_BIN:-python} + +echo "$PYTHON_BIN" + +set -x + +rm -rf "$DBT_PATH"/dist +rm -rf "$DBT_PATH"/build +mkdir -p "$DBT_PATH"/dist + +cd "$DBT_PATH" +$PYTHON_BIN setup.py sdist bdist_wheel + +set +x diff --git a/setup.py b/setup.py index f0f098fd..2081e98f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ this_directory = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(this_directory, 'README.md')) as f: +with open(os.path.join(this_directory, 'README.md'), 'r', encoding='utf8') as f: long_description = f.read() diff --git a/tox.ini b/tox.ini index 76b34f6d..fcdd45d8 100644 --- a/tox.ini +++ b/tox.ini @@ -6,14 +6,14 @@ envlist = unit, flake8, integration-spark-thrift [testenv:flake8] basepython = python3 commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 dbt/' -passenv = DBT_INVOCATION_ENV +passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/dev_requirements.txt [testenv:unit] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} test/unit' -passenv = DBT_INVOCATION_ENV +passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -21,7 +21,7 @@ deps = [testenv:integration-spark-databricks-http] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-http.dbtspec' -passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV +passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -31,7 +31,7 @@ deps = basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-cluster.dbtspec' /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 test/custom/*' -passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER +passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -41,7 +41,7 @@ deps = basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v 
test/integration/spark-databricks-odbc-sql-endpoint.dbtspec' /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 test/custom/*' -passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_ENDPOINT DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER +passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -52,7 +52,7 @@ deps = basepython = python3 commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-thrift.dbtspec' /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 test/custom/*' -passenv = DBT_INVOCATION_ENV +passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt From 47e4f9c3d26c2abaa3ad32dabc8fcfbab740ca48 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Mon, 25 Oct 2021 21:33:56 -0400 Subject: [PATCH 269/603] Updating issue templates (#241) * Updating issue templates * Updating Readme --- .github/ISSUE_TEMPLATE/bug_report.md | 1 + .github/ISSUE_TEMPLATE/dependabot.yml | 8 + .github/ISSUE_TEMPLATE/release.md | 10 + .github/pull_request_template.md | 12 +- .github/workflows/version-bump.yml | 102 ++++++++++ README.md | 280 ++------------------------ etc/dbt-logo-full.svg | 1 - 7 files changed, 146 insertions(+), 268 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/dependabot.yml create mode 100644 .github/ISSUE_TEMPLATE/release.md create mode 100644 .github/workflows/version-bump.yml delete mode 100644 etc/dbt-logo-full.svg diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 974c36af..43f19a15 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -19,6 +19,7 @@ A clear and concise description of what you expected to happen. ### Screenshots and log output If applicable, add screenshots or log output to help explain your problem. +### System information **The output of `dbt --version`:** ``` diff --git a/.github/ISSUE_TEMPLATE/dependabot.yml b/.github/ISSUE_TEMPLATE/dependabot.yml new file mode 100644 index 00000000..8a8c85b9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + # python dependencies + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + rebase-strategy: "disabled" \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/release.md b/.github/ISSUE_TEMPLATE/release.md new file mode 100644 index 00000000..ac28792a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/release.md @@ -0,0 +1,10 @@ +--- +name: Release +about: Release a new version of dbt-spark +title: '' +labels: release +assignees: '' + +--- + +### TBD \ No newline at end of file diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 45a73f84..60e12779 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -9,15 +9,13 @@ resolves # resolves #1234 --> - ### Description - ### Checklist - - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) - - [ ] I have run this code in development and it appears to resolve the stated issue - - [ ] This PR includes tests, or tests are not required/relevant for this PR - - [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt next" section. 
- \ No newline at end of file + +- [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) +- [ ] I have run this code in development and it appears to resolve the stated issue +- [ ] This PR includes tests, or tests are not required/relevant for this PR +- [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-spark next" section. \ No newline at end of file diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml new file mode 100644 index 00000000..4913a6e8 --- /dev/null +++ b/.github/workflows/version-bump.yml @@ -0,0 +1,102 @@ +# **what?** +# This workflow will take a version number and a dry run flag. With that +# it will run versionbump to update the version number everywhere in the +# code base and then generate an update Docker requirements file. If this +# is a dry run, a draft PR will open with the changes. If this isn't a dry +# run, the changes will be committed to the branch this is run on. + +# **why?** +# This is to aid in releasing dbt and making sure we have updated +# the versions and Docker requirements in all places. + +# **when?** +# This is triggered either manually OR +# from the repository_dispatch event "version-bump" which is sent from +# the dbt-release repo Action + +name: Version Bump + +on: + workflow_dispatch: + inputs: + version_number: + description: 'The version number to bump to' + required: true + is_dry_run: + description: 'Creates a draft PR to allow testing instead of committing to a branch' + required: true + default: 'true' + repository_dispatch: + types: [version-bump] + +jobs: + bump: + runs-on: ubuntu-latest + steps: + - name: Check out the repository + uses: actions/checkout@v2 + + - name: Set version and dry run values + id: variables + env: + VERSION_NUMBER: "${{ github.event.client_payload.version_number == '' && github.event.inputs.version_number || github.event.client_payload.version_number }}" + IS_DRY_RUN: "${{ github.event.client_payload.is_dry_run == '' && github.event.inputs.is_dry_run || github.event.client_payload.is_dry_run }}" + run: | + echo Repository dispatch event version: ${{ github.event.client_payload.version_number }} + echo Repository dispatch event dry run: ${{ github.event.client_payload.is_dry_run }} + echo Workflow dispatch event version: ${{ github.event.inputs.version_number }} + echo Workflow dispatch event dry run: ${{ github.event.inputs.is_dry_run }} + echo ::set-output name=VERSION_NUMBER::$VERSION_NUMBER + echo ::set-output name=IS_DRY_RUN::$IS_DRY_RUN + + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + + - name: Install python dependencies + run: | + python3 -m venv env + source env/bin/activate + pip install --upgrade pip + + - name: Create PR branch + if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }} + run: | + git checkout -b bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID + git push origin bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID + git branch --set-upstream-to=origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID + + - name: Bumping version + run: | + source env/bin/activate + pip install -r dev_requirements.txt + env/bin/bumpversion --allow-dirty --new-version ${{steps.variables.outputs.VERSION_NUMBER}} major + git status + + - name: Commit version bump directly + uses: EndBug/add-and-commit@v7 + if: ${{ 
steps.variables.outputs.IS_DRY_RUN == 'false' }} + with: + author_name: 'Github Build Bot' + author_email: 'buildbot@fishtownanalytics.com' + message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}' + + - name: Commit version bump to branch + uses: EndBug/add-and-commit@v7 + if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }} + with: + author_name: 'Github Build Bot' + author_email: 'buildbot@fishtownanalytics.com' + message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}' + branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' + push: 'origin origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' + + - name: Create Pull Request + uses: peter-evans/create-pull-request@v3 + if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }} + with: + author: 'Github Build Bot ' + draft: true + base: ${{github.ref}} + title: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}' + branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' diff --git a/README.md b/README.md index c330afde..58516a1e 100644 --- a/README.md +++ b/README.md @@ -1,279 +1,39 @@
-  [dbt logo image]
+  [dbt logo image]
-  [CircleCI badge]  [Slack badge]
+  [Unit Tests Badge]  [Integration Tests Badge]
-# dbt-spark +**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications. -This plugin ports [dbt](https://getdbt.com) functionality to Spark. It supports running dbt against Spark clusters that are hosted via Databricks (AWS + Azure), Amazon EMR, or Docker. +dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis. -We have not tested extensively against older versions of Apache Spark. The plugin uses syntax that requires version 2.2.0 or newer. Some features require Spark 3.0 and/or Delta Lake. +## dbt-snowflake -### Documentation -For more information on using Spark with dbt, consult the dbt documentation: -- [Spark profile](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile/) -- [Spark specific configs](https://docs.getdbt.com/reference/resource-configs/spark-configs/) +The `dbt-spark` package contains all of the code enabling dbt to work with Apache Spark and Databricks. For +more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark). -### Installation -This plugin can be installed via pip. Depending on your connection method, you need to specify an extra requirement. +## Getting started -If connecting to Databricks via ODBC driver, it requires [`pyodbc`](https://github.com/mkleehammer/pyodbc). Depending on your system1, you can install it seperately or via pip: +- [Install dbt](https://docs.getdbt.com/docs/installation) +- Read the [introduction](https://docs.getdbt.com/docs/introduction/) and [viewpoint](https://docs.getdbt.com/docs/about/viewpoint/) -```bash -# Install dbt-spark from PyPi for odbc connections: -$ pip install "dbt-spark[ODBC]" -``` +## Join the dbt Community -If connecting to a Spark cluster via the generic `thrift` or `http` methods, it requires [`PyHive`](https://github.com/dropbox/PyHive): +- Be part of the conversation in the [dbt Community Slack](http://community.getdbt.com/) +- Read more on the [dbt Community Discourse](https://discourse.getdbt.com) -```bash -# Install dbt-spark from PyPi for thrift or http connections: -$ pip install "dbt-spark[PyHive]" -``` +## Reporting bugs and contributing code -1See the [`pyodbc` wiki](https://github.com/mkleehammer/pyodbc/wiki/Install) for OS-specific installation details. - - -### Configuring your profile - -**Connection Method** - -Connections can be made to Spark in three different modes: -- `odbc` is the preferred mode when connecting to Databricks. It supports connecting to a SQL Endpoint or an all-purpose interactive cluster. -- `http` is a more generic mode for connecting to a managed service that provides an HTTP endpoint. Currently, this includes connections to a Databricks interactive cluster. -- `thrift` connects directly to the lead node of a cluster, either locally hosted / on premise or in the cloud (e.g. Amazon EMR). 
- -A dbt profile for Spark connections support the following configurations: - -**Key**: -- ✅ Required -- ❌ Not used -- ❔ Optional (followed by `default value` in parentheses) - -| Option | Description | ODBC | Thrift | HTTP | Example | -|-|-|-|-|-|-| -| method | Specify the connection method (`odbc` or `thrift` or `http`) | ✅ | ✅ | ✅ | `odbc` | -| schema | Specify the schema (database) to build models into | ✅ | ✅ | ✅ | `analytics` | -| host | The hostname to connect to | ✅ | ✅ | ✅ | `yourorg.sparkhost.com` | -| port | The port to connect to the host on | ❔ (`443`) | ❔ (`443`) | ❔ (`10001`) | `443` | -| token | The token to use for authenticating to the cluster | ✅ | ❌ | ✅ | `abc123` | -| auth | The value of `hive.server2.authentication` | ❌ | ❔ | ❌ | `KERBEROS` | -| kerberos_service_name | Use with `auth='KERBEROS'` | ❌ | ❔ | ❌ | `hive` | -| organization | Azure Databricks workspace ID (see note) | ❔ | ❌ | ❔ | `1234567891234567` | -| cluster | The name of the cluster to connect to | ✅ (unless `endpoint`) | ❌ | ✅ | `01234-23423-coffeetime` | -| endpoint | The ID of the SQL endpoint to connect to | ✅ (unless `cluster`) | ❌ | ❌ | `1234567891234a` | -| driver | Path of ODBC driver installed or name of the ODBC driver configured | ✅ | ❌ | ❌ | `/opt/simba/spark/lib/64/libsparkodbc_sb64.so` | -| user | The username to use to connect to the cluster | ❔ | ❔ | ❔ | `hadoop` | -| connect_timeout | The number of seconds to wait before retrying to connect to a Pending Spark cluster | ❌ | ❔ (`10`) | ❔ (`10`) | `60` | -| connect_retries | The number of times to try connecting to a Pending Spark cluster before giving up | ❌ | ❔ (`0`) | ❔ (`0`) | `5` | -| use_ssl | The value of `hive.server2.use.SSL` (`True` or `False`). Default ssl store (ssl.get_default_verify_paths()) is the valid location for SSL certificate | ❌ | ❔ (`False`) | ❌ | `True` | -| retry_all | Whether to retry all failed connections, and not just 'retryable' ones | ❌ | ❔ (`false`) | ❔ (`false`) | `false` | - -**Databricks** connections differ based on the cloud provider: - -- **Organization:** To connect to an Azure Databricks cluster, you will need to obtain your organization ID, which is a unique ID Azure Databricks generates for each customer workspace. To find the organization ID, see https://docs.microsoft.com/en-us/azure/databricks/dev-tools/databricks-connect#step-2-configure-connection-properties. This is a string field; if there is a leading zero, be sure to include it. - -- **Host:** The host field for Databricks can be found at the start of your workspace or cluster url: `region.azuredatabricks.net` for Azure, or `account.cloud.databricks.com` for AWS. Do not include `https://`. - -**Amazon EMR**: To connect to Spark running on an Amazon EMR cluster, you will need to run `sudo /usr/lib/spark/sbin/start-thriftserver.sh` on the master node of the cluster to start the Thrift server (see https://aws.amazon.com/premiumsupport/knowledge-center/jdbc-connection-emr/ for further context). You will also need to connect to port `10001`, which will connect to the Spark backend Thrift server; port `10000` will instead connect to a Hive backend, which will not work correctly with dbt. 
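For example, a minimal profile for an EMR cluster reached over Thrift might look like the following sketch — the `host` value is a hypothetical EMR primary-node address, and the retry settings are illustrative only:

```
your_profile_name:
  target: dev
  outputs:
    dev:
      type: spark
      method: thrift
      host: ec2-12-345-678-90.compute-1.amazonaws.com  # hypothetical EMR primary node
      port: 10001  # Spark Thrift server (not the Hive backend on port 10000)
      user: hadoop
      schema: analytics
      connect_retries: 5
      connect_timeout: 60
```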
- - -**Example profiles.yml entries:** - -**ODBC** -``` -your_profile_name: - target: dev - outputs: - dev: - type: spark - method: odbc - driver: path/to/driver - host: yourorg.databricks.com - organization: 1234567891234567 # Azure Databricks only - port: 443 # default - token: abc123 - schema: analytics - - # one of: - cluster: 01234-23423-coffeetime - endpoint: coffee01234time -``` - -**Thrift** -``` -your_profile_name: - target: dev - outputs: - dev: - type: spark - method: thrift - host: 127.0.0.1 - port: 10001 # default - schema: analytics - - # optional - user: hadoop - auth: KERBEROS - kerberos_service_name: hive - connect_retries: 5 - connect_timeout: 60 - retry_all: true -``` - - -**HTTP** -``` -your_profile_name: - target: dev - outputs: - dev: - type: spark - method: http - host: yourorg.sparkhost.com - organization: 1234567891234567 # Azure Databricks only - port: 443 # default - token: abc123 - schema: analytics - cluster: 01234-23423-coffeetime - - # optional - connect_retries: 5 - connect_timeout: 60 - retry_all: true -``` - - -### Usage Notes - -**Model Configuration** - -The following configurations can be supplied to models run with the dbt-spark plugin: - - -| Option | Description | Required? | Example | -| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | -------------------- | -| file_format | The file format to use when creating tables (`parquet`, `delta`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | `parquet` | -| location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | `/mnt/root` | -| partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | `partition_1` | -| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | `cluster_1` | -| buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | `8` | -| incremental_strategy | The strategy to use for incremental models (`append`, `insert_overwrite`, or `merge`). | Optional (default: `append`) | `merge` | -| persist_docs | Whether dbt should include the model description as a table or column `comment` | Optional | `{'relation': true, 'columns': true}` | - - -**Incremental Models** - -dbt has a number of ways to build models incrementally, called "incremental strategies." Some strategies depend on certain file formats, connection types, and other model configurations: -- `append` (default): Insert new records without updating or overwriting any existing data. -- `insert_overwrite`: If `partition_by` is specified, overwrite partitions in the table with new data. (Be sure to re-select _all_ of the relevant data for a partition.) If no `partition_by` is specified, overwrite the entire table with new data. [Cannot be used with `file_format: delta` or when connectinng via Databricks SQL Endpoints. For dynamic partition replacement with `method: odbc` + Databricks `cluster`, you must you **must** include `set spark.sql.sources.partitionOverwriteMode DYNAMIC` in the [cluster SparkConfig](https://docs.databricks.com/clusters/configure.html#spark-config). For atomic replacement of Delta tables, use the `table` materialization instead.] 
-- `merge`: Match records based on a `unique_key`; update old records, insert new ones. (If no `unique_key` is specified, all new data is inserted, similar to `append`.) [Requires `file_format: delta`. Available only on Databricks Runtime.] - -Examples: - -```sql -{{ config( - materialized='incremental', - incremental_strategy='append', -) }} - - --- All rows returned by this query will be appended to the existing table - -select * from {{ ref('events') }} -{% if is_incremental() %} - where event_ts > (select max(event_ts) from {{ this }}) -{% endif %} -``` - -```sql -{{ config( - materialized='incremental', - incremental_strategy='merge', - partition_by=['date_day'], - file_format='parquet' -) }} - --- Every partition returned by this query will overwrite existing partitions - -select - date_day, - count(*) as users - -from {{ ref('events') }} -{% if is_incremental() %} - where date_day > (select max(date_day) from {{ this }}) -{% endif %} -group by 1 -``` - -```sql -{{ config( - materialized='incremental', - incremental_strategy='merge', - unique_key='event_id', - file_format='delta' -) }} - --- Existing events, matched on `event_id`, will be updated --- New events will be appended - -select * from {{ ref('events') }} -{% if is_incremental() %} - where date_day > (select max(date_day) from {{ this }}) -{% endif %} -``` - -### Running locally - -A `docker-compose` environment starts a Spark Thrift server and a Postgres database as a Hive Metastore backend. - -``` -docker-compose up -``` - -Create a profile like this one: - -``` -spark-testing: - target: local - outputs: - local: - type: spark - method: thrift - host: 127.0.0.1 - port: 10000 - user: dbt - schema: analytics - connect_retries: 5 - connect_timeout: 60 - retry_all: true -``` - -Connecting to the local spark instance: - -* The Spark UI should be available at [http://localhost:4040/sqlserver/](http://localhost:4040/sqlserver/) -* The endpoint for SQL-based testing is at `http://localhost:10000` and can be referenced with the Hive or Spark JDBC drivers using connection string `jdbc:hive2://localhost:10000` and default credentials `dbt`:`dbt` - -Note that the Hive metastore data is persisted under `./.hive-metastore/`, and the Spark-produced data under `./.spark-warehouse/`. To completely reset you environment run the following: - -``` -docker-compose down -rm -rf ./.hive-metastore/ -rm -rf ./.spark-warehouse/ -``` - -### Reporting bugs and contributing code - -- Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt-spark/issues/new). +- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-snowflake/issues/new) +- Want to help us build dbt? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt/blob/HEAD/CONTRIBUTING.md) ## Code of Conduct -Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/). +Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [dbt Code of Conduct](https://community.getdbt.com/code-of-conduct). 
diff --git a/etc/dbt-logo-full.svg b/etc/dbt-logo-full.svg deleted file mode 100644 index 88f84b70..00000000 --- a/etc/dbt-logo-full.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file From 66f19158fa452bf0830d486e4ae5d950978a943d Mon Sep 17 00:00:00 2001 From: Leah Antkiewicz Date: Mon, 25 Oct 2021 21:59:09 -0400 Subject: [PATCH 270/603] Fix README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 58516a1e..6795d76b 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis. -## dbt-snowflake +## dbt-spark The `dbt-spark` package contains all of the code enabling dbt to work with Apache Spark and Databricks. For more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark). @@ -31,7 +31,7 @@ more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark) ## Reporting bugs and contributing code -- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-snowflake/issues/new) +- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-spark/issues/new) - Want to help us build dbt? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt/blob/HEAD/CONTRIBUTING.md) ## Code of Conduct From 1da0c19e8b898303cbf4f7c380f564c8433d2ad7 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Mon, 25 Oct 2021 22:54:47 -0400 Subject: [PATCH 271/603] Rename test dir (#242) * Rename test dir * Remove custom dir * Update tox to remove custom dir --- {test => tests}/__init__.py | 0 {test/custom => tests/integration}/__init__.py | 0 {test/custom => tests/integration}/base.py | 4 ++-- {test/custom => tests/integration}/conftest.py | 0 .../get_columns_in_relation/models/child.sql | 0 .../models/get_columns_from_child.sql | 0 .../test_get_columns_in_relation.py | 4 ++-- .../models/incremental_append_new_columns.sql | 0 .../incremental_append_new_columns_target.sql | 0 .../models/incremental_fail.sql | 0 .../models/incremental_ignore.sql | 0 .../models/incremental_ignore_target.sql | 0 .../models/incremental_sync_all_columns.sql | 0 .../incremental_sync_all_columns_target.sql | 0 .../models/model_a.sql | 0 .../test_incremental_on_schema_change.py | 4 ++-- .../data/expected_append.csv | 0 .../data/expected_overwrite.csv | 0 .../data/expected_partial_upsert.csv | 0 .../data/expected_upsert.csv | 0 .../models/default_append.sql | 0 .../models_bad/bad_file_format.sql | 0 .../models_bad/bad_insert_overwrite_delta.sql | 0 .../models_bad/bad_merge_not_delta.sql | 0 .../models_bad/bad_strategy.sql | 0 .../models_delta/append_delta.sql | 0 .../models_delta/merge_no_key.sql | 0 .../models_delta/merge_unique_key.sql | 0 .../models_delta/merge_update_columns.sql | 0 .../insert_overwrite_no_partitions.sql | 0 .../insert_overwrite_partitions.sql | 0 .../test_incremental_strategies.py | 4 ++-- .../integration}/persist_docs/data/seed.csv | 0 .../integration}/persist_docs/data/seeds.yml | 0 .../models/incremental_delta_model.sql | 0 .../persist_docs/models/my_fun_docs.md | 0 .../persist_docs/models/no_docs_model.sql | 0 .../integration}/persist_docs/models/schema.yml | 0 .../persist_docs/models/table_delta_model.sql | 0 
.../persist_docs/models/view_model.sql | 0 .../persist_docs/test_persist_docs.py | 4 ++-- .../seed_column_types/data/payments.csv | 0 .../seed_column_types/test_seed_column_types.py | 4 ++-- .../integration/spark-databricks-http.dbtspec | 0 .../spark-databricks-odbc-cluster.dbtspec | 0 .../spark-databricks-odbc-sql-endpoint.dbtspec | 0 {test => tests}/integration/spark-thrift.dbtspec | 0 .../store_failures/models/schema.yml | 0 .../store_failures/models/view_model.sql | 0 .../store_failures/test_store_failures.py | 4 ++-- {test => tests}/unit/__init__.py | 0 {test => tests}/unit/test_adapter.py | 0 {test => tests}/unit/test_column.py | 0 {test => tests}/unit/test_macros.py | 0 {test => tests}/unit/utils.py | 0 tox.ini | 16 ++++++++-------- 56 files changed, 22 insertions(+), 22 deletions(-) rename {test => tests}/__init__.py (100%) rename {test/custom => tests/integration}/__init__.py (100%) rename {test/custom => tests/integration}/base.py (97%) rename {test/custom => tests/integration}/conftest.py (100%) rename {test/custom => tests/integration}/get_columns_in_relation/models/child.sql (100%) rename {test/custom => tests/integration}/get_columns_in_relation/models/get_columns_from_child.sql (100%) rename {test/custom => tests/integration}/get_columns_in_relation/test_get_columns_in_relation.py (84%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/incremental_append_new_columns.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/incremental_append_new_columns_target.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/incremental_fail.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/incremental_ignore.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/incremental_ignore_target.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/incremental_sync_all_columns.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/models/model_a.sql (100%) rename {test/custom => tests/integration}/incremental_on_schema_change/test_incremental_on_schema_change.py (97%) rename {test/custom => tests/integration}/incremental_strategies/data/expected_append.csv (100%) rename {test/custom => tests/integration}/incremental_strategies/data/expected_overwrite.csv (100%) rename {test/custom => tests/integration}/incremental_strategies/data/expected_partial_upsert.csv (100%) rename {test/custom => tests/integration}/incremental_strategies/data/expected_upsert.csv (100%) rename {test/custom => tests/integration}/incremental_strategies/models/default_append.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_bad/bad_file_format.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_bad/bad_merge_not_delta.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_bad/bad_strategy.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_delta/append_delta.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_delta/merge_no_key.sql (100%) rename {test/custom => 
tests/integration}/incremental_strategies/models_delta/merge_unique_key.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_delta/merge_update_columns.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql (100%) rename {test/custom => tests/integration}/incremental_strategies/test_incremental_strategies.py (95%) rename {test/custom => tests/integration}/persist_docs/data/seed.csv (100%) rename {test/custom => tests/integration}/persist_docs/data/seeds.yml (100%) rename {test/custom => tests/integration}/persist_docs/models/incremental_delta_model.sql (100%) rename {test/custom => tests/integration}/persist_docs/models/my_fun_docs.md (100%) rename {test/custom => tests/integration}/persist_docs/models/no_docs_model.sql (100%) rename {test/custom => tests/integration}/persist_docs/models/schema.yml (100%) rename {test/custom => tests/integration}/persist_docs/models/table_delta_model.sql (100%) rename {test/custom => tests/integration}/persist_docs/models/view_model.sql (100%) rename {test/custom => tests/integration}/persist_docs/test_persist_docs.py (94%) rename {test/custom => tests/integration}/seed_column_types/data/payments.csv (100%) rename {test/custom => tests/integration}/seed_column_types/test_seed_column_types.py (86%) rename {test => tests}/integration/spark-databricks-http.dbtspec (100%) rename {test => tests}/integration/spark-databricks-odbc-cluster.dbtspec (100%) rename {test => tests}/integration/spark-databricks-odbc-sql-endpoint.dbtspec (100%) rename {test => tests}/integration/spark-thrift.dbtspec (100%) rename {test/custom => tests/integration}/store_failures/models/schema.yml (100%) rename {test/custom => tests/integration}/store_failures/models/view_model.sql (100%) rename {test/custom => tests/integration}/store_failures/test_store_failures.py (91%) rename {test => tests}/unit/__init__.py (100%) rename {test => tests}/unit/test_adapter.py (100%) rename {test => tests}/unit/test_column.py (100%) rename {test => tests}/unit/test_macros.py (100%) rename {test => tests}/unit/utils.py (100%) diff --git a/test/__init__.py b/tests/__init__.py similarity index 100% rename from test/__init__.py rename to tests/__init__.py diff --git a/test/custom/__init__.py b/tests/integration/__init__.py similarity index 100% rename from test/custom/__init__.py rename to tests/integration/__init__.py diff --git a/test/custom/base.py b/tests/integration/base.py similarity index 97% rename from test/custom/base.py rename to tests/integration/base.py index 28fcad3c..962ecbea 100644 --- a/test/custom/base.py +++ b/tests/integration/base.py @@ -5,7 +5,7 @@ import pyodbc -class DBTSparkIntegrationTest(DBTIntegrationTestBase): +class DBTIntegrationTest(DBTIntegrationTestBase): def get_profile(self, adapter_type): if adapter_type == 'apache_spark': @@ -157,7 +157,7 @@ def outer(wrapped): def func(self, *args, **kwargs): return wrapped(self, *args, **kwargs) # sanity check at import time - assert DBTSparkIntegrationTest._profile_from_test_name( + assert DBTIntegrationTest._profile_from_test_name( wrapped.__name__) == profile_name return func return outer diff --git a/test/custom/conftest.py b/tests/integration/conftest.py similarity index 100% rename from test/custom/conftest.py rename to tests/integration/conftest.py diff --git 
a/test/custom/get_columns_in_relation/models/child.sql b/tests/integration/get_columns_in_relation/models/child.sql similarity index 100% rename from test/custom/get_columns_in_relation/models/child.sql rename to tests/integration/get_columns_in_relation/models/child.sql diff --git a/test/custom/get_columns_in_relation/models/get_columns_from_child.sql b/tests/integration/get_columns_in_relation/models/get_columns_from_child.sql similarity index 100% rename from test/custom/get_columns_in_relation/models/get_columns_from_child.sql rename to tests/integration/get_columns_in_relation/models/get_columns_from_child.sql diff --git a/test/custom/get_columns_in_relation/test_get_columns_in_relation.py b/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py similarity index 84% rename from test/custom/get_columns_in_relation/test_get_columns_in_relation.py rename to tests/integration/get_columns_in_relation/test_get_columns_in_relation.py index e2c1d7a4..418cbd99 100644 --- a/test/custom/get_columns_in_relation/test_get_columns_in_relation.py +++ b/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py @@ -1,7 +1,7 @@ -from test.custom.base import DBTSparkIntegrationTest, use_profile +from tests.integration.base import DBTIntegrationTest, use_profile -class TestGetColumnInRelationInSameRun(DBTSparkIntegrationTest): +class TestGetColumnInRelationInSameRun(DBTIntegrationTest): @property def schema(self): return "get_columns_in_relation" diff --git a/test/custom/incremental_on_schema_change/models/incremental_append_new_columns.sql b/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/incremental_append_new_columns.sql rename to tests/integration/incremental_on_schema_change/models/incremental_append_new_columns.sql diff --git a/test/custom/incremental_on_schema_change/models/incremental_append_new_columns_target.sql b/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns_target.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/incremental_append_new_columns_target.sql rename to tests/integration/incremental_on_schema_change/models/incremental_append_new_columns_target.sql diff --git a/test/custom/incremental_on_schema_change/models/incremental_fail.sql b/tests/integration/incremental_on_schema_change/models/incremental_fail.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/incremental_fail.sql rename to tests/integration/incremental_on_schema_change/models/incremental_fail.sql diff --git a/test/custom/incremental_on_schema_change/models/incremental_ignore.sql b/tests/integration/incremental_on_schema_change/models/incremental_ignore.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/incremental_ignore.sql rename to tests/integration/incremental_on_schema_change/models/incremental_ignore.sql diff --git a/test/custom/incremental_on_schema_change/models/incremental_ignore_target.sql b/tests/integration/incremental_on_schema_change/models/incremental_ignore_target.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/incremental_ignore_target.sql rename to tests/integration/incremental_on_schema_change/models/incremental_ignore_target.sql diff --git a/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns.sql 
b/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/incremental_sync_all_columns.sql rename to tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns.sql diff --git a/test/custom/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql b/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql rename to tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql diff --git a/test/custom/incremental_on_schema_change/models/model_a.sql b/tests/integration/incremental_on_schema_change/models/model_a.sql similarity index 100% rename from test/custom/incremental_on_schema_change/models/model_a.sql rename to tests/integration/incremental_on_schema_change/models/model_a.sql diff --git a/test/custom/incremental_on_schema_change/test_incremental_on_schema_change.py b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py similarity index 97% rename from test/custom/incremental_on_schema_change/test_incremental_on_schema_change.py rename to tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py index e259e5c9..2d967a4d 100644 --- a/test/custom/incremental_on_schema_change/test_incremental_on_schema_change.py +++ b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py @@ -1,9 +1,9 @@ from cProfile import run -from test.custom.base import DBTSparkIntegrationTest, use_profile +from tests.integration.base import DBTIntegrationTest, use_profile import dbt.exceptions -class TestIncrementalOnSchemaChange(DBTSparkIntegrationTest): +class TestIncrementalOnSchemaChange(DBTIntegrationTest): @property def schema(self): return "incremental_on_schema_change" diff --git a/test/custom/incremental_strategies/data/expected_append.csv b/tests/integration/incremental_strategies/data/expected_append.csv similarity index 100% rename from test/custom/incremental_strategies/data/expected_append.csv rename to tests/integration/incremental_strategies/data/expected_append.csv diff --git a/test/custom/incremental_strategies/data/expected_overwrite.csv b/tests/integration/incremental_strategies/data/expected_overwrite.csv similarity index 100% rename from test/custom/incremental_strategies/data/expected_overwrite.csv rename to tests/integration/incremental_strategies/data/expected_overwrite.csv diff --git a/test/custom/incremental_strategies/data/expected_partial_upsert.csv b/tests/integration/incremental_strategies/data/expected_partial_upsert.csv similarity index 100% rename from test/custom/incremental_strategies/data/expected_partial_upsert.csv rename to tests/integration/incremental_strategies/data/expected_partial_upsert.csv diff --git a/test/custom/incremental_strategies/data/expected_upsert.csv b/tests/integration/incremental_strategies/data/expected_upsert.csv similarity index 100% rename from test/custom/incremental_strategies/data/expected_upsert.csv rename to tests/integration/incremental_strategies/data/expected_upsert.csv diff --git a/test/custom/incremental_strategies/models/default_append.sql b/tests/integration/incremental_strategies/models/default_append.sql similarity index 100% rename from test/custom/incremental_strategies/models/default_append.sql rename to 
tests/integration/incremental_strategies/models/default_append.sql diff --git a/test/custom/incremental_strategies/models_bad/bad_file_format.sql b/tests/integration/incremental_strategies/models_bad/bad_file_format.sql similarity index 100% rename from test/custom/incremental_strategies/models_bad/bad_file_format.sql rename to tests/integration/incremental_strategies/models_bad/bad_file_format.sql diff --git a/test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql b/tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql similarity index 100% rename from test/custom/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql rename to tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql diff --git a/test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql b/tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql similarity index 100% rename from test/custom/incremental_strategies/models_bad/bad_merge_not_delta.sql rename to tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql diff --git a/test/custom/incremental_strategies/models_bad/bad_strategy.sql b/tests/integration/incremental_strategies/models_bad/bad_strategy.sql similarity index 100% rename from test/custom/incremental_strategies/models_bad/bad_strategy.sql rename to tests/integration/incremental_strategies/models_bad/bad_strategy.sql diff --git a/test/custom/incremental_strategies/models_delta/append_delta.sql b/tests/integration/incremental_strategies/models_delta/append_delta.sql similarity index 100% rename from test/custom/incremental_strategies/models_delta/append_delta.sql rename to tests/integration/incremental_strategies/models_delta/append_delta.sql diff --git a/test/custom/incremental_strategies/models_delta/merge_no_key.sql b/tests/integration/incremental_strategies/models_delta/merge_no_key.sql similarity index 100% rename from test/custom/incremental_strategies/models_delta/merge_no_key.sql rename to tests/integration/incremental_strategies/models_delta/merge_no_key.sql diff --git a/test/custom/incremental_strategies/models_delta/merge_unique_key.sql b/tests/integration/incremental_strategies/models_delta/merge_unique_key.sql similarity index 100% rename from test/custom/incremental_strategies/models_delta/merge_unique_key.sql rename to tests/integration/incremental_strategies/models_delta/merge_unique_key.sql diff --git a/test/custom/incremental_strategies/models_delta/merge_update_columns.sql b/tests/integration/incremental_strategies/models_delta/merge_update_columns.sql similarity index 100% rename from test/custom/incremental_strategies/models_delta/merge_update_columns.sql rename to tests/integration/incremental_strategies/models_delta/merge_update_columns.sql diff --git a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql similarity index 100% rename from test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql rename to tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql diff --git a/test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql similarity index 100% rename from 
test/custom/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql rename to tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql diff --git a/test/custom/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py similarity index 95% rename from test/custom/incremental_strategies/test_incremental_strategies.py rename to tests/integration/incremental_strategies/test_incremental_strategies.py index 64966ece..ef253fc5 100644 --- a/test/custom/incremental_strategies/test_incremental_strategies.py +++ b/tests/integration/incremental_strategies/test_incremental_strategies.py @@ -1,9 +1,9 @@ from cProfile import run -from test.custom.base import DBTSparkIntegrationTest, use_profile +from tests.integration.base import DBTIntegrationTest, use_profile import dbt.exceptions -class TestIncrementalStrategies(DBTSparkIntegrationTest): +class TestIncrementalStrategies(DBTIntegrationTest): @property def schema(self): return "incremental_strategies" diff --git a/test/custom/persist_docs/data/seed.csv b/tests/integration/persist_docs/data/seed.csv similarity index 100% rename from test/custom/persist_docs/data/seed.csv rename to tests/integration/persist_docs/data/seed.csv diff --git a/test/custom/persist_docs/data/seeds.yml b/tests/integration/persist_docs/data/seeds.yml similarity index 100% rename from test/custom/persist_docs/data/seeds.yml rename to tests/integration/persist_docs/data/seeds.yml diff --git a/test/custom/persist_docs/models/incremental_delta_model.sql b/tests/integration/persist_docs/models/incremental_delta_model.sql similarity index 100% rename from test/custom/persist_docs/models/incremental_delta_model.sql rename to tests/integration/persist_docs/models/incremental_delta_model.sql diff --git a/test/custom/persist_docs/models/my_fun_docs.md b/tests/integration/persist_docs/models/my_fun_docs.md similarity index 100% rename from test/custom/persist_docs/models/my_fun_docs.md rename to tests/integration/persist_docs/models/my_fun_docs.md diff --git a/test/custom/persist_docs/models/no_docs_model.sql b/tests/integration/persist_docs/models/no_docs_model.sql similarity index 100% rename from test/custom/persist_docs/models/no_docs_model.sql rename to tests/integration/persist_docs/models/no_docs_model.sql diff --git a/test/custom/persist_docs/models/schema.yml b/tests/integration/persist_docs/models/schema.yml similarity index 100% rename from test/custom/persist_docs/models/schema.yml rename to tests/integration/persist_docs/models/schema.yml diff --git a/test/custom/persist_docs/models/table_delta_model.sql b/tests/integration/persist_docs/models/table_delta_model.sql similarity index 100% rename from test/custom/persist_docs/models/table_delta_model.sql rename to tests/integration/persist_docs/models/table_delta_model.sql diff --git a/test/custom/persist_docs/models/view_model.sql b/tests/integration/persist_docs/models/view_model.sql similarity index 100% rename from test/custom/persist_docs/models/view_model.sql rename to tests/integration/persist_docs/models/view_model.sql diff --git a/test/custom/persist_docs/test_persist_docs.py b/tests/integration/persist_docs/test_persist_docs.py similarity index 94% rename from test/custom/persist_docs/test_persist_docs.py rename to tests/integration/persist_docs/test_persist_docs.py index 64c54085..bc93f491 100644 --- a/test/custom/persist_docs/test_persist_docs.py +++ 
b/tests/integration/persist_docs/test_persist_docs.py @@ -1,11 +1,11 @@ from cProfile import run -from test.custom.base import DBTSparkIntegrationTest, use_profile +from tests.integration.base import DBTIntegrationTest, use_profile import dbt.exceptions import json -class TestPersistDocsDelta(DBTSparkIntegrationTest): +class TestPersistDocsDelta(DBTIntegrationTest): @property def schema(self): return "persist_docs_columns" diff --git a/test/custom/seed_column_types/data/payments.csv b/tests/integration/seed_column_types/data/payments.csv similarity index 100% rename from test/custom/seed_column_types/data/payments.csv rename to tests/integration/seed_column_types/data/payments.csv diff --git a/test/custom/seed_column_types/test_seed_column_types.py b/tests/integration/seed_column_types/test_seed_column_types.py similarity index 86% rename from test/custom/seed_column_types/test_seed_column_types.py rename to tests/integration/seed_column_types/test_seed_column_types.py index e1fc3278..326c9f52 100644 --- a/test/custom/seed_column_types/test_seed_column_types.py +++ b/tests/integration/seed_column_types/test_seed_column_types.py @@ -1,9 +1,9 @@ from cProfile import run -from test.custom.base import DBTSparkIntegrationTest, use_profile +from tests.integration.base import DBTIntegrationTest, use_profile import dbt.exceptions -class TestSeedColumnTypeCast(DBTSparkIntegrationTest): +class TestSeedColumnTypeCast(DBTIntegrationTest): @property def schema(self): return "seed_column_types" diff --git a/test/integration/spark-databricks-http.dbtspec b/tests/integration/spark-databricks-http.dbtspec similarity index 100% rename from test/integration/spark-databricks-http.dbtspec rename to tests/integration/spark-databricks-http.dbtspec diff --git a/test/integration/spark-databricks-odbc-cluster.dbtspec b/tests/integration/spark-databricks-odbc-cluster.dbtspec similarity index 100% rename from test/integration/spark-databricks-odbc-cluster.dbtspec rename to tests/integration/spark-databricks-odbc-cluster.dbtspec diff --git a/test/integration/spark-databricks-odbc-sql-endpoint.dbtspec b/tests/integration/spark-databricks-odbc-sql-endpoint.dbtspec similarity index 100% rename from test/integration/spark-databricks-odbc-sql-endpoint.dbtspec rename to tests/integration/spark-databricks-odbc-sql-endpoint.dbtspec diff --git a/test/integration/spark-thrift.dbtspec b/tests/integration/spark-thrift.dbtspec similarity index 100% rename from test/integration/spark-thrift.dbtspec rename to tests/integration/spark-thrift.dbtspec diff --git a/test/custom/store_failures/models/schema.yml b/tests/integration/store_failures/models/schema.yml similarity index 100% rename from test/custom/store_failures/models/schema.yml rename to tests/integration/store_failures/models/schema.yml diff --git a/test/custom/store_failures/models/view_model.sql b/tests/integration/store_failures/models/view_model.sql similarity index 100% rename from test/custom/store_failures/models/view_model.sql rename to tests/integration/store_failures/models/view_model.sql diff --git a/test/custom/store_failures/test_store_failures.py b/tests/integration/store_failures/test_store_failures.py similarity index 91% rename from test/custom/store_failures/test_store_failures.py rename to tests/integration/store_failures/test_store_failures.py index 7a4aae7d..7475a293 100644 --- a/test/custom/store_failures/test_store_failures.py +++ b/tests/integration/store_failures/test_store_failures.py @@ -1,6 +1,6 @@ -from test.custom.base import 
    DBTSparkIntegrationTest, use_profile
+from tests.integration.base import DBTIntegrationTest, use_profile
 
-class TestStoreFailures(DBTSparkIntegrationTest):
+class TestStoreFailures(DBTIntegrationTest):
     @property
     def schema(self):
         return "store_failures"
diff --git a/test/unit/__init__.py b/tests/unit/__init__.py
similarity index 100%
rename from test/unit/__init__.py
rename to tests/unit/__init__.py
diff --git a/test/unit/test_adapter.py b/tests/unit/test_adapter.py
similarity index 100%
rename from test/unit/test_adapter.py
rename to tests/unit/test_adapter.py
diff --git a/test/unit/test_column.py b/tests/unit/test_column.py
similarity index 100%
rename from test/unit/test_column.py
rename to tests/unit/test_column.py
diff --git a/test/unit/test_macros.py b/tests/unit/test_macros.py
similarity index 100%
rename from test/unit/test_macros.py
rename to tests/unit/test_macros.py
diff --git a/test/unit/utils.py b/tests/unit/utils.py
similarity index 100%
rename from test/unit/utils.py
rename to tests/unit/utils.py
diff --git a/tox.ini b/tox.ini
index fcdd45d8..f1e53d2c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -12,7 +12,7 @@ deps =
 
 [testenv:unit]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v {posargs} test/unit'
+commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
@@ -20,7 +20,7 @@ deps =
 
 [testenv:integration-spark-databricks-http]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-http.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-databricks-http.dbtspec'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
@@ -29,8 +29,8 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-cluster]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-cluster.dbtspec'
-           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 test/custom/*'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-databricks-odbc-cluster.dbtspec'
+           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
     -r{toxinidir}/requirements.txt
@@ -39,8 +39,8 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-sql-endpoint.dbtspec'
-           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 test/custom/*'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-databricks-odbc-sql-endpoint.dbtspec'
+           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
     -r{toxinidir}/requirements.txt
@@ -50,8 +50,8 @@ deps =
 
 [testenv:integration-spark-thrift]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-thrift.dbtspec'
-           /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 test/custom/*'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-thrift.dbtspec'
+           /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt

From 1f84005888b9cf716644cf60331a2a27df9125a8 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Wed, 27 Oct 2021 14:13:19 -0400
Subject: [PATCH 272/603] Bumping to 1.0.0b1 (#243)

* Bumping to 1.0.0b1
* Update integration base
* Update dbt-core version
* Get latest adapter test framework
* Trying PR branch for adapter framework changes
* Undoing requirements change
* Moving spec files
* Using latest adapter tests changes
* Depending on dbt-core latest
* Remove strict flag
* Fixing path error
* Adding pyodbc import
* Dir path type
* Pointing to latest adapter tests
---
 .bumpversion-dbt.cfg                          |  26 -
 .bumpversion.cfg                              |   3 +-
 dbt/adapters/spark/__version__.py             |   2 +-
 dev_requirements.txt                          |   7 +-
 requirements.txt                              |   1 -
 setup.py                                      |  68 +-
 tests/integration/base.py                     | 906 ++++++++++++++++--
 .../{data => seeds}/expected_append.csv       |   0
 .../{data => seeds}/expected_overwrite.csv    |   0
 .../expected_partial_upsert.csv               |   0
 .../{data => seeds}/expected_upsert.csv       |   0
 .../persist_docs/{data => seeds}/seed.csv     |   0
 .../persist_docs/{data => seeds}/seeds.yml    |   0
 .../{data => seeds}/payments.csv              |   0
 .../store_failures/test_store_failures.py     |   2 +-
 .../spark-databricks-http.dbtspec             |   0
 .../spark-databricks-odbc-cluster.dbtspec     |   0
 ...spark-databricks-odbc-sql-endpoint.dbtspec |   0
 .../spark-thrift.dbtspec                      |   0
 tests/unit/utils.py                           |   2 +-
 tox.ini                                       |   8 +-
 21 files changed, 908 insertions(+), 117 deletions(-)
 delete mode 100644 .bumpversion-dbt.cfg
 rename tests/integration/incremental_strategies/{data => seeds}/expected_append.csv (100%)
 rename tests/integration/incremental_strategies/{data => seeds}/expected_overwrite.csv (100%)
 rename tests/integration/incremental_strategies/{data => seeds}/expected_partial_upsert.csv (100%)
 rename tests/integration/incremental_strategies/{data => seeds}/expected_upsert.csv (100%)
 rename tests/integration/persist_docs/{data => seeds}/seed.csv (100%)
 rename tests/integration/persist_docs/{data => seeds}/seeds.yml (100%)
 rename tests/integration/seed_column_types/{data => seeds}/payments.csv (100%)
 rename tests/{integration => specs}/spark-databricks-http.dbtspec (100%)
 rename tests/{integration => specs}/spark-databricks-odbc-cluster.dbtspec (100%)
 rename tests/{integration => specs}/spark-databricks-odbc-sql-endpoint.dbtspec (100%)
 rename tests/{integration => specs}/spark-thrift.dbtspec (100%)

diff --git a/.bumpversion-dbt.cfg b/.bumpversion-dbt.cfg
deleted file mode 100644
index f8ad313f..00000000
--- a/.bumpversion-dbt.cfg
+++ /dev/null
@@ -1,26 +0,0 @@
-[bumpversion]
-current_version = 0.21.0
-parse = (?P<major>\d+)
-    \.(?P<minor>\d+)
-    \.(?P<patch>\d+)
-    ((?P<prerelease>[a-z]+)(?P<num>\d+))?
-serialize =
-    {major}.{minor}.{patch}{prerelease}{num}
-    {major}.{minor}.{patch}
-commit = False
-tag = False
-
-[bumpversion:part:prerelease]
-first_value = a
-values =
-    a
-    b
-    rc
-
-[bumpversion:part:num]
-first_value = 1
-
-[bumpversion:file:setup.py]
-
-[bumpversion:file:requirements.txt]
-
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 6a532e05..dfa9e026 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.21.0
+current_version = 1.0.0b1
 parse = (?P<major>\d+)
     \.(?P<minor>\d+)
     \.(?P<patch>\d+)
@@ -27,4 +27,3 @@ first_value = 1
 first_value = 1
 
 [bumpversion:file:dbt/adapters/spark/__version__.py]
-
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 025ca235..affc65fe 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "0.21.0"
+version = "1.0.0b1"
diff --git a/dev_requirements.txt b/dev_requirements.txt
index 08f3c37b..a44d72a4 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,3 +1,7 @@
+# install latest changes in dbt-core
+# TODO: how to automate switching from develop to version branches?
+git+https://github.com/dbt-labs/dbt.git#egg=dbt-core&subdirectory=core
+
 freezegun==0.3.9
 pytest==6.0.2
 mock>=1.3.0
@@ -11,6 +15,7 @@ flaky>=3.5.3,<4
 pytest-csv
 
 # Test requirements
-pytest-dbt-adapter==0.5.1
+#pytest-dbt-adapter==0.5.1
+git+https://github.com/dbt-labs/dbt-adapter-tests.git#egg=pytest-dbt-adapter
 sasl==0.2.1
 thrift_sasl==0.4.1
diff --git a/requirements.txt b/requirements.txt
index 88cd8291..e03320a4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-dbt-core==0.21.0
 PyHive[hive]>=0.6.0,<0.7.0
 pyodbc>=4.0.30
 sqlparams>=3.0.0
diff --git a/setup.py b/setup.py
index 2081e98f..bd7e5522 100644
--- a/setup.py
+++ b/setup.py
@@ -1,41 +1,65 @@
 #!/usr/bin/env python
-from setuptools import find_namespace_packages, setup
 import os
+import sys
 import re
 
+# require python 3.6 or newer
+if sys.version_info < (3, 6):
+    print('Error: dbt does not support this version of Python.')
+    print('Please upgrade to Python 3.6 or higher.')
+    sys.exit(1)
+
+
+# require version of setuptools that supports find_namespace_packages
+from setuptools import setup
+try:
+    from setuptools import find_namespace_packages
+except ImportError:
+    # the user has a downlevel version of setuptools.
+    print('Error: dbt requires setuptools v40.1.0 or higher.')
+    print('Please upgrade setuptools with "pip install --upgrade setuptools" '
+          'and try again')
+    sys.exit(1)
+
+
+# pull long description from README
 this_directory = os.path.abspath(os.path.dirname(__file__))
 with open(os.path.join(this_directory, 'README.md'), 'r', encoding='utf8') as f:
     long_description = f.read()
 
-package_name = "dbt-spark"
-
-
-# get this from a separate file
-def _dbt_spark_version():
+# get this package's version from dbt/adapters/<adapter name>/__version__.py
+def _get_plugin_version_dict():
     _version_path = os.path.join(
         this_directory, 'dbt', 'adapters', 'spark', '__version__.py'
     )
-    _version_pattern = r'''version\s*=\s*["'](.+)["']'''
+    _semver = r'''(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'''
+    _pre = r'''((?P<prekind>a|b|rc)(?P<pre>\d+))?'''
+    _version_pattern = fr'''version\s*=\s*["']{_semver}{_pre}["']'''
     with open(_version_path) as f:
         match = re.search(_version_pattern, f.read().strip())
         if match is None:
             raise ValueError(f'invalid version at {_version_path}')
-        return match.group(1)
+        return match.groupdict()
 
 
-package_version = _dbt_spark_version()
-description = """The SparkSQL plugin for dbt (data build tool)"""
+def _get_plugin_version():
+    parts = _get_plugin_version_dict()
+    return "{major}.{minor}.{patch}{prekind}{pre}".format(**parts)
 
-dbt_version = '0.21.0'
-# the package version should be the dbt version, with maybe some things on the
-# ends of it. (0.21.0 vs 0.21.0a1, 0.21.0.1, ...)
-if not package_version.startswith(dbt_version):
-    raise ValueError(
-        f'Invalid setup.py: package_version={package_version} must start with '
-        f'dbt_version={dbt_version}'
-    )
+
+# require a compatible minor version (~=), prerelease if this is a prerelease
+def _get_dbt_core_version():
+    parts = _get_plugin_version_dict()
+    minor = "{major}.{minor}.0".format(**parts)
+    pre = (parts["prekind"]+"1" if parts["prekind"] else "")
+    return f"{minor}{pre}"
+
+
+package_name = "dbt-spark"
+package_version = _get_plugin_version()
+dbt_core_version = _get_dbt_core_version()
+description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ['pyodbc>=4.0.30']
 pyhive_extras = [
@@ -52,14 +76,14 @@ def _dbt_spark_version():
     long_description=long_description,
     long_description_content_type='text/markdown',
 
-    author='Fishtown Analytics',
-    author_email='info@fishtownanalytics.com',
-    url='https://github.com/fishtown-analytics/dbt-spark',
+    author='dbt Labs',
+    author_email='info@dbtlabs.com',
+    url='https://github.com/dbt-labs/dbt-spark',
 
     packages=find_namespace_packages(include=['dbt', 'dbt.*']),
     include_package_data=True,
     install_requires=[
-        f'dbt-core=={dbt_version}',
+        'dbt-core~={}'.format(dbt_core_version),
         'sqlparams>=3.0.0',
     ],
     extras_require={
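
As a rough illustration of how the version helpers introduced above resolve the plugin version and the dbt-core pin, here is a minimal standalone sketch (not part of the patch); the hard-coded contents string stands in for reading dbt/adapters/spark/__version__.py, using the 1.0.0b1 value set by this commit:

import re

# same regexes as _get_plugin_version_dict() above
_semver = r'''(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'''
_pre = r'''((?P<prekind>a|b|rc)(?P<pre>\d+))?'''
_version_pattern = fr'''version\s*=\s*["']{_semver}{_pre}["']'''

contents = 'version = "1.0.0b1"'   # stand-in for the __version__.py file contents
parts = re.search(_version_pattern, contents).groupdict()

# plugin version, as in _get_plugin_version()
plugin_version = "{major}.{minor}.{patch}{prekind}{pre}".format(**parts)

# compatible-release pin for dbt-core, as in _get_dbt_core_version()
core_pin = "{major}.{minor}.0".format(**parts) + (parts["prekind"] + "1" if parts["prekind"] else "")

print(plugin_version)            # -> 1.0.0b1
print(f"dbt-core~={core_pin}")   # -> dbt-core~=1.0.0b1, i.e. >=1.0.0b1 and <1.1

The net effect of the install_requires change below is that the adapter now tracks any compatible dbt-core release in the same minor series instead of an exact-version pin.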
diff --git a/tests/integration/base.py b/tests/integration/base.py
index 962ecbea..50f4ef91 100644
--- a/tests/integration/base.py
+++ b/tests/integration/base.py
@@ -1,73 +1,143 @@
-import pytest
-from functools import wraps
+import json
 import os
-from dbt_adapter_tests import DBTIntegrationTestBase
+import io
+import random
+import shutil
+import sys
+import tempfile
+import traceback
+import unittest
+from contextlib import contextmanager
+from datetime import datetime
+from functools import wraps
+
 import pyodbc
+import pytest
+import yaml
+from unittest.mock import patch
 
+import dbt.main as dbt
+from dbt import flags
+from dbt.deprecations import reset_deprecations
+from dbt.adapters.factory import get_adapter, reset_adapters, register_adapter
+from dbt.clients.jinja import template_cache
+from dbt.config import RuntimeConfig
+from dbt.context import providers
+from dbt.logger import GLOBAL_LOGGER as logger, log_manager
+from dbt.contracts.graph.manifest import Manifest
 
-class DBTIntegrationTest(DBTIntegrationTestBase):
+INITIAL_ROOT = os.getcwd()
 
-    def get_profile(self, adapter_type):
-        if adapter_type == 'apache_spark':
-            return self.apache_spark_profile()
-        elif adapter_type == 'databricks_cluster':
-            return self.databricks_cluster_profile()
-        elif adapter_type == 'databricks_sql_endpoint':
-            return self.databricks_sql_endpoint_profile()
-        else:
-            raise ValueError('invalid adapter type {}'.format(adapter_type))
 
-    @staticmethod
-    def _profile_from_test_name(test_name):
-        adapter_names = ('apache_spark', 'databricks_cluster',
-                         'databricks_sql_endpoint')
-        adapters_in_name = sum(x in test_name for x in adapter_names)
-        if adapters_in_name != 1:
-            raise ValueError(
-                'test names must have exactly 1 profile choice embedded, {} has {}'
-                .format(test_name, adapters_in_name)
-            )
+def normalize(path):
+    """On windows, neither is enough on its own:
+
+    >>> normcase('C:\\documents/ALL CAPS/subdir\\..')
+    'c:\\documents\\all caps\\subdir\\..'
+    >>> normpath('C:\\documents/ALL CAPS/subdir\\..')
+    'C:\\documents\\ALL CAPS'
+    >>> normpath(normcase('C:\\documents/ALL CAPS/subdir\\..'))
+    'c:\\documents\\all caps'
+    """
+    return os.path.normcase(os.path.normpath(path))
 
-        for adapter_name in adapter_names:
-            if adapter_name in test_name:
-                return adapter_name
 
+class Normalized:
+    def __init__(self, value):
+        self.value = value
+
+    def __repr__(self):
+        return f'Normalized({self.value!r})'
+
+    def __str__(self):
+        return f'Normalized({self.value!s})'
+
+    def __eq__(self, other):
+        return normalize(self.value) == normalize(other)
+
+
+class FakeArgs:
+    def __init__(self):
+        self.threads = 1
+        self.defer = False
+        self.full_refresh = False
+        self.models = None
+        self.select = None
+        self.exclude = None
+        self.single_threaded = False
+        self.selector_name = None
+        self.state = None
+        self.defer = None
+
+
+class TestArgs:
+    def __init__(self, kwargs):
+        self.which = 'run'
+        self.single_threaded = False
+        self.profiles_dir = None
+        self.project_dir = None
+        self.__dict__.update(kwargs)
+
+
+def _profile_from_test_name(test_name):
+    adapter_names = ('apache_spark', 'databricks_cluster',
+                     'databricks_sql_endpoint')
+    adapters_in_name = sum(x in test_name for x in adapter_names)
+    if adapters_in_name != 1:
         raise ValueError(
-            'could not find adapter name in test name {}'.format(test_name)
+            'test names must have exactly 1 profile choice embedded, {} has {}'
+            .format(test_name, adapters_in_name)
         )
 
-    def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
-        if connection_name is None:
-            connection_name = '__test'
+    for adapter_name in adapter_names:
+        if adapter_name in test_name:
+            return adapter_name
 
-        if query.strip() == "":
-            return
+    raise ValueError(
+        'could not find adapter name in test name {}'.format(test_name)
+    )
 
-        sql = self.transform_sql(query, kwargs=kwargs)
 
-        with self.get_connection(connection_name) as conn:
-            cursor = conn.handle.cursor()
-            try:
-                cursor.execute(sql)
-                if fetch == 'one':
-                    return cursor.fetchall()[0]
-                elif fetch == 'all':
-                    return cursor.fetchall()
-                else:
-                    # we have to fetch.
-                    cursor.fetchall()
-            except pyodbc.ProgrammingError as e:
-                # hacks for dropping schema
-                if "No results.  Previous SQL was not a query." not in str(e):
-                    raise e
-            except Exception as e:
-                conn.handle.rollback()
-                conn.transaction_open = False
-                print(sql)
-                print(e)
-                raise
-            else:
-                conn.transaction_open = False
+def _pytest_test_name():
+    return os.environ['PYTEST_CURRENT_TEST'].split()[0]
+
+
+def _pytest_get_test_root():
+    test_path = _pytest_test_name().split('::')[0]
+    relative_to = INITIAL_ROOT
+    head = os.path.relpath(test_path, relative_to)
+
+    path_parts = []
+    while head:
+        head, tail = os.path.split(head)
+        path_parts.append(tail)
+    path_parts.reverse()
+    # dbt tests are all of the form 'tests/integration/suite_name'
+    target = os.path.join(*path_parts[:3])  # TODO: try to not hard code this
+    return os.path.join(relative_to, target)
+
+
+def _really_makedirs(path):
+    while not os.path.exists(path):
+        try:
+            os.makedirs(path)
+        except EnvironmentError:
+            raise
+
+
+class DBTIntegrationTest(unittest.TestCase):
+    CREATE_SCHEMA_STATEMENT = 'CREATE SCHEMA {}'
+    DROP_SCHEMA_STATEMENT = 'DROP SCHEMA IF EXISTS {} CASCADE'
+
+    _randint = random.randint(0, 9999)
+    _runtime_timedelta = (datetime.utcnow() - datetime(1970, 1, 1, 0, 0, 0))
+    _runtime = (
+        (int(_runtime_timedelta.total_seconds() * 1e6)) +
+        _runtime_timedelta.microseconds
+    )
+
+    prefix = f'test{_runtime}{_randint:04}'
+    setup_alternate_db = False
 
     def apache_spark_profile(self):
         return {
@@ -135,6 +205,685 @@ def databricks_sql_endpoint_profile(self):
             }
         }
 
+    @property
+    def packages_config(self):
+        return None
+
+    @property
+    def selectors_config(self):
+        return None
+
+    def unique_schema(self):
+        schema = self.schema
+
+        to_return = "{}_{}".format(self.prefix, schema)
+
+        return to_return.lower()
+
+    @property
+    def default_database(self):
+        database = self.config.credentials.database
+        return database
+
+    @property
+    def alternative_database(self):
+        return None
+
+    def get_profile(self, adapter_type):
+        if adapter_type == 'apache_spark':
+            return self.apache_spark_profile()
+        elif adapter_type == 'databricks_cluster':
+            return self.databricks_cluster_profile()
+        elif adapter_type == 'databricks_sql_endpoint':
+            return self.databricks_sql_endpoint_profile()
+        else:
+            raise ValueError('invalid adapter type {}'.format(adapter_type))
+
+    def _pick_profile(self):
+        test_name = self.id().split('.')[-1]
+        return _profile_from_test_name(test_name)
+
+    def _symlink_test_folders(self):
+        for entry in os.listdir(self.test_original_source_path):
+            src = os.path.join(self.test_original_source_path, entry)
+            tst = os.path.join(self.test_root_dir, entry)
+            if os.path.isdir(src) or src.endswith('.sql'):
+                # symlink all sql files and all directories.
+                os.symlink(src, tst)
+        os.symlink(self._logs_dir, os.path.join(self.test_root_dir, 'logs'))
+
+    @property
+    def test_root_realpath(self):
+        if sys.platform == 'darwin':
+            return os.path.realpath(self.test_root_dir)
+        else:
+            return self.test_root_dir
+
+    def _generate_test_root_dir(self):
+        return normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))
+
+    def setUp(self):
+        self.dbt_core_install_root = os.path.dirname(dbt.__file__)
+        log_manager.reset_handlers()
+        self.initial_dir = INITIAL_ROOT
+        os.chdir(self.initial_dir)
+        # before we go anywhere, collect the initial path info
+        self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix)
+        _really_makedirs(self._logs_dir)
+        self.test_original_source_path = _pytest_get_test_root()
+        self.test_root_dir = self._generate_test_root_dir()
+
+        os.chdir(self.test_root_dir)
+        try:
+            self._symlink_test_folders()
+        except Exception as exc:
+            msg = '\n\t'.join((
+                'Failed to symlink test folders!',
+                'initial_dir={0.initial_dir}',
+                'test_original_source_path={0.test_original_source_path}',
+                'test_root_dir={0.test_root_dir}'
+            )).format(self)
+            logger.exception(msg)
+
+            # if logging isn't set up, I still really want this message.
+            print(msg)
+            traceback.print_exc()
+
+            raise
+
+        self._created_schemas = set()
+        reset_deprecations()
+        template_cache.clear()
+
+        self.use_profile(self._pick_profile())
+        self.use_default_project()
+        self.set_packages()
+        self.set_selectors()
+        self.load_config()
+
+    def use_default_project(self, overrides=None):
+        # create a dbt_project.yml
+        base_project_config = {
+            'name': 'test',
+            'version': '1.0',
+            'config-version': 2,
+            'test-paths': [],
+            'source-paths': [self.models],
+            'profile': 'test',
+        }
+
+        project_config = {}
+        project_config.update(base_project_config)
+        project_config.update(self.project_config)
+        project_config.update(overrides or {})
+
+        with open("dbt_project.yml", 'w') as f:
+            yaml.safe_dump(project_config, f, default_flow_style=True)
+
+    def use_profile(self, adapter_type):
+        self.adapter_type = adapter_type
+
+        profile_config = {}
+        default_profile_config = self.get_profile(adapter_type)
+
+        profile_config.update(default_profile_config)
+        profile_config.update(self.profile_config)
+
+        if not os.path.exists(self.test_root_dir):
+            os.makedirs(self.test_root_dir)
+
+        flags.PROFILES_DIR = self.test_root_dir
+        profiles_path = os.path.join(self.test_root_dir, 'profiles.yml')
+        with open(profiles_path, 'w') as f:
+            yaml.safe_dump(profile_config, f, default_flow_style=True)
+        self._profile_config = profile_config
+
+    def set_packages(self):
+        if self.packages_config is not None:
+            with open('packages.yml', 'w') as f:
+                yaml.safe_dump(self.packages_config, f, default_flow_style=True)
+
+    def set_selectors(self):
+        if self.selectors_config is not None:
+            with open('selectors.yml', 'w') as f:
+                yaml.safe_dump(self.selectors_config, f, default_flow_style=True)
+
+    def load_config(self):
+        # we've written our profile and project. Now we want to instantiate a
+        # fresh adapter for the tests.
+        # it's important to use a different connection handle here so
+        # we don't look into an incomplete transaction
+        kwargs = {
+            'profile': None,
+            'profiles_dir': self.test_root_dir,
+            'target': None,
+        }
+
+        config = RuntimeConfig.from_args(TestArgs(kwargs))
+
+        register_adapter(config)
+        adapter = get_adapter(config)
+        adapter.cleanup_connections()
+        self.adapter_type = adapter.type()
+        self.adapter = adapter
+        self.config = config
+
+        self._drop_schemas()
+        self._create_schemas()
+
+    def quote_as_configured(self, value, quote_key):
+        return self.adapter.quote_as_configured(value, quote_key)
+
+    def tearDown(self):
+        # get any current run adapter and clean up its connections before we
+        # reset them. It'll probably be different from ours because
+        # handle_and_check() calls reset_adapters().
+        register_adapter(self.config)
+        adapter = get_adapter(self.config)
+        if adapter is not self.adapter:
+            adapter.cleanup_connections()
+        if not hasattr(self, 'adapter'):
+            self.adapter = adapter
+
+        self._drop_schemas()
+
+        self.adapter.cleanup_connections()
+        reset_adapters()
+        os.chdir(INITIAL_ROOT)
+        try:
+            shutil.rmtree(self.test_root_dir)
+        except EnvironmentError:
+            logger.exception('Could not clean up after test - {} not removable'
+                             .format(self.test_root_dir))
+
+    def _get_schema_fqn(self, database, schema):
+        schema_fqn = self.quote_as_configured(schema, 'schema')
+        return schema_fqn
+
+    def _create_schema_named(self, database, schema):
+        self.run_sql('CREATE SCHEMA {schema}')
+
+    def _drop_schema_named(self, database, schema):
+        self.run_sql('DROP SCHEMA IF EXISTS {schema} CASCADE')
+
+    def _create_schemas(self):
+        schema = self.unique_schema()
+        with self.adapter.connection_named('__test'):
+            self._create_schema_named(self.default_database, schema)
+
+    def _drop_schemas(self):
+        with self.adapter.connection_named('__test'):
+            schema = self.unique_schema()
+            self._drop_schema_named(self.default_database, schema)
+            if self.setup_alternate_db and self.alternative_database:
+                self._drop_schema_named(self.alternative_database, schema)
+
+    @property
+    def project_config(self):
+        return {
+            'config-version': 2,
+        }
+
+    @property
+    def profile_config(self):
+        return {}
+
+    def run_dbt(self, args=None, expect_pass=True, profiles_dir=True):
+        res, success = self.run_dbt_and_check(args=args, profiles_dir=profiles_dir)
+        self.assertEqual(
+            success, expect_pass,
+            "dbt exit state did not match expected")
+
+        return res
+
+
+    def run_dbt_and_capture(self, *args, **kwargs):
+        try:
+            initial_stdout = log_manager.stdout
+            initial_stderr = log_manager.stderr
+            stringbuf = io.StringIO()
+            log_manager.set_output_stream(stringbuf)
+
+            res = self.run_dbt(*args, **kwargs)
+            stdout = stringbuf.getvalue()
+
+        finally:
+            log_manager.set_output_stream(initial_stdout, initial_stderr)
+
+        return res, stdout
+
+    def run_dbt_and_check(self, args=None, profiles_dir=True):
+        log_manager.reset_handlers()
+        if args is None:
+            args = ["run"]
+
+        final_args = []
+
+        if os.getenv('DBT_TEST_SINGLE_THREADED') in ('y', 'Y', '1'):
+            final_args.append('--single-threaded')
+
+        final_args.extend(args)
+
+        if profiles_dir:
+            final_args.extend(['--profiles-dir', self.test_root_dir])
+        final_args.append('--log-cache-events')
+
+        logger.info("Invoking dbt with {}".format(final_args))
+        return dbt.handle_and_check(final_args)
+
+    def run_sql_file(self, path, kwargs=None):
+        with open(path, 'r') as f:
+            statements = f.read().split(";")
+            for statement in statements:
+                self.run_sql(statement, kwargs=kwargs)
+
+    def transform_sql(self, query, kwargs=None):
+        to_return = query
+
+        base_kwargs = {
+            'schema': self.unique_schema(),
+            'database': self.adapter.quote(self.default_database),
+        }
+        if kwargs is None:
+            kwargs = {}
+        base_kwargs.update(kwargs)
+
+        to_return = to_return.format(**base_kwargs)
+
+        return to_return
+
+    def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
+        if connection_name is None:
+            connection_name = '__test'
+
+        if query.strip() == "":
+            return
+
+        sql = self.transform_sql(query, kwargs=kwargs)
+
+        with self.get_connection(connection_name) as conn:
+            cursor = conn.handle.cursor()
+            try:
+                cursor.execute(sql)
+                if fetch == 'one':
+                    return cursor.fetchall()[0]
+                elif fetch == 'all':
+                    return cursor.fetchall()
+                else:
+                    # we have to fetch.
+                    cursor.fetchall()
+            except pyodbc.ProgrammingError as e:
+                # hacks for dropping schema
+                if "No results.  Previous SQL was not a query." not in str(e):
+                    raise e
+            except Exception as e:
+                conn.handle.rollback()
+                conn.transaction_open = False
+                print(sql)
+                print(e)
+                raise
+            else:
+                conn.transaction_open = False
+
+    def _ilike(self, target, value):
+        return "{} ilike '{}'".format(target, value)
+
+    def get_many_table_columns_bigquery(self, tables, schema, database=None):
+        result = []
+        for table in tables:
+            relation = self._make_relation(table, schema, database)
+            columns = self.adapter.get_columns_in_relation(relation)
+            for col in columns:
+                result.append((table, col.column, col.dtype, col.char_size))
+        return result
+
+    def get_many_table_columns(self, tables, schema, database=None):
+        result = self.get_many_table_columns_bigquery(tables, schema, database)
+        result.sort(key=lambda x: '{}.{}'.format(x[0], x[1]))
+        return result
+
+    def filter_many_columns(self, column):
+        if len(column) == 3:
+            table_name, column_name, data_type = column
+            char_size = None
+        else:
+            table_name, column_name, data_type, char_size = column
+        return (table_name, column_name, data_type, char_size)
+
+    @contextmanager
+    def get_connection(self, name=None):
+        """Create a test connection context where all executed macros, etc will
+        get self.adapter as the adapter.
+
+        This allows tests to run normal adapter macros as if reset_adapters()
+        were not called by handle_and_check (for asserts, etc)
+        """
+        if name is None:
+            name = '__test'
+        with patch.object(providers, 'get_adapter', return_value=self.adapter):
+            with self.adapter.connection_named(name):
+                conn = self.adapter.connections.get_thread_connection()
+                yield conn
+
+    def get_relation_columns(self, relation):
+        with self.get_connection():
+            columns = self.adapter.get_columns_in_relation(relation)
+
+        return sorted(((c.name, c.dtype, c.char_size) for c in columns),
+                      key=lambda x: x[0])
+
+    def get_table_columns(self, table, schema=None, database=None):
+        schema = self.unique_schema() if schema is None else schema
+        database = self.default_database if database is None else database
+        relation = self.adapter.Relation.create(
+            database=database,
+            schema=schema,
+            identifier=table,
+            type='table',
+            quote_policy=self.config.quoting
+        )
+        return self.get_relation_columns(relation)
+
+    def get_table_columns_as_dict(self, tables, schema=None):
+        col_matrix = self.get_many_table_columns(tables, schema)
+        res = {}
+        for row in col_matrix:
+            table_name = row[0]
+            col_def = row[1:]
+            if table_name not in res:
+                res[table_name] = []
+            res[table_name].append(col_def)
+        return res
+
+    def get_models_in_schema(self, schema=None):
+        schema = self.unique_schema() if schema is None else schema
+        sql = """
+                select table_name,
+                        case when table_type = 'BASE TABLE' then 'table'
+                             when table_type = 'VIEW' then 'view'
+                             else table_type
+                        end as materialization
+                from information_schema.tables
+                where {}
+                order by table_name
+                """
+
+        sql = sql.format(self._ilike('table_schema', schema))
+        result = self.run_sql(sql, fetch='all')
+
+        return {model_name: materialization for (model_name, materialization) in result}
+
+    def _assertTablesEqualSql(self, relation_a, relation_b, columns=None):
+        if columns is None:
+            columns = self.get_relation_columns(relation_a)
+        column_names = [c[0] for c in columns]
+
+        sql = self.adapter.get_rows_different_sql(
+            relation_a, relation_b, column_names
+        )
+
+        return sql
+
+    def assertTablesEqual(self, table_a, table_b,
+                          table_a_schema=None, table_b_schema=None,
+                          table_a_db=None, table_b_db=None):
+        if table_a_schema is None:
+            table_a_schema = self.unique_schema()
+
+        if table_b_schema is None:
+            table_b_schema = self.unique_schema()
+
+        if table_a_db is None:
+            table_a_db = self.default_database
+
+        if table_b_db is None:
+            table_b_db = self.default_database
+
+        relation_a = self._make_relation(table_a, table_a_schema, table_a_db)
+        relation_b = self._make_relation(table_b, table_b_schema, table_b_db)
+
+        self._assertTableColumnsEqual(relation_a, relation_b)
+
+        sql = self._assertTablesEqualSql(relation_a, relation_b)
+        result = self.run_sql(sql, fetch='one')
+
+        self.assertEqual(
+            result[0],
+            0,
+            'row_count_difference nonzero: ' + sql
+        )
+        self.assertEqual(
+            result[1],
+            0,
+            'num_mismatched nonzero: ' + sql
+        )
+
+    def _make_relation(self, identifier, schema=None, database=None):
+        if schema is None:
+            schema = self.unique_schema()
+        if database is None:
+            database = self.default_database
+        return self.adapter.Relation.create(
+            database=database,
+            schema=schema,
+            identifier=identifier,
+            quote_policy=self.config.quoting
+        )
+
+    def get_many_relation_columns(self, relations):
+        """Returns a dict of (datbase, schema) -> (dict of (table_name -> list of columns))
+        """
+        schema_fqns = {}
+        for rel in relations:
+            this_schema = schema_fqns.setdefault((rel.database, rel.schema), [])
+            this_schema.append(rel.identifier)
+
+        column_specs = {}
+        for key, tables in schema_fqns.items():
+            database, schema = key
+            columns = self.get_many_table_columns(tables, schema, database=database)
+            table_columns = {}
+            for col in columns:
+                table_columns.setdefault(col[0], []).append(col[1:])
+            for rel_name, columns in table_columns.items():
+                key = (database, schema, rel_name)
+                column_specs[key] = columns
+
+        return column_specs
+
+    def assertManyRelationsEqual(self, relations, default_schema=None, default_database=None):
+        if default_schema is None:
+            default_schema = self.unique_schema()
+        if default_database is None:
+            default_database = self.default_database
+
+        specs = []
+        for relation in relations:
+            if not isinstance(relation, (tuple, list)):
+                relation = [relation]
+
+            assert len(relation) <= 3
+
+            if len(relation) == 3:
+                relation = self._make_relation(*relation)
+            elif len(relation) == 2:
+                relation = self._make_relation(relation[0], relation[1], default_database)
+            elif len(relation) == 1:
+                relation = self._make_relation(relation[0], default_schema, default_database)
+            else:
+                raise ValueError('relation must be a sequence of 1, 2, or 3 values')
+
+            specs.append(relation)
+
+        with self.get_connection():
+            column_specs = self.get_many_relation_columns(specs)
+
+        # make sure everyone has equal column definitions
+        first_columns = None
+        for relation in specs:
+            key = (relation.database, relation.schema, relation.identifier)
+            # get a good error here instead of a hard-to-diagnose KeyError
+            self.assertIn(key, column_specs, f'No columns found for {key}')
+            columns = column_specs[key]
+            if first_columns is None:
+                first_columns = columns
+            else:
+                self.assertEqual(
+                    first_columns, columns,
+                    '{} did not match {}'.format(str(specs[0]), str(relation))
+                )
+
+        # make sure everyone has the same data. if we got here, everyone had
+        # the same column specs!
+        first_relation = None
+        for relation in specs:
+            if first_relation is None:
+                first_relation = relation
+            else:
+                sql = self._assertTablesEqualSql(first_relation, relation,
+                                                 columns=first_columns)
+                result = self.run_sql(sql, fetch='one')
+
+                self.assertEqual(
+                    result[0],
+                    0,
+                    'row_count_difference nonzero: ' + sql
+                )
+                self.assertEqual(
+                    result[1],
+                    0,
+                    'num_mismatched nonzero: ' + sql
+                )
+
+    def assertManyTablesEqual(self, *args):
+        schema = self.unique_schema()
+
+        all_tables = []
+        for table_equivalencies in args:
+            all_tables += list(table_equivalencies)
+
+        all_cols = self.get_table_columns_as_dict(all_tables, schema)
+
+        for table_equivalencies in args:
+            first_table = table_equivalencies[0]
+            first_relation = self._make_relation(first_table)
+
+            # assert that all tables have the same columns
+            base_result = all_cols[first_table]
+            self.assertTrue(len(base_result) > 0)
+
+            for other_table in table_equivalencies[1:]:
+                other_result = all_cols[other_table]
+                self.assertTrue(len(other_result) > 0)
+                self.assertEqual(base_result, other_result)
+
+                other_relation = self._make_relation(other_table)
+                sql = self._assertTablesEqualSql(first_relation,
+                                                 other_relation,
+                                                 columns=base_result)
+                result = self.run_sql(sql, fetch='one')
+
+                self.assertEqual(
+                    result[0],
+                    0,
+                    'row_count_difference nonzero: ' + sql
+                )
+                self.assertEqual(
+                    result[1],
+                    0,
+                    'num_mismatched nonzero: ' + sql
+                )
+
+
+    def _assertTableRowCountsEqual(self, relation_a, relation_b):
+        cmp_query = """
+            with table_a as (
+
+                select count(*) as num_rows from {}
+
+            ), table_b as (
+
+                select count(*) as num_rows from {}
+
+            )
+
+            select table_a.num_rows - table_b.num_rows as difference
+            from table_a, table_b
+
+        """.format(str(relation_a), str(relation_b))
+
+        res = self.run_sql(cmp_query, fetch='one')
+
+        self.assertEqual(int(res[0]), 0, "Row count of table {} doesn't match row count of table {}. ({} rows different)".format(
+                relation_a.identifier,
+                relation_b.identifier,
+                res[0]
+            )
+        )
+
+    def assertTableDoesNotExist(self, table, schema=None, database=None):
+        columns = self.get_table_columns(table, schema, database)
+
+        self.assertEqual(
+            len(columns),
+            0
+        )
+
+    def assertTableDoesExist(self, table, schema=None, database=None):
+        columns = self.get_table_columns(table, schema, database)
+
+        self.assertGreater(
+            len(columns),
+            0
+        )
+
+    def _assertTableColumnsEqual(self, relation_a, relation_b):
+        table_a_result = self.get_relation_columns(relation_a)
+        table_b_result = self.get_relation_columns(relation_b)
+
+        text_types = {'text', 'character varying', 'character', 'varchar'}
+
+        self.assertEqual(len(table_a_result), len(table_b_result))
+        for a_column, b_column in zip(table_a_result, table_b_result):
+            a_name, a_type, a_size = a_column
+            b_name, b_type, b_size = b_column
+            self.assertEqual(a_name, b_name,
+                '{} vs {}: column "{}" != "{}"'.format(
+                    relation_a, relation_b, a_name, b_name
+                ))
+
+            self.assertEqual(a_type, b_type,
+                '{} vs {}: column "{}" has type "{}" != "{}"'.format(
+                    relation_a, relation_b, a_name, a_type, b_type
+                ))
+
+            self.assertEqual(a_size, b_size,
+                '{} vs {}: column "{}" has size "{}" != "{}"'.format(
+                    relation_a, relation_b, a_name, a_size, b_size
+                ))
+
+    def assertEquals(self, *args, **kwargs):
+        # assertEquals is deprecated. This makes the warnings less chatty
+        self.assertEqual(*args, **kwargs)
+
+    def assertBetween(self, timestr, start, end=None):
+        datefmt = '%Y-%m-%dT%H:%M:%S.%fZ'
+        if end is None:
+            end = datetime.utcnow()
+
+        parsed = datetime.strptime(timestr, datefmt)
+
+        self.assertLessEqual(start, parsed,
+            'parsed date {} happened before {}'.format(
+                parsed,
+                start.strftime(datefmt))
+        )
+        self.assertGreaterEqual(end, parsed,
+            'parsed date {} happened after {}'.format(
+                parsed,
+                end.strftime(datefmt))
+        )
+
 
 def use_profile(profile_name):
     """A decorator to declare a test method as using a particular profile.
@@ -157,7 +906,48 @@ def outer(wrapped):
         def func(self, *args, **kwargs):
             return wrapped(self, *args, **kwargs)
         # sanity check at import time
-        assert DBTIntegrationTest._profile_from_test_name(
-            wrapped.__name__) == profile_name
+        assert _profile_from_test_name(wrapped.__name__) == profile_name
         return func
     return outer
+
+
+class AnyFloat:
+    """Any float. Use this in assertEqual() calls to assert that it is a float.
+    """
+    def __eq__(self, other):
+        return isinstance(other, float)
+
+
+class AnyString:
+    """Any string. Use this in assertEqual() calls to assert that it is a string.
+    """
+    def __eq__(self, other):
+        return isinstance(other, str)
+
+
+class AnyStringWith:
+    def __init__(self, contains=None):
+        self.contains = contains
+
+    def __eq__(self, other):
+        if not isinstance(other, str):
+            return False
+
+        if self.contains is None:
+            return True
+
+        return self.contains in other
+
+    def __repr__(self):
+        return 'AnyStringWith<{!r}>'.format(self.contains)
+
+
+def get_manifest():
+    path = './target/partial_parse.msgpack'
+    if os.path.exists(path):
+        with open(path, 'rb') as fp:
+            manifest_mp = fp.read()
+        manifest: Manifest = Manifest.from_msgpack(manifest_mp)
+        return manifest
+    else:
+        return None
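
For orientation, a hypothetical suite under tests/integration/ built on this new base class might look like the sketch below; the suite name, schema, and model name (my_model) are invented for illustration, and the profile is selected from the adapter name embedded in the test method name, as enforced by the use_profile decorator above:

from tests.integration.base import DBTIntegrationTest, use_profile


class TestExampleModelsApacheSpark(DBTIntegrationTest):
    @property
    def schema(self):
        return "example_models"

    @property
    def models(self):
        # directory of .sql models that sits next to this test file
        return "models"

    @use_profile("apache_spark")
    def test_example_models_apache_spark(self):
        # runs dbt against the per-test profiles.yml and unique schema
        results = self.run_dbt(["run"])
        self.assertEqual(len(results), 1)

        # run_sql() templates {schema} to this run's prefixed schema
        row = self.run_sql(
            "select count(*) from {schema}.my_model", fetch="one"
        )
        self.assertTrue(row[0] >= 0)

        self.assertTableDoesExist("my_model")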
diff --git a/tests/integration/incremental_strategies/data/expected_append.csv b/tests/integration/incremental_strategies/seeds/expected_append.csv
similarity index 100%
rename from tests/integration/incremental_strategies/data/expected_append.csv
rename to tests/integration/incremental_strategies/seeds/expected_append.csv
diff --git a/tests/integration/incremental_strategies/data/expected_overwrite.csv b/tests/integration/incremental_strategies/seeds/expected_overwrite.csv
similarity index 100%
rename from tests/integration/incremental_strategies/data/expected_overwrite.csv
rename to tests/integration/incremental_strategies/seeds/expected_overwrite.csv
diff --git a/tests/integration/incremental_strategies/data/expected_partial_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv
similarity index 100%
rename from tests/integration/incremental_strategies/data/expected_partial_upsert.csv
rename to tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv
diff --git a/tests/integration/incremental_strategies/data/expected_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_upsert.csv
similarity index 100%
rename from tests/integration/incremental_strategies/data/expected_upsert.csv
rename to tests/integration/incremental_strategies/seeds/expected_upsert.csv
diff --git a/tests/integration/persist_docs/data/seed.csv b/tests/integration/persist_docs/seeds/seed.csv
similarity index 100%
rename from tests/integration/persist_docs/data/seed.csv
rename to tests/integration/persist_docs/seeds/seed.csv
diff --git a/tests/integration/persist_docs/data/seeds.yml b/tests/integration/persist_docs/seeds/seeds.yml
similarity index 100%
rename from tests/integration/persist_docs/data/seeds.yml
rename to tests/integration/persist_docs/seeds/seeds.yml
diff --git a/tests/integration/seed_column_types/data/payments.csv b/tests/integration/seed_column_types/seeds/payments.csv
similarity index 100%
rename from tests/integration/seed_column_types/data/payments.csv
rename to tests/integration/seed_column_types/seeds/payments.csv
diff --git a/tests/integration/store_failures/test_store_failures.py b/tests/integration/store_failures/test_store_failures.py
index 7475a293..679bde64 100644
--- a/tests/integration/store_failures/test_store_failures.py
+++ b/tests/integration/store_failures/test_store_failures.py
@@ -21,7 +21,7 @@ def project_config(self):
 
     def test_store_failures(self):
         self.run_dbt(['run'])
-        results = self.run_dbt(['test', '--store-failures'], strict = False)
+        results = self.run_dbt(['test', '--store-failures'])
 
 class TestStoreFailuresApacheSpark(TestStoreFailures):
 
diff --git a/tests/integration/spark-databricks-http.dbtspec b/tests/specs/spark-databricks-http.dbtspec
similarity index 100%
rename from tests/integration/spark-databricks-http.dbtspec
rename to tests/specs/spark-databricks-http.dbtspec
diff --git a/tests/integration/spark-databricks-odbc-cluster.dbtspec b/tests/specs/spark-databricks-odbc-cluster.dbtspec
similarity index 100%
rename from tests/integration/spark-databricks-odbc-cluster.dbtspec
rename to tests/specs/spark-databricks-odbc-cluster.dbtspec
diff --git a/tests/integration/spark-databricks-odbc-sql-endpoint.dbtspec b/tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec
similarity index 100%
rename from tests/integration/spark-databricks-odbc-sql-endpoint.dbtspec
rename to tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec
diff --git a/tests/integration/spark-thrift.dbtspec b/tests/specs/spark-thrift.dbtspec
similarity index 100%
rename from tests/integration/spark-thrift.dbtspec
rename to tests/specs/spark-thrift.dbtspec
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 8c9a19a0..89124d3b 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -166,7 +166,7 @@ def generate_name_macros(package):
             unique_id=f'macro.{package}.{name}',
             package_name=package,
             original_file_path=normalize('macros/macro.sql'),
-            root_path='./dbt_modules/root',
+            root_path='./dbt_packages/root',
             path=normalize('macros/macro.sql'),
             raw_sql=all_sql,
             macro_sql=sql,
diff --git a/tox.ini b/tox.ini
index f1e53d2c..d19e8efb 100644
--- a/tox.ini
+++ b/tox.ini
@@ -20,7 +20,7 @@ deps =
 
 [testenv:integration-spark-databricks-http]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-databricks-http.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-http.dbtspec'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
@@ -29,7 +29,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-cluster]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-databricks-odbc-cluster.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-odbc-cluster.dbtspec'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -39,7 +39,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-databricks-odbc-sql-endpoint.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -50,7 +50,7 @@ deps =
 
 [testenv:integration-spark-thrift]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v tests/integration/spark-thrift.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-thrift.dbtspec'
            /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =

From fcd668b8f8fc12995a8a1f0a96a9fcff3c78acd3 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Mon, 8 Nov 2021 18:38:52 +0100
Subject: [PATCH 273/603] Try bumping thrift upper bound (#248)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index bd7e5522..1ccc7937 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@ def _get_dbt_core_version():
 odbc_extras = ['pyodbc>=4.0.30']
 pyhive_extras = [
     'PyHive[hive]>=0.6.0,<0.7.0',
-    'thrift>=0.11.0,<0.12.0',
+    'thrift>=0.11.0,<0.16.0',
 ]
 all_extras = odbc_extras + pyhive_extras
 

From 6e605cb2d45c2ceb678c91663f74e50e883081d1 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Tue, 9 Nov 2021 08:51:31 +0100
Subject: [PATCH 274/603] Rm py36 support, fix tests failing due to dbt-core
 changes (#253)

* Update tests/unit/utils.py per dbt-core#4212

* Rm py36 support, bump versions

* use python3.8 for testing

Co-authored-by: Kyle Wigley 
---
 .github/workflows/main.yml |   4 +-
 CHANGELOG.md               |   6 +
 setup.py                   |  21 +++-
 tests/unit/utils.py        | 224 +++++++++++++++++++++++++++++++++++--
 tox.ini                    |  12 +-
 5 files changed, 244 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f5144b15..60a0d6f6 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -79,7 +79,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.6, 3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
+        python-version: [3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
 
     env:
       TOXENV: "unit"
@@ -166,7 +166,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        python-version: [3.7, 3.8, 3.9]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c336aa50..829889fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+## dbt-spark 1.0.0 (Release TBD)
+
+### Under the hood
+
+- Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([dbt-core#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#253](https://github.com/dbt-labs/dbt-snowflake/pull/253))
+
 ## dbt-spark 0.21.1 (Release TBD)
 
 ### Fixes
diff --git a/setup.py b/setup.py
index 1ccc7937..5d08179b 100644
--- a/setup.py
+++ b/setup.py
@@ -4,9 +4,9 @@
 import re
 
 # require python 3.6 or newer
-if sys.version_info < (3, 6):
+if sys.version_info < (3, 7):
     print('Error: dbt does not support this version of Python.')
-    print('Please upgrade to Python 3.6 or higher.')
+    print('Please upgrade to Python 3.7 or higher.')
     sys.exit(1)
 
 
@@ -90,5 +90,20 @@ def _get_dbt_core_version():
         "ODBC": odbc_extras,
         "PyHive":  pyhive_extras,
         "all": all_extras
-    }
+    },
+    zip_safe=False,
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        
+        'License :: OSI Approved :: Apache Software License',
+        
+        'Operating System :: Microsoft :: Windows',
+        'Operating System :: MacOS :: MacOS X',
+        'Operating System :: POSIX :: Linux',
+
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+    ],
+    python_requires=">=3.7",
 )
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 89124d3b..68d0fc33 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -1,18 +1,20 @@
 """Unit test utility functions.
-
 Note that all imports should be inside the functions to avoid import/mocking
 issues.
 """
+import string
 import os
 from unittest import mock
 from unittest import TestCase
 
-from hologram import ValidationError
+import agate
+import pytest
+from dbt.dataclass_schema import ValidationError
+from dbt.config.project import PartialProject
 
 
 def normalize(path):
     """On windows, neither is enough on its own:
-
     >>> normcase('C:\\documents/ALL CAPS/subdir\\..')
     'c:\\documents\\all caps\\subdir\\..'
     >>> normpath('C:\\documents/ALL CAPS/subdir\\..')
@@ -28,9 +30,10 @@ class Obj:
     single_threaded = False
 
 
-def mock_connection(name):
+def mock_connection(name, state='open'):
     conn = mock.MagicMock()
     conn.name = name
+    conn.state = state
     return conn
 
 
@@ -42,7 +45,7 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'):
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
-    renderer = ProfileRenderer(generate_base_context(cli_vars))
+    renderer = ProfileRenderer(cli_vars)
     return Profile.from_raw_profile_info(
         profile,
         profile_name,
@@ -58,13 +61,18 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars=
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
-    renderer = DbtProjectYamlRenderer(generate_target_context(profile, cli_vars))
+    renderer = DbtProjectYamlRenderer(profile, cli_vars)
 
     project_root = project.pop('project-root', os.getcwd())
 
-    return Project.render_from_dict(
-            project_root, project, packages, selectors, renderer
-        )
+    partial = PartialProject.from_dicts(
+        project_root=project_root,
+        project_dict=project,
+        packages_dict=packages,
+        selectors_dict=selectors,
+    )
+    return partial.render(renderer)
+
 
 
 def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars='{}'):
@@ -108,6 +116,14 @@ def inject_plugin(plugin):
     FACTORY.plugins[key] = plugin
 
 
+def inject_plugin_for(config):
+    # from dbt.adapters.postgres import Plugin, PostgresAdapter
+    from dbt.adapters.factory import FACTORY
+    FACTORY.load_plugin(config.credentials.type)
+    adapter = FACTORY.get_adapter(config)
+    return adapter
+
+
 def inject_adapter(value, plugin):
     """Inject the given adapter into the adapter factory, so your hand-crafted
     artisanal adapter will be available from get_adapter() as if dbt loaded it.
@@ -118,6 +134,13 @@ def inject_adapter(value, plugin):
     FACTORY.adapters[key] = value
 
 
+def clear_plugin(plugin):
+    from dbt.adapters.factory import FACTORY
+    key = plugin.adapter.type()
+    FACTORY.plugins.pop(key, None)
+    FACTORY.adapters.pop(key, None)
+
+
 class ContractTestCase(TestCase):
     ContractType = None
 
@@ -126,11 +149,12 @@ def setUp(self):
         super().setUp()
 
     def assert_to_dict(self, obj, dct):
-        self.assertEqual(obj.to_dict(), dct)
+        self.assertEqual(obj.to_dict(omit_none=True), dct)
 
     def assert_from_dict(self, obj, dct, cls=None):
         if cls is None:
             cls = self.ContractType
+        cls.validate(dct)
         self.assertEqual(cls.from_dict(dct),  obj)
 
     def assert_symmetric(self, obj, dct, cls=None):
@@ -142,9 +166,59 @@ def assert_fails_validation(self, dct, cls=None):
             cls = self.ContractType
 
         with self.assertRaises(ValidationError):
+            cls.validate(dct)
             cls.from_dict(dct)
 
 
+def compare_dicts(dict1, dict2):
+    first_set = set(dict1.keys())
+    second_set = set(dict2.keys())
+    print(f"--- Difference between first and second keys: {first_set.difference(second_set)}")
+    print(f"--- Difference between second and first keys: {second_set.difference(first_set)}")
+    common_keys = set(first_set).intersection(set(second_set))
+    found_differences = False
+    for key in common_keys:
+        if dict1[key] != dict2[key] :
+            print(f"--- --- first dict: {key}: {str(dict1[key])}")
+            print(f"--- --- second dict: {key}: {str(dict2[key])}")
+            found_differences = True
+    if found_differences:
+        print("--- Found differences in dictionaries")
+    else:
+        print("--- Found no differences in dictionaries")
+
+
+def assert_from_dict(obj, dct, cls=None):
+    if cls is None:
+        cls = obj.__class__
+    cls.validate(dct)
+    obj_from_dict = cls.from_dict(dct)
+    if hasattr(obj, 'created_at'):
+        obj_from_dict.created_at = 1
+        obj.created_at = 1
+    assert obj_from_dict == obj
+
+
+def assert_to_dict(obj, dct):
+    obj_to_dict = obj.to_dict(omit_none=True)
+    if 'created_at' in obj_to_dict:
+        obj_to_dict['created_at'] = 1
+    if 'created_at' in dct:
+        dct['created_at'] = 1
+    assert obj_to_dict == dct
+
+
+def assert_symmetric(obj, dct, cls=None):
+    assert_to_dict(obj, dct)
+    assert_from_dict(obj, dct, cls)
+
+
+def assert_fails_validation(dct, cls):
+    with pytest.raises(ValidationError):
+        cls.validate(dct)
+        cls.from_dict(dct)
+
+
 def generate_name_macros(package):
     from dbt.contracts.graph.parsed import ParsedMacro
     from dbt.node_types import NodeType
@@ -158,7 +232,6 @@ def generate_name_macros(package):
         sql = f'{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}'
         name_sql[name] = sql
 
-    all_sql = '\n'.join(name_sql.values())
     for name, sql in name_sql.items():
         pm = ParsedMacro(
             name=name,
@@ -168,7 +241,134 @@ def generate_name_macros(package):
             original_file_path=normalize('macros/macro.sql'),
             root_path='./dbt_packages/root',
             path=normalize('macros/macro.sql'),
-            raw_sql=all_sql,
             macro_sql=sql,
         )
         yield pm
+
+
+class TestAdapterConversions(TestCase):
+    def _get_tester_for(self, column_type):
+        from dbt.clients import agate_helper
+        if column_type is agate.TimeDelta:  # dbt never makes this!
+            return agate.TimeDelta()
+
+        for instance in agate_helper.DEFAULT_TYPE_TESTER._possible_types:
+            if type(instance) is column_type:
+                return instance
+
+        raise ValueError(f'no tester for {column_type}')
+
+    def _make_table_of(self, rows, column_types):
+        column_names = list(string.ascii_letters[:len(rows[0])])
+        if isinstance(column_types, type):
+            column_types = [self._get_tester_for(column_types) for _ in column_names]
+        else:
+            column_types = [self._get_tester_for(typ) for typ in column_types]
+        table = agate.Table(rows, column_names=column_names, column_types=column_types)
+        return table
+
+
+def MockMacro(package, name='my_macro', **kwargs):
+    from dbt.contracts.graph.parsed import ParsedMacro
+    from dbt.node_types import NodeType
+
+    mock_kwargs = dict(
+        resource_type=NodeType.Macro,
+        package_name=package,
+        unique_id=f'macro.{package}.{name}',
+        original_file_path='/dev/null',
+    )
+
+    mock_kwargs.update(kwargs)
+
+    macro = mock.MagicMock(
+        spec=ParsedMacro,
+        **mock_kwargs
+    )
+    macro.name = name
+    return macro
+
+
+def MockMaterialization(package, name='my_materialization', adapter_type=None, **kwargs):
+    if adapter_type is None:
+        adapter_type = 'default'
+    kwargs['adapter_type'] = adapter_type
+    return MockMacro(package, f'materialization_{name}_{adapter_type}', **kwargs)
+
+
+def MockGenerateMacro(package, component='some_component', **kwargs):
+    name = f'generate_{component}_name'
+    return MockMacro(package, name=name, **kwargs)
+
+
+def MockSource(package, source_name, name, **kwargs):
+    from dbt.node_types import NodeType
+    from dbt.contracts.graph.parsed import ParsedSourceDefinition
+    src = mock.MagicMock(
+        __class__=ParsedSourceDefinition,
+        resource_type=NodeType.Source,
+        source_name=source_name,
+        package_name=package,
+        unique_id=f'source.{package}.{source_name}.{name}',
+        search_name=f'{source_name}.{name}',
+        **kwargs
+    )
+    src.name = name
+    return src
+
+
+def MockNode(package, name, resource_type=None, **kwargs):
+    from dbt.node_types import NodeType
+    from dbt.contracts.graph.parsed import ParsedModelNode, ParsedSeedNode
+    if resource_type is None:
+        resource_type = NodeType.Model
+    if resource_type == NodeType.Model:
+        cls = ParsedModelNode
+    elif resource_type == NodeType.Seed:
+        cls = ParsedSeedNode
+    else:
+        raise ValueError(f'I do not know how to handle {resource_type}')
+    node = mock.MagicMock(
+        __class__=cls,
+        resource_type=resource_type,
+        package_name=package,
+        unique_id=f'{str(resource_type)}.{package}.{name}',
+        search_name=name,
+        **kwargs
+    )
+    node.name = name
+    return node
+
+
+def MockDocumentation(package, name, **kwargs):
+    from dbt.node_types import NodeType
+    from dbt.contracts.graph.parsed import ParsedDocumentation
+    doc = mock.MagicMock(
+        __class__=ParsedDocumentation,
+        resource_type=NodeType.Documentation,
+        package_name=package,
+        search_name=name,
+        unique_id=f'{package}.{name}',
+        **kwargs
+    )
+    doc.name = name
+    return doc
+
+
+def load_internal_manifest_macros(config, macro_hook=lambda m: None):
+    from dbt.parser.manifest import ManifestLoader
+    return ManifestLoader.load_macros(config, macro_hook)
+
+
+def dict_replace(dct, **kwargs):
+    dct = dct.copy()
+    dct.update(kwargs)
+    return dct
+
+
+def replace_config(n, **kwargs):
+    return n.replace(
+        config=n.config.replace(**kwargs),
+        unrendered_config=dict_replace(n.unrendered_config, **kwargs),
+    )
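
Stepping back from the diff above: TestAdapterConversions._make_table_of builds a small agate table whose column names are just consecutive letters, so adapter convert_*_type tests can focus on the values. A rough stand-alone illustration of what it constructs, using explicit agate type testers instead of dbt's DEFAULT_TYPE_TESTER (the rows below are made up for the example):

    import string
    import agate

    # Two rows, two columns; column names become ['a', 'b'], as in _make_table_of.
    rows = [(1, "x"), (2, "y")]
    column_names = list(string.ascii_letters[:len(rows[0])])
    table = agate.Table(
        rows,
        column_names=column_names,
        column_types=[agate.Number(), agate.Text()],  # stand-ins for DEFAULT_TYPE_TESTER entries
    )
    assert [c.name for c in table.columns] == ["a", "b"]
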
diff --git a/tox.ini b/tox.ini
index d19e8efb..993ab801 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,14 +4,14 @@ envlist = unit, flake8, integration-spark-thrift
 
 
 [testenv:flake8]
-basepython = python3
+basepython = python3.8
 commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 dbt/'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
      -r{toxinidir}/dev_requirements.txt
 
 [testenv:unit]
-basepython = python3
+basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
@@ -19,7 +19,7 @@ deps =
     -r{toxinidir}/dev_requirements.txt
 
 [testenv:integration-spark-databricks-http]
-basepython = python3
+basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-http.dbtspec'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
@@ -28,7 +28,7 @@ deps =
     -e.
 
 [testenv:integration-spark-databricks-odbc-cluster]
-basepython = python3
+basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-odbc-cluster.dbtspec'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
@@ -38,7 +38,7 @@ deps =
     -e.
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
-basepython = python3
+basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
@@ -49,7 +49,7 @@ deps =
 
 
 [testenv:integration-spark-thrift]
-basepython = python3
+basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-thrift.dbtspec'
            /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS

From 406868c0415012b9eab7cfb2e2d1648a2d106820 Mon Sep 17 00:00:00 2001
From: Emily Rockman 
Date: Wed, 10 Nov 2021 05:59:48 -0600
Subject: [PATCH 275/603] convert to use struct logging for adapters (#251)

* convert to use struct logging for adapters

* add changelog

* Update integration tests

Co-authored-by: Jeremy Cohen 
---
 CHANGELOG.md                      |  3 +++
 dbt/adapters/spark/connections.py |  4 +++-
 dbt/adapters/spark/impl.py        |  4 +++-
 tests/integration/base.py         | 17 ++++++++++-------
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 829889fe..1cffa467 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,9 @@
 - Add support for `on_schema_change` config in incremental models: `ignore`, `fail`, `append_new_columns`. For `sync_all_columns`, removing columns is not supported by Apache Spark or Delta Lake ([#198](https://github.com/dbt-labs/dbt-spark/issues/198), [#226](https://github.com/dbt-labs/dbt-spark/issues/226), [#229](https://github.com/dbt-labs/dbt-spark/pull/229))
 - Add `persist_docs` call to incremental model ([#224](https://github.com/dbt-labs/dbt-spark/issues/224), [#234](https://github.com/dbt-labs/dbt-spark/pull/234))
 
+### Under the hood
+- Add support for structured logging [#251](https://github.com/dbt-labs/dbt-spark/pull/251)
+
 ### Contributors
 - [@binhnefits](https://github.com/binhnefits) ([#234](https://github.com/dbt-labs/dbt-spark/pull/234))
 
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 41413f35..1848a72a 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -4,7 +4,7 @@
 from dbt.adapters.base import Credentials
 from dbt.adapters.sql import SQLConnectionManager
 from dbt.contracts.connection import ConnectionState
-from dbt.logger import GLOBAL_LOGGER as logger
+from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
 from dbt.adapters.spark import __version__
 
@@ -42,6 +42,8 @@
 import base64
 import time
 
+logger = AdapterLogger("Spark")
+
 NUMBERS = DECIMALS + (int, float)
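
A note on the AdapterLogger introduced above: it tags every message with the adapter name ("Spark") and routes it through dbt's structured event pipeline instead of the old GLOBAL_LOGGER. A minimal usage sketch, assuming only the debug/error methods available on AdapterLogger in dbt-core 1.0; open_session and its host argument are hypothetical:

    from dbt.events import AdapterLogger

    logger = AdapterLogger("Spark")  # events carry the adapter name

    def open_session(host: str) -> None:
        logger.debug(f"Opening a Spark session against {host}")
        try:
            ...  # establish the connection here
        except Exception as exc:
            logger.error(f"Could not connect to {host}: {exc}")
            raise
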
 
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 6acbcd44..0a8e4c49 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -16,9 +16,11 @@
 from dbt.adapters.spark import SparkColumn
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
-from dbt.logger import GLOBAL_LOGGER as logger
+from dbt.events import AdapterLogger
 from dbt.utils import executor
 
+logger = AdapterLogger("Spark")
+
 GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation'
 LIST_SCHEMAS_MACRO_NAME = 'list_schemas'
 LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching'
diff --git a/tests/integration/base.py b/tests/integration/base.py
index 50f4ef91..acce6a74 100644
--- a/tests/integration/base.py
+++ b/tests/integration/base.py
@@ -23,9 +23,15 @@
 from dbt.clients.jinja import template_cache
 from dbt.config import RuntimeConfig
 from dbt.context import providers
-from dbt.logger import GLOBAL_LOGGER as logger, log_manager
+from dbt.logger import log_manager
+from dbt.events.functions import (
+    capture_stdout_logs, fire_event, setup_event_logger, stop_capture_stdout_logs
+)
+from dbt.events import AdapterLogger
 from dbt.contracts.graph.manifest import Manifest
 
+logger = AdapterLogger("Spark")
+
 INITIAL_ROOT = os.getcwd()
 
 
@@ -269,6 +275,7 @@ def setUp(self):
         os.chdir(self.initial_dir)
         # before we go anywhere, collect the initial path info
         self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix)
+        setup_event_logger(self._logs_dir)
         _really_makedirs(self._logs_dir)
         self.test_original_source_path = _pytest_get_test_root()
         self.test_root_dir = self._generate_test_root_dir()
@@ -439,16 +446,12 @@ def run_dbt(self, args=None, expect_pass=True, profiles_dir=True):
 
     def run_dbt_and_capture(self, *args, **kwargs):
         try:
-            initial_stdout = log_manager.stdout
-            initial_stderr = log_manager.stderr
-            stringbuf = io.StringIO()
-            log_manager.set_output_stream(stringbuf)
-
+            stringbuf = capture_stdout_logs()
             res = self.run_dbt(*args, **kwargs)
             stdout = stringbuf.getvalue()
 
         finally:
-            log_manager.set_output_stream(initial_stdout, initial_stderr)
+            stop_capture_stdout_logs()
 
         return res, stdout
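
The run_dbt_and_capture change above boils down to one pattern: ask dbt's event system for a capture buffer, run, read the buffer, and always release the capture. Restated as a stand-alone sketch (run_and_capture and its fn callable are placeholders for self.run_dbt(...)):

    from dbt.events.functions import capture_stdout_logs, stop_capture_stdout_logs

    def run_and_capture(fn):
        try:
            stringbuf = capture_stdout_logs()  # StringIO collecting dbt's stdout log stream
            result = fn()
            output = stringbuf.getvalue()
        finally:
            stop_capture_stdout_logs()
        return result, output
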
 

From e239dfb4826b7fad258f655b01f34cc300c6f531 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 10 Nov 2021 17:30:49 +0100
Subject: [PATCH 276/603] Bumping version to 1.0.0rc1 (#254)

* Bump version to 1.0.0rc1

* Update changelog
---
 .bumpversion.cfg                  |  3 ++-
 CHANGELOG.md                      | 10 +++++++---
 dbt/adapters/spark/__version__.py |  2 +-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index dfa9e026..26af54b0 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.0.0b1
+current_version = 1.0.0rc1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
@@ -27,3 +27,4 @@ first_value = 1
 first_value = 1
 
 [bumpversion:file:dbt/adapters/spark/__version__.py]
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1cffa467..ff6f4908 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,19 +1,23 @@
 ## dbt-spark 1.0.0 (Release TBD)
 
+## dbt-spark 1.0.0rc1 (November 10, 2021)
+
 ### Under the hood
 
 - Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([dbt-core#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#253](https://github.com/dbt-labs/dbt-snowflake/pull/253))
 
+### Under the hood
+- Add support for structured logging [#251](https://github.com/dbt-labs/dbt-spark/pull/251)
+
 ## dbt-spark 0.21.1 (Release TBD)
 
+## dbt-spark 0.21.1rc1 (November 3, 2021)
+
 ### Fixes
 - Fix `--store-failures` for tests, by suppressing irrelevant error in `comment_clause()` macro ([#232](https://github.com/dbt-labs/dbt-spark/issues/232), [#233](https://github.com/dbt-labs/dbt-spark/pull/233))
 - Add support for `on_schema_change` config in incremental models: `ignore`, `fail`, `append_new_columns`. For `sync_all_columns`, removing columns is not supported by Apache Spark or Delta Lake ([#198](https://github.com/dbt-labs/dbt-spark/issues/198), [#226](https://github.com/dbt-labs/dbt-spark/issues/226), [#229](https://github.com/dbt-labs/dbt-spark/pull/229))
 - Add `persist_docs` call to incremental model ([#224](https://github.com/dbt-labs/dbt-spark/issues/224), [#234](https://github.com/dbt-labs/dbt-spark/pull/234))
 
-### Under the hood
-- Add support for structured logging [#251](https://github.com/dbt-labs/dbt-spark/pull/251)
-
 ### Contributors
 - [@binhnefits](https://github.com/binhnefits) ([#234](https://github.com/dbt-labs/dbt-spark/pull/234))
 
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index affc65fe..b25b7673 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.0.0b1"
+version = "1.0.0rc1"

From 05678b3dd366098814b99ff5d2b919b33375b487 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Fri, 12 Nov 2021 17:42:51 +0100
Subject: [PATCH 277/603] Refactor seed macros, clearer sql param logging
 (#250)

* Try refactoring seed macros

* Add changelog entry
---
 CHANGELOG.md                                  |   5 +-
 .../spark/macros/materializations/seed.sql    | 109 +++++++-----------
 2 files changed, 46 insertions(+), 68 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff6f4908..386ad2cb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,13 @@
 ## dbt-spark 1.0.0 (Release TBD)
 
+### Under the hood
+- Refactor seed macros: remove duplicated code from dbt-core, and provide clearer logging of SQL parameters that differ by connection method ([#249](https://github.com/dbt-labs/dbt-spark/issues/249), [#250](https://github.com/dbt-labs/dbt-snowflake/pull/250))
+
 ## dbt-spark 1.0.0rc1 (November 10, 2021)
 
 ### Under the hood
-
 - Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([dbt-core#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#253](https://github.com/dbt-labs/dbt-snowflake/pull/253))
+- Add support for structured logging [#251](https://github.com/dbt-labs/dbt-spark/pull/251)
 
 ### Under the hood
 - Add support for structured logging [#251](https://github.com/dbt-labs/dbt-spark/pull/251)
diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql
index 536e6447..196479cb 100644
--- a/dbt/include/spark/macros/materializations/seed.sql
+++ b/dbt/include/spark/macros/materializations/seed.sql
@@ -1,40 +1,8 @@
-{% macro spark__load_csv_rows(model, agate_table) %}
-    {% set batch_size = 1000 %}
-    {% set column_override = model['config'].get('column_types', {}) %}
-    
-    {% set statements = [] %}
-
-    {% for chunk in agate_table.rows | batch(batch_size) %}
-        {% set bindings = [] %}
-
-        {% for row in chunk %}
-          {% do bindings.extend(row) %}
-        {% endfor %}
-
-        {% set sql %}
-            insert into {{ this.render() }} values
-            {% for row in chunk -%}
-                ({%- for col_name in agate_table.column_names -%}
-                    {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%}
-                    {%- set type = column_override.get(col_name, inferred_type) -%}
-                      cast(%s as {{type}})
-                    {%- if not loop.last%},{%- endif %}
-                {%- endfor -%})
-                {%- if not loop.last%},{%- endif %}
-            {%- endfor %}
-        {% endset %}
-
-        {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}
-
-        {% if loop.index0 == 0 %}
-            {% do statements.append(sql) %}
-        {% endif %}
-    {% endfor %}
-
-    {# Return SQL so we can render it out into the compiled files #}
-    {{ return(statements[0]) }}
+{% macro spark__get_binding_char() %}
+  {{ return('?' if target.method == 'odbc' else '%s') }}
 {% endmacro %}
 
+
 {% macro spark__reset_csv_table(model, full_refresh, old_relation, agate_table) %}
     {% if old_relation %}
         {{ adapter.drop_relation(old_relation) }}
@@ -44,6 +12,45 @@
 {% endmacro %}
 
 
+{% macro spark__load_csv_rows(model, agate_table) %}
+
+  {% set batch_size = get_batch_size() %}
+  {% set column_override = model['config'].get('column_types', {}) %}
+
+  {% set statements = [] %}
+
+  {% for chunk in agate_table.rows | batch(batch_size) %}
+      {% set bindings = [] %}
+
+      {% for row in chunk %}
+          {% do bindings.extend(row) %}
+      {% endfor %}
+
+      {% set sql %}
+          insert into {{ this.render() }} values
+          {% for row in chunk -%}
+              ({%- for col_name in agate_table.column_names -%}
+                  {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%}
+                  {%- set type = column_override.get(col_name, inferred_type) -%}
+                    cast({{ get_binding_char() }} as {{type}})
+                  {%- if not loop.last%},{%- endif %}
+              {%- endfor -%})
+              {%- if not loop.last%},{%- endif %}
+          {%- endfor %}
+      {% endset %}
+
+      {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}
+
+      {% if loop.index0 == 0 %}
+          {% do statements.append(sql) %}
+      {% endif %}
+  {% endfor %}
+
+  {# Return SQL so we can render it out into the compiled files #}
+  {{ return(statements[0]) }}
+{% endmacro %}
+
+
 {% macro spark__create_csv_table(model, agate_table) %}
   {%- set column_override = model['config'].get('column_types', {}) -%}
   {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}
@@ -70,35 +77,3 @@
 
   {{ return(sql) }}
 {% endmacro %}
-
-
-{% materialization seed, adapter='spark' %}
-
-  {%- set identifier = model['alias'] -%}
-  {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
-  {%- set target_relation = api.Relation.create(database=database, schema=schema, identifier=identifier,
-                                               type='table') -%}
-  {%- set agate_table = load_agate_table() -%}
-  {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}
-
-  {{ run_hooks(pre_hooks) }}
-
-  -- build model
-  {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}
-  {% set status = 'CREATE' %}
-  {% set num_rows = (agate_table.rows | length) %}
-  {% set sql = load_csv_rows(model, agate_table) %}
-
-  {% call noop_statement('main', status ~ ' ' ~ num_rows) %}
-    {{ create_table_sql }};
-    -- dbt seed --
-    {{ sql }}
-  {% endcall %}
-
-  {% do persist_docs(target_relation, model) %}
-
-  {{ run_hooks(post_hooks) }}
-
-  {{ return({'relations': [target_relation]}) }}
-
-{% endmaterialization %}
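
The key piece of this refactor is spark__get_binding_char(): pyodbc (the odbc method) binds query parameters with DB-API qmark-style "?" placeholders, while the thrift/http connections accept printf-style "%s". A tiny Python paraphrase of the macro, offered as a sketch rather than project code:

    def get_binding_char(method: str) -> str:
        # mirrors {{ return('?' if target.method == 'odbc' else '%s') }}
        return "?" if method == "odbc" else "%s"

    assert get_binding_char("odbc") == "?"
    assert get_binding_char("thrift") == "%s"
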

From 68a3b5a11a4d5704460821cb394657e03082f657 Mon Sep 17 00:00:00 2001
From: Vinoth Govindarajan 
Date: Fri, 19 Nov 2021 02:39:29 -0800
Subject: [PATCH 278/603] 187: Adding apache hudi support to dbt (#210)

* initial working version

* Rebased and resolved all the merge conflicts.

* Rebased and resolved merge conflicts.

* Removed hudi dep jar and used the released version via packages option

* Added insert overwrite unit tests for hudi

* Used unique_key as default value for hudi primaryKey option

* Updated changelog.md with this new update.

* Final round of testing and few minor fixes

* Fixed lint issues

* Fixed the integration tests

* Fixed the circle ci env to add hudi packages

* Updated hudi spark bundle to use scala 2.11

* Fixed Hudi incremental strategy integration tests and other integration tests

* Fixed the hudi hive sync hms integration test issues

* Added sql HMS config to fix the integration tests.

* Added hudi hive sync mode conf to CI

* Set the hms schema verification to false

* Removed the merge update columns since it's not supported.

* Passed the correct hiveconf to the circle ci build script

* Disabled a few incremental tests for spark2 and reverted to spark2 config

* Added hudi configs to the circle ci build script

* Commented out the Hudi integration test until we have the hudi 0.10.0 version

* Fixed the macro which checks the table type.

* Disabled this model since hudi is not supported in databricks runtime, will be added later
---
 .circleci/config.yml                          | 12 +++++++
 CHANGELOG.md                                  |  2 ++
 dbt/adapters/spark/impl.py                    | 13 +++++++
 dbt/adapters/spark/relation.py                |  1 +
 dbt/include/spark/macros/adapters.sql         | 13 ++++++-
 .../materializations/incremental/validate.sql |  6 ++--
 .../macros/materializations/snapshot.sql      |  8 ++---
 docker-compose.yml                            |  1 +
 docker/hive-site.xml                          |  4 +++
 docker/spark-defaults.conf                    |  7 ++++
 .../models_hudi/append.sql                    | 19 ++++++++++
 .../insert_overwrite_no_partitions.sql        | 19 ++++++++++
 .../insert_overwrite_partitions.sql           | 20 +++++++++++
 .../models_hudi/merge_no_key.sql              | 19 ++++++++++
 .../models_hudi/merge_unique_key.sql          | 20 +++++++++++
 .../models_hudi/merge_update_columns.sql      | 22 ++++++++++++
 .../test_incremental_strategies.py            | 24 +++++++++++++
 .../persist_docs/models/schema.yml            | 24 +++++++++++++
 tests/unit/test_macros.py                     | 35 +++++++++++++++++++
 19 files changed, 261 insertions(+), 8 deletions(-)
 create mode 100644 docker/spark-defaults.conf
 create mode 100644 tests/integration/incremental_strategies/models_hudi/append.sql
 create mode 100644 tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
 create mode 100644 tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
 create mode 100644 tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
 create mode 100644 tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
 create mode 100644 tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 99154fb6..4921fac9 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -25,6 +25,18 @@ jobs:
           --conf spark.hadoop.javax.jdo.option.ConnectionUserName=dbt
           --conf spark.hadoop.javax.jdo.option.ConnectionPassword=dbt
           --conf spark.hadoop.javax.jdo.option.ConnectionDriverName=org.postgresql.Driver
+          --conf spark.serializer=org.apache.spark.serializer.KryoSerializer
+          --conf spark.jars.packages=org.apache.hudi:hudi-spark-bundle_2.11:0.9.0
+          --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension
+          --conf spark.driver.userClassPathFirst=true
+          --conf spark.hadoop.datanucleus.autoCreateTables=true
+          --conf spark.hadoop.datanucleus.schema.autoCreateTables=true
+          --conf spark.hadoop.datanucleus.fixedDatastore=false
+          --conf spark.sql.hive.convertMetastoreParquet=false
+          --hiveconf hoodie.datasource.hive_sync.use_jdbc=false
+          --hiveconf hoodie.datasource.hive_sync.mode=hms
+          --hiveconf datanucleus.schema.autoCreateAll=true
+          --hiveconf hive.metastore.schema.verification=false
 
       - image: postgres:9.6.17-alpine
         environment:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 386ad2cb..1f4a030d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@
 ### Fixes
 - Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesn't return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - Properly parse columns when there are struct fields, to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202))
+- Add support for Apache Hudi (hudi file format) which supports incremental merge strategies: Issue ([#187](https://github.com/dbt-labs/dbt-spark/issues/187)) 
 
 ### Under the hood
 - Add `unique_field` to better understand adapter adoption in anonymous usage tracking ([#211](https://github.com/dbt-labs/dbt-spark/pull/211))
@@ -36,6 +37,7 @@
 ### Contributors
 - [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204))
+- [@vingov](https://github.com/vingov) ([#210](https://github.com/dbt-labs/dbt-spark/pull/210))
 
 ## dbt-spark 0.21.0b2 (August 20, 2021)
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 0a8e4c49..74845422 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -70,6 +70,13 @@ class SparkAdapter(SQLAdapter):
     INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE)
     INFORMATION_STATISTICS_REGEX = re.compile(
         r"^Statistics: (.*)$", re.MULTILINE)
+    HUDI_METADATA_COLUMNS = [
+        '_hoodie_commit_time',
+        '_hoodie_commit_seqno',
+        '_hoodie_record_key',
+        '_hoodie_partition_path',
+        '_hoodie_file_name'
+    ]
 
     Relation = SparkRelation
     Column = SparkColumn
@@ -145,12 +152,14 @@ def list_relations_without_caching(
             rel_type = RelationType.View \
                 if 'Type: VIEW' in information else RelationType.Table
             is_delta = 'Provider: delta' in information
+            is_hudi = 'Provider: hudi' in information
             relation = self.Relation.create(
                 schema=_schema,
                 identifier=name,
                 type=rel_type,
                 information=information,
                 is_delta=is_delta,
+                is_hudi=is_hudi,
             )
             relations.append(relation)
 
@@ -224,6 +233,10 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
             # which would execute 'describe extended tablename' query
             rows: List[agate.Row] = super().get_columns_in_relation(relation)
             columns = self.parse_describe_extended(relation, rows)
+
+        # strip hudi metadata columns.
+        columns = [x for x in columns
+                   if x.name not in self.HUDI_METADATA_COLUMNS]
         return columns
 
     def parse_columns_from_information(
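
For context on the get_columns_in_relation change above: Hudi adds its own bookkeeping columns (the _hoodie_* fields) to every table, and stripping them keeps generated docs and catalog output limited to user-defined columns. A small sketch with hypothetical column names:

    HUDI_METADATA_COLUMNS = [
        '_hoodie_commit_time',
        '_hoodie_commit_seqno',
        '_hoodie_record_key',
        '_hoodie_partition_path',
        '_hoodie_file_name',
    ]

    described = ['id', 'msg', '_hoodie_commit_time', '_hoodie_record_key']  # hypothetical
    user_columns = [c for c in described if c not in HUDI_METADATA_COLUMNS]
    assert user_columns == ['id', 'msg']
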
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index 5fc09655..043cabfa 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -26,6 +26,7 @@ class SparkRelation(BaseRelation):
     include_policy: SparkIncludePolicy = SparkIncludePolicy()
     quote_character: str = '`'
     is_delta: Optional[bool] = None
+    is_hudi: Optional[bool] = None
     information: str = None
 
     def __post_init__(self):
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index ee59b813..2542af81 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -15,6 +15,17 @@
 
 {% macro options_clause() -%}
   {%- set options = config.get('options') -%}
+  {%- if config.get('file_format') == 'hudi' -%}
+    {%- set unique_key = config.get('unique_key') -%}
+    {%- if unique_key is not none and options is none -%}
+      {%- set options = {'primaryKey': config.get('unique_key')} -%}
+    {%- elif unique_key is not none and options is not none and 'primaryKey' not in options -%}
+      {%- set _ = options.update({'primaryKey': config.get('unique_key')}) -%}
+    {%- elif options is not none and 'primaryKey' in options and options['primaryKey'] != unique_key -%}
+      {{ exceptions.raise_compiler_error("unique_key and options('primaryKey') should be the same column(s).") }}
+    {%- endif %}
+  {%- endif %}
+
   {%- if options is not none %}
     options (
       {%- for option in options -%}
@@ -181,7 +192,7 @@
 {% endmacro %}
 
 {% macro spark__alter_column_comment(relation, column_dict) %}
-  {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
+  {% if config.get('file_format', validator=validation.any[basestring]) in ['delta', 'hudi'] %}
     {% for column_name in column_dict %}
       {% set comment = column_dict[column_name]['description'] %}
       {% set escaped_comment = comment | replace('\'', '\\\'') %}
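
The options_clause() branches above are easier to follow as plain Python. The sketch below restates the same precedence rules: unique_key seeds primaryKey when it is missing, and an explicit primaryKey that conflicts with unique_key is a compile error (resolve_hudi_options is a hypothetical name used only for illustration):

    def resolve_hudi_options(unique_key, options):
        if unique_key is not None and options is None:
            options = {'primaryKey': unique_key}
        elif unique_key is not None and options is not None and 'primaryKey' not in options:
            options = dict(options, primaryKey=unique_key)
        elif options is not None and 'primaryKey' in options and options['primaryKey'] != unique_key:
            raise ValueError("unique_key and options('primaryKey') should be the same column(s).")
        return options

    assert resolve_hudi_options('id', None) == {'primaryKey': 'id'}
    assert resolve_hudi_options('id', {'compression': 'gzip'}) == {'compression': 'gzip', 'primaryKey': 'id'}
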
diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql
index 400a2eee..3e9de359 100644
--- a/dbt/include/spark/macros/materializations/incremental/validate.sql
+++ b/dbt/include/spark/macros/materializations/incremental/validate.sql
@@ -1,7 +1,7 @@
 {% macro dbt_spark_validate_get_file_format(raw_file_format) %}
   {#-- Validate the file format #}
 
-  {% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm'] %}
+  {% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm', 'hudi'] %}
 
   {% set invalid_file_format_msg -%}
     Invalid file format provided: {{ raw_file_format }}
@@ -26,7 +26,7 @@
 
   {% set invalid_merge_msg -%}
     Invalid incremental strategy provided: {{ raw_strategy }}
-    You can only choose this strategy when file_format is set to 'delta'
+    You can only choose this strategy when file_format is set to 'delta' or 'hudi'
   {%- endset %}
   
   {% set invalid_insert_overwrite_delta_msg -%}
@@ -44,7 +44,7 @@
   {% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %}
     {% do exceptions.raise_compiler_error(invalid_strategy_msg) %}
   {%-else %}
-    {% if raw_strategy == 'merge' and file_format != 'delta' %}
+    {% if raw_strategy == 'merge' and file_format not in ['delta', 'hudi'] %}
       {% do exceptions.raise_compiler_error(invalid_merge_msg) %}
     {% endif %}
     {% if raw_strategy == 'insert_overwrite' and file_format == 'delta' %}
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index 6dad51a0..82d186ce 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -82,18 +82,18 @@
           identifier=target_table,
           type='table') -%}
 
-  {%- if file_format != 'delta' -%}
+  {%- if file_format not in ['delta', 'hudi'] -%}
     {% set invalid_format_msg -%}
       Invalid file format: {{ file_format }}
-      Snapshot functionality requires file_format be set to 'delta'
+      Snapshot functionality requires file_format be set to 'delta' or 'hudi'
     {%- endset %}
     {% do exceptions.raise_compiler_error(invalid_format_msg) %}
   {% endif %}
 
   {%- if target_relation_exists -%}
-    {%- if not target_relation.is_delta -%}
+    {%- if not target_relation.is_delta and not target_relation.is_hudi -%}
       {% set invalid_format_msg -%}
-        The existing table {{ model.schema }}.{{ target_table }} is in another format than 'delta'
+        The existing table {{ model.schema }}.{{ target_table }} is in another format than 'delta' or 'hudi'
       {%- endset %}
       {% do exceptions.raise_compiler_error(invalid_format_msg) %}
     {% endif %}
diff --git a/docker-compose.yml b/docker-compose.yml
index 869e4ecd..8054dfd7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,6 +14,7 @@ services:
     volumes:
       - ./.spark-warehouse/:/spark-warehouse/
       - ./docker/hive-site.xml:/usr/spark/conf/hive-site.xml
+      - ./docker/spark-defaults.conf:/usr/spark/conf/spark-defaults.conf
     environment:
       - WAIT_FOR=dbt-hive-metastore:5432
 
diff --git a/docker/hive-site.xml b/docker/hive-site.xml
index a92e87b7..457d04f3 100644
--- a/docker/hive-site.xml
+++ b/docker/hive-site.xml
@@ -39,4 +39,8 @@
         <value>dbt</value>
     </property>
 
+    <property>
+        <name>hive.metastore.schema.verification</name>
+        <value>false</value>
+    </property>
 </configuration>
diff --git a/docker/spark-defaults.conf b/docker/spark-defaults.conf
new file mode 100644
index 00000000..48a0501c
--- /dev/null
+++ b/docker/spark-defaults.conf
@@ -0,0 +1,7 @@
+spark.hadoop.datanucleus.autoCreateTables	true
+spark.hadoop.datanucleus.schema.autoCreateTables	true
+spark.hadoop.datanucleus.fixedDatastore 	false
+spark.serializer	org.apache.spark.serializer.KryoSerializer
+spark.jars.packages	org.apache.hudi:hudi-spark3-bundle_2.12:0.9.0
+spark.sql.extensions	org.apache.spark.sql.hudi.HoodieSparkSessionExtension
+spark.driver.userClassPathFirst true
diff --git a/tests/integration/incremental_strategies/models_hudi/append.sql b/tests/integration/incremental_strategies/models_hudi/append.sql
new file mode 100644
index 00000000..9be27bec
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_hudi/append.sql
@@ -0,0 +1,19 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'append',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
new file mode 100644
index 00000000..08137408
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
@@ -0,0 +1,19 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
new file mode 100644
index 00000000..0f74cfdb
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
@@ -0,0 +1,20 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    partition_by = 'id',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql b/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
new file mode 100644
index 00000000..8def11dd
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
@@ -0,0 +1,19 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql b/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
new file mode 100644
index 00000000..ee72860d
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
@@ -0,0 +1,20 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+    unique_key = 'id',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql b/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql
new file mode 100644
index 00000000..99f0d0b7
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql
@@ -0,0 +1,22 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+    unique_key = 'id',
+    merge_update_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be updated, color will be ignored
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
index ef253fc5..839f167e 100644
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ b/tests/integration/incremental_strategies/test_incremental_strategies.py
@@ -16,6 +16,10 @@ def project_config(self):
             },
         }
 
+    def seed_and_run_once(self):
+        self.run_dbt(["seed"])
+        self.run_dbt(["run"])
+
     def seed_and_run_twice(self):
         self.run_dbt(["seed"])
         self.run_dbt(["run"])
@@ -77,6 +81,26 @@ def run_and_test(self):
     def test_delta_strategies_databricks_cluster(self):
         self.run_and_test()
 
+# Uncomment this hudi integration test after the hudi 0.10.0 release to make it work.
+# class TestHudiStrategies(TestIncrementalStrategies):
+#     @property
+#     def models(self):
+#         return "models_hudi"
+#
+#     def run_and_test(self):
+#         self.seed_and_run_once()
+#         self.assertTablesEqual("append", "expected_append")
+#         self.assertTablesEqual("merge_no_key", "expected_append")
+#         self.assertTablesEqual("merge_unique_key", "expected_upsert")
+#         self.assertTablesEqual(
+#             "insert_overwrite_no_partitions", "expected_overwrite")
+#         self.assertTablesEqual(
+#             "insert_overwrite_partitions", "expected_upsert")
+#
+#     @use_profile("apache_spark")
+#     def test_hudi_strategies_apache_spark(self):
+#         self.run_and_test()
+
 
 class TestBadStrategies(TestIncrementalStrategies):
     @property
diff --git a/tests/integration/persist_docs/models/schema.yml b/tests/integration/persist_docs/models/schema.yml
index 2639037b..6680f392 100644
--- a/tests/integration/persist_docs/models/schema.yml
+++ b/tests/integration/persist_docs/models/schema.yml
@@ -49,6 +49,30 @@ models:
         description: |
           Some stuff here and then a call to
           {{ doc('my_fun_doc')}}
+
+  - name: table_hudi_model
+    description: |
+      Table model description "with double quotes"
+      and with 'single  quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single  quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+      - name: name
+        description: |
+          Some stuff here and then a call to
+          {{ doc('my_fun_doc')}}
   
   - name: view_model
     description: |
diff --git a/tests/unit/test_macros.py b/tests/unit/test_macros.py
index 151631e0..06ce202a 100644
--- a/tests/unit/test_macros.py
+++ b/tests/unit/test_macros.py
@@ -43,6 +43,10 @@ def test_macros_create_table_as_file_format(self):
         sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
         self.assertEqual(sql, "create or replace table my_table using delta as select 1")
 
+        self.config['file_format'] = 'hudi'
+        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.assertEqual(sql, "create table my_table using hudi as select 1")
+
     def test_macros_create_table_as_options(self):
         template = self.__get_template('adapters.sql')
 
@@ -51,6 +55,30 @@ def test_macros_create_table_as_options(self):
         sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
         self.assertEqual(sql, 'create or replace table my_table using delta options (compression "gzip" ) as select 1')
 
+        self.config['file_format'] = 'hudi'
+        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.assertEqual(sql, 'create table my_table using hudi options (compression "gzip" ) as select 1')
+
+    def test_macros_create_table_as_hudi_options(self):
+        template = self.__get_template('adapters.sql')
+
+        self.config['file_format'] = 'hudi'
+        self.config['unique_key'] = 'id'
+        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id').strip()
+        self.assertEqual(sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id')
+
+        self.config['file_format'] = 'hudi'
+        self.config['unique_key'] = 'id'
+        self.config['options'] = {'primaryKey': 'id'}
+        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id').strip()
+        self.assertEqual(sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id')
+
+        self.config['file_format'] = 'hudi'
+        self.config['unique_key'] = 'uuid'
+        self.config['options'] = {'primaryKey': 'id'}
+        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id')
+        self.assertIn('mock.raise_compiler_error()', sql)
+
     def test_macros_create_table_as_partition(self):
         template = self.__get_template('adapters.sql')
 
@@ -113,3 +141,10 @@ def test_macros_create_table_as_all(self):
             sql,
             "create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
         )
+
+        self.config['file_format'] = 'hudi'
+        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.assertEqual(
+            sql,
+            "create table my_table using hudi partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
+        )

From b9e16ca9dff3d97e8c916aa28ac4441d50dee070 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Fri, 19 Nov 2021 12:01:45 +0100
Subject: [PATCH 279/603] Update profile_template.yml for v1 (#247)

* Update profile_template.yml for v1

* PR feedback, fix indentation issues

* It was my intention to remove the square brackets

* Fixup changelog entry

* Merge main, update changelog
---
 CHANGELOG.md                           | 14 +++++----
 dbt/include/spark/profile_template.yml | 42 ++++++++++++++++++++++++++
 dbt/include/spark/sample_profiles.yml  | 31 -------------------
 3 files changed, 50 insertions(+), 37 deletions(-)
 create mode 100644 dbt/include/spark/profile_template.yml
 delete mode 100644 dbt/include/spark/sample_profiles.yml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1f4a030d..26e89258 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,16 +1,20 @@
 ## dbt-spark 1.0.0 (Release TBD)
 
+### Features
+- Add support for Apache Hudi (hudi file format) which supports incremental merge strategies ([#187](https://github.com/dbt-labs/dbt-spark/issues/187), [#210](https://github.com/dbt-labs/dbt-spark/pull/210))
+
 ### Under the hood
 - Refactor seed macros: remove duplicated code from dbt-core, and provide clearer logging of SQL parameters that differ by connection method ([#249](https://github.com/dbt-labs/dbt-spark/issues/249), [#250](https://github.com/dbt-labs/dbt-snowflake/pull/250))
+- Replace `sample_profiles.yml` with `profile_template.yml`, for use with new `dbt init` ([#247](https://github.com/dbt-labs/dbt-spark/pull/247))
+
+### Contributors
+- [@vingov](https://github.com/vingov) ([#210](https://github.com/dbt-labs/dbt-spark/pull/210))
 
 ## dbt-spark 1.0.0rc1 (November 10, 2021)
 
 ### Under the hood
 - Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([dbt-core#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#253](https://github.com/dbt-labs/dbt-snowflake/pull/253))
-- Add support for structured logging [#251](https://github.com/dbt-labs/dbt-spark/pull/251)
-
-### Under the hood
-- Add support for structured logging [#251](https://github.com/dbt-labs/dbt-spark/pull/251)
+- Add support for structured logging ([#251](https://github.com/dbt-labs/dbt-spark/pull/251))
 
 ## dbt-spark 0.21.1 (Release TBD)
 
@@ -29,7 +33,6 @@
 ### Fixes
 - Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesn't return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - Properly parse columns when there are struct fields, to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202))
-- Add support for Apache Hudi (hudi file format) which supports incremental merge strategies: Issue ([#187](https://github.com/dbt-labs/dbt-spark/issues/187)) 
 
 ### Under the hood
 - Add `unique_field` to better understand adapter adoption in anonymous usage tracking ([#211](https://github.com/dbt-labs/dbt-spark/pull/211))
@@ -37,7 +40,6 @@
 ### Contributors
 - [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204))
-- [@vingov](https://github.com/vingov) ([#210](https://github.com/dbt-labs/dbt-spark/pull/210))
 
 ## dbt-spark 0.21.0b2 (August 20, 2021)
 
diff --git a/dbt/include/spark/profile_template.yml b/dbt/include/spark/profile_template.yml
new file mode 100644
index 00000000..192b9385
--- /dev/null
+++ b/dbt/include/spark/profile_template.yml
@@ -0,0 +1,42 @@
+fixed:
+  type: spark
+prompts:
+  host:
+    hint: yourorg.sparkhost.com
+  _choose_authentication_method:
+    odbc:
+      _fixed_method: odbc
+      driver:
+        hint: 'path/to/driver'
+      _choose_endpoint_or_cluster:
+        endpoint:
+          endpoint:
+            hint: 'endpoint ID'
+        cluster:
+          cluster:
+            hint: 'cluster ID'
+      token:
+        hint: 'abc123'
+        hide_input: true
+    http:
+      _fixed_method: http
+      token:
+        hint: 'abc123'
+        hide_input: true
+      connect_timeout:
+        default: 10
+        type: 'int'
+      connect_retries:
+        default: 0
+        type: 'int'
+    thrift:
+      _fixed_method: thrift
+  port:
+    default: 443
+    type: 'int'
+  schema:
+    hint: 'default schema that dbt will build objects in'
+  threads:
+    hint: '1 or more'
+    type: 'int'
+    default: 1
diff --git a/dbt/include/spark/sample_profiles.yml b/dbt/include/spark/sample_profiles.yml
deleted file mode 100644
index b1cff273..00000000
--- a/dbt/include/spark/sample_profiles.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-default:
-  outputs:
-
-    # Use this if connecting to a hosted spark (e.g. Databricks)
-    dev:
-      type: spark
-      method: odbc
-      driver: [path/to/driver]
-      schema: [schema_name]
-      host: [yourorg.sparkhost.com]
-      organization: [organization id]    # Azure Databricks only
-      token: [abc123]
-      
-      # one of:
-      endpoint: [endpoint id]
-      cluster: [cluster id]
-      
-      # optional
-      port: [port]              # default 443
-      user: [user]
-
-    # Use this if connecting to Dockerized spark
-    prod:
-      type: spark
-      method: thrift
-      schema: [dev_schema]
-      host: [host]
-      port: [port]
-      user: [prod_user]
-
-  target: dev

From d0a07f6cf41be4dde67434e90aaa7571842d3f6b Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 24 Nov 2021 17:32:50 +0100
Subject: [PATCH 280/603] Bump version to 1.0.0rc2 (#259)

* bumpversion 1.0.0rc2

* Update changelog

* Use pytest-dbt-adapter==0.6.0
---
 .bumpversion.cfg                  | 2 +-
 CHANGELOG.md                      | 2 ++
 dbt/adapters/spark/__version__.py | 2 +-
 dev_requirements.txt              | 3 +--
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 26af54b0..b41ae92c 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.0.0rc1
+current_version = 1.0.0rc2
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 26e89258..551b21c5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,7 @@
 ## dbt-spark 1.0.0 (Release TBD)
 
+## dbt-spark 1.0.0rc2 (November 24, 2021)
+
 ### Features
 - Add support for Apache Hudi (hudi file format) which supports incremental merge strategies ([#187](https://github.com/dbt-labs/dbt-spark/issues/187), [#210](https://github.com/dbt-labs/dbt-spark/pull/210))
 
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index b25b7673..5f5b4bad 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.0.0rc1"
+version = "1.0.0rc2"
diff --git a/dev_requirements.txt b/dev_requirements.txt
index a44d72a4..9b371f9c 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -15,7 +15,6 @@ flaky>=3.5.3,<4
 pytest-csv
 
 # Test requirements
-#pytest-dbt-adapter==0.5.1 
-git+https://github.com/dbt-labs/dbt-adapter-tests.git#egg=pytest-dbt-adapter
+pytest-dbt-adapter==0.6.0
 sasl==0.2.1
 thrift_sasl==0.4.1

From 0f11f788c6ccf0c93d9cc45443c36977fbd19b8c Mon Sep 17 00:00:00 2001
From: Sindre Grindheim 
Date: Mon, 29 Nov 2021 11:36:08 +0100
Subject: [PATCH 281/603] Corrected definition for set full_refresh_mode (#262)

* Replaced definition for set full_refresh_mode

* Updated changelog

* Edit changelog

Co-authored-by: Jeremy Cohen 
---
 CHANGELOG.md                                                | 6 ++++++
 .../macros/materializations/incremental/incremental.sql     | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 551b21c5..63d24579 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 ## dbt-spark 1.0.0 (Release TBD)
 
+### Fixes
+- Incremental materialization corrected to respect `full_refresh` config, by using `should_full_refresh()` macro ([#260](https://github.com/dbt-labs/dbt-spark/issues/260), [#262](https://github.com/dbt-labs/dbt-spark/pull/262/))
+
+### Contributors
+- [@grindheim](https://github.com/grindheim) ([#262](https://github.com/dbt-labs/dbt-spark/pull/262/))
+
 ## dbt-spark 1.0.0rc2 (November 24, 2021)
 
 ### Features
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 72b4d251..d0b6e89b 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -10,7 +10,7 @@
   {%- set unique_key = config.get('unique_key', none) -%}
   {%- set partition_by = config.get('partition_by', none) -%}
 
-  {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%}
+  {%- set full_refresh_mode = (should_full_refresh()) -%}
   
   {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %}
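
For readers wondering what the swapped-in macro does: should_full_refresh() lives in dbt-core and prefers the model-level full_refresh config, falling back to the --full-refresh flag only when that config is unset. A hedged Python paraphrase (argument names are illustrative):

    def should_full_refresh(config_full_refresh, flag_full_refresh):
        if config_full_refresh is None:
            return flag_full_refresh
        return bool(config_full_refresh)

    assert should_full_refresh(None, True) is True    # old behavior: the flag decides
    assert should_full_refresh(False, True) is False  # config can now veto --full-refresh
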
 

From 04d1cd4b4cfad68581a03a954942b26e9fec71b3 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Fri, 3 Dec 2021 20:47:11 +0100
Subject: [PATCH 282/603] `get_response` -> `AdapterResponse` (#265)

* Return AdapterResponse from get_response

* fix flake
---
 dbt/adapters/spark/connections.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 1848a72a..2a055bf2 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -3,7 +3,7 @@
 import dbt.exceptions
 from dbt.adapters.base import Credentials
 from dbt.adapters.sql import SQLConnectionManager
-from dbt.contracts.connection import ConnectionState
+from dbt.contracts.connection import ConnectionState, AdapterResponse
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
 from dbt.adapters.spark import __version__
@@ -304,8 +304,12 @@ def cancel(self, connection):
         connection.handle.cancel()
 
     @classmethod
-    def get_response(cls, cursor):
-        return 'OK'
+    def get_response(cls, cursor) -> AdapterResponse:
+        # https://github.com/dbt-labs/dbt-spark/issues/142
+        message = 'OK'
+        return AdapterResponse(
+            _message=message
+        )
 
     # No transactions on Spark....
     def add_begin_query(self, *args, **kwargs):
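
A small sketch of what callers now receive, assuming the AdapterResponse contract in dbt-core 1.0 (a required _message plus optional code and rows_affected fields); the assertions are illustrative:

    from dbt.contracts.connection import AdapterResponse

    response = AdapterResponse(_message="OK")
    assert str(response) == "OK"           # run results surface this as the status message
    assert response.rows_affected is None  # Spark cursors do not report row counts here
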

From eb9d51d9744056ad909fbb417d2627d99563b2fd Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Fri, 7 Jan 2022 11:14:04 -0500
Subject: [PATCH 283/603] Fixing version bumping issues (#271)

---
 .bumpversion.cfg                  | 13 +++++--------
 dbt/adapters/spark/__version__.py |  2 +-
 setup.py                          |  9 ++-------
 3 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index b41ae92c..9db331eb 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,30 +1,27 @@
 [bumpversion]
-current_version = 1.0.0rc2
+current_version = 1.0.0
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
-	(\.(?P<pluginpatch>\d+))?
-	((?P<prerelease>[a-z]+)(?P<num>\d+))?
+	((?P<prerelease>a|b|rc)(?P<num>\d+))?
 serialize = 
-	{major}.{minor}.{patch}.{pluginpatch}{prerelease}{num}
 	{major}.{minor}.{patch}{prerelease}{num}
-	{major}.{minor}.{patch}.{pluginpatch}
 	{major}.{minor}.{patch}
 commit = False
 tag = False
 
 [bumpversion:part:prerelease]
 first_value = a
+optional_value = final
 values = 
 	a
 	b
 	rc
+	final
 
 [bumpversion:part:num]
 first_value = 1
 
-[bumpversion:part:pluginpatch]
-first_value = 1
+[bumpversion:file:setup.py]
 
 [bumpversion:file:dbt/adapters/spark/__version__.py]
-
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 5f5b4bad..11a716ec 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.0.0rc2"
+version = "1.0.0"
diff --git a/setup.py b/setup.py
index 5d08179b..fdf8691a 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 import sys
 import re
 
-# require python 3.6 or newer
+# require python 3.7 or newer
 if sys.version_info < (3, 7):
     print('Error: dbt does not support this version of Python.')
     print('Please upgrade to Python 3.7 or higher.')
@@ -43,11 +43,6 @@ def _get_plugin_version_dict():
         return match.groupdict()
 
 
-def _get_plugin_version():
-    parts = _get_plugin_version_dict()
-    return "{major}.{minor}.{patch}{prekind}{pre}".format(**parts)
-
-
 # require a compatible minor version (~=), prerelease if this is a prerelease
 def _get_dbt_core_version():
     parts = _get_plugin_version_dict()
@@ -57,7 +52,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = _get_plugin_version()
+package_version = "1.0.0"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 75cdfe46ca7905b63bd545f2c2a2bbd9f28f7366 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 20 Jan 2022 15:11:00 -0600
Subject: [PATCH 284/603] Mcknight/stale workflow (#275)

* adding stale issue/PR message github action workflow to adapter repos

* add stale workflow to adapter repos
---
 .github/workflows/stale.yml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 .github/workflows/stale.yml

diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 00000000..2848ce8f
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,17 @@
+name: 'Close stale issues and PRs'
+on:
+  schedule:
+    - cron: "30 1 * * *"
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      # pinned at v4 (https://github.com/actions/stale/releases/tag/v4.0.0)
+      - uses: actions/stale@cdf15f641adb27a71842045a94023bef6945e3aa
+        with:
+          stale-issue-message: "This issue has been marked as Stale because it has been open for 180 days with no activity. If you would like the issue to remain open, please remove the stale label or comment on the issue, or it will be closed in 7 days."
+          stale-pr-message: "This PR has been marked as Stale because it has been open for 180 days with no activity. If you would like the PR to remain open, please remove the stale label or comment on the PR, or it will be closed in 7 days."
+          # mark issues/PRs stale when they haven't seen activity in 180 days
+          days-before-stale: 180
+          # ignore checking issues with the following labels
+          exempt-issue-labels: "epic, discussion"
\ No newline at end of file

From 63ebe042419a0feb5939c2eb1c010e7e52e8c802 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Mon, 24 Jan 2022 11:35:58 -0500
Subject: [PATCH 285/603] Adding Jira mirroring Actions (#278)

---
 .github/workflows/jira-creation.yml   | 26 ++++++++++++++++++++++++++
 .github/workflows/jira-label.yml      | 27 +++++++++++++++++++++++++++
 .github/workflows/jira-transition.yml | 24 ++++++++++++++++++++++++
 3 files changed, 77 insertions(+)
 create mode 100644 .github/workflows/jira-creation.yml
 create mode 100644 .github/workflows/jira-label.yml
 create mode 100644 .github/workflows/jira-transition.yml

diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml
new file mode 100644
index 00000000..c84e106a
--- /dev/null
+++ b/.github/workflows/jira-creation.yml
@@ -0,0 +1,26 @@
+# **what?**
+# Mirrors issues into Jira. Includes the information: title,
+# GitHub Issue ID and URL
+
+# **why?**
+# Jira is our tool for tracking and we need to see these issues in there
+
+# **when?**
+# On issue creation or when an issue is labeled `Jira`
+
+name: Jira Issue Creation
+
+on:
+  issues:
+    types: [opened, labeled]
+    
+permissions:
+  issues: write
+
+jobs:
+  call-label-action:
+    uses: dbt-labs/jira-actions/.github/workflows/jira-creation.yml@main
+    secrets:
+      JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
+      JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
+      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml
new file mode 100644
index 00000000..fd533a17
--- /dev/null
+++ b/.github/workflows/jira-label.yml
@@ -0,0 +1,27 @@
+# **what?**
+# Calls mirroring Jira label Action. Includes adding a new label
+# to an existing issue or removing a label as well
+
+# **why?**
+# Jira is our tool for tracking and we need to see these labels in there
+
+# **when?**
+# On labels being added or removed from issues
+
+name: Jira Label Mirroring
+
+on:
+  issues:
+    types: [labeled, unlabeled]
+    
+permissions:
+  issues: read
+
+jobs:
+  call-label-action:
+    uses: dbt-labs/jira-actions/.github/workflows/jira-label.yml@main
+    secrets:
+      JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
+      JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
+      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
+    
diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml
new file mode 100644
index 00000000..71273c7a
--- /dev/null
+++ b/.github/workflows/jira-transition.yml
@@ -0,0 +1,24 @@
+# **what?**
+# Transition a Jira issue to a new state
+# Only supports these GitHub Issue transitions:
+#   closed, deleted, reopened
+
+# **why?**
+# Jira needs to be kept up-to-date
+
+# **when?**
+# On issue closing, deletion, reopened
+
+name: Jira Issue Transition
+
+on:
+  issues:
+    types: [closed, deleted, reopened]
+
+jobs:
+  call-label-action:
+    uses: dbt-labs/jira-actions/.github/workflows/jira-transition.yml@main
+    secrets:
+      JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
+      JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
+      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
\ No newline at end of file

From 6c4916b53ccbfb1057ee5229c51510333d0e88f8 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN 
Date: Tue, 15 Feb 2022 02:53:25 -0800
Subject: [PATCH 286/603] Closes the connection properly. (#285)

* Closes the connection properly.

* CHANGELOG.md
---
 CHANGELOG.md                      | 8 ++++++++
 dbt/adapters/spark/connections.py | 1 +
 2 files changed, 9 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63d24579..74790dd8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## dbt-spark 1.0.1rc0 (Release TBD)
+
+### Fixes
+- Closes the connection properly ([#280](https://github.com/dbt-labs/dbt-spark/issues/280), [#285](https://github.com/dbt-labs/dbt-spark/pull/285))
+
+### Contributors
+- [@ueshin](https://github.com/ueshin) ([#285](https://github.com/dbt-labs/dbt-spark/pull/285))
+
 ## dbt-spark 1.0.0 (Release TBD)
 
 ### Fixes
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 2a055bf2..6ffd9c01 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -179,6 +179,7 @@ def close(self):
                 logger.debug(
                     "Exception while closing cursor: {}".format(exc)
                 )
+        self.handle.close()
 
     def rollback(self, *args, **kwargs):
         logger.debug("NotImplemented: rollback")

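Note: the one-line addition above releases the underlying handle after the cursor cleanup. Paraphrased, the close path now looks roughly like this (the wrapper shape is inferred from the diff context; the broad Exception catch is a simplification, not the adapter's exact code):

    from dbt.events import AdapterLogger

    logger = AdapterLogger("Spark")

    class ConnectionWrapperSketch:
        """Rough paraphrase of the wrapper's close() after #285 (not verbatim)."""

        def __init__(self, handle):
            self.handle = handle
            self._cursor = None

        def close(self):
            if self._cursor:
                try:
                    self._cursor.close()
                except Exception as exc:
                    logger.debug("Exception while closing cursor: {}".format(exc))
            # New in this patch: also close the handle so the server-side
            # session is released instead of lingering until process exit.
            self.handle.close()
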
From 7fbd82c8716bcaa9478ec779a3efc00e4ac3281a Mon Sep 17 00:00:00 2001
From: Amy Chen <46451573+amychen1776@users.noreply.github.com>
Date: Tue, 15 Feb 2022 05:55:39 -0500
Subject: [PATCH 287/603] update user agent (#288)

---
 dbt/adapters/spark/connections.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 6ffd9c01..608ab2b4 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -416,7 +416,7 @@ def open(cls, connection):
                     cls.validate_creds(creds, required_fields)
 
                     dbt_spark_version = __version__.version
-                    user_agent_entry = f"fishtown-analytics-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
+                    user_agent_entry = f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
 
                     # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm
                     ssp = {

From d7f1d38d0dcf272dc0e513db4eeada0c08c407f5 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Fri, 18 Feb 2022 19:18:47 +0100
Subject: [PATCH 288/603] Update changelog (#289)

---
 CHANGELOG.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 74790dd8..c2ca4dd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## dbt-spark 1.1.0 (Release TBD)
+
+### Under the hood
+- Update plugin author name (`fishtown-analytics` → `dbt-labs`) in ODBC user agent ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
+
+### Contributors
+- [@amychen1776](https://github.com/amychen1776) ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
+
 ## dbt-spark 1.0.1rc0 (Release TBD)
 
 ### Fixes
@@ -6,7 +14,7 @@
 ### Contributors
 - [@ueshin](https://github.com/ueshin) ([#285](https://github.com/dbt-labs/dbt-spark/pull/285))
 
-## dbt-spark 1.0.0 (Release TBD)
+## dbt-spark 1.0.0 (December 3, 2021)
 
 ### Fixes
 - Incremental materialization corrected to respect `full_refresh` config, by using `should_full_refresh()` macro ([#260](https://github.com/dbt-labs/dbt-spark/issues/260), [#262](https://github.com/dbt-labs/dbt-spark/pull/262/))

From 5b486835cf929e2bddfbed28caa741144bf72197 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Tue, 22 Mar 2022 15:09:51 +0100
Subject: [PATCH 289/603] Configure insert_overwrite models to use parquet
 (#301)

---
 .../models_insert_overwrite/insert_overwrite_no_partitions.sql   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
index fcc142bd..ae007b45 100644
--- a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
+++ b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
@@ -1,6 +1,7 @@
 {{ config(
     materialized = 'incremental',
     incremental_strategy = 'insert_overwrite',
+    file_format = 'parquet',
 ) }}
 
 {% if not is_incremental() %}

From 5917871eda5e11e220ba1ac7098dbc0214ade675 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Tue, 22 Mar 2022 09:52:36 -0500
Subject: [PATCH 290/603] adding in unique_key as a list for incremental models
 (#291)

* a possible Spark answer to unique keys as a list, pulled heavily from the dbt-core implementations for BigQuery and Snowflake; still researching the Spark side

* adding tests from other adapters over to spark and beginning work on changing over

* adding the apache_spark name to first versions of tests to fix error

* Pairing

* Update config

* Try casting data type

* More type casting

* Fix typo in file name

* adding databricks_cluster versions of tests to be in line with rest of integration tests to cover each connection type.

* removing duplicated tests and adding a changelog entry

* trying to get databricks_cluster tests to work by casting date on seed inserts

* running tests

Co-authored-by: Jeremy Cohen 
---
 CHANGELOG.md                                  |   3 +
 .../incremental/strategies.sql                |  32 +-
 .../duplicated_unary_unique_key_list.sql      |  17 +
 .../models/empty_str_unique_key.sql           |  14 +
 .../models/empty_unique_key_list.sql          |  12 +
 .../models/expected/one_str__overwrite.sql    |  21 +
 .../unique_key_list__inplace_overwrite.sql    |  21 +
 .../models/no_unique_key.sql                  |  13 +
 .../nontyped_trinary_unique_key_list.sql      |  19 +
 .../models/not_found_unique_key.sql           |  14 +
 .../models/not_found_unique_key_list.sql      |   8 +
 .../models/str_unique_key.sql                 |  17 +
 .../models/trinary_unique_key_list.sql        |  19 +
 .../models/unary_unique_key_list.sql          |  17 +
 .../seeds/add_new_rows.sql                    |   9 +
 .../seeds/duplicate_insert.sql                |   5 +
 .../incremental_unique_id_test/seeds/seed.csv |   7 +
 .../incremental_unique_id_test/seeds/seed.yml |   7 +
 .../test_incremental_unique_id.py             | 481 ++++++++++++++++++
 19 files changed, 726 insertions(+), 10 deletions(-)
 create mode 100644 tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/no_unique_key.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/str_unique_key.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql
 create mode 100644 tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql
 create mode 100644 tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql
 create mode 100644 tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql
 create mode 100644 tests/integration/incremental_unique_id_test/seeds/seed.csv
 create mode 100644 tests/integration/incremental_unique_id_test/seeds/seed.yml
 create mode 100644 tests/integration/incremental_unique_id_test/test_incremental_unique_id.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2ca4dd9..9642b3a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 ## dbt-spark 1.1.0 (Release TBD)
 
+### Features
+- Adds new integration test to check against new ability to allow unique_key to be a list. ([#282](https://github.com/dbt-labs/dbt-spark/issues/282))
+
 ### Under the hood
 - Update plugin author name (`fishtown-analytics` → `dbt-labs`) in ODBC user agent ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
 
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index ec5dad67..215b5f3f 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -21,20 +21,32 @@
 
 {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %}
   {# skip dest_columns, use merge_update_columns config if provided, otherwise use "*" #}
+  {%- set predicates = [] if predicates is none else [] + predicates -%}
   {%- set update_columns = config.get("merge_update_columns") -%}
+
+  {% if unique_key %}
+      {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}
+          {% for key in unique_key %}
+              {% set this_key_match %}
+                  DBT_INTERNAL_SOURCE.{{ key }} = DBT_INTERNAL_DEST.{{ key }}
+              {% endset %}
+              {% do predicates.append(this_key_match) %}
+          {% endfor %}
+      {% else %}
+          {% set unique_key_match %}
+              DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}
+          {% endset %}
+          {% do predicates.append(unique_key_match) %}
+      {% endif %}
+  {% else %}
+      {% do predicates.append('FALSE') %}
+  {% endif %}
   
-  {% set merge_condition %}
-    {% if unique_key %}
-        on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}
-    {% else %}
-        on false
-    {% endif %}
-  {% endset %}
+  {{ sql_header if sql_header is not none }}
   
-    merge into {{ target }} as DBT_INTERNAL_DEST
+  merge into {{ target }} as DBT_INTERNAL_DEST
       using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE
-      
-      {{ merge_condition }}
+      on {{ predicates | join(' and ') }}
       
       when matched then update set
         {% if update_columns -%}{%- for column_name in update_columns %}
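
Note: the reworked macro above assembles the merge ON clause from a list of equality predicates, so unique_key may now be a single column name or a list of them. The same branching logic, rendered as plain Python for readability (an illustration of the Jinja above, not code from the adapter):

    from typing import List, Optional, Union

    def build_merge_predicates(
        unique_key: Optional[Union[str, List[str]]],
        predicates: Optional[List[str]] = None,
    ) -> str:
        """Mirror of the branching in spark__get_merge_sql (illustrative)."""
        predicates = [] if predicates is None else list(predicates)
        if unique_key:
            keys = unique_key if isinstance(unique_key, list) else [unique_key]
            for key in keys:
                predicates.append(
                    f"DBT_INTERNAL_SOURCE.{key} = DBT_INTERNAL_DEST.{key}"
                )
        else:
            # No unique key: nothing ever matches, so the merge only inserts.
            predicates.append("FALSE")
        return " and ".join(predicates)

    print(build_merge_predicates(["state", "county", "city"]))
    # DBT_INTERNAL_SOURCE.state = DBT_INTERNAL_DEST.state and ...
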
diff --git a/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql
new file mode 100644
index 00000000..7290b6c4
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql
@@ -0,0 +1,17 @@
+{{
+    config(
+        materialized='incremental',
+        unique_key=['state', 'state']
+    )
+}}
+
+select
+    state as state,
+    county as county,
+    city as city,
+    last_visit_date as last_visit_date
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql b/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql
new file mode 100644
index 00000000..5260e177
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql
@@ -0,0 +1,14 @@
+{{
+    config(
+        materialized='incremental',
+        unique_key=''
+    )
+}}
+
+select
+    *
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql
new file mode 100644
index 00000000..c582d532
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql
@@ -0,0 +1,12 @@
+{{
+    config(
+        materialized='incremental',
+        unique_key=[]
+    )
+}}
+
+select * from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql b/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql
new file mode 100644
index 00000000..c7101152
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql
@@ -0,0 +1,21 @@
+{{
+    config(
+        materialized='table'
+    )
+}}
+
+select
+    'CT' as state,
+    'Hartford' as county,
+    'Hartford' as city,
+    cast('2022-02-14' as date) as last_visit_date
+union all
+select 'MA','Suffolk','Boston',cast('2020-02-12' as date)
+union all
+select 'NJ','Mercer','Trenton',cast('2022-01-01' as date)
+union all
+select 'NY','Kings','Brooklyn',cast('2021-04-02' as date)
+union all
+select 'NY','New York','Manhattan',cast('2021-04-01' as date)
+union all
+select 'PA','Philadelphia','Philadelphia',cast('2021-05-21' as date)
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql b/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql
new file mode 100644
index 00000000..c7101152
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql
@@ -0,0 +1,21 @@
+{{
+    config(
+        materialized='table'
+    )
+}}
+
+select
+    'CT' as state,
+    'Hartford' as county,
+    'Hartford' as city,
+    cast('2022-02-14' as date) as last_visit_date
+union all
+select 'MA','Suffolk','Boston',cast('2020-02-12' as date)
+union all
+select 'NJ','Mercer','Trenton',cast('2022-01-01' as date)
+union all
+select 'NY','Kings','Brooklyn',cast('2021-04-02' as date)
+union all
+select 'NY','New York','Manhattan',cast('2021-04-01' as date)
+union all
+select 'PA','Philadelphia','Philadelphia',cast('2021-05-21' as date)
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/no_unique_key.sql b/tests/integration/incremental_unique_id_test/models/no_unique_key.sql
new file mode 100644
index 00000000..44a63e75
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/no_unique_key.sql
@@ -0,0 +1,13 @@
+{{
+    config(
+        materialized='incremental'
+    )
+}}
+
+select
+    *
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql
new file mode 100644
index 00000000..52b4509f
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql
@@ -0,0 +1,19 @@
+-- for comparing against auto-typed seeds
+
+{{
+    config(
+        materialized='incremental',
+        unique_key=['state', 'county', 'city']
+    )
+}}
+
+select
+    state as state,
+    county as county,
+    city as city,
+    last_visit_date as last_visit_date
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql b/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql
new file mode 100644
index 00000000..d247aa34
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql
@@ -0,0 +1,14 @@
+{{
+    config(
+        materialized='incremental',
+        unique_key='thisisnotacolumn'
+    )
+}}
+
+select
+    *
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql
new file mode 100644
index 00000000..f1462a48
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql
@@ -0,0 +1,8 @@
+{{
+    config(
+        materialized='incremental',
+        unique_key=['state', 'thisisnotacolumn']
+    )
+}}
+
+select * from {{ ref('seed') }}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/str_unique_key.sql b/tests/integration/incremental_unique_id_test/models/str_unique_key.sql
new file mode 100644
index 00000000..2f9fc298
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/str_unique_key.sql
@@ -0,0 +1,17 @@
+{{
+    config(
+        materialized='incremental',
+        unique_key='state'
+    )
+}}
+
+select
+    state as state,
+    county as county,
+    city as city,
+    last_visit_date as last_visit_date
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql
new file mode 100644
index 00000000..0359546b
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql
@@ -0,0 +1,19 @@
+-- types needed to compare against expected model reliably
+
+{{
+    config(
+        materialized='incremental',
+        unique_key=['state', 'county', 'city']
+    )
+}}
+
+select
+    state as state,
+    county as county,
+    city as city,
+    last_visit_date as last_visit_date
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql
new file mode 100644
index 00000000..7f5875f8
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql
@@ -0,0 +1,17 @@
+{{
+    config(
+        materialized='incremental',
+        unique_key=['state']
+    )
+}}
+
+select
+    state as state,
+    county as county,
+    city as city,
+    last_visit_date as last_visit_date
+from {{ ref('seed') }}
+
+{% if is_incremental() %}
+    where last_visit_date > (select max(last_visit_date) from {{ this }})
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql b/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql
new file mode 100644
index 00000000..e5611fe3
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql
@@ -0,0 +1,9 @@
+-- insert two new rows, both of which should be in incremental model
+--   with any unique columns
+insert into {schema}.seed
+    (state, county, city, last_visit_date)
+values ('WA','King','Seattle',cast('2022-02-01' as date));
+
+insert into {schema}.seed
+    (state, county, city, last_visit_date)
+values ('CA','Los Angeles','Los Angeles',cast('2022-02-01' as date));
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql b/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql
new file mode 100644
index 00000000..8abe2808
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql
@@ -0,0 +1,5 @@
+-- insert new row, which should not be in incremental model
+--  with primary or first three columns unique
+insert into {schema}.seed
+    (state, county, city, last_visit_date)
+values ('CT','Hartford','Hartford',cast('2022-02-14' as date));
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/seed.csv b/tests/integration/incremental_unique_id_test/seeds/seed.csv
new file mode 100644
index 00000000..b988827f
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/seeds/seed.csv
@@ -0,0 +1,7 @@
+state,county,city,last_visit_date
+CT,Hartford,Hartford,2020-09-23
+MA,Suffolk,Boston,2020-02-12
+NJ,Mercer,Trenton,2022-01-01
+NY,Kings,Brooklyn,2021-04-02
+NY,New York,Manhattan,2021-04-01
+PA,Philadelphia,Philadelphia,2021-05-21
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/seed.yml b/tests/integration/incremental_unique_id_test/seeds/seed.yml
new file mode 100644
index 00000000..c048548a
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/seeds/seed.yml
@@ -0,0 +1,7 @@
+version: 2
+
+seeds:
+  - name: seed
+    config:
+      column_types:
+        last_visit_date: date
diff --git a/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py b/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py
new file mode 100644
index 00000000..6ba80bc7
--- /dev/null
+++ b/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py
@@ -0,0 +1,481 @@
+from tests.integration.base import DBTIntegrationTest, use_profile
+from dbt.contracts.results import RunStatus
+from collections import namedtuple
+from pathlib import Path
+
+
+TestResults = namedtuple(
+    'TestResults',
+    ['seed_count', 'model_count', 'seed_rows', 'inc_test_model_count',
+     'opt_model_count', 'relation'],
+)
+
+
+class TestIncrementalUniqueKey(DBTIntegrationTest):
+    @property
+    def schema(self):
+        return 'incremental_unique_key'
+
+    @property
+    def models(self):
+        return 'models'
+        
+    @property
+    def project_config(self):
+        return {
+            "config-version": 2,
+            "models": {
+                "+file_format": "delta",
+                "+incremental_strategy": "merge"
+            }
+        }
+
+    def update_incremental_model(self, incremental_model):
+        '''update incremental model after the seed table has been updated'''
+        model_result_set = self.run_dbt(['run', '--select', incremental_model])
+        return len(model_result_set)
+
+    def setup_test(self, seed, incremental_model, update_sql_file):
+        '''build a test case and return values for assertions'''
+        
+        # Idempotently create some number of seeds and incremental models
+        seed_count = len(self.run_dbt(
+            ['seed', '--select', seed, '--full-refresh']
+        ))
+        model_count = len(self.run_dbt(
+            ['run', '--select', incremental_model, '--full-refresh']
+        ))
+        
+        # Update seed and return new row count
+        row_count_query = 'select * from {}.{}'.format(
+            self.unique_schema(),
+            seed
+        )
+        self.run_sql_file(Path('seeds') / Path(update_sql_file + '.sql'))
+        seed_rows = len(self.run_sql(row_count_query, fetch='all'))
+
+        inc_test_model_count = self.update_incremental_model(
+            incremental_model=incremental_model
+        )
+
+        return (seed_count, model_count, seed_rows, inc_test_model_count)
+
+    def test_scenario_correctness(self, expected_fields, test_case_fields):
+        '''Invoke assertions to verify correct build functionality'''
+        # 1. test seed(s) should build afresh
+        self.assertEqual(
+            expected_fields.seed_count, test_case_fields.seed_count
+        )
+        # 2. test model(s) should build afresh
+        self.assertEqual(
+            expected_fields.model_count, test_case_fields.model_count
+        )
+        # 3. seeds should have intended row counts post update
+        self.assertEqual(
+            expected_fields.seed_rows, test_case_fields.seed_rows
+        )
+        # 4. incremental test model(s) should be updated
+        self.assertEqual(
+            expected_fields.inc_test_model_count,
+            test_case_fields.inc_test_model_count
+        )
+        # 5. extra incremental model(s) should be built; optional since
+        #   comparison may be between an incremental model and seed
+        if (expected_fields.opt_model_count and
+            test_case_fields.opt_model_count):
+            self.assertEqual(
+                expected_fields.opt_model_count,
+                test_case_fields.opt_model_count
+            )
+        # 6. result table should match intended result set (itself a relation)
+        self.assertTablesEqual(
+            expected_fields.relation, test_case_fields.relation
+        )
+
+    def stub_expected_fields(
+        self, relation, seed_rows, opt_model_count=None
+    ):
+        return TestResults(
+            seed_count=1, model_count=1, seed_rows=seed_rows,
+            inc_test_model_count=1, opt_model_count=opt_model_count,
+            relation=relation
+        )
+
+    def fail_to_build_inc_missing_unique_key_column(self, incremental_model_name):
+        '''should pass back error state when trying to build an incremental
+           model whose unique key or keylist includes a column missing
+           from the incremental model'''
+        seed_count = len(self.run_dbt(
+            ['seed', '--select', 'seed', '--full-refresh']
+        ))
+        # unique keys are not applied on first run, so two are needed
+        self.run_dbt(
+            ['run', '--select', incremental_model_name, '--full-refresh'],
+            expect_pass=True
+        )
+        run_result = self.run_dbt(
+            ['run', '--select', incremental_model_name],
+            expect_pass=False
+        ).results[0]
+
+        return run_result.status, run_result.message
+
+
+class TestNoIncrementalUniqueKey(TestIncrementalUniqueKey):
+
+    @use_profile("databricks_sql_endpoint")
+    def test__databricks_sql_endpoint_no_unique_keys(self):
+        '''with no unique keys, seed and model should match'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='no_unique_key'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile("databricks_cluster")
+    def test__databricks_cluster_no_unique_keys(self):
+        '''with no unique keys, seed and model should match'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='no_unique_key'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+
+class TestIncrementalStrUniqueKey(TestIncrementalUniqueKey):
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_empty_str_unique_key(self):
+        '''with empty string for unique key, seed and model should match'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='empty_str_unique_key'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_empty_str_unique_key(self):
+        '''with empty string for unique key, seed and model should match'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='empty_str_unique_key'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_one_unique_key(self):
+        '''with one unique key, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='str_unique_key'
+        update_sql_file='duplicate_insert'
+        expected_model='one_str__overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_one_unique_key(self):
+        '''with one unique key, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='str_unique_key'
+        update_sql_file='duplicate_insert'
+        expected_model='one_str__overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_bad_unique_key(self):
+        '''expect compilation error from unique key not being a column'''
+
+        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
+            incremental_model_name='not_found_unique_key'
+        )
+
+        self.assertEqual(status, RunStatus.Error)
+        self.assertTrue("thisisnotacolumn" in exc)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_bad_unique_key(self):
+        '''expect compilation error from unique key not being a column'''
+
+        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
+            incremental_model_name='not_found_unique_key'
+        )
+
+        self.assertEqual(status, RunStatus.Error)
+        self.assertTrue("thisisnotacolumn" in exc)
+
+
+class TestIncrementalListUniqueKey(TestIncrementalUniqueKey):
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_empty_unique_key_list(self):
+        '''with no unique keys, seed and model should match'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='empty_unique_key_list'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_empty_unique_key_list(self):
+        '''with no unique keys, seed and model should match'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='empty_unique_key_list'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_unary_unique_key_list(self):
+        '''with one unique key, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='unary_unique_key_list'
+        update_sql_file='duplicate_insert'
+        expected_model='unique_key_list__inplace_overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_unary_unique_key_list(self):
+        '''with one unique key, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='unary_unique_key_list'
+        update_sql_file='duplicate_insert'
+        expected_model='unique_key_list__inplace_overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_duplicated_unary_unique_key_list(self):
+        '''with two of the same unique key, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='duplicated_unary_unique_key_list'
+        update_sql_file='duplicate_insert'
+        expected_model='unique_key_list__inplace_overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_duplicated_unary_unique_key_list(self):
+        '''with two of the same unique key, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='duplicated_unary_unique_key_list'
+        update_sql_file='duplicate_insert'
+        expected_model='unique_key_list__inplace_overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_trinary_unique_key_list(self):
+        '''with three unique keys, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='trinary_unique_key_list'
+        update_sql_file='duplicate_insert'
+        expected_model='unique_key_list__inplace_overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_trinary_unique_key_list(self):
+        '''with three unique keys, model will overwrite existing row'''
+        seed='seed'
+        seed_rows=7
+        incremental_model='trinary_unique_key_list'
+        update_sql_file='duplicate_insert'
+        expected_model='unique_key_list__inplace_overwrite'
+
+        expected_fields = self.stub_expected_fields(
+            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=self.update_incremental_model(expected_model),
+            relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_trinary_unique_key_list_no_update(self):
+        '''even with three unique keys, adding distinct rows to seed does not
+           cause seed and model to diverge'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='nontyped_trinary_unique_key_list'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_trinary_unique_key_list_no_update(self):
+        '''even with three unique keys, adding distinct rows to seed does not
+           cause seed and model to diverge'''
+        seed='seed'
+        seed_rows=8
+        incremental_model='nontyped_trinary_unique_key_list'
+        update_sql_file='add_new_rows'
+
+        expected_fields = self.stub_expected_fields(
+            relation=seed, seed_rows=seed_rows
+        )
+        test_case_fields = TestResults(
+            *self.setup_test(seed, incremental_model, update_sql_file),
+            opt_model_count=None, relation=incremental_model
+        )
+
+        self.test_scenario_correctness(expected_fields, test_case_fields)
+
+    @use_profile('databricks_sql_endpoint')
+    def test__databricks_sql_endpoint_bad_unique_key_list(self):
+        '''expect compilation error from unique key not being a column'''
+
+        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
+            incremental_model_name='not_found_unique_key_list'
+        )
+
+        self.assertEqual(status, RunStatus.Error)
+        self.assertTrue("thisisnotacolumn" in exc)
+
+    @use_profile('databricks_cluster')
+    def test__databricks_cluster_bad_unique_key_list(self):
+        '''expect compilation error from unique key not being a column'''
+
+        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
+            incremental_model_name='not_found_unique_key_list'
+        )
+
+        self.assertEqual(status, RunStatus.Error)
+        self.assertTrue("thisisnotacolumn" in exc)
+        
\ No newline at end of file

From ee5276561d55e5eedfbe91915a9cab50dbff956a Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Wed, 23 Mar 2022 15:58:40 -0500
Subject: [PATCH 291/603] Add sasl dependency (#303)

---
 .github/workflows/version-bump.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml
index 4913a6e8..7fb8bb6e 100644
--- a/.github/workflows/version-bump.yml
+++ b/.github/workflows/version-bump.yml
@@ -55,6 +55,7 @@ jobs:
 
       - name: Install python dependencies
         run: |
+          sudo apt-get install libsasl2-dev
           python3 -m venv env
           source env/bin/activate
           pip install --upgrade pip     

From e9d5bfb3dc52bc85ab740d0267d9f86ed7e671a1 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 23 Mar 2022 17:47:56 -0400
Subject: [PATCH 292/603] Bumping version to 1.1.0b1 (#304)

* Bumping version to 1.1.0b1

* Update CHANGELOG.md

* Updating changelog release date

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                  |  3 ++-
 CHANGELOG.md                      | 11 +++++++++--
 dbt/adapters/spark/__version__.py |  2 +-
 setup.py                          |  2 +-
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 9db331eb..9a0c41a5 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.0.0
+current_version = 1.1.0b1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
@@ -25,3 +25,4 @@ first_value = 1
 [bumpversion:file:setup.py]
 
 [bumpversion:file:dbt/adapters/spark/__version__.py]
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9642b3a2..1a149622 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,13 +1,20 @@
-## dbt-spark 1.1.0 (Release TBD)
+## dbt-spark 1.1.0b1 (March 23, 2022)
 
 ### Features
-- Adds new integration test to check against new ability to allow unique_key to be a list. ([#282](https://github.com/dbt-labs/dbt-spark/issues/282))
+- Adds new integration test to check against new ability to allow unique_key to be a list. ([#282](https://github.com/dbt-labs/dbt-spark/issues/282), [#291](https://github.com/dbt-labs/dbt-spark/pull/291))
+
+### Fixes
+- Closes the connection properly ([#280](https://github.com/dbt-labs/dbt-spark/issues/280), [#285](https://github.com/dbt-labs/dbt-spark/pull/285))
 
 ### Under the hood
+- get_response -> AdapterResponse ([#265](https://github.com/dbt-labs/dbt-spark/pull/265))
+- Adding stale Actions workflow ([#275](https://github.com/dbt-labs/dbt-spark/pull/275))
 - Update plugin author name (`fishtown-analytics` → `dbt-labs`) in ODBC user agent ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
+- Configure insert_overwrite models to use parquet ([#301](https://github.com/dbt-labs/dbt-spark/pull/301))
 
 ### Contributors
 - [@amychen1776](https://github.com/amychen1776) ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
+- [@ueshin](https://github.com/ueshin) ([#285](https://github.com/dbt-labs/dbt-spark/pull/285))
 
 ## dbt-spark 1.0.1rc0 (Release TBD)
 
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 11a716ec..56ec17a8 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.0.0"
+version = "1.1.0b1"
diff --git a/setup.py b/setup.py
index fdf8691a..6ee6f5f5 100644
--- a/setup.py
+++ b/setup.py
@@ -52,7 +52,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.0.0"
+package_version = "1.1.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 086becba09ac1381dab2bf31fca4763fe6c3f74e Mon Sep 17 00:00:00 2001
From: Cor 
Date: Sat, 26 Mar 2022 15:26:37 +0000
Subject: [PATCH 293/603] Add spark session connection (#279)

* Add session module

* Add session connection method

* Add session extras to setup.py

* Add check for session method

* Add session connection wrapper

* Add session to connection manager

* Remove unused imports

* Add spark session dbtspec

* Add tox spark session environment

* Add missing settings to dbt spec

* Install session requirements

* Add tox spark session to circle ci

* Add pytest spark as test requirement

* Add fixture to force use of spark session

* Add pytest ini

* Update passenv in tox

* Set catalog implementation to hive

* Make separate session connection wrapper

* Format parameters

* Run spark session before thrift

* Add spark to dev requirements

* Fix session module

* Bump Spark session python version

* Change docker image for spark session

* Install python3

* Update ci

* Remove spark fixture

* Move session connection wrapper to session module

* Disable tests that require hive support

* Format

* Change python 3 to python 3.8

* Install non-python dependencies

* Remove dev-requirements

* Remove pytest ini

* Update the install

* Add session method to change log

* Do not pin sasl version

* Delete spark session test profile

* Add postgres container for hive support

* Enable all session tests

* Enable hive support

* Add delta as file format

* Use equals in spark defaults

* Change reference to find spark home

* Copy configs in one go

* List spark conf

* Let session test be the same as thrift

* Update spark defaults

* Enable error logging on postgres

* Remove ls

* Add port to connection url

* Do not copy spark config

* Print postgres

* Remove postgres logging from thrift

* Remove postgres from spark session tests

* Change connection url back to dbt-hive-metastore

* Revert Spark defaults changes

* Disable tests and explain why

* Move change log to top of file

* Move contributor note up in changelog

Co-authored-by: Jeremy Cohen 
---
 .circleci/config.yml              |  23 +++-
 CHANGELOG.md                      |   6 +
 dbt/adapters/spark/connections.py |  19 +++
 dbt/adapters/spark/session.py     | 221 ++++++++++++++++++++++++++++++
 dev_requirements.txt              |   2 +-
 setup.py                          |  10 +-
 tests/integration/conftest.py     |  12 +-
 tests/specs/spark-session.dbtspec |  17 +++
 tox.ini                           |  12 ++
 9 files changed, 308 insertions(+), 14 deletions(-)
 create mode 100644 dbt/adapters/spark/session.py
 create mode 100644 tests/specs/spark-session.dbtspec

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4921fac9..135c22cd 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,6 +10,24 @@ jobs:
       - checkout
       - run: tox -e flake8,unit
 
+  integration-spark-session:
+    environment:
+      DBT_INVOCATION_ENV: circle
+    docker:
+      - image: godatadriven/pyspark:3.1
+    steps:
+      - checkout
+      - run: apt-get update
+      - run: python3 -m pip install --upgrade pip
+      - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev
+      - run: python3 -m pip install tox
+      - run:
+          name: Run integration tests
+          command: tox -e integration-spark-session
+          no_output_timeout: 1h
+      - store_artifacts:
+          path: ./logs
+
   integration-spark-thrift:
     environment:
       DBT_INVOCATION_ENV: circle
@@ -90,7 +108,7 @@ jobs:
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
-          
+
   integration-spark-databricks-odbc-endpoint:
     <<: *databricks-odbc
     steps:
@@ -107,6 +125,9 @@ workflows:
   test-everything:
     jobs:
       - unit
+      - integration-spark-session:
+          requires:
+            - unit
       - integration-spark-thrift:
           requires:
             - unit
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1a149622..2b2e8f97 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+### Features
+- Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
+
+### Contributors
+- [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ([#279](https://github.com/dbt-labs/dbt-spark/pull/279))
+
 ## dbt-spark 1.1.0b1 (March 23, 2022)
 
 ### Features
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 608ab2b4..11163ccf 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -55,6 +55,7 @@ class SparkConnectionMethod(StrEnum):
     THRIFT = 'thrift'
     HTTP = 'http'
     ODBC = 'odbc'
+    SESSION = 'session'
 
 
 @dataclass
@@ -133,6 +134,18 @@ def __post_init__(self):
                 "`pip install dbt-spark[PyHive]`"
             )
 
+        if self.method == SparkConnectionMethod.SESSION:
+            try:
+                import pyspark  # noqa: F401
+            except ImportError as e:
+                raise dbt.exceptions.RuntimeException(
+                    f"{self.method} connection method requires "
+                    "additional dependencies. \n"
+                    "Install the additional required dependencies with "
+                    "`pip install dbt-spark[session]`\n\n"
+                    f"ImportError({e.msg})"
+                ) from e
+
     @property
     def type(self):
         return 'spark'
@@ -443,6 +456,12 @@ def open(cls, connection):
 
                     conn = pyodbc.connect(connection_str, autocommit=True)
                     handle = PyodbcConnectionWrapper(conn)
+                elif creds.method == SparkConnectionMethod.SESSION:
+                    from .session import (  # noqa: F401
+                        Connection,
+                        SessionConnectionWrapper,
+                    )
+                    handle = SessionConnectionWrapper(Connection())
                 else:
                     raise dbt.exceptions.DbtProfileError(
                         f"invalid credential method: {creds.method}"
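
Note: with the new branch above, profiles using method: session are handed a SessionConnectionWrapper around the Connection/Cursor classes defined in the session.py diff that follows. A small usage sketch, assuming pyspark is installed via the new session extra (the outputs shown in comments are approximate):

    from dbt.adapters.spark.session import Connection

    cursor = Connection().cursor()
    cursor.execute("select 1 as id")   # runs through a local SparkSession
    print(cursor.description)          # e.g. [('id', 'int', None, None, None, None, False)]
    print(cursor.fetchall())           # e.g. [Row(id=1)]
    cursor.close()
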
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
new file mode 100644
index 00000000..6010df92
--- /dev/null
+++ b/dbt/adapters/spark/session.py
@@ -0,0 +1,221 @@
+"""Spark session integration."""
+
+from __future__ import annotations
+
+import datetime as dt
+from types import TracebackType
+from typing import Any
+
+from dbt.events import AdapterLogger
+from dbt.utils import DECIMALS
+from pyspark.sql import DataFrame, Row, SparkSession
+
+
+logger = AdapterLogger("Spark")
+NUMBERS = DECIMALS + (int, float)
+
+
+class Cursor:
+    """
+    Mock a pyodbc cursor.
+
+    Source
+    ------
+    https://github.com/mkleehammer/pyodbc/wiki/Cursor
+    """
+
+    def __init__(self) -> None:
+        self._df: DataFrame | None = None
+        self._rows: list[Row] | None = None
+
+    def __enter__(self) -> Cursor:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: Exception | None,
+        exc_tb: TracebackType | None,
+    ) -> bool:
+        self.close()
+        return True
+
+    @property
+    def description(
+        self,
+    ) -> list[tuple[str, str, None, None, None, None, bool]]:
+        """
+        Get the description.
+
+        Returns
+        -------
+        out : list[tuple[str, str, None, None, None, None, bool]]
+            The description.
+
+        Source
+        ------
+        https://github.com/mkleehammer/pyodbc/wiki/Cursor#description
+        """
+        if self._df is None:
+            description = list()
+        else:
+            description = [
+                (
+                    field.name,
+                    field.dataType.simpleString(),
+                    None,
+                    None,
+                    None,
+                    None,
+                    field.nullable,
+                )
+                for field in self._df.schema.fields
+            ]
+        return description
+
+    def close(self) -> None:
+        """
+        Close the connection.
+
+        Source
+        ------
+        https://github.com/mkleehammer/pyodbc/wiki/Cursor#close
+        """
+        self._df = None
+        self._rows = None
+
+    def execute(self, sql: str, *parameters: Any) -> None:
+        """
+        Execute a sql statement.
+
+        Parameters
+        ----------
+        sql : str
+            Execute a sql statement.
+        *parameters : Any
+            Parameters to interpolate into the sql statement using
+            printf-style (%) formatting.
+
+        Source
+        ------
+        https://github.com/mkleehammer/pyodbc/wiki/Cursor#executesql-parameters
+        """
+        if len(parameters) > 0:
+            sql = sql % parameters
+        spark_session = SparkSession.builder.enableHiveSupport().getOrCreate()
+        self._df = spark_session.sql(sql)
+
+    def fetchall(self) -> list[Row] | None:
+        """
+        Fetch all data.
+
+        Returns
+        -------
+        out : list[Row] | None
+            The rows.
+
+        Source
+        ------
+        https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchall
+        """
+        if self._rows is None and self._df is not None:
+            self._rows = self._df.collect()
+        return self._rows
+
+    def fetchone(self) -> Row | None:
+        """
+        Fetch the first output.
+
+        Returns
+        -------
+        out : Row | None
+            The first row.
+
+        Source
+        ------
+        https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchone
+        """
+        if self._rows is None and self._df is not None:
+            self._rows = self._df.collect()
+
+        if self._rows is not None and len(self._rows) > 0:
+            row = self._rows.pop(0)
+        else:
+            row = None
+
+        return row
+
+
+class Connection:
+    """
+    Mock a pyodbc connection.
+
+    Source
+    ------
+    https://github.com/mkleehammer/pyodbc/wiki/Connection
+    """
+
+    def cursor(self) -> Cursor:
+        """
+        Get a cursor.
+
+        Returns
+        -------
+        out : Cursor
+            The cursor.
+        """
+        return Cursor()
+
+
+class SessionConnectionWrapper(object):
+    """Connection wrapper for the sessoin connection method."""
+
+    def __init__(self, handle):
+        self.handle = handle
+        self._cursor = None
+
+    def cursor(self):
+        self._cursor = self.handle.cursor()
+        return self
+
+    def cancel(self):
+        logger.debug("NotImplemented: cancel")
+
+    def close(self):
+        if self._cursor:
+            self._cursor.close()
+
+    def rollback(self, *args, **kwargs):
+        logger.debug("NotImplemented: rollback")
+
+    def fetchall(self):
+        return self._cursor.fetchall()
+
+    def execute(self, sql, bindings=None):
+        if sql.strip().endswith(";"):
+            sql = sql.strip()[:-1]
+
+        if bindings is None:
+            self._cursor.execute(sql)
+        else:
+            bindings = [self._fix_binding(binding) for binding in bindings]
+            self._cursor.execute(sql, *bindings)
+
+    @property
+    def description(self):
+        return self._cursor.description
+
+    @classmethod
+    def _fix_binding(cls, value):
+        """Convert complex datatypes to primitives that can be loaded by
+        the Spark driver"""
+        if isinstance(value, NUMBERS):
+            return float(value)
+        elif isinstance(value, dt.datetime):
+            return f"'{value.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]}'"
+        else:
+            return f"'{value}'"
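For reference, a minimal usage sketch (not part of the patch) that exercises the mock `Connection`/`Cursor` defined above. It assumes `dbt-spark` is installed with the new `session` extra (`pip install dbt-spark[session]`) so that `pyspark` and `dbt.adapters.spark.session` are importable; the output comments are illustrative only.

```
from dbt.adapters.spark.session import Connection

conn = Connection()
with conn.cursor() as cursor:
    # Cursor.execute() calls SparkSession.builder.enableHiveSupport().getOrCreate()
    # internally, so no explicit SparkSession setup is needed here.
    cursor.execute("select 1 as id, 'spark' as engine")
    print(cursor.description)  # roughly: [('id', 'int', None, None, None, None, False), ...]
    print(cursor.fetchall())   # roughly: [Row(id=1, engine='spark')]
```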
diff --git a/dev_requirements.txt b/dev_requirements.txt
index 9b371f9c..9b4e8a12 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -16,5 +16,5 @@ pytest-csv
 
 # Test requirements
 pytest-dbt-adapter==0.6.0
-sasl==0.2.1
+sasl>=0.2.1
 thrift_sasl==0.4.1
diff --git a/setup.py b/setup.py
index 6ee6f5f5..2cd44491 100644
--- a/setup.py
+++ b/setup.py
@@ -61,7 +61,10 @@ def _get_dbt_core_version():
     'PyHive[hive]>=0.6.0,<0.7.0',
     'thrift>=0.11.0,<0.16.0',
 ]
-all_extras = odbc_extras + pyhive_extras
+session_extras = [
+    "pyspark>=3.0.0,<4.0.0"
+]
+all_extras = odbc_extras + pyhive_extras + session_extras
 
 setup(
     name=package_name,
@@ -83,8 +86,9 @@ def _get_dbt_core_version():
     ],
     extras_require={
         "ODBC": odbc_extras,
-        "PyHive":  pyhive_extras,
-        "all": all_extras
+        "PyHive": pyhive_extras,
+        "session": session_extras,
+        "all": all_extras,
     },
     zip_safe=False,
     classifiers=[
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 02248bae..b76bc6c3 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,10 +1,4 @@
 def pytest_configure(config):
-    config.addinivalue_line(
-        "markers", "profile_databricks_cluster"
-    )
-    config.addinivalue_line(
-        "markers", "profile_databricks_sql_endpoint"
-    )
-    config.addinivalue_line(
-        "markers", "profile_apache_spark"
-    )
+    config.addinivalue_line("markers", "profile_databricks_cluster")
+    config.addinivalue_line("markers", "profile_databricks_sql_endpoint")
+    config.addinivalue_line("markers", "profile_apache_spark")
diff --git a/tests/specs/spark-session.dbtspec b/tests/specs/spark-session.dbtspec
new file mode 100644
index 00000000..cd09aa17
--- /dev/null
+++ b/tests/specs/spark-session.dbtspec
@@ -0,0 +1,17 @@
+target:
+  type: spark
+  method: session
+  host: localhost
+  schema: "analytics_{{ var('_dbt_random_suffix') }}"
+sequences:
+  test_dbt_empty: empty
+  # requires a metastore for persisting over dbt runs
+  # test_dbt_base: base
+  # test_dbt_ephemeral: ephemeral
+  # test_dbt_incremental: incremental
+  # snapshots require delta format
+  # test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp
+  # test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols
+  test_dbt_data_test: data_test
+  test_dbt_schema_test: schema_test
+  test_dbt_ephemeral_data_tests: data_test_ephemeral_models
diff --git a/tox.ini b/tox.ini
index 993ab801..e896421e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -57,3 +57,15 @@ deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev_requirements.txt
     -e.
+
+[testenv:integration-spark-session]
+basepython = python3
+commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-session.dbtspec'
+passenv =
+    DBT_*
+    PYTEST_*
+    PIP_CACHE_DIR
+deps =
+    -r{toxinidir}/requirements.txt
+    -r{toxinidir}/dev_requirements.txt
+    -e.[session]

From bbff5c72693565f1e1e2b0e821186d0b73c49ae3 Mon Sep 17 00:00:00 2001
From: Gerda Shank 
Date: Tue, 29 Mar 2022 17:16:44 -0400
Subject: [PATCH 294/603] Initial implementation of new testing framework
 (#299)

---
 CHANGELOG.md                                  |  5 ++
 dev_requirements.txt                          |  3 +-
 pytest.ini                                    | 10 +++
 tests/conftest.py                             | 90 +++++++++++++++++++
 tests/functional/adapter/test_basic.py        | 79 ++++++++++++++++
 tests/integration/base.py                     |  2 +
 .../test_incremental_unique_id.py             | 40 ++++-----
 tests/specs/spark-databricks-http.dbtspec     | 32 -------
 .../spark-databricks-odbc-cluster.dbtspec     | 33 -------
 ...spark-databricks-odbc-sql-endpoint.dbtspec | 35 --------
 tests/specs/spark-thrift.dbtspec              | 22 -----
 tox.ini                                       |  8 +-
 12 files changed, 212 insertions(+), 147 deletions(-)
 create mode 100644 pytest.ini
 create mode 100644 tests/conftest.py
 create mode 100644 tests/functional/adapter/test_basic.py
 delete mode 100644 tests/specs/spark-databricks-http.dbtspec
 delete mode 100644 tests/specs/spark-databricks-odbc-cluster.dbtspec
 delete mode 100644 tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec
 delete mode 100644 tests/specs/spark-thrift.dbtspec

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b2e8f97..e61a335c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,11 @@
+## dbt-spark 1.1.0 (TBD)
+
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 
+### Under the hood
+- Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
+
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 
diff --git a/dev_requirements.txt b/dev_requirements.txt
index 9b4e8a12..520d1f5b 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,6 +1,7 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 freezegun==0.3.9
 pytest==6.0.2
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..b04a6ccf
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,10 @@
+[pytest]
+filterwarnings =
+    ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning
+    ignore:unclosed file .*:ResourceWarning
+env_files =
+    test.env
+testpaths =
+    tests/unit
+    tests/integration
+    tests/functional
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..603dc139
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,90 @@
+import pytest
+import os
+
+pytest_plugins = ["dbt.tests.fixtures.project"]
+
+
+def pytest_addoption(parser):
+    parser.addoption("--profile", action="store", default="apache_spark", type=str)
+
+
+# Using @pytest.mark.skip_profile('apache_spark') uses the 'skip_by_profile_type'
+# autouse fixture below
+def pytest_configure(config):
+    config.addinivalue_line(
+        "markers",
+        "skip_profile(profile): skip test for the given profile",
+    )
+
+
+@pytest.fixture(scope="session")
+def dbt_profile_target(request):
+    profile_type = request.config.getoption("--profile")
+    if profile_type == "databricks_cluster":
+        target = databricks_cluster_target()
+    elif profile_type == "databricks_sql_endpoint":
+        target = databricks_sql_endpoint_target()
+    elif profile_type == "apache_spark":
+        target = apache_spark_target()
+    elif profile_type == "databricks_http_cluster":
+        target = databricks_http_cluster_target()
+    else:
+        raise ValueError(f"Invalid profile type '{profile_type}'")
+    return target
+
+
+def apache_spark_target():
+    return {
+        "type": "spark",
+        "host": "localhost",
+        "user": "dbt",
+        "method": "thrift",
+        "port": 10000,
+        "connect_retries": 5,
+        "connect_timeout": 60,
+        "retry_all": True,
+    }
+
+
+def databricks_cluster_target():
+    return {
+        "type": "spark",
+        "method": "odbc",
+        "host": os.getenv("DBT_DATABRICKS_HOST_NAME"),
+        "cluster": os.getenv("DBT_DATABRICKS_CLUSTER_NAME"),
+        "token": os.getenv("DBT_DATABRICKS_TOKEN"),
+        "driver": os.getenv("ODBC_DRIVER"),
+        "port": 443,
+    }
+
+
+def databricks_sql_endpoint_target():
+    return {
+        "type": "spark",
+        "method": "odbc",
+        "host": os.getenv("DBT_DATABRICKS_HOST_NAME"),
+        "endpoint": os.getenv("DBT_DATABRICKS_ENDPOINT"),
+        "token": os.getenv("DBT_DATABRICKS_TOKEN"),
+        "driver": os.getenv("ODBC_DRIVER"),
+        "port": 443,
+    }
+
+
+def databricks_http_cluster_target():
+    return {
+        "type": "spark",
+        "host": os.getenv('DBT_DATABRICKS_HOST_NAME'),
+        "cluster": os.getenv('DBT_DATABRICKS_CLUSTER_NAME'),
+        "token": os.getenv('DBT_DATABRICKS_TOKEN'),
+        "method": "http",
+        "port": 443,
+        "connect_retries": 5,
+        "connect_timeout": 60,
+    }
+
+@pytest.fixture(autouse=True)
+def skip_by_profile_type(request):
+    profile_type = request.config.getoption("--profile")
+    if request.node.get_closest_marker("skip_profile"):
+        if request.node.get_closest_marker("skip_profile").args[0] == profile_type:
+            pytest.skip(f"skipped on '{profile_type}' profile")
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
new file mode 100644
index 00000000..c459e946
--- /dev/null
+++ b/tests/functional/adapter/test_basic.py
@@ -0,0 +1,79 @@
+import pytest
+
+from dbt.tests.adapter.basic.test_base import BaseSimpleMaterializations
+from dbt.tests.adapter.basic.test_singular_tests import BaseSingularTests
+from dbt.tests.adapter.basic.test_singular_tests_ephemeral import (
+    BaseSingularTestsEphemeral,
+)
+from dbt.tests.adapter.basic.test_empty import BaseEmpty
+from dbt.tests.adapter.basic.test_ephemeral import BaseEphemeral
+from dbt.tests.adapter.basic.test_incremental import BaseIncremental
+from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests
+from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols
+from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp
+
+
+@pytest.mark.skip_profile('databricks_sql_endpoint')
+class TestSimpleMaterializationsSpark(BaseSimpleMaterializations):
+    pass
+
+
+class TestSingularTestsSpark(BaseSingularTests):
+    pass
+
+
+# The local cluster currently tests on spark 2.x, which does not support this
+# if we upgrade it to 3.x, we can enable this test
+@pytest.mark.skip_profile('apache_spark')
+class TestSingularTestsEphemeralSpark(BaseSingularTestsEphemeral):
+    pass
+
+
+class TestEmptySpark(BaseEmpty):
+    pass
+
+
+@pytest.mark.skip_profile('databricks_sql_endpoint')
+class TestEphemeralSpark(BaseEphemeral):
+    pass
+
+
+@pytest.mark.skip_profile('databricks_sql_endpoint')
+class TestIncrementalSpark(BaseIncremental):
+    pass
+
+
+class TestGenericTestsSpark(BaseGenericTests):
+    pass
+
+
+# These tests were not enabled in the dbtspec files, so skipping here.
+# Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
+@pytest.mark.skip_profile('apache_spark')
+class TestSnapshotCheckColsSpark(BaseSnapshotCheckCols):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "seeds": {
+                "+file_format": "delta",
+            },
+            "snapshots": {
+                "+file_format": "delta",
+            }
+        }
+
+
+# These tests were not enabled in the dbtspec files, so skipping here.
+# Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
+@pytest.mark.skip_profile('apache_spark')
+class TestSnapshotTimestampSpark(BaseSnapshotTimestamp):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "seeds": {
+                "+file_format": "delta",
+            },
+            "snapshots": {
+                "+file_format": "delta",
+            }
+        }
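Beyond subclassing the prebuilt base cases above, the same framework supports ad hoc functional tests. The sketch below (not part of the patch) shows roughly what such a test could look like, reusing the `project` fixture wired up in `tests/conftest.py`; the model and class names are hypothetical.

```
import pytest

from dbt.tests.util import run_dbt

# hypothetical model used only for this sketch
my_model_sql = "select 1 as id"


class TestMyModelSpark:
    @pytest.fixture(scope="class")
    def models(self):
        return {"my_model.sql": my_model_sql}

    def test_run(self, project):
        # builds the single model above against the target selected via --profile
        results = run_dbt(["run"])
        assert len(results) == 1
```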
diff --git a/tests/integration/base.py b/tests/integration/base.py
index acce6a74..e36162aa 100644
--- a/tests/integration/base.py
+++ b/tests/integration/base.py
@@ -77,6 +77,8 @@ def __init__(self):
 
 
 class TestArgs:
+    __test__ = False
+
     def __init__(self, kwargs):
         self.which = 'run'
         self.single_threaded = False
diff --git a/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py b/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py
index 6ba80bc7..96e61912 100644
--- a/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py
+++ b/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py
@@ -4,8 +4,8 @@
 from pathlib import Path
 
 
-TestResults = namedtuple(
-    'TestResults',
+ResultHolder = namedtuple(
+    'ResultHolder',
     ['seed_count', 'model_count', 'seed_rows', 'inc_test_model_count',
      'opt_model_count', 'relation'],
 )
@@ -95,7 +95,7 @@ def test_scenario_correctness(self, expected_fields, test_case_fields):
     def stub_expected_fields(
         self, relation, seed_rows, opt_model_count=None
     ):
-        return TestResults(
+        return ResultHolder(
             seed_count=1, model_count=1, seed_rows=seed_rows,
             inc_test_model_count=1, opt_model_count=opt_model_count,
             relation=relation
@@ -134,7 +134,7 @@ def test__databricks_sql_endpoint_no_unique_keys(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -152,7 +152,7 @@ def test__databricks_cluster_no_unique_keys(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -173,7 +173,7 @@ def test__databricks_sql_endpoint_empty_str_unique_key(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -191,7 +191,7 @@ def test__databricks_cluster_empty_str_unique_key(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -210,7 +210,7 @@ def test__databricks_sql_endpoint_one_unique_key(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -230,7 +230,7 @@ def test__databricks_cluster_one_unique_key(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -274,7 +274,7 @@ def test__databricks_sql_endpoint_empty_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -292,7 +292,7 @@ def test__databricks_cluster_empty_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -311,7 +311,7 @@ def test__databricks_sql_endpoint_unary_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -331,7 +331,7 @@ def test__databricks_cluster_unary_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -351,7 +351,7 @@ def test__databricks_sql_endpoint_duplicated_unary_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -371,7 +371,7 @@ def test__databricks_cluster_duplicated_unary_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -391,7 +391,7 @@ def test__databricks_sql_endpoint_trinary_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -411,7 +411,7 @@ def test__databricks_cluster_trinary_unique_key_list(self):
         expected_fields = self.stub_expected_fields(
             relation=expected_model, seed_rows=seed_rows, opt_model_count=1
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=self.update_incremental_model(expected_model),
             relation=incremental_model
@@ -431,7 +431,7 @@ def test__databricks_sql_endpoint_trinary_unique_key_list_no_update(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -450,7 +450,7 @@ def test__databricks_cluster_trinary_unique_key_list_no_update(self):
         expected_fields = self.stub_expected_fields(
             relation=seed, seed_rows=seed_rows
         )
-        test_case_fields = TestResults(
+        test_case_fields = ResultHolder(
             *self.setup_test(seed, incremental_model, update_sql_file),
             opt_model_count=None, relation=incremental_model
         )
@@ -478,4 +478,4 @@ def test__databricks_cluster_bad_unique_key_list(self):
 
         self.assertEqual(status, RunStatus.Error)
         self.assertTrue("thisisnotacolumn" in exc)
-        
\ No newline at end of file
+        
diff --git a/tests/specs/spark-databricks-http.dbtspec b/tests/specs/spark-databricks-http.dbtspec
deleted file mode 100644
index 67342da3..00000000
--- a/tests/specs/spark-databricks-http.dbtspec
+++ /dev/null
@@ -1,32 +0,0 @@
-target:
-  type: spark
-  host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}"
-  cluster: "{{ env_var('DBT_DATABRICKS_CLUSTER_NAME') }}"
-  token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}"
-  method: http
-  port: 443
-  schema: "analytics_{{ var('_dbt_random_suffix') }}"
-  connect_retries: 5
-  connect_timeout: 60
-projects:
-  - overrides: snapshot_strategy_check_cols
-    dbt_project_yml: &file_format_delta
-      # we're going to UPDATE the seed tables as part of testing, so we must make them delta format
-      seeds:
-        dbt_test_project:
-          file_format: delta
-      snapshots:
-        dbt_test_project:
-          file_format: delta
-  - overrides: snapshot_strategy_timestamp
-    dbt_project_yml: *file_format_delta
-sequences:
-  test_dbt_empty: empty
-  test_dbt_base: base
-  test_dbt_ephemeral: ephemeral
-  test_dbt_incremental: incremental
-  test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp
-  test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols
-  test_dbt_data_test: data_test
-  test_dbt_ephemeral_data_tests: data_test_ephemeral_models
-  test_dbt_schema_test: schema_test
diff --git a/tests/specs/spark-databricks-odbc-cluster.dbtspec b/tests/specs/spark-databricks-odbc-cluster.dbtspec
deleted file mode 100644
index b320dc3a..00000000
--- a/tests/specs/spark-databricks-odbc-cluster.dbtspec
+++ /dev/null
@@ -1,33 +0,0 @@
-target:
-  type: spark
-  host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}"
-  cluster: "{{ env_var('DBT_DATABRICKS_CLUSTER_NAME') }}"
-  token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}"
-  method: odbc
-  driver: "{{ env_var('ODBC_DRIVER') }}"
-  port: 443
-  schema: "analytics_{{ var('_dbt_random_suffix') }}"
-  connect_retries: 5
-  connect_timeout: 60
-projects:
-  - overrides: snapshot_strategy_check_cols
-    dbt_project_yml: &file_format_delta
-      # we're going to UPDATE the seed tables as part of testing, so we must make them delta format
-      seeds:
-        dbt_test_project:
-          file_format: delta
-      snapshots:
-        dbt_test_project:
-          file_format: delta
-  - overrides: snapshot_strategy_timestamp
-    dbt_project_yml: *file_format_delta
-sequences:
-  test_dbt_empty: empty
-  test_dbt_base: base
-  test_dbt_ephemeral: ephemeral
-  test_dbt_incremental: incremental
-  test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp
-  test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols
-  test_dbt_data_test: data_test
-  test_dbt_ephemeral_data_tests: data_test_ephemeral_models
-  test_dbt_schema_test: schema_test
diff --git a/tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec b/tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec
deleted file mode 100644
index 0aa7be76..00000000
--- a/tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec
+++ /dev/null
@@ -1,35 +0,0 @@
-target:
-  type: spark
-  host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}"
-  endpoint: "{{ env_var('DBT_DATABRICKS_ENDPOINT') }}"
-  token: "{{ env_var('DBT_DATABRICKS_TOKEN') }}"
-  method: odbc
-  driver: "{{ env_var('ODBC_DRIVER') }}"
-  port: 443
-  schema: "analytics_{{ var('_dbt_random_suffix') }}"
-  connect_retries: 5
-  connect_timeout: 60
-projects:
-  - overrides: snapshot_strategy_check_cols
-    dbt_project_yml: &file_format_delta
-      # we're going to UPDATE the seed tables as part of testing, so we must make them delta format
-      seeds:
-        dbt_test_project:
-          file_format: delta
-      snapshots:
-        dbt_test_project:
-          file_format: delta
-  - overrides: snapshot_strategy_timestamp
-    dbt_project_yml: *file_format_delta
-sequences:
-  test_dbt_empty: empty
-  # The SQL Endpoint no longer supports `set` ??
-  # test_dbt_base: base
-  test_dbt_ephemeral: ephemeral
-  # The SQL Endpoint does not support `create temporary view`
-  # test_dbt_incremental: incremental
-  test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp
-  test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols
-  test_dbt_data_test: data_test
-  test_dbt_ephemeral_data_tests: data_test_ephemeral_models
-  test_dbt_schema_test: schema_test
diff --git a/tests/specs/spark-thrift.dbtspec b/tests/specs/spark-thrift.dbtspec
deleted file mode 100644
index 85b843f3..00000000
--- a/tests/specs/spark-thrift.dbtspec
+++ /dev/null
@@ -1,22 +0,0 @@
-target:
-  type: spark
-  host: localhost
-  user: dbt
-  method: thrift
-  port: 10000
-  connect_retries: 5
-  connect_timeout: 60
-  schema: "analytics_{{ var('_dbt_random_suffix') }}"
-sequences:
-  test_dbt_empty: empty
-  test_dbt_base: base
-  test_dbt_ephemeral: ephemeral
-  test_dbt_incremental: incremental
-  # snapshots require delta format
-  # test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp
-  # test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols
-  test_dbt_data_test: data_test
-  test_dbt_schema_test: schema_test
-  # the local cluster currently tests on spark 2.x, which does not support this
-  # if we upgrade it to 3.x, we can enable this test
-  # test_dbt_ephemeral_data_tests: data_test_ephemeral_models
diff --git a/tox.ini b/tox.ini
index e896421e..8e771960 100644
--- a/tox.ini
+++ b/tox.ini
@@ -20,7 +20,7 @@ deps =
 
 [testenv:integration-spark-databricks-http]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-http.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster tests/functional/adapter/test_basic.py'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
@@ -29,7 +29,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-cluster]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-odbc-cluster.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster tests/functional/adapter/test_basic.py'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -39,7 +39,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-databricks-odbc-sql-endpoint.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint tests/functional/adapter/test_basic.py'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -50,7 +50,7 @@ deps =
 
 [testenv:integration-spark-thrift]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-thrift.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark tests/functional/adapter/test_basic.py'
            /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =

From d92a4e54ab0f69c8d327f0220495d1b8b468189c Mon Sep 17 00:00:00 2001
From: Gerda Shank 
Date: Thu, 31 Mar 2022 17:39:28 -0400
Subject: [PATCH 295/603] Remove requirement for pytest-dbt-adapter (#314)

* Remove requirement for pytest-dbt-adapter

* Check multiple profile types in skip_profile

* Update dev_requirements to include pytest-dotenv
---
 dev_requirements.txt                   | 16 ++++++++--------
 tests/conftest.py                      | 16 ++++++++++++++--
 tests/functional/adapter/test_basic.py | 10 +++++-----
 tests/specs/spark-session.dbtspec      | 17 -----------------
 tox.ini                                |  2 +-
 5 files changed, 28 insertions(+), 33 deletions(-)
 delete mode 100644 tests/specs/spark-session.dbtspec

diff --git a/dev_requirements.txt b/dev_requirements.txt
index 520d1f5b..0f84cbd5 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -4,18 +4,18 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 freezegun==0.3.9
-pytest==6.0.2
+pytest>=6.0.2
 mock>=1.3.0
-flake8>=3.5.0
-pytz==2017.2
-bumpversion==0.5.3
-tox==3.2.0
+flake8
+pytz
+bumpversion
+tox>=3.2.0
 ipdb
-pytest-xdist>=2.1.0,<3
-flaky>=3.5.3,<4
+pytest-xdist
+pytest-dotenv
 pytest-csv
+flaky
 
 # Test requirements
-pytest-dbt-adapter==0.6.0
 sasl>=0.2.1
 thrift_sasl==0.4.1
diff --git a/tests/conftest.py b/tests/conftest.py
index 603dc139..69fbf3d5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,6 +28,8 @@ def dbt_profile_target(request):
         target = apache_spark_target()
     elif profile_type == "databricks_http_cluster":
         target = databricks_http_cluster_target()
+    elif profile_type == "spark_session":
+        target = spark_session_target()
     else:
         raise ValueError(f"Invalid profile type '{profile_type}'")
     return target
@@ -82,9 +84,19 @@ def databricks_http_cluster_target():
         "connect_timeout": 60,
     }
 
+
+def spark_session_target():
+    return {
+        "type": "spark",
+        "host": "localhost",
+        "method": "session",
+    }
+
+
 @pytest.fixture(autouse=True)
 def skip_by_profile_type(request):
     profile_type = request.config.getoption("--profile")
     if request.node.get_closest_marker("skip_profile"):
-        if request.node.get_closest_marker("skip_profile").args[0] == profile_type:
-            pytest.skip(f"skipped on '{profile_type}' profile")
+        for skip_profile_type in request.node.get_closest_marker("skip_profile").args:
+            if skip_profile_type == profile_type:
+                pytest.skip(f"skipped on '{profile_type}' profile")
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index c459e946..ff4fdd22 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -13,7 +13,7 @@
 from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp
 
 
-@pytest.mark.skip_profile('databricks_sql_endpoint')
+@pytest.mark.skip_profile('databricks_sql_endpoint', 'spark_session')
 class TestSimpleMaterializationsSpark(BaseSimpleMaterializations):
     pass
 
@@ -33,12 +33,12 @@ class TestEmptySpark(BaseEmpty):
     pass
 
 
-@pytest.mark.skip_profile('databricks_sql_endpoint')
+@pytest.mark.skip_profile('databricks_sql_endpoint', 'spark_session')
 class TestEphemeralSpark(BaseEphemeral):
     pass
 
 
-@pytest.mark.skip_profile('databricks_sql_endpoint')
+@pytest.mark.skip_profile('databricks_sql_endpoint', 'spark_session')
 class TestIncrementalSpark(BaseIncremental):
     pass
 
@@ -49,7 +49,7 @@ class TestGenericTestsSpark(BaseGenericTests):
 
 # These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
-@pytest.mark.skip_profile('apache_spark')
+@pytest.mark.skip_profile('apache_spark', 'spark_session')
 class TestSnapshotCheckColsSpark(BaseSnapshotCheckCols):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -65,7 +65,7 @@ def project_config_update(self):
 
 # These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
-@pytest.mark.skip_profile('apache_spark')
+@pytest.mark.skip_profile('apache_spark', 'spark_session')
 class TestSnapshotTimestampSpark(BaseSnapshotTimestamp):
     @pytest.fixture(scope="class")
     def project_config_update(self):
diff --git a/tests/specs/spark-session.dbtspec b/tests/specs/spark-session.dbtspec
deleted file mode 100644
index cd09aa17..00000000
--- a/tests/specs/spark-session.dbtspec
+++ /dev/null
@@ -1,17 +0,0 @@
-target:
-  type: spark
-  method: session
-  host: localhost
-  schema: "analytics_{{ var('_dbt_random_suffix') }}"
-sequences:
-  test_dbt_empty: empty
-  # requires a metastore for persisting over dbt runs
-  # test_dbt_base: base
-  # test_dbt_ephemeral: ephemeral
-  # test_dbt_incremental: incremental
-  # snapshots require delta format
-  # test_dbt_snapshot_strategy_timestamp: snapshot_strategy_timestamp
-  # test_dbt_snapshot_strategy_check_cols: snapshot_strategy_check_cols
-  test_dbt_data_test: data_test
-  test_dbt_schema_test: schema_test
-  test_dbt_ephemeral_data_tests: data_test_ephemeral_models
diff --git a/tox.ini b/tox.ini
index 8e771960..38cb1962 100644
--- a/tox.ini
+++ b/tox.ini
@@ -60,7 +60,7 @@ deps =
 
 [testenv:integration-spark-session]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v tests/specs/spark-session.dbtspec'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session tests/functional/adapter/test_basic.py'
 passenv =
     DBT_*
     PYTEST_*

From 183b9bf3819af663f008ca007d45439e540cfa95 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Mon, 4 Apr 2022 10:28:19 -0400
Subject: [PATCH 296/603] bring back local instruction (#309)

---
 README.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/README.md b/README.md
index 6795d76b..037a4989 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,56 @@ more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark)
 - [Install dbt](https://docs.getdbt.com/docs/installation)
 - Read the [introduction](https://docs.getdbt.com/docs/introduction/) and [viewpoint](https://docs.getdbt.com/docs/about/viewpoint/)
 
+## Running locally
+A `docker-compose` environment starts a Spark Thrift server and a Postgres database as a Hive Metastore backend.
+Note that this is Spark 2, not Spark 3, so some functionality might not be available.
+
+The following command starts the two Docker containers:
+```
+docker-compose up -d
+```
+It will take a bit of time for the instance to start; you can check the logs of the two containers.
+If the instance doesn't start correctly, run the complete reset commands listed below and then try starting again.
+
+Create a profile like this one:
+
+```
+spark-testing:
+  target: local
+  outputs:
+    local:
+      type: spark
+      method: thrift
+      host: 127.0.0.1
+      port: 10000
+      user: dbt
+      schema: analytics
+      connect_retries: 5
+      connect_timeout: 60
+      retry_all: true
+```
+
+Connecting to the local Spark instance:
+
+* The Spark UI should be available at [http://localhost:4040/sqlserver/](http://localhost:4040/sqlserver/)
+* The endpoint for SQL-based testing is at `http://localhost:10000` and can be referenced with the Hive or Spark JDBC drivers using connection string `jdbc:hive2://localhost:10000` and default credentials `dbt`:`dbt` (see the PyHive connection sketch after this patch)
+
+Note that the Hive metastore data is persisted under `./.hive-metastore/`, and the Spark-produced data under `./.spark-warehouse/`. To completely reset your environment, run the following:
+
+```
+docker-compose down
+rm -rf ./.hive-metastore/
+rm -rf ./.spark-warehouse/
+```
+
+### Reporting bugs and contributing code
+
+-   Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt-spark/issues/new).
+
+## Code of Conduct
+
+Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/).
+
 ## Join the dbt Community
 
 - Be part of the conversation in the [dbt Community Slack](http://community.getdbt.com/)
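As a quick sanity check of the Thrift endpoint described in the README above, here is a minimal connection sketch (not part of the patch). It assumes the docker-compose environment is running and that PyHive is installed (for example via `pip install dbt-spark[PyHive]`); the printed output is illustrative only.

```
from pyhive import hive

# connect to the local Spark Thrift server started by docker-compose
conn = hive.connect(host="127.0.0.1", port=10000, username="dbt")
cursor = conn.cursor()
cursor.execute("show databases")
print(cursor.fetchall())  # e.g. [('default',)]
cursor.close()
conn.close()
```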

From fe77af712b27892f4996d438a46fda7453ab3c49 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN 
Date: Wed, 6 Apr 2022 00:02:14 -0700
Subject: [PATCH 297/603] Make internal macros use macro dispatch to be
 overridable in child adapters (#320)

* Make internal macros use macro dispatch to be overridable in child adapters.

* changelog

* Address a comment.

* Fix.

* Fix.
---
 CHANGELOG.md                          |  2 ++
 dbt/include/spark/macros/adapters.sql | 36 ++++++++++++++++++++++++++-
 tests/unit/test_macros.py             |  9 ++++++-
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e61a335c..1c9b599d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,9 +5,11 @@
 
 ### Under the hood
 - Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
+- Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320))
 
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
+- [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
 
 ## dbt-spark 1.1.0b1 (March 23, 2022)
 
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 2542af81..e96501c4 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -1,11 +1,20 @@
 {% macro file_format_clause() %}
+  {{ return(adapter.dispatch('file_format_clause', 'dbt')()) }}
+{%- endmacro -%}
+
+{% macro spark__file_format_clause() %}
   {%- set file_format = config.get('file_format', validator=validation.any[basestring]) -%}
   {%- if file_format is not none %}
     using {{ file_format }}
   {%- endif %}
 {%- endmacro -%}
 
+
 {% macro location_clause() %}
+  {{ return(adapter.dispatch('location_clause', 'dbt')()) }}
+{%- endmacro -%}
+
+{% macro spark__location_clause() %}
   {%- set location_root = config.get('location_root', validator=validation.any[basestring]) -%}
   {%- set identifier = model['alias'] -%}
   {%- if location_root is not none %}
@@ -13,7 +22,12 @@
   {%- endif %}
 {%- endmacro -%}
 
+
 {% macro options_clause() -%}
+  {{ return(adapter.dispatch('options_clause', 'dbt')()) }}
+{%- endmacro -%}
+
+{% macro spark__options_clause() -%}
   {%- set options = config.get('options') -%}
   {%- if config.get('file_format') == 'hudi' -%}
     {%- set unique_key = config.get('unique_key') -%}
@@ -35,7 +49,12 @@
   {%- endif %}
 {%- endmacro -%}
 
+
 {% macro comment_clause() %}
+  {{ return(adapter.dispatch('comment_clause', 'dbt')()) }}
+{%- endmacro -%}
+
+{% macro spark__comment_clause() %}
   {%- set raw_persist_docs = config.get('persist_docs', {}) -%}
 
   {%- if raw_persist_docs is mapping -%}
@@ -48,7 +67,12 @@
   {% endif %}
 {%- endmacro -%}
 
+
 {% macro partition_cols(label, required=false) %}
+  {{ return(adapter.dispatch('partition_cols', 'dbt')(label, required)) }}
+{%- endmacro -%}
+
+{% macro spark__partition_cols(label, required=false) %}
   {%- set cols = config.get('partition_by', validator=validation.any[list, basestring]) -%}
   {%- if cols is not none %}
     {%- if cols is string -%}
@@ -65,6 +89,10 @@
 
 
 {% macro clustered_cols(label, required=false) %}
+  {{ return(adapter.dispatch('clustered_cols', 'dbt')(label, required)) }}
+{%- endmacro -%}
+
+{% macro spark__clustered_cols(label, required=false) %}
   {%- set cols = config.get('clustered_by', validator=validation.any[list, basestring]) -%}
   {%- set buckets = config.get('buckets', validator=validation.any[int]) -%}
   {%- if (cols is not none) and (buckets is not none) %}
@@ -80,6 +108,7 @@
   {%- endif %}
 {%- endmacro -%}
 
+
 {% macro fetch_tbl_properties(relation) -%}
   {% call statement('list_properties', fetch_result=True) -%}
     SHOW TBLPROPERTIES {{ relation }}
@@ -88,12 +117,17 @@
 {%- endmacro %}
 
 
-{#-- We can't use temporary tables with `create ... as ()` syntax #}
 {% macro create_temporary_view(relation, sql) -%}
+  {{ return(adapter.dispatch('create_temporary_view', 'dbt')(relation, sql)) }}
+{%- endmacro -%}
+
+{#-- We can't use temporary tables with `create ... as ()` syntax #}
+{% macro spark__create_temporary_view(relation, sql) -%}
   create temporary view {{ relation.include(schema=false) }} as
     {{ sql }}
 {% endmacro %}
 
+
 {% macro spark__create_table_as(temporary, relation, sql) -%}
   {% if temporary -%}
     {{ create_temporary_view(relation, sql) }}
diff --git a/tests/unit/test_macros.py b/tests/unit/test_macros.py
index 06ce202a..220a74db 100644
--- a/tests/unit/test_macros.py
+++ b/tests/unit/test_macros.py
@@ -15,7 +15,9 @@ def setUp(self):
             'validation': mock.Mock(),
             'model': mock.Mock(),
             'exceptions': mock.Mock(),
-            'config': mock.Mock()
+            'config': mock.Mock(),
+            'adapter': mock.Mock(),
+            'return': lambda r: r,
         }
         self.default_context['config'].get = lambda key, default=None, **kwargs: self.config.get(key, default)
 
@@ -24,6 +26,11 @@ def __get_template(self, template_filename):
 
     def __run_macro(self, template, name, temporary, relation, sql):
         self.default_context['model'].alias = relation
+
+        def dispatch(macro_name, macro_namespace=None, packages=None):
+            return getattr(template.module, f'spark__{macro_name}')
+        self.default_context['adapter'].dispatch = dispatch
+
         value = getattr(template.module, name)(temporary, relation, sql)
         return re.sub(r'\s\s+', ' ', value)
 

From a6de5d251859f3134f1afb29fa5a158f58b0ff64 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Thu, 7 Apr 2022 15:39:01 +0200
Subject: [PATCH 298/603] CI ergonomics: connection retries (#327)

* Update retries in CI

* Reenable all checks on sql_endpoint

* tox consistency
---
 .circleci/config.yml                   |  1 +
 tests/conftest.py                      | 15 ++++++++++++---
 tests/functional/adapter/test_basic.py |  6 +++---
 tests/integration/base.py              | 11 +++++++++--
 tox.ini                                |  8 ++++----
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 135c22cd..34e449ac 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -79,6 +79,7 @@ jobs:
   integration-spark-databricks-http:
     environment:
       DBT_INVOCATION_ENV: circle
+      DBT_DATABRICKS_RETRY_ALL: True
     docker:
       - image: fishtownanalytics/test-container:10
     steps:
diff --git a/tests/conftest.py b/tests/conftest.py
index 69fbf3d5..7ba95d47 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -42,8 +42,8 @@ def apache_spark_target():
         "user": "dbt",
         "method": "thrift",
         "port": 10000,
-        "connect_retries": 5,
-        "connect_timeout": 60,
+        "connect_retries": 3,
+        "connect_timeout": 5,
         "retry_all": True,
     }
 
@@ -57,6 +57,9 @@ def databricks_cluster_target():
         "token": os.getenv("DBT_DATABRICKS_TOKEN"),
         "driver": os.getenv("ODBC_DRIVER"),
         "port": 443,
+        "connect_retries": 3,
+        "connect_timeout": 5,
+        "retry_all": True,
     }
 
 
@@ -69,6 +72,9 @@ def databricks_sql_endpoint_target():
         "token": os.getenv("DBT_DATABRICKS_TOKEN"),
         "driver": os.getenv("ODBC_DRIVER"),
         "port": 443,
+        "connect_retries": 3,
+        "connect_timeout": 5,
+        "retry_all": True,
     }
 
 
@@ -80,8 +86,11 @@ def databricks_http_cluster_target():
         "token": os.getenv('DBT_DATABRICKS_TOKEN'),
         "method": "http",
         "port": 443,
+        # more retries + longer timeout to handle unavailability while cluster is restarting
+        # return failures quickly in dev, retry all failures in CI (up to 5 min)
         "connect_retries": 5,
-        "connect_timeout": 60,
+        "connect_timeout": 60, 
+        "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)),
     }
 
 
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index ff4fdd22..db18da6b 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -13,7 +13,7 @@
 from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp
 
 
-@pytest.mark.skip_profile('databricks_sql_endpoint', 'spark_session')
+@pytest.mark.skip_profile('spark_session')
 class TestSimpleMaterializationsSpark(BaseSimpleMaterializations):
     pass
 
@@ -33,12 +33,12 @@ class TestEmptySpark(BaseEmpty):
     pass
 
 
-@pytest.mark.skip_profile('databricks_sql_endpoint', 'spark_session')
+@pytest.mark.skip_profile('spark_session')
 class TestEphemeralSpark(BaseEphemeral):
     pass
 
 
-@pytest.mark.skip_profile('databricks_sql_endpoint', 'spark_session')
+@pytest.mark.skip_profile('spark_session')
 class TestIncrementalSpark(BaseIncremental):
     pass
 
diff --git a/tests/integration/base.py b/tests/integration/base.py
index e36162aa..7e557217 100644
--- a/tests/integration/base.py
+++ b/tests/integration/base.py
@@ -160,8 +160,9 @@ def apache_spark_profile(self):
                         'user': 'dbt',
                         'method': 'thrift',
                         'port': 10000,
-                        'connect_retries': 5,
-                        'connect_timeout': 60,
+                        'connect_retries': 3,
+                        'connect_timeout': 5,
+                        'retry_all': True,
                         'schema': self.unique_schema()
                     },
                 },
@@ -184,6 +185,9 @@ def databricks_cluster_profile(self):
                         'token': os.getenv('DBT_DATABRICKS_TOKEN'),
                         'driver': os.getenv('ODBC_DRIVER'),
                         'port': 443,
+                        'connect_retries': 3,
+                        'connect_timeout': 5,
+                        'retry_all': True,
                         'schema': self.unique_schema()
                     },
                 },
@@ -206,6 +210,9 @@ def databricks_sql_endpoint_profile(self):
                         'token': os.getenv('DBT_DATABRICKS_TOKEN'),
                         'driver': os.getenv('ODBC_DRIVER'),
                         'port': 443,
+                        'connect_retries': 3,
+                        'connect_timeout': 5,
+                        'retry_all': True,
                         'schema': self.unique_schema()
                     },
                 },
diff --git a/tox.ini b/tox.ini
index 38cb1962..6eb503af 100644
--- a/tox.ini
+++ b/tox.ini
@@ -20,7 +20,7 @@ deps =
 
 [testenv:integration-spark-databricks-http]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/test_basic.py'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
@@ -29,7 +29,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-cluster]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/test_basic.py'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -39,7 +39,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/test_basic.py'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -50,7 +50,7 @@ deps =
 
 [testenv:integration-spark-thrift]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/test_basic.py'
            /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =

From 5d2f387bdd621b7aa9af5017e8a3e5162231823c Mon Sep 17 00:00:00 2001
From: Gerda Shank 
Date: Thu, 7 Apr 2022 12:46:09 -0400
Subject: [PATCH 299/603] Override 'run_sql_for_tests' for Spark (#324)

---
 CHANGELOG.md               |  1 +
 dbt/adapters/spark/impl.py | 24 ++++++++++++++++++++++++
 tox.ini                    |  2 +-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c9b599d..8fb00e1d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 ### Under the hood
 - Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
 - Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320))
+- Override adapter method 'run_sql_for_tests' ([#323](https://github.com/dbt-labs/dbt-spark/issues/323), [#324](https://github.com/dbt-labs/dbt-spark/pull/324))
 
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 74845422..12810a64 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -364,6 +364,30 @@ def get_rows_different_sql(
 
         return sql
 
+    # This is for use in the test suite
+    # Spark doesn't have 'commit' and 'rollback', so this override
+    # doesn't include those commands.
+    def run_sql_for_tests(self, sql, fetch, conn):
+        cursor = conn.handle.cursor()
+        try:
+            cursor.execute(sql)
+            if fetch == "one":
+                if hasattr(cursor, 'fetchone'):
+                    return cursor.fetchone()
+                else:
+                    # AttributeError: 'PyhiveConnectionWrapper' object has no attribute 'fetchone'
+                    return cursor.fetchall()[0]
+            elif fetch == "all":
+                return cursor.fetchall()
+            else:
+                return
+        except BaseException as e:
+            print(sql)
+            print(e)
+            raise
+        finally:
+            conn.transaction_open = False
+
 
 # spark does something interesting with joins when both tables have the same
 # static values for the join condition and complains that the join condition is
diff --git a/tox.ini b/tox.ini
index 6eb503af..a268ee5b 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,7 +5,7 @@ envlist = unit, flake8, integration-spark-thrift
 
 [testenv:flake8]
 basepython = python3.8
-commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 dbt/'
+commands = /bin/bash -c '$(which flake8) --max-line-length 99 --select=E,W,F --ignore=W504 dbt/'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
      -r{toxinidir}/dev_requirements.txt

From c169bb33627f0c87f1f6b715e1f4f72257a85bba Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Fri, 8 Apr 2022 14:23:33 -0500
Subject: [PATCH 300/603] init push for convert of unique_id as a list tests
 for spark (#321)

* rebase commit

* remove old tests

* adding changelog

* removing changelog entry (per discussion these PRs don't need one and changing dev_requirements install)

* Empty-Commit

* retesting circleci
---
 .gitignore                                    |   2 +
 CHANGELOG.md                                  |   1 +
 .../adapter/test_incremental_unique_id.py     |  13 +
 .../duplicated_unary_unique_key_list.sql      |  17 -
 .../models/empty_str_unique_key.sql           |  14 -
 .../models/empty_unique_key_list.sql          |  12 -
 .../models/expected/one_str__overwrite.sql    |  21 -
 .../unique_key_list__inplace_overwrite.sql    |  21 -
 .../models/no_unique_key.sql                  |  13 -
 .../nontyped_trinary_unique_key_list.sql      |  19 -
 .../models/not_found_unique_key.sql           |  14 -
 .../models/not_found_unique_key_list.sql      |   8 -
 .../models/str_unique_key.sql                 |  17 -
 .../models/trinary_unique_key_list.sql        |  19 -
 .../models/unary_unique_key_list.sql          |  17 -
 .../seeds/add_new_rows.sql                    |   9 -
 .../seeds/duplicate_insert.sql                |   5 -
 .../incremental_unique_id_test/seeds/seed.csv |   7 -
 .../incremental_unique_id_test/seeds/seed.yml |   7 -
 .../test_incremental_unique_id.py             | 481 ------------------
 tox.ini                                       |  10 +-
 21 files changed, 21 insertions(+), 706 deletions(-)
 create mode 100644 tests/functional/adapter/test_incremental_unique_id.py
 delete mode 100644 tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/no_unique_key.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/str_unique_key.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql
 delete mode 100644 tests/integration/incremental_unique_id_test/seeds/seed.csv
 delete mode 100644 tests/integration/incremental_unique_id_test/seeds/seed.yml
 delete mode 100644 tests/integration/incremental_unique_id_test/test_incremental_unique_id.py

diff --git a/.gitignore b/.gitignore
index 4c05634f..cc586f5f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,12 +5,14 @@ env/
 *.pyc
 __pycache__
 .tox/
+.env
 .idea/
 build/
 dist/
 dbt-integration-tests
 test/integration/.user.yml
 .DS_Store
+test.env
 .vscode
 *.log
 logs/
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8fb00e1d..bb54c92f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 - Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320))
 - Override adapter method 'run_sql_for_tests' ([#323](https://github.com/dbt-labs/dbt-spark/issues/323), [#324](https://github.com/dbt-labs/dbt-spark/pull/324))
 
+
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
diff --git a/tests/functional/adapter/test_incremental_unique_id.py b/tests/functional/adapter/test_incremental_unique_id.py
new file mode 100644
index 00000000..18bac3f3
--- /dev/null
+++ b/tests/functional/adapter/test_incremental_unique_id.py
@@ -0,0 +1,13 @@
+import pytest
+from dbt.tests.adapter.incremental.test_incremental_unique_id import BaseIncrementalUniqueKey
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark')
+class TestUniqueKeySpark(BaseIncrementalUniqueKey):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+                "+incremental_strategy": "merge",
+            }
+        }
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql
deleted file mode 100644
index 7290b6c4..00000000
--- a/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        unique_key=['state', 'state']
-    )
-}}
-
-select
-    state as state,
-    county as county,
-    city as city,
-    last_visit_date as last_visit_date
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql b/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql
deleted file mode 100644
index 5260e177..00000000
--- a/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        unique_key=''
-    )
-}}
-
-select
-    *
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql
deleted file mode 100644
index c582d532..00000000
--- a/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql
+++ /dev/null
@@ -1,12 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        unique_key=[]
-    )
-}}
-
-select * from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql b/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql
deleted file mode 100644
index c7101152..00000000
--- a/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-{{
-    config(
-        materialized='table'
-    )
-}}
-
-select
-    'CT' as state,
-    'Hartford' as county,
-    'Hartford' as city,
-    cast('2022-02-14' as date) as last_visit_date
-union all
-select 'MA','Suffolk','Boston',cast('2020-02-12' as date)
-union all
-select 'NJ','Mercer','Trenton',cast('2022-01-01' as date)
-union all
-select 'NY','Kings','Brooklyn',cast('2021-04-02' as date)
-union all
-select 'NY','New York','Manhattan',cast('2021-04-01' as date)
-union all
-select 'PA','Philadelphia','Philadelphia',cast('2021-05-21' as date)
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql b/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql
deleted file mode 100644
index c7101152..00000000
--- a/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-{{
-    config(
-        materialized='table'
-    )
-}}
-
-select
-    'CT' as state,
-    'Hartford' as county,
-    'Hartford' as city,
-    cast('2022-02-14' as date) as last_visit_date
-union all
-select 'MA','Suffolk','Boston',cast('2020-02-12' as date)
-union all
-select 'NJ','Mercer','Trenton',cast('2022-01-01' as date)
-union all
-select 'NY','Kings','Brooklyn',cast('2021-04-02' as date)
-union all
-select 'NY','New York','Manhattan',cast('2021-04-01' as date)
-union all
-select 'PA','Philadelphia','Philadelphia',cast('2021-05-21' as date)
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/no_unique_key.sql b/tests/integration/incremental_unique_id_test/models/no_unique_key.sql
deleted file mode 100644
index 44a63e75..00000000
--- a/tests/integration/incremental_unique_id_test/models/no_unique_key.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-{{
-    config(
-        materialized='incremental'
-    )
-}}
-
-select
-    *
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql
deleted file mode 100644
index 52b4509f..00000000
--- a/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql
+++ /dev/null
@@ -1,19 +0,0 @@
--- for comparing against auto-typed seeds
-
-{{
-    config(
-        materialized='incremental',
-        unique_key=['state', 'county', 'city']
-    )
-}}
-
-select
-    state as state,
-    county as county,
-    city as city,
-    last_visit_date as last_visit_date
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql b/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql
deleted file mode 100644
index d247aa34..00000000
--- a/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        unique_key='thisisnotacolumn'
-    )
-}}
-
-select
-    *
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql
deleted file mode 100644
index f1462a48..00000000
--- a/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql
+++ /dev/null
@@ -1,8 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        unique_key=['state', 'thisisnotacolumn']
-    )
-}}
-
-select * from {{ ref('seed') }}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/str_unique_key.sql b/tests/integration/incremental_unique_id_test/models/str_unique_key.sql
deleted file mode 100644
index 2f9fc298..00000000
--- a/tests/integration/incremental_unique_id_test/models/str_unique_key.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        unique_key='state'
-    )
-}}
-
-select
-    state as state,
-    county as county,
-    city as city,
-    last_visit_date as last_visit_date
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql
deleted file mode 100644
index 0359546b..00000000
--- a/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql
+++ /dev/null
@@ -1,19 +0,0 @@
--- types needed to compare against expected model reliably
-
-{{
-    config(
-        materialized='incremental',
-        unique_key=['state', 'county', 'city']
-    )
-}}
-
-select
-    state as state,
-    county as county,
-    city as city,
-    last_visit_date as last_visit_date
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql
deleted file mode 100644
index 7f5875f8..00000000
--- a/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        unique_key=['state']
-    )
-}}
-
-select
-    state as state,
-    county as county,
-    city as city,
-    last_visit_date as last_visit_date
-from {{ ref('seed') }}
-
-{% if is_incremental() %}
-    where last_visit_date > (select max(last_visit_date) from {{ this }})
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql b/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql
deleted file mode 100644
index e5611fe3..00000000
--- a/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql
+++ /dev/null
@@ -1,9 +0,0 @@
--- insert two new rows, both of which should be in incremental model
---   with any unique columns
-insert into {schema}.seed
-    (state, county, city, last_visit_date)
-values ('WA','King','Seattle',cast('2022-02-01' as date));
-
-insert into {schema}.seed
-    (state, county, city, last_visit_date)
-values ('CA','Los Angeles','Los Angeles',cast('2022-02-01' as date));
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql b/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql
deleted file mode 100644
index 8abe2808..00000000
--- a/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql
+++ /dev/null
@@ -1,5 +0,0 @@
--- insert new row, which should not be in incremental model
---  with primary or first three columns unique
-insert into {schema}.seed
-    (state, county, city, last_visit_date)
-values ('CT','Hartford','Hartford',cast('2022-02-14' as date));
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/seed.csv b/tests/integration/incremental_unique_id_test/seeds/seed.csv
deleted file mode 100644
index b988827f..00000000
--- a/tests/integration/incremental_unique_id_test/seeds/seed.csv
+++ /dev/null
@@ -1,7 +0,0 @@
-state,county,city,last_visit_date
-CT,Hartford,Hartford,2020-09-23
-MA,Suffolk,Boston,2020-02-12
-NJ,Mercer,Trenton,2022-01-01
-NY,Kings,Brooklyn,2021-04-02
-NY,New York,Manhattan,2021-04-01
-PA,Philadelphia,Philadelphia,2021-05-21
\ No newline at end of file
diff --git a/tests/integration/incremental_unique_id_test/seeds/seed.yml b/tests/integration/incremental_unique_id_test/seeds/seed.yml
deleted file mode 100644
index c048548a..00000000
--- a/tests/integration/incremental_unique_id_test/seeds/seed.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-version: 2
-
-seeds:
-  - name: seed
-    config:
-      column_types:
-        last_visit_date: date
diff --git a/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py b/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py
deleted file mode 100644
index 96e61912..00000000
--- a/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py
+++ /dev/null
@@ -1,481 +0,0 @@
-from tests.integration.base import DBTIntegrationTest, use_profile
-from dbt.contracts.results import RunStatus
-from collections import namedtuple
-from pathlib import Path
-
-
-ResultHolder = namedtuple(
-    'ResultHolder',
-    ['seed_count', 'model_count', 'seed_rows', 'inc_test_model_count',
-     'opt_model_count', 'relation'],
-)
-
-
-class TestIncrementalUniqueKey(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return 'incremental_unique_key'
-
-    @property
-    def models(self):
-        return 'models'
-        
-    @property
-    def project_config(self):
-        return {
-            "config-version": 2,
-            "models": {
-                "+file_format": "delta",
-                "+incremental_strategy": "merge"
-            }
-        }
-
-    def update_incremental_model(self, incremental_model):
-        '''update incremental model after the seed table has been updated'''
-        model_result_set = self.run_dbt(['run', '--select', incremental_model])
-        return len(model_result_set)
-
-    def setup_test(self, seed, incremental_model, update_sql_file):
-        '''build a test case and return values for assertions'''
-        
-        # Idempotently create some number of seeds and incremental models
-        seed_count = len(self.run_dbt(
-            ['seed', '--select', seed, '--full-refresh']
-        ))
-        model_count = len(self.run_dbt(
-            ['run', '--select', incremental_model, '--full-refresh']
-        ))
-        
-         # Upate seed and return new row count
-        row_count_query = 'select * from {}.{}'.format(
-            self.unique_schema(),
-            seed
-        )
-        self.run_sql_file(Path('seeds') / Path(update_sql_file + '.sql'))
-        seed_rows = len(self.run_sql(row_count_query, fetch='all'))
-
-        inc_test_model_count = self.update_incremental_model(
-            incremental_model=incremental_model
-        )
-
-        return (seed_count, model_count, seed_rows, inc_test_model_count)
-
-    def test_scenario_correctness(self, expected_fields, test_case_fields):
-        '''Invoke assertions to verify correct build functionality'''
-        # 1. test seed(s) should build afresh
-        self.assertEqual(
-            expected_fields.seed_count, test_case_fields.seed_count
-        )
-        # 2. test model(s) should build afresh
-        self.assertEqual(
-            expected_fields.model_count, test_case_fields.model_count
-        )
-        # 3. seeds should have intended row counts post update
-        self.assertEqual(
-            expected_fields.seed_rows, test_case_fields.seed_rows
-        )
-        # 4. incremental test model(s) should be updated
-        self.assertEqual(
-            expected_fields.inc_test_model_count,
-            test_case_fields.inc_test_model_count
-        )
-        # 5. extra incremental model(s) should be built; optional since
-        #   comparison may be between an incremental model and seed
-        if (expected_fields.opt_model_count and
-            test_case_fields.opt_model_count):
-            self.assertEqual(
-                expected_fields.opt_model_count,
-                test_case_fields.opt_model_count
-            )
-        # 6. result table should match intended result set (itself a relation)
-        self.assertTablesEqual(
-            expected_fields.relation, test_case_fields.relation
-        )
-
-    def stub_expected_fields(
-        self, relation, seed_rows, opt_model_count=None
-    ):
-        return ResultHolder(
-            seed_count=1, model_count=1, seed_rows=seed_rows,
-            inc_test_model_count=1, opt_model_count=opt_model_count,
-            relation=relation
-        )
-
-    def fail_to_build_inc_missing_unique_key_column(self, incremental_model_name):
-        '''should pass back error state when trying build an incremental
-           model whose unique key or keylist includes a column missing
-           from the incremental model'''
-        seed_count = len(self.run_dbt(
-            ['seed', '--select', 'seed', '--full-refresh']
-        ))
-        # unique keys are not applied on first run, so two are needed
-        self.run_dbt(
-            ['run', '--select', incremental_model_name, '--full-refresh'],
-            expect_pass=True
-        )
-        run_result = self.run_dbt(
-            ['run', '--select', incremental_model_name],
-            expect_pass=False
-        ).results[0]
-
-        return run_result.status, run_result.message
-
-
-class TestNoIncrementalUniqueKey(TestIncrementalUniqueKey):
-
-    @use_profile("databricks_sql_endpoint")
-    def test__databricks_sql_endpoint_no_unique_keys(self):
-        '''with no unique keys, seed and model should match'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='no_unique_key'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile("databricks_cluster")
-    def test__databricks_cluster_no_unique_keys(self):
-        '''with no unique keys, seed and model should match'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='no_unique_key'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-
-class TestIncrementalStrUniqueKey(TestIncrementalUniqueKey):
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_empty_str_unique_key(self):
-        '''with empty string for unique key, seed and model should match'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='empty_str_unique_key'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_empty_str_unique_key(self):
-        '''with empty string for unique key, seed and model should match'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='empty_str_unique_key'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_one_unique_key(self):
-        '''with one unique key, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='str_unique_key'
-        update_sql_file='duplicate_insert'
-        expected_model='one_str__overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_one_unique_key(self):
-        '''with one unique key, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='str_unique_key'
-        update_sql_file='duplicate_insert'
-        expected_model='one_str__overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_bad_unique_key(self):
-        '''expect compilation error from unique key not being a column'''
-
-        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
-            incremental_model_name='not_found_unique_key'
-        )
-
-        self.assertEqual(status, RunStatus.Error)
-        self.assertTrue("thisisnotacolumn" in exc)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_bad_unique_key(self):
-        '''expect compilation error from unique key not being a column'''
-
-        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
-            incremental_model_name='not_found_unique_key'
-        )
-
-        self.assertEqual(status, RunStatus.Error)
-        self.assertTrue("thisisnotacolumn" in exc)
-
-
-class TestIncrementalListUniqueKey(TestIncrementalUniqueKey):
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_empty_unique_key_list(self):
-        '''with no unique keys, seed and model should match'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='empty_unique_key_list'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_empty_unique_key_list(self):
-        '''with no unique keys, seed and model should match'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='empty_unique_key_list'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_unary_unique_key_list(self):
-        '''with one unique key, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='unary_unique_key_list'
-        update_sql_file='duplicate_insert'
-        expected_model='unique_key_list__inplace_overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_unary_unique_key_list(self):
-        '''with one unique key, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='unary_unique_key_list'
-        update_sql_file='duplicate_insert'
-        expected_model='unique_key_list__inplace_overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_duplicated_unary_unique_key_list(self):
-        '''with two of the same unique key, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='duplicated_unary_unique_key_list'
-        update_sql_file='duplicate_insert'
-        expected_model='unique_key_list__inplace_overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_duplicated_unary_unique_key_list(self):
-        '''with two of the same unique key, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='duplicated_unary_unique_key_list'
-        update_sql_file='duplicate_insert'
-        expected_model='unique_key_list__inplace_overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_trinary_unique_key_list(self):
-        '''with three unique keys, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='trinary_unique_key_list'
-        update_sql_file='duplicate_insert'
-        expected_model='unique_key_list__inplace_overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_trinary_unique_key_list(self):
-        '''with three unique keys, model will overwrite existing row'''
-        seed='seed'
-        seed_rows=7
-        incremental_model='trinary_unique_key_list'
-        update_sql_file='duplicate_insert'
-        expected_model='unique_key_list__inplace_overwrite'
-
-        expected_fields = self.stub_expected_fields(
-            relation=expected_model, seed_rows=seed_rows, opt_model_count=1
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=self.update_incremental_model(expected_model),
-            relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_trinary_unique_key_list_no_update(self):
-        '''even with three unique keys, adding distinct rows to seed does not
-           cause seed and model to diverge'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='nontyped_trinary_unique_key_list'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_trinary_unique_key_list_no_update(self):
-        '''even with three unique keys, adding distinct rows to seed does not
-           cause seed and model to diverge'''
-        seed='seed'
-        seed_rows=8
-        incremental_model='nontyped_trinary_unique_key_list'
-        update_sql_file='add_new_rows'
-
-        expected_fields = self.stub_expected_fields(
-            relation=seed, seed_rows=seed_rows
-        )
-        test_case_fields = ResultHolder(
-            *self.setup_test(seed, incremental_model, update_sql_file),
-            opt_model_count=None, relation=incremental_model
-        )
-
-        self.test_scenario_correctness(expected_fields, test_case_fields)
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint_bad_unique_key_list(self):
-        '''expect compilation error from unique key not being a column'''
-
-        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
-            incremental_model_name='not_found_unique_key_list'
-        )
-
-        self.assertEqual(status, RunStatus.Error)
-        self.assertTrue("thisisnotacolumn" in exc)
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster_bad_unique_key_list(self):
-        '''expect compilation error from unique key not being a column'''
-
-        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
-            incremental_model_name='not_found_unique_key_list'
-        )
-
-        self.assertEqual(status, RunStatus.Error)
-        self.assertTrue("thisisnotacolumn" in exc)
-        
diff --git a/tox.ini b/tox.ini
index a268ee5b..1e0e2b8b 100644
--- a/tox.ini
+++ b/tox.ini
@@ -20,7 +20,7 @@ deps =
 
 [testenv:integration-spark-databricks-http]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/*'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
@@ -29,7 +29,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-cluster]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -39,7 +39,7 @@ deps =
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
@@ -50,7 +50,7 @@ deps =
 
 [testenv:integration-spark-thrift]
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
@@ -60,7 +60,7 @@ deps =
 
 [testenv:integration-spark-session]
 basepython = python3
-commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session tests/functional/adapter/test_basic.py'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
 passenv =
     DBT_*
     PYTEST_*

From bb5075ece999e678faa863bf2ce54f2986321b2f Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Fri, 8 Apr 2022 15:28:58 -0600
Subject: [PATCH 301/603] convert adapter test (#328)

---
 dbt/adapters/spark/impl.py             | 16 ++++++++++++++--
 tests/functional/adapter/test_basic.py |  4 ++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 12810a64..268417d0 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -231,8 +231,20 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
             # return relation's schema. if columns are empty from cache,
             # use get_columns_in_relation spark macro
             # which would execute 'describe extended tablename' query
-            rows: List[agate.Row] = super().get_columns_in_relation(relation)
-            columns = self.parse_describe_extended(relation, rows)
+            try:
+                rows: List[agate.Row] = super().get_columns_in_relation(relation)
+                columns = self.parse_describe_extended(relation, rows)
+            except dbt.exceptions.RuntimeException as e:
+                # spark would throw an error when the table doesn't exist, where other
+                # CDW would just return an empty list, normalizing the behavior here
+                errmsg = getattr(e, "msg", "")
+                if (
+                    f"Table or view not found: {relation}" in errmsg or
+                    "NoSuchTableException" in errmsg
+                ):
+                    pass
+                else:
+                    raise e
 
         # strip hudi metadata columns.
         columns = [x for x in columns
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index db18da6b..70f3267a 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -11,6 +11,7 @@
 from dbt.tests.adapter.basic.test_generic_tests import BaseGenericTests
 from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols
 from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp
+from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod
 
 
 @pytest.mark.skip_profile('spark_session')
@@ -77,3 +78,6 @@ def project_config_update(self):
                 "+file_format": "delta",
             }
         }
+
+class TestBaseAdapterMethod(BaseAdapterMethod):
+    pass
\ No newline at end of file
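
The `try`/`except` added to `get_columns_in_relation` above normalizes Spark's behavior with other warehouses: when the relation does not exist, the adapter now returns an empty column list instead of raising. The same pattern in isolation, as a hedged sketch (`describe_relation` is a hypothetical stand-in for the `super()` call; the exception type and message checks mirror the patch):

```python
# Illustrative sketch of the "treat 'not found' as empty" normalization.
# `describe_relation` is a hypothetical callable standing in for
# super().get_columns_in_relation(); it is not a real dbt API by this name.
import dbt.exceptions


def safe_columns(relation, describe_relation):
    try:
        return describe_relation(relation)
    except dbt.exceptions.RuntimeException as e:
        errmsg = getattr(e, "msg", "")
        not_found = (
            f"Table or view not found: {relation}" in errmsg
            or "NoSuchTableException" in errmsg
        )
        if not_found:
            return []  # behave like warehouses that report zero columns
        raise
```

The follow-up patch below relaxes the first check to a plain "Table or view not found" substring so that Spark 3.0+ error messages are also caught.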

From fbf9abe397abd11baab6dfa7a903346774fec345 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Wed, 13 Apr 2022 09:50:15 -0600
Subject: [PATCH 302/603] catch table or view not exist error in spark 3.0+
 (#331)

---
 .bumpversion.cfg                  |  2 +-
 .github/workflows/main.yml        | 13 +++++++++++++
 CHANGELOG.md                      |  2 +-
 dbt/adapters/spark/__version__.py |  2 +-
 dbt/adapters/spark/impl.py        |  2 +-
 setup.py                          |  2 +-
 6 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 9a0c41a5..74428484 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.1.0b1
+current_version = 1.2.0a1
 parse = (?P\d+)
 	\.(?P\d+)
 	\.(?P\d+)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 60a0d6f6..fbdbbbaa 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -122,6 +122,9 @@ jobs:
 
     runs-on: ubuntu-latest
 
+    outputs:
+      is_alpha: ${{ steps.check-is-alpha.outputs.is_alpha }}
+
     steps:
       - name: Check out the repository
         uses: actions/checkout@v2
@@ -150,6 +153,14 @@ jobs:
       - name: Check wheel contents
         run: |
           check-wheel-contents dist/*.whl --ignore W007,W008
+          
+      - name: Check if this is an alpha version
+        id: check-is-alpha
+        run: |
+          export is_alpha=0
+          if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi
+          echo "::set-output name=is_alpha::$is_alpha"
+
       - uses: actions/upload-artifact@v2
         with:
           name: dist
@@ -158,6 +169,8 @@ jobs:
   test-build:
     name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }}
 
+    if: needs.build.outputs.is_alpha == 0
+
     needs: build
 
     runs-on: ${{ matrix.os }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb54c92f..f9a09494 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@
 - Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
 - Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320))
 - Override adapter method 'run_sql_for_tests' ([#323](https://github.com/dbt-labs/dbt-spark/issues/323), [#324](https://github.com/dbt-labs/dbt-spark/pull/324))
-
+- When a table or view doesn't exist, 'adapter.get_columns_in_relation' will return an empty list instead of failing ([#328](https://github.com/dbt-labs/dbt-spark/pull/328))
 
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 56ec17a8..a6b97722 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.1.0b1"
+version = "1.2.0a1"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 268417d0..eb001fbc 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -239,7 +239,7 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
                 # CDW would just return an empty list, normalizing the behavior here
                 errmsg = getattr(e, "msg", "")
                 if (
-                    f"Table or view not found: {relation}" in errmsg or
+                    "Table or view not found" in errmsg or
                     "NoSuchTableException" in errmsg
                 ):
                     pass
diff --git a/setup.py b/setup.py
index 2cd44491..12ecbacd 100644
--- a/setup.py
+++ b/setup.py
@@ -52,7 +52,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.1.0b1"
+package_version = "1.2.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From f03d217ebb004bb889eff80d61da14bc72983370 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Wed, 4 May 2022 15:31:21 -0500
Subject: [PATCH 303/603] rename dev_requirements.txt -> dev-requirements.txt
 to match dbt-core (#344)

* rename dev_requirements.txt -> dev-requirements.txt to match dbt-core

* add changelog entry
---
 .github/workflows/release.yml                | 34 ++++++++++----------
 .github/workflows/version-bump.yml           | 20 ++++++------
 CHANGELOG.md                                 |  1 +
 dev_requirements.txt => dev-requirements.txt |  0
 tox.ini                                      | 14 ++++----
 5 files changed, 35 insertions(+), 34 deletions(-)
 rename dev_requirements.txt => dev-requirements.txt (100%)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b25ea884..b40371e8 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -3,28 +3,28 @@ name: Build and Release
 
 on:
   workflow_dispatch:
-  
+
 # Release version number that must be updated for each release
 env:
   version_number: '0.20.0rc2'
 
-jobs:          
+jobs:
   Test:
     runs-on: ubuntu-latest
     steps:
       - name: Setup Python
         uses: actions/setup-python@v2.2.2
-        with: 
+        with:
           python-version: '3.8'
-          
+
       - uses: actions/checkout@v2
 
-      - name: Test release        
+      - name: Test release
         run: |
           python3 -m venv env
           source env/bin/activate
           sudo apt-get install libsasl2-dev
-          pip install -r dev_requirements.txt
+          pip install -r dev-requirements.txt
           pip install twine wheel setuptools
           python setup.py sdist bdist_wheel
           pip install dist/dbt-spark-*.tar.gz
@@ -38,9 +38,9 @@ jobs:
     steps:
       - name: Setup Python
         uses: actions/setup-python@v2.2.2
-        with: 
+        with:
           python-version: '3.8'
-          
+
       - uses: actions/checkout@v2
 
       - name: Bumping version
@@ -48,7 +48,7 @@ jobs:
           python3 -m venv env
           source env/bin/activate
           sudo apt-get install libsasl2-dev
-          pip install -r dev_requirements.txt
+          pip install -r dev-requirements.txt
           bumpversion --config-file .bumpversion-dbt.cfg patch --new-version ${{env.version_number}}
           bumpversion --config-file .bumpversion.cfg patch --new-version ${{env.version_number}} --allow-dirty
           git status
@@ -60,7 +60,7 @@ jobs:
           author_email: 'leah.antkiewicz@dbtlabs.com'
           message: 'Bumping version to ${{env.version_number}}'
           tag: v${{env.version_number}}
-          
+
       # Need to set an output variable because env variables can't be taken as input
       # This is needed for the next step with releasing to GitHub
       - name: Find release type
@@ -69,7 +69,7 @@ jobs:
           IS_PRERELEASE: ${{ contains(env.version_number, 'rc') ||  contains(env.version_number, 'b') }}
         run: |
           echo ::set-output name=isPrerelease::$IS_PRERELEASE
-          
+
       - name: Create GitHub release
         uses: actions/create-release@v1
         env:
@@ -88,7 +88,7 @@ jobs:
             # or
             $ pip install "dbt-spark[PyHive]==${{env.version_number}}"
             ```
-          
+
   PypiRelease:
     name: Pypi release
     runs-on: ubuntu-latest
@@ -97,13 +97,13 @@ jobs:
     steps:
       - name: Setup Python
         uses: actions/setup-python@v2.2.2
-        with: 
+        with:
           python-version: '3.8'
-          
+
       - uses: actions/checkout@v2
         with:
           ref: v${{env.version_number}}
-      
+
       - name: Release to pypi
         env:
           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
@@ -112,8 +112,8 @@ jobs:
           python3 -m venv env
           source env/bin/activate
           sudo apt-get install libsasl2-dev
-          pip install -r dev_requirements.txt
+          pip install -r dev-requirements.txt
           pip install twine wheel setuptools
           python setup.py sdist bdist_wheel
           twine upload --non-interactive dist/dbt_spark-${{env.version_number}}-py3-none-any.whl dist/dbt-spark-${{env.version_number}}.tar.gz
-          
+
diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml
index 7fb8bb6e..a8b3236c 100644
--- a/.github/workflows/version-bump.yml
+++ b/.github/workflows/version-bump.yml
@@ -1,16 +1,16 @@
 # **what?**
 # This workflow will take a version number and a dry run flag. With that
-# it will run versionbump to update the version number everywhere in the 
+# it will run versionbump to update the version number everywhere in the
 # code base and then generate an update Docker requirements file. If this
 # is a dry run, a draft PR will open with the changes. If this isn't a dry
 # run, the changes will be committed to the branch this is run on.
 
 # **why?**
-# This is to aid in releasing dbt and making sure we have updated 
+# This is to aid in releasing dbt and making sure we have updated
 # the versions and Docker requirements in all places.
 
 # **when?**
-# This is triggered either manually OR 
+# This is triggered either manually OR
 # from the repository_dispatch event "version-bump" which is sent from
 # the dbt-release repo Action
 
@@ -25,11 +25,11 @@ on:
       is_dry_run:
        description: 'Creates a draft PR to allow testing instead of committing to a branch'
        required: true
-       default: 'true'  
+       default: 'true'
   repository_dispatch:
     types: [version-bump]
 
-jobs:          
+jobs:
   bump:
     runs-on: ubuntu-latest
     steps:
@@ -58,19 +58,19 @@ jobs:
           sudo apt-get install libsasl2-dev
           python3 -m venv env
           source env/bin/activate
-          pip install --upgrade pip     
-          
+          pip install --upgrade pip
+
       - name: Create PR branch
         if: ${{ steps.variables.outputs.IS_DRY_RUN  == 'true' }}
         run: |
           git checkout -b bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
           git push origin bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
           git branch --set-upstream-to=origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
-   
+
       - name: Bumping version
         run: |
           source env/bin/activate
-          pip install -r dev_requirements.txt 
+          pip install -r dev-requirements.txt
           env/bin/bumpversion --allow-dirty --new-version ${{steps.variables.outputs.VERSION_NUMBER}} major
           git status
 
@@ -100,4 +100,4 @@ jobs:
           draft: true
           base: ${{github.ref}}
           title: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}'
-          branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'    
+          branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f9a09494..5ad68a5c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
+- Rename dev_requirements.txt to dev-requirements.txt to match dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
 
 ### Under the hood
 - Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
diff --git a/dev_requirements.txt b/dev-requirements.txt
similarity index 100%
rename from dev_requirements.txt
rename to dev-requirements.txt
diff --git a/tox.ini b/tox.ini
index 1e0e2b8b..59b931dc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -8,7 +8,7 @@ basepython = python3.8
 commands = /bin/bash -c '$(which flake8) --max-line-length 99 --select=E,W,F --ignore=W504 dbt/'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
-     -r{toxinidir}/dev_requirements.txt
+     -r{toxinidir}/dev-requirements.txt
 
 [testenv:unit]
 basepython = python3.8
@@ -16,7 +16,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev_requirements.txt
+    -r{toxinidir}/dev-requirements.txt
 
 [testenv:integration-spark-databricks-http]
 basepython = python3.8
@@ -24,7 +24,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_clus
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev_requirements.txt
+    -r{toxinidir}/dev-requirements.txt
     -e.
 
 [testenv:integration-spark-databricks-odbc-cluster]
@@ -34,7 +34,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
     -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev_requirements.txt
+    -r{toxinidir}/dev-requirements.txt
     -e.
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
@@ -44,7 +44,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpo
 passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
 deps =
     -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev_requirements.txt
+    -r{toxinidir}/dev-requirements.txt
     -e.
 
 
@@ -55,7 +55,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posarg
 passenv = DBT_* PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev_requirements.txt
+    -r{toxinidir}/dev-requirements.txt
     -e.
 
 [testenv:integration-spark-session]
@@ -67,5 +67,5 @@ passenv =
     PIP_CACHE_DIR
 deps =
     -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev_requirements.txt
+    -r{toxinidir}/dev-requirements.txt
     -e.[session]

From 5ff1c42560fbf4265d9cc36eb18f18fff0f91877 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Fri, 13 May 2022 09:06:43 -0700
Subject: [PATCH 304/603] Add commit hook tooling. (#356)

* Add commit hook tooling. (#346)
* Make pre commit code changes (#345)
* Refactor exception function using accurate type specs.
* Upgrade the mypy version in order to use TypeAlias-es.
* Upgrade the mypy version in the workflow. Fix a bug introduced by using str's instead of Optional[str]'s
* Address code review comments: Remove integration test command since there are multiple ways we can run tests in spark.
* Add changelog entry
* Altering names of dev_requirements references.
---
 .bumpversion.cfg                              |   5 +-
 .flake8                                       |  12 +
 .github/ISSUE_TEMPLATE/dependabot.yml         |   2 +-
 .github/ISSUE_TEMPLATE/release.md             |   2 +-
 .github/pull_request_template.md              |   2 +-
 .github/workflows/jira-creation.yml           |   2 +-
 .github/workflows/jira-label.yml              |   3 +-
 .github/workflows/jira-transition.yml         |   2 +-
 .github/workflows/main.yml                    |  26 +-
 .github/workflows/release.yml                 |   1 -
 .github/workflows/stale.yml                   |   2 -
 .gitignore                                    |  53 +++-
 .pre-commit-config.yaml                       |  66 +++++
 CHANGELOG.md                                  |   1 +
 MANIFEST.in                                   |   2 +-
 Makefile                                      |  56 ++++
 dbt/adapters/spark/__init__.py                |   5 +-
 dbt/adapters/spark/column.py                  |  25 +-
 dbt/adapters/spark/connections.py             | 210 +++++++--------
 dbt/adapters/spark/impl.py                    | 240 ++++++++----------
 dbt/adapters/spark/relation.py                |  10 +-
 dbt/adapters/spark/session.py                 |  22 +-
 dbt/include/spark/__init__.py                 |   1 +
 dbt/include/spark/macros/adapters.sql         |  14 +-
 .../incremental/incremental.sql               |  10 +-
 .../incremental/strategies.sql                |  10 +-
 .../materializations/incremental/validate.sql |   4 +-
 .../macros/materializations/snapshot.sql      |   2 +-
 .../spark/macros/materializations/table.sql   |   2 +-
 dev-requirements.txt                          |  20 +-
 scripts/build-dist.sh                         |   2 +-
 setup.py                                      |  72 +++---
 tox.ini                                       |   8 -
 33 files changed, 489 insertions(+), 405 deletions(-)
 create mode 100644 .flake8
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 Makefile

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 74428484..4de02c34 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -4,7 +4,7 @@ parse = (?P\d+)
 	\.(?P\d+)
 	\.(?P\d+)
 	((?Pa|b|rc)(?P\d+))?
-serialize = 
+serialize =
 	{major}.{minor}.{patch}{prerelease}{num}
 	{major}.{minor}.{patch}
 commit = False
@@ -13,7 +13,7 @@ tag = False
 [bumpversion:part:prerelease]
 first_value = a
 optional_value = final
-values = 
+values =
 	a
 	b
 	rc
@@ -25,4 +25,3 @@ first_value = 1
 [bumpversion:file:setup.py]
 
 [bumpversion:file:dbt/adapters/spark/__version__.py]
-
diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..f39d154c
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,12 @@
+[flake8]
+select =
+    E
+    W
+    F
+ignore =
+    W503 # makes Flake8 work like black
+    W504
+    E203 # makes Flake8 work like black
+    E741
+    E501
+exclude = test
diff --git a/.github/ISSUE_TEMPLATE/dependabot.yml b/.github/ISSUE_TEMPLATE/dependabot.yml
index 8a8c85b9..2a6f3449 100644
--- a/.github/ISSUE_TEMPLATE/dependabot.yml
+++ b/.github/ISSUE_TEMPLATE/dependabot.yml
@@ -5,4 +5,4 @@ updates:
     directory: "/"
     schedule:
       interval: "daily"
-    rebase-strategy: "disabled"
\ No newline at end of file
+    rebase-strategy: "disabled"
diff --git a/.github/ISSUE_TEMPLATE/release.md b/.github/ISSUE_TEMPLATE/release.md
index ac28792a..a69349f5 100644
--- a/.github/ISSUE_TEMPLATE/release.md
+++ b/.github/ISSUE_TEMPLATE/release.md
@@ -7,4 +7,4 @@ assignees: ''
 
 ---
 
-### TBD
\ No newline at end of file
+### TBD
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 60e12779..5928b1cb 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -18,4 +18,4 @@ resolves #
 - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
 - [ ] I have run this code in development and it appears to resolve the stated issue
 - [ ] This PR includes tests, or tests are not required/relevant for this PR
-- [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-spark next" section.
\ No newline at end of file
+- [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-spark next" section.
diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml
index c84e106a..b4016bef 100644
--- a/.github/workflows/jira-creation.yml
+++ b/.github/workflows/jira-creation.yml
@@ -13,7 +13,7 @@ name: Jira Issue Creation
 on:
   issues:
     types: [opened, labeled]
-    
+
 permissions:
   issues: write
 
diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml
index fd533a17..3da2e3a3 100644
--- a/.github/workflows/jira-label.yml
+++ b/.github/workflows/jira-label.yml
@@ -13,7 +13,7 @@ name: Jira Label Mirroring
 on:
   issues:
     types: [labeled, unlabeled]
-    
+
 permissions:
   issues: read
 
@@ -24,4 +24,3 @@ jobs:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
       JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
-    
diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml
index 71273c7a..ed9f9cd4 100644
--- a/.github/workflows/jira-transition.yml
+++ b/.github/workflows/jira-transition.yml
@@ -21,4 +21,4 @@ jobs:
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
-      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
\ No newline at end of file
+      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index fbdbbbaa..56685bfc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -37,19 +37,10 @@ defaults:
 
 jobs:
   code-quality:
-    name: ${{ matrix.toxenv }}
+    name: code-quality
 
     runs-on: ubuntu-latest
 
-    strategy:
-      fail-fast: false
-      matrix:
-        toxenv: [flake8]
-
-    env:
-      TOXENV: ${{ matrix.toxenv }}
-      PYTEST_ADDOPTS: "-v --color=yes"
-
     steps:
       - name: Check out the repository
         uses: actions/checkout@v2
@@ -58,18 +49,19 @@ jobs:
 
       - name: Set up Python
         uses: actions/setup-python@v2
-        with: 
+        with:
           python-version: '3.8'
 
       - name: Install python dependencies
         run: |
           sudo apt-get install libsasl2-dev
           pip install --user --upgrade pip
-          pip install tox
-          pip --version
-          tox --version
-      - name: Run tox
-        run: tox
+          pip install -r dev-requirements.txt
+          pre-commit --version
+          mypy --version
+          dbt --version
+      - name: pre-commit hooks
+        run: pre-commit run --all-files --show-diff-on-failure
 
   unit:
     name: unit test / python ${{ matrix.python-version }}
@@ -153,7 +145,7 @@ jobs:
       - name: Check wheel contents
         run: |
           check-wheel-contents dist/*.whl --ignore W007,W008
-          
+
       - name: Check if this is an alpha version
         id: check-is-alpha
         run: |
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b40371e8..554e13a8 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -116,4 +116,3 @@ jobs:
           pip install twine wheel setuptools
           python setup.py sdist bdist_wheel
           twine upload --non-interactive dist/dbt_spark-${{env.version_number}}-py3-none-any.whl dist/dbt-spark-${{env.version_number}}.tar.gz
-
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index 2848ce8f..a56455d5 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -13,5 +13,3 @@ jobs:
           stale-pr-message: "This PR has been marked as Stale because it has been open for 180 days with no activity. If you would like the PR to remain open, please remove the stale label or comment on the PR, or it will be closed in 7 days."
           # mark issues/PRs stale when they haven't seen activity in 180 days
           days-before-stale: 180
-          # ignore checking issues with the following labels
-          exempt-issue-labels: "epic, discussion"
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cc586f5f..189589cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,18 +1,47 @@
-.hive-metastore/
-.spark-warehouse/
-*.egg-info
-env/
-*.pyc
+# Byte-compiled / optimized / DLL files
 __pycache__
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+build/
+env*/
+dbt_env/
+dist/
+*.egg-info
+logs/
+
+
+# Unit test
 .tox/
 .env
+test.env
+
+
+# Django stuff
+*.log
+
+# Pytest cache
+*.pytest_cache/
+
+# Vim
+*.sw*
+
+# Pyenv
+.python-version
+
+# pycharm
 .idea/
-build/
-dist/
-dbt-integration-tests
-test/integration/.user.yml
+
+# MacOS
 .DS_Store
-test.env
+
+# vscode
 .vscode
-*.log
-logs/
\ No newline at end of file
+
+# other
+.hive-metastore/
+.spark-warehouse/
+dbt-integration-tests
+test/integration/.user.yml
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..e70156dc
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,66 @@
+# For more on configuring pre-commit hooks, see https://pre-commit.com/
+
+# TODO: remove global exclusion of tests when testing overhaul is complete
+exclude: '^tests/.*'
+
+# Force all unspecified python hooks to run python 3.8
+default_language_version:
+  python: python3.8
+
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v3.2.0
+  hooks:
+  - id: check-yaml
+    args: [--unsafe]
+  - id: check-json
+  - id: end-of-file-fixer
+  - id: trailing-whitespace
+  - id: check-case-conflict
+- repo: https://github.com/psf/black
+  rev: 21.12b0
+  hooks:
+  - id: black
+    additional_dependencies: ['click==8.0.4']
+    args:
+    - "--line-length=99"
+    - "--target-version=py38"
+  - id: black
+    alias: black-check
+    stages: [manual]
+    additional_dependencies: ['click==8.0.4']
+    args:
+    - "--line-length=99"
+    - "--target-version=py38"
+    - "--check"
+    - "--diff"
+- repo: https://gitlab.com/pycqa/flake8
+  rev: 4.0.1
+  hooks:
+  - id: flake8
+  - id: flake8
+    alias: flake8-check
+    stages: [manual]
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.950
+  hooks:
+  - id: mypy
+    # N.B.: Mypy is... a bit fragile.
+    #
+    # By using `language: system` we run this hook in the local
+    # environment instead of a pre-commit isolated one.  This is needed
+    # to ensure mypy correctly parses the project.
+
+    # It may cause trouble in that it adds environment variables out
+    # of our control to the mix.  Unfortunately, there's nothing we can
+    # do about it, per pre-commit's author.
+    # See https://github.com/pre-commit/pre-commit/issues/730 for details.
+    args: [--show-error-codes, --ignore-missing-imports]
+    files: ^dbt/adapters/.*
+    language: system
+  - id: mypy
+    alias: mypy-check
+    stages: [manual]
+    args: [--show-error-codes, --pretty, --ignore-missing-imports]
+    files: ^dbt/adapters
+    language: system
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ad68a5c..77eb7258 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 - rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
 
 ### Under the hood
+- Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356))
 - Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
 - Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320))
 - Override adapter method 'run_sql_for_tests' ([#323](https://github.com/dbt-labs/dbt-spark/issues/323), [#324](https://github.com/dbt-labs/dbt-spark/pull/324))
diff --git a/MANIFEST.in b/MANIFEST.in
index 78412d5b..cfbc714e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1 @@
-recursive-include dbt/include *.sql *.yml *.md
\ No newline at end of file
+recursive-include dbt/include *.sql *.yml *.md
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..a520c425
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,56 @@
+.DEFAULT_GOAL:=help
+
+.PHONY: dev
+dev: ## Installs adapter in develop mode along with development dependencies
+	@\
+	pip install -r dev-requirements.txt && pre-commit install
+
+.PHONY: mypy
+mypy: ## Runs mypy against staged changes for static type checking.
+	@\
+	pre-commit run --hook-stage manual mypy-check | grep -v "INFO"
+
+.PHONY: flake8
+flake8: ## Runs flake8 against staged changes to enforce style guide.
+	@\
+	pre-commit run --hook-stage manual flake8-check | grep -v "INFO"
+
+.PHONY: black
+black: ## Runs black against staged changes to enforce style guide.
+	@\
+	pre-commit run --hook-stage manual black-check -v | grep -v "INFO"
+
+.PHONY: lint
+lint: ## Runs flake8 and mypy code checks against staged changes.
+	@\
+	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
+	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
+
+.PHONY: linecheck
+linecheck: ## Checks all Python files for lines of 100 characters or more
+	@\
+	find dbt -type f -name "*.py" -exec grep -I -r -n '.\{100\}' {} \;
+
+.PHONY: unit
+unit: ## Runs unit tests with py38.
+	@\
+	tox -e py38
+
+.PHONY: test
+test: ## Runs unit tests with py38 and code checks against staged changes.
+	@\
+	tox -p -e py38; \
+	pre-commit run black-check --hook-stage manual | grep -v "INFO"; \
+	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
+	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
+
+.PHONY: clean
+clean: ## Removes git-ignored build artifacts and caches from the repo.
+	@echo "cleaning repo"
+	@git clean -f -X
+
+.PHONY: help
+help: ## Show this help message.
+	@echo 'usage: make [target]'
+	@echo
+	@echo 'targets:'
+	@grep -E '^[7+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py
index 469e202b..6ecc5ecc 100644
--- a/dbt/adapters/spark/__init__.py
+++ b/dbt/adapters/spark/__init__.py
@@ -8,6 +8,5 @@
 from dbt.include import spark
 
 Plugin = AdapterPlugin(
-    adapter=SparkAdapter,
-    credentials=SparkCredentials,
-    include_path=spark.PACKAGE_PATH)
+    adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH
+)
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index fd377ad1..4df6b301 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -1,11 +1,11 @@
 from dataclasses import dataclass
-from typing import TypeVar, Optional, Dict, Any
+from typing import Any, Dict, Optional, TypeVar, Union
 
 from dbt.adapters.base.column import Column
 from dbt.dataclass_schema import dbtClassMixin
 from hologram import JsonDict
 
-Self = TypeVar('Self', bound='SparkColumn')
+Self = TypeVar("Self", bound="SparkColumn")
 
 
 @dataclass
@@ -31,7 +31,7 @@ def literal(self, value):
 
     @property
     def quoted(self) -> str:
-        return '`{}`'.format(self.column)
+        return "`{}`".format(self.column)
 
     @property
     def data_type(self) -> str:
@@ -42,26 +42,23 @@ def __repr__(self) -> str:
 
     @staticmethod
     def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]:
-        table_stats = {}
+        table_stats: Dict[str, Union[int, str, bool]] = {}
         if raw_stats:
             # format: 1109049927 bytes, 14093476 rows
             stats = {
-                stats.split(" ")[1]: int(stats.split(" ")[0])
-                for stats in raw_stats.split(', ')
+                stats.split(" ")[1]: int(stats.split(" ")[0]) for stats in raw_stats.split(", ")
             }
             for key, val in stats.items():
-                table_stats[f'stats:{key}:label'] = key
-                table_stats[f'stats:{key}:value'] = val
-                table_stats[f'stats:{key}:description'] = ''
-                table_stats[f'stats:{key}:include'] = True
+                table_stats[f"stats:{key}:label"] = key
+                table_stats[f"stats:{key}:value"] = val
+                table_stats[f"stats:{key}:description"] = ""
+                table_stats[f"stats:{key}:include"] = True
         return table_stats
 
-    def to_column_dict(
-            self, omit_none: bool = True, validate: bool = False
-    ) -> JsonDict:
+    def to_column_dict(self, omit_none: bool = True, validate: bool = False) -> JsonDict:
         original_dict = self.to_dict(omit_none=omit_none)
         # If there are stats, merge them into the root of the dict
-        original_stats = original_dict.pop('table_stats', None)
+        original_stats = original_dict.pop("table_stats", None)
         if original_stats:
             original_dict.update(original_stats)
         return original_dict
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 11163ccf..59ceb9dd 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -26,6 +26,7 @@
 from hologram.helpers import StrEnum
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
+
 try:
     from thrift.transport.TSSLSocket import TSSLSocket
     import thrift
@@ -33,11 +34,7 @@
     import sasl
     import thrift_sasl
 except ImportError:
-    TSSLSocket = None
-    thrift = None
-    ssl = None
-    sasl = None
-    thrift_sasl = None
+    pass  # deliberate: explicitly setting these modules to None would degrade their types and violate mypy's contracts
 
 import base64
 import time
@@ -52,10 +49,10 @@ def _build_odbc_connnection_string(**kwargs) -> str:
 
 
 class SparkConnectionMethod(StrEnum):
-    THRIFT = 'thrift'
-    HTTP = 'http'
-    ODBC = 'odbc'
-    SESSION = 'session'
+    THRIFT = "thrift"
+    HTTP = "http"
+    ODBC = "odbc"
+    SESSION = "session"
 
 
 @dataclass
@@ -71,7 +68,7 @@ class SparkCredentials(Credentials):
     port: int = 443
     auth: Optional[str] = None
     kerberos_service_name: Optional[str] = None
-    organization: str = '0'
+    organization: str = "0"
     connect_retries: int = 0
     connect_timeout: int = 10
     use_ssl: bool = False
@@ -81,27 +78,24 @@ class SparkCredentials(Credentials):
     @classmethod
     def __pre_deserialize__(cls, data):
         data = super().__pre_deserialize__(data)
-        if 'database' not in data:
-            data['database'] = None
+        if "database" not in data:
+            data["database"] = None
         return data
 
     def __post_init__(self):
         # spark classifies database and schema as the same thing
-        if (
-            self.database is not None and
-            self.database != self.schema
-        ):
+        if self.database is not None and self.database != self.schema:
             raise dbt.exceptions.RuntimeException(
-                f'    schema: {self.schema} \n'
-                f'    database: {self.database} \n'
-                f'On Spark, database must be omitted or have the same value as'
-                f' schema.'
+                f"    schema: {self.schema} \n"
+                f"    database: {self.database} \n"
+                f"On Spark, database must be omitted or have the same value as"
+                f" schema."
             )
         self.database = None
 
         if self.method == SparkConnectionMethod.ODBC:
             try:
-                import pyodbc    # noqa: F401
+                import pyodbc  # noqa: F401
             except ImportError as e:
                 raise dbt.exceptions.RuntimeException(
                     f"{self.method} connection method requires "
@@ -111,22 +105,16 @@ def __post_init__(self):
                     f"ImportError({e.msg})"
                 ) from e
 
-        if (
-            self.method == SparkConnectionMethod.ODBC and
-            self.cluster and
-            self.endpoint
-        ):
+        if self.method == SparkConnectionMethod.ODBC and self.cluster and self.endpoint:
             raise dbt.exceptions.RuntimeException(
                 "`cluster` and `endpoint` cannot both be set when"
                 f" using {self.method} method to connect to Spark"
             )
 
         if (
-            self.method == SparkConnectionMethod.HTTP or
-            self.method == SparkConnectionMethod.THRIFT
-        ) and not (
-            ThriftState and THttpClient and hive
-        ):
+            self.method == SparkConnectionMethod.HTTP
+            or self.method == SparkConnectionMethod.THRIFT
+        ) and not (ThriftState and THttpClient and hive):
             raise dbt.exceptions.RuntimeException(
                 f"{self.method} connection method requires "
                 "additional dependencies. \n"
@@ -148,19 +136,19 @@ def __post_init__(self):
 
     @property
     def type(self):
-        return 'spark'
+        return "spark"
 
     @property
     def unique_field(self):
         return self.host
 
     def _connection_keys(self):
-        return ('host', 'port', 'cluster',
-                'endpoint', 'schema', 'organization')
+        return ("host", "port", "cluster", "endpoint", "schema", "organization")
 
 
 class PyhiveConnectionWrapper(object):
     """Wrap a Spark connection in a way that no-ops transactions"""
+
     # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html  # noqa
 
     def __init__(self, handle):
@@ -178,9 +166,7 @@ def cancel(self):
             try:
                 self._cursor.cancel()
             except EnvironmentError as exc:
-                logger.debug(
-                    "Exception while cancelling query: {}".format(exc)
-                )
+                logger.debug("Exception while cancelling query: {}".format(exc))
 
     def close(self):
         if self._cursor:
@@ -189,9 +175,7 @@ def close(self):
             try:
                 self._cursor.close()
             except EnvironmentError as exc:
-                logger.debug(
-                    "Exception while closing cursor: {}".format(exc)
-                )
+                logger.debug("Exception while closing cursor: {}".format(exc))
         self.handle.close()
 
     def rollback(self, *args, **kwargs):
@@ -247,23 +231,20 @@ def execute(self, sql, bindings=None):
             dbt.exceptions.raise_database_error(poll_state.errorMessage)
 
         elif state not in STATE_SUCCESS:
-            status_type = ThriftState._VALUES_TO_NAMES.get(
-                state,
-                'Unknown<{!r}>'.format(state))
+            status_type = ThriftState._VALUES_TO_NAMES.get(state, "Unknown<{!r}>".format(state))
 
-            dbt.exceptions.raise_database_error(
-                "Query failed with status: {}".format(status_type))
+            dbt.exceptions.raise_database_error("Query failed with status: {}".format(status_type))
 
         logger.debug("Poll status: {}, query complete".format(state))
 
     @classmethod
     def _fix_binding(cls, value):
         """Convert complex datatypes to primitives that can be loaded by
-           the Spark driver"""
+        the Spark driver"""
         if isinstance(value, NUMBERS):
             return float(value)
         elif isinstance(value, datetime):
-            return value.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
+            return value.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
         else:
             return value
 
@@ -273,7 +254,6 @@ def description(self):
 
 
 class PyodbcConnectionWrapper(PyhiveConnectionWrapper):
-
     def execute(self, sql, bindings=None):
         if sql.strip().endswith(";"):
             sql = sql.strip()[:-1]
@@ -282,19 +262,17 @@ def execute(self, sql, bindings=None):
             self._cursor.execute(sql)
         else:
             # pyodbc only supports `qmark` sql params!
-            query = sqlparams.SQLParams('format', 'qmark')
+            query = sqlparams.SQLParams("format", "qmark")
             sql, bindings = query.format(sql, bindings)
             self._cursor.execute(sql, *bindings)
 
 
 class SparkConnectionManager(SQLConnectionManager):
-    TYPE = 'spark'
+    TYPE = "spark"
 
     SPARK_CLUSTER_HTTP_PATH = "/sql/protocolv1/o/{organization}/{cluster}"
     SPARK_SQL_ENDPOINT_HTTP_PATH = "/sql/1.0/endpoints/{endpoint}"
-    SPARK_CONNECTION_URL = (
-        "{host}:{port}" + SPARK_CLUSTER_HTTP_PATH
-    )
+    SPARK_CONNECTION_URL = "{host}:{port}" + SPARK_CLUSTER_HTTP_PATH
 
     @contextmanager
     def exception_handler(self, sql):
@@ -308,7 +286,7 @@ def exception_handler(self, sql):
                 raise
 
             thrift_resp = exc.args[0]
-            if hasattr(thrift_resp, 'status'):
+            if hasattr(thrift_resp, "status"):
                 msg = thrift_resp.status.errorMessage
                 raise dbt.exceptions.RuntimeException(msg)
             else:
@@ -320,10 +298,8 @@ def cancel(self, connection):
     @classmethod
     def get_response(cls, cursor) -> AdapterResponse:
         # https://github.com/dbt-labs/dbt-spark/issues/142
-        message = 'OK'
-        return AdapterResponse(
-            _message=message
-        )
+        message = "OK"
+        return AdapterResponse(_message=message)
 
     # No transactions on Spark....
     def add_begin_query(self, *args, **kwargs):
@@ -346,12 +322,13 @@ def validate_creds(cls, creds, required):
             if not hasattr(creds, key):
                 raise dbt.exceptions.DbtProfileError(
                     "The config '{}' is required when using the {} method"
-                    " to connect to Spark".format(key, method))
+                    " to connect to Spark".format(key, method)
+                )
 
     @classmethod
     def open(cls, connection):
         if connection.state == ConnectionState.OPEN:
-            logger.debug('Connection is already open, skipping open.')
+            logger.debug("Connection is already open, skipping open.")
             return connection
 
         creds = connection.credentials
@@ -360,19 +337,18 @@ def open(cls, connection):
         for i in range(1 + creds.connect_retries):
             try:
                 if creds.method == SparkConnectionMethod.HTTP:
-                    cls.validate_creds(creds, ['token', 'host', 'port',
-                                               'cluster', 'organization'])
+                    cls.validate_creds(creds, ["token", "host", "port", "cluster", "organization"])
 
                     # Prepend https:// if it is missing
                     host = creds.host
-                    if not host.startswith('https://'):
-                        host = 'https://' + creds.host
+                    if not host.startswith("https://"):
+                        host = "https://" + creds.host
 
                     conn_url = cls.SPARK_CONNECTION_URL.format(
                         host=host,
                         port=creds.port,
                         organization=creds.organization,
-                        cluster=creds.cluster
+                        cluster=creds.cluster,
                     )
 
                     logger.debug("connection url: {}".format(conn_url))
@@ -381,15 +357,12 @@ def open(cls, connection):
 
                     raw_token = "token:{}".format(creds.token).encode()
                     token = base64.standard_b64encode(raw_token).decode()
-                    transport.setCustomHeaders({
-                        'Authorization': 'Basic {}'.format(token)
-                    })
+                    transport.setCustomHeaders({"Authorization": "Basic {}".format(token)})
 
                     conn = hive.connect(thrift_transport=transport)
                     handle = PyhiveConnectionWrapper(conn)
                 elif creds.method == SparkConnectionMethod.THRIFT:
-                    cls.validate_creds(creds,
-                                       ['host', 'port', 'user', 'schema'])
+                    cls.validate_creds(creds, ["host", "port", "user", "schema"])
 
                     if creds.use_ssl:
                         transport = build_ssl_transport(
@@ -397,26 +370,33 @@ def open(cls, connection):
                             port=creds.port,
                             username=creds.user,
                             auth=creds.auth,
-                            kerberos_service_name=creds.kerberos_service_name)
+                            kerberos_service_name=creds.kerberos_service_name,
+                        )
                         conn = hive.connect(thrift_transport=transport)
                     else:
-                        conn = hive.connect(host=creds.host,
-                                            port=creds.port,
-                                            username=creds.user,
-                                            auth=creds.auth,
-                                            kerberos_service_name=creds.kerberos_service_name)  # noqa
+                        conn = hive.connect(
+                            host=creds.host,
+                            port=creds.port,
+                            username=creds.user,
+                            auth=creds.auth,
+                            kerberos_service_name=creds.kerberos_service_name,
+                        )  # noqa
                     handle = PyhiveConnectionWrapper(conn)
                 elif creds.method == SparkConnectionMethod.ODBC:
                     if creds.cluster is not None:
-                        required_fields = ['driver', 'host', 'port', 'token',
-                                           'organization', 'cluster']
+                        required_fields = [
+                            "driver",
+                            "host",
+                            "port",
+                            "token",
+                            "organization",
+                            "cluster",
+                        ]
                         http_path = cls.SPARK_CLUSTER_HTTP_PATH.format(
-                            organization=creds.organization,
-                            cluster=creds.cluster
+                            organization=creds.organization, cluster=creds.cluster
                         )
                     elif creds.endpoint is not None:
-                        required_fields = ['driver', 'host', 'port', 'token',
-                                           'endpoint']
+                        required_fields = ["driver", "host", "port", "token", "endpoint"]
                         http_path = cls.SPARK_SQL_ENDPOINT_HTTP_PATH.format(
                             endpoint=creds.endpoint
                         )
@@ -429,13 +409,12 @@ def open(cls, connection):
                     cls.validate_creds(creds, required_fields)
 
                     dbt_spark_version = __version__.version
-                    user_agent_entry = f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
+                    user_agent_entry = (
+                        f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
+                    )
 
                     # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm
-                    ssp = {
-                        f"SSP_{k}": f"{{{v}}}"
-                        for k, v in creds.server_side_parameters.items()
-                    }
+                    ssp = {f"SSP_{k}": f"{{{v}}}" for k, v in creds.server_side_parameters.items()}
 
                     # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm
                     connection_str = _build_odbc_connnection_string(
@@ -461,6 +440,7 @@ def open(cls, connection):
                         Connection,
                         SessionConnectionWrapper,
                     )
+
                     handle = SessionConnectionWrapper(Connection())
                 else:
                     raise dbt.exceptions.DbtProfileError(
@@ -472,9 +452,9 @@ def open(cls, connection):
                 if isinstance(e, EOFError):
                     # The user almost certainly has invalid credentials.
                     # Perhaps a token expired, or something
-                    msg = 'Failed to connect'
+                    msg = "Failed to connect"
                     if creds.token is not None:
-                        msg += ', is your token valid?'
+                        msg += ", is your token valid?"
                     raise dbt.exceptions.FailedToConnectException(msg) from e
                 retryable_message = _is_retryable_error(e)
                 if retryable_message and creds.connect_retries > 0:
@@ -496,9 +476,7 @@ def open(cls, connection):
                     logger.warning(msg)
                     time.sleep(creds.connect_timeout)
                 else:
-                    raise dbt.exceptions.FailedToConnectException(
-                        'failed to connect'
-                    ) from e
+                    raise dbt.exceptions.FailedToConnectException("failed to connect") from e
         else:
             raise exc
 
@@ -507,56 +485,50 @@ def open(cls, connection):
         return connection
 
 
-def build_ssl_transport(host, port, username, auth,
-                        kerberos_service_name, password=None):
+def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None):
     transport = None
     if port is None:
         port = 10000
     if auth is None:
-        auth = 'NONE'
+        auth = "NONE"
     socket = TSSLSocket(host, port, cert_reqs=ssl.CERT_NONE)
-    if auth == 'NOSASL':
+    if auth == "NOSASL":
         # NOSASL corresponds to hive.server2.authentication=NOSASL
         # in hive-site.xml
         transport = thrift.transport.TTransport.TBufferedTransport(socket)
-    elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
+    elif auth in ("LDAP", "KERBEROS", "NONE", "CUSTOM"):
         # Defer import so package dependency is optional
-        if auth == 'KERBEROS':
+        if auth == "KERBEROS":
             # KERBEROS mode in hive.server2.authentication is GSSAPI
             # in sasl library
-            sasl_auth = 'GSSAPI'
+            sasl_auth = "GSSAPI"
         else:
-            sasl_auth = 'PLAIN'
+            sasl_auth = "PLAIN"
             if password is None:
                 # Password doesn't matter in NONE mode, just needs
                 # to be nonempty.
-                password = 'x'
+                password = "x"
 
         def sasl_factory():
             sasl_client = sasl.Client()
-            sasl_client.setAttr('host', host)
-            if sasl_auth == 'GSSAPI':
-                sasl_client.setAttr('service', kerberos_service_name)
-            elif sasl_auth == 'PLAIN':
-                sasl_client.setAttr('username', username)
-                sasl_client.setAttr('password', password)
+            sasl_client.setAttr("host", host)
+            if sasl_auth == "GSSAPI":
+                sasl_client.setAttr("service", kerberos_service_name)
+            elif sasl_auth == "PLAIN":
+                sasl_client.setAttr("username", username)
+                sasl_client.setAttr("password", password)
             else:
                 raise AssertionError
             sasl_client.init()
             return sasl_client
 
-        transport = thrift_sasl.TSaslClientTransport(sasl_factory,
-                                                     sasl_auth, socket)
+        transport = thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
     return transport
 
 
-def _is_retryable_error(exc: Exception) -> Optional[str]:
-    message = getattr(exc, 'message', None)
-    if message is None:
-        return None
-    message = message.lower()
-    if 'pending' in message:
-        return exc.message
-    if 'temporarily_unavailable' in message:
-        return exc.message
-    return None
+def _is_retryable_error(exc: Exception) -> str:
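+    # Return the error text when it looks transient ("pending" or
+    # "temporarily_unavailable"); otherwise return "" so callers can treat the
+    # result as falsy without the Optional[str] handling used previously.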
+    message = str(exc).lower()
+    if "pending" in message or "temporarily_unavailable" in message:
+        return str(exc)
+    else:
+        return ""
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index eb001fbc..dd090a23 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,7 +1,9 @@
 import re
 from concurrent.futures import Future
 from dataclasses import dataclass
-from typing import Optional, List, Dict, Any, Union, Iterable
+from typing import Any, Dict, Iterable, List, Optional, Union
+from typing_extensions import TypeAlias
+
 import agate
 from dbt.contracts.relation import RelationType
 
@@ -21,19 +23,19 @@
 
 logger = AdapterLogger("Spark")
 
-GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation'
-LIST_SCHEMAS_MACRO_NAME = 'list_schemas'
-LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching'
-DROP_RELATION_MACRO_NAME = 'drop_relation'
-FETCH_TBL_PROPERTIES_MACRO_NAME = 'fetch_tbl_properties'
+GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation"
+LIST_SCHEMAS_MACRO_NAME = "list_schemas"
+LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
+DROP_RELATION_MACRO_NAME = "drop_relation"
+FETCH_TBL_PROPERTIES_MACRO_NAME = "fetch_tbl_properties"
 
-KEY_TABLE_OWNER = 'Owner'
-KEY_TABLE_STATISTICS = 'Statistics'
+KEY_TABLE_OWNER = "Owner"
+KEY_TABLE_STATISTICS = "Statistics"
 
 
 @dataclass
 class SparkConfig(AdapterConfig):
-    file_format: str = 'parquet'
+    file_format: str = "parquet"
     location_root: Optional[str] = None
     partition_by: Optional[Union[List[str], str]] = None
     clustered_by: Optional[Union[List[str], str]] = None
@@ -44,48 +46,44 @@ class SparkConfig(AdapterConfig):
 
 class SparkAdapter(SQLAdapter):
     COLUMN_NAMES = (
-        'table_database',
-        'table_schema',
-        'table_name',
-        'table_type',
-        'table_comment',
-        'table_owner',
-        'column_name',
-        'column_index',
-        'column_type',
-        'column_comment',
-
-        'stats:bytes:label',
-        'stats:bytes:value',
-        'stats:bytes:description',
-        'stats:bytes:include',
-
-        'stats:rows:label',
-        'stats:rows:value',
-        'stats:rows:description',
-        'stats:rows:include',
+        "table_database",
+        "table_schema",
+        "table_name",
+        "table_type",
+        "table_comment",
+        "table_owner",
+        "column_name",
+        "column_index",
+        "column_type",
+        "column_comment",
+        "stats:bytes:label",
+        "stats:bytes:value",
+        "stats:bytes:description",
+        "stats:bytes:include",
+        "stats:rows:label",
+        "stats:rows:value",
+        "stats:rows:description",
+        "stats:rows:include",
     )
-    INFORMATION_COLUMNS_REGEX = re.compile(
-        r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE)
+    INFORMATION_COLUMNS_REGEX = re.compile(r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE)
     INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE)
-    INFORMATION_STATISTICS_REGEX = re.compile(
-        r"^Statistics: (.*)$", re.MULTILINE)
+    INFORMATION_STATISTICS_REGEX = re.compile(r"^Statistics: (.*)$", re.MULTILINE)
     HUDI_METADATA_COLUMNS = [
-        '_hoodie_commit_time',
-        '_hoodie_commit_seqno',
-        '_hoodie_record_key',
-        '_hoodie_partition_path',
-        '_hoodie_file_name'
+        "_hoodie_commit_time",
+        "_hoodie_commit_seqno",
+        "_hoodie_record_key",
+        "_hoodie_partition_path",
+        "_hoodie_file_name",
     ]
 
-    Relation = SparkRelation
-    Column = SparkColumn
-    ConnectionManager = SparkConnectionManager
-    AdapterSpecificConfigs = SparkConfig
+    Relation: TypeAlias = SparkRelation
+    Column: TypeAlias = SparkColumn
+    ConnectionManager: TypeAlias = SparkConnectionManager
+    AdapterSpecificConfigs: TypeAlias = SparkConfig
 
     @classmethod
     def date_function(cls) -> str:
-        return 'current_timestamp()'
+        return "current_timestamp()"
 
     @classmethod
     def convert_text_type(cls, agate_table, col_idx):
@@ -109,31 +107,28 @@ def convert_datetime_type(cls, agate_table, col_idx):
         return "timestamp"
 
     def quote(self, identifier):
-        return '`{}`'.format(identifier)
+        return "`{}`".format(identifier)
 
     def add_schema_to_cache(self, schema) -> str:
         """Cache a new schema in dbt. It will show up in `list relations`."""
         if schema is None:
             name = self.nice_connection_name()
             dbt.exceptions.raise_compiler_error(
-                'Attempted to cache a null schema for {}'.format(name)
+                "Attempted to cache a null schema for {}".format(name)
             )
         if dbt.flags.USE_CACHE:
             self.cache.add_schema(None, schema)
         # so jinja doesn't render things
-        return ''
+        return ""
 
     def list_relations_without_caching(
         self, schema_relation: SparkRelation
     ) -> List[SparkRelation]:
-        kwargs = {'schema_relation': schema_relation}
+        kwargs = {"schema_relation": schema_relation}
         try:
-            results = self.execute_macro(
-                LIST_RELATIONS_MACRO_NAME,
-                kwargs=kwargs
-            )
+            results = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs)
         except dbt.exceptions.RuntimeException as e:
-            errmsg = getattr(e, 'msg', '')
+            errmsg = getattr(e, "msg", "")
             if f"Database '{schema_relation}' not found" in errmsg:
                 return []
             else:
@@ -146,13 +141,12 @@ def list_relations_without_caching(
             if len(row) != 4:
                 raise dbt.exceptions.RuntimeException(
                     f'Invalid value from "show table extended ...", '
-                    f'got {len(row)} values, expected 4'
+                    f"got {len(row)} values, expected 4"
                 )
             _schema, name, _, information = row
-            rel_type = RelationType.View \
-                if 'Type: VIEW' in information else RelationType.Table
-            is_delta = 'Provider: delta' in information
-            is_hudi = 'Provider: hudi' in information
+            rel_type = RelationType.View if "Type: VIEW" in information else RelationType.Table
+            is_delta = "Provider: delta" in information
+            is_hudi = "Provider: hudi" in information
             relation = self.Relation.create(
                 schema=_schema,
                 identifier=name,
@@ -166,7 +160,7 @@ def list_relations_without_caching(
         return relations
 
     def get_relation(
-        self, database: str, schema: str, identifier: str
+        self, database: Optional[str], schema: str, identifier: str
     ) -> Optional[BaseRelation]:
         if not self.Relation.include_policy.database:
             database = None
@@ -174,9 +168,7 @@ def get_relation(
         return super().get_relation(database, schema, identifier)
 
     def parse_describe_extended(
-            self,
-            relation: Relation,
-            raw_rows: List[agate.Row]
+        self, relation: Relation, raw_rows: List[agate.Row]
     ) -> List[SparkColumn]:
         # Convert the Row to a dict
         dict_rows = [dict(zip(row._keys, row._values)) for row in raw_rows]
@@ -185,44 +177,45 @@ def parse_describe_extended(
         pos = self.find_table_information_separator(dict_rows)
 
         # Remove rows that start with a hash, they are comments
-        rows = [
-            row for row in raw_rows[0:pos]
-            if not row['col_name'].startswith('#')
-        ]
-        metadata = {
-            col['col_name']: col['data_type'] for col in raw_rows[pos + 1:]
-        }
+        rows = [row for row in raw_rows[0:pos] if not row["col_name"].startswith("#")]
+        metadata = {col["col_name"]: col["data_type"] for col in raw_rows[pos + 1 :]}
 
         raw_table_stats = metadata.get(KEY_TABLE_STATISTICS)
         table_stats = SparkColumn.convert_table_stats(raw_table_stats)
-        return [SparkColumn(
-            table_database=None,
-            table_schema=relation.schema,
-            table_name=relation.name,
-            table_type=relation.type,
-            table_owner=str(metadata.get(KEY_TABLE_OWNER)),
-            table_stats=table_stats,
-            column=column['col_name'],
-            column_index=idx,
-            dtype=column['data_type'],
-        ) for idx, column in enumerate(rows)]
+        return [
+            SparkColumn(
+                table_database=None,
+                table_schema=relation.schema,
+                table_name=relation.name,
+                table_type=relation.type,
+                table_owner=str(metadata.get(KEY_TABLE_OWNER)),
+                table_stats=table_stats,
+                column=column["col_name"],
+                column_index=idx,
+                dtype=column["data_type"],
+            )
+            for idx, column in enumerate(rows)
+        ]
 
     @staticmethod
     def find_table_information_separator(rows: List[dict]) -> int:
         pos = 0
         for row in rows:
-            if not row['col_name'] or row['col_name'].startswith('#'):
+            if not row["col_name"] or row["col_name"].startswith("#"):
                 break
             pos += 1
         return pos
 
     def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
-        cached_relations = self.cache.get_relations(
-            relation.database, relation.schema)
-        cached_relation = next((cached_relation
-                                for cached_relation in cached_relations
-                                if str(cached_relation) == str(relation)),
-                               None)
+        cached_relations = self.cache.get_relations(relation.database, relation.schema)
+        cached_relation = next(
+            (
+                cached_relation
+                for cached_relation in cached_relations
+                if str(cached_relation) == str(relation)
+            ),
+            None,
+        )
         columns = []
         if cached_relation and cached_relation.information:
             columns = self.parse_columns_from_information(cached_relation)
@@ -238,30 +231,21 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
                 # spark would throw an error when the table doesn't exist, where other
                 # CDWs would just return an empty list, so normalize the behavior here
                 errmsg = getattr(e, "msg", "")
-                if (
-                    "Table or view not found" in errmsg or
-                    "NoSuchTableException" in errmsg
-                ):
+                if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg:
                     pass
                 else:
                     raise e
 
         # strip hudi metadata columns.
-        columns = [x for x in columns
-                   if x.name not in self.HUDI_METADATA_COLUMNS]
+        columns = [x for x in columns if x.name not in self.HUDI_METADATA_COLUMNS]
         return columns
 
-    def parse_columns_from_information(
-            self, relation: SparkRelation
-    ) -> List[SparkColumn]:
-        owner_match = re.findall(
-            self.INFORMATION_OWNER_REGEX, relation.information)
+    def parse_columns_from_information(self, relation: SparkRelation) -> List[SparkColumn]:
+        owner_match = re.findall(self.INFORMATION_OWNER_REGEX, relation.information)
         owner = owner_match[0] if owner_match else None
-        matches = re.finditer(
-            self.INFORMATION_COLUMNS_REGEX, relation.information)
+        matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, relation.information)
         columns = []
-        stats_match = re.findall(
-            self.INFORMATION_STATISTICS_REGEX, relation.information)
+        stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, relation.information)
         raw_table_stats = stats_match[0] if stats_match else None
         table_stats = SparkColumn.convert_table_stats(raw_table_stats)
         for match_num, match in enumerate(matches):
@@ -275,28 +259,25 @@ def parse_columns_from_information(
                 table_owner=owner,
                 column=column_name,
                 dtype=column_type,
-                table_stats=table_stats
+                table_stats=table_stats,
             )
             columns.append(column)
         return columns
 
-    def _get_columns_for_catalog(
-        self, relation: SparkRelation
-    ) -> Iterable[Dict[str, Any]]:
+    def _get_columns_for_catalog(self, relation: SparkRelation) -> Iterable[Dict[str, Any]]:
         columns = self.parse_columns_from_information(relation)
 
         for column in columns:
             # convert SparkColumns into catalog dicts
             as_dict = column.to_column_dict()
-            as_dict['column_name'] = as_dict.pop('column', None)
-            as_dict['column_type'] = as_dict.pop('dtype')
-            as_dict['table_database'] = None
+            as_dict["column_name"] = as_dict.pop("column", None)
+            as_dict["column_type"] = as_dict.pop("dtype")
+            as_dict["table_database"] = None
             yield as_dict
 
     def get_properties(self, relation: Relation) -> Dict[str, str]:
         properties = self.execute_macro(
-            FETCH_TBL_PROPERTIES_MACRO_NAME,
-            kwargs={'relation': relation}
+            FETCH_TBL_PROPERTIES_MACRO_NAME, kwargs={"relation": relation}
         )
         return dict(properties)
 
@@ -304,28 +285,30 @@ def get_catalog(self, manifest):
         schema_map = self._get_catalog_schemas(manifest)
         if len(schema_map) > 1:
             dbt.exceptions.raise_compiler_error(
-                f'Expected only one database in get_catalog, found '
-                f'{list(schema_map)}'
+                f"Expected only one database in get_catalog, found " f"{list(schema_map)}"
             )
 
         with executor(self.config) as tpe:
             futures: List[Future[agate.Table]] = []
             for info, schemas in schema_map.items():
                 for schema in schemas:
-                    futures.append(tpe.submit_connected(
-                        self, schema,
-                        self._get_one_catalog, info, [schema], manifest
-                    ))
+                    futures.append(
+                        tpe.submit_connected(
+                            self, schema, self._get_one_catalog, info, [schema], manifest
+                        )
+                    )
             catalogs, exceptions = catch_as_completed(futures)
         return catalogs, exceptions
 
     def _get_one_catalog(
-        self, information_schema, schemas, manifest,
+        self,
+        information_schema,
+        schemas,
+        manifest,
     ) -> agate.Table:
         if len(schemas) != 1:
             dbt.exceptions.raise_compiler_error(
-                f'Expected only one schema in spark _get_one_catalog, found '
-                f'{schemas}'
+                f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}"
             )
 
         database = information_schema.database
@@ -335,15 +318,10 @@ def _get_one_catalog(
         for relation in self.list_relations(database, schema):
             logger.debug("Getting table schema for relation {}", relation)
             columns.extend(self._get_columns_for_catalog(relation))
-        return agate.Table.from_object(
-            columns, column_types=DEFAULT_TYPE_TESTER
-        )
+        return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER)
 
     def check_schema_exists(self, database, schema):
-        results = self.execute_macro(
-            LIST_SCHEMAS_MACRO_NAME,
-            kwargs={'database': database}
-        )
+        results = self.execute_macro(LIST_SCHEMAS_MACRO_NAME, kwargs={"database": database})
 
         exists = True if schema in [row[0] for row in results] else False
         return exists
@@ -353,7 +331,7 @@ def get_rows_different_sql(
         relation_a: BaseRelation,
         relation_b: BaseRelation,
         column_names: Optional[List[str]] = None,
-        except_operator: str = 'EXCEPT',
+        except_operator: str = "EXCEPT",
     ) -> str:
         """Generate SQL for a query that returns a single row with a two
         columns: the number of rows that are different between the two
@@ -366,7 +344,7 @@ def get_rows_different_sql(
             names = sorted((self.quote(c.name) for c in columns))
         else:
             names = sorted((self.quote(n) for n in column_names))
-        columns_csv = ', '.join(names)
+        columns_csv = ", ".join(names)
 
         sql = COLUMNS_EQUAL_SQL.format(
             columns=columns_csv,
@@ -384,7 +362,7 @@ def run_sql_for_tests(self, sql, fetch, conn):
         try:
             cursor.execute(sql)
             if fetch == "one":
-                if hasattr(cursor, 'fetchone'):
+                if hasattr(cursor, "fetchone"):
                     return cursor.fetchone()
                 else:
                     # AttributeError: 'PyhiveConnectionWrapper' object has no attribute 'fetchone'
@@ -406,7 +384,7 @@ def run_sql_for_tests(self, sql, fetch, conn):
 # "trivial". Which is true, though it seems like an unreasonable cause for
 # failure! It also doesn't like the `from foo, bar` syntax as opposed to
 # `from foo cross join bar`.
-COLUMNS_EQUAL_SQL = '''
+COLUMNS_EQUAL_SQL = """
 with diff_count as (
     SELECT
         1 as id,
@@ -433,4 +411,4 @@ def run_sql_for_tests(self, sql, fetch, conn):
     diff_count.num_missing as num_mismatched
 from row_count_diff
 cross join diff_count
-'''.strip()
+""".strip()
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index 043cabfa..249caf0d 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -24,19 +24,19 @@ class SparkIncludePolicy(Policy):
 class SparkRelation(BaseRelation):
     quote_policy: SparkQuotePolicy = SparkQuotePolicy()
     include_policy: SparkIncludePolicy = SparkIncludePolicy()
-    quote_character: str = '`'
+    quote_character: str = "`"
     is_delta: Optional[bool] = None
     is_hudi: Optional[bool] = None
-    information: str = None
+    information: Optional[str] = None
 
     def __post_init__(self):
         if self.database != self.schema and self.database:
-            raise RuntimeException('Cannot set database in spark!')
+            raise RuntimeException("Cannot set database in spark!")
 
     def render(self):
         if self.include_policy.database and self.include_policy.schema:
             raise RuntimeException(
-                'Got a spark relation with schema and database set to '
-                'include, but only one can be set'
+                "Got a spark relation with schema and database set to "
+                "include, but only one can be set"
             )
         return super().render()
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index 6010df92..beb77d54 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -4,7 +4,7 @@
 
 import datetime as dt
 from types import TracebackType
-from typing import Any
+from typing import Any, List, Optional, Tuple, Type
 
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
@@ -25,17 +25,17 @@ class Cursor:
     """
 
     def __init__(self) -> None:
-        self._df: DataFrame | None = None
-        self._rows: list[Row] | None = None
+        self._df: Optional[DataFrame] = None
+        self._rows: Optional[List[Row]] = None
 
     def __enter__(self) -> Cursor:
         return self
 
     def __exit__(
         self,
-        exc_type: type[BaseException] | None,
-        exc_val: Exception | None,
-        exc_tb: TracebackType | None,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[Exception],
+        exc_tb: Optional[TracebackType],
     ) -> bool:
         self.close()
         return True
@@ -43,13 +43,13 @@ def __exit__(
     @property
     def description(
         self,
-    ) -> list[tuple[str, str, None, None, None, None, bool]]:
+    ) -> List[Tuple[str, str, None, None, None, None, bool]]:
         """
         Get the description.
 
         Returns
         -------
-        out : list[tuple[str, str, None, None, None, None, bool]]
+        out : List[Tuple[str, str, None, None, None, None, bool]]
             The description.
 
         Source
@@ -109,13 +109,13 @@ def execute(self, sql: str, *parameters: Any) -> None:
         spark_session = SparkSession.builder.enableHiveSupport().getOrCreate()
         self._df = spark_session.sql(sql)
 
-    def fetchall(self) -> list[Row] | None:
+    def fetchall(self) -> Optional[List[Row]]:
         """
         Fetch all data.
 
         Returns
         -------
-        out : list[Row] | None
+        out : Optional[List[Row]]
             The rows.
 
         Source
@@ -126,7 +126,7 @@ def fetchall(self) -> list[Row] | None:
             self._rows = self._df.collect()
         return self._rows
 
-    def fetchone(self) -> Row | None:
+    def fetchone(self) -> Optional[Row]:
         """
         Fetch the first output.
 
diff --git a/dbt/include/spark/__init__.py b/dbt/include/spark/__init__.py
index 564a3d1e..b177e5d4 100644
--- a/dbt/include/spark/__init__.py
+++ b/dbt/include/spark/__init__.py
@@ -1,2 +1,3 @@
 import os
+
 PACKAGE_PATH = os.path.dirname(__file__)
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index e96501c4..22381d9e 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -231,7 +231,7 @@
       {% set comment = column_dict[column_name]['description'] %}
       {% set escaped_comment = comment | replace('\'', '\\\'') %}
       {% set comment_query %}
-        alter table {{ relation }} change column 
+        alter table {{ relation }} change column
             {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }}
             comment '{{ escaped_comment }}';
       {% endset %}
@@ -260,25 +260,25 @@
 
 
 {% macro spark__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
-  
+
   {% if remove_columns %}
     {% set platform_name = 'Delta Lake' if relation.is_delta else 'Apache Spark' %}
     {{ exceptions.raise_compiler_error(platform_name + ' does not support dropping columns from tables') }}
   {% endif %}
-  
+
   {% if add_columns is none %}
     {% set add_columns = [] %}
   {% endif %}
-  
+
   {% set sql -%}
-     
+
      alter {{ relation.type }} {{ relation }}
-       
+
        {% if add_columns %} add columns {% endif %}
             {% for column in add_columns %}
                {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
             {% endfor %}
-  
+
   {%- endset -%}
 
   {% do run_query(sql) %}
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index d0b6e89b..8d8e69d9 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -1,17 +1,17 @@
 {% materialization incremental, adapter='spark' -%}
-  
+
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
   {%- set raw_strategy = config.get('incremental_strategy', default='append') -%}
-  
+
   {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%}
   {%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%}
-  
+
   {%- set unique_key = config.get('unique_key', none) -%}
   {%- set partition_by = config.get('partition_by', none) -%}
 
   {%- set full_refresh_mode = (should_full_refresh()) -%}
-  
+
   {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %}
 
   {% set target_relation = this %}
@@ -42,7 +42,7 @@
   {%- endcall -%}
 
   {% do persist_docs(target_relation, model) %}
-  
+
   {{ run_hooks(post_hooks) }}
 
   {{ return({'relations': [target_relation]}) }}
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index 215b5f3f..28b8f200 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -1,5 +1,5 @@
 {% macro get_insert_overwrite_sql(source_relation, target_relation) %}
-    
+
     {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}
     {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
     insert overwrite table {{ target_relation }}
@@ -41,20 +41,20 @@
   {% else %}
       {% do predicates.append('FALSE') %}
   {% endif %}
-  
+
   {{ sql_header if sql_header is not none }}
-  
+
   merge into {{ target }} as DBT_INTERNAL_DEST
       using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE
       on {{ predicates | join(' and ') }}
-      
+
       when matched then update set
         {% if update_columns -%}{%- for column_name in update_columns %}
             {{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }}
             {%- if not loop.last %}, {%- endif %}
         {%- endfor %}
         {%- else %} * {% endif %}
-    
+
       when not matched then insert *
 {% endmacro %}
 
diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql
index 3e9de359..ffd56f10 100644
--- a/dbt/include/spark/macros/materializations/incremental/validate.sql
+++ b/dbt/include/spark/macros/materializations/incremental/validate.sql
@@ -28,13 +28,13 @@
     Invalid incremental strategy provided: {{ raw_strategy }}
     You can only choose this strategy when file_format is set to 'delta' or 'hudi'
   {%- endset %}
-  
+
   {% set invalid_insert_overwrite_delta_msg -%}
     Invalid incremental strategy provided: {{ raw_strategy }}
     You cannot use this strategy when file_format is set to 'delta'
     Use the 'append' or 'merge' strategy instead
   {%- endset %}
-  
+
   {% set invalid_insert_overwrite_endpoint_msg -%}
     Invalid incremental strategy provided: {{ raw_strategy }}
     You cannot use this strategy when connecting via endpoint
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index 82d186ce..9c891ef0 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -32,7 +32,7 @@
 
 {% macro spark_build_snapshot_staging_table(strategy, sql, target_relation) %}
     {% set tmp_identifier = target_relation.identifier ~ '__dbt_tmp' %}
-                                
+
     {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier,
                                                   schema=target_relation.schema,
                                                   database=none,
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 3ae2df97..2eeb806f 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -21,7 +21,7 @@
   {% call statement('main') -%}
     {{ create_table_as(False, target_relation, sql) }}
   {%- endcall %}
-  
+
   {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks) }}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 0f84cbd5..b94cb8b6 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -3,18 +3,22 @@
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
+black==22.3.0
+bumpversion
+click~=8.0.4
+flake8
+flaky
 freezegun==0.3.9
-pytest>=6.0.2
+ipdb
 mock>=1.3.0
-flake8
+mypy==0.950
+pre-commit
+pytest-csv
+pytest-dotenv
+pytest-xdist
+pytest>=6.0.2
 pytz
-bumpversion
 tox>=3.2.0
-ipdb
-pytest-xdist
-pytest-dotenv
-pytest-csv
-flaky
 
 # Test requirements
 sasl>=0.2.1
diff --git a/scripts/build-dist.sh b/scripts/build-dist.sh
index 65e6dbc9..3c380839 100755
--- a/scripts/build-dist.sh
+++ b/scripts/build-dist.sh
@@ -1,4 +1,4 @@
-#!/bin/bash 
+#!/bin/bash
 
 set -eo pipefail
 
diff --git a/setup.py b/setup.py
index 12ecbacd..836aeed4 100644
--- a/setup.py
+++ b/setup.py
@@ -5,41 +5,39 @@
 
 # require python 3.7 or newer
 if sys.version_info < (3, 7):
-    print('Error: dbt does not support this version of Python.')
-    print('Please upgrade to Python 3.7 or higher.')
+    print("Error: dbt does not support this version of Python.")
+    print("Please upgrade to Python 3.7 or higher.")
     sys.exit(1)
 
 
 # require version of setuptools that supports find_namespace_packages
 from setuptools import setup
+
 try:
     from setuptools import find_namespace_packages
 except ImportError:
     # the user has a downlevel version of setuptools.
-    print('Error: dbt requires setuptools v40.1.0 or higher.')
-    print('Please upgrade setuptools with "pip install --upgrade setuptools" '
-          'and try again')
+    print("Error: dbt requires setuptools v40.1.0 or higher.")
+    print('Please upgrade setuptools with "pip install --upgrade setuptools" ' "and try again")
     sys.exit(1)
 
 
 # pull long description from README
 this_directory = os.path.abspath(os.path.dirname(__file__))
-with open(os.path.join(this_directory, 'README.md'), 'r', encoding='utf8') as f:
+with open(os.path.join(this_directory, "README.md"), "r", encoding="utf8") as f:
     long_description = f.read()
 
 
 # get this package's version from dbt/adapters//__version__.py
 def _get_plugin_version_dict():
-    _version_path = os.path.join(
-        this_directory, 'dbt', 'adapters', 'spark', '__version__.py'
-    )
-    _semver = r'''(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'''
-    _pre = r'''((?P<prekind>a|b|rc)(?P<pre>\d+))?'''
-    _version_pattern = fr'''version\s*=\s*["']{_semver}{_pre}["']'''
+    _version_path = os.path.join(this_directory, "dbt", "adapters", "spark", "__version__.py")
+    _semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
+    _pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
+    _version_pattern = fr"""version\s*=\s*["']{_semver}{_pre}["']"""
     with open(_version_path) as f:
         match = re.search(_version_pattern, f.read().strip())
         if match is None:
-            raise ValueError(f'invalid version at {_version_path}')
+            raise ValueError(f"invalid version at {_version_path}")
         return match.groupdict()
 
 
@@ -47,7 +45,7 @@ def _get_plugin_version_dict():
 def _get_dbt_core_version():
     parts = _get_plugin_version_dict()
     minor = "{major}.{minor}.0".format(**parts)
-    pre = (parts["prekind"]+"1" if parts["prekind"] else "")
+    pre = parts["prekind"] + "1" if parts["prekind"] else ""
     return f"{minor}{pre}"
 
 
@@ -56,33 +54,28 @@ def _get_dbt_core_version():
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 
-odbc_extras = ['pyodbc>=4.0.30']
+odbc_extras = ["pyodbc>=4.0.30"]
 pyhive_extras = [
-    'PyHive[hive]>=0.6.0,<0.7.0',
-    'thrift>=0.11.0,<0.16.0',
-]
-session_extras = [
-    "pyspark>=3.0.0,<4.0.0"
+    "PyHive[hive]>=0.6.0,<0.7.0",
+    "thrift>=0.11.0,<0.16.0",
 ]
+session_extras = ["pyspark>=3.0.0,<4.0.0"]
 all_extras = odbc_extras + pyhive_extras + session_extras
 
 setup(
     name=package_name,
     version=package_version,
-
     description=description,
     long_description=long_description,
-    long_description_content_type='text/markdown',
-
-    author='dbt Labs',
-    author_email='info@dbtlabs.com',
-    url='https://github.com/dbt-labs/dbt-spark',
-
-    packages=find_namespace_packages(include=['dbt', 'dbt.*']),
+    long_description_content_type="text/markdown",
+    author="dbt Labs",
+    author_email="info@dbtlabs.com",
+    url="https://github.com/dbt-labs/dbt-spark",
+    packages=find_namespace_packages(include=["dbt", "dbt.*"]),
     include_package_data=True,
     install_requires=[
-        'dbt-core~={}'.format(dbt_core_version),
-        'sqlparams>=3.0.0',
+        "dbt-core~={}".format(dbt_core_version),
+        "sqlparams>=3.0.0",
     ],
     extras_require={
         "ODBC": odbc_extras,
@@ -92,17 +85,14 @@ def _get_dbt_core_version():
     },
     zip_safe=False,
     classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        
-        'License :: OSI Approved :: Apache Software License',
-        
-        'Operating System :: Microsoft :: Windows',
-        'Operating System :: MacOS :: MacOS X',
-        'Operating System :: POSIX :: Linux',
-
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
+        "Development Status :: 5 - Production/Stable",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: Microsoft :: Windows",
+        "Operating System :: MacOS :: MacOS X",
+        "Operating System :: POSIX :: Linux",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
     ],
     python_requires=">=3.7",
 )
diff --git a/tox.ini b/tox.ini
index 59b931dc..a75e2a26 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,14 +2,6 @@
 skipsdist = True
 envlist = unit, flake8, integration-spark-thrift
 
-
-[testenv:flake8]
-basepython = python3.8
-commands = /bin/bash -c '$(which flake8) --max-line-length 99 --select=E,W,F --ignore=W504 dbt/'
-passenv = DBT_* PYTEST_ADDOPTS
-deps =
-     -r{toxinidir}/dev-requirements.txt
-
 [testenv:unit]
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'

From fad79e31d37b027d28ba1430e980e684bb7c2556 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Mon, 16 May 2022 14:37:02 +0200
Subject: [PATCH 305/603] More consistent results from get_columns_in_relation
 (#355)

* More consistent results from get_columns_in_relation

* Not dispatched, full name

* Add changelog entry
---
 CHANGELOG.md                          |  7 ++++++-
 dbt/adapters/spark/impl.py            |  6 ++++--
 dbt/include/spark/macros/adapters.sql | 10 +++++++---
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 77eb7258..961fe01a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@
-## dbt-spark 1.1.0 (TBD)
+## dbt-spark 1.2.0 (April 28, 2022)
+
+### Fixes
+- `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
+
+## dbt-spark 1.1.0 (April 28, 2022)
 
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index dd090a23..4f7b9d4c 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -23,7 +23,7 @@
 
 logger = AdapterLogger("Spark")
 
-GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation"
+GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "spark__get_columns_in_relation_raw"
 LIST_SCHEMAS_MACRO_NAME = "list_schemas"
 LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
 DROP_RELATION_MACRO_NAME = "drop_relation"
@@ -225,7 +225,9 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
             # use get_columns_in_relation spark macro
             # which would execute 'describe extended tablename' query
             try:
-                rows: List[agate.Row] = super().get_columns_in_relation(relation)
+                rows: List[agate.Row] = self.execute_macro(
+                    GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
+                )
                 columns = self.parse_describe_extended(relation, rows)
             except dbt.exceptions.RuntimeException as e:
                 # spark would throw error when table doesn't exist, where other
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 22381d9e..5322597f 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -168,11 +168,15 @@
   {%- endcall -%}
 {% endmacro %}
 
-{% macro spark__get_columns_in_relation(relation) -%}
-  {% call statement('get_columns_in_relation', fetch_result=True) %}
+{% macro spark__get_columns_in_relation_raw(relation) -%}
+  {% call statement('get_columns_in_relation_raw', fetch_result=True) %}
       describe extended {{ relation.include(schema=(schema is not none)) }}
   {% endcall %}
-  {% do return(load_result('get_columns_in_relation').table) %}
+  {% do return(load_result('get_columns_in_relation_raw').table) %}
+{% endmacro %}
+
+{% macro spark__get_columns_in_relation(relation) -%}
+  {{ return(adapter.get_columns_in_relation(relation)) }}
 {% endmacro %}
 
 {% macro spark__list_relations_without_caching(relation) %}

From 8744cf1faa0b57fe9e797a32a109ba4e7a056e76 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 17 May 2022 12:43:31 -0600
Subject: [PATCH 306/603] Fix misnomers within the comment (#352)

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 7ba95d47..0c624713 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,7 +8,7 @@ def pytest_addoption(parser):
     parser.addoption("--profile", action="store", default="apache_spark", type=str)
 
 
-# Using @pytest.mark.skip_adapter('apache_spark') uses the 'skip_by_adapter_type'
+# Using @pytest.mark.skip_profile('apache_spark') uses the 'skip_by_profile_type'
 # autouse fixture below
 def pytest_configure(config):
     config.addinivalue_line(

From 4c41d9e3686250827da958bc56623bb7995d7566 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN 
Date: Tue, 31 May 2022 11:53:26 -0700
Subject: [PATCH 307/603] Use dispatch pattern for get_columns_in_relation_raw
 macro. (#365)

---
 dbt/adapters/spark/impl.py            | 2 +-
 dbt/include/spark/macros/adapters.sql | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 4f7b9d4c..699eca9d 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -23,7 +23,7 @@
 
 logger = AdapterLogger("Spark")
 
-GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "spark__get_columns_in_relation_raw"
+GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "get_columns_in_relation_raw"
 LIST_SCHEMAS_MACRO_NAME = "list_schemas"
 LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
 DROP_RELATION_MACRO_NAME = "drop_relation"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 5322597f..abdeacb7 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -168,6 +168,10 @@
   {%- endcall -%}
 {% endmacro %}
 
+{% macro get_columns_in_relation_raw(relation) -%}
+  {{ return(adapter.dispatch('get_columns_in_relation_raw', 'dbt')(relation)) }}
+{%- endmacro -%}
+
 {% macro spark__get_columns_in_relation_raw(relation) -%}
   {% call statement('get_columns_in_relation_raw', fetch_result=True) %}
       describe extended {{ relation.include(schema=(schema is not none)) }}

From bc9fc0baaacce28706d0542391704d9ea971cee4 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Wed, 1 Jun 2022 11:30:39 -0500
Subject: [PATCH 308/603] First draft of adding contributing.md to each adapter
 repo (#276)

* First draft of adding contributing.md to each adapter repo

* updates after kyle review, and minor changes regarding review process and CI as spark still uses CircleCI and not GHA

* minor addition

* add test.env.example

* fix eof black errors

* added example for functional tests
---
 CONTRIBUTING.MD  | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 test.env.example |  10 +++++
 2 files changed, 111 insertions(+)
 create mode 100644 CONTRIBUTING.MD
 create mode 100644 test.env.example

diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.MD
new file mode 100644
index 00000000..c0d9bb3d
--- /dev/null
+++ b/CONTRIBUTING.MD
@@ -0,0 +1,101 @@
+# Contributing to `dbt-spark`
+
+1. [About this document](#about-this-document)
+2. [Getting the code](#getting-the-code)
+3. [Running `dbt-spark` in development](#running-dbt-spark-in-development)
+4. [Testing](#testing)
+5. [Updating Docs](#updating-docs)
+6. [Submitting a Pull Request](#submitting-a-pull-request)
+
+## About this document
+This document is a guide intended for folks interested in contributing to `dbt-spark`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt-spark`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, Python modules, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
+
+For those wishing to contribute, we highly suggest reading dbt-core's [contribution guide](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md) if you haven't already. Almost all of the information there is applicable to contributing here, too!
+
+### Signing the CLA
+
+Please note that all contributors to `dbt-spark` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt-spark` codebase. If you are unable to sign the CLA, the `dbt-spark` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
+
+
+## Getting the code
+
+You will need `git` in order to download and modify the `dbt-spark` source code. You can find directions [here](https://github.com/git-guides/install-git) on how to install `git`.
+
+### External contributors
+
+If you are not a member of the `dbt-labs` GitHub organization, you can contribute to `dbt-spark` by forking the `dbt-spark` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
+
+1. fork the `dbt-spark` repository
+2. clone your fork locally
+3. check out a new branch for your proposed changes
+4. push changes to your fork
+5. open a pull request against `dbt-labs/dbt-spark` from your forked repository
+
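+A minimal sketch of that workflow, assuming `<your-username>` is your GitHub username and `my-fix` is a placeholder branch name:
+
+```sh
+# clone your fork and create a feature branch
+git clone https://github.com/<your-username>/dbt-spark.git
+cd dbt-spark
+git checkout -b my-fix
+# ...make and commit your changes, then push the branch to your fork
+git push origin my-fix
+# finally, open a pull request against dbt-labs/dbt-spark from the GitHub UI
+```
+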
+### dbt Labs contributors
+
+If you are a member of the `dbt Labs` GitHub organization, you will have push access to the `dbt-spark` repo. Rather than forking `dbt-spark` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.
+
+
+## Running `dbt-spark` in development
+
+### Installation
+
+First, make sure that you set up your `virtualenv` as described in [Setting up an environment](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md#setting-up-an-environment). Ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt-spark`'s latest dependencies:
+
+```sh
+pip install -e . -r dev-requirements.txt
+```
+
+When `dbt-spark` is installed this way, any changes you make to the `dbt-spark` source code will be reflected immediately in your next `dbt-spark` run.
+
+To confirm you have the correct version of `dbt-core` installed, please run `dbt --version` and `which dbt`.
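+
+For example, from inside your activated virtualenv (this is only a sketch of the check described above):
+
+```sh
+# confirm which dbt executable is on your PATH
+which dbt
+# confirm the installed dbt-core and plugin versions
+dbt --version
+```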
+
+
+## Testing
+
+### Initial Setup
+
+`dbt-spark` uses test credentials specified in a `test.env` file in the root of the repository. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing. To create your `test.env` file, copy the provided example file, then supply your relevant credentials.
+
+```sh
+cp test.env.example test.env
+$EDITOR test.env
+```
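+
+The variable names come from `test.env.example` (added later in this commit); a filled-in `test.env` might look like the sketch below, where every value is a placeholder rather than a real credential:
+
+```sh
+DBT_DATABRICKS_CLUSTER_NAME=<cluster-id>
+DBT_DATABRICKS_ENDPOINT=<sql-endpoint-id>
+DBT_DATABRICKS_HOST_NAME=<workspace-host-name>
+DBT_DATABRICKS_TOKEN=<personal-access-token>
+ODBC_DRIVER=/path/to/your/odbc/driver
+```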
+
+### Test commands
+There are a few methods for running tests locally.
+
+#### `tox`
+`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel; for example, you can run unit tests for Python 3.7, Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. You can also run unit tests for specific Python versions with `tox -e py37`. The configuration of these tests is located in `tox.ini`.
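+
+For example, with the dev requirements installed (which include `tox`), the commands mentioned above look like this:
+
+```sh
+# run all configured tox environments in parallel
+tox -p
+# run the unit tests for a specific Python version
+tox -e py37
+```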
+
+#### `pytest`
+Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed, you can do things like:
+
+```sh
+# run specific spark integration tests
+python -m pytest -m profile_spark tests/integration/get_columns_in_relation
+# run specific functional tests
+python -m pytest --profile databricks_sql_endpoint tests/functional/adapter/test_basic.py
+# run all unit tests in a file
+python -m pytest tests/unit/test_adapter.py
+# run a specific unit test
+python -m pytest tests/unit/test_adapter.py::TestSparkAdapter::test_profile_with_database
+```
+## Updating Docs
+
+Many changes will require an update to the `dbt-spark` docs. Here are some useful resources:
+
+- Docs are [here](https://docs.getdbt.com/).
+- The docs repo for making changes is located [here]( https://github.com/dbt-labs/docs.getdbt.com).
+- The changes made are likely to impact one or both of the [Spark Profile](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) or [Spark Configs](https://docs.getdbt.com/reference/resource-configs/spark-configs) pages.
+- We ask every community member who makes a user-facing change to open an issue or PR regarding doc changes.
+
+## Submitting a Pull Request
+
+dbt Labs provides a CI environment to test changes to the `dbt-spark` adapter, as well as periodic checks against the development version of `dbt-core`, through GitHub Actions.
+
+A `dbt-spark` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
+
+Once all comments have been addressed and questions answered, the `dbt-spark` maintainer can trigger CI testing.
+
+Once all tests are passing and your PR has been approved, a `dbt-spark` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:
diff --git a/test.env.example b/test.env.example
new file mode 100644
index 00000000..bf4cf2ee
--- /dev/null
+++ b/test.env.example
@@ -0,0 +1,10 @@
+# Cluster ID
+DBT_DATABRICKS_CLUSTER_NAME=
+# SQL Endpoint
+DBT_DATABRICKS_ENDPOINT=
+# Server Hostname value
+DBT_DATABRICKS_HOST_NAME=
+# personal token
+DBT_DATABRICKS_TOKEN=
+# file path to local ODBC driver
+ODBC_DRIVER=

From ca1b5b6df849068b0151dd8e7166623256b83c67 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 8 Jun 2022 09:33:36 +0200
Subject: [PATCH 309/603] Add invocation env to user agent string (#367)

* Add invocation env to user agent string

* Consistency + fixups

* Changelog entry

* Try diff pattern
---
 CHANGELOG.md                      | 3 +++
 dbt/adapters/spark/connections.py | 8 +++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 961fe01a..49e030ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,9 @@
 ### Fixes
 - `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
 
+### Under the hood
+- Add `DBT_INVOCATION_ENV` environment variable to ODBC user agent string ([#366](https://github.com/dbt-labs/dbt-spark/pull/366))
+
 ## dbt-spark 1.1.0 (April 28, 2022)
 
 ### Features
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 59ceb9dd..135463eb 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,3 +1,5 @@
+import os
+
 from contextlib import contextmanager
 
 import dbt.exceptions
@@ -7,6 +9,7 @@
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
 from dbt.adapters.spark import __version__
+from dbt.tracking import DBT_INVOCATION_ENV
 
 try:
     from TCLIService.ttypes import TOperationState as ThriftState
@@ -409,9 +412,8 @@ def open(cls, connection):
                     cls.validate_creds(creds, required_fields)
 
                     dbt_spark_version = __version__.version
-                    user_agent_entry = (
-                        f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
-                    )
+                    dbt_invocation_env = os.getenv(DBT_INVOCATION_ENV) or "manual"
+                    user_agent_entry = f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks, {dbt_invocation_env})"  # noqa
 
                     # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm
                     ssp = {f"SSP_{k}": f"{{{v}}}" for k, v in creds.server_side_parameters.items()}

From 9614bca5b471089692bd1df73760b23e83d537bb Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Fri, 17 Jun 2022 16:06:39 +0200
Subject: [PATCH 310/603] Initialize lift + shift for cross-db macros (#359)

* Initialize lift + shift, dateadd + datediff

* Fixups

* More fixups

* Next round of utilities

* Reorgnanize, skip, max for bool_or

* fail -> skip_profile

* Rm branch names
---
 dbt/include/spark/macros/utils/any_value.sql  |   5 +
 .../spark/macros/utils/assert_not_null.sql    |   9 ++
 dbt/include/spark/macros/utils/bool_or.sql    |  11 ++
 dbt/include/spark/macros/utils/concat.sql     |   3 +
 dbt/include/spark/macros/utils/dateadd.sql    |  62 +++++++++
 dbt/include/spark/macros/utils/datediff.sql   | 107 ++++++++++++++++
 dbt/include/spark/macros/utils/listagg.sql    |  17 +++
 dbt/include/spark/macros/utils/split_part.sql |  23 ++++
 tests/functional/adapter/test_basic.py        |   4 +-
 .../adapter/utils/fixture_listagg.py          |  61 +++++++++
 tests/functional/adapter/utils/test_utils.py  | 121 ++++++++++++++++++
 11 files changed, 422 insertions(+), 1 deletion(-)
 create mode 100644 dbt/include/spark/macros/utils/any_value.sql
 create mode 100644 dbt/include/spark/macros/utils/assert_not_null.sql
 create mode 100644 dbt/include/spark/macros/utils/bool_or.sql
 create mode 100644 dbt/include/spark/macros/utils/concat.sql
 create mode 100644 dbt/include/spark/macros/utils/dateadd.sql
 create mode 100644 dbt/include/spark/macros/utils/datediff.sql
 create mode 100644 dbt/include/spark/macros/utils/listagg.sql
 create mode 100644 dbt/include/spark/macros/utils/split_part.sql
 create mode 100644 tests/functional/adapter/utils/fixture_listagg.py
 create mode 100644 tests/functional/adapter/utils/test_utils.py

diff --git a/dbt/include/spark/macros/utils/any_value.sql b/dbt/include/spark/macros/utils/any_value.sql
new file mode 100644
index 00000000..eb0a019b
--- /dev/null
+++ b/dbt/include/spark/macros/utils/any_value.sql
@@ -0,0 +1,5 @@
+{% macro spark__any_value(expression) -%}
+    {#-- return any value (non-deterministic)  --#}
+    first({{ expression }})
+
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/assert_not_null.sql b/dbt/include/spark/macros/utils/assert_not_null.sql
new file mode 100644
index 00000000..e5454bce
--- /dev/null
+++ b/dbt/include/spark/macros/utils/assert_not_null.sql
@@ -0,0 +1,9 @@
+{% macro assert_not_null(function, arg) -%}
+  {{ return(adapter.dispatch('assert_not_null', 'dbt')(function, arg)) }}
+{%- endmacro %}
+
+{% macro spark__assert_not_null(function, arg) %}
+
+    coalesce({{function}}({{arg}}), nvl2({{function}}({{arg}}), assert_true({{function}}({{arg}}) is not null), null))
+
+{% endmacro %}
diff --git a/dbt/include/spark/macros/utils/bool_or.sql b/dbt/include/spark/macros/utils/bool_or.sql
new file mode 100644
index 00000000..60d705eb
--- /dev/null
+++ b/dbt/include/spark/macros/utils/bool_or.sql
@@ -0,0 +1,11 @@
+{#-- Spark v3 supports 'bool_or' and 'any', but Spark v2 needs to use 'max' for this
+  -- https://spark.apache.org/docs/latest/api/sql/index.html#any
+  -- https://spark.apache.org/docs/latest/api/sql/index.html#bool_or
+  -- https://spark.apache.org/docs/latest/api/sql/index.html#max
+#}
+
+{% macro spark__bool_or(expression) -%}
+
+    max({{ expression }})
+
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/concat.sql b/dbt/include/spark/macros/utils/concat.sql
new file mode 100644
index 00000000..30f1a420
--- /dev/null
+++ b/dbt/include/spark/macros/utils/concat.sql
@@ -0,0 +1,3 @@
+{% macro spark__concat(fields) -%}
+    concat({{ fields|join(', ') }})
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/dateadd.sql b/dbt/include/spark/macros/utils/dateadd.sql
new file mode 100644
index 00000000..e2a20d0f
--- /dev/null
+++ b/dbt/include/spark/macros/utils/dateadd.sql
@@ -0,0 +1,62 @@
+{% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %}
+
+    {%- set clock_component -%}
+        {# make sure the dates + timestamps are real, otherwise raise an error asap #}
+        to_unix_timestamp({{ assert_not_null('to_timestamp', from_date_or_timestamp) }})
+        - to_unix_timestamp({{ assert_not_null('date', from_date_or_timestamp) }})
+    {%- endset -%}
+
+    {%- if datepart in ['day', 'week'] -%}
+
+        {%- set multiplier = 7 if datepart == 'week' else 1 -%}
+
+        to_timestamp(
+            to_unix_timestamp(
+                date_add(
+                    {{ assert_not_null('date', from_date_or_timestamp) }},
+                    cast({{interval}} * {{multiplier}} as int)
+                )
+            ) + {{clock_component}}
+        )
+
+    {%- elif datepart in ['month', 'quarter', 'year'] -%}
+
+        {%- set multiplier -%}
+            {%- if datepart == 'month' -%} 1
+            {%- elif datepart == 'quarter' -%} 3
+            {%- elif datepart == 'year' -%} 12
+            {%- endif -%}
+        {%- endset -%}
+
+        to_timestamp(
+            to_unix_timestamp(
+                add_months(
+                    {{ assert_not_null('date', from_date_or_timestamp) }},
+                    cast({{interval}} * {{multiplier}} as int)
+                )
+            ) + {{clock_component}}
+        )
+
+    {%- elif datepart in ('hour', 'minute', 'second', 'millisecond', 'microsecond') -%}
+
+        {%- set multiplier -%}
+            {%- if datepart == 'hour' -%} 3600
+            {%- elif datepart == 'minute' -%} 60
+            {%- elif datepart == 'second' -%} 1
+            {%- elif datepart == 'millisecond' -%} (1/1000000)
+            {%- elif datepart == 'microsecond' -%} (1/1000000)
+            {%- endif -%}
+        {%- endset -%}
+
+        to_timestamp(
+            {{ assert_not_null('to_unix_timestamp', from_date_or_timestamp) }}
+            + cast({{interval}} * {{multiplier}} as int)
+        )
+
+    {%- else -%}
+
+        {{ exceptions.raise_compiler_error("macro dateadd not implemented for datepart ~ '" ~ datepart ~ "' ~ on Spark") }}
+
+    {%- endif -%}
+
+{% endmacro %}
diff --git a/dbt/include/spark/macros/utils/datediff.sql b/dbt/include/spark/macros/utils/datediff.sql
new file mode 100644
index 00000000..d0e684c4
--- /dev/null
+++ b/dbt/include/spark/macros/utils/datediff.sql
@@ -0,0 +1,107 @@
+{% macro spark__datediff(first_date, second_date, datepart) %}
+
+    {%- if datepart in ['day', 'week', 'month', 'quarter', 'year'] -%}
+
+        {# make sure the dates are real, otherwise raise an error asap #}
+        {% set first_date = assert_not_null('date', first_date) %}
+        {% set second_date = assert_not_null('date', second_date) %}
+
+    {%- endif -%}
+
+    {%- if datepart == 'day' -%}
+
+        datediff({{second_date}}, {{first_date}})
+
+    {%- elif datepart == 'week' -%}
+
+        case when {{first_date}} < {{second_date}}
+            then floor(datediff({{second_date}}, {{first_date}})/7)
+            else ceil(datediff({{second_date}}, {{first_date}})/7)
+            end
+
+        -- did we cross a week boundary (Sunday)?
+        + case
+            when {{first_date}} < {{second_date}} and dayofweek({{second_date}}) < dayofweek({{first_date}}) then 1
+            when {{first_date}} > {{second_date}} and dayofweek({{second_date}}) > dayofweek({{first_date}}) then -1
+            else 0 end
+
+    {%- elif datepart == 'month' -%}
+
+        case when {{first_date}} < {{second_date}}
+            then floor(months_between(date({{second_date}}), date({{first_date}})))
+            else ceil(months_between(date({{second_date}}), date({{first_date}})))
+            end
+
+        -- did we cross a month boundary?
+        + case
+            when {{first_date}} < {{second_date}} and dayofmonth({{second_date}}) < dayofmonth({{first_date}}) then 1
+            when {{first_date}} > {{second_date}} and dayofmonth({{second_date}}) > dayofmonth({{first_date}}) then -1
+            else 0 end
+
+    {%- elif datepart == 'quarter' -%}
+
+        case when {{first_date}} < {{second_date}}
+            then floor(months_between(date({{second_date}}), date({{first_date}}))/3)
+            else ceil(months_between(date({{second_date}}), date({{first_date}}))/3)
+            end
+
+        -- did we cross a quarter boundary?
+        + case
+            when {{first_date}} < {{second_date}} and (
+                (dayofyear({{second_date}}) - (quarter({{second_date}}) * 365/4))
+                < (dayofyear({{first_date}}) - (quarter({{first_date}}) * 365/4))
+            ) then 1
+            when {{first_date}} > {{second_date}} and (
+                (dayofyear({{second_date}}) - (quarter({{second_date}}) * 365/4))
+                > (dayofyear({{first_date}}) - (quarter({{first_date}}) * 365/4))
+            ) then -1
+            else 0 end
+
+    {%- elif datepart == 'year' -%}
+
+        year({{second_date}}) - year({{first_date}})
+
+    {%- elif datepart in ('hour', 'minute', 'second', 'millisecond', 'microsecond') -%}
+
+        {%- set divisor -%}
+            {%- if datepart == 'hour' -%} 3600
+            {%- elif datepart == 'minute' -%} 60
+            {%- elif datepart == 'second' -%} 1
+            {%- elif datepart == 'millisecond' -%} (1/1000)
+            {%- elif datepart == 'microsecond' -%} (1/1000000)
+            {%- endif -%}
+        {%- endset -%}
+
+        case when {{first_date}} < {{second_date}}
+            then ceil((
+                {# make sure the timestamps are real, otherwise raise an error asap #}
+                {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', second_date)) }}
+                - {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', first_date)) }}
+            ) / {{divisor}})
+            else floor((
+                {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', second_date)) }}
+                - {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', first_date)) }}
+            ) / {{divisor}})
+            end
+
+            {% if datepart == 'millisecond' %}
+                + cast(date_format({{second_date}}, 'SSS') as int)
+                - cast(date_format({{first_date}}, 'SSS') as int)
+            {% endif %}
+
+            {% if datepart == 'microsecond' %}
+                {% set capture_str = '[0-9]{4}-[0-9]{2}-[0-9]{2}.[0-9]{2}:[0-9]{2}:[0-9]{2}.([0-9]{6})' %}
+                -- Spark doesn't really support microseconds, so this is a massive hack!
+                -- It will only work if the timestamp-string is of the format
+                -- 'yyyy-MM-dd-HH mm.ss.SSSSSS'
+                + cast(regexp_extract({{second_date}}, '{{capture_str}}', 1) as int)
+                - cast(regexp_extract({{first_date}}, '{{capture_str}}', 1) as int)
+            {% endif %}
+
+    {%- else -%}
+
+        {{ exceptions.raise_compiler_error("macro datediff not implemented for datepart ~ '" ~ datepart ~ "' ~ on Spark") }}
+
+    {%- endif -%}
+
+{% endmacro %}
diff --git a/dbt/include/spark/macros/utils/listagg.sql b/dbt/include/spark/macros/utils/listagg.sql
new file mode 100644
index 00000000..3577edb7
--- /dev/null
+++ b/dbt/include/spark/macros/utils/listagg.sql
@@ -0,0 +1,17 @@
+{% macro spark__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}
+
+  {% if order_by_clause %}
+    {{ exceptions.warn("order_by_clause is not supported for listagg on Spark/Databricks") }}
+  {% endif %}
+
+  {% set collect_list %} collect_list({{ measure }}) {% endset %}
+
+  {% set limited %} slice({{ collect_list }}, 1, {{ limit_num }}) {% endset %}
+
+  {% set collected = limited if limit_num else collect_list %}
+
+  {% set final %} array_join({{ collected }}, {{ delimiter_text }}) {% endset %}
+
+  {% do return(final) %}
+
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/split_part.sql b/dbt/include/spark/macros/utils/split_part.sql
new file mode 100644
index 00000000..d5ae3092
--- /dev/null
+++ b/dbt/include/spark/macros/utils/split_part.sql
@@ -0,0 +1,23 @@
+{% macro spark__split_part(string_text, delimiter_text, part_number) %}
+
+    {% set delimiter_expr %}
+
+        -- escape if starts with a special character
+        case when regexp_extract({{ delimiter_text }}, '([^A-Za-z0-9])(.*)', 1) != '_'
+            then concat('\\', {{ delimiter_text }})
+            else {{ delimiter_text }} end
+
+    {% endset %}
+
+    {% set split_part_expr %}
+
+    split(
+        {{ string_text }},
+        {{ delimiter_expr }}
+        )[({{ part_number - 1 }})]
+
+    {% endset %}
+
+    {{ return(split_part_expr) }}
+
+{% endmacro %}
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index 70f3267a..e0cf2f7f 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -64,7 +64,7 @@ def project_config_update(self):
         }
 
 
-#hese tests were not enabled in the dbtspec files, so skipping here.
+# These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
 @pytest.mark.skip_profile('apache_spark', 'spark_session')
 class TestSnapshotTimestampSpark(BaseSnapshotTimestamp):
@@ -79,5 +79,7 @@ def project_config_update(self):
             }
         }
 
+
+@pytest.mark.skip_profile('spark_session')
 class TestBaseAdapterMethod(BaseAdapterMethod):
     pass
\ No newline at end of file
diff --git a/tests/functional/adapter/utils/fixture_listagg.py b/tests/functional/adapter/utils/fixture_listagg.py
new file mode 100644
index 00000000..0262ca23
--- /dev/null
+++ b/tests/functional/adapter/utils/fixture_listagg.py
@@ -0,0 +1,61 @@
+# SparkSQL does not support 'order by' for its 'listagg' equivalent
+# the argument is ignored, so let's ignore those fields when checking equivalency
+
+models__test_listagg_no_order_by_sql = """
+with data as (
+    select * from {{ ref('data_listagg') }}
+),
+data_output as (
+    select * from {{ ref('data_listagg_output') }}
+),
+calculate as (
+/*
+
+    select
+        group_col,
+        {{ listagg('string_text', "'_|_'", "order by order_col") }} as actual,
+        'bottom_ordered' as version
+    from data
+    group by group_col
+    union all
+    select
+        group_col,
+        {{ listagg('string_text', "'_|_'", "order by order_col", 2) }} as actual,
+        'bottom_ordered_limited' as version
+    from data
+    group by group_col
+    union all
+
+*/
+    select
+        group_col,
+        {{ listagg('string_text', "', '") }} as actual,
+        'comma_whitespace_unordered' as version
+    from data
+    where group_col = 3
+    group by group_col
+    union all
+    select
+        group_col,
+        {{ listagg('DISTINCT string_text', "','") }} as actual,
+        'distinct_comma' as version
+    from data
+    where group_col = 3
+    group by group_col
+    union all
+    select
+        group_col,
+        {{ listagg('string_text') }} as actual,
+        'no_params' as version
+    from data
+    where group_col = 3
+    group by group_col
+)
+select
+    calculate.actual,
+    data_output.expected
+from calculate
+left join data_output
+on calculate.group_col = data_output.group_col
+and calculate.version = data_output.version
+"""
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
new file mode 100644
index 00000000..9137c2f7
--- /dev/null
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -0,0 +1,121 @@
+import pytest
+
+from dbt.tests.adapter.utils.test_any_value import BaseAnyValue
+from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
+from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
+from dbt.tests.adapter.utils.test_concat import BaseConcat
+from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
+from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
+from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
+from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesQuote
+from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesBackslash
+from dbt.tests.adapter.utils.test_except import BaseExcept
+from dbt.tests.adapter.utils.test_hash import BaseHash
+from dbt.tests.adapter.utils.test_intersect import BaseIntersect
+from dbt.tests.adapter.utils.test_last_day import BaseLastDay
+from dbt.tests.adapter.utils.test_length import BaseLength
+from dbt.tests.adapter.utils.test_position import BasePosition
+from dbt.tests.adapter.utils.test_replace import BaseReplace
+from dbt.tests.adapter.utils.test_right import BaseRight
+from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast
+from dbt.tests.adapter.utils.test_split_part import BaseSplitPart
+from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral
+
+# requires modification
+from dbt.tests.adapter.utils.test_listagg import BaseListagg
+from dbt.tests.adapter.utils.fixture_listagg import models__test_listagg_yml
+from tests.functional.adapter.utils.fixture_listagg import models__test_listagg_no_order_by_sql
+
+
+class TestAnyValue(BaseAnyValue):
+    pass
+
+
+class TestBoolOr(BaseBoolOr):
+    pass
+
+
+class TestCastBoolToText(BaseCastBoolToText):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestConcat(BaseConcat):
+    pass
+
+
+class TestDateAdd(BaseDateAdd):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestDateDiff(BaseDateDiff):
+    pass
+
+
+class TestDateTrunc(BaseDateTrunc):
+    pass
+
+
+class TestEscapeSingleQuotes(BaseEscapeSingleQuotesQuote):
+    pass
+
+
+class TestExcept(BaseExcept):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestHash(BaseHash):
+    pass
+
+
+class TestIntersect(BaseIntersect):
+    pass
+
+
+class TestLastDay(BaseLastDay):
+    pass
+
+
+class TestLength(BaseLength):
+    pass
+
+
+# SparkSQL does not support 'order by' for its 'listagg' equivalent
+# the argument is ignored, so let's ignore those fields when checking equivalency
+class TestListagg(BaseListagg):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "test_listagg.yml": models__test_listagg_yml,
+            "test_listagg.sql": self.interpolate_macro_namespace(
+                models__test_listagg_no_order_by_sql, "listagg"
+            ),
+        }
+
+
+class TestPosition(BasePosition):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestReplace(BaseReplace):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestRight(BaseRight):
+    pass
+
+
+class TestSafeCast(BaseSafeCast):
+    pass
+
+
+class TestSplitPart(BaseSplitPart):
+    pass
+
+
+class TestStringLiteral(BaseStringLiteral):
+    pass

From 120ec42d7f848cd84e9e88512eb10c63ac8f88bc Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 24 Jun 2022 14:32:58 -0400
Subject: [PATCH 311/603] Bumping version to 1.2.0b1 (#374)

* Bumping version to 1.2.0b1

* Remove whitespace

* Update CHANGELOG.md

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                  | 2 +-
 CHANGELOG.md                      | 9 ++++++++-
 dbt/adapters/spark/__version__.py | 2 +-
 setup.py                          | 2 +-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 4de02c34..0a892fdc 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0a1
+current_version = 1.2.0b1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 49e030ab..0a3b400f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,17 @@
-## dbt-spark 1.2.0 (April 28, 2022)
+## dbt-spark 1.2.0b1 (June 24, 2022)
 
 ### Fixes
 - `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
 
 ### Under the hood
 - Add `DBT_INVOCATION_ENV` environment variable to ODBC user agent string ([#366](https://github.com/dbt-labs/dbt-spark/pull/366))
+- Initialize lift + shift for cross-db macros ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
+- Add invocation env to user agent string ([#367](https://github.com/dbt-labs/dbt-spark/pull/367))
+- Use dispatch pattern for get_columns_in_relation_raw macro ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
+
+### Contributors
+- [@ueshin](https://github.com/dbt-labs/dbt-spark/commits?author=ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
+- [@dbeatty10](https://github.com/dbeatty10) ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
 
 ## dbt-spark 1.1.0 (April 28, 2022)
 
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index a6b97722..6e8eee92 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.2.0a1"
+version = "1.2.0b1"
diff --git a/setup.py b/setup.py
index 836aeed4..aba51b34 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.2.0a1"
+package_version = "1.2.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 0082e7372d27f7c81019d0b9bb6193baad564e15 Mon Sep 17 00:00:00 2001
From: Neelesh Salian 
Date: Tue, 28 Jun 2022 08:46:09 -0700
Subject: [PATCH 312/603] Upgrade to Spark 3.1.1 with testing (#349)

* Testing Spark3 upgrade.WIP

* Skip tests.WIP

* update readme and setup for pyspark.WIP

* Fix circle ci version and bump mem value

* Bump memory, fix nit, bump pyhive version

* Pyhive version change

* enabled sasl for metastore

* Explicit server2 host port

* Try showing debug-level logs

* Rm -n4

* move to godatadriven lates spark image

* restore to 2 to check output

* Restore debug and parallelized to check output

* Revert to 3.0

* Revert to normal state

* open source spark image

* Change to pyspark image

* Testing with gdd spark 3.0 for thrift

* Switch back to dbt user pass

* Spark 3.1.1 gdd image without configs

* Clean up

* Skip session test

* Clean up for review

* Update to CHANGELOG

Co-authored-by: Jeremy Cohen 
---
 .circleci/config.yml                   | 19 +------------------
 CHANGELOG.md                           |  2 ++
 README.md                              |  2 +-
 docker-compose.yml                     |  4 ++--
 docker/spark-defaults.conf             |  4 +++-
 tests/functional/adapter/test_basic.py |  2 +-
 6 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 34e449ac..0a1a3e1b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,29 +33,12 @@ jobs:
       DBT_INVOCATION_ENV: circle
     docker:
       - image: fishtownanalytics/test-container:10
-      - image: godatadriven/spark:2
+      - image: godatadriven/spark:3.1.1
         environment:
           WAIT_FOR: localhost:5432
         command: >
           --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
           --name Thrift JDBC/ODBC Server
-          --conf spark.hadoop.javax.jdo.option.ConnectionURL=jdbc:postgresql://localhost/metastore
-          --conf spark.hadoop.javax.jdo.option.ConnectionUserName=dbt
-          --conf spark.hadoop.javax.jdo.option.ConnectionPassword=dbt
-          --conf spark.hadoop.javax.jdo.option.ConnectionDriverName=org.postgresql.Driver
-          --conf spark.serializer=org.apache.spark.serializer.KryoSerializer
-          --conf spark.jars.packages=org.apache.hudi:hudi-spark-bundle_2.11:0.9.0
-          --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension
-          --conf spark.driver.userClassPathFirst=true
-          --conf spark.hadoop.datanucleus.autoCreateTables=true
-          --conf spark.hadoop.datanucleus.schema.autoCreateTables=true
-          --conf spark.hadoop.datanucleus.fixedDatastore=false
-          --conf spark.sql.hive.convertMetastoreParquet=false
-          --hiveconf hoodie.datasource.hive_sync.use_jdbc=false
-          --hiveconf hoodie.datasource.hive_sync.mode=hms
-          --hiveconf datanucleus.schema.autoCreateAll=true
-          --hiveconf hive.metastore.schema.verification=false
-
       - image: postgres:9.6.17-alpine
         environment:
           POSTGRES_USER: dbt
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a3b400f..0ab9f4e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
+- Upgrade Spark version to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ### Under the hood
 - Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356))
@@ -29,6 +30,7 @@
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
+- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ## dbt-spark 1.1.0b1 (March 23, 2022)
 
diff --git a/README.md b/README.md
index 037a4989..241d869d 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark)
 
 ## Running locally
 A `docker-compose` environment starts a Spark Thrift server and a Postgres database as a Hive Metastore backend.
-Note that this is spark 2 not spark 3 so some functionalities might not be available.
+Note: dbt-spark now supports Spark 3.1.1 (formerly on Spark 2.x).
 
 The following command would start two docker containers
 ```
diff --git a/docker-compose.yml b/docker-compose.yml
index 8054dfd7..9bc9e509 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,8 +1,8 @@
 version: "3.7"
 services:
 
-  dbt-spark2-thrift:
-    image: godatadriven/spark:3.0
+  dbt-spark3-thrift:
+    image: godatadriven/spark:3.1.1
     ports:
       - "10000:10000"
       - "4040:4040"
diff --git a/docker/spark-defaults.conf b/docker/spark-defaults.conf
index 48a0501c..30ec5959 100644
--- a/docker/spark-defaults.conf
+++ b/docker/spark-defaults.conf
@@ -1,7 +1,9 @@
+spark.driver.memory 2g
+spark.executor.memory 2g
 spark.hadoop.datanucleus.autoCreateTables	true
 spark.hadoop.datanucleus.schema.autoCreateTables	true
 spark.hadoop.datanucleus.fixedDatastore 	false
 spark.serializer	org.apache.spark.serializer.KryoSerializer
-spark.jars.packages	org.apache.hudi:hudi-spark3-bundle_2.12:0.9.0
+spark.jars.packages	org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0
 spark.sql.extensions	org.apache.spark.sql.hudi.HoodieSparkSessionExtension
 spark.driver.userClassPathFirst true
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index e0cf2f7f..e1a57fd3 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -82,4 +82,4 @@ def project_config_update(self):
 
 @pytest.mark.skip_profile('spark_session')
 class TestBaseAdapterMethod(BaseAdapterMethod):
-    pass
\ No newline at end of file
+    pass

From cc2daed76627203d7e97d4394b607b21ee6abed8 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 29 Jun 2022 15:23:43 +0200
Subject: [PATCH 313/603] Disable TestDateDiff on apache_spark (#375)

* Disable TestDateDiff on apache_spark

* i dont really know what im doing
---
 tests/functional/adapter/utils/test_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 9137c2f7..c71161e6 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -48,7 +48,8 @@ class TestDateAdd(BaseDateAdd):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+# this generates too much SQL to run successfully in our testing environments :(
+@pytest.mark.skip_profile('apache_spark', 'spark_session')
 class TestDateDiff(BaseDateDiff):
     pass
 

From 48e1989888bc1a858d58d93344fe777d7bca887c Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Thu, 30 Jun 2022 12:23:29 +0200
Subject: [PATCH 314/603] Revert #367 (#378)

---
 CHANGELOG.md                      | 3 +--
 dbt/adapters/spark/connections.py | 8 +++-----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ab9f4e8..f99b14c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,13 +4,12 @@
 - `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
 
 ### Under the hood
-- Add `DBT_INVOCATION_ENV` environment variable to ODBC user agent string ([#366](https://github.com/dbt-labs/dbt-spark/pull/366))
 - Initialize lift + shift for cross-db macros ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
 - Add invocation env to user agent string ([#367](https://github.com/dbt-labs/dbt-spark/pull/367))
 - Use dispatch pattern for get_columns_in_relation_raw macro ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
 
 ### Contributors
-- [@ueshin](https://github.com/dbt-labs/dbt-spark/commits?author=ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
+- [@ueshin](https://github.com/ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
 - [@dbeatty10](https://github.com/dbeatty10) ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
 
 ## dbt-spark 1.1.0 (April 28, 2022)
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 135463eb..59ceb9dd 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,5 +1,3 @@
-import os
-
 from contextlib import contextmanager
 
 import dbt.exceptions
@@ -9,7 +7,6 @@
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
 from dbt.adapters.spark import __version__
-from dbt.tracking import DBT_INVOCATION_ENV
 
 try:
     from TCLIService.ttypes import TOperationState as ThriftState
@@ -412,8 +409,9 @@ def open(cls, connection):
                     cls.validate_creds(creds, required_fields)
 
                     dbt_spark_version = __version__.version
-                    dbt_invocation_env = os.getenv(DBT_INVOCATION_ENV) or "manual"
-                    user_agent_entry = f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks, {dbt_invocation_env})"  # noqa
+                    user_agent_entry = (
+                        f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
+                    )
 
                     # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm
                     ssp = {f"SSP_{k}": f"{{{v}}}" for k, v in creds.server_side_parameters.items()}

From ed8a17036be15ce929a068e2daeaf7433573778a Mon Sep 17 00:00:00 2001
From: Sindre Grindheim 
Date: Tue, 5 Jul 2022 10:25:44 +0200
Subject: [PATCH 315/603] Not dropping table for incremental full refresh with
 delta (#287)

* Not dropping table for incremental full refresh with delta

* Updated changelog

* Simplified conditional logic according to suggestion

* Updated changelog

* Only drop table if not delta table

Co-authored-by: Jeremy Cohen 

* Update changelog, trigger CircleCI tests

Co-authored-by: Jeremy Cohen 
Co-authored-by: Jeremy Cohen 
---
 CHANGELOG.md                                               | 7 +++++++
 .../macros/materializations/incremental/incremental.sql    | 6 +++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f99b14c7..f7164641 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## dbt-spark 1.2.0rc1 (Release TBD)
+
+- Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+
+### Contributors
+- [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+
 ## dbt-spark 1.2.0b1 (June 24, 2022)
 
 ### Fixes
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 8d8e69d9..99cd31db 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -26,10 +26,14 @@
 
   {{ run_hooks(pre_hooks) }}
 
+  {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %}
+
   {% if existing_relation is none %}
     {% set build_sql = create_table_as(False, target_relation, sql) %}
   {% elif existing_relation.is_view or full_refresh_mode %}
-    {% do adapter.drop_relation(existing_relation) %}
+    {% if not is_delta %} {#-- If Delta, we will `create or replace` below, so no need to drop --#}
+      {% do adapter.drop_relation(existing_relation) %}
+    {% endif %}
     {% set build_sql = create_table_as(False, target_relation, sql) %}
   {% else %}
     {% do run_query(create_table_as(True, tmp_relation, sql)) %}
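
The guard above skips the explicit `drop_relation` only when both the configured `file_format` and the existing relation are Delta, because the `create or replace table` that follows swaps the table atomically; any other format still needs the drop, since a plain `create table` fails if the relation already exists. A minimal Python sketch of that decision, using illustrative names that are not part of the adapter:

```
def should_drop_before_rebuild(file_format, existing_is_delta):
    # Mirrors the `is_delta` guard added in incremental.sql (#287).
    is_delta = file_format == "delta" and existing_is_delta
    return not is_delta


# Parquet (the default) still drops first; Delta replacing Delta does not.
assert should_drop_before_rebuild("parquet", existing_is_delta=False)
assert not should_drop_before_rebuild("delta", existing_is_delta=True)
```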

From f284cde4c44010ddf3fdfa420492e6190937e81c Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 6 Jul 2022 13:42:23 +0200
Subject: [PATCH 316/603] Data type macros (#380)

* Run tests for data type macros. Fine tune numeric_type

* Hard code seed loading types for float + int

* Repoint, fixup, changelog entry
---
 CHANGELOG.md                                  |  4 ++
 dbt/adapters/spark/column.py                  |  8 +++
 .../adapter/utils/test_data_types.py          | 67 +++++++++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 tests/functional/adapter/utils/test_data_types.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7164641..81b890f2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,11 @@
 ## dbt-spark 1.2.0rc1 (Release TBD)
 
+### Fixes
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
 
+### Under the hood
+- Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
+
 ### Contributors
 - [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
 
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index 4df6b301..dcf7590e 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -37,6 +37,14 @@ def quoted(self) -> str:
     def data_type(self) -> str:
         return self.dtype
 
+    @classmethod
+    def numeric_type(cls, dtype: str, precision: Any, scale: Any) -> str:
+        # SparkSQL does not support 'numeric' or 'number', only 'decimal'
+        if precision is None or scale is None:
+            return "decimal"
+        else:
+            return "{}({},{})".format("decimal", precision, scale)
+
     def __repr__(self) -> str:
         return "".format(self.name, self.data_type)
 
diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py
new file mode 100644
index 00000000..65a24a3a
--- /dev/null
+++ b/tests/functional/adapter/utils/test_data_types.py
@@ -0,0 +1,67 @@
+import pytest
+from dbt.tests.adapter.utils.data_types.test_type_bigint import BaseTypeBigInt
+from dbt.tests.adapter.utils.data_types.test_type_float import (
+    BaseTypeFloat, seeds__expected_csv as seeds__float_expected_csv
+)
+from dbt.tests.adapter.utils.data_types.test_type_int import (
+    BaseTypeInt, seeds__expected_csv as seeds__int_expected_csv
+)
+from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric
+from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
+from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp
+
+
+class TestTypeBigInt(BaseTypeBigInt):
+    pass
+
+
+# need to explicitly cast this to avoid it being inferred/loaded as a DOUBLE on Spark
+# in SparkSQL, the two are equivalent for `=` comparison, but distinct for EXCEPT comparison
+seeds__float_expected_yml = """
+version: 2
+seeds:
+  - name: expected
+    config:
+      column_types:
+        float_col: float
+"""
+
+class TestTypeFloat(BaseTypeFloat):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "expected.csv": seeds__float_expected_csv,
+            "expected.yml": seeds__float_expected_yml,
+        }
+
+
+# need to explicitly cast this to avoid it being inferred/loaded as a BIGINT on Spark
+seeds__int_expected_yml = """
+version: 2
+seeds:
+  - name: expected
+    config:
+      column_types:
+        int_col: int
+"""
+
+class TestTypeInt(BaseTypeInt):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "expected.csv": seeds__int_expected_csv,
+            "expected.yml": seeds__int_expected_yml,
+        }
+
+    
+class TestTypeNumeric(BaseTypeNumeric):
+    def numeric_fixture_type(self):
+        return "decimal(28,6)"
+
+    
+class TestTypeString(BaseTypeString):
+    pass
+
+    
+class TestTypeTimestamp(BaseTypeTimestamp):
+    pass
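
A usage sketch of the `numeric_type` classmethod added above; the literal precision and scale are illustrative:

```
from dbt.adapters.spark.column import SparkColumn

# With precision and scale, the Spark-native spelling is rendered:
assert SparkColumn.numeric_type("decimal", 28, 6) == "decimal(28,6)"

# Without precision/scale there is nothing safe to render, so the bare type
# name comes back and Spark falls back to its default decimal(10,0):
assert SparkColumn.numeric_type("decimal", None, None) == "decimal"
```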

From 75d2933665bb3110d8392b35f60c4f0bb039c8c9 Mon Sep 17 00:00:00 2001
From: Neelesh Salian 
Date: Fri, 8 Jul 2022 03:25:53 -0700
Subject: [PATCH 317/603] Fix changelog for spark upgrade feature (#385)

---
 CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 81b890f2..36958eff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,12 +2,14 @@
 
 ### Fixes
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+- Apache Spark version upgraded to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ### Under the hood
 - Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
 
 ### Contributors
 - [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ## dbt-spark 1.2.0b1 (June 24, 2022)
 
@@ -28,7 +30,6 @@
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
-- Upgrade Spark version to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ### Under the hood
 - Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356))
@@ -40,7 +41,6 @@
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
-- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ## dbt-spark 1.1.0b1 (March 23, 2022)
 

From 27504209ae37823af560f4952452c559791b1e23 Mon Sep 17 00:00:00 2001
From: Jacek Laskowski 
Date: Mon, 11 Jul 2022 23:42:29 +0200
Subject: [PATCH 318/603] Use lowercase file ext for CONTRIBUTING.md (#384)

---
 CONTRIBUTING.MD => CONTRIBUTING.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename CONTRIBUTING.MD => CONTRIBUTING.md (100%)

diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.md
similarity index 100%
rename from CONTRIBUTING.MD
rename to CONTRIBUTING.md

From 9109fe1babaab92cbe1c58868977c7a9c998c2a8 Mon Sep 17 00:00:00 2001
From: Gerda Shank 
Date: Tue, 12 Jul 2022 10:14:11 -0400
Subject: [PATCH 319/603] Add apply_grants call to materialization macros
 (#381)

* Add apply_grants call to materialization macros

* add standardize_grants_dict

* Working grant macros

* Initialize tests in CI

* Refactor to account for core macro changes. Passing tests

* Fix code checks

* Try default__reset_csv_table

* Code checks

* Revert "Try default__reset_csv_table"

This reverts commit 8bd41451249afee1f2884f24c292b81a0b1da82c.

* Account for refactor in dbt-labs/dbt-core@c763601

* Account for test changes in dbt-labs/dbt-core@debc867

* add changelog

* Empty-Commit

* rerun ci

* rerun ci

* readd a persist_docs call to snapshot.sql

* fix whitespace

Co-authored-by: Jeremy Cohen 
Co-authored-by: Matthew McKnight 
---
 .circleci/config.yml                          |  6 ++
 CHANGELOG.md                                  |  3 +
 dbt/adapters/spark/impl.py                    | 17 ++++++
 dbt/include/spark/macros/apply_grants.sql     | 39 ++++++++++++
 .../incremental/incremental.sql               |  4 ++
 .../macros/materializations/snapshot.sql      |  4 ++
 .../spark/macros/materializations/table.sql   |  4 ++
 test.env.example                              |  5 ++
 tests/conftest.py                             |  2 +-
 tests/functional/adapter/test_grants.py       | 60 +++++++++++++++++++
 10 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 dbt/include/spark/macros/apply_grants.sql
 create mode 100644 tests/functional/adapter/test_grants.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0a1a3e1b..8f0afa6c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -63,6 +63,9 @@ jobs:
     environment:
       DBT_INVOCATION_ENV: circle
       DBT_DATABRICKS_RETRY_ALL: True
+      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
     docker:
       - image: fishtownanalytics/test-container:10
     steps:
@@ -78,6 +81,9 @@ jobs:
     environment:
       DBT_INVOCATION_ENV: circle
       ODBC_DRIVER: Simba # TODO: move env var to Docker image
+      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
     docker:
       # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed
       - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36958eff..1abed6ec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,9 @@
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
 - Apache Spark version upgraded to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
+### Features
+- Add grants to materializations ([#366](https://github.com/dbt-labs/dbt-spark/issues/366), [#381](https://github.com/dbt-labs/dbt-spark/pull/381))
+
 ### Under the hood
 - Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 699eca9d..3fb9978d 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -380,6 +380,23 @@ def run_sql_for_tests(self, sql, fetch, conn):
         finally:
             conn.transaction_open = False
 
+    def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
+        grants_dict: Dict[str, List[str]] = {}
+        for row in grants_table:
+            grantee = row["Principal"]
+            privilege = row["ActionType"]
+            object_type = row["ObjectType"]
+
+            # we only want to consider grants on this object
+            # (view or table both appear as 'TABLE')
+            # and we don't want to consider the OWN privilege
+            if object_type == "TABLE" and privilege != "OWN":
+                if privilege in grants_dict.keys():
+                    grants_dict[privilege].append(grantee)
+                else:
+                    grants_dict.update({privilege: [grantee]})
+        return grants_dict
+
 
 # spark does something interesting with joins when both tables have the same
 # static values for the join condition and complains that the join condition is
diff --git a/dbt/include/spark/macros/apply_grants.sql b/dbt/include/spark/macros/apply_grants.sql
new file mode 100644
index 00000000..49dae95d
--- /dev/null
+++ b/dbt/include/spark/macros/apply_grants.sql
@@ -0,0 +1,39 @@
+{% macro spark__copy_grants() %}
+
+    {% if config.materialized == 'view' %}
+        {#-- Spark views don't copy grants when they're replaced --#}
+        {{ return(False) }}
+
+    {% else %}
+      {#-- This depends on how we're replacing the table, which depends on its file format
+        -- Just play it safe by assuming that grants have been copied over, and need to be checked / possibly revoked
+        -- We can make this more efficient in the future
+      #}
+        {{ return(True) }}
+
+    {% endif %}
+{% endmacro %}
+
+
+{%- macro spark__get_grant_sql(relation, privilege, grantees) -%}
+    grant {{ privilege }} on {{ relation }} to {{ adapter.quote(grantees[0]) }}
+{%- endmacro %}
+
+
+{%- macro spark__get_revoke_sql(relation, privilege, grantees) -%}
+    revoke {{ privilege }} on {{ relation }} from {{ adapter.quote(grantees[0]) }}
+{%- endmacro %}
+
+
+{%- macro spark__support_multiple_grantees_per_dcl_statement() -%}
+    {{ return(False) }}
+{%- endmacro -%}
+
+
+{% macro spark__call_dcl_statements(dcl_statement_list) %}
+    {% for dcl_statement in dcl_statement_list %}
+        {% call statement('grant_or_revoke') %}
+            {{ dcl_statement }}
+        {% endcall %}
+    {% endfor %}
+{% endmacro %}
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 99cd31db..b80510b7 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -3,6 +3,7 @@
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
   {%- set raw_strategy = config.get('incremental_strategy', default='append') -%}
+  {%- set grant_config = config.get('grants') -%}
 
   {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%}
   {%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%}
@@ -45,6 +46,9 @@
     {{ build_sql }}
   {%- endcall -%}
 
+  {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}
+  {% do apply_grants(target_relation, grant_config, should_revoke) %}
+
   {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks) }}
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index 9c891ef0..a5304682 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -75,6 +75,7 @@
   {%- set strategy_name = config.get('strategy') -%}
   {%- set unique_key = config.get('unique_key') %}
   {%- set file_format = config.get('file_format', 'parquet') -%}
+  {%- set grant_config = config.get('grants') -%}
 
   {% set target_relation_exists, target_relation = get_or_create_relation(
           database=none,
@@ -163,6 +164,9 @@
       {{ final_sql }}
   {% endcall %}
 
+  {% set should_revoke = should_revoke(target_relation_exists, full_refresh_mode) %}
+  {% do apply_grants(target_relation, grant_config, should_revoke) %}
+
   {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks, inside_transaction=True) }}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 2eeb806f..3462d333 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -1,6 +1,7 @@
 {% materialization table, adapter = 'spark' %}
 
   {%- set identifier = model['alias'] -%}
+  {%- set grant_config = config.get('grants') -%}
 
   {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
   {%- set target_relation = api.Relation.create(identifier=identifier,
@@ -22,6 +23,9 @@
     {{ create_table_as(False, target_relation, sql) }}
   {%- endcall %}
 
+  {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
+  {% do apply_grants(target_relation, grant_config, should_revoke) %}
+
   {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks) }}
diff --git a/test.env.example b/test.env.example
index bf4cf2ee..e69f700b 100644
--- a/test.env.example
+++ b/test.env.example
@@ -8,3 +8,8 @@ DBT_DATABRICKS_HOST_NAME=
 DBT_DATABRICKS_TOKEN=
 # file path to local ODBC driver
 ODBC_DRIVER=
+
+# users for testing 'grants' functionality
+DBT_TEST_USER_1=
+DBT_TEST_USER_2=
+DBT_TEST_USER_3=
diff --git a/tests/conftest.py b/tests/conftest.py
index 0c624713..0771566b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -108,4 +108,4 @@ def skip_by_profile_type(request):
     if request.node.get_closest_marker("skip_profile"):
         for skip_profile_type in request.node.get_closest_marker("skip_profile").args:
             if skip_profile_type == profile_type:
-                pytest.skip("skipped on '{profile_type}' profile")
+                pytest.skip(f"skipped on '{profile_type}' profile")
diff --git a/tests/functional/adapter/test_grants.py b/tests/functional/adapter/test_grants.py
new file mode 100644
index 00000000..8e0341df
--- /dev/null
+++ b/tests/functional/adapter/test_grants.py
@@ -0,0 +1,60 @@
+import pytest
+from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants
+from dbt.tests.adapter.grants.test_incremental_grants import BaseIncrementalGrants
+from dbt.tests.adapter.grants.test_invalid_grants import BaseInvalidGrants
+from dbt.tests.adapter.grants.test_seed_grants import BaseSeedGrants
+from dbt.tests.adapter.grants.test_snapshot_grants import BaseSnapshotGrants
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestModelGrantsSpark(BaseModelGrants):
+    def privilege_grantee_name_overrides(self):
+        # insert --> modify
+        return {
+            "select": "select",
+            "insert": "modify",
+            "fake_privilege": "fake_privilege",
+            "invalid_user": "invalid_user",
+        }
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestIncrementalGrantsSpark(BaseIncrementalGrants):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+                "+incremental_strategy": "merge",
+            }
+        }
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestSeedGrantsSpark(BaseSeedGrants):
+    # seeds in dbt-spark are currently "full refreshed," in such a way that
+    # the grants are not carried over
+    # see https://github.com/dbt-labs/dbt-spark/issues/388
+    def seeds_support_partial_refresh(self):
+        return False
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestSnapshotGrantsSpark(BaseSnapshotGrants):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "snapshots": {
+                "+file_format": "delta",
+                "+incremental_strategy": "merge",
+            }
+        }
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestInvalidGrantsSpark(BaseInvalidGrants):
+    def grantee_does_not_exist_error(self):
+        return "RESOURCE_DOES_NOT_EXIST"
+        
+    def privilege_does_not_exist_error(self):
+        return "Action Unknown"

From 244742ac0ee35db9774d9830a96d5796bf738448 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 12 Jul 2022 14:21:59 -0400
Subject: [PATCH 320/603] Bumping version to 1.3.0a1 (#393)

* Bumping version to 1.3.0a1

* Update CHANGELOG.md

* Fix whitespace

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                  | 2 +-
 CHANGELOG.md                      | 5 ++++-
 dbt/adapters/spark/__version__.py | 2 +-
 setup.py                          | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 0a892fdc..605b6f37 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0b1
+current_version = 1.3.0a1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1abed6ec..276e6759 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,7 @@
-## dbt-spark 1.2.0rc1 (Release TBD)
+## dbt-spark 1.3.0b1 (Release TBD)
+
+
+## dbt-spark 1.2.0rc1 (July 12, 2022)
 
 ### Fixes
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 6e8eee92..a9fe3c3e 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.2.0b1"
+version = "1.3.0a1"
diff --git a/setup.py b/setup.py
index aba51b34..cb0c40ae 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.2.0b1"
+package_version = "1.3.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 80c1775a9b6283fcd7cf2f1f2ce63ce2a1da8460 Mon Sep 17 00:00:00 2001
From: Scott Barber <74067474+barberscott@users.noreply.github.com>
Date: Thu, 21 Jul 2022 10:48:36 -0500
Subject: [PATCH 321/603] [CT-868] Pin pyodbc in dbt-spark (#398)

* [CT-868] Pin pyodbc in dbt-spark

Pin pyodbc to a known-working version until pyodbc>=4.0.35 releases.

* Update CHANGELOG.md

* Update CHANGELOG.md

* Update CHANGELOG.md
---
 CHANGELOG.md     | 5 +++++
 requirements.txt | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 276e6759..8e018f42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 ## dbt-spark 1.3.0b1 (Release TBD)
 
+### Fixes
+- Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
+
+### Contributors
+- [@barberscott](https://github.com/barberscott)  ([#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
 ## dbt-spark 1.2.0rc1 (July 12, 2022)
 
diff --git a/requirements.txt b/requirements.txt
index e03320a4..c64512ae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 PyHive[hive]>=0.6.0,<0.7.0
-pyodbc>=4.0.30
+pyodbc==4.0.32
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability

From eac5614a3ad25e9a268ab8d71aaa91a9803e2039 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Fri, 22 Jul 2022 12:20:37 -0400
Subject: [PATCH 322/603] Updating CI pip and py10 (#403)

---
 .github/workflows/main.yml | 39 ++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 56685bfc..4166756c 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -18,7 +18,6 @@ on:
   push:
     branches:
       - "main"
-      - "develop"
       - "*.latest"
       - "releases/*"
   pull_request:
@@ -40,6 +39,7 @@ jobs:
     name: code-quality
 
     runs-on: ubuntu-latest
+    timeout-minutes: 10
 
     steps:
       - name: Check out the repository
@@ -55,23 +55,29 @@ jobs:
       - name: Install python dependencies
         run: |
           sudo apt-get install libsasl2-dev
-          pip install --user --upgrade pip
-          pip install -r dev-requirements.txt
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install pre-commit
           pre-commit --version
+          python -m pip install mypy==0.942
           mypy --version
+          python -m pip install -r requirements.txt
+          python -m pip install -r dev-requirements.txt
           dbt --version
-      - name: pre-commit hooks
+
+      - name: Run pre-commit hooks
         run: pre-commit run --all-files --show-diff-on-failure
 
   unit:
     name: unit test / python ${{ matrix.python-version }}
 
     runs-on: ubuntu-latest
+    timeout-minutes: 10
 
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
 
     env:
       TOXENV: "unit"
@@ -80,8 +86,6 @@ jobs:
     steps:
       - name: Check out the repository
         uses: actions/checkout@v2
-        with:
-          persist-credentials: false
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v2
@@ -91,9 +95,9 @@ jobs:
       - name: Install python dependencies
         run: |
           sudo apt-get install libsasl2-dev
-          pip install --user --upgrade pip
-          pip install tox
-          pip --version
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install tox
           tox --version
       - name: Run tox
         run: tox
@@ -120,8 +124,6 @@ jobs:
     steps:
       - name: Check out the repository
         uses: actions/checkout@v2
-        with:
-          persist-credentials: false
 
       - name: Set up Python
         uses: actions/setup-python@v2
@@ -130,9 +132,10 @@ jobs:
 
       - name: Install python dependencies
         run: |
-          pip install --user --upgrade pip
-          pip install --upgrade setuptools wheel twine check-wheel-contents
-          pip --version
+          python -m pip install --user --upgrade pip
+          python -m pip install --upgrade setuptools wheel twine check-wheel-contents
+          python -m pip --version
+
       - name: Build distributions
         run: ./scripts/build-dist.sh
 
@@ -171,7 +174,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
@@ -194,13 +197,13 @@ jobs:
 
       - name: Install wheel distributions
         run: |
-          find ./dist/*.whl -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
+          find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
       - name: Check wheel distributions
         run: |
           dbt --version
       - name: Install source distributions
         run: |
-          find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
+          find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
       - name: Check source distributions
         run: |
           dbt --version

From 3a292d4e04519ad58d5ae660348f2b924f82f052 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Fri, 22 Jul 2022 12:51:05 -0400
Subject: [PATCH 323/603] Fixing one more pip reference in CI (#405)

---
 .github/workflows/main.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 4166756c..b45f9377 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -184,9 +184,9 @@ jobs:
 
       - name: Install python dependencies
         run: |
-          pip install --user --upgrade pip
-          pip install --upgrade wheel
-          pip --version
+          python -m pip install --user --upgrade pip
+          python -m pip install --upgrade wheel
+          python -m pip --version
       - uses: actions/download-artifact@v2
         with:
           name: dist

From 16d05e2e8d60baeda3b4a4abb82034dc3fd0d776 Mon Sep 17 00:00:00 2001
From: Gerda Shank 
Date: Mon, 25 Jul 2022 11:57:13 -0400
Subject: [PATCH 324/603] Change to support core incremental refactor (#394)

---
 CHANGELOG.md                                                    | 1 +
 .../spark/macros/materializations/incremental/incremental.sql   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e018f42..28f7e138 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@
 
 ### Under the hood
 - Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
+- Make minimal changes to support dbt Core incremental materialization refactor ([#402](https://github.com/dbt-labs/dbt-spark/issues/402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
 - [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index b80510b7..1ca2c149 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -2,7 +2,7 @@
 
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
-  {%- set raw_strategy = config.get('incremental_strategy', default='append') -%}
+  {%- set raw_strategy = config.get('incremental_strategy') or 'append' -%}
   {%- set grant_config = config.get('grants') -%}
 
   {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%}
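
The switch from `default='append'` to `or 'append'` matters when the key is passed through with an empty value rather than omitted: a `default` only applies to a missing key, while `or` also falls back on `none`. The same distinction illustrated in plain Python rather than Jinja:

```
config = {"incremental_strategy": None}  # key present, value unset

print(config.get("incremental_strategy", "append"))    # -> None
print(config.get("incremental_strategy") or "append")  # -> append
```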

From f58fc233fbbc7b9289299313807e706c57360613 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Thu, 28 Jul 2022 13:52:18 -0700
Subject: [PATCH 325/603] Feature/python model beta (#377)

Co-authored-by: Jeremy Cohen 
Co-authored-by: Ian Knox 
---
 .github/workflows/main.yml                    |   1 +
 CHANGELOG.md                                  |   3 +
 dbt/adapters/spark/impl.py                    | 127 +++++++++++++++++-
 dbt/include/spark/macros/adapters.sql         |  61 +++++----
 .../incremental/incremental.sql               |  77 +++++++----
 .../macros/materializations/snapshot.sql      |   2 +-
 .../spark/macros/materializations/table.sql   |  24 +++-
 dev-requirements.txt                          |   2 +
 requirements.txt                              |   2 +
 tests/conftest.py                             |   2 +
 tests/functional/adapter/test_basic.py        |   1 -
 tests/functional/adapter/test_python_model.py |  59 ++++++++
 .../test_incremental_strategies.py            |   2 +
 13 files changed, 297 insertions(+), 66 deletions(-)
 create mode 100644 tests/functional/adapter/test_python_model.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b45f9377..bf607c37 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -60,6 +60,7 @@ jobs:
           python -m pip install pre-commit
           pre-commit --version
           python -m pip install mypy==0.942
+          python -m pip install types-requests
           mypy --version
           python -m pip install -r requirements.txt
           python -m pip install -r dev-requirements.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 28f7e138..d015a26c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 ## dbt-spark 1.3.0b1 (Release TBD)
 
+### Features
+- support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+
 ### Fixes
 - Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 3fb9978d..12c42ab9 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,4 +1,7 @@
 import re
+import requests
+import time
+import base64
 from concurrent.futures import Future
 from dataclasses import dataclass
 from typing import Any, Dict, Iterable, List, Optional, Union
@@ -11,7 +14,8 @@
 import dbt.exceptions
 
 from dbt.adapters.base import AdapterConfig
-from dbt.adapters.base.impl import catch_as_completed
+from dbt.adapters.base.impl import catch_as_completed, log_code_execution
+from dbt.adapters.base.meta import available
 from dbt.adapters.sql import SQLAdapter
 from dbt.adapters.spark import SparkConnectionManager
 from dbt.adapters.spark import SparkRelation
@@ -159,11 +163,9 @@ def list_relations_without_caching(
 
         return relations
 
-    def get_relation(
-        self, database: Optional[str], schema: str, identifier: str
-    ) -> Optional[BaseRelation]:
+    def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]:
         if not self.Relation.include_policy.database:
-            database = None
+            database = None  # type: ignore
 
         return super().get_relation(database, schema, identifier)
 
@@ -296,7 +298,12 @@ def get_catalog(self, manifest):
                 for schema in schemas:
                     futures.append(
                         tpe.submit_connected(
-                            self, schema, self._get_one_catalog, info, [schema], manifest
+                            self,
+                            schema,
+                            self._get_one_catalog,
+                            info,
+                            [schema],
+                            manifest,
                         )
                     )
             catalogs, exceptions = catch_as_completed(futures)
@@ -380,6 +387,114 @@ def run_sql_for_tests(self, sql, fetch, conn):
         finally:
             conn.transaction_open = False
 
+    @available.parse_none
+    @log_code_execution
+    def submit_python_job(self, parsed_model: dict, compiled_code: str, timeout=None):
+        # TODO improve the typing here.  N.B. Jinja returns a `jinja2.runtime.Undefined` instead
+        # of `None` which evaluates to True!
+
+        # TODO limit this function to run only when doing the materialization of python nodes
+
+        # assuming that for a python job running over 1 day the user would manually override this
+        schema = getattr(parsed_model, "schema", self.config.credentials.schema)
+        identifier = parsed_model["alias"]
+        if not timeout:
+            timeout = 60 * 60 * 24
+        if timeout <= 0:
+            raise ValueError("Timeout must larger than 0")
+
+        auth_header = {"Authorization": f"Bearer {self.connections.profile.credentials.token}"}
+
+        # create new dir
+        if not self.connections.profile.credentials.user:
+            raise ValueError("Need to supply user in profile to submit python job")
+        # it is safe to call mkdirs even if the dir already exists and has content inside
+        work_dir = f"/Users/{self.connections.profile.credentials.user}/{schema}"
+        response = requests.post(
+            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/mkdirs",
+            headers=auth_header,
+            json={
+                "path": work_dir,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating work_dir for python notebooks\n {response.content!r}"
+            )
+
+        # add notebook
+        b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
+        response = requests.post(
+            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/import",
+            headers=auth_header,
+            json={
+                "path": f"{work_dir}/{identifier}",
+                "content": b64_encoded_content,
+                "language": "PYTHON",
+                "overwrite": True,
+                "format": "SOURCE",
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python notebook.\n {response.content!r}"
+            )
+
+        # submit job
+        submit_response = requests.post(
+            f"https://{self.connections.profile.credentials.host}/api/2.1/jobs/runs/submit",
+            headers=auth_header,
+            json={
+                "run_name": "debug task",
+                "existing_cluster_id": self.connections.profile.credentials.cluster,
+                "notebook_task": {
+                    "notebook_path": f"{work_dir}/{identifier}",
+                },
+            },
+        )
+        if submit_response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python run.\n {response.content!r}"
+            )
+
+        # poll until job finish
+        state = None
+        start = time.time()
+        run_id = submit_response.json()["run_id"]
+        terminal_states = ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]
+        while state not in terminal_states and time.time() - start < timeout:
+            time.sleep(1)
+            resp = requests.get(
+                f"https://{self.connections.profile.credentials.host}"
+                f"/api/2.1/jobs/runs/get?run_id={run_id}",
+                headers=auth_header,
+            )
+            json_resp = resp.json()
+            state = json_resp["state"]["life_cycle_state"]
+            # logger.debug(f"Polling.... in state: {state}")
+        if state != "TERMINATED":
+            raise dbt.exceptions.RuntimeException(
+                "python model run ended in state"
+                f"{state} with state_message\n{json_resp['state']['state_message']}"
+            )
+
+        # get end state to return to user
+        run_output = requests.get(
+            f"https://{self.connections.profile.credentials.host}"
+            f"/api/2.1/jobs/runs/get-output?run_id={run_id}",
+            headers=auth_header,
+        )
+        json_run_output = run_output.json()
+        result_state = json_run_output["metadata"]["state"]["result_state"]
+        if result_state != "SUCCESS":
+            raise dbt.exceptions.RuntimeException(
+                "Python model failed with traceback as:\n"
+                "(Note that the line number here does not "
+                "match the line number in your code due to dbt templating)\n"
+                f"{json_run_output['error_trace']}"
+            )
+        return self.connections.get_response(None)
+
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
         grants_dict: Dict[str, List[str]] = {}
         for row in grants_table:
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index abdeacb7..05630ede 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -117,35 +117,46 @@
 {%- endmacro %}
 
 
-{% macro create_temporary_view(relation, sql) -%}
-  {{ return(adapter.dispatch('create_temporary_view', 'dbt')(relation, sql)) }}
+{% macro create_temporary_view(relation, compiled_code) -%}
+  {{ return(adapter.dispatch('create_temporary_view', 'dbt')(relation, compiled_code)) }}
 {%- endmacro -%}
 
-{#-- We can't use temporary tables with `create ... as ()` syntax #}
-{% macro spark__create_temporary_view(relation, sql) -%}
-  create temporary view {{ relation.include(schema=false) }} as
-    {{ sql }}
-{% endmacro %}
+{#-- We can't use temporary tables with `create ... as ()` syntax --#}
+{% macro spark__create_temporary_view(relation, compiled_code) -%}
+    create temporary view {{ relation.include(schema=false) }} as
+      {{ compiled_code }}
+{%- endmacro -%}
 
 
-{% macro spark__create_table_as(temporary, relation, sql) -%}
-  {% if temporary -%}
-    {{ create_temporary_view(relation, sql) }}
-  {%- else -%}
-    {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
-      create or replace table {{ relation }}
-    {% else %}
-      create table {{ relation }}
-    {% endif %}
-    {{ file_format_clause() }}
-    {{ options_clause() }}
-    {{ partition_cols(label="partitioned by") }}
-    {{ clustered_cols(label="clustered by") }}
-    {{ location_clause() }}
-    {{ comment_clause() }}
-    as
-      {{ sql }}
-  {%- endif %}
+{%- macro spark__create_table_as(temporary, relation, compiled_code, language='sql') -%}
+  {%- if language == 'sql' -%}
+    {%- if temporary -%}
+      {{ create_temporary_view(relation, compiled_code) }}
+    {%- else -%}
+      {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
+        create or replace table {{ relation }}
+      {% else %}
+        create table {{ relation }}
+      {% endif %}
+      {{ file_format_clause() }}
+      {{ options_clause() }}
+      {{ partition_cols(label="partitioned by") }}
+      {{ clustered_cols(label="clustered by") }}
+      {{ location_clause() }}
+      {{ comment_clause() }}
+      as
+      {{ compiled_code }}
+    {%- endif -%}
+  {%- elif language == 'python' -%}
+    {#--
+    N.B. Python models _can_ write to temp views HOWEVER they use a different session
+    and have already expired by the time they need to be used (I.E. in merges for incremental models)
+
+    TODO: Deep dive into spark sessions to see if we can reuse a single session for an entire
+    dbt invocation.
+     --#}
+    {{ py_write_table(compiled_code=compiled_code, target_relation=relation) }}
+  {%- endif -%}
 {%- endmacro -%}
 
 
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 1ca2c149..91cba9e5 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -1,5 +1,4 @@
 {% materialization incremental, adapter='spark' -%}
-
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
   {%- set raw_strategy = config.get('incremental_strategy') or 'append' -%}
@@ -8,43 +7,63 @@
   {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%}
   {%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%}
 
+  {#-- Set vars --#}
+
   {%- set unique_key = config.get('unique_key', none) -%}
   {%- set partition_by = config.get('partition_by', none) -%}
-
-  {%- set full_refresh_mode = (should_full_refresh()) -%}
-
-  {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %}
-
-  {% set target_relation = this %}
-  {% set existing_relation = load_relation(this) %}
-  {% set tmp_relation = make_temp_relation(this) %}
-
-  {% if strategy == 'insert_overwrite' and partition_by %}
-    {% call statement() %}
+  {%- set language = model['language'] -%}
+  {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%}
+  {%- set target_relation = this -%}
+  {%- set existing_relation = load_relation(this) -%}
+  {%- set tmp_relation = make_temp_relation(this) -%}
+
+  {#-- Set Overwrite Mode --#}
+  {%- if strategy == 'insert_overwrite' and partition_by -%}
+    {%- call statement() -%}
       set spark.sql.sources.partitionOverwriteMode = DYNAMIC
-    {% endcall %}
-  {% endif %}
+    {%- endcall -%}
+  {%- endif -%}
 
+  {#-- Run pre-hooks --#}
   {{ run_hooks(pre_hooks) }}
 
-  {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %}
-
-  {% if existing_relation is none %}
-    {% set build_sql = create_table_as(False, target_relation, sql) %}
-  {% elif existing_relation.is_view or full_refresh_mode %}
+  {#-- Incremental run logic --#}
+  {%- if existing_relation is none -%}
+    {#-- Relation must be created --#}
+    {%- call statement('main', language=language) -%}
+      {{ create_table_as(False, target_relation, compiled_code, language) }}
+    {%- endcall -%}
+  {%- elif existing_relation.is_view or should_full_refresh() -%}
+    {#-- Relation must be dropped & recreated --#}
+    {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %}
     {% if not is_delta %} {#-- If Delta, we will `create or replace` below, so no need to drop --#}
       {% do adapter.drop_relation(existing_relation) %}
     {% endif %}
-    {% set build_sql = create_table_as(False, target_relation, sql) %}
-  {% else %}
-    {% do run_query(create_table_as(True, tmp_relation, sql)) %}
-    {% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
-    {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %}
-  {% endif %}
-
-  {%- call statement('main') -%}
-    {{ build_sql }}
-  {%- endcall -%}
+    {%- call statement('main', language=language) -%}
+      {{ create_table_as(False, target_relation, compiled_code, language) }}
+    {%- endcall -%}
+  {%- else -%}
+    {#-- Relation must be merged --#}
+    {%- call statement('create_tmp_relation', language=language) -%}
+      {{ create_table_as(True, tmp_relation, compiled_code, language) }}
+    {%- endcall -%}
+    {%- do process_schema_changes(on_schema_change, tmp_relation, existing_relation) -%}
+    {%- call statement('main') -%}
+      {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) }}
+    {%- endcall -%}
+    {%- if language == 'python' -%}
+      {#--
+      This is yucky.
+      See note in dbt-spark/dbt/include/spark/macros/adapters.sql
+      re: python models and temporary views.
+
+      Also, why doesn't either drop_relation or adapter.drop_relation work here?!
+      --#}
+      {% call statement('drop_relation') -%}
+        drop table if exists {{ tmp_relation }}
+      {%- endcall %}
+    {%- endif -%}
+  {%- endif -%}
 
   {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}
   {% do apply_grants(target_relation, grant_config, should_revoke) %}
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index a5304682..6cf2358f 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -117,7 +117,7 @@
 
   {% if not target_relation_exists %}
 
-      {% set build_sql = build_snapshot_table(strategy, model['compiled_sql']) %}
+      {% set build_sql = build_snapshot_table(strategy, model['compiled_code']) %}
       {% set final_sql = create_table_as(False, target_relation, build_sql) %}
 
   {% else %}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 3462d333..6a02ea16 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -1,5 +1,5 @@
 {% materialization table, adapter = 'spark' %}
-
+  {%- set language = model['language'] -%}
   {%- set identifier = model['alias'] -%}
   {%- set grant_config = config.get('grants') -%}
 
@@ -19,9 +19,10 @@
   {%- endif %}
 
   -- build model
-  {% call statement('main') -%}
-    {{ create_table_as(False, target_relation, sql) }}
-  {%- endcall %}
+
+  {%- call statement('main', language=language) -%}
+    {{ create_table_as(False, target_relation, compiled_code, language) }}
+  {%- endcall -%}
 
   {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
   {% do apply_grants(target_relation, grant_config, should_revoke) %}
@@ -33,3 +34,18 @@
   {{ return({'relations': [target_relation]})}}
 
 {% endmaterialization %}
+
+
+{% macro py_write_table(compiled_code, target_relation) %}
+{{ compiled_code }}
+# --- Autogenerated dbt materialization code. --- #
+dbt = dbtObj(spark.table)
+df = model(dbt, spark)
+df.write.mode("overwrite").format("delta").saveAsTable("{{ target_relation }}")
+{%- endmacro -%}
+
+{%macro py_script_comment()%}
+# how to execute python model in notebook
+# dbt = dbtObj(spark.table)
+# df = model(dbt, spark)
+{%endmacro%}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index b94cb8b6..5b29e5e9 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -3,6 +3,8 @@
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
+
+
 black==22.3.0
 bumpversion
 click~=8.0.4
diff --git a/requirements.txt b/requirements.txt
index c64512ae..5d774e4f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 PyHive[hive]>=0.6.0,<0.7.0
+requests[python]>=2.28.1
+
 pyodbc==4.0.32
 sqlparams>=3.0.0
 thrift>=0.13.0
diff --git a/tests/conftest.py b/tests/conftest.py
index 0771566b..2fa50d6c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,6 +60,7 @@ def databricks_cluster_target():
         "connect_retries": 3,
         "connect_timeout": 5,
         "retry_all": True,
+        "user": os.getenv('DBT_DATABRICKS_USER'),
     }
 
 
@@ -91,6 +92,7 @@ def databricks_http_cluster_target():
         "connect_retries": 5,
         "connect_timeout": 60, 
         "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)),
+        "user": os.getenv('DBT_DATABRICKS_USER'),
     }
 
 
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index e1a57fd3..bdccf169 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -79,7 +79,6 @@ def project_config_update(self):
             }
         }
 
-
 @pytest.mark.skip_profile('spark_session')
 class TestBaseAdapterMethod(BaseAdapterMethod):
     pass
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
new file mode 100644
index 00000000..059412f1
--- /dev/null
+++ b/tests/functional/adapter/test_python_model.py
@@ -0,0 +1,59 @@
+import os
+import pytest
+from dbt.tests.util import run_dbt, write_file, run_dbt_and_capture
+from dbt.tests.adapter.python_model.test_python_model import BasePythonModelTests, BasePythonIncrementalTests
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestPythonModelSpark(BasePythonModelTests):
+    pass
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {}
+
+
+models__simple_python_model = """
+import pandas
+
+def model(dbt, spark):
+    dbt.config(
+        materialized='table',
+    )
+    data = [[1,2]] * 10
+    return spark.createDataFrame(data, schema=['test', 'test2'])
+"""
+models__simple_python_model_v2 = """
+import pandas
+
+def model(dbt, spark):
+    dbt.config(
+        materialized='table',
+    )
+    data = [[1,2]] * 10
+    return spark.createDataFrame(data, schema=['test1', 'test3'])
+"""
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestChangingSchemaSpark:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {"simple_python_model.py": models__simple_python_model}
+
+    def test_changing_schema_with_log_validation(self, project, logs_dir):
+        run_dbt(["run"])
+        write_file(
+            models__simple_python_model_v2,
+            project.project_root + "/models",
+            "simple_python_model.py",
+        )
+        run_dbt(["run"])
+        log_file = os.path.join(logs_dir, "dbt.log")
+        with open(log_file, "r") as f:
+            log = f.read()
+            # validate #5510 log_code_execution works
+            assert "On model.test.simple_python_model:" in log
+            assert "spark.createDataFrame(data, schema=['test1', 'test3'])" in log
+            assert "Execution status: OK in" in log
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
index 839f167e..3848d11a 100644
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ b/tests/integration/incremental_strategies/test_incremental_strategies.py
@@ -60,6 +60,8 @@ def run_and_test(self):
     def test_insert_overwrite_apache_spark(self):
         self.run_and_test()
 
+    # This test requires settings on the test cluster
+    # more info at https://docs.getdbt.com/reference/resource-configs/spark-configs#the-insert_overwrite-strategy
     @use_profile("databricks_cluster")
     def test_insert_overwrite_databricks_cluster(self):
         self.run_and_test()

From 7f6cffecf38b7c41aa441eb020d464ba1e20bf9e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 29 Jul 2022 15:27:56 -0400
Subject: [PATCH 326/603] Bumping version to 1.3.0b1 (#412)

* Bumping version to 1.3.0b1

* Update CHANGELOG.md

* Fix whitespace

* Fixing whitespace

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                  | 2 +-
 CHANGELOG.md                      | 9 +++++++--
 dbt/adapters/spark/__version__.py | 2 +-
 setup.py                          | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 605b6f37..ef3954f4 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.0a1
+current_version = 1.3.0b1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d015a26c..5948429a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,16 @@
-## dbt-spark 1.3.0b1 (Release TBD)
+## dbt-spark 1.3.0b2 (Release TBD)
+
+## dbt-spark 1.3.0b1 (July 29, 2022)
 
 ### Features
-- support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+- Support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
 
 ### Fixes
 - Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
+### Under the hood
+- Support core incremental refactor ([#394](https://github.com/dbt-labs/dbt-spark/issues/394))
+
 ### Contributors
 - [@barberscott](https://github.com/barberscott)  ([#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index a9fe3c3e..4b49b750 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.3.0a1"
+version = "1.3.0b1"
diff --git a/setup.py b/setup.py
index cb0c40ae..229e89a1 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.3.0a1"
+package_version = "1.3.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 9b00895a51d14745d896ac17d08e6c2423a4703a Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Tue, 9 Aug 2022 15:34:57 -0500
Subject: [PATCH 327/603] init pr push for ct-1005 (#418)

* init pr push for ct-1005

* add changelog

* change pointer to spark

* eof fix

* remove ref to dbt-core in changelog existence

* typo fix

* typo and change of ref

* add conditional logic for kinds security and dependency and change NO ISSUE ref to spark issue #417
---
 .changes/0.0.0.md                             |   5 +
 .changes/1.3.0-b1.md                          |  11 +
 .changes/1.3.0/Features-20220808-141141.yaml  |   8 +
 .changes/1.3.0/Fixes-20220808-141623.yaml     |   8 +
 .../1.3.0/Under the Hood-20220808-141320.yaml |   7 +
 .changes/README.md                            |   3 +
 .changes/header.tpl.md                        |   6 +
 .changes/unreleased/.gitkeep                  |   0
 .../unreleased/Features-20220808-142118.yaml  |   7 +
 .changie.yaml                                 |  62 ++++
 .github/pull_request_template.md              |   2 +-
 .github/workflows/bot-changelog.yml           |  61 ++++
 .github/workflows/changelog-existence.yml     |  41 +++
 CHANGELOG.md                                  | 275 +-----------------
 CONTRIBUTING.md                               |  10 +
 15 files changed, 246 insertions(+), 260 deletions(-)
 create mode 100644 .changes/0.0.0.md
 create mode 100644 .changes/1.3.0-b1.md
 create mode 100644 .changes/1.3.0/Features-20220808-141141.yaml
 create mode 100644 .changes/1.3.0/Fixes-20220808-141623.yaml
 create mode 100644 .changes/1.3.0/Under the Hood-20220808-141320.yaml
 create mode 100644 .changes/README.md
 create mode 100644 .changes/header.tpl.md
 create mode 100644 .changes/unreleased/.gitkeep
 create mode 100644 .changes/unreleased/Features-20220808-142118.yaml
 create mode 100644 .changie.yaml
 create mode 100644 .github/workflows/bot-changelog.yml
 create mode 100644 .github/workflows/changelog-existence.yml

diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
new file mode 100644
index 00000000..5acfb3db
--- /dev/null
+++ b/.changes/0.0.0.md
@@ -0,0 +1,5 @@
+## Previous Releases
+For information on prior major and minor releases, see their changelogs:
+- [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
+- [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
+- [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
diff --git a/.changes/1.3.0-b1.md b/.changes/1.3.0-b1.md
new file mode 100644
index 00000000..ef64f439
--- /dev/null
+++ b/.changes/1.3.0-b1.md
@@ -0,0 +1,11 @@
+## dbt-spark 1.3.0-b1 - July 29, 2022
+
+### Features
+- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+### Fixes
+- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+### Under the Hood
+- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
+
+### Contributors
+- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
diff --git a/.changes/1.3.0/Features-20220808-141141.yaml b/.changes/1.3.0/Features-20220808-141141.yaml
new file mode 100644
index 00000000..444a3062
--- /dev/null
+++ b/.changes/1.3.0/Features-20220808-141141.yaml
@@ -0,0 +1,8 @@
+kind: Features
+body: Support python model through notebook, currently supported materializations
+  are table and incremental
+time: 2022-08-08T14:11:41.906131-05:00
+custom:
+  Author: ChenyuLInx
+  Issue: "417"
+  PR: "377"
diff --git a/.changes/1.3.0/Fixes-20220808-141623.yaml b/.changes/1.3.0/Fixes-20220808-141623.yaml
new file mode 100644
index 00000000..793e3e5b
--- /dev/null
+++ b/.changes/1.3.0/Fixes-20220808-141623.yaml
@@ -0,0 +1,8 @@
+kind: Fixes
+body: Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so
+  on Linux
+time: 2022-08-08T14:16:23.846876-05:00
+custom:
+  Author: barberscot
+  Issue: "397"
+  PR: "398"
diff --git a/.changes/1.3.0/Under the Hood-20220808-141320.yaml b/.changes/1.3.0/Under the Hood-20220808-141320.yaml
new file mode 100644
index 00000000..82535f92
--- /dev/null
+++ b/.changes/1.3.0/Under the Hood-20220808-141320.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Support core incremental refactor
+time: 2022-08-08T14:13:20.576155-05:00
+custom:
+  Author: gshank
+  Issue: "4402"
+  PR: "394"
diff --git a/.changes/README.md b/.changes/README.md
new file mode 100644
index 00000000..dc6106df
--- /dev/null
+++ b/.changes/README.md
@@ -0,0 +1,3 @@
+# CHANGELOG
+
+To view information about the changelog operation we suggest reading this [README](https://github.com/dbt-labs/dbt-spark/blob/main/.changes/README.md) found in `dbt-spark`.
diff --git a/.changes/header.tpl.md b/.changes/header.tpl.md
new file mode 100644
index 00000000..251ea5d5
--- /dev/null
+++ b/.changes/header.tpl.md
@@ -0,0 +1,6 @@
+# dbt-spark Changelog
+
+- This file provides a full account of all changes to `dbt-spark`.
+- Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
+- "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
+- Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
diff --git a/.changes/unreleased/.gitkeep b/.changes/unreleased/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/.changes/unreleased/Features-20220808-142118.yaml b/.changes/unreleased/Features-20220808-142118.yaml
new file mode 100644
index 00000000..9c110e93
--- /dev/null
+++ b/.changes/unreleased/Features-20220808-142118.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Add changie to dbt-spark
+time: 2022-08-08T14:21:18.569756-05:00
+custom:
+  Author: mcknight-42
+  Issue: "416"
+  PR: "418"
diff --git a/.changie.yaml b/.changie.yaml
new file mode 100644
index 00000000..f5800f32
--- /dev/null
+++ b/.changie.yaml
@@ -0,0 +1,62 @@
+changesDir: .changes
+unreleasedDir: unreleased
+headerPath: header.tpl.md
+versionHeaderPath: ""
+changelogPath: CHANGELOG.md
+versionExt: md
+versionFormat: '## dbt-spark {{.Version}} - {{.Time.Format "January 02, 2006"}}'
+kindFormat: '### {{.Kind}}'
+changeFormat: '- {{.Body}} ([#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), [#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+kinds:
+- label: Breaking Changes
+- label: Features
+- label: Fixes
+- label: Under the Hood
+- label: Dependencies
+  changeFormat: '- {{.Body}} ({{if ne .Custom.Issue ""}}[#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), {{end}}[#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+- label: Security
+  changeFormat: '- {{.Body}} ({{if ne .Custom.Issue ""}}[#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), {{end}}[#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+custom:
+- key: Author
+  label: GitHub Username(s) (separated by a single space if multiple)
+  type: string
+  minLength: 3
+- key: Issue
+  label: GitHub Issue Number
+  type: int
+  minLength: 4
+- key: PR
+  label: GitHub Pull Request Number
+  type: int
+  minLength: 4
+footerFormat: |
+  {{- $contributorDict := dict }}
+  {{- /* any names added to this list should be all lowercase for later matching purposes */}}
+  {{- $core_team := list "emmyoop" "nathaniel-may" "gshank" "leahwicz" "chenyulinx" "stu-k" "iknox-fa" "versusfacit" "mcknight-42" "jtcohen6" "dependabot[bot]" "snyk-bot" }}
+  {{- range $change := .Changes }}
+    {{- $authorList := splitList " " $change.Custom.Author }}
+    {{- /* loop through all authors for a PR */}}
+    {{- range $author := $authorList }}
+      {{- $authorLower := lower $author }}
+      {{- /* we only want to include non-core team contributors */}}
+      {{- if not (has $authorLower $core_team)}}
+        {{- $pr := $change.Custom.PR }}
+        {{- /* check if this contributor has other PRs associated with them already */}}
+        {{- if hasKey $contributorDict $author }}
+          {{- $prList := get $contributorDict $author }}
+          {{- $prList = append $prList $pr  }}
+          {{- $contributorDict := set $contributorDict $author $prList }}
+        {{- else }}
+          {{- $prList := list $change.Custom.PR }}
+          {{- $contributorDict := set $contributorDict $author $prList }}
+        {{- end }}
+      {{- end}}
+    {{- end}}
+  {{- end }}
+  {{- /* no indentation here for formatting so the final markdown doesn't have unneeded indentations */}}
+  {{- if $contributorDict}}
+  ### Contributors
+  {{- range $k,$v := $contributorDict }}
+  - [@{{$k}}](https://github.com/{{$k}}) ({{ range $index, $element := $v }}{{if $index}}, {{end}}[#{{$element}}](https://github.com/dbt-labs/dbt-spark/pull/{{$element}}){{end}})
+  {{- end }}
+  {{- end }}
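
The footerFormat template above builds an author-to-PR mapping while skipping core-team members. In plain Python, the same grouping logic would look roughly like the sketch below (the dict shape mirrors the changie YAML entries; this is an illustration, not code from the repo):

# Plain-Python sketch of what the footerFormat template above computes:
# group non-core contributors by author, collecting the PRs they appear in.
CORE_TEAM = {
    "emmyoop", "nathaniel-may", "gshank", "leahwicz", "chenyulinx", "stu-k",
    "iknox-fa", "versusfacit", "mcknight-42", "jtcohen6", "dependabot[bot]",
    "snyk-bot",
}


def contributors(changes):
    """changes: iterable of dicts shaped like the changie YAML entries."""
    by_author = {}
    for change in changes:
        for author in change["custom"]["Author"].split(" "):
            if author.lower() in CORE_TEAM:
                continue  # only non-core contributors get credited
            by_author.setdefault(author, []).append(change["custom"]["PR"])
    return by_author

Run over the 1.3.0 entries above, this would credit barberscot for PR 398 and drop the core-team authors, matching the Contributors section changie renders.
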
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 5928b1cb..c4a5c53b 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -18,4 +18,4 @@ resolves #
 - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
 - [ ] I have run this code in development and it appears to resolve the stated issue
 - [ ] This PR includes tests, or tests are not required/relevant for this PR
-- [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-spark next" section.
+- [ ] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#Adding-CHANGELOG-Entry)
diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
new file mode 100644
index 00000000..d8056efe
--- /dev/null
+++ b/.github/workflows/bot-changelog.yml
@@ -0,0 +1,61 @@
+# **what?**
+# When bots create a PR, this action will add a corresponding changie yaml file to that
+# PR when a specific label is added.
+#
+# The file is created off a template:
+#
+# kind: 
+# body: 
+# time: 
+# custom:
+#   Author: 
+#   Issue: 4904
+#   PR: 
+#
+# **why?**
+# Automate changelog generation for more visibility with automated bot PRs.
+#
+# **when?**
+# Once a PR is created, the label should be added to the PR before or after creation. You can also
+#  manually trigger this by adding the appropriate label at any time.
+#
+# **how to add another bot?**
+# Add the label and changie kind to the include matrix.  That's it!
+#
+
+name: Bot Changelog
+
+on:
+  pull_request:
+    # catch when the PR is opened with the label or when the label is added
+    types: [opened, labeled]
+
+permissions:
+  contents: write
+  pull-requests: read
+
+jobs:
+  generate_changelog:
+    strategy:
+      matrix:
+        include:
+          - label: "dependencies"
+            changie_kind: "Dependency"
+          - label: "snyk"
+            changie_kind: "Security"
+    runs-on: ubuntu-latest
+
+    steps:
+
+    - name: Create and commit changelog on bot PR
+      if: "contains(github.event.pull_request.labels.*.name, ${{ matrix.label }})"
+      id: bot_changelog
+      uses: emmyoop/changie_bot@v1.0
+      with:
+        GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }}
+        commit_author_name: "Github Build Bot"
+        commit_author_email: ""
+        commit_message: "Add automated changelog yaml from template for bot PR"
+        changie_kind: ${{ matrix.changie_kind }}
+        label: ${{ matrix.label }}
+        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  Issue: 417\n  PR: ${{ github.event.pull_request.number }}\n"
diff --git a/.github/workflows/changelog-existence.yml b/.github/workflows/changelog-existence.yml
new file mode 100644
index 00000000..6e51e8af
--- /dev/null
+++ b/.github/workflows/changelog-existence.yml
@@ -0,0 +1,41 @@
+# **what?**
+# Checks that a file has been committed under the /.changes directory
+# as a new CHANGELOG entry.  Cannot check for a specific filename as
+# it is dynamically generated by change type and timestamp.
+# This workflow should not require any secrets since it runs for PRs
+# from forked repos.
+# By default, secrets are not passed to workflows running from
+# a forked repo.
+
+# **why?**
+# Ensure code change gets reflected in the CHANGELOG.
+
+# **when?**
+# This will run for all PRs going into main and *.latest.  It will
+# run when they are opened, reopened, when any label is added or removed
+# and when new code is pushed to the branch.  The action will then get
+# skipped if the 'Skip Changelog' label is present in any of the labels.
+
+name: Check Changelog Entry
+
+on:
+  pull_request:
+    types: [opened, reopened, labeled, unlabeled, synchronize]
+  workflow_dispatch:
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+  pull-requests: write
+
+
+jobs:
+  changelog:
+    uses: dbt-labs/actions/.github/workflows/changelog-existence.yml@main
+    with:
+      changelog_comment: 'Thank you for your pull request! We could not find a changelog entry for this change. For details on how to document a change, see the [dbt-spark contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.MD).'
+      skip_label: 'Skip Changelog'
+    secrets: inherit # this is only acceptable because we own the action we're calling
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5948429a..4f187e31 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,266 +1,23 @@
-## dbt-spark 1.3.0b2 (Release TBD)
+# dbt-spark Changelog
 
-## dbt-spark 1.3.0b1 (July 29, 2022)
+- This file provides a full account of all changes to `dbt-spark`.
+- Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
+- "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
+- Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-### Features
-- Support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-
-### Fixes
-- Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
-
-### Under the hood
-- Support core incremental refactor ([#394](https://github.com/dbt-labs/dbt-spark/issues/394))
-
-### Contributors
-- [@barberscott](https://github.com/barberscott)  ([#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
-
-## dbt-spark 1.2.0rc1 (July 12, 2022)
-
-### Fixes
-- Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
-- Apache Spark version upgraded to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
-
-### Features
-- Add grants to materializations ([#366](https://github.com/dbt-labs/dbt-spark/issues/366), [#381](https://github.com/dbt-labs/dbt-spark/pull/381))
-
-### Under the hood
-- Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
-- Make minimal changes to support dbt Core incremental materialization refactor ([#402](https://github.com/dbt-labs/dbt-spark/issue/402), [#394](httpe://github.com/dbt-labs/dbt-spark/pull/394))
-
-### Contributors
-- [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
-- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
-
-## dbt-spark 1.2.0b1 (June 24, 2022)
-
-### Fixes
-- `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
-
-### Under the hood
-- Initialize lift + shift for cross-db macros ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
-- Add invocation env to user agent string ([#367](https://github.com/dbt-labs/dbt-spark/pull/367))
-- Use dispatch pattern for get_columns_in_relation_raw macro ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
-
-### Contributors
-- [@ueshin](https://github.com/ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
-- [@dbeatty10](https://github.com/dbeatty10) ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
-
-## dbt-spark 1.1.0 (April 28, 2022)
-
-### Features
-- Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
-- rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
-
-### Under the hood
-- Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356))
-- Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
-- Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320))
-- Override adapter method 'run_sql_for_tests' ([#323](https://github.com/dbt-labs/dbt-spark/issues/323), [#324](https://github.com/dbt-labs/dbt-spark/pull/324))
-- when a table or view doesn't exist, 'adapter.get_columns_in_relation' will return empty list instead of fail ([#328]https://github.com/dbt-labs/dbt-spark/pull/328)
-
-### Contributors
-- [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
-- [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
-
-## dbt-spark 1.1.0b1 (March 23, 2022)
-
-### Features
-- Adds new integration test to check against new ability to allow unique_key to be a list. ([#282](https://github.com/dbt-labs/dbt-spark/issues/282)), [#291](https://github.com/dbt-labs/dbt-spark/pull/291))
-
-### Fixes
-- Closes the connection properly ([#280](https://github.com/dbt-labs/dbt-spark/issues/280), [#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-### Under the hood
-- get_response -> AdapterResponse ([#265](https://github.com/dbt-labs/dbt-spark/pull/265))
-- Adding stale Actions workflow ([#275](https://github.com/dbt-labs/dbt-spark/pull/275))
-- Update plugin author name (`fishtown-analytics` → `dbt-labs`) in ODBC user agent ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
-- Configure insert_overwrite models to use parquet ([#301](https://github.com/dbt-labs/dbt-spark/pull/301))
-
-### Contributors
-- [@amychen1776](https://github.com/amychen1776) ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
-- [@ueshin](https://github.com/ueshin) ([#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-## dbt-spark 1.0.1rc0 (Release TBD)
-
-### Fixes
-- Closes the connection properly ([#280](https://github.com/dbt-labs/dbt-spark/issues/280), [#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-### Contributors
-- [@ueshin](https://github.com/ueshin) ([#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-## dbt-spark 1.0.0 (December 3, 2021)
-
-### Fixes
-- Incremental materialization corrected to respect `full_refresh` config, by using `should_full_refresh()` macro ([#260](https://github.com/dbt-labs/dbt-spark/issues/260), [#262](https://github.com/dbt-labs/dbt-spark/pull/262/))
-
-### Contributors
-- [@grindheim](https://github.com/grindheim) ([#262](https://github.com/dbt-labs/dbt-spark/pull/262/))
-
-## dbt-spark 1.0.0rc2 (November 24, 2021)
-
-### Features
-- Add support for Apache Hudi (hudi file format) which supports incremental merge strategies ([#187](https://github.com/dbt-labs/dbt-spark/issues/187), [#210](https://github.com/dbt-labs/dbt-spark/pull/210))
-
-### Under the hood
-- Refactor seed macros: remove duplicated code from dbt-core, and provide clearer logging of SQL parameters that differ by connection method ([#249](https://github.com/dbt-labs/dbt-spark/issues/249), [#250](https://github.com/dbt-labs/dbt-snowflake/pull/250))
-- Replace `sample_profiles.yml` with `profile_template.yml`, for use with new `dbt init` ([#247](https://github.com/dbt-labs/dbt-spark/pull/247))
-
-### Contributors
-- [@vingov](https://github.com/vingov) ([#210](https://github.com/dbt-labs/dbt-spark/pull/210))
-
-## dbt-spark 1.0.0rc1 (November 10, 2021)
-
-### Under the hood
-- Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([dbt-core#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#253](https://github.com/dbt-labs/dbt-snowflake/pull/253))
-- Add support for structured logging ([#251](https://github.com/dbt-labs/dbt-spark/pull/251))
-
-## dbt-spark 0.21.1 (Release TBD)
-
-## dbt-spark 0.21.1rc1 (November 3, 2021)
-
-### Fixes
-- Fix `--store-failures` for tests, by suppressing irrelevant error in `comment_clause()` macro ([#232](https://github.com/dbt-labs/dbt-spark/issues/232), [#233](https://github.com/dbt-labs/dbt-spark/pull/233))
-- Add support for `on_schema_change` config in incremental models: `ignore`, `fail`, `append_new_columns`. For `sync_all_columns`, removing columns is not supported by Apache Spark or Delta Lake ([#198](https://github.com/dbt-labs/dbt-spark/issues/198), [#226](https://github.com/dbt-labs/dbt-spark/issues/226), [#229](https://github.com/dbt-labs/dbt-spark/pull/229))
-- Add `persist_docs` call to incremental model ([#224](https://github.com/dbt-labs/dbt-spark/issues/224), [#234](https://github.com/dbt-labs/dbt-spark/pull/234))
-
-### Contributors
-- [@binhnefits](https://github.com/binhnefits) ([#234](https://github.com/dbt-labs/dbt-spark/pull/234))
-
-## dbt-spark 0.21.0 (October 4, 2021)
-
-### Fixes
-- Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
-- Parse properly columns when there are struct fields to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202))
-
-### Under the hood
-- Add `unique_field` to better understand adapter adoption in anonymous usage tracking ([#211](https://github.com/dbt-labs/dbt-spark/pull/211))
-
-### Contributors
-- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
-- [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204))
-
-## dbt-spark 0.21.0b2 (August 20, 2021)
-
-### Fixes
-- Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192))
-- Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201))
-- Add `retry_all` configuration setting to retry all connection issues, not just when the `_is_retryable_error` function determines ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))
-
-### Contributors
-- [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192))
-- [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201))
-- [@gregingenii](https://github.com/gregingenii) ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))
-
-## dbt-spark 0.21.0b1 (August 3, 2021)
-
-## dbt-spark 0.20.1 (August 2, 2021)
-
-## dbt-spark 0.20.1rc1 (August 2, 2021)
-
-### Fixes
-- Fix `get_columns_in_relation` when called on models created in the same run ([#196](https://github.com/dbt-labs/dbt-spark/pull/196), [#197](https://github.com/dbt-labs/dbt-spark/pull/197))
-
-### Contributors
-- [@ali-tny](https://github.com/ali-tny) ([#197](https://github.com/fishtown-analytics/dbt-spark/pull/197))
-
-
-## dbt-spark 0.20.0 (July 12, 2021)
-
-## dbt-spark 0.20.0rc2 (July 7, 2021)
+## dbt-spark 1.3.0-b1 - July 29, 2022
 
 ### Features
-
-- Add support for `merge_update_columns` config in `merge`-strategy incremental models ([#183](https://github.com/fishtown-analytics/dbt-spark/pull/183), [#184](https://github.com/fishtown-analytics/dbt-spark/pull/184))
-
+- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
 ### Fixes
-
-- Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180))
-
-## dbt-spark 0.20.0rc1 (June 8, 2021)
-
-### Features
-
-- Allow user to specify `use_ssl` ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169))
-- Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171))
-- Add support for column-level `persist_docs` on Delta tables ([#84](https://github.com/fishtown-analytics/dbt-spark/pull/84), [#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))
-
-### Fixes
-- Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159))
-- Explicitly cast column types when inserting seeds ([#139](https://github.com/fishtown-analytics/dbt-spark/pull/139), [#166](https://github.com/fishtown-analytics/dbt-spark/pull/166))
-
-### Under the hood
-- Parse information returned by `list_relations_without_caching` macro to speed up catalog generation ([#93](https://github.com/fishtown-analytics/dbt-spark/issues/93), [#160](https://github.com/fishtown-analytics/dbt-spark/pull/160))
-- More flexible host passing, https:// can be omitted ([#153](https://github.com/fishtown-analytics/dbt-spark/issues/153))
+- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+### Under the Hood
+- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
-- [@friendofasquid](https://github.com/friendofasquid) ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159))
-- [@franloza](https://github.com/franloza) ([#160](https://github.com/fishtown-analytics/dbt-spark/pull/160))
-- [@Fokko](https://github.com/Fokko) ([#165](https://github.com/fishtown-analytics/dbt-spark/pull/165))
-- [@rahulgoyal2987](https://github.com/rahulgoyal2987) ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169))
-- [@JCZuurmond](https://github.com/JCZuurmond) ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171))
-- [@cristianoperez](https://github.com/cristianoperez) ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))
-
-
-## dbt-spark 0.19.1 (April 2, 2021)
-
-## dbt-spark 0.19.1b2 (February 26, 2021)
-
-### Under the hood
-- Update serialization calls to use new API in dbt-core `0.19.1b2` ([#150](https://github.com/fishtown-analytics/dbt-spark/pull/150))
-
-## dbt-spark 0.19.0.1 (February 26, 2021)
-
-### Fixes
-- Fix package distribution to include incremental model materializations ([#151](https://github.com/fishtown-analytics/dbt-spark/pull/151), [#152](https://github.com/fishtown-analytics/dbt-spark/issues/152))
-
-## dbt-spark 0.19.0 (February 21, 2021)
-
-### Breaking changes
-- Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. ([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141))
-
-### Fixes
-- Capture hard-deleted records in snapshot merge, when `invalidate_hard_deletes` config is set ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/143), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/144))
-
-## dbt-spark 0.19.0rc1 (January 8, 2021)
-
-### Breaking changes
-- Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126))
-
-### Under the hood
-- Enable `CREATE OR REPLACE` support when using Delta. Instead of dropping and recreating the table, it will keep the existing table, and add a new version as supported by Delta. This will ensure that the table stays available when running the pipeline, and you can track the history.
-- Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120))
-
-### Fixes
-- Handle case of 0 retries better for HTTP Spark Connections ([#132](https://github.com/fishtown-analytics/dbt-spark/pull/132))
-
-### Contributors
-- [@danielvdende](https://github.com/danielvdende) ([#132](https://github.com/fishtown-analytics/dbt-spark/pull/132))
-- [@Fokko](https://github.com/Fokko) ([#125](https://github.com/fishtown-analytics/dbt-spark/pull/125))
-
-## dbt-spark 0.18.1.1 (November 13, 2020)
-
-### Fixes
-- Fix `extras_require` typo to enable `pip install dbt-spark[ODBC]` (([#121](https://github.com/fishtown-analytics/dbt-spark/pull/121)), ([#122](https://github.com/fishtown-analytics/dbt-spark/pull/122)))
-
-## dbt-spark 0.18.1 (November 6, 2020)
-
-### Features
-- Allows users to specify `auth` and `kerberos_service_name` ([#107](https://github.com/fishtown-analytics/dbt-spark/pull/107))
-- Add support for ODBC driver connections to Databricks clusters and endpoints ([#116](https://github.com/fishtown-analytics/dbt-spark/pull/116))
-
-### Under the hood
-- Updated README links ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115))
-- Support complete atomic overwrite of non-partitioned incremental models ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117))
-- Update to support dbt-core 0.18.1 ([#110](https://github.com/fishtown-analytics/dbt-spark/pull/110), [#118](https://github.com/fishtown-analytics/dbt-spark/pull/118))
-
-### Contributors
-- [@danielhstahl](https://github.com/danielhstahl) ([#107](https://github.com/fishtown-analytics/dbt-spark/pull/107))
-- [@collinprather](https://github.com/collinprather) ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115))
-- [@charlottevdscheun](https://github.com/charlottevdscheun) ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117))
-- [@Fokko](https://github.com/Fokko) ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117))
-
-## dbt-spark 0.18.0 (September 18, 2020)
-
-### Under the hood
-- Make a number of changes to support dbt-adapter-tests ([#103](https://github.com/fishtown-analytics/dbt-spark/pull/103))
-- Update to support dbt-core 0.18.0. Run CI tests against local Spark, Databricks ([#105](https://github.com/fishtown-analytics/dbt-spark/pull/105))
+- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+## Previous Releases
+For information on prior major and minor releases, see their changelogs:
+- [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
+- [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
+- [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c0d9bb3d..1d6e76d3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -90,6 +90,16 @@ Many changes will require and update to the `dbt-spark` docs here are some usefu
 - The changes made are likely to impact one or both of [Spark Profile](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile), or [Saprk Configs](https://docs.getdbt.com/reference/resource-configs/spark-configs).
 - We ask every community member who makes a user-facing change to open an issue or PR regarding doc changes.
 
+## Adding CHANGELOG Entry
+
+We use [changie](https://changie.dev) to generate `CHANGELOG` entries. **Note:** Do not edit the `CHANGELOG.md` directly. Your modifications will be lost.
+
+Follow the steps to [install `changie`](https://changie.dev/guide/installation/) for your system.
+
+Once changie is installed and your PR is created, simply run `changie new` and changie will walk you through the process of creating a changelog entry.  Commit the file that's created and your changelog entry is complete!
+
+You don't need to worry about which `dbt-spark` version your change will go into. Just create the changelog entry with `changie`, and open your PR against the `main` branch. All merged changes will be included in the next minor version of `dbt-spark`. The Core maintainers _may_ choose to "backport" specific changes in order to patch older minor versions. In that case, a maintainer will take care of that backport after merging your PR, before releasing the new version of `dbt-spark`.
+
 ## Submitting a Pull Request
 
 dbt Labs provides a CI environment to test changes to the `dbt-spark` adapter, and periodic checks against the development version of `dbt-core` through Github Actions.

From 24e796d52d0201bdb4c45fac2e99a2a848cbe853 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Mon, 22 Aug 2022 10:23:05 -0500
Subject: [PATCH 328/603] Add ref to pre 1.0.0 in 0.0.0.md (#428)

* init pr push for ct-1005

* add changelog

* change pointer to spark

* eof fix

* remove ref to dbt-core in changelog existence

* typo fix

* typo and change of ref

* add conditional logic for kinds security and dependency and change NO ISSUE ref to spark issue #417

* add ref to pre 1.0.0 changes

* add ref to pre 1.0.0 changes

* fix eof fail on test

* fix eof fail on test

* expand out ref to past 1.0.0

* run changie merge

* repush changes

* remove excess spacing
---
 .changes/0.0.0.md | 3 +++
 CHANGELOG.md      | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
index 5acfb3db..14c2cf9e 100644
--- a/.changes/0.0.0.md
+++ b/.changes/0.0.0.md
@@ -3,3 +3,6 @@ For information on prior major and minor releases, see their changelogs:
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
 - [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
+- [0.21](https://github.com/dbt-labs/dbt-spark/blob/0.21.latest/CHANGELOG.md)
+- [0.20](https://github.com/dbt-labs/dbt-spark/blob/0.20.latest/CHANGELOG.md)
+- [0.19 and earlier](https://github.com/dbt-labs/dbt-spark/blob/0.19.latest/CHANGELOG.md)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f187e31..0491a7b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,15 +9,21 @@
 
 ### Features
 - Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+
 ### Fixes
 - Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+
 ### Under the Hood
 - Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
 - [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
 - [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
+- [0.21](https://github.com/dbt-labs/dbt-spark/blob/0.21.latest/CHANGELOG.md)
+- [0.20](https://github.com/dbt-labs/dbt-spark/blob/0.20.latest/CHANGELOG.md)
+- [0.19 and earlier](https://github.com/dbt-labs/dbt-spark/blob/0.19.latest/CHANGELOG.md)

From c9698f62118b9c5408b53bb8cc3be03ae5d3d8a4 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Thu, 25 Aug 2022 10:02:50 -0700
Subject: [PATCH 329/603] add supported language (#440)

* add supported language

* add changelog
---
 .changes/unreleased/Under the Hood-20220825-073413.yaml    | 7 +++++++
 .../macros/materializations/incremental/incremental.sql    | 2 +-
 dbt/include/spark/macros/materializations/table.sql        | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220825-073413.yaml

diff --git a/.changes/unreleased/Under the Hood-20220825-073413.yaml b/.changes/unreleased/Under the Hood-20220825-073413.yaml
new file mode 100644
index 00000000..71e187ca
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220825-073413.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: specify supported_languages for materializations that support python models
+time: 2022-08-25T07:34:13.397367-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "437"
+  PR: "440"
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 91cba9e5..1a92351c 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -1,4 +1,4 @@
-{% materialization incremental, adapter='spark' -%}
+{% materialization incremental, adapter='spark', supported_languages=['sql', 'python'] -%}
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
   {%- set raw_strategy = config.get('incremental_strategy') or 'append' -%}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 6a02ea16..d39ba0b4 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -1,4 +1,4 @@
-{% materialization table, adapter = 'spark' %}
+{% materialization table, adapter = 'spark', supported_languages=['sql', 'python'] %}
   {%- set language = model['language'] -%}
   {%- set identifier = model['alias'] -%}
   {%- set grant_config = config.get('grants') -%}
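
For reference, declaring supported_languages=['sql', 'python'] on these materializations is what lets a Python model like the one below resolve to the table materialization. This is a minimal sketch mirroring the test model added earlier in this patch series; the file name and column names are illustrative:

# models/my_python_model.py -- illustrative sketch; file and column names are assumed
def model(dbt, spark):
    # Routes to the Spark 'table' materialization, which now declares
    # supported_languages=['sql', 'python'].
    dbt.config(materialized="table")
    data = [[1, 2]] * 10
    return spark.createDataFrame(data, schema=["col_a", "col_b"])
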

From 5297b9225263fb33338fc54f004365ec1ad47104 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 25 Aug 2022 15:27:29 -0500
Subject: [PATCH 330/603] version bump, changie, and backports (#434)

---
 .github/workflows/backport.yml     | 42 +++++++++++++
 .github/workflows/version-bump.yml | 97 ++++--------------------------
 2 files changed, 53 insertions(+), 86 deletions(-)
 create mode 100644 .github/workflows/backport.yml

diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml
new file mode 100644
index 00000000..8c0355bd
--- /dev/null
+++ b/.github/workflows/backport.yml
@@ -0,0 +1,42 @@
+
+
+# **what?**
+# When a PR is merged, if it has the backport label, it will create
+# a new PR to backport those changes to the given branch. If it can't
+# cleanly do a backport, it will comment on the merged PR of the failure.
+#
+# Label naming convention: "backport "
+# Example: backport 1.0.latest
+#
+# You MUST "Squash and merge" the original PR or this won't work.
+
+# **why?**
+# Changes sometimes need to be backported to release branches.
+# This automates the backporting process
+
+# **when?**
+# Once a PR is "Squash and merge"'d, this is triggered by adding a backport label
+
+name: Backport
+on:
+  pull_request:
+    types:
+      - labeled
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  backport:
+    name: Backport
+    runs-on: ubuntu-latest
+    # Only react to merged PRs for security reasons.
+    # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target.
+    if: >
+      github.event.pull_request.merged
+      && contains(github.event.label.name, 'backport')
+    steps:
+      - uses: tibdex/backport@v2.0.2
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml
index a8b3236c..bde34d68 100644
--- a/.github/workflows/version-bump.yml
+++ b/.github/workflows/version-bump.yml
@@ -1,18 +1,15 @@
 # **what?**
-# This workflow will take a version number and a dry run flag. With that
+# This workflow will take the new version number to bump to. With that
 # it will run versionbump to update the version number everywhere in the
-# code base and then generate an update Docker requirements file. If this
-# is a dry run, a draft PR will open with the changes. If this isn't a dry
-# run, the changes will be committed to the branch this is run on.
+# code base and then run changie to create the corresponding changelog.
+# A PR will be created with the changes that can be reviewed before committing.
 
 # **why?**
 # This is to aid in releasing dbt and making sure we have updated
-# the versions and Docker requirements in all places.
+# the version in all places and generated the changelog.
 
 # **when?**
-# This is triggered either manually OR
-# from the repository_dispatch event "version-bump" which is sent from
-# the dbt-release repo Action
+# This is triggered manually
 
 name: Version Bump
 
@@ -20,84 +17,12 @@ on:
   workflow_dispatch:
     inputs:
       version_number:
-       description: 'The version number to bump to'
+       description: 'The version number to bump to (ex. 1.2.0, 1.3.0b1)'
        required: true
-      is_dry_run:
-       description: 'Creates a draft PR to allow testing instead of committing to a branch'
-       required: true
-       default: 'true'
-  repository_dispatch:
-    types: [version-bump]
 
 jobs:
-  bump:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v2
-
-      - name: Set version and dry run values
-        id: variables
-        env:
-          VERSION_NUMBER: "${{ github.event.client_payload.version_number == '' && github.event.inputs.version_number || github.event.client_payload.version_number }}"
-          IS_DRY_RUN: "${{ github.event.client_payload.is_dry_run == '' && github.event.inputs.is_dry_run || github.event.client_payload.is_dry_run }}"
-        run: |
-          echo Repository dispatch event version: ${{ github.event.client_payload.version_number }}
-          echo Repository dispatch event dry run: ${{ github.event.client_payload.is_dry_run }}
-          echo Workflow dispatch event version: ${{ github.event.inputs.version_number }}
-          echo Workflow dispatch event dry run: ${{ github.event.inputs.is_dry_run }}
-          echo ::set-output name=VERSION_NUMBER::$VERSION_NUMBER
-          echo ::set-output name=IS_DRY_RUN::$IS_DRY_RUN
-
-      - uses: actions/setup-python@v2
-        with:
-          python-version: "3.8"
-
-      - name: Install python dependencies
-        run: |
-          sudo apt-get install libsasl2-dev
-          python3 -m venv env
-          source env/bin/activate
-          pip install --upgrade pip
-
-      - name: Create PR branch
-        if: ${{ steps.variables.outputs.IS_DRY_RUN  == 'true' }}
-        run: |
-          git checkout -b bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
-          git push origin bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
-          git branch --set-upstream-to=origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
-
-      - name: Bumping version
-        run: |
-          source env/bin/activate
-          pip install -r dev-requirements.txt
-          env/bin/bumpversion --allow-dirty --new-version ${{steps.variables.outputs.VERSION_NUMBER}} major
-          git status
-
-      - name: Commit version bump directly
-        uses: EndBug/add-and-commit@v7
-        if: ${{ steps.variables.outputs.IS_DRY_RUN == 'false' }}
-        with:
-          author_name: 'Github Build Bot'
-          author_email: 'buildbot@fishtownanalytics.com'
-          message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}'
-
-      - name: Commit version bump to branch
-        uses: EndBug/add-and-commit@v7
-        if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }}
-        with:
-          author_name: 'Github Build Bot'
-          author_email: 'buildbot@fishtownanalytics.com'
-          message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}'
-          branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'
-          push: 'origin origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'
-
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v3
-        if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }}
-        with:
-          author: 'Github Build Bot '
-          draft: true
-          base: ${{github.ref}}
-          title: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}'
-          branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'
+  version_bump_and_changie:
+    uses: dbt-labs/actions/.github/workflows/version-bump.yml@main
+    with:
+      version_number: ${{ inputs.version_number }}
+    secrets: inherit  # ok since what we are calling is internally maintained

From 224cc28004122f478a965acb9f5deff788bbdd72 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 30 Aug 2022 11:20:35 -0400
Subject: [PATCH 331/603] Bumping version to 1.3.0b2 and generate changelog
 (#443)

* Bumping version to 1.3.0b2 and generate CHANGELOG

* Remove newline

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                                          | 2 +-
 .changes/1.3.0-b2.md                                      | 5 +++++
 .../{unreleased => 1.3.0}/Features-20220808-142118.yaml   | 0
 .../Under the Hood-20220825-073413.yaml                   | 0
 CHANGELOG.md                                              | 8 +++++---
 dbt/adapters/spark/__version__.py                         | 2 +-
 setup.py                                                  | 2 +-
 7 files changed, 13 insertions(+), 6 deletions(-)
 create mode 100644 .changes/1.3.0-b2.md
 rename .changes/{unreleased => 1.3.0}/Features-20220808-142118.yaml (100%)
 rename .changes/{unreleased => 1.3.0}/Under the Hood-20220825-073413.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index ef3954f4..f93a02ae 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.0b1
+current_version = 1.3.0b2
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/.changes/1.3.0-b2.md b/.changes/1.3.0-b2.md
new file mode 100644
index 00000000..8f7ea1e6
--- /dev/null
+++ b/.changes/1.3.0-b2.md
@@ -0,0 +1,5 @@
+## dbt-spark 1.3.0-b2 - August 30, 2022
+### Features
+- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
+### Under the Hood
+- specify supported_languages for materializations that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
diff --git a/.changes/unreleased/Features-20220808-142118.yaml b/.changes/1.3.0/Features-20220808-142118.yaml
similarity index 100%
rename from .changes/unreleased/Features-20220808-142118.yaml
rename to .changes/1.3.0/Features-20220808-142118.yaml
diff --git a/.changes/unreleased/Under the Hood-20220825-073413.yaml b/.changes/1.3.0/Under the Hood-20220825-073413.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20220825-073413.yaml
rename to .changes/1.3.0/Under the Hood-20220825-073413.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0491a7b5..de20a073 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,21 +4,23 @@
 - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
+## dbt-spark 1.3.0-b2 - August 30, 2022
+### Features
+- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
+### Under the Hood
+- specify supported_languages for materializations that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
 
 ## dbt-spark 1.3.0-b1 - July 29, 2022
 
 ### Features
 - Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-
 ### Fixes
 - Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-
 ### Under the Hood
 - Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
 - [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 4b49b750..e2c1a233 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.3.0b1"
+version = "1.3.0b2"
diff --git a/setup.py b/setup.py
index 229e89a1..05e81449 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.3.0b1"
+package_version = "1.3.0b2"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From cef098f5181c51e9a6ae06c157ec6863852bcd22 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Tue, 30 Aug 2022 16:49:12 -0700
Subject: [PATCH 332/603] refactor submission method and add command API as
 default (#442)

* refactor submission method and add command API as default

* update run_name and add changelog

* fix format

* pr feedback
---
 .../Under the Hood-20220829-164426.yaml       |   7 +
 dbt/adapters/spark/impl.py                    | 108 +------
 dbt/adapters/spark/python_submissions.py      | 284 ++++++++++++++++++
 3 files changed, 300 insertions(+), 99 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220829-164426.yaml
 create mode 100644 dbt/adapters/spark/python_submissions.py

diff --git a/.changes/unreleased/Under the Hood-20220829-164426.yaml b/.changes/unreleased/Under the Hood-20220829-164426.yaml
new file mode 100644
index 00000000..bf58971f
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220829-164426.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Submit python model with Command API by default. Adjusted run name
+time: 2022-08-29T16:44:26.509138-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "424"
+  PR: "442"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 12c42ab9..6e97ce1f 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,7 +1,4 @@
 import re
-import requests
-import time
-import base64
 from concurrent.futures import Future
 from dataclasses import dataclass
 from typing import Any, Dict, Iterable, List, Optional, Union
@@ -20,6 +17,7 @@
 from dbt.adapters.spark import SparkConnectionManager
 from dbt.adapters.spark import SparkRelation
 from dbt.adapters.spark import SparkColumn
+from dbt.adapters.spark.python_submissions import PYTHON_SUBMISSION_HELPERS
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
 from dbt.events import AdapterLogger
@@ -394,105 +392,17 @@ def submit_python_job(self, parsed_model: dict, compiled_code: str, timeout=None
         # of `None` which evaluates to True!
 
         # TODO limit this function to run only when doing the materialization of python nodes
-
         # assuming that for python job running over 1 day user would manually overwrite this
-        schema = getattr(parsed_model, "schema", self.config.credentials.schema)
-        identifier = parsed_model["alias"]
-        if not timeout:
-            timeout = 60 * 60 * 24
-        if timeout <= 0:
-            raise ValueError("Timeout must larger than 0")
-
-        auth_header = {"Authorization": f"Bearer {self.connections.profile.credentials.token}"}
-
-        # create new dir
-        if not self.connections.profile.credentials.user:
-            raise ValueError("Need to supply user in profile to submit python job")
-        # it is safe to call mkdirs even if dir already exists and have content inside
-        work_dir = f"/Users/{self.connections.profile.credentials.user}/{schema}"
-        response = requests.post(
-            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/mkdirs",
-            headers=auth_header,
-            json={
-                "path": work_dir,
-            },
-        )
-        if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
-                f"Error creating work_dir for python notebooks\n {response.content!r}"
+        submission_method = parsed_model["config"].get("submission_method", "commands")
+        if submission_method not in PYTHON_SUBMISSION_HELPERS:
+            raise NotImplementedError(
+                "Submission method {} is not supported".format(submission_method)
             )
-
-        # add notebook
-        b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
-        response = requests.post(
-            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/import",
-            headers=auth_header,
-            json={
-                "path": f"{work_dir}/{identifier}",
-                "content": b64_encoded_content,
-                "language": "PYTHON",
-                "overwrite": True,
-                "format": "SOURCE",
-            },
+        job_helper = PYTHON_SUBMISSION_HELPERS[submission_method](
+            parsed_model, self.connections.profile.credentials
         )
-        if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
-                f"Error creating python notebook.\n {response.content!r}"
-            )
-
-        # submit job
-        submit_response = requests.post(
-            f"https://{self.connections.profile.credentials.host}/api/2.1/jobs/runs/submit",
-            headers=auth_header,
-            json={
-                "run_name": "debug task",
-                "existing_cluster_id": self.connections.profile.credentials.cluster,
-                "notebook_task": {
-                    "notebook_path": f"{work_dir}/{identifier}",
-                },
-            },
-        )
-        if submit_response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
-                f"Error creating python run.\n {response.content!r}"
-            )
-
-        # poll until job finish
-        state = None
-        start = time.time()
-        run_id = submit_response.json()["run_id"]
-        terminal_states = ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]
-        while state not in terminal_states and time.time() - start < timeout:
-            time.sleep(1)
-            resp = requests.get(
-                f"https://{self.connections.profile.credentials.host}"
-                f"/api/2.1/jobs/runs/get?run_id={run_id}",
-                headers=auth_header,
-            )
-            json_resp = resp.json()
-            state = json_resp["state"]["life_cycle_state"]
-            # logger.debug(f"Polling.... in state: {state}")
-        if state != "TERMINATED":
-            raise dbt.exceptions.RuntimeException(
-                "python model run ended in state"
-                f"{state} with state_message\n{json_resp['state']['state_message']}"
-            )
-
-        # get end state to return to user
-        run_output = requests.get(
-            f"https://{self.connections.profile.credentials.host}"
-            f"/api/2.1/jobs/runs/get-output?run_id={run_id}",
-            headers=auth_header,
-        )
-        json_run_output = run_output.json()
-        result_state = json_run_output["metadata"]["state"]["result_state"]
-        if result_state != "SUCCESS":
-            raise dbt.exceptions.RuntimeException(
-                "Python model failed with traceback as:\n"
-                "(Note that the line number here does not "
-                "match the line number in your code due to dbt templating)\n"
-                f"{json_run_output['error_trace']}"
-            )
+        job_helper.submit(compiled_code)
+        # we don't really get any useful information back from the job submission other than success
         return self.connections.get_response(None)
 
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
new file mode 100644
index 00000000..ea172ef0
--- /dev/null
+++ b/dbt/adapters/spark/python_submissions.py
@@ -0,0 +1,284 @@
+import base64
+import time
+import requests
+from typing import Any, Dict
+import uuid
+
+import dbt.exceptions
+
+DEFAULT_POLLING_INTERVAL = 3
+SUBMISSION_LANGUAGE = "python"
+DEFAULT_TIMEOUT = 60 * 60 * 24
+
+
+class BasePythonJobHelper:
+    def __init__(self, parsed_model, credentials):
+        self.check_credentials(credentials)
+        self.credentials = credentials
+        self.identifier = parsed_model["alias"]
+        self.schema = getattr(parsed_model, "schema", self.credentials.schema)
+        self.parsed_model = parsed_model
+        self.timeout = self.get_timeout()
+        self.polling_interval = DEFAULT_POLLING_INTERVAL
+
+    def get_timeout(self):
+        timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
+        if timeout <= 0:
+            raise ValueError("Timeout must be a positive integer")
+        return timeout
+
+    def check_credentials(self, credentials):
+        raise NotImplementedError(
+            "Overwrite this method to check specific requirement for current submission method"
+        )
+
+    def submit(self, compiled_code):
+        raise NotImplementedError(
+            "BasePythonJobHelper is an abstract class and you should implement submit method."
+        )
+
+    def polling(
+        self,
+        status_func,
+        status_func_kwargs,
+        get_state_func,
+        terminal_states,
+        expected_end_state,
+        get_state_msg_func,
+    ):
+        state = None
+        start = time.time()
+        exceeded_timeout = False
+        response = {}
+        while state not in terminal_states:
+            if time.time() - start > self.timeout:
+                exceeded_timeout = True
+                break
+            # TODO should we do exponential backoff?
+            time.sleep(self.polling_interval)
+            response = status_func(**status_func_kwargs)
+            state = get_state_func(response)
+        if exceeded_timeout:
+            raise dbt.exceptions.RuntimeException("python model run timed out")
+        if state != expected_end_state:
+            raise dbt.exceptions.RuntimeException(
+                "python model run ended in state"
+                f"{state} with state_message\n{get_state_msg_func(response)}"
+            )
+        return response
+
+
+class DBNotebookPythonJobHelper(BasePythonJobHelper):
+    def __init__(self, parsed_model, credentials):
+        super().__init__(parsed_model, credentials)
+        self.auth_header = {"Authorization": f"Bearer {self.credentials.token}"}
+
+    def check_credentials(self, credentials):
+        if not credentials.user:
+            raise ValueError("Databricks user is required for notebook submission method.")
+
+    def _create_work_dir(self, path):
+        response = requests.post(
+            f"https://{self.credentials.host}/api/2.0/workspace/mkdirs",
+            headers=self.auth_header,
+            json={
+                "path": path,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating work_dir for python notebooks\n {response.content!r}"
+            )
+
+    def _upload_notebook(self, path, compiled_code):
+        b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
+        response = requests.post(
+            f"https://{self.credentials.host}/api/2.0/workspace/import",
+            headers=self.auth_header,
+            json={
+                "path": path,
+                "content": b64_encoded_content,
+                "language": "PYTHON",
+                "overwrite": True,
+                "format": "SOURCE",
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python notebook.\n {response.content!r}"
+            )
+
+    def _submit_notebook(self, path):
+        submit_response = requests.post(
+            f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
+            headers=self.auth_header,
+            json={
+                "run_name": f"{self.schema}-{self.identifier}-{uuid.uuid4()}",
+                "existing_cluster_id": self.credentials.cluster,
+                "notebook_task": {
+                    "notebook_path": path,
+                },
+            },
+        )
+        if submit_response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python run.\n {submit_response.content!r}"
+            )
+        return submit_response.json()["run_id"]
+
+    def submit(self, compiled_code):
+        # it is safe to call mkdirs even if dir already exists and has content inside
+        work_dir = f"/Users/{self.credentials.user}/{self.schema}/"
+        self._create_work_dir(work_dir)
+
+        # add notebook
+        whole_file_path = f"{work_dir}{self.identifier}"
+        self._upload_notebook(whole_file_path, compiled_code)
+
+        # submit job
+        run_id = self._submit_notebook(whole_file_path)
+
+        self.polling(
+            status_func=requests.get,
+            status_func_kwargs={
+                "url": f"https://{self.credentials.host}/api/2.1/jobs/runs/get?run_id={run_id}",
+                "headers": self.auth_header,
+            },
+            get_state_func=lambda response: response.json()["state"]["life_cycle_state"],
+            terminal_states=("TERMINATED", "SKIPPED", "INTERNAL_ERROR"),
+            expected_end_state="TERMINATED",
+            get_state_msg_func=lambda response: response.json()["state"]["state_message"],
+        )
+
+        # get end state to return to user
+        run_output = requests.get(
+            f"https://{self.credentials.host}" f"/api/2.1/jobs/runs/get-output?run_id={run_id}",
+            headers=self.auth_header,
+        )
+        json_run_output = run_output.json()
+        result_state = json_run_output["metadata"]["state"]["result_state"]
+        if result_state != "SUCCESS":
+            raise dbt.exceptions.RuntimeException(
+                "Python model failed with traceback as:\n"
+                "(Note that the line number here does not "
+                "match the line number in your code due to dbt templating)\n"
+                f"{json_run_output['error_trace']}"
+            )
+
+
+class DBContext:
+    def __init__(self, credentials):
+        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
+        self.cluster = credentials.cluster
+        self.host = credentials.host
+
+    def create(self) -> str:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#create-an-execution-context
+        response = requests.post(
+            f"https://{self.host}/api/1.2/contexts/create",
+            headers=self.auth_header,
+            json={
+                "clusterId": self.cluster,
+                "language": SUBMISSION_LANGUAGE,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating an execution context.\n {response.content!r}"
+            )
+        return response.json()["id"]
+
+    def destroy(self, context_id: str) -> str:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#delete-an-execution-context
+        response = requests.post(
+            f"https://{self.host}/api/1.2/contexts/destroy",
+            headers=self.auth_header,
+            json={
+                "clusterId": self.cluster,
+                "contextId": context_id,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error deleting an execution context.\n {response.content!r}"
+            )
+        return response.json()["id"]
+
+
+class DBCommand:
+    def __init__(self, credentials):
+        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
+        self.cluster = credentials.cluster
+        self.host = credentials.host
+
+    def execute(self, context_id: str, command: str) -> str:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#run-a-command
+        response = requests.post(
+            f"https://{self.host}/api/1.2/commands/execute",
+            headers=self.auth_header,
+            json={
+                "clusterId": self.cluster,
+                "contextId": context_id,
+                "language": SUBMISSION_LANGUAGE,
+                "command": command,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating a command.\n {response.content!r}"
+            )
+        return response.json()["id"]
+
+    def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#get-information-about-a-command
+        response = requests.get(
+            f"https://{self.host}/api/1.2/commands/status",
+            headers=self.auth_header,
+            params={
+                "clusterId": self.cluster,
+                "contextId": context_id,
+                "commandId": command_id,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error getting status of command.\n {response.content!r}"
+            )
+        return response.json()
+
+
+class DBCommandsApiPythonJobHelper(BasePythonJobHelper):
+    def check_credentials(self, credentials):
+        if not credentials.cluster:
+            raise ValueError("Databricks cluster is required for commands submission method.")
+
+    def submit(self, compiled_code):
+        context = DBContext(self.credentials)
+        command = DBCommand(self.credentials)
+        context_id = context.create()
+        try:
+            command_id = command.execute(context_id, compiled_code)
+            # poll until job finish
+            response = self.polling(
+                status_func=command.status,
+                status_func_kwargs={
+                    "context_id": context_id,
+                    "command_id": command_id,
+                },
+                get_state_func=lambda response: response["status"],
+                terminal_states=("Cancelled", "Error", "Finished"),
+                expected_end_state="Finished",
+                get_state_msg_func=lambda response: response["results"]["data"],
+            )
+            if response["results"]["resultType"] == "error":
+                raise dbt.exceptions.RuntimeException(
+                    f"Python model failed with traceback as:\n" f"{response['results']['cause']}"
+                )
+        finally:
+            context.destroy(context_id)
+
+
+PYTHON_SUBMISSION_HELPERS = {
+    "notebook": DBNotebookPythonJobHelper,
+    "commands": DBCommandsApiPythonJobHelper,
+}

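The net effect of the patch above: `submit_python_job` no longer drives the Databricks REST calls inline; it looks up a helper class in `PYTHON_SUBMISSION_HELPERS` keyed by the model's `submission_method` config (defaulting to `"commands"`) and delegates to its `submit`. A minimal, hedged sketch of that dispatch pattern, using a hypothetical `EchoHelper` in place of the real notebook/commands helpers:

```python
# A minimal sketch (not adapter code) of the registry-plus-dispatch pattern the
# patch introduces. EchoHelper stands in for the real Databricks helpers.
class EchoHelper:
    def __init__(self, parsed_model, credentials):
        self.parsed_model = parsed_model
        self.credentials = credentials

    def submit(self, compiled_code):
        print(f"would submit {self.parsed_model['alias']} ({len(compiled_code)} chars)")


HELPERS = {"commands": EchoHelper, "notebook": EchoHelper}


def submit_python_job(parsed_model, credentials, compiled_code):
    # the model config picks the helper; "commands" is the new default
    method = parsed_model["config"].get("submission_method", "commands")
    if method not in HELPERS:
        raise NotImplementedError(f"Submission method {method} is not supported")
    HELPERS[method](parsed_model, credentials).submit(compiled_code)


submit_python_job({"alias": "my_model", "config": {}}, None, "print('hello')")
```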
From ebd011ea71ba533e065cb167f6c8213753fa6e9e Mon Sep 17 00:00:00 2001
From: Chenyu Li <chenyu.li@dbtlabs.com>
Date: Wed, 31 Aug 2022 07:21:14 -0700
Subject: [PATCH 333/603] set tmp relation with proper schema (#445)

* set tmp relation with proper schema

* add changelog
---
 .changes/unreleased/Fixes-20220830-140224.yaml             | 7 +++++++
 dbt/include/spark/macros/adapters.sql                      | 7 +++----
 .../macros/materializations/incremental/incremental.sql    | 5 +++++
 .../macros/materializations/incremental/strategies.sql     | 6 +++---
 4 files changed, 18 insertions(+), 7 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20220830-140224.yaml

diff --git a/.changes/unreleased/Fixes-20220830-140224.yaml b/.changes/unreleased/Fixes-20220830-140224.yaml
new file mode 100644
index 00000000..9e3da3ea
--- /dev/null
+++ b/.changes/unreleased/Fixes-20220830-140224.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: python incremental model tmp table now uses the correct schema
+time: 2022-08-30T14:02:24.603033-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "441"
+  PR: "445"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 05630ede..88190cc0 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -123,7 +123,7 @@
 
 {#-- We can't use temporary tables with `create ... as ()` syntax --#}
 {% macro spark__create_temporary_view(relation, compiled_code) -%}
-    create temporary view {{ relation.include(schema=false) }} as
+    create temporary view {{ relation }} as
       {{ compiled_code }}
 {%- endmacro -%}
 
@@ -185,7 +185,7 @@
 
 {% macro spark__get_columns_in_relation_raw(relation) -%}
   {% call statement('get_columns_in_relation_raw', fetch_result=True) %}
-      describe extended {{ relation.include(schema=(schema is not none)) }}
+      describe extended {{ relation }}
   {% endcall %}
   {% do return(load_result('get_columns_in_relation_raw').table) %}
 {% endmacro %}
@@ -263,8 +263,7 @@
 {% macro spark__make_temp_relation(base_relation, suffix) %}
     {% set tmp_identifier = base_relation.identifier ~ suffix %}
     {% set tmp_relation = base_relation.incorporate(path = {
-        "identifier": tmp_identifier,
-        "schema": None
+        "identifier": tmp_identifier
     }) -%}
 
     {% do return(tmp_relation) %}
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 1a92351c..e293441b 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -17,6 +17,11 @@
   {%- set existing_relation = load_relation(this) -%}
   {%- set tmp_relation = make_temp_relation(this) -%}
 
+  {#-- for SQL model we will create temp view that doesn't have database and schema --#}
+  {%- if language == 'sql'-%}
+    {%- set tmp_relation = tmp_relation.include(database=false, schema=false) -%}
+  {%- endif -%}
+
   {#-- Set Overwrite Mode --#}
   {%- if strategy == 'insert_overwrite' and partition_by -%}
     {%- call statement() -%}
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index 28b8f200..d98e1f69 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -4,7 +4,7 @@
     {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
     insert overwrite table {{ target_relation }}
     {{ partition_cols(label="partition") }}
-    select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }}
+    select {{dest_cols_csv}} from {{ source_relation }}
 
 {% endmacro %}
 
@@ -14,7 +14,7 @@
     {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}
     {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
     insert into table {{ target_relation }}
-    select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }}
+    select {{dest_cols_csv}} from {{ source_relation }}
 
 {% endmacro %}
 
@@ -45,7 +45,7 @@
   {{ sql_header if sql_header is not none }}
 
   merge into {{ target }} as DBT_INTERNAL_DEST
-      using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE
+      using {{ source }} as DBT_INTERNAL_SOURCE
       on {{ predicates | join(' and ') }}
 
       when matched then update set

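The macro changes in the patch above rest on one Spark constraint: `create temporary view` targets cannot be schema-qualified, whereas the temp tables Python models write to keep their database and schema. A toy sketch of the include-style trimming the incremental materialization now applies only when `language == 'sql'` (`ToyRelation` is a stand-in, not dbt's `BaseRelation`):

```python
# Toy illustration of dropping database/schema from a relation only for SQL temp views.
from dataclasses import dataclass, replace
from typing import Optional


@dataclass(frozen=True)
class ToyRelation:
    database: Optional[str]
    schema: Optional[str]
    identifier: str

    def include(self, database=True, schema=True):
        # return a copy with the excluded path components blanked out
        return replace(
            self,
            database=self.database if database else None,
            schema=self.schema if schema else None,
        )

    def render(self):
        return ".".join(part for part in (self.database, self.schema, self.identifier) if part)


tmp = ToyRelation("dev", "analytics", "my_model__dbt_tmp")
print(tmp.render())                                        # dev.analytics.my_model__dbt_tmp (python temp table)
print(tmp.include(database=False, schema=False).render())  # my_model__dbt_tmp (sql temp view)
```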
From a9c1d8c256956d40e5b6d25e6f0e7b7f7c9b5700 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Tue, 13 Sep 2022 15:44:11 -0400
Subject: [PATCH 334/603] Update repo templates (#448)

---
 .github/ISSUE_TEMPLATE/bug-report.yml        | 84 ++++++++++++++++++++
 .github/ISSUE_TEMPLATE/bug_report.md         | 33 --------
 .github/ISSUE_TEMPLATE/config.yml            | 14 ++++
 .github/ISSUE_TEMPLATE/feature-request.yml   | 59 ++++++++++++++
 .github/ISSUE_TEMPLATE/feature_request.md    | 23 ------
 .github/ISSUE_TEMPLATE/regression-report.yml | 82 +++++++++++++++++++
 .github/ISSUE_TEMPLATE/release.md            | 10 ---
 .github/{ISSUE_TEMPLATE => }/dependabot.yml  |  0
 .github/pull_request_template.md             |  7 +-
 9 files changed, 245 insertions(+), 67 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/bug-report.yml
 delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml
 create mode 100644 .github/ISSUE_TEMPLATE/feature-request.yml
 delete mode 100644 .github/ISSUE_TEMPLATE/feature_request.md
 create mode 100644 .github/ISSUE_TEMPLATE/regression-report.yml
 delete mode 100644 .github/ISSUE_TEMPLATE/release.md
 rename .github/{ISSUE_TEMPLATE => }/dependabot.yml (100%)

diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
new file mode 100644
index 00000000..f5494b31
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,84 @@
+name: 🐞 Bug
+description: Report a bug or an issue you've found with dbt-spark
+title: "[Bug] "
+labels: ["bug", "triage"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report!
+  - type: checkboxes
+    attributes:
+      label: Is this a new bug in dbt-spark?
+      description: >
+        In other words, is this an error, flaw, failure or fault in our software?
+
+        If this is a bug that broke existing functionality that used to work, please open a regression issue.
+        If this is a bug in the dbt-core logic, please open an issue in the dbt-core repository.
+        If this is a bug experienced while using dbt Cloud, please report to [support](mailto:support@getdbt.com).
+        If this is a request for help or troubleshooting code in your own dbt project, please join our [dbt Community Slack](https://www.getdbt.com/community/join-the-community/) or open a [Discussion question](https://github.com/dbt-labs/docs.getdbt.com/discussions).
+
+        Please search to see if an issue already exists for the bug you encountered.
+      options:
+        - label: I believe this is a new bug in dbt-spark
+          required: true
+        - label: I have searched the existing issues, and I could not find an existing issue for this bug
+          required: true
+  - type: textarea
+    attributes:
+      label: Current Behavior
+      description: A concise description of what you're experiencing.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Expected Behavior
+      description: A concise description of what you expected to happen.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Steps To Reproduce
+      description: Steps to reproduce the behavior.
+      placeholder: |
+        1. In this environment...
+        2. With this config...
+        3. Run '...'
+        4. See error...
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: |
+        If applicable, log output to help explain your problem.
+      render: shell
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Environment
+      description: |
+        examples:
+          - **OS**: Ubuntu 20.04
+          - **Python**: 3.9.12 (`python3 --version`)
+          - **dbt-core**: 1.1.1 (`dbt --version`)
+          - **dbt-spark**: 1.1.0 (`dbt --version`)
+      value: |
+        - OS:
+        - Python:
+        - dbt-core:
+        - dbt-spark:
+      render: markdown
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Additional Context
+      description: |
+        Links? References? Anything that will give us more context about the issue you are encountering!
+
+        Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index 43f19a15..00000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,33 +0,0 @@
----
-name: Bug report
-about: Report a bug or an issue you've found with dbt-spark
-title: ''
-labels: bug, triage
-assignees: ''
-
----
-
-### Describe the bug
-A clear and concise description of what the bug is. What command did you run? What happened?
-
-### Steps To Reproduce
-In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example model code, etc is all very helpful here.
-
-### Expected behavior
-A clear and concise description of what you expected to happen.
-
-### Screenshots and log output
-If applicable, add screenshots or log output to help explain your problem.
-
-### System information
-**The output of `dbt --version`:**
-```
-<output goes here>
-```
-
-**The operating system you're using:**
-
-**The output of `python --version`:**
-
-### Additional context
-Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..129ea777
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,14 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Ask the community for help
+    url: https://github.com/dbt-labs/docs.getdbt.com/discussions
+    about: Need help troubleshooting? Check out our guide on how to ask
+  - name: Contact dbt Cloud support
+    url: mailto:support@getdbt.com
+    about: Are you using dbt Cloud? Contact our support team for help!
+  - name: Participate in Discussions
+    url: https://github.com/dbt-labs/dbt-spark/discussions
+    about: Do you have a Big Idea for dbt-spark? Read open discussions, or start a new one
+  - name: Create an issue for dbt-core
+    url: https://github.com/dbt-labs/dbt-core/issues/new/choose
+    about: Report a bug or request a feature for dbt-core
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
new file mode 100644
index 00000000..8c123ba5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@@ -0,0 +1,59 @@
+name: ✨ Feature
+description: Propose a straightforward extension of dbt-spark functionality
+title: "[Feature] <title>"
+labels: ["enhancement", "triage"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this feature request!
+  - type: checkboxes
+    attributes:
+      label: Is this your first time submitting a feature request?
+      description: >
+        We want to make sure that features are distinct and discoverable,
+        so that other members of the community can find them and offer their thoughts.
+
+        Issues are the right place to request straightforward extensions of existing dbt-spark functionality.
+        For "big ideas" about future capabilities of dbt-spark, we ask that you open a
+        [discussion](https://github.com/dbt-labs/dbt-spark/discussions) in the "Ideas" category instead.
+      options:
+        - label: I have read the [expectations for open source contributors](https://docs.getdbt.com/docs/contributing/oss-expectations)
+          required: true
+        - label: I have searched the existing issues, and I could not find an existing issue for this feature
+          required: true
+        - label: I am requesting a straightforward extension of existing dbt-spark functionality, rather than a Big Idea better suited to a discussion
+          required: true
+  - type: textarea
+    attributes:
+      label: Describe the feature
+      description: A clear and concise description of what you want to happen.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Describe alternatives you've considered
+      description: |
+        A clear and concise description of any alternative solutions or features you've considered.
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Who will this benefit?
+      description: |
+        What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.
+    validations:
+      required: false
+  - type: input
+    attributes:
+      label: Are you interested in contributing this feature?
+      description: Let us know if you want to write some code, and how we can help.
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Anything else?
+      description: |
+        Links? References? Anything that will give us more context about the feature you are suggesting!
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
deleted file mode 100644
index 5edc9f6c..00000000
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ /dev/null
@@ -1,23 +0,0 @@
----
-name: Feature request
-about: Suggest an idea for dbt-spark
-title: ''
-labels: enhancement, triage
-assignees: ''
-
----
-
-### Describe the feature
-A clear and concise description of what you want to happen.
-
-### Describe alternatives you've considered
-A clear and concise description of any alternative solutions or features you've considered.
-
-### Additional context
-Please include any other relevant context here.
-
-### Who will this benefit?
-What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.
-
-### Are you interested in contributing this feature?
-Let us know if you want to write some code, and how we can help.
diff --git a/.github/ISSUE_TEMPLATE/regression-report.yml b/.github/ISSUE_TEMPLATE/regression-report.yml
new file mode 100644
index 00000000..8b65d6a2
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/regression-report.yml
@@ -0,0 +1,82 @@
+name: ☣️ Regression
+description: Report a regression you've observed in a newer version of dbt-spark
+title: "[Regression] <title>"
+labels: ["bug", "regression", "triage"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this regression report!
+  - type: checkboxes
+    attributes:
+      label: Is this a regression in a recent version of dbt-spark?
+      description: >
+        A regression is when documented functionality works as expected in an older version of dbt-spark,
+        and no longer works after upgrading to a newer version of dbt-spark
+      options:
+        - label: I believe this is a regression in dbt-spark functionality
+          required: true
+        - label: I have searched the existing issues, and I could not find an existing issue for this regression
+          required: true
+  - type: textarea
+    attributes:
+      label: Current Behavior
+      description: A concise description of what you're experiencing.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Expected/Previous Behavior
+      description: A concise description of what you expected to happen.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Steps To Reproduce
+      description: Steps to reproduce the behavior.
+      placeholder: |
+        1. In this environment...
+        2. With this config...
+        3. Run '...'
+        4. See error...
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: |
+        If applicable, log output to help explain your problem.
+      render: shell
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Environment
+      description: |
+        examples:
+          - **OS**: Ubuntu 20.04
+          - **Python**: 3.9.12 (`python3 --version`)
+          - **dbt-core (working version)**: 1.1.1 (`dbt --version`)
+          - **dbt-spark (working version)**: 1.1.0 (`dbt --version`)
+          - **dbt-core (regression version)**: 1.2.0 (`dbt --version`)
+          - **dbt-spark (regression version)**: 1.2.0 (`dbt --version`)
+      value: |
+        - OS:
+        - Python:
+        - dbt-core (working version):
+        - dbt-spark (working version):
+        - dbt-core (regression version):
+        - dbt-spark (regression version):
+      render: markdown
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Additional Context
+      description: |
+        Links? References? Anything that will give us more context about the issue you are encountering!
+
+        Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/release.md b/.github/ISSUE_TEMPLATE/release.md
deleted file mode 100644
index a69349f5..00000000
--- a/.github/ISSUE_TEMPLATE/release.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: Release
-about: Release a new version of dbt-spark
-title: ''
-labels: release
-assignees: ''
-
----
-
-### TBD
diff --git a/.github/ISSUE_TEMPLATE/dependabot.yml b/.github/dependabot.yml
similarity index 100%
rename from .github/ISSUE_TEMPLATE/dependabot.yml
rename to .github/dependabot.yml
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index c4a5c53b..11381456 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -11,11 +11,16 @@ resolves #
 
 ### Description
 
-<!--- Describe the Pull Request here -->
+<!---
+  Describe the Pull Request here. Add any references and info to help reviewers
+  understand your changes. Include any tradeoffs you considered.
+-->
 
 ### Checklist
 
+- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md) and understand what's expected of me
 - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
 - [ ] I have run this code in development and it appears to resolve the stated issue
 - [ ] This PR includes tests, or tests are not required/relevant for this PR
+- [ ] I have [opened an issue to add/update docs](https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose), or docs changes are not required/relevant for this PR
 - [ ] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#Adding-CHANGELOG-Entry)

From b310c4c81a8aeedc6ed188854d92c36410ba70ba Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 13 Sep 2022 15:30:30 -0700
Subject: [PATCH 335/603] add mypy ignore to column, connections and init
 (#462)

* add mypy ignore to column, connections and init

* changie file
---
 .changes/unreleased/Under the Hood-20220913-152004.yaml | 7 +++++++
 dbt/adapters/spark/__init__.py                          | 2 +-
 dbt/adapters/spark/column.py                            | 4 ++--
 dbt/adapters/spark/connections.py                       | 2 +-
 dbt/adapters/spark/impl.py                              | 2 +-
 5 files changed, 12 insertions(+), 5 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220913-152004.yaml

diff --git a/.changes/unreleased/Under the Hood-20220913-152004.yaml b/.changes/unreleased/Under the Hood-20220913-152004.yaml
new file mode 100644
index 00000000..4c372db0
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220913-152004.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: ignore mypy typing issues
+time: 2022-09-13T15:20:04.459783-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "461"
+  PR: "462"
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py
index 6ecc5ecc..91ad5476 100644
--- a/dbt/adapters/spark/__init__.py
+++ b/dbt/adapters/spark/__init__.py
@@ -5,7 +5,7 @@
 from dbt.adapters.spark.impl import SparkAdapter
 
 from dbt.adapters.base import AdapterPlugin
-from dbt.include import spark
+from dbt.include import spark  # type: ignore
 
 Plugin = AdapterPlugin(
     adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index dcf7590e..8100fa45 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -9,7 +9,7 @@
 
 
 @dataclass
-class SparkColumn(dbtClassMixin, Column):
+class SparkColumn(dbtClassMixin, Column):  # type: ignore
     table_database: Optional[str] = None
     table_schema: Optional[str] = None
     table_name: Optional[str] = None
@@ -22,7 +22,7 @@ class SparkColumn(dbtClassMixin, Column):
     def translate_type(cls, dtype: str) -> str:
         return dtype
 
-    def can_expand_to(self: Self, other_column: Self) -> bool:
+    def can_expand_to(self: Self, other_column: Self) -> bool:  # type: ignore
         """returns True if both columns are strings"""
         return self.is_string() and other_column.is_string()
 
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 59ceb9dd..80e014a2 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -59,7 +59,7 @@ class SparkConnectionMethod(StrEnum):
 class SparkCredentials(Credentials):
     host: str
     method: SparkConnectionMethod
-    database: Optional[str]
+    database: Optional[str]  # type: ignore
     driver: Optional[str] = None
     cluster: Optional[str] = None
     endpoint: Optional[str] = None
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 6e97ce1f..b8979380 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -118,7 +118,7 @@ def add_schema_to_cache(self, schema) -> str:
             dbt.exceptions.raise_compiler_error(
                 "Attempted to cache a null schema for {}".format(name)
             )
-        if dbt.flags.USE_CACHE:
+        if dbt.flags.USE_CACHE:  # type: ignore
             self.cache.add_schema(None, schema)
         # so jinja doesn't render things
         return ""

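For context on the `# type: ignore` annotations above: they are line-level suppressions for spots where a subclass changes the type of an inherited attribute (as `SparkCredentials.database` does), keeping the rest of the module under strict checking. An illustrative, stand-in example of the pattern:

```python
# Illustrative only; Base/Child are stand-ins, not dbt classes.
from typing import Optional


class Base:
    database: str = "default"


class Child(Base):
    # mypy reports an incompatible override here (str -> Optional[str]);
    # the line-level ignore silences just this diagnostic.
    database: Optional[str] = None  # type: ignore
```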
From 571a6ef43763d0ae37d84e2a6eba7c32028e21dd Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Wed, 14 Sep 2022 10:23:53 -0400
Subject: [PATCH 336/603] Update changelog bot (#463)

* Update changelog bot

* Updating correct issue number
---
 .github/workflows/bot-changelog.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
index d8056efe..39eacf9a 100644
--- a/.github/workflows/bot-changelog.yml
+++ b/.github/workflows/bot-changelog.yml
@@ -28,7 +28,7 @@ name: Bot Changelog
 on:
   pull_request:
     # catch when the PR is opened with the label or when the label is added
-    types: [opened, labeled]
+    types: [labeled]
 
 permissions:
   contents: write
@@ -48,9 +48,9 @@ jobs:
     steps:
 
     - name: Create and commit changelog on bot PR
-      if: "contains(github.event.pull_request.labels.*.name, ${{ matrix.label }})"
+      if: ${{ contains(github.event.pull_request.labels.*.name, matrix.label) }}
       id: bot_changelog
-      uses: emmyoop/changie_bot@v1.0
+      uses: emmyoop/changie_bot@v1.0.1
       with:
         GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }}
         commit_author_name: "Github Build Bot"
@@ -58,4 +58,4 @@ jobs:
         commit_message: "Add automated changelog yaml from template for bot PR"
         changie_kind: ${{ matrix.changie_kind }}
         label: ${{ matrix.label }}
-        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  Issue: 417\n  PR: ${{ github.event.pull_request.number }}\n"
+        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  Issue: 417\n  PR: ${{ github.event.pull_request.number }}"

From 60f47d5acf7e2788725529e4bf349120551eb66b Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 15 Sep 2022 09:37:05 -0500
Subject: [PATCH 337/603] [CT-1114] remove Cache call from
 get_columns_in_relation (#451)

* init push for change to get_columns_in_relation to fix cache inconsistencies during on_schema_change

* trying to clear mypy issues

* changelog

* add ref to columns before called on by macro
---
 .../unreleased/Fixes-20220914-010520.yaml     |  8 ++++
 .pre-commit-config.yaml                       |  2 +-
 dbt/adapters/spark/impl.py                    | 42 ++++++-------------
 3 files changed, 22 insertions(+), 30 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20220914-010520.yaml

diff --git a/.changes/unreleased/Fixes-20220914-010520.yaml b/.changes/unreleased/Fixes-20220914-010520.yaml
new file mode 100644
index 00000000..f8584f05
--- /dev/null
+++ b/.changes/unreleased/Fixes-20220914-010520.yaml
@@ -0,0 +1,8 @@
+kind: Fixes
+body: change to get_columns_in_relation to fix cache inconsistencies in incremental
+  models causing failures on on_schema_change
+time: 2022-09-14T01:05:20.312981-05:00
+custom:
+  Author: McKnight-42
+  Issue: "447"
+  PR: "451"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e70156dc..e85b1dc8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ exclude: '^tests/.*'
 
 # Force all unspecified python hooks to run python 3.8
 default_language_version:
-  python: python3.8
+  python: python3
 
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index b8979380..c228fc03 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -207,36 +207,20 @@ def find_table_information_separator(rows: List[dict]) -> int:
         return pos
 
     def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
-        cached_relations = self.cache.get_relations(relation.database, relation.schema)
-        cached_relation = next(
-            (
-                cached_relation
-                for cached_relation in cached_relations
-                if str(cached_relation) == str(relation)
-            ),
-            None,
-        )
         columns = []
-        if cached_relation and cached_relation.information:
-            columns = self.parse_columns_from_information(cached_relation)
-        if not columns:
-            # in open source delta 'show table extended' query output doesnt
-            # return relation's schema. if columns are empty from cache,
-            # use get_columns_in_relation spark macro
-            # which would execute 'describe extended tablename' query
-            try:
-                rows: List[agate.Row] = self.execute_macro(
-                    GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
-                )
-                columns = self.parse_describe_extended(relation, rows)
-            except dbt.exceptions.RuntimeException as e:
-                # spark would throw error when table doesn't exist, where other
-                # CDW would just return and empty list, normalizing the behavior here
-                errmsg = getattr(e, "msg", "")
-                if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg:
-                    pass
-                else:
-                    raise e
+        try:
+            rows: List[agate.Row] = self.execute_macro(
+                GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
+            )
+            columns = self.parse_describe_extended(relation, rows)
+        except dbt.exceptions.RuntimeException as e:
+            # spark would throw error when table doesn't exist, where other
+            # CDW would just return an empty list, normalizing the behavior here
+            errmsg = getattr(e, "msg", "")
+            if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg:
+                pass
+            else:
+                raise e
 
         # strip hudi metadata columns.
         columns = [x for x in columns if x.name not in self.HUDI_METADATA_COLUMNS]

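The simplified `get_columns_in_relation` above preserves one behavior worth calling out: when the relation does not exist, Spark raises, but the adapter swallows that specific error so callers get an empty column list, matching what other warehouses return. A small stand-in sketch of that normalization (`RuntimeException` and `run_describe` are placeholders, not the adapter's real macro plumbing):

```python
# Sketch of normalizing "relation not found" to an empty column list.
class RuntimeException(Exception):
    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg


def describe_columns(relation_name, run_describe):
    try:
        # stands in for executing `describe extended <relation>` via the macro
        return run_describe(relation_name)
    except RuntimeException as e:
        errmsg = getattr(e, "msg", "")
        if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg:
            return []  # missing relation -> empty list, not an error
        raise


def _missing(name):
    raise RuntimeException(f"Table or view not found: {name}")


print(describe_columns("analytics.nope", _missing))  # []
```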
From 36bbe0de5ec6069384c9a754ada85588fe032511 Mon Sep 17 00:00:00 2001
From: Chenyu Li <chenyu.li@dbtlabs.com>
Date: Thu, 15 Sep 2022 08:20:42 -0700
Subject: [PATCH 338/603] Enhancement/refactor python submission (#452)

* refactor and move common logic to core
---
 .../Under the Hood-20220912-104517.yaml       |  7 +++
 dbt/adapters/spark/impl.py                    | 43 ++++++++---------
 dbt/adapters/spark/python_submissions.py      | 48 +++++++++----------
 3 files changed, 49 insertions(+), 49 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220912-104517.yaml

diff --git a/.changes/unreleased/Under the Hood-20220912-104517.yaml b/.changes/unreleased/Under the Hood-20220912-104517.yaml
new file mode 100644
index 00000000..e45c97bf
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220912-104517.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Better interface for python submission
+time: 2022-09-12T10:45:17.226481-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "452"
+  PR: "452"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index c228fc03..77b1e4b5 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,7 +1,7 @@
 import re
 from concurrent.futures import Future
 from dataclasses import dataclass
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union, Type
 from typing_extensions import TypeAlias
 
 import agate
@@ -10,14 +10,17 @@
 import dbt
 import dbt.exceptions
 
-from dbt.adapters.base import AdapterConfig
-from dbt.adapters.base.impl import catch_as_completed, log_code_execution
-from dbt.adapters.base.meta import available
+from dbt.adapters.base import AdapterConfig, PythonJobHelper
+from dbt.adapters.base.impl import catch_as_completed
+from dbt.contracts.connection import AdapterResponse
 from dbt.adapters.sql import SQLAdapter
 from dbt.adapters.spark import SparkConnectionManager
 from dbt.adapters.spark import SparkRelation
 from dbt.adapters.spark import SparkColumn
-from dbt.adapters.spark.python_submissions import PYTHON_SUBMISSION_HELPERS
+from dbt.adapters.spark.python_submissions import (
+    DBNotebookPythonJobHelper,
+    DBCommandsApiPythonJobHelper,
+)
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
 from dbt.events import AdapterLogger
@@ -369,26 +372,20 @@ def run_sql_for_tests(self, sql, fetch, conn):
         finally:
             conn.transaction_open = False
 
-    @available.parse_none
-    @log_code_execution
-    def submit_python_job(self, parsed_model: dict, compiled_code: str, timeout=None):
-        # TODO improve the typing here.  N.B. Jinja returns a `jinja2.runtime.Undefined` instead
-        # of `None` which evaluates to True!
-
-        # TODO limit this function to run only when doing the materialization of python nodes
-        # assuming that for python job running over 1 day user would manually overwrite this
-        submission_method = parsed_model["config"].get("submission_method", "commands")
-        if submission_method not in PYTHON_SUBMISSION_HELPERS:
-            raise NotImplementedError(
-                "Submission method {} is not supported".format(submission_method)
-            )
-        job_helper = PYTHON_SUBMISSION_HELPERS[submission_method](
-            parsed_model, self.connections.profile.credentials
-        )
-        job_helper.submit(compiled_code)
-        # we don't really get any useful information back from the job submission other than success
+    def generate_python_submission_response(self, submission_result: Any) -> AdapterResponse:
         return self.connections.get_response(None)
 
+    @property
+    def default_python_submission_method(self) -> str:
+        return "commands"
+
+    @property
+    def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]:
+        return {
+            "notebook": DBNotebookPythonJobHelper,
+            "commands": DBCommandsApiPythonJobHelper,
+        }
+
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
         grants_dict: Dict[str, List[str]] = {}
         for row in grants_table:
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index ea172ef0..5ee4adb1 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -5,14 +5,16 @@
 import uuid
 
 import dbt.exceptions
+from dbt.adapters.base import PythonJobHelper
+from dbt.adapters.spark import SparkCredentials
 
-DEFAULT_POLLING_INTERVAL = 3
+DEFAULT_POLLING_INTERVAL = 5
 SUBMISSION_LANGUAGE = "python"
 DEFAULT_TIMEOUT = 60 * 60 * 24
 
 
-class BasePythonJobHelper:
-    def __init__(self, parsed_model, credentials):
+class BaseDatabricksHelper(PythonJobHelper):
+    def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
         self.check_credentials(credentials)
         self.credentials = credentials
         self.identifier = parsed_model["alias"]
@@ -21,18 +23,18 @@ def __init__(self, parsed_model, credentials):
         self.timeout = self.get_timeout()
         self.polling_interval = DEFAULT_POLLING_INTERVAL
 
-    def get_timeout(self):
+    def get_timeout(self) -> int:
         timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
         if timeout <= 0:
             raise ValueError("Timeout must be a positive integer")
         return timeout
 
-    def check_credentials(self, credentials):
+    def check_credentials(self, credentials: SparkCredentials) -> None:
         raise NotImplementedError(
             "Overwrite this method to check specific requirement for current submission method"
         )
 
-    def submit(self, compiled_code):
+    def submit(self, compiled_code: str) -> None:
         raise NotImplementedError(
             "BasePythonJobHelper is an abstract class and you should implement submit method."
         )
@@ -45,7 +47,7 @@ def polling(
         terminal_states,
         expected_end_state,
         get_state_msg_func,
-    ):
+    ) -> Dict:
         state = None
         start = time.time()
         exceeded_timeout = False
@@ -54,7 +56,7 @@ def polling(
             if time.time() - start > self.timeout:
                 exceeded_timeout = True
                 break
-            # TODO should we do exponential backoff?
+            # should we do exponential backoff?
             time.sleep(self.polling_interval)
             response = status_func(**status_func_kwargs)
             state = get_state_func(response)
@@ -68,16 +70,16 @@ def polling(
         return response
 
 
-class DBNotebookPythonJobHelper(BasePythonJobHelper):
-    def __init__(self, parsed_model, credentials):
+class DBNotebookPythonJobHelper(BaseDatabricksHelper):
+    def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
         super().__init__(parsed_model, credentials)
         self.auth_header = {"Authorization": f"Bearer {self.credentials.token}"}
 
-    def check_credentials(self, credentials):
+    def check_credentials(self, credentials) -> None:
         if not credentials.user:
             raise ValueError("Databricks user is required for notebook submission method.")
 
-    def _create_work_dir(self, path):
+    def _create_work_dir(self, path: str) -> None:
         response = requests.post(
             f"https://{self.credentials.host}/api/2.0/workspace/mkdirs",
             headers=self.auth_header,
@@ -90,7 +92,7 @@ def _create_work_dir(self, path):
                 f"Error creating work_dir for python notebooks\n {response.content!r}"
             )
 
-    def _upload_notebook(self, path, compiled_code):
+    def _upload_notebook(self, path: str, compiled_code: str) -> None:
         b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
         response = requests.post(
             f"https://{self.credentials.host}/api/2.0/workspace/import",
@@ -108,7 +110,7 @@ def _upload_notebook(self, path, compiled_code):
                 f"Error creating python notebook.\n {response.content!r}"
             )
 
-    def _submit_notebook(self, path):
+    def _submit_notebook(self, path: str) -> str:
         submit_response = requests.post(
             f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
             headers=self.auth_header,
@@ -126,7 +128,7 @@ def _submit_notebook(self, path):
             )
         return submit_response.json()["run_id"]
 
-    def submit(self, compiled_code):
+    def submit(self, compiled_code: str) -> None:
         # it is safe to call mkdirs even if dir already exists and has content inside
         work_dir = f"/Users/{self.credentials.user}/{self.schema}/"
         self._create_work_dir(work_dir)
@@ -167,7 +169,7 @@ def submit(self, compiled_code):
 
 
 class DBContext:
-    def __init__(self, credentials):
+    def __init__(self, credentials: SparkCredentials) -> None:
         self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
         self.cluster = credentials.cluster
         self.host = credentials.host
@@ -206,7 +208,7 @@ def destroy(self, context_id: str) -> str:
 
 
 class DBCommand:
-    def __init__(self, credentials):
+    def __init__(self, credentials: SparkCredentials) -> None:
         self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
         self.cluster = credentials.cluster
         self.host = credentials.host
@@ -247,12 +249,12 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
         return response.json()
 
 
-class DBCommandsApiPythonJobHelper(BasePythonJobHelper):
-    def check_credentials(self, credentials):
+class DBCommandsApiPythonJobHelper(BaseDatabricksHelper):
+    def check_credentials(self, credentials: SparkCredentials) -> None:
         if not credentials.cluster:
             raise ValueError("Databricks cluster is required for commands submission method.")
 
-    def submit(self, compiled_code):
+    def submit(self, compiled_code: str) -> None:
         context = DBContext(self.credentials)
         command = DBCommand(self.credentials)
         context_id = context.create()
@@ -276,9 +278,3 @@ def submit(self, compiled_code):
                 )
         finally:
             context.destroy(context_id)
-
-
-PYTHON_SUBMISSION_HELPERS = {
-    "notebook": DBNotebookPythonJobHelper,
-    "commands": DBCommandsApiPythonJobHelper,
-}

From 4c88e4ab6af5b9ccec8710bba6354aa8ca53dac9 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 20 Sep 2022 09:46:51 -0600
Subject: [PATCH 339/603] Convert df to pyspark DataFrame if it is pandas
 before writing (#469)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Convert df to pyspark DataFrame if it is pandas before writing

* Changelog entry

* Use `overwriteSchema` option like dbt-databricks

* Upstream `py_write_table` macro from dbt-databricks

* Convert df to a PySpark DataFrame if it's a Pandas-on-Spark DataFrame before writing

* Separate conversion logic from import logic

* Raise exception if not able to convert to a Spark DataFrame

* Prefer pandas → pandas-on-Spark → Spark over direct pandas → Spark
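
A minimal standalone sketch of the conversion order described above, assuming pandas and pyspark are installed; the helper name `to_spark_df` is illustrative and not part of the patch:

    import pandas
    import pyspark.pandas
    from pyspark.sql import DataFrame as SparkDataFrame

    def to_spark_df(df):
        # prefer pandas -> pandas-on-Spark -> Spark: pandas-on-Spark converts pandas
        # frames more faithfully than spark.createDataFrame, and the final hop is free
        if isinstance(df, SparkDataFrame):
            return df
        if isinstance(df, pandas.DataFrame):
            df = pyspark.pandas.DataFrame(df)
        if isinstance(df, pyspark.pandas.DataFrame):
            return df.to_spark()
        raise Exception(f"{type(df)} is not a supported type for dbt Python materialization")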
---
 .../Under the Hood-20220916-125706.yaml       |  7 ++++
 .../spark/macros/materializations/table.sql   | 35 ++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220916-125706.yaml

diff --git a/.changes/unreleased/Under the Hood-20220916-125706.yaml b/.changes/unreleased/Under the Hood-20220916-125706.yaml
new file mode 100644
index 00000000..54b82eb5
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220916-125706.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Enable Pandas and Pandas-on-Spark DataFrames for dbt python models
+time: 2022-09-16T12:57:06.846297-06:00
+custom:
+  Author: chamini2 dbeatty10
+  Issue: "468"
+  PR: "469"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index d39ba0b4..25d70c72 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -41,7 +41,40 @@
 # --- Autogenerated dbt materialization code. --- #
 dbt = dbtObj(spark.table)
 df = model(dbt, spark)
-df.write.mode("overwrite").format("delta").saveAsTable("{{ target_relation }}")
+
+import importlib.util
+
+pandas_available = False
+pyspark_available = False
+
+# make sure pandas exists before using it
+if importlib.util.find_spec("pandas"):
+  import pandas
+  pandas_available = True
+
+# make sure pyspark.pandas exists before using it
+if importlib.util.find_spec("pyspark.pandas"):
+  import pyspark.pandas
+  pyspark_available = True
+
+# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first
+# since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`
+# and converting from pandas-on-Spark to Spark DataFrame has no overhead
+if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+  df = pyspark.pandas.frame.DataFrame(df)
+
+# convert to pyspark.sql.dataframe.DataFrame
+if isinstance(df, pyspark.sql.dataframe.DataFrame):
+  pass  # since it is already a Spark DataFrame
+elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame):
+  df = df.to_spark()
+elif pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+  df = spark.createDataFrame(df)
+else:
+  msg = f"{type(df)} is not a supported type for dbt Python materialization"
+  raise Exception(msg)
+
+df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}")
 {%- endmacro -%}
 
 {%macro py_script_comment()%}

From c46fff986ab4ef0ff62e2c955bc380e6c8b110f9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 20 Sep 2022 16:02:36 -0400
Subject: [PATCH 340/603] Bump black from 22.3.0 to 22.8.0 (#458)

* Bump black from 22.3.0 to 22.8.0

Bumps [black](https://github.com/psf/black) from 22.3.0 to 22.8.0.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/22.3.0...22.8.0)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-191910.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-191910.yaml

diff --git a/.changes/unreleased/Dependency-20220914-191910.yaml b/.changes/unreleased/Dependency-20220914-191910.yaml
new file mode 100644
index 00000000..ad2534c1
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-191910.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump black from 22.3.0 to 22.8.0"
+time: 2022-09-14T19:19:10.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 458
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 5b29e5e9..81f95a6d 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 
 
-black==22.3.0
+black==22.8.0
 bumpversion
 click~=8.0.4
 flake8

From f32f9fd723c3879e007c5d87df3c61fa79f2ac77 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 20 Sep 2022 16:03:35 -0400
Subject: [PATCH 341/603] Bump mypy from 0.950 to 0.971 (#456)

* Bump mypy from 0.950 to 0.971

Bumps [mypy](https://github.com/python/mypy) from 0.950 to 0.971.
- [Release notes](https://github.com/python/mypy/releases)
- [Commits](https://github.com/python/mypy/compare/v0.950...v0.971)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-192102.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-192102.yaml

diff --git a/.changes/unreleased/Dependency-20220914-192102.yaml b/.changes/unreleased/Dependency-20220914-192102.yaml
new file mode 100644
index 00000000..f13fd6c4
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192102.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump mypy from 0.950 to 0.971"
+time: 2022-09-14T19:21:02.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 456
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 81f95a6d..87ca93da 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -13,7 +13,7 @@ flaky
 freezegun==0.3.9
 ipdb
 mock>=1.3.0
-mypy==0.950
+mypy==0.971
 pre-commit
 pytest-csv
 pytest-dotenv

From e918e2671b044753b36b0ea14a02d22ae66345f8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Sep 2022 10:08:10 -0400
Subject: [PATCH 342/603] Update click requirement from ~=8.0.4 to ~=8.1.3
 (#457)

* Update click requirement from ~=8.0.4 to ~=8.1.3

Updates the requirements on [click](https://github.com/pallets/click) to permit the latest version.
- [Release notes](https://github.com/pallets/click/releases)
- [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/click/compare/8.0.4...8.1.3)

---
updated-dependencies:
- dependency-name: click
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-192027.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-192027.yaml

diff --git a/.changes/unreleased/Dependency-20220914-192027.yaml b/.changes/unreleased/Dependency-20220914-192027.yaml
new file mode 100644
index 00000000..1863b52f
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192027.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Update click requirement from ~=8.0.4 to ~=8.1.3"
+time: 2022-09-14T19:20:27.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 457
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 87ca93da..8959df95 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,7 +7,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 black==22.8.0
 bumpversion
-click~=8.0.4
+click~=8.1.3
 flake8
 flaky
 freezegun==0.3.9

From d2dfcdf38858df878ac5bc2253336160c00a6bca Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Sep 2022 11:58:02 -0400
Subject: [PATCH 343/603] Bump pyodbc from 4.0.32 to 4.0.34 (#459)

* Bump pyodbc from 4.0.32 to 4.0.34

Bumps [pyodbc](https://github.com/mkleehammer/pyodbc) from 4.0.32 to 4.0.34.
- [Release notes](https://github.com/mkleehammer/pyodbc/releases)
- [Commits](https://github.com/mkleehammer/pyodbc/compare/4.0.32...4.0.34)

---
updated-dependencies:
- dependency-name: pyodbc
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* Remove newline

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .changes/unreleased/Dependency-20220913-225328.yaml | 7 +++++++
 requirements.txt                                    | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220913-225328.yaml

diff --git a/.changes/unreleased/Dependency-20220913-225328.yaml b/.changes/unreleased/Dependency-20220913-225328.yaml
new file mode 100644
index 00000000..b934c08c
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220913-225328.yaml
@@ -0,0 +1,7 @@
+kind: Dependency
+body: "Bump pyodbc from 4.0.32 to 4.0.34"
+time: 2022-09-13T22:53:28.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 459
diff --git a/requirements.txt b/requirements.txt
index 5d774e4f..14b36b72 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 PyHive[hive]>=0.6.0,<0.7.0
 requests[python]>=2.28.1
 
-pyodbc==4.0.32
+pyodbc==4.0.34
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability

From e07b8a20fa409e24e4c9959739c3eaefaf2eeb5a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Sep 2022 15:04:33 -0400
Subject: [PATCH 344/603] Bump thrift-sasl from 0.4.1 to 0.4.3 (#455)

* Bump thrift-sasl from 0.4.1 to 0.4.3

Bumps [thrift-sasl](https://github.com/cloudera/thrift_sasl) from 0.4.1 to 0.4.3.
- [Release notes](https://github.com/cloudera/thrift_sasl/releases)
- [Changelog](https://github.com/cloudera/thrift_sasl/blob/master/CHANGELOG.md)
- [Commits](https://github.com/cloudera/thrift_sasl/compare/v0.4.1...v0.4.3)

---
updated-dependencies:
- dependency-name: thrift-sasl
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-192125.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-192125.yaml

diff --git a/.changes/unreleased/Dependency-20220914-192125.yaml b/.changes/unreleased/Dependency-20220914-192125.yaml
new file mode 100644
index 00000000..78234be8
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192125.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump thrift-sasl from 0.4.1 to 0.4.3"
+time: 2022-09-14T19:21:25.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 455
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8959df95..e93c1b41 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -24,4 +24,4 @@ tox>=3.2.0
 
 # Test requirements
 sasl>=0.2.1
-thrift_sasl==0.4.1
+thrift_sasl==0.4.3

From 8c921d071e925524c958b2f3fdea1405761c1a97 Mon Sep 17 00:00:00 2001
From: James McNeill <55981540+jpmmcneill@users.noreply.github.com>
Date: Thu, 22 Sep 2022 14:16:00 +0100
Subject: [PATCH 345/603] Jpmmcneill/spark type boolean (#471)

* implement type boolean test spark

* changie result
---
 .changes/unreleased/Features-20220920-000814.yaml | 7 +++++++
 tests/functional/adapter/utils/test_data_types.py | 5 +++++
 2 files changed, 12 insertions(+)
 create mode 100644 .changes/unreleased/Features-20220920-000814.yaml

diff --git a/.changes/unreleased/Features-20220920-000814.yaml b/.changes/unreleased/Features-20220920-000814.yaml
new file mode 100644
index 00000000..96ba6364
--- /dev/null
+++ b/.changes/unreleased/Features-20220920-000814.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: implement testing for type_boolean in spark
+time: 2022-09-20T00:08:14.15447+01:00
+custom:
+  Author: jpmmcneill
+  Issue: "470"
+  PR: "471"
diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py
index 65a24a3a..ce608580 100644
--- a/tests/functional/adapter/utils/test_data_types.py
+++ b/tests/functional/adapter/utils/test_data_types.py
@@ -9,6 +9,7 @@
 from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric
 from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
 from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp
+from dbt.tests.adapter.utils.data_types.test_type_boolean import BaseTypeBoolean
 
 
 class TestTypeBigInt(BaseTypeBigInt):
@@ -65,3 +66,7 @@ class TestTypeString(BaseTypeString):
     
 class TestTypeTimestamp(BaseTypeTimestamp):
     pass
+
+
+class TestTypeBoolean(BaseTypeBoolean):
+    pass

From f20aecd275c211726c6225dc9abe1484fc90e9e6 Mon Sep 17 00:00:00 2001
From: Chenyu Li <chenyu.li@dbtlabs.com>
Date: Fri, 23 Sep 2022 15:59:41 -0700
Subject: [PATCH 346/603] More flexible cluster configuration (#467)

* update config hierarchy for python submission method (see the sketch below)
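
A minimal sketch of how a Python model can opt into the submission methods this patch introduces; `submission_method` as the selecting config key is an assumption about dbt-core, and the cluster spec values and table name are hypothetical:

    def model(dbt, session):
        dbt.config(
            materialized="table",
            submission_method="job_cluster",       # or "all_purpose_cluster" (the new default)
            job_cluster_config={                   # required by JobClusterPythonJobHelper
                "spark_version": "7.3.x-scala2.12",
                "num_workers": 2,
            },
            # for all_purpose_cluster: set cluster_id (or fall back to the connection's
            # cluster) and optionally create_notebook=True to run via a notebook
        )
        return session.table("my_source_table")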
---
 .../unreleased/Features-20220923-101248.yaml  |   8 +
 dbt/adapters/spark/connections.py             |   4 +
 dbt/adapters/spark/impl.py                    |  10 +-
 dbt/adapters/spark/python_submissions.py      | 210 ++++++++++--------
 tests/functional/adapter/test_python_model.py |   6 +-
 5 files changed, 135 insertions(+), 103 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220923-101248.yaml

diff --git a/.changes/unreleased/Features-20220923-101248.yaml b/.changes/unreleased/Features-20220923-101248.yaml
new file mode 100644
index 00000000..e46b2b10
--- /dev/null
+++ b/.changes/unreleased/Features-20220923-101248.yaml
@@ -0,0 +1,8 @@
+kind: Features
+body: Support job cluster in notebook submission method, remove requirement for user
+  for python model submission
+time: 2022-09-23T10:12:48.288911-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "444"
+  PR: "467"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 80e014a2..951e8ed7 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -82,6 +82,10 @@ def __pre_deserialize__(cls, data):
             data["database"] = None
         return data
 
+    @property
+    def cluster_id(self):
+        return self.cluster
+
     def __post_init__(self):
         # spark classifies database and schema as the same thing
         if self.database is not None and self.database != self.schema:
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 77b1e4b5..6eff652e 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -18,8 +18,8 @@
 from dbt.adapters.spark import SparkRelation
 from dbt.adapters.spark import SparkColumn
 from dbt.adapters.spark.python_submissions import (
-    DBNotebookPythonJobHelper,
-    DBCommandsApiPythonJobHelper,
+    JobClusterPythonJobHelper,
+    AllPurposeClusterPythonJobHelper,
 )
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
@@ -377,13 +377,13 @@ def generate_python_submission_response(self, submission_result: Any) -> Adapter
 
     @property
     def default_python_submission_method(self) -> str:
-        return "commands"
+        return "all_purpose_cluster"
 
     @property
     def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]:
         return {
-            "notebook": DBNotebookPythonJobHelper,
-            "commands": DBCommandsApiPythonJobHelper,
+            "job_cluster": JobClusterPythonJobHelper,
+            "all_purpose_cluster": AllPurposeClusterPythonJobHelper,
         }
 
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index 5ee4adb1..c6341abe 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -7,21 +7,31 @@
 import dbt.exceptions
 from dbt.adapters.base import PythonJobHelper
 from dbt.adapters.spark import SparkCredentials
+from dbt.adapters.spark import __version__
 
-DEFAULT_POLLING_INTERVAL = 5
+DEFAULT_POLLING_INTERVAL = 10
 SUBMISSION_LANGUAGE = "python"
 DEFAULT_TIMEOUT = 60 * 60 * 24
+DBT_SPARK_VERSION = __version__.version
 
 
 class BaseDatabricksHelper(PythonJobHelper):
     def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
-        self.check_credentials(credentials)
         self.credentials = credentials
         self.identifier = parsed_model["alias"]
-        self.schema = getattr(parsed_model, "schema", self.credentials.schema)
+        self.schema = parsed_model["schema"]
         self.parsed_model = parsed_model
         self.timeout = self.get_timeout()
         self.polling_interval = DEFAULT_POLLING_INTERVAL
+        self.check_credentials()
+        self.auth_header = {
+            "Authorization": f"Bearer {self.credentials.token}",
+            "User-Agent": f"dbt-labs-dbt-spark/{DBT_SPARK_VERSION} (Databricks)",
+        }
+
+    @property
+    def cluster_id(self) -> str:
+        return self.parsed_model.get("cluster_id", self.credentials.cluster_id)
 
     def get_timeout(self) -> int:
         timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
@@ -29,56 +39,11 @@ def get_timeout(self) -> int:
             raise ValueError("Timeout must be a positive integer")
         return timeout
 
-    def check_credentials(self, credentials: SparkCredentials) -> None:
+    def check_credentials(self) -> None:
         raise NotImplementedError(
             "Overwrite this method to check specific requirement for current submission method"
         )
 
-    def submit(self, compiled_code: str) -> None:
-        raise NotImplementedError(
-            "BasePythonJobHelper is an abstract class and you should implement submit method."
-        )
-
-    def polling(
-        self,
-        status_func,
-        status_func_kwargs,
-        get_state_func,
-        terminal_states,
-        expected_end_state,
-        get_state_msg_func,
-    ) -> Dict:
-        state = None
-        start = time.time()
-        exceeded_timeout = False
-        response = {}
-        while state not in terminal_states:
-            if time.time() - start > self.timeout:
-                exceeded_timeout = True
-                break
-            # should we do exponential backoff?
-            time.sleep(self.polling_interval)
-            response = status_func(**status_func_kwargs)
-            state = get_state_func(response)
-        if exceeded_timeout:
-            raise dbt.exceptions.RuntimeException("python model run timed out")
-        if state != expected_end_state:
-            raise dbt.exceptions.RuntimeException(
-                "python model run ended in state"
-                f"{state} with state_message\n{get_state_msg_func(response)}"
-            )
-        return response
-
-
-class DBNotebookPythonJobHelper(BaseDatabricksHelper):
-    def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
-        super().__init__(parsed_model, credentials)
-        self.auth_header = {"Authorization": f"Bearer {self.credentials.token}"}
-
-    def check_credentials(self, credentials) -> None:
-        if not credentials.user:
-            raise ValueError("Databricks user is required for notebook submission method.")
-
     def _create_work_dir(self, path: str) -> None:
         response = requests.post(
             f"https://{self.credentials.host}/api/2.0/workspace/mkdirs",
@@ -110,17 +75,18 @@ def _upload_notebook(self, path: str, compiled_code: str) -> None:
                 f"Error creating python notebook.\n {response.content!r}"
             )
 
-    def _submit_notebook(self, path: str) -> str:
+    def _submit_job(self, path: str, cluster_spec: dict) -> str:
+        job_spec = {
+            "run_name": f"{self.schema}-{self.identifier}-{uuid.uuid4()}",
+            "notebook_task": {
+                "notebook_path": path,
+            },
+        }
+        job_spec.update(cluster_spec)
         submit_response = requests.post(
             f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
             headers=self.auth_header,
-            json={
-                "run_name": f"{self.schema}-{self.identifier}-{uuid.uuid4()}",
-                "existing_cluster_id": self.credentials.cluster,
-                "notebook_task": {
-                    "notebook_path": path,
-                },
-            },
+            json=job_spec,
         )
         if submit_response.status_code != 200:
             raise dbt.exceptions.RuntimeException(
@@ -128,17 +94,16 @@ def _submit_notebook(self, path: str) -> str:
             )
         return submit_response.json()["run_id"]
 
-    def submit(self, compiled_code: str) -> None:
+    def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> None:
         # it is safe to call mkdirs even if dir already exists and have content inside
-        work_dir = f"/Users/{self.credentials.user}/{self.schema}/"
+        work_dir = f"/dbt_python_model/{self.schema}/"
         self._create_work_dir(work_dir)
-
         # add notebook
         whole_file_path = f"{work_dir}{self.identifier}"
         self._upload_notebook(whole_file_path, compiled_code)
 
         # submit job
-        run_id = self._submit_notebook(whole_file_path)
+        run_id = self._submit_job(whole_file_path, cluster_spec)
 
         self.polling(
             status_func=requests.get,
@@ -167,11 +132,56 @@ def submit(self, compiled_code: str) -> None:
                 f"{json_run_output['error_trace']}"
             )
 
+    def submit(self, compiled_code: str) -> None:
+        raise NotImplementedError(
+            "BasePythonJobHelper is an abstract class and you should implement submit method."
+        )
+
+    def polling(
+        self,
+        status_func,
+        status_func_kwargs,
+        get_state_func,
+        terminal_states,
+        expected_end_state,
+        get_state_msg_func,
+    ) -> Dict:
+        state = None
+        start = time.time()
+        exceeded_timeout = False
+        response = {}
+        while state not in terminal_states:
+            if time.time() - start > self.timeout:
+                exceeded_timeout = True
+                break
+            # should we do exponential backoff?
+            time.sleep(self.polling_interval)
+            response = status_func(**status_func_kwargs)
+            state = get_state_func(response)
+        if exceeded_timeout:
+            raise dbt.exceptions.RuntimeException("python model run timed out")
+        if state != expected_end_state:
+            raise dbt.exceptions.RuntimeException(
+                "python model run ended in state "
+                f"{state} with state_message\n{get_state_msg_func(response)}"
+            )
+        return response
+
+
+class JobClusterPythonJobHelper(BaseDatabricksHelper):
+    def check_credentials(self) -> None:
+        if not self.parsed_model["config"].get("job_cluster_config", None):
+            raise ValueError("job_cluster_config is required for job_cluster submission method.")
+
+    def submit(self, compiled_code: str) -> None:
+        cluster_spec = {"new_cluster": self.parsed_model["config"]["job_cluster_config"]}
+        self._submit_through_notebook(compiled_code, cluster_spec)
+
 
 class DBContext:
-    def __init__(self, credentials: SparkCredentials) -> None:
-        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
-        self.cluster = credentials.cluster
+    def __init__(self, credentials: SparkCredentials, cluster_id: str, auth_header: dict) -> None:
+        self.auth_header = auth_header
+        self.cluster_id = cluster_id
         self.host = credentials.host
 
     def create(self) -> str:
@@ -180,7 +190,7 @@ def create(self) -> str:
             f"https://{self.host}/api/1.2/contexts/create",
             headers=self.auth_header,
             json={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "language": SUBMISSION_LANGUAGE,
             },
         )
@@ -196,7 +206,7 @@ def destroy(self, context_id: str) -> str:
             f"https://{self.host}/api/1.2/contexts/destroy",
             headers=self.auth_header,
             json={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "contextId": context_id,
             },
         )
@@ -208,9 +218,9 @@ def destroy(self, context_id: str) -> str:
 
 
 class DBCommand:
-    def __init__(self, credentials: SparkCredentials) -> None:
-        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
-        self.cluster = credentials.cluster
+    def __init__(self, credentials: SparkCredentials, cluster_id: str, auth_header: dict) -> None:
+        self.auth_header = auth_header
+        self.cluster_id = cluster_id
         self.host = credentials.host
 
     def execute(self, context_id: str, command: str) -> str:
@@ -219,7 +229,7 @@ def execute(self, context_id: str, command: str) -> str:
             f"https://{self.host}/api/1.2/commands/execute",
             headers=self.auth_header,
             json={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "contextId": context_id,
                 "language": SUBMISSION_LANGUAGE,
                 "command": command,
@@ -237,7 +247,7 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
             f"https://{self.host}/api/1.2/commands/status",
             headers=self.auth_header,
             params={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "contextId": context_id,
                 "commandId": command_id,
             },
@@ -249,32 +259,38 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
         return response.json()
 
 
-class DBCommandsApiPythonJobHelper(BaseDatabricksHelper):
-    def check_credentials(self, credentials: SparkCredentials) -> None:
-        if not credentials.cluster:
-            raise ValueError("Databricks cluster is required for commands submission method.")
+class AllPurposeClusterPythonJobHelper(BaseDatabricksHelper):
+    def check_credentials(self) -> None:
+        if not self.cluster_id:
+            raise ValueError(
+                "Databricks cluster_id is required for all_purpose_cluster submission method when running with notebook."
+            )
 
     def submit(self, compiled_code: str) -> None:
-        context = DBContext(self.credentials)
-        command = DBCommand(self.credentials)
-        context_id = context.create()
-        try:
-            command_id = command.execute(context_id, compiled_code)
-            # poll until job finish
-            response = self.polling(
-                status_func=command.status,
-                status_func_kwargs={
-                    "context_id": context_id,
-                    "command_id": command_id,
-                },
-                get_state_func=lambda response: response["status"],
-                terminal_states=("Cancelled", "Error", "Finished"),
-                expected_end_state="Finished",
-                get_state_msg_func=lambda response: response.json()["results"]["data"],
-            )
-            if response["results"]["resultType"] == "error":
-                raise dbt.exceptions.RuntimeException(
-                    f"Python model failed with traceback as:\n" f"{response['results']['cause']}"
+        if self.parsed_model["config"].get("create_notebook", False):
+            self._submit_through_notebook(compiled_code, {"existing_cluster_id": self.cluster_id})
+        else:
+            context = DBContext(self.credentials, self.cluster_id, self.auth_header)
+            command = DBCommand(self.credentials, self.cluster_id, self.auth_header)
+            context_id = context.create()
+            try:
+                command_id = command.execute(context_id, compiled_code)
+                # poll until job finish
+                response = self.polling(
+                    status_func=command.status,
+                    status_func_kwargs={
+                        "context_id": context_id,
+                        "command_id": command_id,
+                    },
+                    get_state_func=lambda response: response["status"],
+                    terminal_states=("Cancelled", "Error", "Finished"),
+                    expected_end_state="Finished",
+                    get_state_msg_func=lambda response: response.json()["results"]["data"],
                 )
-        finally:
-            context.destroy(context_id)
+                if response["results"]["resultType"] == "error":
+                    raise dbt.exceptions.RuntimeException(
+                        f"Python model failed with traceback as:\n"
+                        f"{response['results']['cause']}"
+                    )
+            finally:
+                context.destroy(context_id)
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 059412f1..ed6185b6 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -2,11 +2,15 @@
 import pytest
 from dbt.tests.util import run_dbt, write_file, run_dbt_and_capture
 from dbt.tests.adapter.python_model.test_python_model import BasePythonModelTests, BasePythonIncrementalTests
-
+from dbt.tests.adapter.python_model.test_spark import BasePySparkTests
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonModelSpark(BasePythonModelTests):
     pass
 
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestPySpark(BasePySparkTests):
+    pass
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     @pytest.fixture(scope="class")

From 31104dff7ba2f63cd6c3adafeb2ac633b225e043 Mon Sep 17 00:00:00 2001
From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com>
Date: Mon, 26 Sep 2022 13:49:28 -0500
Subject: [PATCH 347/603] merge exclude columns spark (#390)

* merge exclude columns spark

* reformat

* integration test for merge exclude columns

* pin core branch for integration tests

* missing symbol

* changie

* add dest columns
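
A rough Python analogue of the new spark__get_merge_update_columns macro, to make the exclusion logic concrete; plain strings stand in for the quoted column objects the macro receives from adapter.get_columns_in_relation:

    def get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns):
        if merge_update_columns and merge_exclude_columns:
            raise ValueError(
                "Model cannot specify merge_update_columns and merge_exclude_columns"
            )
        if merge_update_columns:
            return merge_update_columns
        if merge_exclude_columns:
            excluded = {c.lower() for c in merge_exclude_columns}
            return [c for c in dest_columns if c.lower() not in excluded]
        return None  # None -> the merge falls back to "update set *"

    # mirrors the integration test: msg is excluded, so only id and color are updated
    print(get_merge_update_columns(None, ["msg"], ["id", "msg", "color"]))  # ['id', 'color']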
---
 .../unreleased/Features-20220826-133818.yaml  |  7 ++++++
 .../incremental/column_helpers.sql            | 23 +++++++++++++++++++
 .../incremental/strategies.sql                |  7 ++++--
 dev-requirements.txt                          |  2 +-
 .../models_delta/merge_exclude_columns.sql    | 22 ++++++++++++++++++
 .../seeds/expected_exclude_upsert.csv         |  4 ++++
 .../test_incremental_strategies.py            |  1 +
 7 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220826-133818.yaml
 create mode 100644 dbt/include/spark/macros/materializations/incremental/column_helpers.sql
 create mode 100644 tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
 create mode 100644 tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv

diff --git a/.changes/unreleased/Features-20220826-133818.yaml b/.changes/unreleased/Features-20220826-133818.yaml
new file mode 100644
index 00000000..f6771809
--- /dev/null
+++ b/.changes/unreleased/Features-20220826-133818.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: merge exclude columns for spark models
+time: 2022-08-26T13:38:18.75458-05:00
+custom:
+  Author: dave-connors-3
+  Issue: "5260"
+  PR: "390"
diff --git a/dbt/include/spark/macros/materializations/incremental/column_helpers.sql b/dbt/include/spark/macros/materializations/incremental/column_helpers.sql
new file mode 100644
index 00000000..3eec968d
--- /dev/null
+++ b/dbt/include/spark/macros/materializations/incremental/column_helpers.sql
@@ -0,0 +1,23 @@
+{% macro spark__get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) %}
+  {%- set default_cols = None -%}
+
+  {%- if merge_update_columns and merge_exclude_columns -%}
+    {{ exceptions.raise_compiler_error(
+        'Model cannot specify merge_update_columns and merge_exclude_columns. Please update model to use only one config'
+    )}}
+  {%- elif merge_update_columns -%}
+    {%- set update_columns = merge_update_columns -%}
+  {%- elif merge_exclude_columns -%}
+    {%- set update_columns = [] -%}
+    {%- for column in dest_columns -%}
+      {% if column.column | lower not in merge_exclude_columns | map("lower") | list %}
+        {%- do update_columns.append(column.quoted) -%}
+      {% endif %}
+    {%- endfor -%}
+  {%- else -%}
+    {%- set update_columns = default_cols -%}
+  {%- endif -%}
+
+  {{ return(update_columns) }}
+
+{% endmacro %}
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index d98e1f69..17196e85 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -20,9 +20,12 @@
 
 
 {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %}
-  {# skip dest_columns, use merge_update_columns config if provided, otherwise use "*" #}
+  {# need dest_columns for merge_exclude_columns, default to use "*" #}
   {%- set predicates = [] if predicates is none else [] + predicates -%}
-  {%- set update_columns = config.get("merge_update_columns") -%}
+  {%- set dest_columns = adapter.get_columns_in_relation(target) -%}
+  {%- set merge_update_columns = config.get('merge_update_columns') -%}
+  {%- set merge_exclude_columns = config.get('merge_exclude_columns') -%}
+  {%- set update_columns = get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) -%}
 
   {% if unique_key %}
       {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index e93c1b41..4148a759 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,6 +1,6 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@feature/merge_exclude_columns#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 
diff --git a/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql b/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
new file mode 100644
index 00000000..815f46b1
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
@@ -0,0 +1,22 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+    unique_key = 'id',
+    merge_exclude_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be ignored, color will be updated
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv
new file mode 100644
index 00000000..a0f1a652
--- /dev/null
+++ b/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv
@@ -0,0 +1,4 @@
+id,msg,color
+1,hello,blue
+2,goodbye,green
+3,anyway,purple
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
index 3848d11a..73bb6ba2 100644
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ b/tests/integration/incremental_strategies/test_incremental_strategies.py
@@ -78,6 +78,7 @@ def run_and_test(self):
         self.assertTablesEqual("merge_no_key", "expected_append")
         self.assertTablesEqual("merge_unique_key", "expected_upsert")
         self.assertTablesEqual("merge_update_columns", "expected_partial_upsert")
+        self.assertTablesEqual("merge_exclude_columns", "expected_exclude_upsert")
 
     @use_profile("databricks_cluster")
     def test_delta_strategies_databricks_cluster(self):

From ceab5abdd73999f34a34328735d71b6a47fb7594 Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Mon, 26 Sep 2022 15:35:31 -0400
Subject: [PATCH 348/603] Remove branch ref from dev-requirements.txt (#477)

---
 dev-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 4148a759..e93c1b41 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,6 +1,6 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git@feature/merge_exclude_columns#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 

From 32ddd224f64097e8178c442d80ded8f55ba1ba92 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Mon, 26 Sep 2022 15:30:18 -0600
Subject: [PATCH 349/603] Array macros (#454)

* Temporary dev-requirements

* Changelog entry

* Implementations and tests for array macros

* Remove `cast_array_to_string` macro

* Restore original dev-requirements.txt
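
For reference, spark__array_construct renders array(...), spark__array_concat renders concat(...), and spark__array_append composes the two. A quick sanity check of the generated SQL, assuming a local SparkSession (not part of the patch):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    # roughly what spark__array_append(array(1, 2), 3) compiles to:
    # array_construct -> array(3), then array_concat -> concat(array(1, 2), array(3))
    spark.sql("select concat(array(1, 2), array(3)) as appended").show()
    # expected: a single row containing [1, 2, 3]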
---
 .changes/unreleased/Features-20220913-084852.yaml |  7 +++++++
 dbt/include/spark/macros/utils/array_append.sql   |  3 +++
 dbt/include/spark/macros/utils/array_concat.sql   |  3 +++
 .../spark/macros/utils/array_construct.sql        |  3 +++
 tests/functional/adapter/utils/test_utils.py      | 15 +++++++++++++++
 5 files changed, 31 insertions(+)
 create mode 100644 .changes/unreleased/Features-20220913-084852.yaml
 create mode 100644 dbt/include/spark/macros/utils/array_append.sql
 create mode 100644 dbt/include/spark/macros/utils/array_concat.sql
 create mode 100644 dbt/include/spark/macros/utils/array_construct.sql

diff --git a/.changes/unreleased/Features-20220913-084852.yaml b/.changes/unreleased/Features-20220913-084852.yaml
new file mode 100644
index 00000000..014a598a
--- /dev/null
+++ b/.changes/unreleased/Features-20220913-084852.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Array macros
+time: 2022-09-13T08:48:52.684985-06:00
+custom:
+  Author: graciegoheen dbeatty10
+  Issue: "453"
+  PR: "454"
diff --git a/dbt/include/spark/macros/utils/array_append.sql b/dbt/include/spark/macros/utils/array_append.sql
new file mode 100644
index 00000000..efe39e7a
--- /dev/null
+++ b/dbt/include/spark/macros/utils/array_append.sql
@@ -0,0 +1,3 @@
+{% macro spark__array_append(array, new_element) -%}
+    {{ array_concat(array, array_construct([new_element])) }}
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/array_concat.sql b/dbt/include/spark/macros/utils/array_concat.sql
new file mode 100644
index 00000000..1441618c
--- /dev/null
+++ b/dbt/include/spark/macros/utils/array_concat.sql
@@ -0,0 +1,3 @@
+{% macro spark__array_concat(array_1, array_2) -%}
+    concat({{ array_1 }}, {{ array_2 }})
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/array_construct.sql b/dbt/include/spark/macros/utils/array_construct.sql
new file mode 100644
index 00000000..a4e5e0c7
--- /dev/null
+++ b/dbt/include/spark/macros/utils/array_construct.sql
@@ -0,0 +1,3 @@
+{% macro spark__array_construct(inputs, data_type) -%}
+    array( {{ inputs|join(' , ') }} )
+{%- endmacro %}
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index c71161e6..eab99b49 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -1,5 +1,8 @@
 import pytest
 
+from dbt.tests.adapter.utils.test_array_append import BaseArrayAppend
+from dbt.tests.adapter.utils.test_array_concat import BaseArrayConcat
+from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct
 from dbt.tests.adapter.utils.test_any_value import BaseAnyValue
 from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
@@ -31,6 +34,18 @@ class TestAnyValue(BaseAnyValue):
     pass
 
 
+class TestArrayAppend(BaseArrayAppend):
+    pass
+
+
+class TestArrayConcat(BaseArrayConcat):
+    pass
+
+
+class TestArrayConstruct(BaseArrayConstruct):
+    pass
+
+
 class TestBoolOr(BaseBoolOr):
     pass
 

From 23d17a068d6b2de26eee3b6830c8ab8d16a3797d Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Mon, 26 Sep 2022 19:52:58 -0600
Subject: [PATCH 350/603] Tests for `current_timestamp` (#475)

* Test failure will confirm that it is not a timezone-naive data type

* Temporarily update dev-requirements.txt

* Tests for current_timestamp

* Restore original dev-requirements.txt

* Tests that the Python driver for dbt-spark returns a timezone-naive datetime for `{{ dbt.current_timestamp() }}`
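
For context on the naive/aware distinction the new test checks, a standalone illustration (not adapter code); BaseCurrentTimestampNaive is expected to pass only when the returned datetime carries no tzinfo:

    from datetime import datetime, timezone

    naive = datetime.utcnow()            # no tzinfo; what this driver hands back
    aware = datetime.now(timezone.utc)   # tzinfo=UTC; what an "aware" adapter would return
    assert naive.tzinfo is None
    assert aware.tzinfo is not None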
---
 tests/functional/adapter/utils/test_utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index eab99b49..102df731 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -7,6 +7,7 @@
 from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
 from dbt.tests.adapter.utils.test_concat import BaseConcat
+from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive
 from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
 from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
 from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
@@ -59,6 +60,11 @@ class TestConcat(BaseConcat):
     pass
 
 
+# Use either BaseCurrentTimestampAware or BaseCurrentTimestampNaive but not both
+class TestCurrentTimestamp(BaseCurrentTimestampNaive):
+    pass
+
+
 class TestDateAdd(BaseDateAdd):
     pass
 

From 80dc02992c7626fa294537d6f8b1491a06b589a0 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 27 Sep 2022 06:02:20 -0600
Subject: [PATCH 351/603] Convert df to pyspark DataFrame if it is koalas
 before writing (#474)

* Temporarily update dev-requirements.txt

* Changelog entry

* Temporarily update dev-requirements.txt

* Convert df to pyspark DataFrame if it is koalas before writing

* Restore original version of dev-requirements.txt

* Preferentially convert Koalas DataFrames to pandas-on-Spark DataFrames first

* Fix explanation

Co-authored-by: Takuya UESHIN <ueshin@databricks.com>
---
 .../unreleased/Under the Hood-20220924-143713.yaml   |  7 +++++++
 dbt/include/spark/macros/materializations/table.sql  | 12 +++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220924-143713.yaml

diff --git a/.changes/unreleased/Under the Hood-20220924-143713.yaml b/.changes/unreleased/Under the Hood-20220924-143713.yaml
new file mode 100644
index 00000000..c537a939
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220924-143713.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Convert df to pyspark DataFrame if it is koalas before writing
+time: 2022-09-24T14:37:13.100404-06:00
+custom:
+  Author: dbeatty10 ueshin
+  Issue: "473"
+  PR: "474"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 25d70c72..5721bd25 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -46,6 +46,7 @@ import importlib.util
 
 pandas_available = False
 pyspark_available = False
+koalas_available = False
 
 # make sure pandas exists before using it
 if importlib.util.find_spec("pandas"):
@@ -57,17 +58,26 @@ if importlib.util.find_spec("pyspark.pandas"):
   import pyspark.pandas
   pyspark_available = True
 
-# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first
+# make sure databricks.koalas exists before using it
+if importlib.util.find_spec("databricks.koalas"):
+  import databricks.koalas
+  koalas_available = True
+
+# preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first
 # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`
 # and converting from pandas-on-Spark to Spark DataFrame has no overhead
 if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = pyspark.pandas.frame.DataFrame(df)
+elif koalas_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+  df = databricks.koalas.frame.DataFrame(df)
 
 # convert to pyspark.sql.dataframe.DataFrame
 if isinstance(df, pyspark.sql.dataframe.DataFrame):
   pass  # since it is already a Spark DataFrame
 elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame):
   df = df.to_spark()
+elif koalas_available and isinstance(df, databricks.koalas.frame.DataFrame):
+  df = df.to_spark()
 elif pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = spark.createDataFrame(df)
 else:

From d462afcd78258e962b642068bd58ec049fef4d65 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Wed, 28 Sep 2022 14:33:44 -0600
Subject: [PATCH 352/603] try/except rather than find_spec for optional imports
 (#489)

* try/except rather than find_spec for optional imports

* Remove unused import
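
A standalone sketch of why the switch matters: find_spec on a dotted name imports the parent package first, so probing databricks.koalas can itself raise when databricks is absent, whereas the try/except pattern treats every failure mode uniformly:

    import importlib.util

    # find_spec-based probe: may raise ModuleNotFoundError if "databricks" itself is missing
    try:
        koalas_spec = importlib.util.find_spec("databricks.koalas")
    except ModuleNotFoundError:
        koalas_spec = None

    # the pattern the materialization now uses: one uniform fallback
    try:
        import databricks.koalas
        koalas_available = True
    except ImportError:
        koalas_available = False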
---
 .../spark/macros/materializations/table.sql    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 5721bd25..80808910 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -42,26 +42,26 @@
 dbt = dbtObj(spark.table)
 df = model(dbt, spark)
 
-import importlib.util
-
-pandas_available = False
-pyspark_available = False
-koalas_available = False
-
 # make sure pandas exists before using it
-if importlib.util.find_spec("pandas"):
+try:
   import pandas
   pandas_available = True
+except ImportError:
+  pandas_available = False
 
 # make sure pyspark.pandas exists before using it
-if importlib.util.find_spec("pyspark.pandas"):
+try:
   import pyspark.pandas
   pyspark_available = True
+except ImportError:
+  pyspark_available = False
 
 # make sure databricks.koalas exists before using it
-if importlib.util.find_spec("databricks.koalas"):
+try:
   import databricks.koalas
   koalas_available = True
+except ImportError:
+  koalas_available = False
 
 # preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first
 # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`

From 7f233b16ce1ae6683eb62741cb32c99ff2867d70 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 29 Sep 2022 09:16:07 -0400
Subject: [PATCH 353/603] Bumping version to 1.4.0a1 and generate changelog
 (#492)

* Bumping version to 1.4.0a1 and generate CHANGELOG

* Updated changelog

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Leah Antkiewicz <leah.antkiewicz@fishtownanalytics.com>
---
 .bumpversion.cfg                                |  2 +-
 .changes/1.3.0-b1.md                            | 11 -----------
 .changes/1.3.0-b2.md                            |  5 -----
 .changes/1.3.0/Features-20220808-141141.yaml    |  8 --------
 .changes/1.3.0/Features-20220808-142118.yaml    |  7 -------
 .changes/1.3.0/Fixes-20220808-141623.yaml       |  8 --------
 .../1.3.0/Under the Hood-20220808-141320.yaml   |  7 -------
 .../1.3.0/Under the Hood-20220825-073413.yaml   |  7 -------
 .../unreleased/Dependency-20220913-225328.yaml  |  7 -------
 .../unreleased/Dependency-20220914-191910.yaml  |  7 -------
 .../unreleased/Dependency-20220914-192027.yaml  |  7 -------
 .../unreleased/Dependency-20220914-192102.yaml  |  7 -------
 .../unreleased/Dependency-20220914-192125.yaml  |  7 -------
 .../unreleased/Features-20220826-133818.yaml    |  7 -------
 .../unreleased/Features-20220913-084852.yaml    |  7 -------
 .../unreleased/Features-20220920-000814.yaml    |  7 -------
 .../unreleased/Features-20220923-101248.yaml    |  8 --------
 .changes/unreleased/Fixes-20220830-140224.yaml  |  7 -------
 .changes/unreleased/Fixes-20220914-010520.yaml  |  8 --------
 .../Under the Hood-20220829-164426.yaml         |  7 -------
 .../Under the Hood-20220912-104517.yaml         |  7 -------
 .../Under the Hood-20220913-152004.yaml         |  7 -------
 .../Under the Hood-20220916-125706.yaml         |  7 -------
 .../Under the Hood-20220924-143713.yaml         |  7 -------
 CHANGELOG.md                                    | 17 +----------------
 dbt/adapters/spark/__version__.py               |  2 +-
 setup.py                                        |  2 +-
 27 files changed, 4 insertions(+), 186 deletions(-)
 delete mode 100644 .changes/1.3.0-b1.md
 delete mode 100644 .changes/1.3.0-b2.md
 delete mode 100644 .changes/1.3.0/Features-20220808-141141.yaml
 delete mode 100644 .changes/1.3.0/Features-20220808-142118.yaml
 delete mode 100644 .changes/1.3.0/Fixes-20220808-141623.yaml
 delete mode 100644 .changes/1.3.0/Under the Hood-20220808-141320.yaml
 delete mode 100644 .changes/1.3.0/Under the Hood-20220825-073413.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220913-225328.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-191910.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-192027.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-192102.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-192125.yaml
 delete mode 100644 .changes/unreleased/Features-20220826-133818.yaml
 delete mode 100644 .changes/unreleased/Features-20220913-084852.yaml
 delete mode 100644 .changes/unreleased/Features-20220920-000814.yaml
 delete mode 100644 .changes/unreleased/Features-20220923-101248.yaml
 delete mode 100644 .changes/unreleased/Fixes-20220830-140224.yaml
 delete mode 100644 .changes/unreleased/Fixes-20220914-010520.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220829-164426.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220912-104517.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220913-152004.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220916-125706.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220924-143713.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f93a02ae..5fa558e9 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.0b2
+current_version = 1.4.0a1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/.changes/1.3.0-b1.md b/.changes/1.3.0-b1.md
deleted file mode 100644
index ef64f439..00000000
--- a/.changes/1.3.0-b1.md
+++ /dev/null
@@ -1,11 +0,0 @@
-## dbt-spark 1.3.0-b1 - July 29, 2022
-
-### Features
-- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-### Fixes
-- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-### Under the Hood
-- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
-
-### Contributors
-- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
diff --git a/.changes/1.3.0-b2.md b/.changes/1.3.0-b2.md
deleted file mode 100644
index 8f7ea1e6..00000000
--- a/.changes/1.3.0-b2.md
+++ /dev/null
@@ -1,5 +0,0 @@
-## dbt-spark 1.3.0-b2 - August 30, 2022
-### Features
-- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
-### Under the Hood
-- specify supported_languages for materialization that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
diff --git a/.changes/1.3.0/Features-20220808-141141.yaml b/.changes/1.3.0/Features-20220808-141141.yaml
deleted file mode 100644
index 444a3062..00000000
--- a/.changes/1.3.0/Features-20220808-141141.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Features
-body: Support python model through notebook, currently supported materializations
-  are table and incremental
-time: 2022-08-08T14:11:41.906131-05:00
-custom:
-  Author: ChenyuLInx
-  Issue: "417"
-  PR: "377"
diff --git a/.changes/1.3.0/Features-20220808-142118.yaml b/.changes/1.3.0/Features-20220808-142118.yaml
deleted file mode 100644
index 9c110e93..00000000
--- a/.changes/1.3.0/Features-20220808-142118.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: Add changie to dbt-spark
-time: 2022-08-08T14:21:18.569756-05:00
-custom:
-  Author: mcknight-42
-  Issue: "416"
-  PR: "418"
diff --git a/.changes/1.3.0/Fixes-20220808-141623.yaml b/.changes/1.3.0/Fixes-20220808-141623.yaml
deleted file mode 100644
index 793e3e5b..00000000
--- a/.changes/1.3.0/Fixes-20220808-141623.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Fixes
-body: Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so
-  on Linux
-time: 2022-08-08T14:16:23.846876-05:00
-custom:
-  Author: barberscot
-  Issue: "397"
-  PR: "398"
diff --git a/.changes/1.3.0/Under the Hood-20220808-141320.yaml b/.changes/1.3.0/Under the Hood-20220808-141320.yaml
deleted file mode 100644
index 82535f92..00000000
--- a/.changes/1.3.0/Under the Hood-20220808-141320.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Support core incremental refactor
-time: 2022-08-08T14:13:20.576155-05:00
-custom:
-  Author: gshank
-  Issue: "4402"
-  PR: "394"
diff --git a/.changes/1.3.0/Under the Hood-20220825-073413.yaml b/.changes/1.3.0/Under the Hood-20220825-073413.yaml
deleted file mode 100644
index 71e187ca..00000000
--- a/.changes/1.3.0/Under the Hood-20220825-073413.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: specify supported_languages for materialization that support python models
-time: 2022-08-25T07:34:13.397367-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "437"
-  PR: "440"
diff --git a/.changes/unreleased/Dependency-20220913-225328.yaml b/.changes/unreleased/Dependency-20220913-225328.yaml
deleted file mode 100644
index b934c08c..00000000
--- a/.changes/unreleased/Dependency-20220913-225328.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Dependency
-body: "Bump pyodbc from 4.0.32 to 4.0.34"
-time: 2022-09-13T22:53:28.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 459
diff --git a/.changes/unreleased/Dependency-20220914-191910.yaml b/.changes/unreleased/Dependency-20220914-191910.yaml
deleted file mode 100644
index ad2534c1..00000000
--- a/.changes/unreleased/Dependency-20220914-191910.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Bump black from 22.3.0 to 22.8.0"
-time: 2022-09-14T19:19:10.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 458
diff --git a/.changes/unreleased/Dependency-20220914-192027.yaml b/.changes/unreleased/Dependency-20220914-192027.yaml
deleted file mode 100644
index 1863b52f..00000000
--- a/.changes/unreleased/Dependency-20220914-192027.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Update click requirement from ~=8.0.4 to ~=8.1.3"
-time: 2022-09-14T19:20:27.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 457
diff --git a/.changes/unreleased/Dependency-20220914-192102.yaml b/.changes/unreleased/Dependency-20220914-192102.yaml
deleted file mode 100644
index f13fd6c4..00000000
--- a/.changes/unreleased/Dependency-20220914-192102.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Bump mypy from 0.950 to 0.971"
-time: 2022-09-14T19:21:02.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 456
diff --git a/.changes/unreleased/Dependency-20220914-192125.yaml b/.changes/unreleased/Dependency-20220914-192125.yaml
deleted file mode 100644
index 78234be8..00000000
--- a/.changes/unreleased/Dependency-20220914-192125.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Bump thrift-sasl from 0.4.1 to 0.4.3"
-time: 2022-09-14T19:21:25.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 455
diff --git a/.changes/unreleased/Features-20220826-133818.yaml b/.changes/unreleased/Features-20220826-133818.yaml
deleted file mode 100644
index f6771809..00000000
--- a/.changes/unreleased/Features-20220826-133818.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: merge exclude columns for spark models
-time: 2022-08-26T13:38:18.75458-05:00
-custom:
-  Author: dave-connors-3
-  Issue: "5260"
-  PR: "390"
diff --git a/.changes/unreleased/Features-20220913-084852.yaml b/.changes/unreleased/Features-20220913-084852.yaml
deleted file mode 100644
index 014a598a..00000000
--- a/.changes/unreleased/Features-20220913-084852.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: Array macros
-time: 2022-09-13T08:48:52.684985-06:00
-custom:
-  Author: graciegoheen dbeatty10
-  Issue: "453"
-  PR: "454"
diff --git a/.changes/unreleased/Features-20220920-000814.yaml b/.changes/unreleased/Features-20220920-000814.yaml
deleted file mode 100644
index 96ba6364..00000000
--- a/.changes/unreleased/Features-20220920-000814.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: implement testing for type_boolean in spark
-time: 2022-09-20T00:08:14.15447+01:00
-custom:
-  Author: jpmmcneill
-  Issue: "470"
-  PR: "471"
diff --git a/.changes/unreleased/Features-20220923-101248.yaml b/.changes/unreleased/Features-20220923-101248.yaml
deleted file mode 100644
index e46b2b10..00000000
--- a/.changes/unreleased/Features-20220923-101248.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Features
-body: Support job cluster in notebook submission method, remove requirement for user
-  for python model submission
-time: 2022-09-23T10:12:48.288911-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "444"
-  PR: "467"
diff --git a/.changes/unreleased/Fixes-20220830-140224.yaml b/.changes/unreleased/Fixes-20220830-140224.yaml
deleted file mode 100644
index 9e3da3ea..00000000
--- a/.changes/unreleased/Fixes-20220830-140224.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: python incremental model tmp table using correct schema
-time: 2022-08-30T14:02:24.603033-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "441"
-  PR: "445"
diff --git a/.changes/unreleased/Fixes-20220914-010520.yaml b/.changes/unreleased/Fixes-20220914-010520.yaml
deleted file mode 100644
index f8584f05..00000000
--- a/.changes/unreleased/Fixes-20220914-010520.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Fixes
-body: change to get_columns_in_relation to fix cache inconsistencies to fix cache
-  issues in incremental models causing failure on on_schema_change
-time: 2022-09-14T01:05:20.312981-05:00
-custom:
-  Author: McKnight-42
-  Issue: "447"
-  PR: "451"
diff --git a/.changes/unreleased/Under the Hood-20220829-164426.yaml b/.changes/unreleased/Under the Hood-20220829-164426.yaml
deleted file mode 100644
index bf58971f..00000000
--- a/.changes/unreleased/Under the Hood-20220829-164426.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Submit python model with Command API by default. Adjusted run name
-time: 2022-08-29T16:44:26.509138-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "424"
-  PR: "442"
diff --git a/.changes/unreleased/Under the Hood-20220912-104517.yaml b/.changes/unreleased/Under the Hood-20220912-104517.yaml
deleted file mode 100644
index e45c97bf..00000000
--- a/.changes/unreleased/Under the Hood-20220912-104517.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Better interface for python submission
-time: 2022-09-12T10:45:17.226481-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "452"
-  PR: "452"
diff --git a/.changes/unreleased/Under the Hood-20220913-152004.yaml b/.changes/unreleased/Under the Hood-20220913-152004.yaml
deleted file mode 100644
index 4c372db0..00000000
--- a/.changes/unreleased/Under the Hood-20220913-152004.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: ignore mypy typing issues
-time: 2022-09-13T15:20:04.459783-07:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "461"
-  PR: "462"
diff --git a/.changes/unreleased/Under the Hood-20220916-125706.yaml b/.changes/unreleased/Under the Hood-20220916-125706.yaml
deleted file mode 100644
index 54b82eb5..00000000
--- a/.changes/unreleased/Under the Hood-20220916-125706.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Enable Pandas and Pandas-on-Spark DataFrames for dbt python models
-time: 2022-09-16T12:57:06.846297-06:00
-custom:
-  Author: chamini2 dbeatty10
-  Issue: "468"
-  PR: "469"
diff --git a/.changes/unreleased/Under the Hood-20220924-143713.yaml b/.changes/unreleased/Under the Hood-20220924-143713.yaml
deleted file mode 100644
index c537a939..00000000
--- a/.changes/unreleased/Under the Hood-20220924-143713.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Convert df to pyspark DataFrame if it is koalas before writing
-time: 2022-09-24T14:37:13.100404-06:00
-custom:
-  Author: dbeatty10 ueshin
-  Issue: "473"
-  PR: "474"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index de20a073..6dd49494 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,25 +4,10 @@
 - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
-## dbt-spark 1.3.0-b2 - August 30, 2022
-### Features
-- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
-### Under the Hood
-- specify supported_languages for materialization that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
 
-## dbt-spark 1.3.0-b1 - July 29, 2022
-
-### Features
-- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-### Fixes
-- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-### Under the Hood
-- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
-
-### Contributors
-- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
 - [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index e2c1a233..70ba273f 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.3.0b2"
+version = "1.4.0a1"
diff --git a/setup.py b/setup.py
index 05e81449..9d6c1367 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.3.0b2"
+package_version = "1.4.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 0cb958275fa3184dd6cee451f2f8b7719b39e380 Mon Sep 17 00:00:00 2001
From: Chenyu Li <chenyu.li@dbtlabs.com>
Date: Fri, 30 Sep 2022 13:42:32 -0700
Subject: [PATCH 354/603] various improvement (#493)

Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
---
 dbt/adapters/spark/python_submissions.py         | 16 +++++++++++++---
 .../spark/macros/materializations/table.sql      | 10 ++++++----
 tests/functional/adapter/test_python_model.py    | 16 ++++++++++++++++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index c6341abe..1e81c572 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -31,7 +31,7 @@ def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
 
     @property
     def cluster_id(self) -> str:
-        return self.parsed_model.get("cluster_id", self.credentials.cluster_id)
+        return self.parsed_model["config"].get("cluster_id", self.credentials.cluster_id)
 
     def get_timeout(self) -> int:
         timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
@@ -82,7 +82,17 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str:
                 "notebook_path": path,
             },
         }
-        job_spec.update(cluster_spec)
+        job_spec.update(cluster_spec)  # updates 'new_cluster' config
+        # PyPI packages requested via the `packages` model config
+        packages = self.parsed_model["config"].get("packages", [])
+        # libraries supplied in other formats via `additional_libs`, passed through as-is
+        additional_libs = self.parsed_model["config"].get("additional_libs", [])
+        libraries = []
+        for package in packages:
+            libraries.append({"pypi": {"package": package}})
+        for lib in additional_libs:
+            libraries.append(lib)
+        job_spec.update({"libraries": libraries})  # type: ignore
         submit_response = requests.post(
             f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
             headers=self.auth_header,
@@ -96,7 +106,7 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str:
 
     def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> None:
         # it is safe to call mkdirs even if the dir already exists and has content inside
-        work_dir = f"/dbt_python_model/{self.schema}/"
+        work_dir = f"/Shared/dbt_python_model/{self.schema}/"
         self._create_work_dir(work_dir)
         # add notebook
         whole_file_path = f"{work_dir}{self.identifier}"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 80808910..c82e27e9 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -42,6 +42,8 @@
 dbt = dbtObj(spark.table)
 df = model(dbt, spark)
 
+# make sure pyspark exists in the namespace, for 7.3.x-scala2.12 it does not exist
+import pyspark
 # make sure pandas exists before using it
 try:
   import pandas
@@ -52,9 +54,9 @@ except ImportError:
 # make sure pyspark.pandas exists before using it
 try:
   import pyspark.pandas
-  pyspark_available = True
+  pyspark_pandas_api_available = True
 except ImportError:
-  pyspark_available = False
+  pyspark_pandas_api_available = False
 
 # make sure databricks.koalas exists before using it
 try:
@@ -66,7 +68,7 @@ except ImportError:
 # preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first
 # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`
 # and converting from pandas-on-Spark to Spark DataFrame has no overhead
-if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+if pyspark_pandas_api_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = pyspark.pandas.frame.DataFrame(df)
 elif koalas_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = databricks.koalas.frame.DataFrame(df)
@@ -74,7 +76,7 @@ elif koalas_available and pandas_available and isinstance(df, pandas.core.frame.
 # convert to pyspark.sql.dataframe.DataFrame
 if isinstance(df, pyspark.sql.dataframe.DataFrame):
   pass  # since it is already a Spark DataFrame
-elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame):
+elif pyspark_pandas_api_available and isinstance(df, pyspark.pandas.frame.DataFrame):
   df = df.to_spark()
 elif koalas_available and isinstance(df, databricks.koalas.frame.DataFrame):
   df = df.to_spark()
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index ed6185b6..140f4162 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -20,10 +20,26 @@ def project_config_update(self):
 
 models__simple_python_model = """
 import pandas
+import torch
+import spacy
 
 def model(dbt, spark):
     dbt.config(
         materialized='table',
+        submission_method='job_cluster',
+        job_cluster_config={
+            "spark_version": "7.3.x-scala2.12",
+            "node_type_id": "i3.xlarge",
+            "num_workers": 0,
+            "spark_conf": {
+                "spark.databricks.cluster.profile": "singleNode",
+                "spark.master": "local[*, 4]"
+            },
+            "custom_tags": {
+                "ResourceClass": "SingleNode"
+            }
+        },
+        packages=['spacy', 'torch']
     )
     data = [[1,2]] * 10
     return spark.createDataFrame(data, schema=['test', 'test2'])

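A minimal Python sketch (illustrative only, with hypothetical config values) of how the `_submit_job` hunk above folds the `packages` and `additional_libs` configs into the Databricks job spec's `libraries` list:

```python
# Hypothetical config values; mirrors the list-building logic added to _submit_job.
config = {
    "packages": ["spacy", "torch"],
    "additional_libs": [{"whl": "dbfs:/path/to/custom_lib.whl"}],  # example path
}

libraries = [{"pypi": {"package": pkg}} for pkg in config.get("packages", [])]
libraries.extend(config.get("additional_libs", []))

job_spec = {"libraries": libraries}
# job_spec["libraries"] ==
# [{"pypi": {"package": "spacy"}},
#  {"pypi": {"package": "torch"}},
#  {"whl": "dbfs:/path/to/custom_lib.whl"}]
```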
From b65e4f366bce7dc98c2e698667fb197faa7cbe48 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Mon, 3 Oct 2022 08:04:34 -0700
Subject: [PATCH 355/603] consolidate timestamp macros (#480)

* add mypy ignore to column, connections and init

* consolidate timestamp macros

* add changie

* Update Features-20220926-123609.yaml

* add backcompat to test fixture

* remove current_timestamp_in_utc

* add expected_sql

* remove backcompat from test_timestamps.py

* update dev-requirements

* Update change log body

* lower case timestamps
---
 .../unreleased/Features-20220926-123609.yaml   |  7 +++++++
 dbt/include/spark/macros/adapters.sql          |  4 ----
 dbt/include/spark/macros/utils/timestamps.sql  |  3 +++
 .../adapter/utils/test_timestamps.py           | 18 ++++++++++++++++++
 4 files changed, 28 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220926-123609.yaml
 create mode 100644 dbt/include/spark/macros/utils/timestamps.sql
 create mode 100644 tests/functional/adapter/utils/test_timestamps.py

diff --git a/.changes/unreleased/Features-20220926-123609.yaml b/.changes/unreleased/Features-20220926-123609.yaml
new file mode 100644
index 00000000..b80b3730
--- /dev/null
+++ b/.changes/unreleased/Features-20220926-123609.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Migrate dbt-utils current_timestamp macros into core + adapters
+time: 2022-09-26T12:36:09.319981-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "483"
+  PR: "480"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 88190cc0..abc7a0ba 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -209,10 +209,6 @@
   {{ return(load_result('list_schemas').table) }}
 {% endmacro %}
 
-{% macro spark__current_timestamp() -%}
-  current_timestamp()
-{%- endmacro %}
-
 {% macro spark__rename_relation(from_relation, to_relation) -%}
   {% call statement('rename_relation') -%}
     {% if not from_relation.type %}
diff --git a/dbt/include/spark/macros/utils/timestamps.sql b/dbt/include/spark/macros/utils/timestamps.sql
new file mode 100644
index 00000000..68d6f688
--- /dev/null
+++ b/dbt/include/spark/macros/utils/timestamps.sql
@@ -0,0 +1,3 @@
+{% macro spark__current_timestamp() -%}
+    current_timestamp()
+{%- endmacro %}
diff --git a/tests/functional/adapter/utils/test_timestamps.py b/tests/functional/adapter/utils/test_timestamps.py
new file mode 100644
index 00000000..8507c0a6
--- /dev/null
+++ b/tests/functional/adapter/utils/test_timestamps.py
@@ -0,0 +1,18 @@
+import pytest
+from dbt.tests.adapter.utils.test_timestamps import BaseCurrentTimestamps
+
+
+class TestCurrentTimestampSpark(BaseCurrentTimestamps):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {"get_current_timestamp.sql": "select {{ current_timestamp() }} as current_timestamp"}
+
+    @pytest.fixture(scope="class")
+    def expected_schema(self):
+        return {
+            "current_timestamp": "timestamp"
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_sql(self):
+        return """select current_timestamp() as current_timestamp"""

From 37dcfe3061c2d7c50621ae1591062fc4fed5e995 Mon Sep 17 00:00:00 2001
From: "V.Shkaberda" <V.Shkaberda@gmail.com>
Date: Wed, 12 Oct 2022 00:56:13 +0300
Subject: [PATCH 356/603] Fix/ldap password (#396)

* Fix password is not passing for thrift + LDAP.

* Add password to tests.

* Add CHANGELOG entry.

* Fixing up changelog entry

Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
Co-authored-by: Leah Antkiewicz <leah.antkiewicz@fishtownanalytics.com>
---
 .changes/unreleased/Fixes-20220926-112857.yaml | 7 +++++++
 dbt/adapters/spark/connections.py              | 3 +++
 tests/unit/test_adapter.py                     | 6 ++++--
 3 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20220926-112857.yaml

diff --git a/.changes/unreleased/Fixes-20220926-112857.yaml b/.changes/unreleased/Fixes-20220926-112857.yaml
new file mode 100644
index 00000000..2a18f13a
--- /dev/null
+++ b/.changes/unreleased/Fixes-20220926-112857.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Password doesn't pass to server using LDAP connection via thrift (#310)
+time: 2022-09-26T11:28:57.306285-04:00
+custom:
+  Author: VShkaberda
+  Issue: "310"
+  PR: "396"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 951e8ed7..66ca93d3 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -65,6 +65,7 @@ class SparkCredentials(Credentials):
     endpoint: Optional[str] = None
     token: Optional[str] = None
     user: Optional[str] = None
+    password: Optional[str] = None
     port: int = 443
     auth: Optional[str] = None
     kerberos_service_name: Optional[str] = None
@@ -375,6 +376,7 @@ def open(cls, connection):
                             username=creds.user,
                             auth=creds.auth,
                             kerberos_service_name=creds.kerberos_service_name,
+                            password=creds.password,
                         )
                         conn = hive.connect(thrift_transport=transport)
                     else:
@@ -384,6 +386,7 @@ def open(cls, connection):
                             username=creds.user,
                             auth=creds.auth,
                             kerberos_service_name=creds.kerberos_service_name,
+                            password=creds.password,
                         )  # noqa
                     handle = PyhiveConnectionWrapper(conn)
                 elif creds.method == SparkConnectionMethod.ODBC:
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index f87a89b2..53b95f73 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -154,12 +154,13 @@ def test_thrift_connection(self):
         config = self._get_target_thrift(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_thrift_connect(host, port, username, auth, kerberos_service_name):
+        def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
             self.assertEqual(host, 'myorg.sparkhost.com')
             self.assertEqual(port, 10001)
             self.assertEqual(username, 'dbt')
             self.assertIsNone(auth)
             self.assertIsNone(kerberos_service_name)
+            self.assertIsNone(password)
 
         with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
             connection = adapter.acquire_connection('dummy')
@@ -193,12 +194,13 @@ def test_thrift_connection_kerberos(self):
         config = self._get_target_thrift_kerberos(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_thrift_connect(host, port, username, auth, kerberos_service_name):
+        def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
             self.assertEqual(host, 'myorg.sparkhost.com')
             self.assertEqual(port, 10001)
             self.assertEqual(username, 'dbt')
             self.assertEqual(auth, 'KERBEROS')
             self.assertEqual(kerberos_service_name, 'hive')
+            self.assertIsNone(password)
 
         with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
             connection = adapter.acquire_connection('dummy')

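In practice the fix means the `password` from the profile now reaches PyHive; a rough sketch, assuming an LDAP-authenticated thrift connection with placeholder credentials:

```python
from pyhive import hive  # shipped with the adapter's PyHive extra

# Placeholder host/credentials; shows that `password` is now forwarded to hive.connect
conn = hive.connect(
    host="myorg.sparkhost.com",
    port=10001,
    username="dbt",
    password="secret",  # previously dropped, now passed through from SparkCredentials
    auth="LDAP",
)
```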
From b759267461a021961185655779dd286ce6a0aefc Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Fri, 4 Nov 2022 11:33:54 -0500
Subject: [PATCH 357/603] [CT-1469] init pr for short term python version issue
 fix (#508)

---
 .github/workflows/main.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index bf607c37..ff1c92ce 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -48,7 +48,7 @@ jobs:
           persist-credentials: false
 
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4.3.0
         with:
           python-version: '3.8'
 
@@ -89,7 +89,7 @@ jobs:
         uses: actions/checkout@v2
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4.3.0
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -127,9 +127,9 @@ jobs:
         uses: actions/checkout@v2
 
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4.3.0
         with:
-          python-version: 3.8
+          python-version: '3.8'
 
       - name: Install python dependencies
         run: |
@@ -179,7 +179,7 @@ jobs:
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4.3.0
         with:
           python-version: ${{ matrix.python-version }}
 

From c87b6b2c48bcefb0ce52cd64984d3129d6f14ea0 Mon Sep 17 00:00:00 2001
From: Akash R Nilugal <akashnilugal@gmail.com>
Date: Mon, 14 Nov 2022 23:11:12 +0530
Subject: [PATCH 358/603] dbt run fails with spark-testing profile of examples
 (#513)

---
 .changes/unreleased/Fixes-20221113-211208.yaml | 7 +++++++
 README.md                                      | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20221113-211208.yaml

diff --git a/.changes/unreleased/Fixes-20221113-211208.yaml b/.changes/unreleased/Fixes-20221113-211208.yaml
new file mode 100644
index 00000000..73a180d8
--- /dev/null
+++ b/.changes/unreleased/Fixes-20221113-211208.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Updated the Spark-testing profile name to spark_testing
+time: 2022-11-13T21:12:08.669273+05:30
+custom:
+  Author: akashrn5
+  Issue: "514"
+  PR: "513"
diff --git a/README.md b/README.md
index 241d869d..3d9f75d3 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ If the instance doesn't start correctly, try the complete reset command listed b
 Create a profile like this one:
 
 ```
-spark-testing:
+spark_testing:
   target: local
   outputs:
     local:

From b6cee1dbeb817c6e51d60b2a0c2342c5fb9c1835 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Tue, 15 Nov 2022 15:54:45 -0600
Subject: [PATCH 359/603] fix flake8 change (#516)

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e85b1dc8..4b098e0c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -34,7 +34,7 @@ repos:
     - "--target-version=py38"
     - "--check"
     - "--diff"
-- repo: https://gitlab.com/pycqa/flake8
+- repo: https://github.com/pycqa/flake8
   rev: 4.0.1
   hooks:
   - id: flake8

From becc101486928bc571de5a00b4117f2fb675b8a5 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN <ueshin@databricks.com>
Date: Tue, 29 Nov 2022 07:52:59 -0800
Subject: [PATCH 360/603] Supports new error messages. (#520)

* Supports new error messages.

* changie

* tuple

* fix.

* suggestion.
---
 .changes/unreleased/Fixes-20221116-234601.yaml | 7 +++++++
 dbt/adapters/spark/impl.py                     | 9 ++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20221116-234601.yaml

diff --git a/.changes/unreleased/Fixes-20221116-234601.yaml b/.changes/unreleased/Fixes-20221116-234601.yaml
new file mode 100644
index 00000000..3e394c87
--- /dev/null
+++ b/.changes/unreleased/Fixes-20221116-234601.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Support new error messages in the future Spark.
+time: 2022-11-16T23:46:01.899921861Z
+custom:
+  Author: ueshin
+  Issue: "515"
+  PR: "520"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 6eff652e..b00e8401 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -37,6 +37,12 @@
 KEY_TABLE_OWNER = "Owner"
 KEY_TABLE_STATISTICS = "Statistics"
 
+TABLE_OR_VIEW_NOT_FOUND_MESSAGES = (
+    "[TABLE_OR_VIEW_NOT_FOUND]",
+    "Table or view not found",
+    "NoSuchTableException",
+)
+
 
 @dataclass
 class SparkConfig(AdapterConfig):
@@ -220,7 +226,8 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
             # spark would throw an error when the table doesn't exist, where other
             # CDWs would just return an empty list, normalizing the behavior here
             errmsg = getattr(e, "msg", "")
-            if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg:
+            found_msgs = (msg in errmsg for msg in TABLE_OR_VIEW_NOT_FOUND_MESSAGES)
+            if any(found_msgs):
                 pass
             else:
                 raise e

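The rewritten check is equivalent to this standalone Python sketch (message markers copied from the tuple added above):

```python
TABLE_OR_VIEW_NOT_FOUND_MESSAGES = (
    "[TABLE_OR_VIEW_NOT_FOUND]",
    "Table or view not found",
    "NoSuchTableException",
)

def is_missing_relation_error(errmsg: str) -> bool:
    # True when any known "missing table/view" marker appears in the message,
    # covering both current and future Spark error formats.
    return any(msg in errmsg for msg in TABLE_OR_VIEW_NOT_FOUND_MESSAGES)

assert is_missing_relation_error("[TABLE_OR_VIEW_NOT_FOUND] The table `db`.`missing` cannot be found")
assert not is_missing_relation_error("Permission denied")
```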
From 951184703a3d95656b9796033503789bdfa01ce8 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Mon, 5 Dec 2022 14:48:11 -0600
Subject: [PATCH 361/603] trying to fix tox issue causing failures (#534)

* updating tox python version, changing contributing file example for integration tests

* remove change to contributing file for backport ease

* add changelog

* trying to incorporate multiple python versions

* trying to fix unit test

* remove unneeded call to basepython

* back to 3.8
---
 .changes/unreleased/Under the Hood-20221202-140724.yaml | 7 +++++++
 setup.py                                                | 1 +
 tox.ini                                                 | 3 +--
 3 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20221202-140724.yaml

diff --git a/.changes/unreleased/Under the Hood-20221202-140724.yaml b/.changes/unreleased/Under the Hood-20221202-140724.yaml
new file mode 100644
index 00000000..88fee7be
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20221202-140724.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: updating python version in tox
+time: 2022-12-02T14:07:24.377668-06:00
+custom:
+  Author: McKnight-42
+  Issue: "536"
+  PR: "534"
diff --git a/setup.py b/setup.py
index 9d6c1367..5b5a0258 100644
--- a/setup.py
+++ b/setup.py
@@ -93,6 +93,7 @@ def _get_dbt_core_version():
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
     ],
     python_requires=">=3.7",
 )
diff --git a/tox.ini b/tox.ini
index a75e2a26..cdb33a78 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,8 +2,7 @@
 skipsdist = True
 envlist = unit, flake8, integration-spark-thrift
 
-[testenv:unit]
-basepython = python3.8
+[testenv:{unit,py37,py38,py39,py310,py}]
 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
 passenv = DBT_* PYTEST_ADDOPTS
 deps =

From 712a553b6048606ff9cedf275315df4d83b9a1cf Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Fri, 9 Dec 2022 12:39:56 -0800
Subject: [PATCH 362/603] Policy Changes to support Py 3.11 (#543)

* add mypy ignore to column, connections and init

* passenv spacing changes post tox release of 4.0

* adding allowlist_externals to trigger bash commands

* add default_factory lambda

* add extra allowlist external

* add changie

* experiment with default_policy

Co-authored-by: Matthew McKnight <matthew.mcknight@dbtlabs.com>
---
 .../unreleased/Fixes-20221209-101854.yaml     |  7 ++++
 dbt/adapters/spark/impl.py                    |  4 +--
 dbt/adapters/spark/relation.py                |  6 ++--
 tox.ini                                       | 34 ++++++++++++++++---
 4 files changed, 41 insertions(+), 10 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20221209-101854.yaml

diff --git a/.changes/unreleased/Fixes-20221209-101854.yaml b/.changes/unreleased/Fixes-20221209-101854.yaml
new file mode 100644
index 00000000..afd1d6dc
--- /dev/null
+++ b/.changes/unreleased/Fixes-20221209-101854.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Update policy code and test config to support py3.11 and tox 4.0
+time: 2022-12-09T10:18:54.68118-08:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "542"
+  PR: "543"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index b00e8401..f02025c7 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -158,7 +158,7 @@ def list_relations_without_caching(
             rel_type = RelationType.View if "Type: VIEW" in information else RelationType.Table
             is_delta = "Provider: delta" in information
             is_hudi = "Provider: hudi" in information
-            relation = self.Relation.create(
+            relation: BaseRelation = self.Relation.create(
                 schema=_schema,
                 identifier=name,
                 type=rel_type,
@@ -171,7 +171,7 @@ def list_relations_without_caching(
         return relations
 
     def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]:
-        if not self.Relation.include_policy.database:
+        if not self.Relation.get_default_include_policy().database:
             database = None  # type: ignore
 
         return super().get_relation(database, schema, identifier)
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index 249caf0d..d3abb0d3 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -1,6 +1,6 @@
 from typing import Optional
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
 from dbt.adapters.base.relation import BaseRelation, Policy
 from dbt.exceptions import RuntimeException
@@ -22,8 +22,8 @@ class SparkIncludePolicy(Policy):
 
 @dataclass(frozen=True, eq=False, repr=False)
 class SparkRelation(BaseRelation):
-    quote_policy: SparkQuotePolicy = SparkQuotePolicy()
-    include_policy: SparkIncludePolicy = SparkIncludePolicy()
+    quote_policy: Policy = field(default_factory=lambda: SparkQuotePolicy())
+    include_policy: Policy = field(default_factory=lambda: SparkIncludePolicy())
     quote_character: str = "`"
     is_delta: Optional[bool] = None
     is_hudi: Optional[bool] = None
diff --git a/tox.ini b/tox.ini
index cdb33a78..31456b08 100644
--- a/tox.ini
+++ b/tox.ini
@@ -3,36 +3,54 @@ skipsdist = True
 envlist = unit, flake8, integration-spark-thrift
 
 [testenv:{unit,py37,py38,py39,py310,py}]
+allowlist_externals =
+    /bin/bash
 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
-passenv = DBT_* PYTEST_ADDOPTS
+passenv =
+    DBT_*
+    PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
 
 [testenv:integration-spark-databricks-http]
+allowlist_externals =
+    /bin/bash
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/*'
-passenv = DBT_* PYTEST_ADDOPTS
+passenv =
+    DBT_*
+    PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
     -e.
 
 [testenv:integration-spark-databricks-odbc-cluster]
+allowlist_externals =
+    /bin/bash
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
-passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
+passenv =
+    DBT_*
+    PYTEST_ADDOPTS
+    ODBC_DRIVER
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
     -e.
 
 [testenv:integration-spark-databricks-odbc-sql-endpoint]
+allowlist_externals =
+    /bin/bash
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
-passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER
+passenv =
+    DBT_*
+    PYTEST_ADDOPTS
+    ODBC_DRIVER
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -40,16 +58,22 @@ deps =
 
 
 [testenv:integration-spark-thrift]
+allowlist_externals =
+    /bin/bash
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
-passenv = DBT_* PYTEST_ADDOPTS
+passenv =
+    DBT_*
+    PYTEST_ADDOPTS
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
     -e.
 
 [testenv:integration-spark-session]
+allowlist_externals =
+    /bin/bash
 basepython = python3
 commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
 passenv =

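The `relation.py` change is the key Python 3.11 fix: a plain class-level default like `SparkQuotePolicy()` is unhashable and newer dataclass rules reject it as a mutable default, while `field(default_factory=...)` builds a fresh policy per instance. A simplified sketch with stand-in classes (not the real dbt types):

```python
from dataclasses import dataclass, field

@dataclass
class IncludePolicy:  # simplified stand-in for dbt's Policy
    database: bool = False
    schema: bool = True
    identifier: bool = True

@dataclass(frozen=True)
class Relation:  # simplified stand-in for SparkRelation
    # default_factory creates a new IncludePolicy per Relation instead of a
    # shared class-level default, which Python 3.11's dataclasses disallow
    # for mutable (unhashable) defaults.
    include_policy: IncludePolicy = field(default_factory=IncludePolicy)

a, b = Relation(), Relation()
assert a.include_policy is not b.include_policy
```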
From 24e4a96338b88a60723dc24742a8ecf41d6a2f54 Mon Sep 17 00:00:00 2001
From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com>
Date: Thu, 15 Dec 2022 10:51:05 -0600
Subject: [PATCH 363/603] Feature/incremental predicates (#436)

* incremental predicate support

* remove logging statement

* test merge predicates

* missing color column

* add update row

* changie

* pin core branch

* update test to remove incremental strategy

* add functional test

* add file format config to merge test

* add plus to config name

* update SQL to spark syntax

* add pytest decorator

* update skip profile pytest decorator

* handle predicates config, add test

* unpin feature request
---
 .../unreleased/Features-20220823-093457.yaml  |  7 ++
 .../incremental/incremental.sql               |  3 +-
 .../incremental/strategies.sql                |  8 +--
 .../adapter/test_incremental_predicates.py    | 67 +++++++++++++++++++
 4 files changed, 80 insertions(+), 5 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220823-093457.yaml
 create mode 100644 tests/functional/adapter/test_incremental_predicates.py

diff --git a/.changes/unreleased/Features-20220823-093457.yaml b/.changes/unreleased/Features-20220823-093457.yaml
new file mode 100644
index 00000000..66396a8e
--- /dev/null
+++ b/.changes/unreleased/Features-20220823-093457.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: incremental predicates
+time: 2022-08-23T09:34:57.026688-05:00
+custom:
+  Author: dave-connors-3
+  Issue: "435"
+  PR: "436"
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index e293441b..cc46d4c1 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -13,6 +13,7 @@
   {%- set partition_by = config.get('partition_by', none) -%}
   {%- set language = model['language'] -%}
   {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%}
+  {%- set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) -%}
   {%- set target_relation = this -%}
   {%- set existing_relation = load_relation(this) -%}
   {%- set tmp_relation = make_temp_relation(this) -%}
@@ -54,7 +55,7 @@
     {%- endcall -%}
     {%- do process_schema_changes(on_schema_change, tmp_relation, existing_relation) -%}
     {%- call statement('main') -%}
-      {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) }}
+      {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key, incremental_predicates) }}
     {%- endcall -%}
     {%- if language == 'python' -%}
       {#--
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index 17196e85..facfaadf 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -19,9 +19,9 @@
 {% endmacro %}
 
 
-{% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %}
+{% macro spark__get_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) %}
   {# need dest_columns for merge_exclude_columns, default to use "*" #}
-  {%- set predicates = [] if predicates is none else [] + predicates -%}
+  {%- set predicates = [] if incremental_predicates is none else [] + incremental_predicates -%}
   {%- set dest_columns = adapter.get_columns_in_relation(target) -%}
   {%- set merge_update_columns = config.get('merge_update_columns') -%}
   {%- set merge_exclude_columns = config.get('merge_exclude_columns') -%}
@@ -62,7 +62,7 @@
 {% endmacro %}
 
 
-{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %}
+{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key, incremental_predicates) %}
   {%- if strategy == 'append' -%}
     {#-- insert new records into existing table, without updating or overwriting #}
     {{ get_insert_into_sql(source, target) }}
@@ -71,7 +71,7 @@
     {{ get_insert_overwrite_sql(source, target) }}
   {%- elif strategy == 'merge' -%}
   {#-- merge all columns with databricks delta - schema changes are handled for us #}
-    {{ get_merge_sql(target, source, unique_key, dest_columns=none, predicates=none) }}
+    {{ get_merge_sql(target, source, unique_key, dest_columns=none, incremental_predicates=incremental_predicates) }}
   {%- else -%}
     {% set no_sql_for_strategy_msg -%}
       No known SQL for the incremental strategy provided: {{ strategy }}
diff --git a/tests/functional/adapter/test_incremental_predicates.py b/tests/functional/adapter/test_incremental_predicates.py
new file mode 100644
index 00000000..59c6b353
--- /dev/null
+++ b/tests/functional/adapter/test_incremental_predicates.py
@@ -0,0 +1,67 @@
+import pytest
+from dbt.tests.adapter.incremental.test_incremental_predicates import BaseIncrementalPredicates
+
+
+models__spark_incremental_predicates_sql = """
+{{ config(
+    materialized = 'incremental',
+    unique_key = 'id'
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- merge will not happen on the above record where id = 2, so new record will fall to insert
+select cast(1 as bigint) as id, 'hey' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
+"""
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark')
+class TestIncrementalPredicatesMergeSpark(BaseIncrementalPredicates):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": { 
+                "+incremental_predicates": [
+                    "dbt_internal_dest.id != 2"
+                ],
+                "+incremental_strategy": "merge",
+                "+file_format": "delta"
+            }
+        }
+        
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "delete_insert_incremental_predicates.sql": models__spark_incremental_predicates_sql
+        }
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark')
+class TestPredicatesMergeSpark(BaseIncrementalPredicates):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": { 
+                "+predicates": [
+                    "dbt_internal_dest.id != 2"
+                ],
+                "+incremental_strategy": "merge",
+                "+file_format": "delta"
+            }
+        }
+        
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "delete_insert_incremental_predicates.sql": models__spark_incremental_predicates_sql
+        }
\ No newline at end of file

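Not the Jinja macro itself, but a plain-Python sketch of how the `incremental_predicates` config above ends up in the merge join condition (alias and column names follow the test fixture):

```python
# Illustrative only; the real string-building lives in spark__get_merge_sql (Jinja).
def build_merge_condition(unique_key, incremental_predicates=None):
    predicates = list(incremental_predicates or [])
    predicates.append(f"DBT_INTERNAL_SOURCE.{unique_key} = DBT_INTERNAL_DEST.{unique_key}")
    return " and ".join(predicates)

print(build_merge_condition("id", ["dbt_internal_dest.id != 2"]))
# dbt_internal_dest.id != 2 and DBT_INTERNAL_SOURCE.id = DBT_INTERNAL_DEST.id
```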
From 6a003fb0ee131bb82bd515e898fd87e930d3e92d Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Thu, 15 Dec 2022 11:58:26 -0600
Subject: [PATCH 364/603] add release workflow (#550)

* add release workflow

* tweak the release

* update needs
---
 .github/workflows/release.yml | 268 +++++++++++++++++++++++-----------
 1 file changed, 182 insertions(+), 86 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 554e13a8..053dec97 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,118 +1,214 @@
-# Builds the spark plugin and releases it to GitHub and Pypi
-name: Build and Release
+# **what?**
+# Take the given commit, run unit tests specifically on that sha, build and
+# package it, and then release to GitHub with that specific build (PyPi to follow later)
+
+# **why?**
+# Ensure an automated and tested release process
+
+# **when?**
+# This will only run manually with a given sha and version
+
+name: Build, Test, and Package
 
 on:
   workflow_dispatch:
+    inputs:
+      sha:
+        description: "The last commit sha in the release"
+        type: string
+        required: true
+      changelog_path:
+        description: "Path to changes log"
+        type: string
+        default: "./CHANGELOG.md"
+        required: false
+      version_number:
+        description: "The release version number (i.e. 1.0.0b1)"
+        type: string
+        required: true
+      test_run:
+        description: "Test run (Publish release as draft to GitHub)"
+        type: boolean
+        default: false
+        required: false
+
+permissions:
+  contents: write # this is the permission that allows creating a new release
 
-# Release version number that must be updated for each release
 env:
-  version_number: '0.20.0rc2'
+  PYTHON_TARGET_VERSION: 3.8
+  ARTIFACT_RETENTION_DAYS: 2
+
+defaults:
+  run:
+    shell: bash
 
 jobs:
-  Test:
+  log-inputs:
+    name: Log Inputs
+    runs-on: ubuntu-latest
+    steps:
+      - name: "[DEBUG] Print Variables"
+        run: |
+          echo The last commit sha in the release:  ${{ inputs.sha }}
+          echo The release version number:          ${{ inputs.version_number }}
+          echo The path to the changelog markdown:  ${{ inputs.changelog_path }}
+          echo This is a test run:                  ${{ inputs.test_run }}
+          echo Python target version:               ${{ env.PYTHON_TARGET_VERSION }}
+          echo Artifact retention days:             ${{ env.ARTIFACT_RETENTION_DAYS }}
+
+  unit:
+    name: Unit Test
     runs-on: ubuntu-latest
+
+    env:
+      TOXENV: "unit"
+
     steps:
-      - name: Setup Python
-        uses: actions/setup-python@v2.2.2
+      - name: "Checkout Commit - ${{ inputs.sha }}"
+        uses: actions/checkout@v3
         with:
-          python-version: '3.8'
+          persist-credentials: false
+          ref: ${{ github.event.inputs.sha }}
 
-      - uses: actions/checkout@v2
+      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_TARGET_VERSION }}
 
-      - name: Test release
+      - name: "Install Python Dependencies"
         run: |
-          python3 -m venv env
-          source env/bin/activate
           sudo apt-get install libsasl2-dev
-          pip install -r dev-requirements.txt
-          pip install twine wheel setuptools
-          python setup.py sdist bdist_wheel
-          pip install dist/dbt-spark-*.tar.gz
-          pip install dist/dbt_spark-*-py3-none-any.whl
-          twine check dist/dbt_spark-*-py3-none-any.whl dist/dbt-spark-*.tar.gz
-
-  GitHubRelease:
-    name: GitHub release
+          python -m pip install --user --upgrade pip
+          python -m pip install tox
+          python -m pip --version
+          python -m tox --version
+
+      - name: "Run Tox"
+        run: tox
+
+  build:
+    name: Build Packages
+
     runs-on: ubuntu-latest
-    needs: Test
+
     steps:
-      - name: Setup Python
-        uses: actions/setup-python@v2.2.2
+      - name: "Checkout Commit - ${{ inputs.sha }}"
+        uses: actions/checkout@v3
         with:
-          python-version: '3.8'
+          persist-credentials: false
+          ref: ${{ inputs.sha }}
 
-      - uses: actions/checkout@v2
+      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_TARGET_VERSION }}
 
-      - name: Bumping version
+      - name: "Install Python Dependencies"
         run: |
-          python3 -m venv env
-          source env/bin/activate
           sudo apt-get install libsasl2-dev
-          pip install -r dev-requirements.txt
-          bumpversion --config-file .bumpversion-dbt.cfg patch --new-version ${{env.version_number}}
-          bumpversion --config-file .bumpversion.cfg patch --new-version ${{env.version_number}} --allow-dirty
-          git status
+          python -m pip install --user --upgrade pip
+          python -m pip install --upgrade setuptools wheel twine check-wheel-contents
+          python -m pip --version
 
-      - name: Commit version bump and tag
-        uses: EndBug/add-and-commit@v7
-        with:
-          author_name: 'Leah Antkiewicz'
-          author_email: 'leah.antkiewicz@dbtlabs.com'
-          message: 'Bumping version to ${{env.version_number}}'
-          tag: v${{env.version_number}}
-
-      # Need to set an output variable because env variables can't be taken as input
-      # This is needed for the next step with releasing to GitHub
-      - name: Find release type
-        id: release_type
-        env:
-          IS_PRERELEASE: ${{ contains(env.version_number, 'rc') ||  contains(env.version_number, 'b') }}
+      - name: "Build Distributions"
+        run: ./scripts/build-dist.sh
+
+      - name: "[DEBUG] Show Distributions"
+        run: ls -lh dist/
+
+      - name: "Check Distribution Descriptions"
         run: |
-          echo ::set-output name=isPrerelease::$IS_PRERELEASE
+          twine check dist/*
 
-      - name: Create GitHub release
-        uses: actions/create-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
+      - name: "[DEBUG] Check Wheel Contents"
+        run: |
+          check-wheel-contents dist/*.whl --ignore W007,W008
+
+      - name: "Upload Build Artifact - ${{ inputs.version_number }}"
+        uses: actions/upload-artifact@v3
         with:
-          tag_name: v${{env.version_number}}
-          release_name: dbt-spark v${{env.version_number}}
-          prerelease: ${{ steps.release_type.outputs.isPrerelease }}
-          body: |
-            Tracking [dbt-core v${{env.version_number}}](https://github.com/dbt-labs/dbt/releases/tag/v${{env.version_number}}).
-
-            ```sh
-            $ pip install dbt-spark==${{env.version_number}}
-            # or
-            $ pip install "dbt-spark[ODBC]==${{env.version_number}}"
-            # or
-            $ pip install "dbt-spark[PyHive]==${{env.version_number}}"
-            ```
-
-  PypiRelease:
-    name: Pypi release
+          name: ${{ inputs.version_number }}
+          path: |
+            dist/
+            !dist/dbt-${{ inputs.version_number }}.tar.gz
+          retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
+
+  test-build:
+    name: Verify Packages
+
+    needs: [unit, build]
+
     runs-on: ubuntu-latest
-    needs: GitHubRelease
-    environment: PypiProd
+
     steps:
-      - name: Setup Python
-        uses: actions/setup-python@v2.2.2
+      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
+        uses: actions/setup-python@v4
         with:
-          python-version: '3.8'
+          python-version: ${{ env.PYTHON_TARGET_VERSION }}
+
+      - name: "Install Python Dependencies"
+        run: |
+          sudo apt-get install libsasl2-dev
+          python -m pip install --user --upgrade pip
+          python -m pip install --upgrade wheel
+          python -m pip --version
 
-      - uses: actions/checkout@v2
+      - name: "Download Build Artifact - ${{ inputs.version_number }}"
+        uses: actions/download-artifact@v3
         with:
-          ref: v${{env.version_number}}
+          name: ${{ inputs.version_number }}
+          path: dist/
 
-      - name: Release to pypi
-        env:
-          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      - name: "[DEBUG] Show Distributions"
+        run: ls -lh dist/
+
+      - name: "Install Wheel Distributions"
         run: |
-          python3 -m venv env
-          source env/bin/activate
-          sudo apt-get install libsasl2-dev
-          pip install -r dev-requirements.txt
-          pip install twine wheel setuptools
-          python setup.py sdist bdist_wheel
-          twine upload --non-interactive dist/dbt_spark-${{env.version_number}}-py3-none-any.whl dist/dbt-spark-${{env.version_number}}.tar.gz
+          find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
+
+      - name: "[DEBUG] Check Wheel Distributions"
+        run: |
+          dbt --version
+
+      - name: "Install Source Distributions"
+        run: |
+          find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
+
+      - name: "[DEBUG] Check Source Distributions"
+        run: |
+          dbt --version
+
+  github-release:
+    name: GitHub Release
+    if: ${{ !failure() && !cancelled() }}
+    needs: test-build
+
+    # pin to commit since this workflow is WIP but this commit has been tested as working
+    uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@7b6e01d73d2c8454e06302cc66ef4c2dbd4dbe4e
+
+    with:
+      sha: ${{ inputs.sha }}
+      version_number: ${{ inputs.version_number }}
+      changelog_path: ${{ inputs.changelog_path }}
+      test_run: ${{ inputs.test_run }}
+
+# Skipping this for now until we've proven build work in the repos
+  # pypi-release:
+  #   name: Pypi release
+
+  #   runs-on: ubuntu-latest
+
+  #   needs: github-release
+
+  #   environment: PypiProd
+  #   steps:
+  #     - uses: actions/download-artifact@v2
+  #       with:
+  #         name: dist
+  #         path: 'dist'
+
+  #     - name: Publish distribution to PyPI
+  #       uses: pypa/gh-action-pypi-publish@v1.4.2
+  #       with:
+  #         password: ${{ secrets.PYPI_API_TOKEN }}

From 57e4ae8adb8f4f69d15d50ef7bd56aaca536802c Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Thu, 15 Dec 2022 11:58:51 -0600
Subject: [PATCH 365/603] fix footer, update contributors (#553)

---
 .changes/0.0.0.md | 1 +
 .changie.yaml     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
index 14c2cf9e..68ecfaa1 100644
--- a/.changes/0.0.0.md
+++ b/.changes/0.0.0.md
@@ -1,5 +1,6 @@
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
 - [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
diff --git a/.changie.yaml b/.changie.yaml
index f5800f32..7d34a96b 100644
--- a/.changie.yaml
+++ b/.changie.yaml
@@ -32,7 +32,7 @@ custom:
 footerFormat: |
   {{- $contributorDict := dict }}
   {{- /* any names added to this list should be all lowercase for later matching purposes */}}
-  {{- $core_team := list "emmyoop" "nathaniel-may" "gshank" "leahwicz" "chenyulinx" "stu-k" "iknox-fa" "versusfacit" "mcknight-42" "jtcohen6" "dependabot[bot]" "snyk-bot" }}
+  {{- $core_team := list "michelleark" "peterallenwebb" "emmyoop" "nathaniel-may" "gshank" "leahwicz" "chenyulinx" "stu-k" "iknox-fa" "versusfacit" "mcknight-42" "jtcohen6" "aranke" "mikealfare" "dependabot[bot]" "snyk-bot" "colin-rogers-dbt" }}
   {{- range $change := .Changes }}
     {{- $authorList := splitList " " $change.Custom.Author }}
     {{- /* loop through all authors for a PR */}}
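The footerFormat above is Go-template syntax evaluated by changie; as a hedged illustration only, the grouping it performs looks roughly like the following in Python. The change-entry shape here is an assumption for illustration; changie feeds the real template from the YAML change files.

```python
# Hedged Python rendering of what the footerFormat template computes: group
# change entries by external author, skip anyone on the core-team list, and
# emit one contributor line per author.
core_team = {"emmyoop", "mcknight-42", "jtcohen6", "dependabot[bot]", "snyk-bot"}

changes = [
    {"author": "dave-connors-3", "pr": "436"},
    {"author": "ueshin", "pr": "520"},
    {"author": "jtcohen6", "pr": "999"},  # core team member: left out of the footer
]

contributors = {}
for change in changes:
    author = change["author"].lower()  # names are matched in lowercase, as in the template
    if author in core_team:
        continue
    contributors.setdefault(author, []).append(change["pr"])

for author, prs in sorted(contributors.items()):
    links = ", ".join(
        f"[#{pr}](https://github.com/dbt-labs/dbt-spark/pull/{pr})" for pr in prs
    )
    print(f"- [@{author}](https://github.com/{author}) ({links})")
```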

From d4c19cc02732287a7d00fd19689a385efaad22db Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 15 Dec 2022 12:32:12 -0600
Subject: [PATCH 366/603] Bumping version to 1.4.0b1 and generate CHANGELOG
 (#554)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                                |  2 +-
 .changes/1.4.0-b1.md                            | 17 +++++++++++++++++
 .../Features-20220823-093457.yaml               |  0
 .../Features-20220926-123609.yaml               |  0
 .../Fixes-20220926-112857.yaml                  |  0
 .../Fixes-20221113-211208.yaml                  |  0
 .../Fixes-20221116-234601.yaml                  |  0
 .../Fixes-20221209-101854.yaml                  |  0
 .../Under the Hood-20221202-140724.yaml         |  0
 CHANGELOG.md                                    | 16 ++++++++++++++++
 dbt/adapters/spark/__version__.py               |  2 +-
 setup.py                                        |  2 +-
 12 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 .changes/1.4.0-b1.md
 rename .changes/{unreleased => 1.4.0}/Features-20220823-093457.yaml (100%)
 rename .changes/{unreleased => 1.4.0}/Features-20220926-123609.yaml (100%)
 rename .changes/{unreleased => 1.4.0}/Fixes-20220926-112857.yaml (100%)
 rename .changes/{unreleased => 1.4.0}/Fixes-20221113-211208.yaml (100%)
 rename .changes/{unreleased => 1.4.0}/Fixes-20221116-234601.yaml (100%)
 rename .changes/{unreleased => 1.4.0}/Fixes-20221209-101854.yaml (100%)
 rename .changes/{unreleased => 1.4.0}/Under the Hood-20221202-140724.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 5fa558e9..9fe09ab4 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.4.0a1
+current_version = 1.4.0b1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
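As a hedged aside, a parse pattern of this shape splits a version string into named groups; the prerelease group in the sketch below is an assumption for illustration, since the rest of the real pattern falls outside this hunk.

```python
# Hedged sketch: a bumpversion-style parse regex with named groups, applied to
# the version strings used in this repo.
import re

PARSE = re.compile(
    r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?P<prerelease>[a-z]+\d+)?"
)

for raw in ("1.4.0a1", "1.4.0b1", "1.5.0a1"):
    parts = PARSE.match(raw)
    assert parts is not None
    print(raw, "->", parts.groupdict())
# 1.4.0b1 -> {'major': '1', 'minor': '4', 'patch': '0', 'prerelease': 'b1'}
```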
diff --git a/.changes/1.4.0-b1.md b/.changes/1.4.0-b1.md
new file mode 100644
index 00000000..fc562626
--- /dev/null
+++ b/.changes/1.4.0-b1.md
@@ -0,0 +1,17 @@
+## dbt-spark 1.4.0-b1 - December 15, 2022
+### Features
+- incremental predicates ([#435](https://github.com/dbt-labs/dbt-spark/issues/435), [#436](https://github.com/dbt-labs/dbt-spark/pull/436))
+- Migrate dbt-utils current_timestamp macros into core + adapters ([#483](https://github.com/dbt-labs/dbt-spark/issues/483), [#480](https://github.com/dbt-labs/dbt-spark/pull/480))
+### Fixes
+- Password doesn't pass to server using LDAP connection via thrift (#310) ([#310](https://github.com/dbt-labs/dbt-spark/issues/310), [#396](https://github.com/dbt-labs/dbt-spark/pull/396))
+- Updated the Spark-testing profile name to spark_testing ([#514](https://github.com/dbt-labs/dbt-spark/issues/514), [#513](https://github.com/dbt-labs/dbt-spark/pull/513))
+- Support new error messages in the future Spark. ([#515](https://github.com/dbt-labs/dbt-spark/issues/515), [#520](https://github.com/dbt-labs/dbt-spark/pull/520))
+- Update policy code and test config to support py3.11 and tox 4.0 ([#542](https://github.com/dbt-labs/dbt-spark/issues/542), [#543](https://github.com/dbt-labs/dbt-spark/pull/543))
+### Under the Hood
+- updating python version in tox ([#536](https://github.com/dbt-labs/dbt-spark/issues/536), [#534](https://github.com/dbt-labs/dbt-spark/pull/534))
+
+### Contributors
+- [@VShkaberda](https://github.com/VShkaberda) ([#396](https://github.com/dbt-labs/dbt-spark/pull/396))
+- [@akashrn5](https://github.com/akashrn5) ([#513](https://github.com/dbt-labs/dbt-spark/pull/513))
+- [@dave-connors-3](https://github.com/dave-connors-3) ([#436](https://github.com/dbt-labs/dbt-spark/pull/436))
+- [@ueshin](https://github.com/ueshin) ([#520](https://github.com/dbt-labs/dbt-spark/pull/520))
diff --git a/.changes/unreleased/Features-20220823-093457.yaml b/.changes/1.4.0/Features-20220823-093457.yaml
similarity index 100%
rename from .changes/unreleased/Features-20220823-093457.yaml
rename to .changes/1.4.0/Features-20220823-093457.yaml
diff --git a/.changes/unreleased/Features-20220926-123609.yaml b/.changes/1.4.0/Features-20220926-123609.yaml
similarity index 100%
rename from .changes/unreleased/Features-20220926-123609.yaml
rename to .changes/1.4.0/Features-20220926-123609.yaml
diff --git a/.changes/unreleased/Fixes-20220926-112857.yaml b/.changes/1.4.0/Fixes-20220926-112857.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20220926-112857.yaml
rename to .changes/1.4.0/Fixes-20220926-112857.yaml
diff --git a/.changes/unreleased/Fixes-20221113-211208.yaml b/.changes/1.4.0/Fixes-20221113-211208.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20221113-211208.yaml
rename to .changes/1.4.0/Fixes-20221113-211208.yaml
diff --git a/.changes/unreleased/Fixes-20221116-234601.yaml b/.changes/1.4.0/Fixes-20221116-234601.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20221116-234601.yaml
rename to .changes/1.4.0/Fixes-20221116-234601.yaml
diff --git a/.changes/unreleased/Fixes-20221209-101854.yaml b/.changes/1.4.0/Fixes-20221209-101854.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20221209-101854.yaml
rename to .changes/1.4.0/Fixes-20221209-101854.yaml
diff --git a/.changes/unreleased/Under the Hood-20221202-140724.yaml b/.changes/1.4.0/Under the Hood-20221202-140724.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20221202-140724.yaml
rename to .changes/1.4.0/Under the Hood-20221202-140724.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6dd49494..263050ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,23 @@
 - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
+## dbt-spark 1.4.0-b1 - December 15, 2022
+### Features
+- incremental predicates ([#435](https://github.com/dbt-labs/dbt-spark/issues/435), [#436](https://github.com/dbt-labs/dbt-spark/pull/436))
+- Migrate dbt-utils current_timestamp macros into core + adapters ([#483](https://github.com/dbt-labs/dbt-spark/issues/483), [#480](https://github.com/dbt-labs/dbt-spark/pull/480))
+### Fixes
+- Password doesn't pass to server using LDAP connection via thrift (#310) ([#310](https://github.com/dbt-labs/dbt-spark/issues/310), [#396](https://github.com/dbt-labs/dbt-spark/pull/396))
+- Updated the Spark-testing profile name to spark_testing ([#514](https://github.com/dbt-labs/dbt-spark/issues/514), [#513](https://github.com/dbt-labs/dbt-spark/pull/513))
+- Support new error messages in the future Spark. ([#515](https://github.com/dbt-labs/dbt-spark/issues/515), [#520](https://github.com/dbt-labs/dbt-spark/pull/520))
+- Update policy code and test config to support py3.11 and tox 4.0 ([#542](https://github.com/dbt-labs/dbt-spark/issues/542), [#543](https://github.com/dbt-labs/dbt-spark/pull/543))
+### Under the Hood
+- updating python version in tox ([#536](https://github.com/dbt-labs/dbt-spark/issues/536), [#534](https://github.com/dbt-labs/dbt-spark/pull/534))
 
+### Contributors
+- [@VShkaberda](https://github.com/VShkaberda) ([#396](https://github.com/dbt-labs/dbt-spark/pull/396))
+- [@akashrn5](https://github.com/akashrn5) ([#513](https://github.com/dbt-labs/dbt-spark/pull/513))
+- [@dave-connors-3](https://github.com/dave-connors-3) ([#436](https://github.com/dbt-labs/dbt-spark/pull/436))
+- [@ueshin](https://github.com/ueshin) ([#520](https://github.com/dbt-labs/dbt-spark/pull/520))
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 70ba273f..27cfeecd 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.4.0a1"
+version = "1.4.0b1"
diff --git a/setup.py b/setup.py
index 5b5a0258..012e58d4 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.4.0a1"
+package_version = "1.4.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 512b3d014bb29127b5969319ecfb22bff6ac9e3d Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Thu, 15 Dec 2022 16:06:31 -0600
Subject: [PATCH 367/603] add pypi release to workflow (#555)

---
 .github/workflows/release.yml | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 053dec97..ad1dc92e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -193,22 +193,23 @@ jobs:
       changelog_path: ${{ inputs.changelog_path }}
       test_run: ${{ inputs.test_run }}
 
-# Skipping this for now until we've proven builds work in the repos
-  # pypi-release:
-  #   name: Pypi release
+  pypi-release:
+    name: Pypi release
+    # only release to PyPi if we're not testing - will release to PyPi test when workflow gets rewritten
+    if: inputs.test_run  == 'false'
 
-  #   runs-on: ubuntu-latest
+    runs-on: ubuntu-latest
 
-  #   needs: github-release
+    needs: github-release
 
-  #   environment: PypiProd
-  #   steps:
-  #     - uses: actions/download-artifact@v2
-  #       with:
-  #         name: dist
-  #         path: 'dist'
+    environment: PypiProd
+    steps:
+      - uses: actions/download-artifact@v2
+        with:
+          name: dist
+          path: 'dist'
 
-  #     - name: Publish distribution to PyPI
-  #       uses: pypa/gh-action-pypi-publish@v1.4.2
-  #       with:
-  #         password: ${{ secrets.PYPI_API_TOKEN }}
+      - name: Publish distribution to PyPI
+        uses: pypa/gh-action-pypi-publish@v1.4.2
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}

From 764cea73b73b7b6123027169de25fdeabb5f11d3 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 22 Dec 2022 13:36:21 -0800
Subject: [PATCH 368/603] skip databricks integration tests (#575)

* add mypy ignore to column, connections and init

* skip databricks integration tests
---
 .circleci/config.yml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8f0afa6c..aa52e9ec 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -121,12 +121,12 @@ workflows:
       - integration-spark-thrift:
           requires:
             - unit
-      - integration-spark-databricks-http:
-          requires:
-            - integration-spark-thrift
-      - integration-spark-databricks-odbc-cluster:
-          requires:
-            - integration-spark-thrift
-      - integration-spark-databricks-odbc-endpoint:
-          requires:
-            - integration-spark-thrift
+      # - integration-spark-databricks-http:
+      #     requires:
+      #       - integration-spark-thrift
+      # - integration-spark-databricks-odbc-cluster:
+      #     requires:
+      #       - integration-spark-thrift
+      # - integration-spark-databricks-odbc-endpoint:
+      #     requires:
+      #       - integration-spark-thrift

From 5ca20be56ec2d557b4fff5e42c320949040650d3 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 4 Jan 2023 15:17:48 -0800
Subject: [PATCH 369/603] add databricks integ tests back (#578)

* add mypy ignore to column, connections and init

* add databricks integ tests back

* remove exclusion

* fix pydantic version issue
---
 .circleci/config.yml                          | 18 +++++++++---------
 tests/functional/adapter/test_python_model.py |  2 +-
 tox.ini                                       |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index aa52e9ec..8f0afa6c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -121,12 +121,12 @@ workflows:
       - integration-spark-thrift:
           requires:
             - unit
-      # - integration-spark-databricks-http:
-      #     requires:
-      #       - integration-spark-thrift
-      # - integration-spark-databricks-odbc-cluster:
-      #     requires:
-      #       - integration-spark-thrift
-      # - integration-spark-databricks-odbc-endpoint:
-      #     requires:
-      #       - integration-spark-thrift
+      - integration-spark-databricks-http:
+          requires:
+            - integration-spark-thrift
+      - integration-spark-databricks-odbc-cluster:
+          requires:
+            - integration-spark-thrift
+      - integration-spark-databricks-odbc-endpoint:
+          requires:
+            - integration-spark-thrift
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 140f4162..e5e7debe 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -39,7 +39,7 @@ def model(dbt, spark):
                 "ResourceClass": "SingleNode"
             }
         },
-        packages=['spacy', 'torch']
+        packages=['spacy', 'torch', 'pydantic<1.10.3']
     )
     data = [[1,2]] * 10
     return spark.createDataFrame(data, schema=['test', 'test2'])
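For context, a hedged minimal sketch of a dbt Python model in the same spirit as the test above; the extra PyPI packages, including the pydantic pin, are passed through `dbt.config`.

```python
# Hedged minimal sketch of a dbt Python model for Spark, mirroring the
# functional test above: extra packages (including the pydantic pin) are
# declared through dbt.config and the model returns a Spark DataFrame.
def model(dbt, spark):
    dbt.config(
        materialized="table",
        packages=["spacy", "torch", "pydantic<1.10.3"],
    )
    data = [[1, 2]] * 10
    return spark.createDataFrame(data, schema=["test", "test2"])
```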
diff --git a/tox.ini b/tox.ini
index 31456b08..683180a5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -45,7 +45,7 @@ deps =
 allowlist_externals =
     /bin/bash
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs}  -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv =
     DBT_*

From d9a3d76086e0b37a224f6eec84e5a8b2274bd3b7 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Fri, 6 Jan 2023 08:46:35 -0700
Subject: [PATCH 370/603] Use CircleCI Context for AWS access key environment
 variables (#582)

* Use CircleCI Context for AWS access key environment variables

* Use the `aws-credentials` global context configured within CircleCI

* Remove changelog entry [skip ci]

* Remove extra space
---
 .circleci/config.yml | 2 ++
 tox.ini              | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8f0afa6c..5fea78c3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -125,8 +125,10 @@ workflows:
           requires:
             - integration-spark-thrift
       - integration-spark-databricks-odbc-cluster:
+          context: aws-credentials
           requires:
             - integration-spark-thrift
       - integration-spark-databricks-odbc-endpoint:
+          context: aws-credentials
           requires:
             - integration-spark-thrift
diff --git a/tox.ini b/tox.ini
index 683180a5..31456b08 100644
--- a/tox.ini
+++ b/tox.ini
@@ -45,7 +45,7 @@ deps =
 allowlist_externals =
     /bin/bash
 basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs}  -n4 tests/functional/adapter/*'
+commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
            /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv =
     DBT_*

From 28a9d0995207ffa40ed051bfb631ee32a7ae4412 Mon Sep 17 00:00:00 2001
From: Grace Goheen <53586774+graciegoheen@users.noreply.github.com>
Date: Tue, 10 Jan 2023 17:09:38 -0700
Subject: [PATCH 371/603] add escape_single_quotes macro (#573)

* add escape_single_quotes macro

* added changelog entry

* Trim trailing whitespace

* Trim trailing whitespace

Co-authored-by: Doug Beatty <doug.beatty@dbtlabs.com>
---
 .changes/unreleased/Fixes-20221221-092410.yaml          | 8 ++++++++
 dbt/include/spark/macros/utils/escape_single_quotes.sql | 4 ++++
 tests/functional/adapter/utils/test_utils.py            | 2 +-
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20221221-092410.yaml
 create mode 100644 dbt/include/spark/macros/utils/escape_single_quotes.sql

diff --git a/.changes/unreleased/Fixes-20221221-092410.yaml b/.changes/unreleased/Fixes-20221221-092410.yaml
new file mode 100644
index 00000000..1aa8001f
--- /dev/null
+++ b/.changes/unreleased/Fixes-20221221-092410.yaml
@@ -0,0 +1,8 @@
+kind: Fixes
+body: Added escape_single_quotes macro as Spark/Databricks uses a single backslash
+  instead of a single quote
+time: 2022-12-21T09:24:10.952042-06:00
+custom:
+  Author: graciegoheen
+  Issue: "572"
+  PR: "573"
diff --git a/dbt/include/spark/macros/utils/escape_single_quotes.sql b/dbt/include/spark/macros/utils/escape_single_quotes.sql
new file mode 100644
index 00000000..2572ffb6
--- /dev/null
+++ b/dbt/include/spark/macros/utils/escape_single_quotes.sql
@@ -0,0 +1,4 @@
+{# /* Spark/Databricks uses a single backslash: they're -> they\'re. The second backslash is to escape it from Jinja */ #}
+{% macro spark__escape_single_quotes(expression) -%}
+{{ expression | replace("'","\\'") }}
+{%- endmacro %}
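As a hedged illustration of what this macro renders, the following standalone jinja2 sketch (not dbt's own Jinja context) reproduces the replace filter:

```python
# Hedged sketch: rendering the macro above with a plain jinja2 Template to show
# the single-backslash escaping used by Spark/Databricks.
from jinja2 import Template

template = Template(
    "{% macro escape_single_quotes(expression) -%}"
    "{{ expression | replace(\"'\", \"\\\\'\") }}"
    "{%- endmacro %}"
    "select '{{ escape_single_quotes(\"they're\") }}' as col"
)

print(template.render())
# select 'they\'re' as col  -- the quote is escaped with a backslash, not doubled
```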
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 102df731..6fd6a912 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -79,7 +79,7 @@ class TestDateTrunc(BaseDateTrunc):
     pass
 
 
-class TestEscapeSingleQuotes(BaseEscapeSingleQuotesQuote):
+class TestEscapeSingleQuotes(BaseEscapeSingleQuotesBackslash):
     pass
 
 

From 94148f6c94bf98a73d3fd21f5d170eaa8edcc3fa Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Tue, 10 Jan 2023 18:37:25 -0600
Subject: [PATCH 372/603] rename exceptions (#585)

* rename exceptions

* point back to main

* Delete Under the Hood-20230110-105904.yaml

* Update Under the Hood-20230110-101919.yaml
---
 .../Under the Hood-20230110-101919.yaml       |  7 ++++++
 dbt/adapters/spark/connections.py             | 25 ++++++++++---------
 dbt/adapters/spark/impl.py                    | 12 ++++-----
 dbt/adapters/spark/python_submissions.py      | 22 ++++++++--------
 dbt/adapters/spark/relation.py                |  6 ++---
 .../test_incremental_on_schema_change.py      |  1 -
 .../test_incremental_strategies.py            |  1 -
 .../persist_docs/test_persist_docs.py         |  3 ---
 .../test_seed_column_types.py                 |  1 -
 tests/unit/test_adapter.py                    |  8 +++---
 10 files changed, 44 insertions(+), 42 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230110-101919.yaml

diff --git a/.changes/unreleased/Under the Hood-20230110-101919.yaml b/.changes/unreleased/Under the Hood-20230110-101919.yaml
new file mode 100644
index 00000000..2209fdb9
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230110-101919.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Rename exceptions to match dbt-core.
+time: 2023-01-10T10:19:19.675879-06:00
+custom:
+  Author: emmyoop
+  Issue: "557"
+  PR: "585"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 66ca93d3..a606beb7 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -90,7 +90,7 @@ def cluster_id(self):
     def __post_init__(self):
         # spark classifies database and schema as the same thing
         if self.database is not None and self.database != self.schema:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"    schema: {self.schema} \n"
                 f"    database: {self.database} \n"
                 f"On Spark, database must be omitted or have the same value as"
@@ -102,7 +102,7 @@ def __post_init__(self):
             try:
                 import pyodbc  # noqa: F401
             except ImportError as e:
-                raise dbt.exceptions.RuntimeException(
+                raise dbt.exceptions.DbtRuntimeError(
                     f"{self.method} connection method requires "
                     "additional dependencies. \n"
                     "Install the additional required dependencies with "
@@ -111,7 +111,7 @@ def __post_init__(self):
                 ) from e
 
         if self.method == SparkConnectionMethod.ODBC and self.cluster and self.endpoint:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 "`cluster` and `endpoint` cannot both be set when"
                 f" using {self.method} method to connect to Spark"
             )
@@ -120,7 +120,7 @@ def __post_init__(self):
             self.method == SparkConnectionMethod.HTTP
             or self.method == SparkConnectionMethod.THRIFT
         ) and not (ThriftState and THttpClient and hive):
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"{self.method} connection method requires "
                 "additional dependencies. \n"
                 "Install the additional required dependencies with "
@@ -131,7 +131,7 @@ def __post_init__(self):
             try:
                 import pyspark  # noqa: F401
             except ImportError as e:
-                raise dbt.exceptions.RuntimeException(
+                raise dbt.exceptions.DbtRuntimeError(
                     f"{self.method} connection method requires "
                     "additional dependencies. \n"
                     "Install the additional required dependencies with "
@@ -233,12 +233,13 @@ def execute(self, sql, bindings=None):
         if poll_state.errorMessage:
             logger.debug("Poll response: {}".format(poll_state))
             logger.debug("Poll status: {}".format(state))
-            dbt.exceptions.raise_database_error(poll_state.errorMessage)
+            raise dbt.exceptions.DbtDatabaseError(poll_state.errorMessage)
 
         elif state not in STATE_SUCCESS:
             status_type = ThriftState._VALUES_TO_NAMES.get(state, "Unknown<{!r}>".format(state))
-
-            dbt.exceptions.raise_database_error("Query failed with status: {}".format(status_type))
+            raise dbt.exceptions.DbtDatabaseError(
+                "Query failed with status: {}".format(status_type)
+            )
 
         logger.debug("Poll status: {}, query complete".format(state))
 
@@ -293,9 +294,9 @@ def exception_handler(self, sql):
             thrift_resp = exc.args[0]
             if hasattr(thrift_resp, "status"):
                 msg = thrift_resp.status.errorMessage
-                raise dbt.exceptions.RuntimeException(msg)
+                raise dbt.exceptions.DbtRuntimeError(msg)
             else:
-                raise dbt.exceptions.RuntimeException(str(exc))
+                raise dbt.exceptions.DbtRuntimeError(str(exc))
 
     def cancel(self, connection):
         connection.handle.cancel()
@@ -462,7 +463,7 @@ def open(cls, connection):
                     msg = "Failed to connect"
                     if creds.token is not None:
                         msg += ", is your token valid?"
-                    raise dbt.exceptions.FailedToConnectException(msg) from e
+                    raise dbt.exceptions.FailedToConnectError(msg) from e
                 retryable_message = _is_retryable_error(e)
                 if retryable_message and creds.connect_retries > 0:
                     msg = (
@@ -483,7 +484,7 @@ def open(cls, connection):
                     logger.warning(msg)
                     time.sleep(creds.connect_timeout)
                 else:
-                    raise dbt.exceptions.FailedToConnectException("failed to connect") from e
+                    raise dbt.exceptions.FailedToConnectError("failed to connect") from e
         else:
             raise exc
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index f02025c7..1abdfa62 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -124,7 +124,7 @@ def add_schema_to_cache(self, schema) -> str:
         """Cache a new schema in dbt. It will show up in `list relations`."""
         if schema is None:
             name = self.nice_connection_name()
-            dbt.exceptions.raise_compiler_error(
+            raise dbt.exceptions.CompilationError(
                 "Attempted to cache a null schema for {}".format(name)
             )
         if dbt.flags.USE_CACHE:  # type: ignore
@@ -138,7 +138,7 @@ def list_relations_without_caching(
         kwargs = {"schema_relation": schema_relation}
         try:
             results = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs)
-        except dbt.exceptions.RuntimeException as e:
+        except dbt.exceptions.DbtRuntimeError as e:
             errmsg = getattr(e, "msg", "")
             if f"Database '{schema_relation}' not found" in errmsg:
                 return []
@@ -150,7 +150,7 @@ def list_relations_without_caching(
         relations = []
         for row in results:
             if len(row) != 4:
-                raise dbt.exceptions.RuntimeException(
+                raise dbt.exceptions.DbtRuntimeError(
                     f'Invalid value from "show table extended ...", '
                     f"got {len(row)} values, expected 4"
                 )
@@ -222,7 +222,7 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
                 GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
             )
             columns = self.parse_describe_extended(relation, rows)
-        except dbt.exceptions.RuntimeException as e:
+        except dbt.exceptions.DbtRuntimeError as e:
             # spark would throw an error when the table doesn't exist, where other
             # CDWs would just return an empty list, normalizing the behavior here
             errmsg = getattr(e, "msg", "")
@@ -280,7 +280,7 @@ def get_properties(self, relation: Relation) -> Dict[str, str]:
     def get_catalog(self, manifest):
         schema_map = self._get_catalog_schemas(manifest)
         if len(schema_map) > 1:
-            dbt.exceptions.raise_compiler_error(
+            raise dbt.exceptions.CompilationError(
                 f"Expected only one database in get_catalog, found " f"{list(schema_map)}"
             )
 
@@ -308,7 +308,7 @@ def _get_one_catalog(
         manifest,
     ) -> agate.Table:
         if len(schemas) != 1:
-            dbt.exceptions.raise_compiler_error(
+            raise dbt.exceptions.CompilationError(
                 f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}"
             )
 
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index 1e81c572..47529e07 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -53,7 +53,7 @@ def _create_work_dir(self, path: str) -> None:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"Error creating work_dir for python notebooks\n {response.content!r}"
             )
 
@@ -71,7 +71,7 @@ def _upload_notebook(self, path: str, compiled_code: str) -> None:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"Error creating python notebook.\n {response.content!r}"
             )
 
@@ -99,7 +99,7 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str:
             json=job_spec,
         )
         if submit_response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"Error creating python run.\n {submit_response.content!r}"
             )
         return submit_response.json()["run_id"]
@@ -135,7 +135,7 @@ def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> No
         json_run_output = run_output.json()
         result_state = json_run_output["metadata"]["state"]["result_state"]
         if result_state != "SUCCESS":
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 "Python model failed with traceback as:\n"
                 "(Note that the line number here does not "
                 "match the line number in your code due to dbt templating)\n"
@@ -169,9 +169,9 @@ def polling(
             response = status_func(**status_func_kwargs)
             state = get_state_func(response)
         if exceeded_timeout:
-            raise dbt.exceptions.RuntimeException("python model run timed out")
+            raise dbt.exceptions.DbtRuntimeError("python model run timed out")
         if state != expected_end_state:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 "python model run ended in state"
                 f"{state} with state_message\n{get_state_msg_func(response)}"
             )
@@ -205,7 +205,7 @@ def create(self) -> str:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"Error creating an execution context.\n {response.content!r}"
             )
         return response.json()["id"]
@@ -221,7 +221,7 @@ def destroy(self, context_id: str) -> str:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"Error deleting an execution context.\n {response.content!r}"
             )
         return response.json()["id"]
@@ -246,7 +246,7 @@ def execute(self, context_id: str, command: str) -> str:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"Error creating a command.\n {response.content!r}"
             )
         return response.json()["id"]
@@ -263,7 +263,7 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
+            raise dbt.exceptions.DbtRuntimeError(
                 f"Error getting status of command.\n {response.content!r}"
             )
         return response.json()
@@ -298,7 +298,7 @@ def submit(self, compiled_code: str) -> None:
                     get_state_msg_func=lambda response: response.json()["results"]["data"],
                 )
                 if response["results"]["resultType"] == "error":
-                    raise dbt.exceptions.RuntimeException(
+                    raise dbt.exceptions.DbtRuntimeError(
                         f"Python model failed with traceback as:\n"
                         f"{response['results']['cause']}"
                     )
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index d3abb0d3..0b0c58bc 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -3,7 +3,7 @@
 from dataclasses import dataclass, field
 
 from dbt.adapters.base.relation import BaseRelation, Policy
-from dbt.exceptions import RuntimeException
+from dbt.exceptions import DbtRuntimeError
 
 
 @dataclass
@@ -31,11 +31,11 @@ class SparkRelation(BaseRelation):
 
     def __post_init__(self):
         if self.database != self.schema and self.database:
-            raise RuntimeException("Cannot set database in spark!")
+            raise DbtRuntimeError("Cannot set database in spark!")
 
     def render(self):
         if self.include_policy.database and self.include_policy.schema:
-            raise RuntimeException(
+            raise DbtRuntimeError(
                 "Got a spark relation with schema and database set to "
                 "include, but only one can be set"
             )
diff --git a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py
index 2d967a4d..4d1cd374 100644
--- a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py
+++ b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py
@@ -1,6 +1,5 @@
 from cProfile import run
 from tests.integration.base import DBTIntegrationTest, use_profile
-import dbt.exceptions
 
 
 class TestIncrementalOnSchemaChange(DBTIntegrationTest):
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
index 73bb6ba2..cea8225e 100644
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ b/tests/integration/incremental_strategies/test_incremental_strategies.py
@@ -1,6 +1,5 @@
 from cProfile import run
 from tests.integration.base import DBTIntegrationTest, use_profile
-import dbt.exceptions
 
 
 class TestIncrementalStrategies(DBTIntegrationTest):
diff --git a/tests/integration/persist_docs/test_persist_docs.py b/tests/integration/persist_docs/test_persist_docs.py
index bc93f491..527f548a 100644
--- a/tests/integration/persist_docs/test_persist_docs.py
+++ b/tests/integration/persist_docs/test_persist_docs.py
@@ -1,8 +1,5 @@
 from cProfile import run
 from tests.integration.base import DBTIntegrationTest, use_profile
-import dbt.exceptions
-
-import json
 
 
 class TestPersistDocsDelta(DBTIntegrationTest):
diff --git a/tests/integration/seed_column_types/test_seed_column_types.py b/tests/integration/seed_column_types/test_seed_column_types.py
index 326c9f52..dc997d11 100644
--- a/tests/integration/seed_column_types/test_seed_column_types.py
+++ b/tests/integration/seed_column_types/test_seed_column_types.py
@@ -1,6 +1,5 @@
 from cProfile import run
 from tests.integration.base import DBTIntegrationTest, use_profile
-import dbt.exceptions
 
 
 class TestSeedColumnTypeCast(DBTIntegrationTest):
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index 53b95f73..d24bc8a2 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -2,7 +2,7 @@
 from unittest import mock
 
 import dbt.flags as flags
-from dbt.exceptions import RuntimeException
+from dbt.exceptions import DbtRuntimeError
 from agate import Row
 from pyhive import hive
 from dbt.adapters.spark import SparkAdapter, SparkRelation
@@ -453,7 +453,7 @@ def test_relation_with_database(self):
         adapter = SparkAdapter(config)
         # fine
         adapter.Relation.create(schema='different', identifier='table')
-        with self.assertRaises(RuntimeException):
+        with self.assertRaises(DbtRuntimeError):
             # not fine - database set
             adapter.Relation.create(
                 database='something', schema='different', identifier='table')
@@ -476,7 +476,7 @@ def test_profile_with_database(self):
             },
             'target': 'test'
         }
-        with self.assertRaises(RuntimeException):
+        with self.assertRaises(DbtRuntimeError):
             config_from_parts_or_dicts(self.project_cfg, profile)
 
     def test_profile_with_cluster_and_sql_endpoint(self):
@@ -496,7 +496,7 @@ def test_profile_with_cluster_and_sql_endpoint(self):
             },
             'target': 'test'
         }
-        with self.assertRaises(RuntimeException):
+        with self.assertRaises(DbtRuntimeError):
             config_from_parts_or_dicts(self.project_cfg, profile)
 
     def test_parse_columns_from_information_with_table_type_and_delta_provider(self):
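A hedged sketch of downstream code written against the renamed exceptions; the import fallback is an assumption for environments that might still be on a pre-1.4 dbt-core where the old names exist.

```python
# Hedged sketch only: tolerate both the old and the new exception names during
# the dbt-core 1.4 transition, then catch the renamed runtime error.
from typing import Optional

try:
    from dbt.exceptions import DbtRuntimeError  # dbt-core >= 1.4 naming
except ImportError:
    from dbt.exceptions import RuntimeException as DbtRuntimeError  # older dbt-core

from dbt.adapters.spark import SparkRelation


def try_create_relation(schema: str, identifier: str, database: Optional[str] = None):
    """Return a SparkRelation, or None if Spark rejects the database setting."""
    try:
        return SparkRelation.create(database=database, schema=schema, identifier=identifier)
    except DbtRuntimeError as exc:
        # Spark treats database and schema as the same thing, so a database that
        # differs from the schema raises the renamed runtime error shown above.
        print(f"could not create relation: {exc}")
        return None
```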

From 8d266fde31ede25d1ffe1cd9508c5fb563b2de66 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 11 Jan 2023 14:21:19 -0800
Subject: [PATCH 373/603] wrap if expression in brackets (#576)

* add mypy ignore to column, connections and init

* wrap if expression in brackets

* change comparison type for boolean if check

* align artifact downloads
---
 .github/workflows/release.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ad1dc92e..c4008dcf 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -196,7 +196,7 @@ jobs:
   pypi-release:
     name: Pypi release
     # only release to PyPi if we're not testing - will release to PyPi test when workflow gets rewritten
-    if: inputs.test_run  == 'false'
+    if: ${{ inputs.test_run  == false }}
 
     runs-on: ubuntu-latest
 
@@ -204,11 +204,11 @@ jobs:
 
     environment: PypiProd
     steps:
-      - uses: actions/download-artifact@v2
+      - name: "Download Build Artifact - ${{ inputs.version_number }}"
+        uses: actions/download-artifact@v3
         with:
-          name: dist
-          path: 'dist'
-
+          name: ${{ inputs.version_number }}
+          path: dist/
       - name: Publish distribution to PyPI
         uses: pypa/gh-action-pypi-publish@v1.4.2
         with:
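As a loose illustration of the bug class this commit fixes: comparing a boolean workflow input against the string 'false' never matches, so the gated PyPI job would be skipped even on a real release. The sketch below is a plain-Python stand-in, not the Actions expression engine.

```python
# Loosely analogous illustration of the boolean-vs-string comparison fixed above.
def should_release(test_run) -> bool:
    # Buggy check, mirroring `if: inputs.test_run == 'false'`
    buggy = test_run == "false"
    # Fixed check, mirroring `if: ${{ inputs.test_run == false }}`
    fixed = test_run is False
    print(f"test_run={test_run!r}: buggy={buggy}, fixed={fixed}")
    return fixed


should_release(False)    # buggy=False (job skipped!), fixed=True
should_release(True)     # buggy=False, fixed=False
should_release("false")  # buggy=True, fixed=False -- only matches the string form
```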

From 8aceeb4e280369929c1784dae809e0c0c8690942 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 12 Jan 2023 11:04:59 -0800
Subject: [PATCH 374/603] Bumping version to 1.5.0a1 and generate changelog
 (#588)

* Bumping version to 1.5.0a1 and generate CHANGELOG

* update changelog for 1.5.0a1

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>
---
 .bumpversion.cfg                               |  2 +-
 .changes/0.0.0.md                              |  1 +
 .changes/1.4.0-b1.md                           | 17 -----------------
 .changes/1.4.0/Features-20220823-093457.yaml   |  7 -------
 .changes/1.4.0/Features-20220926-123609.yaml   |  7 -------
 .changes/1.4.0/Fixes-20220926-112857.yaml      |  7 -------
 .changes/1.4.0/Fixes-20221113-211208.yaml      |  7 -------
 .changes/1.4.0/Fixes-20221116-234601.yaml      |  7 -------
 .changes/1.4.0/Fixes-20221209-101854.yaml      |  7 -------
 .../1.4.0/Under the Hood-20221202-140724.yaml  |  7 -------
 .changes/unreleased/Fixes-20221221-092410.yaml |  8 --------
 .../Under the Hood-20230110-101919.yaml        |  7 -------
 CHANGELOG.md                                   | 18 +-----------------
 dbt/adapters/spark/__version__.py              |  2 +-
 setup.py                                       |  2 +-
 15 files changed, 5 insertions(+), 101 deletions(-)
 delete mode 100644 .changes/1.4.0-b1.md
 delete mode 100644 .changes/1.4.0/Features-20220823-093457.yaml
 delete mode 100644 .changes/1.4.0/Features-20220926-123609.yaml
 delete mode 100644 .changes/1.4.0/Fixes-20220926-112857.yaml
 delete mode 100644 .changes/1.4.0/Fixes-20221113-211208.yaml
 delete mode 100644 .changes/1.4.0/Fixes-20221116-234601.yaml
 delete mode 100644 .changes/1.4.0/Fixes-20221209-101854.yaml
 delete mode 100644 .changes/1.4.0/Under the Hood-20221202-140724.yaml
 delete mode 100644 .changes/unreleased/Fixes-20221221-092410.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20230110-101919.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 9fe09ab4..949720f7 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.4.0b1
+current_version = 1.5.0a1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
index 68ecfaa1..bed77333 100644
--- a/.changes/0.0.0.md
+++ b/.changes/0.0.0.md
@@ -1,5 +1,6 @@
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
 - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
diff --git a/.changes/1.4.0-b1.md b/.changes/1.4.0-b1.md
deleted file mode 100644
index fc562626..00000000
--- a/.changes/1.4.0-b1.md
+++ /dev/null
@@ -1,17 +0,0 @@
-## dbt-spark 1.4.0-b1 - December 15, 2022
-### Features
-- incremental predicates ([#435](https://github.com/dbt-labs/dbt-spark/issues/435), [#436](https://github.com/dbt-labs/dbt-spark/pull/436))
-- Migrate dbt-utils current_timestamp macros into core + adapters ([#483](https://github.com/dbt-labs/dbt-spark/issues/483), [#480](https://github.com/dbt-labs/dbt-spark/pull/480))
-### Fixes
-- Password doesn't pass to server using LDAP connection via thrift (#310) ([#310](https://github.com/dbt-labs/dbt-spark/issues/310), [#396](https://github.com/dbt-labs/dbt-spark/pull/396))
-- Updated the Spark-testing profile name to spark_testing ([#514](https://github.com/dbt-labs/dbt-spark/issues/514), [#513](https://github.com/dbt-labs/dbt-spark/pull/513))
-- Support new error messages in the future Spark. ([#515](https://github.com/dbt-labs/dbt-spark/issues/515), [#520](https://github.com/dbt-labs/dbt-spark/pull/520))
-- Update policy code and test config to support py3.11 and tox 4.0 ([#542](https://github.com/dbt-labs/dbt-spark/issues/542), [#543](https://github.com/dbt-labs/dbt-spark/pull/543))
-### Under the Hood
-- updating python version in tox ([#536](https://github.com/dbt-labs/dbt-spark/issues/536), [#534](https://github.com/dbt-labs/dbt-spark/pull/534))
-
-### Contributors
-- [@VShkaberda](https://github.com/VShkaberda) ([#396](https://github.com/dbt-labs/dbt-spark/pull/396))
-- [@akashrn5](https://github.com/akashrn5) ([#513](https://github.com/dbt-labs/dbt-spark/pull/513))
-- [@dave-connors-3](https://github.com/dave-connors-3) ([#436](https://github.com/dbt-labs/dbt-spark/pull/436))
-- [@ueshin](https://github.com/ueshin) ([#520](https://github.com/dbt-labs/dbt-spark/pull/520))
diff --git a/.changes/1.4.0/Features-20220823-093457.yaml b/.changes/1.4.0/Features-20220823-093457.yaml
deleted file mode 100644
index 66396a8e..00000000
--- a/.changes/1.4.0/Features-20220823-093457.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: incremental predicates
-time: 2022-08-23T09:34:57.026688-05:00
-custom:
-  Author: dave-connors-3
-  Issue: "435"
-  PR: "436"
diff --git a/.changes/1.4.0/Features-20220926-123609.yaml b/.changes/1.4.0/Features-20220926-123609.yaml
deleted file mode 100644
index b80b3730..00000000
--- a/.changes/1.4.0/Features-20220926-123609.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: Migrate dbt-utils current_timestamp macros into core + adapters
-time: 2022-09-26T12:36:09.319981-07:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "483"
-  PR: "480"
diff --git a/.changes/1.4.0/Fixes-20220926-112857.yaml b/.changes/1.4.0/Fixes-20220926-112857.yaml
deleted file mode 100644
index 2a18f13a..00000000
--- a/.changes/1.4.0/Fixes-20220926-112857.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: Password doesn't pass to server using LDAP connection via thrift (#310)
-time: 2022-09-26T11:28:57.306285-04:00
-custom:
-  Author: VShkaberda
-  Issue: "310"
-  PR: "396"
diff --git a/.changes/1.4.0/Fixes-20221113-211208.yaml b/.changes/1.4.0/Fixes-20221113-211208.yaml
deleted file mode 100644
index 73a180d8..00000000
--- a/.changes/1.4.0/Fixes-20221113-211208.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: Updated the Spark-testing profile name to spark_testing
-time: 2022-11-13T21:12:08.669273+05:30
-custom:
-  Author: akashrn5
-  Issue: "514"
-  PR: "513"
diff --git a/.changes/1.4.0/Fixes-20221116-234601.yaml b/.changes/1.4.0/Fixes-20221116-234601.yaml
deleted file mode 100644
index 3e394c87..00000000
--- a/.changes/1.4.0/Fixes-20221116-234601.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: Support new error messages in the future Spark.
-time: 2022-11-16T23:46:01.899921861Z
-custom:
-  Author: ueshin
-  Issue: "515"
-  PR: "520"
diff --git a/.changes/1.4.0/Fixes-20221209-101854.yaml b/.changes/1.4.0/Fixes-20221209-101854.yaml
deleted file mode 100644
index afd1d6dc..00000000
--- a/.changes/1.4.0/Fixes-20221209-101854.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: Update policy code and test config to support py3.11 and tox 4.0
-time: 2022-12-09T10:18:54.68118-08:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "542"
-  PR: "543"
diff --git a/.changes/1.4.0/Under the Hood-20221202-140724.yaml b/.changes/1.4.0/Under the Hood-20221202-140724.yaml
deleted file mode 100644
index 88fee7be..00000000
--- a/.changes/1.4.0/Under the Hood-20221202-140724.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: updating python version in tox
-time: 2022-12-02T14:07:24.377668-06:00
-custom:
-  Author: McKnight-42
-  Issue: "536"
-  PR: "534"
diff --git a/.changes/unreleased/Fixes-20221221-092410.yaml b/.changes/unreleased/Fixes-20221221-092410.yaml
deleted file mode 100644
index 1aa8001f..00000000
--- a/.changes/unreleased/Fixes-20221221-092410.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Fixes
-body: Added escape_single_quotes macro as Spark/Databricks uses a single backslash
-  instead of a single quote
-time: 2022-12-21T09:24:10.952042-06:00
-custom:
-  Author: graciegoheen
-  Issue: "572"
-  PR: "573"
diff --git a/.changes/unreleased/Under the Hood-20230110-101919.yaml b/.changes/unreleased/Under the Hood-20230110-101919.yaml
deleted file mode 100644
index 2209fdb9..00000000
--- a/.changes/unreleased/Under the Hood-20230110-101919.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Rename exceptions to match dbt-core.
-time: 2023-01-10T10:19:19.675879-06:00
-custom:
-  Author: emmyoop
-  Issue: "557"
-  PR: "585"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 263050ca..f99421c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,25 +4,9 @@
 - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
-## dbt-spark 1.4.0-b1 - December 15, 2022
-### Features
-- incremental predicates ([#435](https://github.com/dbt-labs/dbt-spark/issues/435), [#436](https://github.com/dbt-labs/dbt-spark/pull/436))
-- Migrate dbt-utils current_timestamp macros into core + adapters ([#483](https://github.com/dbt-labs/dbt-spark/issues/483), [#480](https://github.com/dbt-labs/dbt-spark/pull/480))
-### Fixes
-- Password doesn't pass to server using LDAP connection via thrift (#310) ([#310](https://github.com/dbt-labs/dbt-spark/issues/310), [#396](https://github.com/dbt-labs/dbt-spark/pull/396))
-- Updated the Spark-testing profile name to spark_testing ([#514](https://github.com/dbt-labs/dbt-spark/issues/514), [#513](https://github.com/dbt-labs/dbt-spark/pull/513))
-- Support new error messages in the future Spark. ([#515](https://github.com/dbt-labs/dbt-spark/issues/515), [#520](https://github.com/dbt-labs/dbt-spark/pull/520))
-- Update policy code and test config to support py3.11 and tox 4.0 ([#542](https://github.com/dbt-labs/dbt-spark/issues/542), [#543](https://github.com/dbt-labs/dbt-spark/pull/543))
-### Under the Hood
-- updating python version in tox ([#536](https://github.com/dbt-labs/dbt-spark/issues/536), [#534](https://github.com/dbt-labs/dbt-spark/pull/534))
-
-### Contributors
-- [@VShkaberda](https://github.com/VShkaberda) ([#396](https://github.com/dbt-labs/dbt-spark/pull/396))
-- [@akashrn5](https://github.com/akashrn5) ([#513](https://github.com/dbt-labs/dbt-spark/pull/513))
-- [@dave-connors-3](https://github.com/dave-connors-3) ([#436](https://github.com/dbt-labs/dbt-spark/pull/436))
-- [@ueshin](https://github.com/ueshin) ([#520](https://github.com/dbt-labs/dbt-spark/pull/520))
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
 - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 27cfeecd..219c289b 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.4.0b1"
+version = "1.5.0a1"
diff --git a/setup.py b/setup.py
index 012e58d4..da8d59cb 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.4.0b1"
+package_version = "1.5.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 89f0cc25310d834b639fbd20ff1e4a1ae93941ca Mon Sep 17 00:00:00 2001
From: Nathaniel May <nathanieldmay@gmail.com>
Date: Wed, 18 Jan 2023 23:05:18 -0500
Subject: [PATCH 375/603] make dev installs requirements.txt too (#399)

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a520c425..8b251662 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 .PHONY: dev
 dev: ## Installs adapter in develop mode along with development dependencies
 	@\
-	pip install -r dev-requirements.txt && pre-commit install
+	pip install -r requirements.txt -r dev-requirements.txt && pre-commit install
 
 .PHONY: mypy
 mypy: ## Runs mypy against staged changes for static type checking.

From 39800e024dbda19077ae7ee6a7aec8a9b0c47323 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen <jeremy@dbtlabs.com>
Date: Thu, 19 Jan 2023 13:19:24 +0100
Subject: [PATCH 376/603] Convert incremental on_schema_change tests (#593)

* Convert incremental on_schema_change tests

* Switch to dbt-core main
---
 .../test_incremental_on_schema_change.py      |  84 ++++++++++
 .../test_incremental_predicates.py            |   0
 .../test_incremental_unique_id.py             |   0
 .../models/incremental_append_new_columns.sql |  28 ----
 .../incremental_append_new_columns_target.sql |  19 ---
 .../models/incremental_fail.sql               |  18 --
 .../models/incremental_ignore.sql             |  18 --
 .../models/incremental_ignore_target.sql      |  15 --
 .../models/incremental_sync_all_columns.sql   |  30 ----
 .../incremental_sync_all_columns_target.sql   |  20 ---
 .../models/model_a.sql                        |  22 ---
 .../test_incremental_on_schema_change.py      | 156 ------------------
 12 files changed, 84 insertions(+), 326 deletions(-)
 create mode 100644 tests/functional/adapter/incremental/test_incremental_on_schema_change.py
 rename tests/functional/adapter/{ => incremental}/test_incremental_predicates.py (100%)
 rename tests/functional/adapter/{ => incremental}/test_incremental_unique_id.py (100%)
 delete mode 100644 tests/integration/incremental_on_schema_change/models/incremental_append_new_columns.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/models/incremental_append_new_columns_target.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/models/incremental_fail.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/models/incremental_ignore.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/models/incremental_ignore_target.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/models/model_a.sql
 delete mode 100644 tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py

diff --git a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
new file mode 100644
index 00000000..974edd26
--- /dev/null
+++ b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
@@ -0,0 +1,84 @@
+import pytest
+
+from dbt.tests.util import run_dbt
+
+from dbt.tests.adapter.incremental.test_incremental_on_schema_change import BaseIncrementalOnSchemaChangeSetup
+
+
+class IncrementalOnSchemaChangeIgnoreFail(BaseIncrementalOnSchemaChangeSetup):
+    def test_run_incremental_ignore(self, project):
+        select = "model_a incremental_ignore incremental_ignore_target"
+        compare_source = "incremental_ignore"
+        compare_target = "incremental_ignore_target"
+        self.run_twice_and_assert(select, compare_source, compare_target, project)
+
+    def test_run_incremental_fail_on_schema_change(self, project):
+        select = "model_a incremental_fail"
+        run_dbt(["run", "--models", select, "--full-refresh"])
+        results_two = run_dbt(["run", "--models", select], expect_pass=False)
+        assert "Compilation Error" in results_two[1].message
+
+
+@pytest.mark.skip_profile("databricks_sql_endpoint")
+class TestAppendOnSchemaChange(IncrementalOnSchemaChangeIgnoreFail):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+incremental_strategy": "append",
+            }
+        }
+
+
+@pytest.mark.skip_profile("databricks_sql_endpoint", "spark_session")
+class TestInsertOverwriteOnSchemaChange(IncrementalOnSchemaChangeIgnoreFail):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "parquet",
+                "+partition_by": "id",
+                "+incremental_strategy": "insert_overwrite",
+            }
+        }
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestDeltaOnSchemaChange(BaseIncrementalOnSchemaChangeSetup):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+                "+incremental_strategy": "merge",
+                "+unique_key": "id",
+            }
+        }
+
+    def run_incremental_sync_all_columns(self, project):
+        select = "model_a incremental_sync_all_columns incremental_sync_all_columns_target"
+        compare_source = "incremental_sync_all_columns"
+        compare_target = "incremental_sync_all_columns_target"
+        run_dbt(["run", "--models", select, "--full-refresh"])
+        # Delta Lake doesn't support removing columns -- show a nice compilation error
+        results = run_dbt(["run", "--models", select], expect_pass = False)
+        assert "Compilation Error" in results[1].message
+        
+    def run_incremental_sync_remove_only(self, project):
+        select = "model_a incremental_sync_remove_only incremental_sync_remove_only_target"
+        compare_source = "incremental_sync_remove_only"
+        compare_target = "incremental_sync_remove_only_target"
+        run_dbt(["run", "--models", select, "--full-refresh"])
+        # Delta Lake doesn't support removing columns -- show a nice compilation error
+        results = run_dbt(["run", "--models", select], expect_pass=False)
+        assert "Compilation Error" in results[1].message
+
+    def test_run_incremental_append_new_columns(self, project):
+        # only adding new columns is supported
+        self.run_incremental_append_new_columns(project)
+        # handling columns that have been removed doesn't work on Delta Lake today
+        # self.run_incremental_append_new_columns_remove_one(project)
+
+    def test_run_incremental_sync_all_columns(self, project):
+        self.run_incremental_sync_all_columns(project)
+        self.run_incremental_sync_remove_only(project)
diff --git a/tests/functional/adapter/test_incremental_predicates.py b/tests/functional/adapter/incremental/test_incremental_predicates.py
similarity index 100%
rename from tests/functional/adapter/test_incremental_predicates.py
rename to tests/functional/adapter/incremental/test_incremental_predicates.py
diff --git a/tests/functional/adapter/test_incremental_unique_id.py b/tests/functional/adapter/incremental/test_incremental_unique_id.py
similarity index 100%
rename from tests/functional/adapter/test_incremental_unique_id.py
rename to tests/functional/adapter/incremental/test_incremental_unique_id.py
diff --git a/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns.sql b/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns.sql
deleted file mode 100644
index 86f6c7c4..00000000
--- a/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns.sql
+++ /dev/null
@@ -1,28 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        on_schema_change='append_new_columns'
-    )
-}}
-
-{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %}
-
-WITH source_data AS (SELECT * FROM {{ ref('model_a') }} )
-
-{% if is_incremental()  %}
-
-SELECT id, 
-       cast(field1 as {{string_type}}) as field1, 
-       cast(field2 as {{string_type}}) as field2, 
-       cast(field3 as {{string_type}}) as field3, 
-       cast(field4 as {{string_type}}) as field4 
-FROM source_data WHERE id NOT IN (SELECT id from {{ this }} )
-
-{% else %}
-
-SELECT id, 
-       cast(field1 as {{string_type}}) as field1, 
-       cast(field2 as {{string_type}}) as field2 
-FROM source_data where id <= 3
-
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns_target.sql b/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns_target.sql
deleted file mode 100644
index 55ed7b2c..00000000
--- a/tests/integration/incremental_on_schema_change/models/incremental_append_new_columns_target.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ 
-    config(materialized='table') 
-}}
-
-{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %}
-
-with source_data as (
-
-    select * from {{ ref('model_a') }}
-
-)
-
-select id
-       ,cast(field1 as {{string_type}}) as field1
-       ,cast(field2 as {{string_type}}) as field2
-       ,cast(CASE WHEN id <= 3 THEN NULL ELSE field3 END as {{string_type}}) AS field3
-       ,cast(CASE WHEN id <= 3 THEN NULL ELSE field4 END as {{string_type}}) AS field4
-
-from source_data
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/models/incremental_fail.sql b/tests/integration/incremental_on_schema_change/models/incremental_fail.sql
deleted file mode 100644
index 939fc20c..00000000
--- a/tests/integration/incremental_on_schema_change/models/incremental_fail.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        on_schema_change='fail'
-    )
-}}
-
-WITH source_data AS (SELECT * FROM {{ ref('model_a') }} )
-
-{% if is_incremental()  %}
-
-SELECT id, field1, field2 FROM source_data
-
-{% else %}
-
-SELECT id, field1, field3 FROm source_data
-
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/models/incremental_ignore.sql b/tests/integration/incremental_on_schema_change/models/incremental_ignore.sql
deleted file mode 100644
index 98f0a74a..00000000
--- a/tests/integration/incremental_on_schema_change/models/incremental_ignore.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        on_schema_change='ignore'
-    )
-}}
-
-WITH source_data AS (SELECT * FROM {{ ref('model_a') }} )
-
-{% if is_incremental() %}
-
-SELECT id, field1, field2, field3, field4 FROM source_data WHERE id NOT IN (SELECT id from {{ this }} )
-
-{% else %}
-
-SELECT id, field1, field2 FROM source_data LIMIT 3
-
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/models/incremental_ignore_target.sql b/tests/integration/incremental_on_schema_change/models/incremental_ignore_target.sql
deleted file mode 100644
index 92d4564e..00000000
--- a/tests/integration/incremental_on_schema_change/models/incremental_ignore_target.sql
+++ /dev/null
@@ -1,15 +0,0 @@
-{{ 
-    config(materialized='table') 
-}}
-
-with source_data as (
-
-    select * from {{ ref('model_a') }}
-
-)
-
-select id
-       ,field1
-       ,field2
-
-from source_data
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns.sql b/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns.sql
deleted file mode 100644
index 2c5a461e..00000000
--- a/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns.sql
+++ /dev/null
@@ -1,30 +0,0 @@
-{{
-    config(
-        materialized='incremental',
-        on_schema_change='sync_all_columns'
-        
-    )
-}}
-
-WITH source_data AS (SELECT * FROM {{ ref('model_a') }} )
-
-{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %}
-
-{% if is_incremental() %}
-
-SELECT id, 
-       cast(field1 as {{string_type}}) as field1, 
-       cast(field3 as {{string_type}}) as field3, -- to validate new fields
-       cast(field4 as {{string_type}}) AS field4 -- to validate new fields
-
-FROM source_data WHERE id NOT IN (SELECT id from {{ this }} )
-
-{% else %}
-
-select id, 
-       cast(field1 as {{string_type}}) as field1, 
-       cast(field2 as {{string_type}}) as field2
-
-from source_data where id <= 3
-
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql b/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql
deleted file mode 100644
index 56591eb2..00000000
--- a/tests/integration/incremental_on_schema_change/models/incremental_sync_all_columns_target.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ 
-    config(materialized='table') 
-}}
-
-with source_data as (
-
-    select * from {{ ref('model_a') }}
-
-)
-
-{% set string_type = 'string' if target.type == 'bigquery' else 'varchar(10)' %}
-
-select id
-       ,cast(field1 as {{string_type}}) as field1
-       --,field2
-       ,cast(case when id <= 3 then null else field3 end as {{string_type}}) as field3
-       ,cast(case when id <= 3 then null else field4 end as {{string_type}}) as field4
-
-from source_data
-order by id
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/models/model_a.sql b/tests/integration/incremental_on_schema_change/models/model_a.sql
deleted file mode 100644
index 2a0b2dda..00000000
--- a/tests/integration/incremental_on_schema_change/models/model_a.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ 
-    config(materialized='table') 
-}}
-
-with source_data as (
-
-    select 1 as id, 'aaa' as field1, 'bbb' as field2, 111 as field3, 'TTT' as field4
-    union all select 2 as id, 'ccc' as field1, 'ddd' as field2, 222 as field3, 'UUU' as field4
-    union all select 3 as id, 'eee' as field1, 'fff' as field2, 333 as field3, 'VVV' as field4
-    union all select 4 as id, 'ggg' as field1, 'hhh' as field2, 444 as field3, 'WWW' as field4
-    union all select 5 as id, 'iii' as field1, 'jjj' as field2, 555 as field3, 'XXX' as field4
-    union all select 6 as id, 'kkk' as field1, 'lll' as field2, 666 as field3, 'YYY' as field4
-
-)
-
-select id
-       ,field1
-       ,field2
-       ,field3
-       ,field4
-
-from source_data
\ No newline at end of file
diff --git a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py
deleted file mode 100644
index 4d1cd374..00000000
--- a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from cProfile import run
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestIncrementalOnSchemaChange(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "incremental_on_schema_change"
-
-    @property
-    def models(self):
-        return "models"
-
-    @property
-    def project_config(self):
-        return {
-            "config-version": 2,
-            "test-paths": ["tests"]
-        }
-
-    def run_twice_and_assert(
-        self, include, compare_source, compare_target
-    ):
-
-        # dbt run (twice)
-        run_args = ['run']
-        if include:
-            run_args.extend(('--models', include))
-        results_one = self.run_dbt(run_args)
-        results_two = self.run_dbt(run_args)
-
-        self.assertEqual(len(results_one), 3)
-        self.assertEqual(len(results_two), 3)
-        
-        self.assertTablesEqual(compare_source, compare_target)
-
-    def run_incremental_ignore(self):
-        select = 'model_a incremental_ignore incremental_ignore_target'
-        compare_source = 'incremental_ignore'
-        compare_target = 'incremental_ignore_target'
-        self.run_twice_and_assert(select, compare_source, compare_target)
-
-    def run_incremental_append_new_columns(self):
-        select = 'model_a incremental_append_new_columns incremental_append_new_columns_target'
-        compare_source = 'incremental_append_new_columns'
-        compare_target = 'incremental_append_new_columns_target'
-        self.run_twice_and_assert(select, compare_source, compare_target)
-
-    def run_incremental_fail_on_schema_change(self):
-        select = 'model_a incremental_fail'
-        results_one = self.run_dbt(['run', '--models', select, '--full-refresh'])
-        results_two = self.run_dbt(['run', '--models', select], expect_pass = False)
-        self.assertIn('Compilation Error', results_two[1].message)
-
-    def run_incremental_sync_all_columns(self):
-        # this doesn't work on Delta today
-        select = 'model_a incremental_sync_all_columns incremental_sync_all_columns_target'
-        compare_source = 'incremental_sync_all_columns'
-        compare_target = 'incremental_sync_all_columns_target'
-        results_one = self.run_dbt(['run', '--models', select, '--full-refresh'])
-        results_two = self.run_dbt(['run', '--models', select], expect_pass = False)
-        self.assertIn('Compilation Error', results_two[1].message)
-
-
-class TestApacheSparkAppend(TestIncrementalOnSchemaChange):
-
-    @property
-    def project_config(self):
-        return {
-            "config-version": 2,
-            "test-paths": ["tests"],
-            "models": {
-                "+incremental_strategy": "append",
-            }
-        }
-        
-    # only 'ignore' and 'fail' are supported
-
-    @use_profile('apache_spark')
-    def test__apache_spark__run_incremental_ignore(self):
-        self.run_incremental_ignore()
-
-    @use_profile('apache_spark')
-    def test__apache_spark__run_incremental_fail_on_schema_change(self):
-        self.run_incremental_fail_on_schema_change()
-
-
-class TestApacheSparkInsertOverwrite(TestIncrementalOnSchemaChange):
-
-    @property
-    def project_config(self):
-        return {
-            "config-version": 2,
-            "test-paths": ["tests"],
-            "models": {
-                "+file_format": "parquet",
-                "+partition_by": "id",
-                "+incremental_strategy": "insert_overwrite",
-            }
-        }
-        
-    # only 'ignore' and 'fail' are supported
-
-    @use_profile('apache_spark')
-    def test__apache_spark__run_incremental_ignore(self):
-        self.run_incremental_ignore()
-
-    @use_profile('apache_spark')
-    def test__apache_spark__run_incremental_fail_on_schema_change(self):
-        self.run_incremental_fail_on_schema_change()
-
-
-class TestDeltaOnSchemaChange(TestIncrementalOnSchemaChange):
-    @property
-    def project_config(self):
-        return {
-            "config-version": 2,
-            "test-paths": ["tests"],
-            "models": {
-                "+file_format": "delta",
-                "+incremental_strategy": "merge",
-                "+unique_key": "id",
-            }
-        }
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster__run_incremental_ignore(self):
-        self.run_incremental_ignore()
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster__run_incremental_fail_on_schema_change(self):
-        self.run_incremental_fail_on_schema_change()
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster__run_incremental_append_new_columns(self):
-        self.run_incremental_append_new_columns()
-
-    @use_profile('databricks_cluster')
-    def test__databricks_cluster__run_incremental_sync_all_columns(self):
-        self.run_incremental_sync_all_columns()
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint__run_incremental_ignore(self):
-        self.run_incremental_ignore()
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint__run_incremental_fail_on_schema_change(self):
-        self.run_incremental_fail_on_schema_change()
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint__run_incremental_append_new_columns(self):
-        self.run_incremental_append_new_columns()
-
-    @use_profile('databricks_sql_endpoint')
-    def test__databricks_sql_endpoint__run_incremental_sync_all_columns(self):
-        self.run_incremental_sync_all_columns()

From 31143310731f23261abe97d588baad15d909d9f4 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 26 Jan 2023 08:21:52 -0800
Subject: [PATCH 377/603] Use AttrDict instead of agate (#604)

* add mypy ignore to column, connections and init

* change from agate to AttrDict
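
The row-parsing logic itself is unchanged: it only relies on each returned row
exposing `_keys` and `_values`, so swapping the annotation from a list of agate
rows to the `AttrDict` returned by `execute_macro` does not affect how the rows
are consumed. A rough sketch of that contract (the `FakeRow` class below is a
hypothetical stand-in, not part of dbt):

    # Hypothetical stand-in for a row yielded by the describe-extended macro;
    # only the `_keys` / `_values` attributes read by parse_describe_extended matter.
    class FakeRow:
        def __init__(self, keys, values):
            self._keys = keys
            self._values = values

    raw_rows = [
        FakeRow(("col_name", "data_type"), ("id", "int")),
        FakeRow(("col_name", "data_type"), ("name", "string")),
    ]

    # Same conversion as in parse_describe_extended: each row becomes a plain dict.
    dict_rows = [dict(zip(row._keys, row._values)) for row in raw_rows]
    print(dict_rows)
    # [{'col_name': 'id', 'data_type': 'int'}, {'col_name': 'name', 'data_type': 'string'}]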
---
 dbt/adapters/spark/impl.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 1abdfa62..95af4291 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -24,7 +24,7 @@
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
 from dbt.events import AdapterLogger
-from dbt.utils import executor
+from dbt.utils import executor, AttrDict
 
 logger = AdapterLogger("Spark")
 
@@ -176,9 +176,7 @@ def get_relation(self, database: str, schema: str, identifier: str) -> Optional[
 
         return super().get_relation(database, schema, identifier)
 
-    def parse_describe_extended(
-        self, relation: Relation, raw_rows: List[agate.Row]
-    ) -> List[SparkColumn]:
+    def parse_describe_extended(self, relation: Relation, raw_rows: AttrDict) -> List[SparkColumn]:
         # Convert the Row to a dict
         dict_rows = [dict(zip(row._keys, row._values)) for row in raw_rows]
         # Find the separator between the rows and the metadata provided
@@ -218,7 +216,7 @@ def find_table_information_separator(rows: List[dict]) -> int:
     def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
         columns = []
         try:
-            rows: List[agate.Row] = self.execute_macro(
+            rows: AttrDict = self.execute_macro(
                 GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
             )
             columns = self.parse_describe_extended(relation, rows)

From 2e767bc10ddef3c85ffb16a2fee916b579bec57e Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 26 Jan 2023 15:32:07 -0800
Subject: [PATCH 378/603] Upgrade Thrift to 0.16.0 (#605)

* allow thrift 0.16.0

* add changie
---
 .changes/unreleased/Dependencies-20230126-152319.yaml | 7 +++++++
 setup.py                                              | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230126-152319.yaml

diff --git a/.changes/unreleased/Dependencies-20230126-152319.yaml b/.changes/unreleased/Dependencies-20230126-152319.yaml
new file mode 100644
index 00000000..7fe5d0c7
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230126-152319.yaml
@@ -0,0 +1,7 @@
+kind: Dependencies
+body: Allow thrift 0.16.0
+time: 2023-01-26T15:23:19.978823-08:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "606"
+  PR: "605"
diff --git a/setup.py b/setup.py
index da8d59cb..bfa37745 100644
--- a/setup.py
+++ b/setup.py
@@ -57,7 +57,7 @@ def _get_dbt_core_version():
 odbc_extras = ["pyodbc>=4.0.30"]
 pyhive_extras = [
     "PyHive[hive]>=0.6.0,<0.7.0",
-    "thrift>=0.11.0,<0.16.0",
+    "thrift>=0.11.0,<0.17.0",
 ]
 session_extras = ["pyspark>=3.0.0,<4.0.0"]
 all_extras = odbc_extras + pyhive_extras + session_extras

From 7d46e180bb085569544ac402fa6aa31f527ddde3 Mon Sep 17 00:00:00 2001
From: Alexander Smolyakov <alexander.smolyakov.dev@gmail.com>
Date: Fri, 27 Jan 2023 19:06:50 +0400
Subject: [PATCH 379/603] [CI/CD] Update release workflow (#592)

* Update release workflow

* Fix format

* Set default `test_run` value to `true`

* Update Slack secret

* Resolve review comments
---
 .github/workflows/release.yml | 271 +++++++++++++++-------------------
 1 file changed, 120 insertions(+), 151 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index c4008dcf..397bbdb4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,14 +1,20 @@
 # **what?**
-# Take the given commit, run unit tests specifically on that sha, build and
-# package it, and then release to GitHub with that specific build (PyPi to follow later)
-
+# Release workflow provides the following steps:
+# - checkout the given commit;
+# - validate version in sources and changelog file for given version;
+# - run unit tests against given commit;
+# - build and package that SHA;
+# - release it to GitHub and PyPI with that specific build;
+#
 # **why?**
 # Ensure an automated and tested release process
-
+#
 # **when?**
-# This will only run manually with a given sha and version
+# This will only run manually. Run this workflow only after the
+# version bump workflow is completed and related changes are reviewed and merged.
+#
 
-name: Build, Test, and Package
+name: Release to GitHub and PyPI
 
 on:
   workflow_dispatch:
@@ -17,28 +23,43 @@ on:
         description: "The last commit sha in the release"
         type: string
         required: true
-      changelog_path:
-        description: "Path to changes log"
+      target_branch:
+        description: "The branch to release from"
         type: string
-        default: "./CHANGELOG.md"
-        required: false
+        required: true
       version_number:
         description: "The release version number (i.e. 1.0.0b1)"
         type: string
         required: true
+      build_script_path:
+        description: "Build script path"
+        type: string
+        default: "scripts/build-dist.sh"
+        required: true
+      s3_bucket_name:
+        description: "AWS S3 bucket name"
+        type: string
+        default: "core-team-artifacts"
+        required: true
+      package_test_command:
+        description: "Package test command"
+        type: string
+        default: "dbt --version"
+        required: true
+      env_setup_script_path:
+        description: "Environment setup script path"
+        type: string
+        default: ""
+        required: false
       test_run:
-        description: "Test run (Publish release as draft to GitHub)"
+        description: "Test run (Publish release as draft)"
         type: boolean
-        default: false
+        default: true
         required: false
 
 permissions:
   contents: write # this is the permission that allows creating a new release
 
-env:
-  PYTHON_TARGET_VERSION: 3.8
-  ARTIFACT_RETENTION_DAYS: 2
-
 defaults:
   run:
     shell: bash
@@ -50,166 +71,114 @@ jobs:
     steps:
       - name: "[DEBUG] Print Variables"
         run: |
-          echo The last commit sha in the release:  ${{ inputs.sha }}
-          echo The release version number:          ${{ inputs.version_number }}
-          echo The path to the changelog markdpown: ${{ inputs.changelog_path }}
-          echo This is a test run:                  ${{ inputs.test_run }}
-          echo Python target version:               ${{ env.PYTHON_TARGET_VERSION }}
-          echo Artifact retention days:             ${{ env.ARTIFACT_RETENTION_DAYS }}
-
-  unit:
-    name: Unit Test
-    runs-on: ubuntu-latest
+          echo The last commit sha in the release: ${{ inputs.sha }}
+          echo The branch to release from:         ${{ inputs.target_branch }}
+          echo The release version number:         ${{ inputs.version_number }}
+          echo Build script path:                  ${{ inputs.build_script_path }}
+          echo Environment setup script path:      ${{ inputs.env_setup_script_path }}
+          echo AWS S3 bucket name:                 ${{ inputs.s3_bucket_name }}
+          echo Package test command:               ${{ inputs.package_test_command }}
+          echo Test run:                           ${{ inputs.test_run }}
+
+  # The Spark repository uses CircleCI to run integration tests.
+  # Because of this, the process of version bumps will be manual,
+  # which means that this stage will be used to audit the version
+  # and changelog in sources.
+  # We are passing `env_setup_script_path` as an empty string
+  # so that the integration tests stage will be skipped.
+  audit-version-and-changelog:
+    name: Bump package version, Generate changelog
+
+    uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@main
 
-    env:
-      TOXENV: "unit"
+    with:
+      sha: ${{ inputs.sha }}
+      version_number: ${{ inputs.version_number }}
+      target_branch: ${{ inputs.target_branch }}
+      env_setup_script_path: ""
+      test_run: ${{ inputs.test_run }}
 
-    steps:
-      - name: "Checkout Commit - ${{ inputs.sha }}"
-        uses: actions/checkout@v3
-        with:
-          persist-credentials: false
-          ref: ${{ github.event.inputs.sha }}
-
-      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ env.PYTHON_TARGET_VERSION }}
-
-      - name: "Install Python Dependencies"
-        run: |
-          sudo apt-get install libsasl2-dev
-          python -m pip install --user --upgrade pip
-          python -m pip install tox
-          python -m pip --version
-          python -m tox --version
+    secrets:
+      FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }}
 
-      - name: "Run Tox"
-        run: tox
+  log-outputs-audit-version-and-changelog:
+    name: "[Log output] Bump package version, Generate changelog"
+    if: ${{ !failure() && !cancelled() }}
 
-  build:
-    name: Build Packages
+    needs: [audit-version-and-changelog]
 
     runs-on: ubuntu-latest
 
     steps:
-      - name: "Checkout Commit - ${{ inputs.sha }}"
-        uses: actions/checkout@v3
-        with:
-          persist-credentials: false
-          ref: ${{ inputs.sha }}
-
-      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ env.PYTHON_TARGET_VERSION }}
-
-      - name: "Install Python Dependencies"
-        run: |
-          sudo apt-get install libsasl2-dev
-          python -m pip install --user --upgrade pip
-          python -m pip install --upgrade setuptools wheel twine check-wheel-contents
-          python -m pip --version
-
-      - name: "Build Distributions"
-        run: ./scripts/build-dist.sh
-
-      - name: "[DEBUG] Show Distributions"
-        run: ls -lh dist/
-
-      - name: "Check Distribution Descriptions"
+      - name: Print variables
         run: |
-          twine check dist/*
-
-      - name: "[DEBUG] Check Wheel Contents"
-        run: |
-          check-wheel-contents dist/*.whl --ignore W007,W008
-
-      - name: "Upload Build Artifact - ${{ inputs.version_number }}"
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ inputs.version_number }}
-          path: |
-            dist/
-            !dist/dbt-${{ inputs.version_number }}.tar.gz
-          retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
-
-  test-build:
-    name: Verify Packages
+          echo Final SHA     : ${{ needs.audit-version-and-changelog.outputs.final_sha }}
+          echo Changelog path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
 
-    needs: [unit, build]
-
-    runs-on: ubuntu-latest
+  build-test-package:
+    name: Build, Test, Package
+    if: ${{ !failure() && !cancelled() }}
+    needs: [audit-version-and-changelog]
 
-    steps:
-      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ env.PYTHON_TARGET_VERSION }}
+    uses: dbt-labs/dbt-release/.github/workflows/build.yml@main
 
-      - name: "Install Python Dependencies"
-        run: |
-          sudo apt-get install libsasl2-dev
-          python -m pip install --user --upgrade pip
-          python -m pip install --upgrade wheel
-          python -m pip --version
+    with:
+      sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
+      version_number: ${{ inputs.version_number }}
+      changelog_path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
+      build_script_path: ${{ inputs.build_script_path }}
+      s3_bucket_name: ${{ inputs.s3_bucket_name }}
+      package_test_command: ${{ inputs.package_test_command }}
+      test_run: ${{ inputs.test_run }}
 
-      - name: "Download Build Artifact - ${{ inputs.version_number }}"
-        uses: actions/download-artifact@v3
-        with:
-          name: ${{ inputs.version_number }}
-          path: dist/
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.PRODUCTION_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.PRODUCTION_AWS_SECRET_ACCESS_KEY }}
 
-      - name: "[DEBUG] Show Distributions"
-        run: ls -lh dist/
+  github-release:
+    name: GitHub Release
+    if: ${{ !failure() && !cancelled() }}
 
-      - name: "Install Wheel Distributions"
-        run: |
-          find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
+    needs: [audit-version-and-changelog, build-test-package]
 
-      - name: "[DEBUG] Check Wheel Distributions"
-        run: |
-          dbt --version
+    uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main
 
-      - name: "Install Source Distributions"
-        run: |
-          find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
+    with:
+      sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
+      version_number: ${{ inputs.version_number }}
+      changelog_path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
+      test_run: ${{ inputs.test_run }}
 
-      - name: "[DEBUG] Check Source Distributions"
-        run: |
-          dbt --version
+  pypi-release:
+    name: PyPI Release
 
-  github-release:
-    name: GitHub Release
-    if: ${{ !failure() && !cancelled() }}
-    needs: test-build
+    needs: [github-release]
 
-    # pin to commit since this is workflow is WIP but this commit has been tested as working
-    uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@7b6e01d73d2c8454e06302cc66ef4c2dbd4dbe4e
+    uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main
 
     with:
-      sha: ${{ inputs.sha }}
       version_number: ${{ inputs.version_number }}
-      changelog_path: ${{ inputs.changelog_path }}
       test_run: ${{ inputs.test_run }}
 
-  pypi-release:
-    name: Pypi release
-    # only release to PyPi if we're not testing - will release to PyPi test when workflow gets rewritten
-    if: ${{ inputs.test_run  == false }}
+    secrets:
+      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+      TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}
 
-    runs-on: ubuntu-latest
+  slack-notification:
+    name: Slack Notification
+    if: ${{ failure() }}
 
-    needs: github-release
+    needs:
+      [
+        audit-version-and-changelog,
+        build-test-package,
+        github-release,
+        pypi-release,
+      ]
 
-    environment: PypiProd
-    steps:
-      - name: "Download Build Artifact - ${{ inputs.version_number }}"
-        uses: actions/download-artifact@v3
-        with:
-          name: ${{ inputs.version_number }}
-          path: dist/
-      - name: Publish distribution to PyPI
-        uses: pypa/gh-action-pypi-publish@v1.4.2
-        with:
-          password: ${{ secrets.PYPI_API_TOKEN }}
+    uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main
+    with:
+      status: "failure"
+
+    secrets:
+      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }}

From 4517567c7e6aafa983a6e8995270feca8316486f Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Fri, 27 Jan 2023 16:08:33 -0600
Subject: [PATCH 380/603] remove PR requirement for changie and allow multiple
 issues per changelog (#603)

---
 .changie.yaml                       | 97 ++++++++++++++++++++++-------
 .github/workflows/bot-changelog.yml |  5 +-
 2 files changed, 77 insertions(+), 25 deletions(-)

diff --git a/.changie.yaml b/.changie.yaml
index 7d34a96b..e6cc1964 100644
--- a/.changie.yaml
+++ b/.changie.yaml
@@ -6,57 +6,110 @@ changelogPath: CHANGELOG.md
 versionExt: md
 versionFormat: '## dbt-spark {{.Version}} - {{.Time.Format "January 02, 2006"}}'
 kindFormat: '### {{.Kind}}'
-changeFormat: '- {{.Body}} ([#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), [#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+changeFormat: |-
+  {{- $IssueList := list }}
+  {{- $changes := splitList " " $.Custom.Issue }}
+  {{- range $issueNbr := $changes }}
+    {{- $changeLink := "[#nbr](https://github.com/dbt-labs/dbt-spark/issues/nbr)" | replace "nbr" $issueNbr }}
+    {{- $IssueList = append $IssueList $changeLink  }}
+  {{- end -}}
+  - {{.Body}} ({{ range $index, $element := $IssueList }}{{if $index}}, {{end}}{{$element}}{{end}})
+
 kinds:
 - label: Breaking Changes
 - label: Features
 - label: Fixes
 - label: Under the Hood
 - label: Dependencies
-  changeFormat: '- {{.Body}} ({{if ne .Custom.Issue ""}}[#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), {{end}}[#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+  changeFormat: |-
+    {{- $PRList := list }}
+    {{- $changes := splitList " " $.Custom.PR }}
+    {{- range $pullrequest := $changes }}
+      {{- $changeLink := "[#nbr](https://github.com/dbt-labs/dbt-spark/pull/nbr)" | replace "nbr" $pullrequest }}
+      {{- $PRList = append $PRList $changeLink  }}
+    {{- end -}}
+    - {{.Body}} ({{ range $index, $element := $PRList }}{{if $index}}, {{end}}{{$element}}{{end}})
+  skipGlobalChoices: true
+  additionalChoices:
+    - key: Author
+      label: GitHub Username(s) (separated by a single space if multiple)
+      type: string
+      minLength: 3
+    - key: PR
+      label: GitHub Pull Request Number (separated by a single space if multiple)
+      type: string
+      minLength: 1
 - label: Security
-  changeFormat: '- {{.Body}} ({{if ne .Custom.Issue ""}}[#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), {{end}}[#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+  changeFormat: |-
+    {{- $PRList := list }}
+    {{- $changes := splitList " " $.Custom.PR }}
+    {{- range $pullrequest := $changes }}
+      {{- $changeLink := "[#nbr](https://github.com/dbt-labs/dbt-spark/pull/nbr)" | replace "nbr" $pullrequest }}
+      {{- $PRList = append $PRList $changeLink  }}
+    {{- end -}}
+    - {{.Body}} ({{ range $index, $element := $PRList }}{{if $index}}, {{end}}{{$element}}{{end}})
+  skipGlobalChoices: true
+  additionalChoices:
+    - key: Author
+      label: GitHub Username(s) (separated by a single space if multiple)
+      type: string
+      minLength: 3
+    - key: PR
+      label: GitHub Pull Request Number (separated by a single space if multiple)
+      type: string
+      minLength: 1
+
+newlines:
+  afterChangelogHeader: 1
+  afterKind: 1
+  afterChangelogVersion: 1
+  beforeKind: 1
+  endOfVersion: 1
+
 custom:
 - key: Author
   label: GitHub Username(s) (separated by a single space if multiple)
   type: string
   minLength: 3
 - key: Issue
-  label: GitHub Issue Number
-  type: int
-  minLength: 4
-- key: PR
-  label: GitHub Pull Request Number
+  label: GitHub Issue Number (separated by a single space if multiple)
   type: int
   minLength: 4
+
 footerFormat: |
   {{- $contributorDict := dict }}
   {{- /* any names added to this list should be all lowercase for later matching purposes */}}
-  {{- $core_team := list "michelleark" "peterallenwebb" "emmyoop" "nathaniel-may" "gshank" "leahwicz" "chenyulinx" "stu-k" "iknox-fa" "versusfacit" "mcknight-42" "jtcohen6" "aranke" "mikealfare" "dependabot[bot]" "snyk-bot" "colin-rogers-dbt" }}
+  {{- $core_team := list "michelleark" "peterallenwebb" "emmyoop" "nathaniel-may" "gshank" "leahwicz" "chenyulinx" "stu-k" "iknox-fa" "versusfacit" "mcknight-42" "jtcohen6" "aranke" "dependabot[bot]" "snyk-bot" "colin-rogers-dbt" }}
   {{- range $change := .Changes }}
     {{- $authorList := splitList " " $change.Custom.Author }}
-    {{- /* loop through all authors for a PR */}}
+    {{- /* loop through all authors for a single changelog */}}
     {{- range $author := $authorList }}
       {{- $authorLower := lower $author }}
       {{- /* we only want to include non-core team contributors */}}
       {{- if not (has $authorLower $core_team)}}
-        {{- $pr := $change.Custom.PR }}
-        {{- /* check if this contributor has other PRs associated with them already */}}
-        {{- if hasKey $contributorDict $author }}
-          {{- $prList := get $contributorDict $author }}
-          {{- $prList = append $prList $pr  }}
-          {{- $contributorDict := set $contributorDict $author $prList }}
-        {{- else }}
-          {{- $prList := list $change.Custom.PR }}
-          {{- $contributorDict := set $contributorDict $author $prList }}
-        {{- end }}
-      {{- end}}
+        {{- $changeList := splitList " " $change.Custom.Author }}
+          {{- $changeLink := $change.Kind }}
+          {{- if or (eq $change.Kind "Dependencies") (eq $change.Kind "Security") }}
+            {{- $changeLink = "[#nbr](https://github.com/dbt-labs/dbt-spark/pull/nbr)" | replace "nbr" $change.Custom.PR }}
+          {{- else }}
+            {{- $changeLink = "[#nbr](https://github.com/dbt-labs/dbt-spark/issues/nbr)" | replace "nbr" $change.Custom.Issue }}
+          {{- end }}
+          {{- /* check if this contributor has other changes associated with them already */}}
+          {{- if hasKey $contributorDict $author }}
+            {{- $contributionList := get $contributorDict $author }}
+            {{- $contributionList = append $contributionList $changeLink  }}
+            {{- $contributorDict := set $contributorDict $author $contributionList }}
+          {{- else }}
+            {{- $contributionList := list $changeLink }}
+            {{- $contributorDict := set $contributorDict $author $contributionList }}
+          {{- end }}
+        {{- end}}
     {{- end}}
   {{- end }}
   {{- /* no indentation here for formatting so the final markdown doesn't have unneeded indentations */}}
   {{- if $contributorDict}}
   ### Contributors
   {{- range $k,$v := $contributorDict }}
-  - [@{{$k}}](https://github.com/{{$k}}) ({{ range $index, $element := $v }}{{if $index}}, {{end}}[#{{$element}}](https://github.com/dbt-labs/dbt-spark/pull/{{$element}}){{end}})
+  - [@{{$k}}](https://github.com/{{$k}}) ({{ range $index, $element := $v }}{{if $index}}, {{end}}{{$element}}{{end}})
   {{- end }}
   {{- end }}
diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
index 39eacf9a..92aff8eb 100644
--- a/.github/workflows/bot-changelog.yml
+++ b/.github/workflows/bot-changelog.yml
@@ -9,7 +9,6 @@
 # time: <current timestamp>
 # custom:
 #   Author: <PR User Login (generally the bot)>
-#   Issue: 4904
 #   PR: <PR number>
 #
 # **why?**
@@ -40,7 +39,7 @@ jobs:
       matrix:
         include:
           - label: "dependencies"
-            changie_kind: "Dependency"
+            changie_kind: "Dependencies"
           - label: "snyk"
             changie_kind: "Security"
     runs-on: ubuntu-latest
@@ -58,4 +57,4 @@ jobs:
         commit_message: "Add automated changelog yaml from template for bot PR"
         changie_kind: ${{ matrix.changie_kind }}
         label: ${{ matrix.label }}
-        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  Issue: 417\n  PR: ${{ github.event.pull_request.number }}"
+        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  PR: ${{ github.event.pull_request.number }}"

From 4d179e0af38d15aca085dccff49e674d22299e85 Mon Sep 17 00:00:00 2001
From: Alexander Smolyakov <alexander.smolyakov.dev@gmail.com>
Date: Mon, 30 Jan 2023 19:19:03 +0400
Subject: [PATCH 381/603] Update release workflow (#613)

- Update AWS secrets
- Rework condition for Slack notification
---
 .github/workflows/release.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 397bbdb4..0aa1298c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -132,8 +132,8 @@ jobs:
       test_run: ${{ inputs.test_run }}
 
     secrets:
-      AWS_ACCESS_KEY_ID: ${{ secrets.PRODUCTION_AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.PRODUCTION_AWS_SECRET_ACCESS_KEY }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
 
   github-release:
     name: GitHub Release
@@ -166,7 +166,7 @@ jobs:
 
   slack-notification:
     name: Slack Notification
-    if: ${{ failure() }}
+    if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }}
 
     needs:
       [

From 9241c385b1561e58aa262eff7c4782d2a174faf1 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Tue, 31 Jan 2023 10:46:57 -0600
Subject: [PATCH 382/603] change type to string (#611)

* change type to string

* reduce required length to 1
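
For illustration only (the file name, timestamp, and issue numbers below are
hypothetical), with `Issue` now a free-form string a single changelog entry can
reference several issues in one space-separated field, and the `changeFormat`
loop renders one link per number:

    # .changes/unreleased/Fixes-20230127-000000.yaml  (hypothetical example)
    kind: Fixes
    body: Handle an example edge case in the incremental materialization
    time: 2023-01-27T00:00:00.000000-06:00
    custom:
      Author: someuser
      Issue: "123 456"

Such an entry renders as a single bullet ending in two issue links rather than
requiring one changelog file per issue.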
---
 .changie.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.changie.yaml b/.changie.yaml
index e6cc1964..6a670c6d 100644
--- a/.changie.yaml
+++ b/.changie.yaml
@@ -73,8 +73,8 @@ custom:
   minLength: 3
 - key: Issue
   label: GitHub Issue Number (separated by a single space if multiple)
-  type: int
-  minLength: 4
+  type: string
+  minLength: 1
 
 footerFormat: |
   {{- $contributorDict := dict }}

From 5d27961798e2ddc8f86433dbfa865ad5d5cb4946 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Wed, 1 Feb 2023 13:47:24 -0600
Subject: [PATCH 383/603] fix bug with contributor list (#620)

---
 .changie.yaml | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/.changie.yaml b/.changie.yaml
index 6a670c6d..09dfd3cf 100644
--- a/.changie.yaml
+++ b/.changie.yaml
@@ -88,19 +88,28 @@ footerFormat: |
       {{- /* we only want to include non-core team contributors */}}
       {{- if not (has $authorLower $core_team)}}
         {{- $changeList := splitList " " $change.Custom.Author }}
+          {{- $IssueList := list }}
           {{- $changeLink := $change.Kind }}
           {{- if or (eq $change.Kind "Dependencies") (eq $change.Kind "Security") }}
-            {{- $changeLink = "[#nbr](https://github.com/dbt-labs/dbt-spark/pull/nbr)" | replace "nbr" $change.Custom.PR }}
+            {{- $changes := splitList " " $change.Custom.PR }}
+            {{- range $issueNbr := $changes }}
+              {{- $changeLink := "[#nbr](https://github.com/dbt-labs/dbt-spark/pull/nbr)" | replace "nbr" $issueNbr }}
+              {{- $IssueList = append $IssueList $changeLink  }}
+            {{- end -}}
           {{- else }}
-            {{- $changeLink = "[#nbr](https://github.com/dbt-labs/dbt-spark/issues/nbr)" | replace "nbr" $change.Custom.Issue }}
+            {{- $changes := splitList " " $change.Custom.Issue }}
+            {{- range $issueNbr := $changes }}
+              {{- $changeLink := "[#nbr](https://github.com/dbt-labs/dbt-spark/issues/nbr)" | replace "nbr" $issueNbr }}
+              {{- $IssueList = append $IssueList $changeLink  }}
+            {{- end -}}
           {{- end }}
           {{- /* check if this contributor has other changes associated with them already */}}
           {{- if hasKey $contributorDict $author }}
             {{- $contributionList := get $contributorDict $author }}
-            {{- $contributionList = append $contributionList $changeLink  }}
+            {{- $contributionList = concat $contributionList $IssueList  }}
             {{- $contributorDict := set $contributorDict $author $contributionList }}
           {{- else }}
-            {{- $contributionList := list $changeLink }}
+            {{- $contributionList := $IssueList }}
             {{- $contributorDict := set $contributorDict $author $contributionList }}
           {{- end }}
         {{- end}}

From f877d1e7bc9721c200c9712ca897379fa35c2554 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Wed, 1 Feb 2023 22:23:52 -0800
Subject: [PATCH 384/603] Convert Spark persist docs test (#612)

* Begin conversion and get ready for CI testing.

* Uncheck models

* Change core index.

* Pare test down for minimal test

* Adjust code with some guessing.

* Forgot a version tag

* Make test conversion work. Finally

* Fix up the code.

* Attempt to fix test conversion with profile skips.

* Add missing column test and cleanup code.

* Remove shas from the requirements now that base conversion is live.

* Revert whitespace change.

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .../adapter/persist_docs/fixtures.py}         |  93 +++++++-------
 .../adapter/persist_docs/test_persist_docs.py | 119 ++++++++++++++++++
 .../models/incremental_delta_model.sql        |   2 -
 .../persist_docs/models/my_fun_docs.md        |  10 --
 .../persist_docs/models/no_docs_model.sql     |   1 -
 .../persist_docs/models/table_delta_model.sql |   2 -
 .../persist_docs/models/view_model.sql        |   2 -
 tests/integration/persist_docs/seeds/seed.csv |   3 -
 .../integration/persist_docs/seeds/seeds.yml  |  26 ----
 .../persist_docs/test_persist_docs.py         |  68 ----------
 10 files changed, 163 insertions(+), 163 deletions(-)
 rename tests/{integration/persist_docs/models/schema.yml => functional/adapter/persist_docs/fixtures.py} (60%)
 create mode 100644 tests/functional/adapter/persist_docs/test_persist_docs.py
 delete mode 100644 tests/integration/persist_docs/models/incremental_delta_model.sql
 delete mode 100644 tests/integration/persist_docs/models/my_fun_docs.md
 delete mode 100644 tests/integration/persist_docs/models/no_docs_model.sql
 delete mode 100644 tests/integration/persist_docs/models/table_delta_model.sql
 delete mode 100644 tests/integration/persist_docs/models/view_model.sql
 delete mode 100644 tests/integration/persist_docs/seeds/seed.csv
 delete mode 100644 tests/integration/persist_docs/seeds/seeds.yml
 delete mode 100644 tests/integration/persist_docs/test_persist_docs.py

diff --git a/tests/integration/persist_docs/models/schema.yml b/tests/functional/adapter/persist_docs/fixtures.py
similarity index 60%
rename from tests/integration/persist_docs/models/schema.yml
rename to tests/functional/adapter/persist_docs/fixtures.py
index 6680f392..3c351ab5 100644
--- a/tests/integration/persist_docs/models/schema.yml
+++ b/tests/functional/adapter/persist_docs/fixtures.py
@@ -1,31 +1,35 @@
+_MODELS__MY_FUN_DOCS = """
+{% docs my_fun_doc %}
+name Column description "with double quotes"
+and with 'single  quotes' as welll as other;
+'''abc123'''
+reserved -- characters
+--
+/* comment */
+Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+
+{% enddocs %}
+"""
+
+_MODELS__INCREMENTAL_DELTA = """
+{{ config(materialized='incremental', file_format='delta') }}
+select 1 as id, 'Joe' as name
+"""
+
+_MODELS__TABLE_DELTA_MODEL = """
+{{ config(materialized='table', file_format='delta') }}
+select 1 as id, 'Joe' as name
+"""
+
+_MODELS__TABLE_DELTA_MODEL_MISSING_COLUMN = """
+{{ config(materialized='table', file_format='delta') }}
+select 1 as id, 'Joe' as different_name
+"""
+
+_PROPERTIES__MODELS = """
 version: 2
 
 models:
-  
-  - name: table_parquet_model
-    description: |
-      Table model description "with double quotes"
-      and with 'single  quotes' as welll as other;
-      '''abc123'''
-      reserved -- characters
-      --
-      /* comment */
-      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
-    columns:
-      - name: id
-        description: |
-          id Column description "with double quotes"
-          and with 'single  quotes' as welll as other;
-          '''abc123'''
-          reserved -- characters
-          --
-          /* comment */
-          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
-      - name: name
-        description: |
-          Some stuff here and then a call to
-          {{ doc('my_fun_doc')}}
-
   - name: table_delta_model
     description: |
       Table model description "with double quotes"
@@ -50,9 +54,9 @@
           Some stuff here and then a call to
           {{ doc('my_fun_doc')}}
 
-  - name: table_hudi_model
+  - name: incremental_delta_model
     description: |
-      Table model description "with double quotes"
+      Incremental model description "with double quotes"
       and with 'single  quotes' as welll as other;
       '''abc123'''
       reserved -- characters
@@ -73,30 +77,15 @@
         description: |
           Some stuff here and then a call to
           {{ doc('my_fun_doc')}}
-  
-  - name: view_model
-    description: |
-      View model description "with double quotes"
-      and with 'single  quotes' as welll as other;
-      '''abc123'''
-      reserved -- characters
-      --
-      /* comment */
-      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
-    columns:
-      - name: id
-        description: |
-          id Column description "with double quotes"
-          and with 'single  quotes' as welll as other;
-          '''abc123'''
-          reserved -- characters
-          --
-          /* comment */
-          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+"""
 
-  - name: incremental_delta_model
+_PROPERTIES__SEEDS = """
+version: 2
+
+seeds:
+  - name: seed
     description: |
-      Incremental model description "with double quotes"
+      Seed model description "with double quotes"
       and with 'single  quotes' as welll as other;
       '''abc123'''
       reserved -- characters
@@ -117,3 +106,9 @@
         description: |
           Some stuff here and then a call to
           {{ doc('my_fun_doc')}}
+"""
+
+_SEEDS__BASIC = """id,name
+1,Alice
+2,Bob
+"""
diff --git a/tests/functional/adapter/persist_docs/test_persist_docs.py b/tests/functional/adapter/persist_docs/test_persist_docs.py
new file mode 100644
index 00000000..3ddab6df
--- /dev/null
+++ b/tests/functional/adapter/persist_docs/test_persist_docs.py
@@ -0,0 +1,119 @@
+import json
+import os
+import pytest
+
+from dbt.tests.util import run_dbt
+
+from fixtures import (
+    _MODELS__MY_FUN_DOCS,
+    _MODELS__INCREMENTAL_DELTA,
+    _MODELS__TABLE_DELTA_MODEL,
+    _MODELS__TABLE_DELTA_MODEL_MISSING_COLUMN,
+    _PROPERTIES__MODELS,
+    _PROPERTIES__SEEDS,
+    _SEEDS__BASIC,
+)
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestPersistDocsDeltaTable:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "incremental_delta_model.sql": _MODELS__INCREMENTAL_DELTA,
+            "my_fun_docs.md": _MODELS__MY_FUN_DOCS,
+            "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL,
+            "schema.yml": _PROPERTIES__MODELS
+        }
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "seed.csv": _SEEDS__BASIC,
+            "seed.yml": _PROPERTIES__SEEDS
+        }
+
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            'models': {
+                'test': {
+                    '+persist_docs': {
+                        "relation": True,
+                        "columns": True,
+                    },
+                }
+            },
+            'seeds': {
+                'test': {
+                    '+persist_docs': {
+                        "relation": True,
+                        "columns": True,
+                    },
+                    '+file_format': 'delta',
+                    '+quote_columns': True
+                }
+            },
+        }
+
+    def test_delta_comments(self, project):
+        run_dbt(['seed'])
+        run_dbt(['run'])
+
+        for table, whatis in [
+            ('table_delta_model', 'Table'),
+            ('seed', 'Seed'),
+            ('incremental_delta_model', 'Incremental')
+        ]:
+            results = project.run_sql(
+                'describe extended {schema}.{table}'.format(schema=project.test_schema, table=table),
+                fetch='all'
+            )
+
+            for result in results:
+                if result[0] == 'Comment':
+                    assert result[1].startswith(f'{whatis} model description')
+                if result[0] == 'id':
+                    assert result[2].startswith('id Column description')
+                if result[0] == 'name':
+                    assert result[2].startswith('Some stuff here and then a call to')
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestPersistDocsMissingColumn:
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "test": {
+                    "+persist_docs": {
+                        "columns": True,
+                    },
+                }
+            }
+        }
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "seed.csv": _SEEDS__BASIC,
+            "seed.yml": _PROPERTIES__SEEDS
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL_MISSING_COLUMN,
+            "my_fun_docs.md": _MODELS__MY_FUN_DOCS,
+        }
+
+    @pytest.fixture(scope="class")
+    def properties(self):
+        return {"schema.yml": _PROPERTIES__MODELS}
+
+    def test_missing_column(self, project):
+        '''spark will use our schema to verify all columns exist rather than fail silently'''
+        run_dbt(["seed"])
+        res = run_dbt(["run"], expect_pass=False)
+        assert "Missing field name in table" in res[0].message
diff --git a/tests/integration/persist_docs/models/incremental_delta_model.sql b/tests/integration/persist_docs/models/incremental_delta_model.sql
deleted file mode 100644
index c3f325ea..00000000
--- a/tests/integration/persist_docs/models/incremental_delta_model.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-{{ config(materialized='incremental', file_format='delta') }}
-select 1 as id, 'Joe' as name
diff --git a/tests/integration/persist_docs/models/my_fun_docs.md b/tests/integration/persist_docs/models/my_fun_docs.md
deleted file mode 100644
index f3c0fbf5..00000000
--- a/tests/integration/persist_docs/models/my_fun_docs.md
+++ /dev/null
@@ -1,10 +0,0 @@
-{% docs my_fun_doc %}
-name Column description "with double quotes"
-and with 'single  quotes' as welll as other;
-'''abc123'''
-reserved -- characters
---
-/* comment */
-Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
-
-{% enddocs %}
diff --git a/tests/integration/persist_docs/models/no_docs_model.sql b/tests/integration/persist_docs/models/no_docs_model.sql
deleted file mode 100644
index e39a7a15..00000000
--- a/tests/integration/persist_docs/models/no_docs_model.sql
+++ /dev/null
@@ -1 +0,0 @@
-select 1 as id, 'Alice' as name
diff --git a/tests/integration/persist_docs/models/table_delta_model.sql b/tests/integration/persist_docs/models/table_delta_model.sql
deleted file mode 100644
index 06e40e0c..00000000
--- a/tests/integration/persist_docs/models/table_delta_model.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-{{ config(materialized='table', file_format='delta') }}
-select 1 as id, 'Joe' as name
diff --git a/tests/integration/persist_docs/models/view_model.sql b/tests/integration/persist_docs/models/view_model.sql
deleted file mode 100644
index a6f96a16..00000000
--- a/tests/integration/persist_docs/models/view_model.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-{{ config(materialized='view') }}
-select 2 as id, 'Bob' as name
diff --git a/tests/integration/persist_docs/seeds/seed.csv b/tests/integration/persist_docs/seeds/seed.csv
deleted file mode 100644
index 4a295177..00000000
--- a/tests/integration/persist_docs/seeds/seed.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-id,name
-1,Alice
-2,Bob
\ No newline at end of file
diff --git a/tests/integration/persist_docs/seeds/seeds.yml b/tests/integration/persist_docs/seeds/seeds.yml
deleted file mode 100644
index 7ab82fa6..00000000
--- a/tests/integration/persist_docs/seeds/seeds.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-version: 2
-
-seeds:
-  - name: seed
-    description: |
-      Seed model description "with double quotes"
-      and with 'single  quotes' as welll as other;
-      '''abc123'''
-      reserved -- characters
-      --
-      /* comment */
-      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
-    columns:
-      - name: id
-        description: |
-          id Column description "with double quotes"
-          and with 'single  quotes' as welll as other;
-          '''abc123'''
-          reserved -- characters
-          --
-          /* comment */
-          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
-      - name: name
-        description: |
-          Some stuff here and then a call to
-          {{ doc('my_fun_doc')}}
diff --git a/tests/integration/persist_docs/test_persist_docs.py b/tests/integration/persist_docs/test_persist_docs.py
deleted file mode 100644
index 527f548a..00000000
--- a/tests/integration/persist_docs/test_persist_docs.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from cProfile import run
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestPersistDocsDelta(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "persist_docs_columns"
-        
-    @property
-    def models(self):
-        return "models"
-
-    @property
-    def project_config(self):
-        return {
-            'config-version': 2,
-            'models': {
-                'test': {
-                    '+persist_docs': {
-                        "relation": True,
-                        "columns": True,
-                    },
-                }
-            },
-            'seeds': {
-                'test': {
-                    '+persist_docs': {
-                        "relation": True,
-                        "columns": True,
-                    },
-                    '+file_format': 'delta',
-                    '+quote_columns': True
-                }
-            },
-        }
-
-    def test_delta_comments(self):
-        self.run_dbt(['seed'])
-        self.run_dbt(['run'])
-        
-        for table, whatis in [
-            ('table_delta_model', 'Table'), 
-            ('seed', 'Seed'), 
-            ('incremental_delta_model', 'Incremental')
-        ]:
-            results = self.run_sql(
-                'describe extended {schema}.{table}'.format(schema=self.unique_schema(), table=table),
-                fetch='all'
-            )
-            
-            for result in results:
-                if result[0] == 'Comment':
-                    assert result[1].startswith(f'{whatis} model description')
-                if result[0] == 'id':
-                    assert result[2].startswith('id Column description')
-                if result[0] == 'name':
-                    assert result[2].startswith('Some stuff here and then a call to')
-
-    # runs on Spark v3.0
-    @use_profile("databricks_cluster")
-    def test_delta_comments_databricks_cluster(self):
-        self.test_delta_comments()
-
-    # runs on Spark v3.0
-    @use_profile("databricks_sql_endpoint")
-    def test_delta_comments_databricks_sql_endpoint(self):
-        self.test_delta_comments()

From 55d86d09b8630b2e0a2107c696d632721fd04bd4 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Fri, 3 Feb 2023 11:03:36 -0600
Subject: [PATCH 385/603] update regex for version bump (#630)

* update regex for version bump

* update to prekind

* more renaming

* finish up adding nightly release check for future proofing
---
 .bumpversion.cfg | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 949720f7..8fcb9d4a 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,16 +1,26 @@
 [bumpversion]
 current_version = 1.5.0a1
-parse = (?P<major>\d+)
-	\.(?P<minor>\d+)
-	\.(?P<patch>\d+)
-	((?P<prerelease>a|b|rc)(?P<num>\d+))?
+
+# `parse` allows parsing the version into the parts we need to check.  There are some
+# unnamed groups and that's okay because they do not need to be audited.  If any part
+# of the version passed and does not match the regex, it will fail.
+# expected matches: `1.5.0`, `1.5.0a1`, `1.5.0a1.dev123457+nightly`
+# excepted failures: `1`, `1.5`, `1.5.2-a1`, `text1.5.0`
+parse = (?P<major>[\d]+) # major version number
+	\.(?P<minor>[\d]+) # minor version number
+	\.(?P<patch>[\d]+) # patch version number
+	(((?P<prekind>a|b|rc) # optional pre-release type
+	?(?P<num>[\d]+?)) # optional pre-release version number
+	\.?(?P<nightly>[a-z0-9]+\+[a-z]+)? # optional nightly release indicator
+	)?
 serialize =
-	{major}.{minor}.{patch}{prerelease}{num}
+	{major}.{minor}.{patch}{prekind}{num}.{nightly}
+	{major}.{minor}.{patch}{prekind}{num}
 	{major}.{minor}.{patch}
 commit = False
 tag = False
 
-[bumpversion:part:prerelease]
+[bumpversion:part:prekind]
 first_value = a
 optional_value = final
 values =
@@ -22,6 +32,8 @@ values =
 [bumpversion:part:num]
 first_value = 1
 
+[bumpversion:part:nightly]
+
 [bumpversion:file:setup.py]
 
 [bumpversion:file:dbt/adapters/spark/__version__.py]
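
The `parse` pattern above relies on inline comments, so a quick way to sanity-check it is a verbose-mode regex. A minimal Python sketch (illustrative only, not how bumpversion itself evaluates the config; `re.VERBOSE` and `fullmatch` are assumptions of this sketch) that exercises the documented matches and failures:

import re

# The new `parse` pattern from .bumpversion.cfg, compiled in verbose mode so
# the inline comments are ignored (assumption for this sketch).
PARSE = re.compile(
    r"""
    (?P<major>[\d]+)                        # major version number
    \.(?P<minor>[\d]+)                      # minor version number
    \.(?P<patch>[\d]+)                      # patch version number
    (((?P<prekind>a|b|rc)                   # optional pre-release type
    ?(?P<num>[\d]+?))                       # optional pre-release version number
    \.?(?P<nightly>[a-z0-9]+\+[a-z]+)?      # optional nightly release indicator
    )?
    """,
    re.VERBOSE,
)

# expected matches and expected failures listed in the config comment
for version in ["1.5.0", "1.5.0a1", "1.5.0a1.dev123457+nightly", "1", "1.5", "1.5.2-a1", "text1.5.0"]:
    match = PARSE.fullmatch(version)
    print(f"{version!r:30} -> {match.groupdict() if match else 'no match'}")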

From 5baa88a79b7824bf626b6cd99c5e4893c17e95e6 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 7 Feb 2023 15:23:58 -0800
Subject: [PATCH 386/603] convert test_store_test_failures to functional (#629)

* add mypy ignore to column, connections and init

* convert test_store_test_failures to functional

* convert test_store_test_failures to functional

* remove dev requirements override

* experiment with naming

* specify test schema

* test specify delta in seeds schema

* test specify store schema

* test quote columns=True

* don't run databricks_cluster or databricks_sql_endpoint for store test failures

* remove store_failures integration tests
---
 dev-requirements.txt                          |  2 -
 .../test_store_test_failures.py               | 46 +++++++++++++++++
 .../store_failures/models/schema.yml          |  9 ----
 .../store_failures/models/view_model.sql      |  5 --
 .../store_failures/test_store_failures.py     | 51 -------------------
 5 files changed, 46 insertions(+), 67 deletions(-)
 create mode 100644 tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
 delete mode 100644 tests/integration/store_failures/models/schema.yml
 delete mode 100644 tests/integration/store_failures/models/view_model.sql
 delete mode 100644 tests/integration/store_failures/test_store_failures.py

diff --git a/dev-requirements.txt b/dev-requirements.txt
index e93c1b41..58009d04 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -3,8 +3,6 @@
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
-
-
 black==22.8.0
 bumpversion
 click~=8.1.3
diff --git a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
new file mode 100644
index 00000000..86c15a55
--- /dev/null
+++ b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
@@ -0,0 +1,46 @@
+import pytest
+
+from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import StoreTestFailuresBase, \
+    TEST_AUDIT_SCHEMA_SUFFIX
+
+
+@pytest.mark.skip_profile('spark_session', 'databricks_cluster', 'databricks_sql_endpoint')
+class TestSparkStoreTestFailures(StoreTestFailuresBase):
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "seeds": {
+                "quote_columns": True,
+            },
+            'tests': {
+                "+schema": TEST_AUDIT_SCHEMA_SUFFIX,
+                '+store_failures': True
+            }
+        }
+    def test_store_and_assert(self, project):
+        self.run_tests_store_one_failure(project)
+        self.run_tests_store_failures_and_assert(project)
+
+
+@pytest.mark.skip_profile('apache_spark', 'spark_session')
+class TestSparkStoreTestFailuresWithDelta(StoreTestFailuresBase):
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "seeds": {
+                "quote_columns": False,
+                "test": self.column_type_overrides(),
+                "+file_format": "delta",
+            },
+            'tests': {
+                "+schema": TEST_AUDIT_SCHEMA_SUFFIX,
+                '+store_failures': True,
+                '+file_format': 'delta',
+            }
+        }
+
+    def test_store_and_assert_failure_with_delta(self, project):
+        self.run_tests_store_one_failure(project)
+        self.run_tests_store_failures_and_assert(project)
diff --git a/tests/integration/store_failures/models/schema.yml b/tests/integration/store_failures/models/schema.yml
deleted file mode 100644
index be559b20..00000000
--- a/tests/integration/store_failures/models/schema.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-version: 2
-
-models:
-  - name: view_model
-    columns:
-      - name: id
-        tests:
-          - unique
-          - not_null
diff --git a/tests/integration/store_failures/models/view_model.sql b/tests/integration/store_failures/models/view_model.sql
deleted file mode 100644
index 2ff36b4e..00000000
--- a/tests/integration/store_failures/models/view_model.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-select 1 as id
-union all
-select 1 as id
-union all
-select null as id
diff --git a/tests/integration/store_failures/test_store_failures.py b/tests/integration/store_failures/test_store_failures.py
deleted file mode 100644
index 679bde64..00000000
--- a/tests/integration/store_failures/test_store_failures.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-class TestStoreFailures(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "store_failures"
-        
-    @property
-    def models(self):
-        return "models"
-
-    @property
-    def project_config(self):
-        return {
-            'config-version': 2,
-            'tests': {
-                '+store_failures': True,
-                '+severity': 'warn',
-            }
-        }
-
-    def test_store_failures(self):
-        self.run_dbt(['run'])
-        results = self.run_dbt(['test', '--store-failures'])
-
-class TestStoreFailuresApacheSpark(TestStoreFailures):
-
-    @use_profile("apache_spark")
-    def test_store_failures_apache_spark(self):
-        self.test_store_failures()
-        
-class TestStoreFailuresDelta(TestStoreFailures):
-
-    @property
-    def project_config(self):
-        return {
-            'config-version': 2,
-            'tests': {
-                '+store_failures': True,
-                '+severity': 'warn',
-                '+file_format': 'delta',
-            }
-        }
-
-    @use_profile("databricks_cluster")
-    def test_store_failures_databricks_cluster(self):
-        self.test_store_failures()
-    
-    @use_profile("databricks_sql_endpoint")
-    def test_store_failures_databricks_sql_endpoint(self):
-        self.test_store_failures()

From c87d744b8cc6463cc586f6287704c2fccd41cc96 Mon Sep 17 00:00:00 2001
From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com>
Date: Wed, 8 Feb 2023 15:13:05 -0600
Subject: [PATCH 387/603] Convert merge exclude columns tests (#601)

* pin core branch for tests

* add adapter test

* changie

* remove integration tests added in 390

* delta file format config

* skip profile

* include file format for dbx endpoints

* unpin core dev branch
---
 .../unreleased/Fixes-20230123-134955.yaml     |  7 ++++++
 .../test_incremental_merge_exclude_columns.py | 14 ++++++++++++
 .../models_delta/merge_exclude_columns.sql    | 22 -------------------
 .../seeds/expected_exclude_upsert.csv         |  4 ----
 .../test_incremental_strategies.py            |  1 -
 5 files changed, 21 insertions(+), 27 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230123-134955.yaml
 create mode 100644 tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
 delete mode 100644 tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
 delete mode 100644 tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv

diff --git a/.changes/unreleased/Fixes-20230123-134955.yaml b/.changes/unreleased/Fixes-20230123-134955.yaml
new file mode 100644
index 00000000..a40d912b
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230123-134955.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: add merge_exclude_columns tests
+time: 2023-01-23T13:49:55.74249-06:00
+custom:
+  Author: dave-connors-3
+  Issue: "00"
+  PR: "600"
diff --git a/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py b/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
new file mode 100644
index 00000000..e3b07f03
--- /dev/null
+++ b/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
@@ -0,0 +1,14 @@
+import pytest
+
+from dbt.tests.util import run_dbt
+from dbt.tests.adapter.incremental.test_incremental_merge_exclude_columns import BaseMergeExcludeColumns
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark')
+class TestMergeExcludeColumns(BaseMergeExcludeColumns):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta"
+            }
+        }
diff --git a/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql b/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
deleted file mode 100644
index 815f46b1..00000000
--- a/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'delta',
-    unique_key = 'id',
-    merge_exclude_columns = ['msg'],
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
-
-{% else %}
-
--- msg will be ignored, color will be updated
-select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
-union all
-select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
-
-{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv
deleted file mode 100644
index a0f1a652..00000000
--- a/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-id,msg,color
-1,hello,blue
-2,goodbye,green
-3,anyway,purple
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
index cea8225e..026f562d 100644
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ b/tests/integration/incremental_strategies/test_incremental_strategies.py
@@ -77,7 +77,6 @@ def run_and_test(self):
         self.assertTablesEqual("merge_no_key", "expected_append")
         self.assertTablesEqual("merge_unique_key", "expected_upsert")
         self.assertTablesEqual("merge_update_columns", "expected_partial_upsert")
-        self.assertTablesEqual("merge_exclude_columns", "expected_exclude_upsert")
 
     @use_profile("databricks_cluster")
     def test_delta_strategies_databricks_cluster(self):
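
For readers following the conversion: the deleted model and seed above encode the behavior being tested. A rough Python sketch of the merge semantics (illustrative only; the real work happens in the generated MERGE statement, and the column names come straight from the removed fixtures):

# Rows already in the target table vs. rows produced by the incremental run,
# keyed by `id` (taken from the deleted merge_exclude_columns.sql fixture).
existing = {
    1: {"msg": "hello", "color": "blue"},
    2: {"msg": "goodbye", "color": "red"},
}
incoming = {
    2: {"msg": "yo", "color": "green"},
    3: {"msg": "anyway", "color": "purple"},
}
exclude = {"msg"}  # merge_exclude_columns=['msg']

for key, new_row in incoming.items():
    if key in existing:
        # matched rows: update every column except the excluded ones
        existing[key].update({col: val for col, val in new_row.items() if col not in exclude})
    else:
        # unmatched rows: insert as-is
        existing[key] = new_row

# existing now mirrors the deleted expected_exclude_upsert.csv:
#   1,hello,blue / 2,goodbye,green / 3,anyway,purple
print(existing)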

From 0c8288ce6c2b2c2ef7caab0ed55cfa1cd2b51de7 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 9 Feb 2023 16:30:30 -0800
Subject: [PATCH 388/603] remove invocation of integration tests in tox (#614)

* add mypy ignore to column, connections and init

* remove invocation of integration tests in tox

* add changie

* remove leading space from changie entry

* update CONTRIBUTING.md
---
 .changes/unreleased/Under the Hood-20230130-170310.yaml | 6 ++++++
 CONTRIBUTING.md                                         | 6 +++---
 tox.ini                                                 | 3 ---
 3 files changed, 9 insertions(+), 6 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230130-170310.yaml

diff --git a/.changes/unreleased/Under the Hood-20230130-170310.yaml b/.changes/unreleased/Under the Hood-20230130-170310.yaml
new file mode 100644
index 00000000..c9131b6b
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230130-170310.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: 'remove tox call to integration tests'
+time: 2023-01-30T17:03:10.031843-08:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "584"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1d6e76d3..57d6ee47 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -72,8 +72,8 @@ There are a few methods for running tests locally.
 Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like:
 
 ```sh
-# run specific spark integration tests
-python -m pytest -m profile_spark tests/integration/get_columns_in_relation
+# run all functional tests
+python -m pytest --profile databricks_sql_endpoint tests/functional/
 # run specific functional tests
 python -m pytest --profile databricks_sql_endpoint tests/functional/adapter/test_basic.py
 # run all unit tests in a file
@@ -104,7 +104,7 @@ You don't need to worry about which `dbt-spark` version your change will go into
 
 dbt Labs provides a CI environment to test changes to the `dbt-spark` adapter, and periodic checks against the development version of `dbt-core` through Github Actions.
 
-A `dbt-spark` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
+A `dbt-spark` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or functional test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
 
 Once all requests and answers have been answered the `dbt-spark` maintainer can trigger CI testing.
 
diff --git a/tox.ini b/tox.ini
index 31456b08..0747b61f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -31,7 +31,6 @@ allowlist_externals =
     /bin/bash
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/*'
-           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 tests/integration/*'
 passenv =
     DBT_*
     PYTEST_ADDOPTS
@@ -46,7 +45,6 @@ allowlist_externals =
     /bin/bash
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
-           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 tests/integration/*'
 passenv =
     DBT_*
     PYTEST_ADDOPTS
@@ -62,7 +60,6 @@ allowlist_externals =
     /bin/bash
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
-           /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 tests/integration/*'
 passenv =
     DBT_*
     PYTEST_ADDOPTS

From 3de7603283c6fa53e85032461ab273c5b13246f7 Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Fri, 10 Feb 2023 11:55:04 -0500
Subject: [PATCH 389/603] parse vars in unit test utils  (#632)

* test against feature/click-cli core branch

* fix unit tests

* remove tests/integration tests

* done testing against feature branch

* Temporary test for click branch

* Done testing click

* Not done, sorry

* fix unittest

* Done testing click feature, working properly

* fix unittest

* use new get_flags

---------

Co-authored-by: Stu Kilgore <stuart.kilgore@gmail.com>
Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Co-authored-by: Chenyu Li <chenyu.li@dbtlabs.com>
---
 dbt/adapters/spark/impl.py |  3 ++-
 tests/unit/utils.py        | 11 ++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 95af4291..fea5bbac 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -24,6 +24,7 @@
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
 from dbt.events import AdapterLogger
+from dbt.flags import get_flags
 from dbt.utils import executor, AttrDict
 
 logger = AdapterLogger("Spark")
@@ -127,7 +128,7 @@ def add_schema_to_cache(self, schema) -> str:
             raise dbt.exceptions.CompilationError(
                 "Attempted to cache a null schema for {}".format(name)
             )
-        if dbt.flags.USE_CACHE:  # type: ignore
+        if get_flags().USE_CACHE:  # type: ignore
             self.cache.add_schema(None, schema)
         # so jinja doesn't render things
         return ""
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 68d0fc33..585f1f61 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -46,6 +46,12 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'):
         cli_vars = parse_cli_vars(cli_vars)
 
     renderer = ProfileRenderer(cli_vars)
+
+    # in order to call dbt's internal profile rendering, we need to set the
+    # flags global. This is a bit of a hack, but it's the best way to do it.
+    from dbt.flags import set_from_args
+    from argparse import Namespace
+    set_from_args(Namespace(), None)
     return Profile.from_raw_profile_info(
         profile,
         profile_name,
@@ -54,7 +60,6 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'):
 
 
 def project_from_dict(project, profile, packages=None, selectors=None, cli_vars='{}'):
-    from dbt.context.target import generate_target_context
     from dbt.config import Project
     from dbt.config.renderer import DbtProjectYamlRenderer
     from dbt.config.utils import parse_cli_vars
@@ -77,7 +82,11 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars=
 
 def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars='{}'):
     from dbt.config import Project, Profile, RuntimeConfig
+    from dbt.config.utils import parse_cli_vars
     from copy import deepcopy
+    if not isinstance(cli_vars, dict):
+        cli_vars = parse_cli_vars(cli_vars)
+
 
     if isinstance(project, Project):
         profile_name = project.profile_name
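
The new lines in tests/unit/utils.py work around the fact that dbt's internal config rendering now reads a process-global flags object. A minimal sketch of that pattern (mirroring the calls added in the diff; exact flag defaults depend on the installed dbt-core version):

from argparse import Namespace

from dbt.flags import get_flags, set_from_args

# Seed the global flags object before touching any dbt internals that call
# get_flags() -- the same call the unit-test helper now makes.
set_from_args(Namespace(), None)

flags = get_flags()  # e.g. impl.py's add_schema_to_cache now reads flags.USE_CACHE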

From cda0d09f104fcebfd1ee62d5da7db76a76a8682e Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Mon, 13 Feb 2023 14:09:27 -0600
Subject: [PATCH 390/603] Sync Changelog team to GitHub team (#621)

* update to dynamically determine core team members

* tweak team logic to make it more intuitive

* temporarily point to dev branch

* move bump version comment

* point workflow back to main
---
 .bumpversion.cfg |  8 +-------
 .changie.yaml    | 13 ++++++++++---
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 8fcb9d4a..32859c09 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,18 +1,12 @@
 [bumpversion]
 current_version = 1.5.0a1
-
-# `parse` allows parsing the version into the parts we need to check.  There are some
-# unnamed groups and that's okay because they do not need to be audited.  If any part
-# of the version passed and does not match the regex, it will fail.
-# expected matches: `1.5.0`, `1.5.0a1`, `1.5.0a1.dev123457+nightly`
-# excepted failures: `1`, `1.5`, `1.5.2-a1`, `text1.5.0`
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
 	(((?P<prekind>a|b|rc) # optional pre-release type
 	?(?P<num>[\d]+?)) # optional pre-release version number
 	\.?(?P<nightly>[a-z0-9]+\+[a-z]+)? # optional nightly release indicator
-	)?
+	)? # expected matches: `1.5.0`, `1.5.0a1`, `1.5.0a1.dev123457+nightly`, expected failures: `1`, `1.5`, `1.5.2-a1`, `text1.5.0`
 serialize =
 	{major}.{minor}.{patch}{prekind}{num}.{nightly}
 	{major}.{minor}.{patch}{prekind}{num}
diff --git a/.changie.yaml b/.changie.yaml
index 09dfd3cf..f4d5bdb9 100644
--- a/.changie.yaml
+++ b/.changie.yaml
@@ -4,6 +4,7 @@ headerPath: header.tpl.md
 versionHeaderPath: ""
 changelogPath: CHANGELOG.md
 versionExt: md
+envPrefix: "CHANGIE_"
 versionFormat: '## dbt-spark {{.Version}} - {{.Time.Format "January 02, 2006"}}'
 kindFormat: '### {{.Kind}}'
 changeFormat: |-
@@ -78,15 +79,21 @@ custom:
 
 footerFormat: |
   {{- $contributorDict := dict }}
-  {{- /* any names added to this list should be all lowercase for later matching purposes */}}
-  {{- $core_team := list "michelleark" "peterallenwebb" "emmyoop" "nathaniel-may" "gshank" "leahwicz" "chenyulinx" "stu-k" "iknox-fa" "versusfacit" "mcknight-42" "jtcohen6" "aranke" "dependabot[bot]" "snyk-bot" "colin-rogers-dbt" }}
+  {{- /* ensure all names in this list are all lowercase for later matching purposes */}}
+  {{- $core_team := splitList " " .Env.CORE_TEAM }}
+  {{- /* ensure we always skip snyk and dependabot in addition to the core team */}}
+  {{- $maintainers := list "dependabot[bot]" "snyk-bot"}}
+  {{- range $team_member := $core_team }}
+    {{- $team_member_lower := lower $team_member }}
+    {{- $maintainers = append $maintainers $team_member_lower }}
+  {{- end }}
   {{- range $change := .Changes }}
     {{- $authorList := splitList " " $change.Custom.Author }}
     {{- /* loop through all authors for a single changelog */}}
     {{- range $author := $authorList }}
       {{- $authorLower := lower $author }}
       {{- /* we only want to include non-core team contributors */}}
-      {{- if not (has $authorLower $core_team)}}
+      {{- if not (has $authorLower $maintainers)}}
         {{- $changeList := splitList " " $change.Custom.Author }}
           {{- $IssueList := list }}
           {{- $changeLink := $change.Kind }}
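
Since the Go-template syntax is dense, here is an illustrative Python equivalent of the new footer logic (not changie itself): the core-team list is now supplied through the environment (via the new `CHANGIE_` envPrefix) rather than hard-coded, and the two bot accounts are always excluded in addition to it.

def build_maintainers(core_team):
    # bots are always skipped, plus every core-team member (lowercased for matching)
    maintainers = ["dependabot[bot]", "snyk-bot"]
    maintainers.extend(member.lower() for member in core_team)
    return maintainers


def external_contributors(authors, core_team):
    # only non-maintainer authors end up in the Contributors footer
    maintainers = build_maintainers(core_team)
    return [author for author in authors if author.lower() not in maintainers]


# e.g. with a core team supplied via the environment, only community authors remain
print(external_contributors(["b-per", "McKnight-42"], ["McKnight-42", "colin-rogers-dbt"]))
# -> ['b-per']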

From 6f7307bdf65bf2358cd85f3db0a223a810ae81da Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 15 Feb 2023 18:37:05 -0500
Subject: [PATCH 391/603] CT-1967 - test conversion - get columns in relation
 (#643)

* migrated get_columns_in_relation to functional tests
---
 Makefile                                      |  2 +-
 .../adapter/test_get_columns_in_relation.py   | 34 +++++++++++++++++++
 .../get_columns_in_relation/models/child.sql  |  1 -
 .../models/get_columns_from_child.sql         |  6 ----
 .../test_get_columns_in_relation.py           | 27 ---------------
 5 files changed, 35 insertions(+), 35 deletions(-)
 create mode 100644 tests/functional/adapter/test_get_columns_in_relation.py
 delete mode 100644 tests/integration/get_columns_in_relation/models/child.sql
 delete mode 100644 tests/integration/get_columns_in_relation/models/get_columns_from_child.sql
 delete mode 100644 tests/integration/get_columns_in_relation/test_get_columns_in_relation.py

diff --git a/Makefile b/Makefile
index 8b251662..dca1ec07 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 .DEFAULT_GOAL:=help
 
 .PHONY: dev
-dev: ## Installs adapter in develop mode along with development depedencies
+dev: ## Installs adapter in develop mode along with development dependencies
 	@\
 	pip install -r requirements.txt -r dev-requirements.txt && pre-commit install
 
diff --git a/tests/functional/adapter/test_get_columns_in_relation.py b/tests/functional/adapter/test_get_columns_in_relation.py
new file mode 100644
index 00000000..da21be18
--- /dev/null
+++ b/tests/functional/adapter/test_get_columns_in_relation.py
@@ -0,0 +1,34 @@
+import pytest
+
+from dbt.tests.util import run_dbt, relation_from_name, check_relations_equal_with_relations
+
+
+_MODEL_CHILD = "select 1"
+
+
+_MODEL_PARENT = """
+{% set cols = adapter.get_columns_in_relation(ref('child')) %}
+
+select
+    {% for col in cols %}
+        {{ adapter.quote(col.column) }}{%- if not loop.last %},{{ '\n ' }}{% endif %}
+    {% endfor %}
+from {{ ref('child') }}
+"""
+
+
+class TestColumnsInRelation:
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "child.sql": _MODEL_CHILD,
+            "parent.sql": _MODEL_PARENT,
+        }
+
+    @pytest.mark.skip_profile("databricks_http_cluster", "spark_session")
+    def test_get_columns_in_relation(self, project):
+        run_dbt(["run"])
+        child = relation_from_name(project.adapter, "child")
+        parent = relation_from_name(project.adapter, "parent")
+        check_relations_equal_with_relations(project.adapter, [child, parent])
diff --git a/tests/integration/get_columns_in_relation/models/child.sql b/tests/integration/get_columns_in_relation/models/child.sql
deleted file mode 100644
index 2e3761f7..00000000
--- a/tests/integration/get_columns_in_relation/models/child.sql
+++ /dev/null
@@ -1 +0,0 @@
-SELECT 1
diff --git a/tests/integration/get_columns_in_relation/models/get_columns_from_child.sql b/tests/integration/get_columns_in_relation/models/get_columns_from_child.sql
deleted file mode 100644
index 5118ae03..00000000
--- a/tests/integration/get_columns_in_relation/models/get_columns_from_child.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-SELECT 
-  {% set cols = adapter.get_columns_in_relation(ref('child')) %}
-  {% for col in cols %}
-    {{ adapter.quote(col.column) }}{%- if not loop.last %},{{ '\n ' }}{% endif %}
-  {% endfor %}
-FROM {{ ref('child') }}
diff --git a/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py b/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py
deleted file mode 100644
index 418cbd99..00000000
--- a/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestGetColumnInRelationInSameRun(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "get_columns_in_relation"
-
-    @property
-    def models(self):
-        return "models"
-
-    def run_and_test(self):
-        self.run_dbt(["run"])
-        self.assertTablesEqual("child", "get_columns_from_child")
-
-    @use_profile("apache_spark")
-    def test_get_columns_in_relation_in_same_run_apache_spark(self):
-        self.run_and_test()
-
-    @use_profile("databricks_cluster")
-    def test_get_columns_in_relation_in_same_run_databricks_cluster(self):
-        self.run_and_test()
-
-    @use_profile("databricks_sql_endpoint")
-    def test_get_columns_in_relation_in_same_run_databricks_sql_endpoint(self):
-        self.run_and_test()

From b3f655890534cc76ebdc3451f42b56a2a3fb613e Mon Sep 17 00:00:00 2001
From: Benoit Perigaud <8754100+b-per@users.noreply.github.com>
Date: Fri, 17 Feb 2023 03:28:10 -0800
Subject: [PATCH 392/603] dbt Constraints / model contracts (#574)

* Add support for constraints in Spark

* Add tests for constraints

* Update requirements for CI to pass

* Update dispatched macro with argument

* Use spark decorator for tests

* Update test to remove unsupported constraints

* Allow multiple queries to be sent

* Revert change on splitting statements in `execute`

* Add `call statement` for table with constraints

* Add checks when the split by `;` is empty

* Fix typo in Jinja variable name

* Rename `constraints` to `constraints_check`

* Support constraints with `alter` statements

* Changie entry

* Fix missing `endif`

* Remove get_columns_spec_ddl as we use alter

* Remove unused dispatch macro

* Update dispatched macro

* Update tests to work with `alter` approach

* Make tests valid for databricks only for delta

* Try other way to call tests

* Add schema info

* Remove wrong argument to test

* Use new testing framework

* Add check on column names and order

* Check only when constraints enabled

* Remove config nesting

* constraint_check is not a list

* Fix CICD

* Typo

* Only allow not null

* Update expected SQL to the Spark one

* Make file_format delta

* Try this

* Check for earlier part of error message

* Check for any rather than all error messages

* Reset to dbt-core main

---------

Co-authored-by: Sung Won Chung <sungwonchung3@gmail.com>
Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
Co-authored-by: Michelle Ark <michelle.ark@dbtlabs.com>
---
 .../unreleased/Features-20230130-125855.yaml  |  8 +++
 dbt/include/spark/macros/adapters.sql         | 52 +++++++++++++++++
 .../spark/macros/materializations/table.sql   |  2 +
 tests/functional/adapter/test_constraints.py  | 58 +++++++++++++++++++
 4 files changed, 120 insertions(+)
 create mode 100644 .changes/unreleased/Features-20230130-125855.yaml
 create mode 100644 tests/functional/adapter/test_constraints.py

diff --git a/.changes/unreleased/Features-20230130-125855.yaml b/.changes/unreleased/Features-20230130-125855.yaml
new file mode 100644
index 00000000..1f27a388
--- /dev/null
+++ b/.changes/unreleased/Features-20230130-125855.yaml
@@ -0,0 +1,8 @@
+kind: Features
+body: 'Support for data types constraints in Spark following the dbt Core feature
+  #6271'
+time: 2023-01-30T12:58:55.972992+01:00
+custom:
+  Author: b-per
+  Issue: "558"
+  PR: "574"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index abc7a0ba..21350ea3 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -138,6 +138,9 @@
       {% else %}
         create table {{ relation }}
       {% endif %}
+      {% if config.get('constraints_enabled', False) %}
+        {{ get_assert_columns_equivalent(sql) }}
+      {% endif %}
       {{ file_format_clause() }}
       {{ options_clause() }}
       {{ partition_cols(label="partitioned by") }}
@@ -160,6 +163,55 @@
 {%- endmacro -%}
 
 
+{% macro persist_constraints(relation, model) %}
+  {{ return(adapter.dispatch('persist_constraints', 'dbt')(relation, model)) }}
+{% endmacro %}
+
+{% macro spark__persist_constraints(relation, model) %}
+  {% if config.get('constraints_enabled', False) and config.get('file_format', 'delta') == 'delta' %}
+    {% do alter_table_add_constraints(relation, model.columns) %}
+    {% do alter_column_set_constraints(relation, model.columns) %}
+  {% endif %}
+{% endmacro %}
+
+{% macro alter_table_add_constraints(relation, constraints) %}
+  {{ return(adapter.dispatch('alter_table_add_constraints', 'dbt')(relation, constraints)) }}
+{% endmacro %}
+
+{% macro spark__alter_table_add_constraints(relation, column_dict) %}
+
+  {% for column_name in column_dict %}
+    {% set constraints_check = column_dict[column_name]['constraints_check'] %}
+    {% if constraints_check and not is_incremental() %}
+      {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint_check) -%}
+      {% call statement() %}
+        alter table {{ relation }} add constraint {{ constraint_hash }} check {{ constraints_check }};
+      {% endcall %}
+    {% endif %}
+  {% endfor %}
+{% endmacro %}
+
+{% macro alter_column_set_constraints(relation, column_dict) %}
+  {{ return(adapter.dispatch('alter_column_set_constraints', 'dbt')(relation, column_dict)) }}
+{% endmacro %}
+
+{% macro spark__alter_column_set_constraints(relation, column_dict) %}
+  {% for column_name in column_dict %}
+    {% set constraints = column_dict[column_name]['constraints'] %}
+    {% for constraint in constraints %}
+      {% if constraint != 'not null' %}
+        {{ exceptions.warn('Invalid constraint for column ' ~ column_name ~ '. Only `not null` is supported.') }}
+      {% else %}
+        {% set quoted_name = adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name %}
+        {% call statement() %}
+          alter table {{ relation }} change column {{ quoted_name }} set {{ constraint }};
+        {% endcall %}
+      {% endif %}
+    {% endfor %}
+  {% endfor %}
+{% endmacro %}
+
+
 {% macro spark__create_view_as(relation, sql) -%}
   create or replace view {{ relation }}
   {{ comment_clause() }}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index c82e27e9..d323e4f3 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -29,6 +29,8 @@
 
   {% do persist_docs(target_relation, model) %}
 
+  {% do persist_constraints(target_relation, model) %}
+
   {{ run_hooks(post_hooks) }}
 
   {{ return({'relations': [target_relation]})}}
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
new file mode 100644
index 00000000..ecf67384
--- /dev/null
+++ b/tests/functional/adapter/test_constraints.py
@@ -0,0 +1,58 @@
+import pytest
+from dbt.tests.util import relation_from_name
+from dbt.tests.adapter.constraints.test_constraints import (
+    BaseConstraintsColumnsEqual,
+    BaseConstraintsRuntimeEnforcement
+)
+
+# constraints are enforced via 'alter' statements that run after table creation
+_expected_sql_spark = """
+create or replace table {0}  
+    using delta
+    as
+
+select
+    1 as id,
+    'blue' as color,
+    cast('2019-01-01' as date) as date_day
+"""
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark')
+class TestSparkConstraintsColumnsEqual(BaseConstraintsColumnsEqual):
+    pass
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark')
+class TestSparkConstraintsRuntimeEnforcement(BaseConstraintsRuntimeEnforcement):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
+    
+    @pytest.fixture(scope="class")
+    def expected_sql(self, project):
+        relation = relation_from_name(project.adapter, "my_model")
+        return _expected_sql_spark.format(relation)
+
+    # On Spark/Databricks, constraints are applied *after* the table is replaced.
+    # We don't have any way to "rollback" the table to its previous happy state.
+    # So the 'color' column will be updated to 'red', instead of 'blue'.
+    @pytest.fixture(scope="class")
+    def expected_color(self):
+        return "red"
+
+    @pytest.fixture(scope="class")
+    def expected_error_messages(self):
+        return [
+            "violate the new CHECK constraint",
+            "DELTA_NEW_CHECK_CONSTRAINT_VIOLATION",
+            "violate the new NOT NULL constraint",
+        ]
+
+    def assert_expected_error_messages(self, error_message, expected_error_messages):
+        # This needs to be ANY instead of ALL
+        # The CHECK constraint is added before the NOT NULL constraint
+        # and different connection types display/truncate the error message in different ways...
+        assert any(msg in error_message for msg in expected_error_messages)
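
To make the new macros above easier to follow, here is a rough Python rendering of the statements they emit (a sketch only: the real logic lives in the Jinja macros in adapters.sql, and details such as the incremental-run guard and the statement calls are omitted).

import hashlib


def constraint_statements(relation, column_dict):
    """Approximate the ALTER statements produced by the new macros."""
    statements, warnings = [], []
    for column_name, column in column_dict.items():
        check = column.get("constraints_check")
        if check:
            # mirrors local_md5(column_name ~ ";" ~ check) used for the constraint name
            constraint_hash = hashlib.md5(f"{column_name};{check}".encode()).hexdigest()
            statements.append(
                f"alter table {relation} add constraint {constraint_hash} check {check}"
            )
        for constraint in column.get("constraints", []):
            if constraint != "not null":
                # only `not null` is supported as a column-level constraint
                warnings.append(
                    f"Invalid constraint for column {column_name}. Only `not null` is supported."
                )
            else:
                statements.append(
                    f"alter table {relation} change column {column_name} set {constraint}"
                )
    return statements, warnings


stmts, warns = constraint_statements(
    "my_schema.my_model",
    {"id": {"constraints": ["not null"], "constraints_check": "(id > 0)"}},
)
print("\n".join(stmts + warns))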

From 9dc352bb1d258af23d3dd425d0c6e44d44b8c9c9 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Tue, 21 Feb 2023 09:40:42 -0600
Subject: [PATCH 393/603] add new workflow for cutting .latest branches (#641)

* add new workflow

* rename workflow extension, update comments

* update PR body
---
 .github/workflows/cut-release-branch.yml | 42 ++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 .github/workflows/cut-release-branch.yml

diff --git a/.github/workflows/cut-release-branch.yml b/.github/workflows/cut-release-branch.yml
new file mode 100644
index 00000000..4102bedc
--- /dev/null
+++ b/.github/workflows/cut-release-branch.yml
@@ -0,0 +1,42 @@
+# **what?**
+# Calls a centralized workflow that will:
+# 1. Cut a new branch (generally `*.latest`)
+# 2. Also cleans up all files in `.changes/unreleased` and `.changes/<previous version>` on
+# `main` and bumps `main` to the input version.
+
+# **why?**
+# Generally reduces the workload of engineers, reduces error, and allows automation.
+
+# **when?**
+# This will run when called manually.
+
+name: Cut new release branch
+
+on:
+  workflow_dispatch:
+    inputs:
+      version_to_bump_main:
+        description: 'The alpha version main should bump to (ex. 1.6.0a1)'
+        required: true
+      new_branch_name:
+        description: 'The full name of the new branch (ex. 1.5.latest)'
+        required: true
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: write
+
+jobs:
+  cut_branch:
+    name: "Cut branch and clean up main for dbt-spark"
+    uses: dbt-labs/actions/.github/workflows/cut-release-branch.yml@main
+    with:
+      version_to_bump_main: ${{ inputs.version_to_bump_main }}
+      new_branch_name: ${{ inputs.new_branch_name }}
+      PR_title: "Cleanup main after cutting new ${{ inputs.new_branch_name }} branch"
+      PR_body: "This PR will fail CI until the dbt-core PR has been merged due to release version conflicts.  dev-requirements.txt needs to be updated to have the dbt-core dependencies point to this new branch."
+    secrets:
+      FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }}

From 99e4ef91845ea528b1e046d0935d8545c5a7505b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 22 Feb 2023 17:57:24 -0500
Subject: [PATCH 394/603] [create-pull-request] automated change (#646)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.5.0-b1.md                          | 21 ++++++++++++++++
 .../Dependencies-20230126-152319.yaml         |  0
 .../Features-20230130-125855.yaml             |  0
 .../Fixes-20230123-134955.yaml                |  0
 .../Under the Hood-20230130-170310.yaml       |  0
 CHANGELOG.md                                  | 24 +++++++++++++++++++
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 9 files changed, 48 insertions(+), 3 deletions(-)
 create mode 100644 .changes/1.5.0-b1.md
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230126-152319.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Features-20230130-125855.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Fixes-20230123-134955.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Under the Hood-20230130-170310.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 32859c09..0ae6b8fb 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.5.0a1
+current_version = 1.5.0b1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.5.0-b1.md b/.changes/1.5.0-b1.md
new file mode 100644
index 00000000..693f8154
--- /dev/null
+++ b/.changes/1.5.0-b1.md
@@ -0,0 +1,21 @@
+## dbt-spark 1.5.0-b1 - February 22, 2023
+
+### Features
+
+- Support for data types constraints in Spark following the dbt Core feature #6271 ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
+
+### Fixes
+
+- add merge_exclude_columns tests ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
+
+### Under the Hood
+
+- remove tox call to integration tests ([#584](https://github.com/dbt-labs/dbt-spark/issues/584))
+
+### Dependencies
+
+- Allow thrift 0.16.0 ([#605](https://github.com/dbt-labs/dbt-spark/pull/605))
+
+### Contributors
+- [@b-per](https://github.com/b-per) ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
+- [@dave-connors-3](https://github.com/dave-connors-3) ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
diff --git a/.changes/unreleased/Dependencies-20230126-152319.yaml b/.changes/1.5.0/Dependencies-20230126-152319.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230126-152319.yaml
rename to .changes/1.5.0/Dependencies-20230126-152319.yaml
diff --git a/.changes/unreleased/Features-20230130-125855.yaml b/.changes/1.5.0/Features-20230130-125855.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230130-125855.yaml
rename to .changes/1.5.0/Features-20230130-125855.yaml
diff --git a/.changes/unreleased/Fixes-20230123-134955.yaml b/.changes/1.5.0/Fixes-20230123-134955.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20230123-134955.yaml
rename to .changes/1.5.0/Fixes-20230123-134955.yaml
diff --git a/.changes/unreleased/Under the Hood-20230130-170310.yaml b/.changes/1.5.0/Under the Hood-20230130-170310.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230130-170310.yaml
rename to .changes/1.5.0/Under the Hood-20230130-170310.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f99421c8..bf19c6c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,30 @@
 - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
+
+## dbt-spark 1.5.0-b1 - February 22, 2023
+
+### Features
+
+- Support for data types constraints in Spark following the dbt Core feature #6271 ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
+
+### Fixes
+
+- add merge_exclude_columns tests ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
+
+### Under the Hood
+
+- remove tox call to integration tests ([#584](https://github.com/dbt-labs/dbt-spark/issues/584))
+
+### Dependencies
+
+- Allow thrift 0.16.0 ([#605](https://github.com/dbt-labs/dbt-spark/pull/605))
+
+### Contributors
+- [@b-per](https://github.com/b-per) ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
+- [@dave-connors-3](https://github.com/dave-connors-3) ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
+
+
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 219c289b..c3758128 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.5.0a1"
+version = "1.5.0b1"
diff --git a/setup.py b/setup.py
index bfa37745..d81fdf26 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.5.0a1"
+package_version = "1.5.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 402a9fdf08b08115b081ea430dbb61ca88957245 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Thu, 23 Feb 2023 09:57:30 -0600
Subject: [PATCH 395/603] add release prep secret (#648)

---
 .github/workflows/release.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0aa1298c..df30809a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -98,8 +98,7 @@ jobs:
       env_setup_script_path: ""
       test_run: ${{ inputs.test_run }}
 
-    secrets:
-      FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }}
+    secrets: inherit
 
   log-outputs-audit-version-and-changelog:
     name: "[Log output] Bump package version, Generate changelog"

From 3489bd72b91a841296f80d4746a367f835995628 Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Thu, 23 Feb 2023 14:48:21 -0500
Subject: [PATCH 396/603] Rename constraints_enabled to contract (#649)

---
 .changes/unreleased/Under the Hood-20230223-105838.yaml | 6 ++++++
 dbt/include/spark/macros/adapters.sql                   | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230223-105838.yaml

diff --git a/.changes/unreleased/Under the Hood-20230223-105838.yaml b/.changes/unreleased/Under the Hood-20230223-105838.yaml
new file mode 100644
index 00000000..8b882f3b
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230223-105838.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Rename constraints_enabled to contract
+time: 2023-02-23T10:58:38.092981-05:00
+custom:
+  Author: gshank
+  Issue: "645"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 21350ea3..f76867cc 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -138,7 +138,7 @@
       {% else %}
         create table {{ relation }}
       {% endif %}
-      {% if config.get('constraints_enabled', False) %}
+      {% if config.get('contract', False) %}
         {{ get_assert_columns_equivalent(sql) }}
       {% endif %}
       {{ file_format_clause() }}
@@ -168,7 +168,7 @@
 {% endmacro %}
 
 {% macro spark__persist_constraints(relation, model) %}
-  {% if config.get('constraints_enabled', False) and config.get('file_format', 'delta') == 'delta' %}
+  {% if config.get('contract', False) and config.get('file_format', 'delta') == 'delta' %}
     {% do alter_table_add_constraints(relation, model.columns) %}
     {% do alter_column_set_constraints(relation, model.columns) %}
   {% endif %}

From 8c84b80d60754155f9252baed92c99038bd4e9f4 Mon Sep 17 00:00:00 2001
From: Nathaniel May <nathaniel.may@fishtownanalytics.com>
Date: Thu, 23 Feb 2023 17:41:06 -0500
Subject: [PATCH 397/603] add codeowners file (#650)

---
 .github/CODEOWNERS | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .github/CODEOWNERS

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000..f6283d12
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,3 @@
+# This codeowners file is used to ensure all PRs require reviews from the adapters team
+
+* @dbt-labs/core-adapters

From 1d03fc75e2c9f6d37be851e271f50b36c026644b Mon Sep 17 00:00:00 2001
From: Nathaniel May <nathaniel.may@fishtownanalytics.com>
Date: Mon, 27 Feb 2023 12:22:04 -0500
Subject: [PATCH 398/603] mirror issues to ADAP jira project (#642)

---
 .github/workflows/jira-creation.yml   | 4 +++-
 .github/workflows/jira-label.yml      | 4 +++-
 .github/workflows/jira-transition.yml | 7 ++++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml
index b4016bef..2611a8bd 100644
--- a/.github/workflows/jira-creation.yml
+++ b/.github/workflows/jira-creation.yml
@@ -19,7 +19,9 @@ permissions:
 
 jobs:
   call-label-action:
-    uses: dbt-labs/jira-actions/.github/workflows/jira-creation.yml@main
+    uses: dbt-labs/actions/.github/workflows/jira-creation.yml@main
+    with:
+      project_key: ADAP
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml
index 3da2e3a3..1637cbe3 100644
--- a/.github/workflows/jira-label.yml
+++ b/.github/workflows/jira-label.yml
@@ -19,7 +19,9 @@ permissions:
 
 jobs:
   call-label-action:
-    uses: dbt-labs/jira-actions/.github/workflows/jira-label.yml@main
+    uses: dbt-labs/actions/.github/workflows/jira-label.yml@main
+    with:
+      project_key: ADAP
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml
index ed9f9cd4..99158a15 100644
--- a/.github/workflows/jira-transition.yml
+++ b/.github/workflows/jira-transition.yml
@@ -15,9 +15,14 @@ on:
   issues:
     types: [closed, deleted, reopened]
 
+# no special access is needed
+permissions: read-all
+
 jobs:
   call-label-action:
-    uses: dbt-labs/jira-actions/.github/workflows/jira-transition.yml@main
+    uses: dbt-labs/actions/.github/workflows/jira-transition.yml@main
+    with:
+      project_key: ADAP
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}

From 1e176d09e061222a9b29397d18fc9f7149e8ee16 Mon Sep 17 00:00:00 2001
From: Nathaniel May <nathaniel.may@fishtownanalytics.com>
Date: Mon, 27 Feb 2023 12:32:22 -0500
Subject: [PATCH 399/603] Revert "mirror issues to ADAP jira project (#642)"
 (#652)

This reverts commit 1d03fc75e2c9f6d37be851e271f50b36c026644b.
---
 .github/workflows/jira-creation.yml   | 4 +---
 .github/workflows/jira-label.yml      | 4 +---
 .github/workflows/jira-transition.yml | 7 +------
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml
index 2611a8bd..b4016bef 100644
--- a/.github/workflows/jira-creation.yml
+++ b/.github/workflows/jira-creation.yml
@@ -19,9 +19,7 @@ permissions:
 
 jobs:
   call-label-action:
-    uses: dbt-labs/actions/.github/workflows/jira-creation.yml@main
-    with:
-      project_key: ADAP
+    uses: dbt-labs/jira-actions/.github/workflows/jira-creation.yml@main
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml
index 1637cbe3..3da2e3a3 100644
--- a/.github/workflows/jira-label.yml
+++ b/.github/workflows/jira-label.yml
@@ -19,9 +19,7 @@ permissions:
 
 jobs:
   call-label-action:
-    uses: dbt-labs/actions/.github/workflows/jira-label.yml@main
-    with:
-      project_key: ADAP
+    uses: dbt-labs/jira-actions/.github/workflows/jira-label.yml@main
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml
index 99158a15..ed9f9cd4 100644
--- a/.github/workflows/jira-transition.yml
+++ b/.github/workflows/jira-transition.yml
@@ -15,14 +15,9 @@ on:
   issues:
     types: [closed, deleted, reopened]
 
-# no special access is needed
-permissions: read-all
-
 jobs:
   call-label-action:
-    uses: dbt-labs/actions/.github/workflows/jira-transition.yml@main
-    with:
-      project_key: ADAP
+    uses: dbt-labs/jira-actions/.github/workflows/jira-transition.yml@main
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}

From bcbb87897296cd5fda552696595ea2d460a3b0bd Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Mon, 27 Feb 2023 14:26:13 -0600
Subject: [PATCH 400/603] remove seed_column_tests integration test (#637)

* remove seed_column_types integration test

* re-add seed_column_types test as functional and try to expand it to account for changing csv seed types via yml file

* add comment describing point of test

* try to make test more in line with existing functional test tests_column_types

* define schema test

* fixture import change

* modify test

* update to main and try dictionary version of macro

* re kick off tests

* try to make test more indepth

* revert back to original test version

* try to break out a small part of the test macro into a smaller piece
---
 .../adapter/seed_column_types/fixtures.py     | 114 ++++++++++++++++++
 .../test_seed_column_types.py                 |  32 +++++
 .../seed_column_types/seeds/payments.csv      |  11 --
 .../test_seed_column_types.py                 |  35 ------
 4 files changed, 146 insertions(+), 46 deletions(-)
 create mode 100644 tests/functional/adapter/seed_column_types/fixtures.py
 create mode 100644 tests/functional/adapter/seed_column_types/test_seed_column_types.py
 delete mode 100644 tests/integration/seed_column_types/seeds/payments.csv
 delete mode 100644 tests/integration/seed_column_types/test_seed_column_types.py

diff --git a/tests/functional/adapter/seed_column_types/fixtures.py b/tests/functional/adapter/seed_column_types/fixtures.py
new file mode 100644
index 00000000..298b49e9
--- /dev/null
+++ b/tests/functional/adapter/seed_column_types/fixtures.py
@@ -0,0 +1,114 @@
+_MACRO_TEST_IS_TYPE_SQL = """
+{% macro simple_type_check_column(column, check) %}
+    {% set checks = {
+        'string': column.is_string,
+        'float': column.is_float,
+        'number': column.is_number,
+        'numeric': column.is_numeric,
+        'integer': column.is_integer,
+    } %}
+    {% if check not in checks %}
+        {% do exceptions.raise_compiler_error('invalid type check value: ' ~ check) %}
+    {% endif %}
+    {{ return(checks[check]()) }}
+{% endmacro %}
+
+{% macro type_check_column(column, type_checks) %}
+    {% set failures = [] %}
+    {% for type_check in type_checks %}
+        {% if type_check.startswith('not ') %}
+            {% if simple_type_check_column(column, type_check[4:]) %}
+                {% do log('simple_type_check_column got ', True) %}
+                {% do failures.append(type_check) %}
+            {% endif %}
+        {% else %}
+            {% if not simple_type_check_column(column, type_check) %}
+                {% do failures.append(type_check) %}
+            {% endif %}
+        {% endif %}
+    {% endfor %}
+    {% if (failures | length) > 0 %}
+        {% do log('column ' ~ column.name ~ ' had failures: ' ~ failures, info=True) %}
+    {% endif %}
+    {% do return((failures | length) == 0) %}
+{% endmacro %}
+
+{% macro is_bad_column(column, column_map) %}
+    {% set column_key = (column.name | lower) %}
+    {% if column_key not in column_map %}
+        {% do exceptions.raise_compiler_error('column key ' ~ column_key ~ ' not found in ' ~ (column_map | list | string)) %}
+    {% endif %}
+
+    {% set type_checks = column_map[column_key] %}
+    {% if not type_checks %}
+        {% do exceptions.raise_compiler_error('no type checks?') %}
+    {% endif %}
+
+    {{ return(not type_check_column(column, type_checks)) }}
+{% endmacro %}
+
+{% test is_type(seed, column_map) %}
+    {% if not execute %}
+        {{ return(None) }}
+    {% endif %}
+    {% if not column_map %}
+        {% do exceptions.raise_compiler_error('test_is_type must have a column name') %}
+    {% endif %}
+    {% set columns = adapter.get_columns_in_relation(seed) %}
+    {% if (column_map | length) != (columns | length) %}
+        {% set column_map_keys = (column_map | list | string) %}
+        {% set column_names = (columns | map(attribute='name') | list | string) %}
+        {% do exceptions.raise_compiler_error('did not get all the columns/all columns not specified:\n' ~ column_map_keys ~ '\nvs\n' ~ column_names) %}
+    {% endif %}
+    {% set bad_columns = [] %}
+    {% for column in columns %}
+        {% if is_bad_column(column, column_map) %}
+            {% do bad_columns.append(column.name) %}
+        {% endif %}
+    {% endfor %}
+    {% do log('bad columns: ' ~ bad_columns, info=True) %}
+    {% for bad_column in bad_columns %}
+      select '{{ bad_column }}' as bad_column
+      {{ 'union all' if not loop.last }}
+    {% endfor %}
+      select * from (select 1 limit 0) as nothing
+{% endtest %}
+"""
+
+_SEED_CSV = """
+ID,ORDERID,PAYMENTMETHOD,STATUS,AMOUNT,AMOUNT_USD,CREATED
+1,1,credit_card,success,1000,10.00,2018-01-01
+2,2,credit_card,success,2000,20.00,2018-01-02
+3,3,coupon,success,100,1.00,2018-01-04
+4,4,coupon,success,2500,25.00,2018-01-05
+5,5,bank_transfer,fail,1700,17.00,2018-01-05
+6,5,bank_transfer,success,1700,17.00,2018-01-05
+7,6,credit_card,success,600,6.00,2018-01-07
+8,7,credit_card,success,1600,16.00,2018-01-09
+9,8,credit_card,success,2300,23.00,2018-01-11
+10,9,gift_card,success,2300,23.00,2018-01-12
+"""
+
+_SEED_YML = """
+seeds:
+  - name: payments
+    config:
+        column_types:
+            id: string
+            orderid: string
+            paymentmethod: string
+            status: string
+            amount: integer
+            amount_usd: decimal(20,2)
+            created: timestamp
+    tests:
+        - is_type:
+            column_map:
+                id: ["string", "not number"]
+                orderid: ["string", "not number"]
+                paymentmethod: ["string", "not number"]
+                status: ["string", "not number"]
+                amount: ["integer", "number"]
+                amount_usd: ["decimal", "number"]
+                created: ["timestamp", "string"]
+"""
diff --git a/tests/functional/adapter/seed_column_types/test_seed_column_types.py b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
new file mode 100644
index 00000000..6a6175ff
--- /dev/null
+++ b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
@@ -0,0 +1,32 @@
+import pytest
+from dbt.tests.util import run_dbt
+from tests.functional.adapter.seed_column_types.fixtures import (
+    _MACRO_TEST_IS_TYPE_SQL,
+    _SEED_CSV,
+    _SEED_YML
+)
+
+@pytest.mark.skip_profile('spark_session')
+class TestSeedColumnTypesCast:
+
+    @pytest.fixture(scope="class")
+    def macros(self):
+        return {"test_is_type.sql": _MACRO_TEST_IS_TYPE_SQL}
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "payments.csv": _SEED_CSV,
+            "schema.yml": _SEED_YML
+        }
+
+    def run_and_test(self):
+        results = run_dbt(["seed"])
+        assert len(results) == 1
+        run_dbt(["test"])
+
+
+    # We want to test seed column types because Hive would otherwise treat all fields as strings;
+    # setting column_types in the seed schema.yml should change them and make the test pass.
+    def test_column_seed_type(self):
+        self.run_and_test()
diff --git a/tests/integration/seed_column_types/seeds/payments.csv b/tests/integration/seed_column_types/seeds/payments.csv
deleted file mode 100644
index 3f49d788..00000000
--- a/tests/integration/seed_column_types/seeds/payments.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-ID,ORDERID,PAYMENTMETHOD,STATUS,AMOUNT,AMOUNT_USD,CREATED
-1,1,credit_card,success,1000,10.00,2018-01-01
-2,2,credit_card,success,2000,20.00,2018-01-02
-3,3,coupon,success,100,1.00,2018-01-04
-4,4,coupon,success,2500,25.00,2018-01-05
-5,5,bank_transfer,fail,1700,17.00,2018-01-05
-6,5,bank_transfer,success,1700,17.00,2018-01-05
-7,6,credit_card,success,600,6.00,2018-01-07
-8,7,credit_card,success,1600,16.00,2018-01-09
-9,8,credit_card,success,2300,23.00,2018-01-11
-10,9,gift_card,success,2300,23.00,2018-01-12
diff --git a/tests/integration/seed_column_types/test_seed_column_types.py b/tests/integration/seed_column_types/test_seed_column_types.py
deleted file mode 100644
index dc997d11..00000000
--- a/tests/integration/seed_column_types/test_seed_column_types.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from cProfile import run
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestSeedColumnTypeCast(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "seed_column_types"
-        
-    @property
-    def models(self):
-        return "models"
-
-    @property
-    def project_config(self):
-        return {
-            'seeds': {
-                'quote_columns': False,
-            },
-        }
-
-    # runs on Spark v2.0
-    @use_profile("apache_spark")
-    def test_seed_column_types_apache_spark(self):
-        self.run_dbt(["seed"])
-
-    # runs on Spark v3.0
-    @use_profile("databricks_cluster")
-    def test_seed_column_types_databricks_cluster(self):
-        self.run_dbt(["seed"])
-
-    # runs on Spark v3.0
-    @use_profile("databricks_sql_endpoint")
-    def test_seed_column_types_databricks_sql_endpoint(self):
-        self.run_dbt(["seed"])

From d3f7c7971727e8bf3b9ffff453fa51c82618ec32 Mon Sep 17 00:00:00 2001
From: Neelesh Salian <nssalian@users.noreply.github.com>
Date: Wed, 1 Mar 2023 08:12:27 -0800
Subject: [PATCH 401/603] [CT-1968]: Convert incremental_strategies to
 functional test (#644)

* Convert incremental_strategies (WIP)

* Fix skipped profiles

* Removing hudi tests

* Remove apache_spark profile to check

* Change relations check

* Remove schemas

* Changing the class name to Base

* Temp fix for pytz

* Include seeds

* Remove pytz change and clean up in test
---
 .../incremental_strategies/fixtures.py        | 398 ++++++++++++++++++
 .../adapter/incremental_strategies/seeds.py   |  27 ++
 .../test_incremental_strategies.py            | 131 ++++++
 .../models/default_append.sql                 |  17 -
 .../models_bad/bad_file_format.sql            |  18 -
 .../models_bad/bad_insert_overwrite_delta.sql |  19 -
 .../models_bad/bad_merge_not_delta.sql        |  18 -
 .../models_bad/bad_strategy.sql               |  18 -
 .../models_delta/append_delta.sql             |  19 -
 .../models_delta/merge_no_key.sql             |  19 -
 .../models_delta/merge_unique_key.sql         |  20 -
 .../models_delta/merge_update_columns.sql     |  22 -
 .../models_hudi/append.sql                    |  19 -
 .../insert_overwrite_no_partitions.sql        |  19 -
 .../insert_overwrite_partitions.sql           |  20 -
 .../models_hudi/merge_no_key.sql              |  19 -
 .../models_hudi/merge_unique_key.sql          |  20 -
 .../models_hudi/merge_update_columns.sql      |  22 -
 .../insert_overwrite_no_partitions.sql        |  19 -
 .../insert_overwrite_partitions.sql           |  20 -
 .../seeds/expected_append.csv                 |   5 -
 .../seeds/expected_overwrite.csv              |   3 -
 .../seeds/expected_partial_upsert.csv         |   4 -
 .../seeds/expected_upsert.csv                 |   4 -
 .../test_incremental_strategies.py            | 128 ------
 25 files changed, 556 insertions(+), 472 deletions(-)
 create mode 100644 tests/functional/adapter/incremental_strategies/fixtures.py
 create mode 100644 tests/functional/adapter/incremental_strategies/seeds.py
 create mode 100644 tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
 delete mode 100644 tests/integration/incremental_strategies/models/default_append.sql
 delete mode 100644 tests/integration/incremental_strategies/models_bad/bad_file_format.sql
 delete mode 100644 tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql
 delete mode 100644 tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql
 delete mode 100644 tests/integration/incremental_strategies/models_bad/bad_strategy.sql
 delete mode 100644 tests/integration/incremental_strategies/models_delta/append_delta.sql
 delete mode 100644 tests/integration/incremental_strategies/models_delta/merge_no_key.sql
 delete mode 100644 tests/integration/incremental_strategies/models_delta/merge_unique_key.sql
 delete mode 100644 tests/integration/incremental_strategies/models_delta/merge_update_columns.sql
 delete mode 100644 tests/integration/incremental_strategies/models_hudi/append.sql
 delete mode 100644 tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
 delete mode 100644 tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
 delete mode 100644 tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
 delete mode 100644 tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
 delete mode 100644 tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql
 delete mode 100644 tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
 delete mode 100644 tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql
 delete mode 100644 tests/integration/incremental_strategies/seeds/expected_append.csv
 delete mode 100644 tests/integration/incremental_strategies/seeds/expected_overwrite.csv
 delete mode 100644 tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv
 delete mode 100644 tests/integration/incremental_strategies/seeds/expected_upsert.csv
 delete mode 100644 tests/integration/incremental_strategies/test_incremental_strategies.py

diff --git a/tests/functional/adapter/incremental_strategies/fixtures.py b/tests/functional/adapter/incremental_strategies/fixtures.py
new file mode 100644
index 00000000..f82decf2
--- /dev/null
+++ b/tests/functional/adapter/incremental_strategies/fixtures.py
@@ -0,0 +1,398 @@
+#
+# Models
+#
+
+default_append_sql = """
+{{ config(
+    materialized = 'incremental',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+#
+# Bad Models
+#
+
+bad_file_format_sql = """
+{{ config(
+    materialized = 'incremental',
+    file_format = 'something_else',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+bad_insert_overwrite_delta_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    file_format = 'delta',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+bad_merge_not_delta_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+bad_strategy_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'something_else',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+#
+# Delta Models
+#
+
+append_delta_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'append',
+    file_format = 'delta',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+delta_merge_no_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+delta_merge_unique_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+    unique_key = 'id',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+delta_merge_update_columns_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+    unique_key = 'id',
+    merge_update_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be updated, color will be ignored
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
+""".lstrip()
+
+#
+# Insert Overwrite
+#
+
+insert_overwrite_no_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    file_format = 'parquet',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+insert_overwrite_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    partition_by = 'id',
+    file_format = 'parquet',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+#
+# Hudi Models
+#
+
+append_hudi_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'append',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_insert_overwrite_no_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_insert_overwrite_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    partition_by = 'id',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_merge_no_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_merge_unique_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+    unique_key = 'id',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_update_columns_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+    unique_key = 'id',
+    merge_update_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be updated, color will be ignored
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
+""".lstrip()
\ No newline at end of file
diff --git a/tests/functional/adapter/incremental_strategies/seeds.py b/tests/functional/adapter/incremental_strategies/seeds.py
new file mode 100644
index 00000000..c27561e0
--- /dev/null
+++ b/tests/functional/adapter/incremental_strategies/seeds.py
@@ -0,0 +1,27 @@
+expected_append_csv = """
+id,msg
+1,hello
+2,goodbye
+2,yo
+3,anyway
+""".lstrip()
+
+expected_overwrite_csv = """
+id,msg
+2,yo
+3,anyway
+""".lstrip()
+
+expected_partial_upsert_csv = """
+id,msg,color
+1,hello,blue
+2,yo,red
+3,anyway,purple
+""".lstrip()
+
+expected_upsert_csv = """
+id,msg
+1,hello
+2,yo
+3,anyway
+""".lstrip()
diff --git a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
new file mode 100644
index 00000000..202eeb44
--- /dev/null
+++ b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
@@ -0,0 +1,131 @@
+import pytest
+
+from dbt.tests.util import run_dbt, check_relations_equal
+from dbt.tests.adapter.simple_seed.test_seed import SeedConfigBase
+from tests.functional.adapter.incremental_strategies.seeds import *
+from tests.functional.adapter.incremental_strategies.fixtures import *
+
+class BaseIncrementalStrategies(SeedConfigBase):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "expected_append.csv": expected_append_csv,
+            "expected_overwrite.csv": expected_overwrite_csv,
+            "expected_upsert.csv": expected_upsert_csv,
+            "expected_partial_upsert.csv": expected_partial_upsert_csv
+        }
+
+    @staticmethod
+    def seed_and_run_once():
+        run_dbt(["seed"])
+        run_dbt(["run"])
+
+    @staticmethod
+    def seed_and_run_twice():
+        run_dbt(["seed"])
+        run_dbt(["run"])
+        run_dbt(["run"])
+
+class TestDefaultAppend(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "default_append.sql" : default_append_sql
+        }
+
+    def run_and_test(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(project.adapter, ["default_append", "expected_append"])
+
+
+    @pytest.mark.skip_profile("databricks_http_cluster", "databricks_sql_endpoint", "spark_session")
+    def test_default_append(self, project):
+        self.run_and_test(project)
+
+
+class TestInsertOverwrite(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "insert_overwrite_no_partitions.sql": insert_overwrite_no_partitions_sql,
+            "insert_overwrite_partitions.sql": insert_overwrite_partitions_sql
+        }
+
+    def run_and_test(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(project.adapter, ["insert_overwrite_no_partitions", "expected_overwrite"])
+        check_relations_equal(project.adapter, ["insert_overwrite_partitions", "expected_upsert"])
+
+    @pytest.mark.skip_profile("databricks_http_cluster", "databricks_sql_endpoint", "spark_session")
+    def test_insert_overwrite(self, project):
+        self.run_and_test(project)
+
+class TestDeltaStrategies(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "append_delta.sql": append_delta_sql,
+            "merge_no_key.sql": delta_merge_no_key_sql,
+            "merge_unique_key.sql": delta_merge_unique_key_sql,
+            "merge_update_columns.sql": delta_merge_update_columns_sql,
+        }
+
+    def run_and_test(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(project.adapter, ["append_delta", "expected_append"])
+        check_relations_equal(project.adapter, ["merge_no_key", "expected_append"])
+        check_relations_equal(project.adapter, ["merge_unique_key", "expected_upsert"])
+        check_relations_equal(project.adapter, ["merge_update_columns", "expected_partial_upsert"])
+
+    @pytest.mark.skip_profile("apache_spark", "databricks_http_cluster", "databricks_sql_endpoint",
+                              "spark_session")
+    def test_delta_strategies(self, project):
+        self.run_and_test(project)
+
+# class TestHudiStrategies(BaseIncrementalStrategies):
+#     @pytest.fixture(scope="class")
+#     def models(self):
+#         return {
+#             "append.sql": append_hudi_sql,
+#             "insert_overwrite_no_partitions.sql": hudi_insert_overwrite_no_partitions_sql,
+#             "insert_overwrite_partitions.sql": hudi_insert_overwrite_partitions_sql,
+#             "merge_no_key.sql": hudi_merge_no_key_sql,
+#             "merge_unique_key.sql": hudi_merge_unique_key_sql,
+#             "merge_update_columns.sql": hudi_update_columns_sql,
+#         }
+#
+#     def run_and_test(self, project):
+#         self.seed_and_run_twice()
+#         check_relations_equal(project.adapter, ["append", "expected_append"])
+#         check_relations_equal(project.adapter, ["merge_no_key", "expected_append"])
+#         check_relations_equal(project.adapter, ["merge_unique_key", "expected_upsert"])
+#         check_relations_equal(project.adapter, ["insert_overwrite_no_partitions", "expected_overwrite"])
+#         check_relations_equal(project.adapter, ["insert_overwrite_partitions", "expected_upsert"])
+#
+#     @pytest.mark.skip_profile("databricks_http_cluster", "databricks_cluster",
+#                               "databricks_sql_endpoint", "spark_session")
+#     def test_hudi_strategies(self, project):
+#         self.run_and_test(project)
+
+
+class TestBadStrategies(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "bad_file_format.sql": bad_file_format_sql,
+            "bad_insert_overwrite_delta.sql": bad_insert_overwrite_delta_sql,
+            "bad_merge_not_delta.sql": bad_merge_not_delta_sql,
+            "bad_strategy.sql": bad_strategy_sql
+        }
+
+    @staticmethod
+    def run_and_test():
+        run_results = run_dbt(["run"], expect_pass=False)
+        # assert all models fail with compilation errors
+        for result in run_results:
+            assert result.status == "error"
+            assert "Compilation Error in model" in result.message
+
+    @pytest.mark.skip_profile("databricks_http_cluster", "spark_session")
+    def test_bad_strategies(self, project):
+        self.run_and_test()
diff --git a/tests/integration/incremental_strategies/models/default_append.sql b/tests/integration/incremental_strategies/models/default_append.sql
deleted file mode 100644
index e2a10393..00000000
--- a/tests/integration/incremental_strategies/models/default_append.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_file_format.sql b/tests/integration/incremental_strategies/models_bad/bad_file_format.sql
deleted file mode 100644
index 911ccbb8..00000000
--- a/tests/integration/incremental_strategies/models_bad/bad_file_format.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    file_format = 'something_else',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql b/tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql
deleted file mode 100644
index b7186e1b..00000000
--- a/tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    file_format = 'delta',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql b/tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql
deleted file mode 100644
index 79a95111..00000000
--- a/tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_strategy.sql b/tests/integration/incremental_strategies/models_bad/bad_strategy.sql
deleted file mode 100644
index 72912f50..00000000
--- a/tests/integration/incremental_strategies/models_bad/bad_strategy.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'something_else',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/append_delta.sql b/tests/integration/incremental_strategies/models_delta/append_delta.sql
deleted file mode 100644
index bfbd2512..00000000
--- a/tests/integration/incremental_strategies/models_delta/append_delta.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'append',
-    file_format = 'delta',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/merge_no_key.sql b/tests/integration/incremental_strategies/models_delta/merge_no_key.sql
deleted file mode 100644
index 35a71b1a..00000000
--- a/tests/integration/incremental_strategies/models_delta/merge_no_key.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'delta',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/merge_unique_key.sql b/tests/integration/incremental_strategies/models_delta/merge_unique_key.sql
deleted file mode 100644
index e8dfd37b..00000000
--- a/tests/integration/incremental_strategies/models_delta/merge_unique_key.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'delta',
-    unique_key = 'id',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/merge_update_columns.sql b/tests/integration/incremental_strategies/models_delta/merge_update_columns.sql
deleted file mode 100644
index d934b299..00000000
--- a/tests/integration/incremental_strategies/models_delta/merge_update_columns.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'delta',
-    unique_key = 'id',
-    merge_update_columns = ['msg'],
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
-
-{% else %}
-
--- msg will be updated, color will be ignored
-select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
-union all
-select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/append.sql b/tests/integration/incremental_strategies/models_hudi/append.sql
deleted file mode 100644
index 9be27bec..00000000
--- a/tests/integration/incremental_strategies/models_hudi/append.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'append',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
deleted file mode 100644
index 08137408..00000000
--- a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
deleted file mode 100644
index 0f74cfdb..00000000
--- a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    partition_by = 'id',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql b/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
deleted file mode 100644
index 8def11dd..00000000
--- a/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql b/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
deleted file mode 100644
index ee72860d..00000000
--- a/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'hudi',
-    unique_key = 'id',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql b/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql
deleted file mode 100644
index 99f0d0b7..00000000
--- a/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'hudi',
-    unique_key = 'id',
-    merge_update_columns = ['msg'],
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
-
-{% else %}
-
--- msg will be updated, color will be ignored
-select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
-union all
-select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
deleted file mode 100644
index ae007b45..00000000
--- a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    file_format = 'parquet',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql
deleted file mode 100644
index cfe235ad..00000000
--- a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    partition_by = 'id',
-    file_format = 'parquet',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/seeds/expected_append.csv b/tests/integration/incremental_strategies/seeds/expected_append.csv
deleted file mode 100644
index c96e569b..00000000
--- a/tests/integration/incremental_strategies/seeds/expected_append.csv
+++ /dev/null
@@ -1,5 +0,0 @@
-id,msg
-1,hello
-2,goodbye
-2,yo
-3,anyway
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_overwrite.csv b/tests/integration/incremental_strategies/seeds/expected_overwrite.csv
deleted file mode 100644
index 46d8f605..00000000
--- a/tests/integration/incremental_strategies/seeds/expected_overwrite.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-id,msg
-2,yo
-3,anyway
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv
deleted file mode 100644
index bc922cde..00000000
--- a/tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-id,msg,color
-1,hello,blue
-2,yo,red
-3,anyway,purple
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_upsert.csv
deleted file mode 100644
index 71805dfc..00000000
--- a/tests/integration/incremental_strategies/seeds/expected_upsert.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-id,msg
-1,hello
-2,yo
-3,anyway
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
deleted file mode 100644
index 026f562d..00000000
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ /dev/null
@@ -1,128 +0,0 @@
-from cProfile import run
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestIncrementalStrategies(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "incremental_strategies"
-
-    @property
-    def project_config(self):
-        return {
-            'seeds': {
-                'quote_columns': False,
-            },
-        }
-
-    def seed_and_run_once(self):
-        self.run_dbt(["seed"])
-        self.run_dbt(["run"])
-
-    def seed_and_run_twice(self):
-        self.run_dbt(["seed"])
-        self.run_dbt(["run"])
-        self.run_dbt(["run"])
-
-
-class TestDefaultAppend(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models"
-        
-    def run_and_test(self):
-        self.seed_and_run_twice()
-        self.assertTablesEqual("default_append", "expected_append")
-
-    @use_profile("apache_spark")
-    def test_default_append_apache_spark(self):
-        self.run_and_test()
-
-    @use_profile("databricks_cluster")
-    def test_default_append_databricks_cluster(self):
-        self.run_and_test()
-
-
-class TestInsertOverwrite(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models_insert_overwrite"
-
-    def run_and_test(self):
-        self.seed_and_run_twice()
-        self.assertTablesEqual(
-            "insert_overwrite_no_partitions", "expected_overwrite")
-        self.assertTablesEqual(
-            "insert_overwrite_partitions", "expected_upsert")
-
-    @use_profile("apache_spark")
-    def test_insert_overwrite_apache_spark(self):
-        self.run_and_test()
-
-    # This test requires settings on the test cluster
-    # more info at https://docs.getdbt.com/reference/resource-configs/spark-configs#the-insert_overwrite-strategy
-    @use_profile("databricks_cluster")
-    def test_insert_overwrite_databricks_cluster(self):
-        self.run_and_test()
-
-
-class TestDeltaStrategies(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models_delta"
-
-    def run_and_test(self):
-        self.seed_and_run_twice()
-        self.assertTablesEqual("append_delta", "expected_append")
-        self.assertTablesEqual("merge_no_key", "expected_append")
-        self.assertTablesEqual("merge_unique_key", "expected_upsert")
-        self.assertTablesEqual("merge_update_columns", "expected_partial_upsert")
-
-    @use_profile("databricks_cluster")
-    def test_delta_strategies_databricks_cluster(self):
-        self.run_and_test()
-
-# Uncomment this hudi integration test after the hudi 0.10.0 release to make it work.
-# class TestHudiStrategies(TestIncrementalStrategies):
-#     @property
-#     def models(self):
-#         return "models_hudi"
-#
-#     def run_and_test(self):
-#         self.seed_and_run_once()
-#         self.assertTablesEqual("append", "expected_append")
-#         self.assertTablesEqual("merge_no_key", "expected_append")
-#         self.assertTablesEqual("merge_unique_key", "expected_upsert")
-#         self.assertTablesEqual(
-#             "insert_overwrite_no_partitions", "expected_overwrite")
-#         self.assertTablesEqual(
-#             "insert_overwrite_partitions", "expected_upsert")
-#
-#     @use_profile("apache_spark")
-#     def test_hudi_strategies_apache_spark(self):
-#         self.run_and_test()
-
-
-class TestBadStrategies(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models_bad"
-
-    def run_and_test(self):
-        results = self.run_dbt(["run"], expect_pass=False)
-        # assert all models fail with compilation errors
-        for result in results:
-            self.assertEqual("error", result.status)
-            self.assertIn("Compilation Error in model", result.message)
-
-    @use_profile("apache_spark")
-    def test_bad_strategies_apache_spark(self):
-        self.run_and_test()
-
-    @use_profile("databricks_cluster")
-    def test_bad_strategies_databricks_cluster(self):
-        self.run_and_test()
-
-    @use_profile("databricks_sql_endpoint")
-    def test_bad_strategies_databricks_sql_endpoint(self):
-        self.run_and_test()

From ccf57ac4f462bc37d9bd74c1339153af096fd503 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 1 Mar 2023 15:08:20 -0500
Subject: [PATCH 402/603] Standardize dev requirements (#653)

* pinned dev requirements to major version, synced across adapters
* hard pinned mypy
* pinned wheel due to a vulnerability in 0.30
* added py37-specific pins where needed
* added --explicit-package-bases arg to mypy to properly parse dbt/adapters as a namespace package
* updated with standard pre-commit-config for all adapters
* update flake8 config
* black
* mypy
---
 .flake8                        | 12 ++++----
 .pre-commit-config.yaml        | 16 +++++------
 Makefile                       |  5 ++++
 dbt/adapters/spark/__init__.py |  2 +-
 dbt/adapters/spark/impl.py     | 24 +++++++++-------
 dev-requirements.txt           | 50 +++++++++++++++++++++-------------
 setup.py                       |  2 +-
 7 files changed, 67 insertions(+), 44 deletions(-)

diff --git a/.flake8 b/.flake8
index f39d154c..bbc3202a 100644
--- a/.flake8
+++ b/.flake8
@@ -4,9 +4,11 @@ select =
     W
     F
 ignore =
-    W503 # makes Flake8 work like black
-    W504
-    E203 # makes Flake8 work like black
-    E741
-    E501
+    # makes Flake8 work like black
+    W503,
+    W504,
+    # makes Flake8 work like black
+    E203,
+    E741,
+    E501,
 exclude = test
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4b098e0c..1141ccc9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ default_language_version:
 
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v3.2.0
+  rev: v4.4.0
   hooks:
   - id: check-yaml
     args: [--unsafe]
@@ -18,31 +18,31 @@ repos:
   - id: trailing-whitespace
   - id: check-case-conflict
 - repo: https://github.com/psf/black
-  rev: 21.12b0
+  rev: 23.1.0
   hooks:
   - id: black
-    additional_dependencies: ['click==8.0.4']
+    additional_dependencies: ['click~=8.1']
     args:
     - "--line-length=99"
     - "--target-version=py38"
   - id: black
     alias: black-check
     stages: [manual]
-    additional_dependencies: ['click==8.0.4']
+    additional_dependencies: ['click~=8.1']
     args:
     - "--line-length=99"
     - "--target-version=py38"
     - "--check"
     - "--diff"
 - repo: https://github.com/pycqa/flake8
-  rev: 4.0.1
+  rev: 6.0.0
   hooks:
   - id: flake8
   - id: flake8
     alias: flake8-check
     stages: [manual]
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.950
+  rev: v1.0.1
   hooks:
   - id: mypy
     # N.B.: Mypy is... a bit fragile.
@@ -55,12 +55,12 @@ repos:
     # of our control to the mix.  Unfortunately, there's nothing we can
     # do about per pre-commit's author.
     # See https://github.com/pre-commit/pre-commit/issues/730 for details.
-    args: [--show-error-codes, --ignore-missing-imports]
+    args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases]
     files: ^dbt/adapters/.*
     language: system
   - id: mypy
     alias: mypy-check
     stages: [manual]
-    args: [--show-error-codes, --pretty, --ignore-missing-imports]
+    args: [--show-error-codes, --pretty, --ignore-missing-imports, --explicit-package-bases]
     files: ^dbt/adapters
     language: system
diff --git a/Makefile b/Makefile
index dca1ec07..5cb3297f 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,11 @@ dev: ## Installs adapter in develop mode along with development dependencies
 	@\
 	pip install -r requirements.txt -r dev-requirements.txt && pre-commit install
 
+.PHONY: dev-uninstall
+dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment
+               ## Useful when updating versions, or if you accidentally installed into the system interpreter
+	pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y
+
 .PHONY: mypy
 mypy: ## Runs mypy against staged changes for static type checking.
 	@\
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py
index 91ad5476..31588019 100644
--- a/dbt/adapters/spark/__init__.py
+++ b/dbt/adapters/spark/__init__.py
@@ -8,5 +8,5 @@
 from dbt.include import spark  # type: ignore
 
 Plugin = AdapterPlugin(
-    adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH
+    adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH  # type: ignore
 )
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index fea5bbac..e37bc945 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -133,9 +133,7 @@ def add_schema_to_cache(self, schema) -> str:
         # so jinja doesn't render things
         return ""
 
-    def list_relations_without_caching(
-        self, schema_relation: SparkRelation
-    ) -> List[SparkRelation]:
+    def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[BaseRelation]:
         kwargs = {"schema_relation": schema_relation}
         try:
             results = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs)
@@ -177,7 +175,9 @@ def get_relation(self, database: str, schema: str, identifier: str) -> Optional[
 
         return super().get_relation(database, schema, identifier)
 
-    def parse_describe_extended(self, relation: Relation, raw_rows: AttrDict) -> List[SparkColumn]:
+    def parse_describe_extended(
+        self, relation: BaseRelation, raw_rows: AttrDict
+    ) -> List[SparkColumn]:
         # Convert the Row to a dict
         dict_rows = [dict(zip(row._keys, row._values)) for row in raw_rows]
         # Find the separator between the rows and the metadata provided
@@ -214,7 +214,7 @@ def find_table_information_separator(rows: List[dict]) -> int:
             pos += 1
         return pos
 
-    def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
+    def get_columns_in_relation(self, relation: BaseRelation) -> List[SparkColumn]:
         columns = []
         try:
             rows: AttrDict = self.execute_macro(
@@ -235,12 +235,16 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
         columns = [x for x in columns if x.name not in self.HUDI_METADATA_COLUMNS]
         return columns
 
-    def parse_columns_from_information(self, relation: SparkRelation) -> List[SparkColumn]:
-        owner_match = re.findall(self.INFORMATION_OWNER_REGEX, relation.information)
+    def parse_columns_from_information(self, relation: BaseRelation) -> List[SparkColumn]:
+        if hasattr(relation, "information"):
+            information = relation.information or ""
+        else:
+            information = ""
+        owner_match = re.findall(self.INFORMATION_OWNER_REGEX, information)
         owner = owner_match[0] if owner_match else None
-        matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, relation.information)
+        matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, information)
         columns = []
-        stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, relation.information)
+        stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, information)
         raw_table_stats = stats_match[0] if stats_match else None
         table_stats = SparkColumn.convert_table_stats(raw_table_stats)
         for match_num, match in enumerate(matches):
@@ -259,7 +263,7 @@ def parse_columns_from_information(self, relation: SparkRelation) -> List[SparkC
             columns.append(column)
         return columns
 
-    def _get_columns_for_catalog(self, relation: SparkRelation) -> Iterable[Dict[str, Any]]:
+    def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str, Any]]:
         columns = self.parse_columns_from_information(relation)
 
         for column in columns:
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 58009d04..780bf5e8 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -3,23 +3,35 @@
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
-black==22.8.0
-bumpversion
-click~=8.1.3
-flake8
-flaky
-freezegun==0.3.9
-ipdb
-mock>=1.3.0
-mypy==0.971
-pre-commit
-pytest-csv
-pytest-dotenv
-pytest-xdist
-pytest>=6.0.2
-pytz
-tox>=3.2.0
+# if version 1.x or greater -> pin to major version
+# if version 0.x -> pin to minor
+black~=23.1
+bumpversion~=0.6.0
+click~=8.1
+flake8~=5.0;python_version=="3.7"
+flake8~=6.0;python_version>="3.8"
+flaky~=3.7
+freezegun~=1.2
+ipdb~=0.13.11
+mypy==1.0.1  # patch updates have historically introduced breaking changes
+pip-tools~=6.12
+pre-commit~=2.21;python_version=="3.7"
+pre-commit~=3.1;python_version>="3.8"
+pre-commit-hooks~=4.4
+pytest~=7.2
+pytest-csv~=3.0
+pytest-dotenv~=0.5.2
+pytest-logbook~=1.2
+pytest-xdist~=3.2
+pytz~=2022.7
+tox~=3.0;python_version=="3.7"
+tox~=4.4;python_version>="3.8"
+types-pytz~=2022.7
+types-requests~=2.28
+twine~=4.0
+wheel~=0.38
 
-# Test requirements
-sasl>=0.2.1
-thrift_sasl==0.4.3
+# Adapter specific dependencies
+mock~=5.0
+sasl~=0.3.1
+thrift_sasl~=0.4.3
diff --git a/setup.py b/setup.py
index d81fdf26..dd5a6491 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ def _get_plugin_version_dict():
     _version_path = os.path.join(this_directory, "dbt", "adapters", "spark", "__version__.py")
     _semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
     _pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
-    _version_pattern = fr"""version\s*=\s*["']{_semver}{_pre}["']"""
+    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}["']"""
     with open(_version_path) as f:
         match = re.search(_version_pattern, f.read().strip())
         if match is None:

From 154f311d1358b8ae21c1081ffa7e2cb5383f17b3 Mon Sep 17 00:00:00 2001
From: Nathaniel May <nathaniel.may@fishtownanalytics.com>
Date: Wed, 1 Mar 2023 16:53:54 -0500
Subject: [PATCH 403/603] Revert "Revert "mirror issues to ADAP jira project
 (#642)" (#652)" (#659)

This reverts commit 1e176d09e061222a9b29397d18fc9f7149e8ee16.
---
 .github/workflows/jira-creation.yml   | 4 +++-
 .github/workflows/jira-label.yml      | 4 +++-
 .github/workflows/jira-transition.yml | 7 ++++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml
index b4016bef..2611a8bd 100644
--- a/.github/workflows/jira-creation.yml
+++ b/.github/workflows/jira-creation.yml
@@ -19,7 +19,9 @@ permissions:
 
 jobs:
   call-label-action:
-    uses: dbt-labs/jira-actions/.github/workflows/jira-creation.yml@main
+    uses: dbt-labs/actions/.github/workflows/jira-creation.yml@main
+    with:
+      project_key: ADAP
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml
index 3da2e3a3..1637cbe3 100644
--- a/.github/workflows/jira-label.yml
+++ b/.github/workflows/jira-label.yml
@@ -19,7 +19,9 @@ permissions:
 
 jobs:
   call-label-action:
-    uses: dbt-labs/jira-actions/.github/workflows/jira-label.yml@main
+    uses: dbt-labs/actions/.github/workflows/jira-label.yml@main
+    with:
+      project_key: ADAP
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml
index ed9f9cd4..99158a15 100644
--- a/.github/workflows/jira-transition.yml
+++ b/.github/workflows/jira-transition.yml
@@ -15,9 +15,14 @@ on:
   issues:
     types: [closed, deleted, reopened]
 
+# no special access is needed
+permissions: read-all
+
 jobs:
   call-label-action:
-    uses: dbt-labs/jira-actions/.github/workflows/jira-transition.yml@main
+    uses: dbt-labs/actions/.github/workflows/jira-transition.yml@main
+    with:
+      project_key: ADAP
     secrets:
       JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
       JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}

From 5b58681733dd755f20d7539f8be3fa78c5926f9d Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 1 Mar 2023 19:17:23 -0500
Subject: [PATCH 404/603] update integration test badge to use circleci (#660)

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 3d9f75d3..fa286b1f 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,8 @@
   <a href="https://github.com/dbt-labs/dbt-spark/actions/workflows/main.yml">
     <img src="https://github.com/dbt-labs/dbt-spark/actions/workflows/main.yml/badge.svg?event=push" alt="Unit Tests Badge"/>
   </a>
-  <a href="https://github.com/dbt-labs/dbt-spark/actions/workflows/integration.yml">
-    <img src="https://github.com/dbt-labs/dbt-spark/actions/workflows/integration.yml/badge.svg?event=push" alt="Integration Tests Badge"/>
+  <a href="https://circleci.com/gh/dbt-labs/dbt-spark/?branch=main">
+    <img src="https://circleci.com/gh/dbt-labs/dbt-spark/tree/main.svg?style=shield" alt="Integration Tests Badge"/>
   </a>
 </p>
 

From 20712a407d01ffcd6852b1c53d11d86b1228444f Mon Sep 17 00:00:00 2001
From: Neelesh Salian <nssalian@users.noreply.github.com>
Date: Thu, 2 Mar 2023 13:41:04 -0800
Subject: [PATCH 405/603] Fix for libsasl2 installation (#661)

* Fix for libsasl2-dev installation

---------

Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .github/workflows/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ff1c92ce..d399960a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -54,6 +54,7 @@ jobs:
 
       - name: Install python dependencies
         run: |
+          sudo apt-get update
           sudo apt-get install libsasl2-dev
           python -m pip install --user --upgrade pip
           python -m pip --version
@@ -95,6 +96,7 @@ jobs:
 
       - name: Install python dependencies
         run: |
+          sudo apt-get update
           sudo apt-get install libsasl2-dev
           python -m pip install --user --upgrade pip
           python -m pip --version

From 8987e66e6e2ca721c5002bcefcfd2447e284ceaf Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Thu, 2 Mar 2023 15:24:48 -0800
Subject: [PATCH 406/603] Ct 276/iceberg support patch (#651)

* Adding in additional support for iceberg v2 tables

Found a way to identify Iceberg tables, given that Spark returns
an error when trying to execute "SHOW TABLE EXTENDED...". See
https://issues.apache.org/jira/browse/SPARK-33393

Instead of "SHOW TABLE EXTENDED", a "DESCRIBE EXTENDED" is
performed to retrieve the provider information. This allows
Iceberg tables to be identified through an is_iceberg member
variable (a standalone sketch of this detection appears after
the bullet list below).

Allow for multiple join conditions so that multiple columns can
make a row distinct.

Use is_iceberg everywhere handling iceberg tables differs from other
sources of data.

* Setting _schema rather than replacing it

[CT-276] Apache Iceberg Support #294

The _schema variable was used for non-iceberg tables but was being
overridden by work for iceberg v2 tables.  I've made it so the iceberg
condition will set _schema rather than blanket changing the schema for
all providers.

* Renaming macro and method name

On second look I wasn't happy with my name choices for macro name and
method, hopefully what I have now makes more sense.

[CT-276] Apache Iceberg Support #294

* Adding changelog entry.

* Removing the is_iceberg check as it is not needed

Upon further investigation this check is not needed since
self.database will not be set.

* Set up CI with Azure Pipelines

[skip ci]

* Fix incremental runs

* Add Iceberg to the list

* Backing out previous merge which broke unit tests

* Removed use of ParsedSourceDefinition, add iceberg

- upstream dbt changed, breaking the use of ParsedSourceDefinition; using
  SourceDefinition appears to work instead
- Added in change to include iceberg in adapters.sql:
   macro spark__alter_column_comment

* Allowing the use of /bin/bash by tox

tox on certain platforms will complain that /bin/bash is not allowed to
be used. This change explicitly allows it.

* Cleanup based on comments

* Revert some stuff

I noticed that two Spark tests were failing, so I reverted
some of the changes that seemed related. They are now passing,
and my Iceberg dbt project still runs fine.

* Correct makefile -e omission.

* Rewrite impl using first order funcs.

* Rewrite comment to be clearer.

* Add comment to revise str to dict type.

* Fix mypy ignore statement.

* Move inline funcs to instance methods for performance.

* Do some python cleanup as Mike suggested.

* added project fixture

* added version tag

* removed whitespace from seed data
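
A minimal, hypothetical Python sketch of the provider detection described
above: the flag names mirror the adapter, but the helper function itself is
illustrative only, not the adapter's actual API.

    # Hypothetical sketch: derive file-format flags from the text produced by
    # "describe extended" (the adapter assembles a similar string internally).
    from typing import Dict

    def provider_flags(information: str) -> Dict[str, bool]:
        """Return file-format flags parsed from a DESCRIBE EXTENDED-style blob."""
        return {
            "is_delta": "Provider: delta" in information,
            "is_hudi": "Provider: hudi" in information,
            "is_iceberg": "Provider: iceberg" in information,
        }

    sample = "Catalog: spark_catalog\nProvider: iceberg\nType: MANAGED\n"
    print(provider_flags(sample))  # {'is_delta': False, 'is_hudi': False, 'is_iceberg': True}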

---------

Co-authored-by: Dan Parent <65294587+dparent1@users.noreply.github.com>
Co-authored-by: Fokko Driesprong <fokko@tabular.io>
Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .../unreleased/Fixes-20220819-141350.yaml     |   8 ++
 Makefile                                      |   2 +-
 dbt/adapters/spark/impl.py                    | 118 ++++++++++++++----
 dbt/adapters/spark/relation.py                |  11 +-
 dbt/include/spark/macros/adapters.sql         |  61 +++++++--
 .../incremental/incremental.sql               |   2 +-
 .../incremental/strategies.sql                |  18 ++-
 .../materializations/incremental/validate.sql |   8 +-
 .../macros/materializations/snapshot.sql      |  33 +++--
 .../spark/macros/materializations/table.sql   |   6 +-
 .../adapter/seed_column_types/fixtures.py     |  43 ++++---
 .../test_seed_column_types.py                 |  13 +-
 12 files changed, 239 insertions(+), 84 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20220819-141350.yaml

diff --git a/.changes/unreleased/Fixes-20220819-141350.yaml b/.changes/unreleased/Fixes-20220819-141350.yaml
new file mode 100644
index 00000000..594c5216
--- /dev/null
+++ b/.changes/unreleased/Fixes-20220819-141350.yaml
@@ -0,0 +1,8 @@
+kind: Fixes
+body: Support for iceberg v2 tables.  Added ability to use multiple join conditions
+  to allow for multiple columns to make a row distinct.
+time: 2022-08-19T14:13:50.3145273-04:00
+custom:
+  Author: dparent1
+  Issue: "294"
+  PR: "432"
diff --git a/Makefile b/Makefile
index 5cb3297f..876440a0 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 .PHONY: dev
 dev: ## Installs adapter in develop mode along with development dependencies
 	@\
-	pip install -r requirements.txt -r dev-requirements.txt && pre-commit install
+	pip install -e . -r requirements.txt -r dev-requirements.txt && pre-commit install
 
 .PHONY: dev-uninstall
 dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index e37bc945..4d4caa1b 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,7 +1,7 @@
 import re
 from concurrent.futures import Future
 from dataclasses import dataclass
-from typing import Any, Dict, Iterable, List, Optional, Union, Type
+from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable
 from typing_extensions import TypeAlias
 
 import agate
@@ -32,6 +32,8 @@
 GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "get_columns_in_relation_raw"
 LIST_SCHEMAS_MACRO_NAME = "list_schemas"
 LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
+LIST_RELATIONS_SHOW_TABLES_MACRO_NAME = "list_relations_show_tables_without_caching"
+DESCRIBE_TABLE_EXTENDED_MACRO_NAME = "describe_table_extended_without_caching"
 DROP_RELATION_MACRO_NAME = "drop_relation"
 FETCH_TBL_PROPERTIES_MACRO_NAME = "fetch_tbl_properties"
 
@@ -89,6 +91,7 @@ class SparkAdapter(SQLAdapter):
     ]
 
     Relation: TypeAlias = SparkRelation
+    RelationInfo = Tuple[str, str, str]
     Column: TypeAlias = SparkColumn
     ConnectionManager: TypeAlias = SparkConnectionManager
     AdapterSpecificConfigs: TypeAlias = SparkConfig
@@ -133,42 +136,113 @@ def add_schema_to_cache(self, schema) -> str:
         # so jinja doesn't render things
         return ""
 
-    def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[BaseRelation]:
-        kwargs = {"schema_relation": schema_relation}
+    def _get_relation_information(self, row: agate.Row) -> RelationInfo:
+        """relation info was fetched with SHOW TABLES EXTENDED"""
+        try:
+            _schema, name, _, information = row
+        except ValueError:
+            raise dbt.exceptions.DbtRuntimeError(
+                f'Invalid value from "show tables extended ...", got {len(row)} values, expected 4'
+            )
+
+        return _schema, name, information
+
+    def _get_relation_information_using_describe(self, row: agate.Row) -> RelationInfo:
+        """Relation info fetched using SHOW TABLES and an auxiliary DESCRIBE statement"""
+        try:
+            _schema, name, _ = row
+        except ValueError:
+            raise dbt.exceptions.DbtRuntimeError(
+                f'Invalid value from "show tables ...", got {len(row)} values, expected 3'
+            )
+
+        table_name = f"{_schema}.{name}"
         try:
-            results = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs)
+            table_results = self.execute_macro(
+                DESCRIBE_TABLE_EXTENDED_MACRO_NAME, kwargs={"table_name": table_name}
+            )
         except dbt.exceptions.DbtRuntimeError as e:
-            errmsg = getattr(e, "msg", "")
-            if f"Database '{schema_relation}' not found" in errmsg:
-                return []
-            else:
-                description = "Error while retrieving information about"
-                logger.debug(f"{description} {schema_relation}: {e.msg}")
-                return []
+            logger.debug(f"Error while retrieving information about {table_name}: {e.msg}")
+            table_results = AttrDict()
+
+        information = ""
+        for info_row in table_results:
+            info_type, info_value, _ = info_row
+            if not info_type.startswith("#"):
+                information += f"{info_type}: {info_value}\n"
+
+        return _schema, name, information
 
+    def _build_spark_relation_list(
+        self,
+        row_list: agate.Table,
+        relation_info_func: Callable[[agate.Row], RelationInfo],
+    ) -> List[BaseRelation]:
+        """Aggregate relations with format metadata included."""
         relations = []
-        for row in results:
-            if len(row) != 4:
-                raise dbt.exceptions.DbtRuntimeError(
-                    f'Invalid value from "show table extended ...", '
-                    f"got {len(row)} values, expected 4"
-                )
-            _schema, name, _, information = row
-            rel_type = RelationType.View if "Type: VIEW" in information else RelationType.Table
-            is_delta = "Provider: delta" in information
-            is_hudi = "Provider: hudi" in information
-            relation: BaseRelation = self.Relation.create(
+        for row in row_list:
+            _schema, name, information = relation_info_func(row)
+
+            rel_type: RelationType = (
+                RelationType.View if "Type: VIEW" in information else RelationType.Table
+            )
+            is_delta: bool = "Provider: delta" in information
+            is_hudi: bool = "Provider: hudi" in information
+            is_iceberg: bool = "Provider: iceberg" in information
+
+            relation: BaseRelation = self.Relation.create(  # type: ignore
                 schema=_schema,
                 identifier=name,
                 type=rel_type,
                 information=information,
                 is_delta=is_delta,
+                is_iceberg=is_iceberg,
                 is_hudi=is_hudi,
             )
             relations.append(relation)
 
         return relations
 
+    def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[BaseRelation]:
+        """Distinct Spark compute engines may not support the same SQL featureset. Thus, we must
+        try different methods to fetch relation information."""
+
+        kwargs = {"schema_relation": schema_relation}
+
+        try:
+            # Default compute engine behavior: show tables extended
+            show_table_extended_rows = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs)
+            return self._build_spark_relation_list(
+                row_list=show_table_extended_rows,
+                relation_info_func=self._get_relation_information,
+            )
+        except dbt.exceptions.DbtRuntimeError as e:
+            errmsg = getattr(e, "msg", "")
+            if f"Database '{schema_relation}' not found" in errmsg:
+                return []
+            # Iceberg compute engine behavior: show table
+            elif "SHOW TABLE EXTENDED is not supported for v2 tables" in errmsg:
+                # this happens with spark-iceberg with v2 iceberg tables
+                # https://issues.apache.org/jira/browse/SPARK-33393
+                try:
+                    # Iceberg behavior: 3-row result of relations obtained
+                    show_table_rows = self.execute_macro(
+                        LIST_RELATIONS_SHOW_TABLES_MACRO_NAME, kwargs=kwargs
+                    )
+                    return self._build_spark_relation_list(
+                        row_list=show_table_rows,
+                        relation_info_func=self._get_relation_information_using_describe,
+                    )
+                except dbt.exceptions.DbtRuntimeError as e:
+                    description = "Error while retrieving information about"
+                    logger.debug(f"{description} {schema_relation}: {e.msg}")
+                    return []
+            else:
+                logger.debug(
+                    f"Error while retrieving information about {schema_relation}: {errmsg}"
+                )
+                return []
+
     def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]:
         if not self.Relation.get_default_include_policy().database:
             database = None  # type: ignore
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index 0b0c58bc..f5a3e3e1 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -1,9 +1,14 @@
-from typing import Optional
-
+from typing import Optional, TypeVar
 from dataclasses import dataclass, field
 
 from dbt.adapters.base.relation import BaseRelation, Policy
+
 from dbt.exceptions import DbtRuntimeError
+from dbt.events import AdapterLogger
+
+logger = AdapterLogger("Spark")
+
+Self = TypeVar("Self", bound="BaseRelation")
 
 
 @dataclass
@@ -27,6 +32,8 @@ class SparkRelation(BaseRelation):
     quote_character: str = "`"
     is_delta: Optional[bool] = None
     is_hudi: Optional[bool] = None
+    is_iceberg: Optional[bool] = None
+    # TODO: make this a dict everywhere
     information: Optional[str] = None
 
     def __post_init__(self):
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index f76867cc..656e6b3a 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -1,3 +1,14 @@
+{% macro dbt_spark_tblproperties_clause() -%}
+  {%- set tblproperties = config.get('tblproperties') -%}
+  {%- if tblproperties is not none %}
+    tblproperties (
+      {%- for prop in tblproperties -%}
+      '{{ prop }}' = '{{ tblproperties[prop] }}' {% if not loop.last %}, {% endif %}
+      {%- endfor %}
+    )
+  {%- endif %}
+{%- endmacro -%}
+
 {% macro file_format_clause() %}
   {{ return(adapter.dispatch('file_format_clause', 'dbt')()) }}
 {%- endmacro -%}
@@ -133,7 +144,7 @@
     {%- if temporary -%}
       {{ create_temporary_view(relation, compiled_code) }}
     {%- else -%}
-      {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
+      {% if config.get('file_format', validator=validation.any[basestring]) in ['delta', 'iceberg'] %}
         create or replace table {{ relation }}
       {% else %}
         create table {{ relation }}
@@ -243,7 +254,10 @@
 {% endmacro %}
 
 {% macro spark__get_columns_in_relation(relation) -%}
-  {{ return(adapter.get_columns_in_relation(relation)) }}
+  {% call statement('get_columns_in_relation', fetch_result=True) %}
+      describe extended {{ relation.include(schema=(schema is not none)) }}
+  {% endcall %}
+  {% do return(load_result('get_columns_in_relation').table) %}
 {% endmacro %}
 
 {% macro spark__list_relations_without_caching(relation) %}
@@ -254,6 +268,27 @@
   {% do return(load_result('list_relations_without_caching').table) %}
 {% endmacro %}
 
+{% macro list_relations_show_tables_without_caching(schema_relation) %}
+  {#-- Spark with iceberg tables don't work with show table extended for #}
+  {#-- V2 iceberg tables #}
+  {#-- https://issues.apache.org/jira/browse/SPARK-33393 #}
+  {% call statement('list_relations_without_caching_show_tables', fetch_result=True) -%}
+    show tables in {{ schema_relation }} like '*'
+  {% endcall %}
+
+  {% do return(load_result('list_relations_without_caching_show_tables').table) %}
+{% endmacro %}
+
+{% macro describe_table_extended_without_caching(table_name) %}
+  {#-- Spark with iceberg tables don't work with show table extended for #}
+  {#-- V2 iceberg tables #}
+  {#-- https://issues.apache.org/jira/browse/SPARK-33393 #}
+  {% call statement('describe_table_extended_without_caching', fetch_result=True) -%}
+    describe extended {{ table_name }}
+  {% endcall %}
+  {% do return(load_result('describe_table_extended_without_caching').table) %}
+{% endmacro %}
+
 {% macro spark__list_schemas(database) -%}
   {% call statement('list_schemas', fetch_result=True, auto_begin=False) %}
     show databases
@@ -293,14 +328,20 @@
 {% endmacro %}
 
 {% macro spark__alter_column_comment(relation, column_dict) %}
-  {% if config.get('file_format', validator=validation.any[basestring]) in ['delta', 'hudi'] %}
+  {% if config.get('file_format', validator=validation.any[basestring]) in ['delta', 'hudi', 'iceberg'] %}
     {% for column_name in column_dict %}
       {% set comment = column_dict[column_name]['description'] %}
       {% set escaped_comment = comment | replace('\'', '\\\'') %}
       {% set comment_query %}
-        alter table {{ relation }} change column
-            {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }}
-            comment '{{ escaped_comment }}';
+        {% if relation.is_iceberg %}
+          alter table {{ relation }} alter column
+              {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }}
+              comment '{{ escaped_comment }}';
+        {% else %}
+          alter table {{ relation }} change column
+              {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }}
+              comment '{{ escaped_comment }}';
+        {% endif %}
       {% endset %}
       {% do run_query(comment_query) %}
     {% endfor %}
@@ -328,7 +369,13 @@
 {% macro spark__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
 
   {% if remove_columns %}
-    {% set platform_name = 'Delta Lake' if relation.is_delta else 'Apache Spark' %}
+    {% if relation.is_delta %}
+      {% set platform_name = 'Delta Lake' %}
+    {% elif relation.is_iceberg %}
+      {% set platform_name = 'Iceberg' %}
+    {% else %}
+      {% set platform_name = 'Apache Spark' %}
+    {% endif %}
     {{ exceptions.raise_compiler_error(platform_name + ' does not support dropping columns from tables') }}
   {% endif %}
 
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index cc46d4c1..d2c1f5e4 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -55,7 +55,7 @@
     {%- endcall -%}
     {%- do process_schema_changes(on_schema_change, tmp_relation, existing_relation) -%}
     {%- call statement('main') -%}
-      {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key, incremental_predicates) }}
+      {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, existing_relation, unique_key, incremental_predicates) }}
     {%- endcall -%}
     {%- if language == 'python' -%}
       {#--
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index facfaadf..eeb92049 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -1,9 +1,15 @@
-{% macro get_insert_overwrite_sql(source_relation, target_relation) %}
+{% macro get_insert_overwrite_sql(source_relation, target_relation, existing_relation) %}
 
     {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}
     {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
-    insert overwrite table {{ target_relation }}
-    {{ partition_cols(label="partition") }}
+    {% if existing_relation.is_iceberg %}
+      {# removed table from statement for iceberg #}
+      insert overwrite {{ target_relation }}
+      {# removed partition_cols for iceberg as well #}
+    {% else %}
+      insert overwrite table {{ target_relation }}
+      {{ partition_cols(label="partition") }}
+    {% endif %}
     select {{dest_cols_csv}} from {{ source_relation }}
 
 {% endmacro %}
@@ -62,15 +68,15 @@
 {% endmacro %}
 
 
-{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key, incremental_predicates) %}
+{% macro dbt_spark_get_incremental_sql(strategy, source, target, existing, unique_key, incremental_predicates) %}
   {%- if strategy == 'append' -%}
     {#-- insert new records into existing table, without updating or overwriting #}
     {{ get_insert_into_sql(source, target) }}
   {%- elif strategy == 'insert_overwrite' -%}
     {#-- insert statements don't like CTEs, so support them via a temp view #}
-    {{ get_insert_overwrite_sql(source, target) }}
+    {{ get_insert_overwrite_sql(source, target, existing) }}
   {%- elif strategy == 'merge' -%}
-  {#-- merge all columns with databricks delta - schema changes are handled for us #}
+  {#-- merge all columns for datasources which implement MERGE INTO (e.g. databricks, iceberg) - schema changes are handled for us #}
     {{ get_merge_sql(target, source, unique_key, dest_columns=none, incremental_predicates=incremental_predicates) }}
   {%- else -%}
     {% set no_sql_for_strategy_msg -%}
diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql
index ffd56f10..71ec0182 100644
--- a/dbt/include/spark/macros/materializations/incremental/validate.sql
+++ b/dbt/include/spark/macros/materializations/incremental/validate.sql
@@ -1,7 +1,7 @@
 {% macro dbt_spark_validate_get_file_format(raw_file_format) %}
   {#-- Validate the file format #}
 
-  {% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm', 'hudi'] %}
+  {% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'iceberg', 'libsvm', 'hudi'] %}
 
   {% set invalid_file_format_msg -%}
     Invalid file format provided: {{ raw_file_format }}
@@ -26,12 +26,12 @@
 
   {% set invalid_merge_msg -%}
     Invalid incremental strategy provided: {{ raw_strategy }}
-    You can only choose this strategy when file_format is set to 'delta' or 'hudi'
+    You can only choose this strategy when file_format is set to 'delta' or 'iceberg' or 'hudi'
   {%- endset %}
 
   {% set invalid_insert_overwrite_delta_msg -%}
     Invalid incremental strategy provided: {{ raw_strategy }}
-    You cannot use this strategy when file_format is set to 'delta'
+    You cannot use this strategy when file_format is set to 'delta' or 'iceberg'
     Use the 'append' or 'merge' strategy instead
   {%- endset %}
 
@@ -44,7 +44,7 @@
   {% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %}
     {% do exceptions.raise_compiler_error(invalid_strategy_msg) %}
   {%-else %}
-    {% if raw_strategy == 'merge' and file_format not in ['delta', 'hudi'] %}
+    {% if raw_strategy == 'merge' and file_format not in ['delta', 'iceberg', 'hudi'] %}
       {% do exceptions.raise_compiler_error(invalid_merge_msg) %}
     {% endif %}
     {% if raw_strategy == 'insert_overwrite' and file_format == 'delta' %}
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index 6cf2358f..a397f84e 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -15,7 +15,12 @@
 {% macro spark__snapshot_merge_sql(target, source, insert_cols) -%}
 
     merge into {{ target }} as DBT_INTERNAL_DEST
-    using {{ source }} as DBT_INTERNAL_SOURCE
+    {% if target.is_iceberg %}
+      {# create view only supports a name (no catalog, or schema) #}
+      using {{ source.identifier }} as DBT_INTERNAL_SOURCE
+    {% else %}
+      using {{ source }} as DBT_INTERNAL_SOURCE
+    {% endif %}
     on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id
     when matched
      and DBT_INTERNAL_DEST.dbt_valid_to is null
@@ -33,10 +38,18 @@
 {% macro spark_build_snapshot_staging_table(strategy, sql, target_relation) %}
     {% set tmp_identifier = target_relation.identifier ~ '__dbt_tmp' %}
 
-    {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier,
-                                                  schema=target_relation.schema,
-                                                  database=none,
-                                                  type='view') -%}
+    {% if target_relation.is_iceberg %}
+      {# iceberg catalog does not support create view, but regular spark does. We removed the catalog and schema #}
+      {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier,
+                                                    schema=none,
+                                                    database=none,
+                                                    type='view') -%}
+    {% else %}
+      {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier,
+                                                    schema=target_relation.schema,
+                                                    database=none,
+                                                    type='view') -%}
+    {% endif %}
 
     {% set select = snapshot_staging_table(strategy, sql, target_relation) %}
 
@@ -83,25 +96,25 @@
           identifier=target_table,
           type='table') -%}
 
-  {%- if file_format not in ['delta', 'hudi'] -%}
+  {%- if file_format not in ['delta', 'iceberg', 'hudi'] -%}
     {% set invalid_format_msg -%}
       Invalid file format: {{ file_format }}
-      Snapshot functionality requires file_format be set to 'delta' or 'hudi'
+      Snapshot functionality requires file_format be set to 'delta' or 'iceberg' or 'hudi'
     {%- endset %}
     {% do exceptions.raise_compiler_error(invalid_format_msg) %}
   {% endif %}
 
   {%- if target_relation_exists -%}
-    {%- if not target_relation.is_delta and not target_relation.is_hudi -%}
+    {%- if not target_relation.is_delta and not target_relation.is_iceberg and not target_relation.is_hudi -%}
       {% set invalid_format_msg -%}
-        The existing table {{ model.schema }}.{{ target_table }} is in another format than 'delta' or 'hudi'
+        The existing table {{ model.schema }}.{{ target_table }} is in another format than 'delta' or 'iceberg' or 'hudi'
       {%- endset %}
       {% do exceptions.raise_compiler_error(invalid_format_msg) %}
     {% endif %}
   {% endif %}
 
   {% if not adapter.check_schema_exists(model.database, model.schema) %}
-    {% do create_schema(model.database, model.schema) %}
+    {% do create_schema(model.schema) %}
   {% endif %}
 
   {%- if not target_relation.is_table -%}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index d323e4f3..927816de 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -12,12 +12,16 @@
   {{ run_hooks(pre_hooks) }}
 
   -- setup: if the target relation already exists, drop it
-  -- in case if the existing and future table is delta, we want to do a
+  -- in case if the existing and future table is delta or iceberg, we want to do a
   -- create or replace table instead of dropping, so we don't have the table unavailable
   {% if old_relation and not (old_relation.is_delta and config.get('file_format', validator=validation.any[basestring]) == 'delta') -%}
     {{ adapter.drop_relation(old_relation) }}
   {%- endif %}
 
+  {% if old_relation and not (old_relation.is_iceberg and config.get('file_format', validator=validation.any[basestring]) == 'iceberg') -%}
+    {{ adapter.drop_relation(old_relation) }}
+  {%- endif %}
+
   -- build model
 
   {%- call statement('main', language=language) -%}
diff --git a/tests/functional/adapter/seed_column_types/fixtures.py b/tests/functional/adapter/seed_column_types/fixtures.py
index 298b49e9..fb53bd99 100644
--- a/tests/functional/adapter/seed_column_types/fixtures.py
+++ b/tests/functional/adapter/seed_column_types/fixtures.py
@@ -7,9 +7,7 @@
         'numeric': column.is_numeric,
         'integer': column.is_integer,
     } %}
-    {% if check not in checks %}
-        {% do exceptions.raise_compiler_error('invalid type check value: ' ~ check) %}
-    {% endif %}
+
     {{ return(checks[check]()) }}
 {% endmacro %}
 
@@ -18,7 +16,6 @@
     {% for type_check in type_checks %}
         {% if type_check.startswith('not ') %}
             {% if simple_type_check_column(column, type_check[4:]) %}
-                {% do log('simple_type_check_column got ', True) %}
                 {% do failures.append(type_check) %}
             {% endif %}
         {% else %}
@@ -27,9 +24,7 @@
             {% endif %}
         {% endif %}
     {% endfor %}
-    {% if (failures | length) > 0 %}
-        {% do log('column ' ~ column.name ~ ' had failures: ' ~ failures, info=True) %}
-    {% endif %}
+
     {% do return((failures | length) == 0) %}
 {% endmacro %}
 
@@ -47,36 +42,37 @@
     {{ return(not type_check_column(column, type_checks)) }}
 {% endmacro %}
 
-{% test is_type(seed, column_map) %}
+{% test is_type(model, column_map) %}
     {% if not execute %}
         {{ return(None) }}
     {% endif %}
-    {% if not column_map %}
-        {% do exceptions.raise_compiler_error('test_is_type must have a column name') %}
-    {% endif %}
-    {% set columns = adapter.get_columns_in_relation(seed) %}
+    
+    {% set columns = adapter.get_columns_in_relation(model) %}
     {% if (column_map | length) != (columns | length) %}
         {% set column_map_keys = (column_map | list | string) %}
         {% set column_names = (columns | map(attribute='name') | list | string) %}
         {% do exceptions.raise_compiler_error('did not get all the columns/all columns not specified:\n' ~ column_map_keys ~ '\nvs\n' ~ column_names) %}
     {% endif %}
+    
     {% set bad_columns = [] %}
     {% for column in columns %}
         {% if is_bad_column(column, column_map) %}
             {% do bad_columns.append(column.name) %}
         {% endif %}
     {% endfor %}
-    {% do log('bad columns: ' ~ bad_columns, info=True) %}
-    {% for bad_column in bad_columns %}
-      select '{{ bad_column }}' as bad_column
-      {{ 'union all' if not loop.last }}
-    {% endfor %}
-      select * from (select 1 limit 0) as nothing
+    
+    {% set num_bad_columns = (bad_columns | length) %}
+
+    select '{{ num_bad_columns }}' as bad_column
+    group by 1
+    having bad_column > 0
+
 {% endtest %}
-"""
+""".strip()
+
 
 _SEED_CSV = """
-ID,ORDERID,PAYMENTMETHOD,STATUS,AMOUNT,AMOUNT_USD,CREATED
+id,orderid,paymentmethod,status,amount,amount_usd,created
 1,1,credit_card,success,1000,10.00,2018-01-01
 2,2,credit_card,success,2000,20.00,2018-01-02
 3,3,coupon,success,100,1.00,2018-01-04
@@ -87,9 +83,12 @@
 8,7,credit_card,success,1600,16.00,2018-01-09
 9,8,credit_card,success,2300,23.00,2018-01-11
 10,9,gift_card,success,2300,23.00,2018-01-12
-"""
+""".strip()
+
 
 _SEED_YML = """
+version: 2
+
 seeds:
   - name: payments
     config:
@@ -111,4 +110,4 @@
                 amount: ["integer", "number"]
                 amount_usd: ["decimal", "number"]
                 created: ["timestamp", "string"]
-"""
+""".strip()
diff --git a/tests/functional/adapter/seed_column_types/test_seed_column_types.py b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
index 6a6175ff..b818092f 100644
--- a/tests/functional/adapter/seed_column_types/test_seed_column_types.py
+++ b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
@@ -6,6 +6,7 @@
     _SEED_YML
 )
 
+
 @pytest.mark.skip_profile('spark_session')
 class TestSeedColumnTypesCast:
 
@@ -20,13 +21,9 @@ def seeds(self):
             "schema.yml": _SEED_YML
         }
 
-    def run_and_test(self):
-        results = run_dbt(["seed"])
-        assert len(results) == 1
-        run_dbt(["test"])
-
-
     #  We want to test seed types because hive would cause all fields to be strings.
     # setting column_types in project.yml should change them and pass.
-    def test_column_seed_type(self):
-        self.run_and_test()
+    def test_column_seed_type(self, project):
+        results = run_dbt(["seed"])
+        assert len(results) == 1
+        run_dbt(["test"], expect_pass=False)

From eadf4a7275cce4ba43561a609fa44672066370af Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 2 Mar 2023 19:23:28 -0800
Subject: [PATCH 407/603] Bumping version to 1.5.0b2 and generate changelog
 (#662)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                                  |  2 +-
 .changes/1.5.0-b2.md                              | 12 ++++++++++++
 .../Fixes-20220819-141350.yaml                    |  0
 .../Under the Hood-20230223-105838.yaml           |  0
 CHANGELOG.md                                      | 15 ++++++++++++++-
 dbt/adapters/spark/__version__.py                 |  2 +-
 setup.py                                          |  2 +-
 7 files changed, 29 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.5.0-b2.md
 rename .changes/{unreleased => 1.5.0}/Fixes-20220819-141350.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Under the Hood-20230223-105838.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 0ae6b8fb..1b1f4e67 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.5.0b1
+current_version = 1.5.0b2
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.5.0-b2.md b/.changes/1.5.0-b2.md
new file mode 100644
index 00000000..7e248bf4
--- /dev/null
+++ b/.changes/1.5.0-b2.md
@@ -0,0 +1,12 @@
+## dbt-spark 1.5.0-b2 - March 03, 2023
+
+### Fixes
+
+- Support for iceberg v2 tables.  Added ability to use multiple join conditions to allow for multiple columns to make a row distinct. ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
+
+### Under the Hood
+
+- Rename constraints_enabled to contract ([#645](https://github.com/dbt-labs/dbt-spark/issues/645))
+
+### Contributors
+- [@dparent1](https://github.com/dparent1) ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
diff --git a/.changes/unreleased/Fixes-20220819-141350.yaml b/.changes/1.5.0/Fixes-20220819-141350.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20220819-141350.yaml
rename to .changes/1.5.0/Fixes-20220819-141350.yaml
diff --git a/.changes/unreleased/Under the Hood-20230223-105838.yaml b/.changes/1.5.0/Under the Hood-20230223-105838.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230223-105838.yaml
rename to .changes/1.5.0/Under the Hood-20230223-105838.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf19c6c8..ed67043f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,20 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.5.0-b2 - March 03, 2023
+
+### Fixes
+
+- Support for iceberg v2 tables.  Added ability to use multiple join conditions to allow for multiple columns to make a row distinct. ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
+
+### Under the Hood
+
+- Rename constraints_enabled to contract ([#645](https://github.com/dbt-labs/dbt-spark/issues/645))
+
+### Contributors
+- [@dparent1](https://github.com/dparent1) ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
+
+
 ## dbt-spark 1.5.0-b1 - February 22, 2023
 
 ### Features
@@ -27,7 +41,6 @@
 - [@b-per](https://github.com/b-per) ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
 - [@dave-connors-3](https://github.com/dave-connors-3) ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
 
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index c3758128..4f8b1531 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.5.0b1"
+version = "1.5.0b2"
diff --git a/setup.py b/setup.py
index dd5a6491..cf5e1c43 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.5.0b1"
+package_version = "1.5.0b2"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From a405d24074f27637e63571c6c66d2727f96f0273 Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Fri, 3 Mar 2023 14:42:58 -0500
Subject: [PATCH 408/603] implement data_type_code_to_name on
 SparkConnectionManager (#647)

* implement data_type_code_to_name on SparkConnectionManager
---
 .changes/unreleased/Features-20230223-180923.yaml | 6 ++++++
 dbt/adapters/spark/connections.py                 | 4 ++++
 2 files changed, 10 insertions(+)
 create mode 100644 .changes/unreleased/Features-20230223-180923.yaml

diff --git a/.changes/unreleased/Features-20230223-180923.yaml b/.changes/unreleased/Features-20230223-180923.yaml
new file mode 100644
index 00000000..4faed7bc
--- /dev/null
+++ b/.changes/unreleased/Features-20230223-180923.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: implement data_type_code_to_name on SparkConnectionManager
+time: 2023-02-23T18:09:23.787675-05:00
+custom:
+  Author: michelleark
+  Issue: "639"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index a606beb7..88163b5c 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -492,6 +492,10 @@ def open(cls, connection):
         connection.state = ConnectionState.OPEN
         return connection
 
+    @classmethod
+    def data_type_code_to_name(cls, type_code: str) -> str:
+        return type_code
+
 
 def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None):
     transport = None

From 0cb64bc61ba08ddbb8efce9f0efb7dee8ffe8a54 Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Mon, 6 Mar 2023 15:00:41 -0800
Subject: [PATCH 409/603] fix SparkConnectionManager.data_type_code_to_name for
 pyodbc (#664)

 data_type_code_to_name supports pyodbc
---
 .../unreleased/Fixes-20230303-200542.yaml     |   6 +
 dbt/adapters/spark/connections.py             |  15 ++-
 tests/functional/adapter/test_constraints.py  | 103 +++++++++++++++++-
 3 files changed, 116 insertions(+), 8 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230303-200542.yaml

diff --git a/.changes/unreleased/Fixes-20230303-200542.yaml b/.changes/unreleased/Fixes-20230303-200542.yaml
new file mode 100644
index 00000000..948f98bb
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230303-200542.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: 'Fix pyodbc type_code -> data_type conversion '
+time: 2023-03-03T20:05:42.400255-05:00
+custom:
+  Author: michelleark
+  Issue: "665"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 88163b5c..df0dcb5e 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -25,7 +25,7 @@
 
 from hologram.helpers import StrEnum
 from dataclasses import dataclass, field
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 try:
     from thrift.transport.TSSLSocket import TSSLSocket
@@ -493,8 +493,17 @@ def open(cls, connection):
         return connection
 
     @classmethod
-    def data_type_code_to_name(cls, type_code: str) -> str:
-        return type_code
+    def data_type_code_to_name(cls, type_code: Union[type, str]) -> str:  # type: ignore
+        """
+        :param Union[type, str] type_code: The type_code from the cursor description.
+            * type_code is a python type (!) in pyodbc https://github.com/mkleehammer/pyodbc/wiki/Cursor#description, and a string for other spark runtimes.
+            * ignoring the type annotation on the signature for this adapter instead of updating the base class because this feels like a really special case.
+        :return: the stringified cursor type_code
+        :rtype: str
+        """
+        if isinstance(type_code, str):
+            return type_code
+        return type_code.__name__.upper()
 
 
 def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None):
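
A small, hedged usage sketch of the conversion above (standalone, outside the
adapter class): pyodbc reports column types as Python classes in
cursor.description, while other Spark runtimes report plain strings.

    from typing import Union

    def data_type_code_to_name(type_code: Union[type, str]) -> str:
        if isinstance(type_code, str):
            return type_code               # thrift/http runtimes already return a string
        return type_code.__name__.upper()  # pyodbc returns a python type, e.g. str -> "STR"

    print(data_type_code_to_name("decimal(10,2)"))  # -> decimal(10,2)
    print(data_type_code_to_name(str))              # -> STR
    print(data_type_code_to_name(int))              # -> INT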
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index ecf67384..14d7291d 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -4,10 +4,16 @@
     BaseConstraintsColumnsEqual,
     BaseConstraintsRuntimeEnforcement
 )
+from dbt.tests.adapter.constraints.fixtures import (
+    my_model_sql,
+    my_model_wrong_order_sql,
+    my_model_wrong_name_sql,
+    model_schema_yml,
+)
 
 # constraints are enforced via 'alter' statements that run after table creation
 _expected_sql_spark = """
-create or replace table {0}  
+create or replace table {0}
     using delta
     as
 
@@ -17,12 +23,99 @@
     cast('2019-01-01' as date) as date_day
 """
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
-class TestSparkConstraintsColumnsEqual(BaseConstraintsColumnsEqual):
-    pass
+# Different on Spark:
+# - does not support a data type named 'text' (TODO: handle this in the base test classes using string_type)
+constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "")
+
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_http_cluster')
+class TestSparkConstraintsColumnsEqualPyodbc(BaseConstraintsColumnsEqual):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+    @pytest.fixture
+    def string_type(self):
+        return "STR"
+
+    @pytest.fixture
+    def int_type(self):
+        return "INT"
+
+    @pytest.fixture
+    def schema_int_type(self):
+        return "INT"
+
+    @pytest.fixture
+    def data_types(self, int_type, schema_int_type, string_type):
+        # sql_column_value, schema_data_type, error_data_type
+        return [
+            # TODO: the int type is tricky to test in test__constraints_wrong_column_data_type without a schema_string_type to override.
+            # uncomment the line below once https://github.com/dbt-labs/dbt-core/issues/7121 is resolved
+            # ['1', schema_int_type, int_type],
+            ['"1"', "string", string_type],
+            ["true", "boolean", "BOOL"],
+            ['array("1","2","3")', "string", string_type],
+            ['array(1,2,3)', "string", string_type],
+            ["6.45", "decimal", "DECIMAL"],
+            # TODO: test__constraints_correct_column_data_type isn't able to run the following statements in create table statements with pyodbc
+            # ["cast('2019-01-01' as date)", "date", "DATE"],
+            # ["cast('2019-01-01' as timestamp)", "date", "DATE"],
+        ]
+
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_sql_endpoint', 'databricks_cluster')
+class TestSparkConstraintsColumnsEqualDatabricksHTTP(BaseConstraintsColumnsEqual):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+    @pytest.fixture
+    def string_type(self):
+        return "STRING_TYPE"
+
+    @pytest.fixture
+    def int_type(self):
+        return "INT_TYPE"
+
+    @pytest.fixture
+    def schema_int_type(self):
+        return "INT"
+
+    @pytest.fixture
+    def data_types(self, int_type, schema_int_type, string_type):
+        # sql_column_value, schema_data_type, error_data_type
+        return [
+            # TODO: the int type is tricky to test in test__constraints_wrong_column_data_type without a schema_string_type to override.
+            # uncomment the line below once https://github.com/dbt-labs/dbt-core/issues/7121 is resolved
+            # ['1', schema_int_type, int_type],
+            ['"1"', "string", string_type],
+            ["true", "boolean", "BOOLEAN_TYPE"],
+            ['array("1","2","3")', "array<string>", "ARRAY_TYPE"],
+            ['array(1,2,3)', "array<int>", "ARRAY_TYPE"],
+            ["cast('2019-01-01' as date)", "date", "DATE_TYPE"],
+            ["cast('2019-01-01' as timestamp)", "timestamp", "TIMESTAMP_TYPE"],
+            ["cast(1.0 AS DECIMAL(4, 2))", "decimal", "DECIMAL_TYPE"],
+        ]
+
 
 @pytest.mark.skip_profile('spark_session', 'apache_spark')
 class TestSparkConstraintsRuntimeEnforcement(BaseConstraintsRuntimeEnforcement):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
@@ -30,7 +123,7 @@ def project_config_update(self):
                 "+file_format": "delta",
             }
         }
-    
+
     @pytest.fixture(scope="class")
     def expected_sql(self, project):
         relation = relation_from_name(project.adapter, "my_model")

From 88266c655763b5bdbcf56260a4cd0f60ef3fa3f5 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Wed, 8 Mar 2023 11:28:06 -0600
Subject: [PATCH 410/603] support contracts on models materialized as view
 (#670)

* first pass to add view mat contract check

* changelog, point to branch for testing, fix typo

* fix tests

* fix skips

* remove dbt-core pin

* remove incremental temp logic
---
 .../unreleased/Features-20230223-180923.yaml  |  6 ++--
 dbt/include/spark/macros/adapters.sql         |  3 ++
 tests/functional/adapter/test_constraints.py  | 29 +++++++++++++++----
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/.changes/unreleased/Features-20230223-180923.yaml b/.changes/unreleased/Features-20230223-180923.yaml
index 4faed7bc..de98ef41 100644
--- a/.changes/unreleased/Features-20230223-180923.yaml
+++ b/.changes/unreleased/Features-20230223-180923.yaml
@@ -1,6 +1,6 @@
 kind: Features
-body: implement data_type_code_to_name on SparkConnectionManager
+body: Enforce contracts on models materialized as tables and views
 time: 2023-02-23T18:09:23.787675-05:00
 custom:
-  Author: michelleark
-  Issue: "639"
+  Author: michelleark emmyoop
+  Issue: 639 654
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 656e6b3a..725277b3 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -226,6 +226,9 @@
 {% macro spark__create_view_as(relation, sql) -%}
   create or replace view {{ relation }}
   {{ comment_clause() }}
+  {% if config.get('contract', False) -%}
+    {{ get_assert_columns_equivalent(sql) }}
+  {%- endif %}
   as
     {{ sql }}
 {% endmacro %}
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 14d7291d..27cf59f1 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -1,7 +1,8 @@
 import pytest
 from dbt.tests.util import relation_from_name
 from dbt.tests.adapter.constraints.test_constraints import (
-    BaseConstraintsColumnsEqual,
+    BaseTableConstraintsColumnsEqual,
+    BaseViewConstraintsColumnsEqual,
     BaseConstraintsRuntimeEnforcement
 )
 from dbt.tests.adapter.constraints.fixtures import (
@@ -28,8 +29,7 @@
 constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "")
 
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_http_cluster')
-class TestSparkConstraintsColumnsEqualPyodbc(BaseConstraintsColumnsEqual):
+class PyodbcSetup:
     @pytest.fixture(scope="class")
     def models(self):
         return {
@@ -68,8 +68,7 @@ def data_types(self, int_type, schema_int_type, string_type):
         ]
 
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_sql_endpoint', 'databricks_cluster')
-class TestSparkConstraintsColumnsEqualDatabricksHTTP(BaseConstraintsColumnsEqual):
+class DatabricksHTTPSetup:
     @pytest.fixture(scope="class")
     def models(self):
         return {
@@ -107,6 +106,26 @@ def data_types(self, int_type, schema_int_type, string_type):
         ]
 
 
+@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_http_cluster')
+class TestSparkTableConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseTableConstraintsColumnsEqual):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_http_cluster')
+class TestSparkViewConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseViewConstraintsColumnsEqual):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_sql_endpoint', 'databricks_cluster')
+class TestSparkTableConstraintsColumnsEqualDatabricksHTTP(DatabricksHTTPSetup, BaseTableConstraintsColumnsEqual):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_sql_endpoint', 'databricks_cluster')
+class TestSparkViewConstraintsColumnsEqualDatabricksHTTP(DatabricksHTTPSetup, BaseViewConstraintsColumnsEqual):
+    pass
+
+
 @pytest.mark.skip_profile('spark_session', 'apache_spark')
 class TestSparkConstraintsRuntimeEnforcement(BaseConstraintsRuntimeEnforcement):
     @pytest.fixture(scope="class")

From 28589d25ed1a90e7f36e2617eea8cfefac772c28 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 9 Mar 2023 01:51:12 -0600
Subject: [PATCH 411/603] work on adapter cut-release and nightly-release
 update (#668)

* initial push adding two new bash scripts, triggered by the cut-release-branch workflow, to automate the release process

* remove unused sh file

* point to test branch for action for testing

* point back to main branch of action post testing

* change action pointer to test new logic

* change action pointer to test new logic

* change action pointer to test new logic

* change action pointer back to main post testing

* remove unneeded changelog
---
 .github/scripts/update_dependencies.sh   | 15 +++++++++++++++
 .github/workflows/cut-release-branch.yml |  2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 .github/scripts/update_dependencies.sh

diff --git a/.github/scripts/update_dependencies.sh b/.github/scripts/update_dependencies.sh
new file mode 100644
index 00000000..c3df48e5
--- /dev/null
+++ b/.github/scripts/update_dependencies.sh
@@ -0,0 +1,15 @@
+#!/bin/bash -e
+set -e
+
+git_branch=$1
+target_req_file="dev-requirements.txt"
+core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g"
+tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g"
+if [[ "$OSTYPE" == darwin* ]]; then
+ # macOS ships with BSD sed, whose -i flag requires an explicit (here empty) backup-suffix argument
+ sed -i "" "$core_req_sed_pattern" $target_req_file
+ sed -i "" "$tests_req_sed_pattern" $target_req_file
+else
+ sed -i "$core_req_sed_pattern" $target_req_file
+ sed -i "$tests_req_sed_pattern" $target_req_file
+fi
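
For illustration only, the same substitution expressed in Python; the workflow
itself runs the sed commands above, and the sample requirement line and branch
name below are hypothetical.

    import re

    git_branch = "1.5.latest"  # hypothetical branch name
    line = "git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core"
    updated = re.sub(
        r"dbt-core\.git.*#egg=dbt-core",
        f"dbt-core.git@{git_branch}#egg=dbt-core",
        line,
    )
    print(updated)
    # git+https://github.com/dbt-labs/dbt-core.git@1.5.latest#egg=dbt-core&subdirectory=core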
diff --git a/.github/workflows/cut-release-branch.yml b/.github/workflows/cut-release-branch.yml
index 4102bedc..f8dfa217 100644
--- a/.github/workflows/cut-release-branch.yml
+++ b/.github/workflows/cut-release-branch.yml
@@ -37,6 +37,6 @@ jobs:
       version_to_bump_main: ${{ inputs.version_to_bump_main }}
       new_branch_name: ${{ inputs.new_branch_name }}
       PR_title: "Cleanup main after cutting new ${{ inputs.new_branch_name }} branch"
-      PR_body: "This PR will fail CI until the dbt-core PR has been merged due to release version conflicts.  dev-requirements.txt needs to be updated to have the dbt-core dependencies point to this new branch."
+      PR_body: "This PR will fail CI until the dbt-core PR has been merged due to release version conflicts."
     secrets:
       FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }}

From 1eca7a6f39554462d94045549194df02b4ceb950 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Mon, 13 Mar 2023 12:38:03 -0600
Subject: [PATCH 412/603] add triage-labels workflow (#678)

* add triage-labels workflow

* fix end of files
---
 .github/workflows/triage-labels.yml | 33 +++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 .github/workflows/triage-labels.yml

diff --git a/.github/workflows/triage-labels.yml b/.github/workflows/triage-labels.yml
new file mode 100644
index 00000000..a71dc5e1
--- /dev/null
+++ b/.github/workflows/triage-labels.yml
@@ -0,0 +1,33 @@
+# **what?**
+# When the core team triages, we sometimes need more information from the issue creator.  In
+# those cases we remove the `triage` label and add the `awaiting_response` label.  Once we
+# receive a response in the form of a comment, we want the `awaiting_response` label removed
+# in favor of the `triage` label so we are aware that the issue needs action.
+
+# **why?**
+# To help with our team's triage issue tracking
+
+# **when?**
+# This will run when a comment is added to an issue and that issue has the `awaiting_response` label.
+
+name: Update Triage Label
+
+on: issue_comment
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  issues: write
+
+jobs:
+  triage_label:
+    if: contains(github.event.issue.labels.*.name, 'awaiting_response')
+    runs-on: ubuntu-latest
+    steps:
+      - name: initial labeling
+        uses: andymckay/labeler@master
+        with:
+          add-labels: "triage"
+          remove-labels: "awaiting_response"

From 9596853d39bf33de2a3eb42588b24c3821efe03c Mon Sep 17 00:00:00 2001
From: Akash R Nilugal <akashnilugal@gmail.com>
Date: Wed, 15 Mar 2023 01:50:49 +0530
Subject: [PATCH 413/603] Use take() instead of collect on dataframe to improve
 the performance (#527)

Co-authored-by: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
---
 .changes/unreleased/Fixes-20221124-125501.yaml | 7 +++++++
 dbt/adapters/spark/session.py                  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20221124-125501.yaml

diff --git a/.changes/unreleased/Fixes-20221124-125501.yaml b/.changes/unreleased/Fixes-20221124-125501.yaml
new file mode 100644
index 00000000..2badb1dc
--- /dev/null
+++ b/.changes/unreleased/Fixes-20221124-125501.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Use take() instead of collect on dataframe to improve the performance
+time: 2022-11-24T12:55:01.928275+05:30
+custom:
+  Author: akashrn5
+  Issue: "526"
+  PR: "527"
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index beb77d54..d275c73c 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -140,7 +140,7 @@ def fetchone(self) -> Optional[Row]:
         https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchone
         """
         if self._rows is None and self._df is not None:
-            self._rows = self._df.collect()
+            self._rows = self._df.take(1)
 
         if self._rows is not None and len(self._rows) > 0:
             row = self._rows.pop(0)

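For context on the one-line change above: collect() materializes every row of the DataFrame on the driver, while fetchone() only needs the next row, so take(1) transfers and computes far less. A minimal PySpark sketch of the difference (the local session and row count are illustrative only, not part of the adapter):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").appName("take-vs-collect").getOrCreate()
    df = spark.range(1_000_000)  # single-column DataFrame named "id"

    all_rows = df.collect()  # pulls all 1,000,000 Row objects to the driver
    first_row = df.take(1)   # pulls a single Row, enough for a fetchone()-style read

    print(len(all_rows), first_row[0].id)
    spark.stop()
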
From 5c22997975871af497650baa4eeedbe199853f72 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Wed, 15 Mar 2023 10:43:22 -0700
Subject: [PATCH 414/603] ADAP-370/remove integration references (#683)

* Cleanup integration test references and assets

* Add tests to pre-commit hooks and close out this repo for test conversions.

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .gitignore                                    |   1 -
 .pre-commit-config.yaml                       |   3 -
 pytest.ini                                    |   1 -
 requirements.txt                              |   3 +
 tests/conftest.py                             |  14 +-
 .../test_incremental_merge_exclude_columns.py |  14 +-
 .../test_incremental_on_schema_change.py      |  14 +-
 .../test_incremental_predicates.py            |  28 +-
 .../incremental/test_incremental_unique_id.py |   5 +-
 .../incremental_strategies/fixtures.py        |   2 +-
 .../test_incremental_strategies.py            |  77 +-
 .../adapter/persist_docs/test_persist_docs.py |  63 +-
 .../adapter/seed_column_types/fixtures.py     |   6 +-
 .../test_seed_column_types.py                 |  10 +-
 .../test_store_test_failures.py               |  26 +-
 tests/functional/adapter/test_basic.py        |  19 +-
 tests/functional/adapter/test_constraints.py  |  28 +-
 .../adapter/test_get_columns_in_relation.py   |   1 -
 tests/functional/adapter/test_grants.py       |   2 +-
 tests/functional/adapter/test_python_model.py |  11 +-
 .../adapter/utils/test_data_types.py          |  14 +-
 .../adapter/utils/test_timestamps.py          |   8 +-
 tests/functional/adapter/utils/test_utils.py  |  11 +-
 tests/integration/__init__.py                 |   0
 tests/integration/base.py                     | 965 ------------------
 tests/integration/conftest.py                 |   4 -
 tests/unit/test_adapter.py                    | 898 ++++++++--------
 tests/unit/test_column.py                     |  34 +-
 tests/unit/test_macros.py                     | 223 ++--
 tests/unit/utils.py                           | 120 +--
 30 files changed, 853 insertions(+), 1752 deletions(-)
 delete mode 100644 tests/integration/__init__.py
 delete mode 100644 tests/integration/base.py
 delete mode 100644 tests/integration/conftest.py

diff --git a/.gitignore b/.gitignore
index 189589cf..33a83848 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,4 +44,3 @@ test.env
 .hive-metastore/
 .spark-warehouse/
 dbt-integration-tests
-test/integration/.user.yml
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1141ccc9..b748e03e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,8 +1,5 @@
 # For more on configuring pre-commit hooks (see https://pre-commit.com/)
 
-# TODO: remove global exclusion of tests when testing overhaul is complete
-exclude: '^tests/.*'
-
 # Force all unspecified python hooks to run python 3.8
 default_language_version:
   python: python3
diff --git a/pytest.ini b/pytest.ini
index b04a6ccf..b3d74bc1 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -6,5 +6,4 @@ env_files =
     test.env
 testpaths =
     tests/unit
-    tests/integration
     tests/functional
diff --git a/requirements.txt b/requirements.txt
index 14b36b72..ce359bec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,6 @@ pyodbc==4.0.34
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
+
+types-PyYAML
+types-python-dateutil
diff --git a/tests/conftest.py b/tests/conftest.py
index 2fa50d6c..94969e40 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,7 +60,7 @@ def databricks_cluster_target():
         "connect_retries": 3,
         "connect_timeout": 5,
         "retry_all": True,
-        "user": os.getenv('DBT_DATABRICKS_USER'),
+        "user": os.getenv("DBT_DATABRICKS_USER"),
     }
 
 
@@ -82,17 +82,17 @@ def databricks_sql_endpoint_target():
 def databricks_http_cluster_target():
     return {
         "type": "spark",
-        "host": os.getenv('DBT_DATABRICKS_HOST_NAME'),
-        "cluster": os.getenv('DBT_DATABRICKS_CLUSTER_NAME'),
-        "token": os.getenv('DBT_DATABRICKS_TOKEN'),
+        "host": os.getenv("DBT_DATABRICKS_HOST_NAME"),
+        "cluster": os.getenv("DBT_DATABRICKS_CLUSTER_NAME"),
+        "token": os.getenv("DBT_DATABRICKS_TOKEN"),
         "method": "http",
         "port": 443,
         # more retries + longer timeout to handle unavailability while cluster is restarting
         # return failures quickly in dev, retry all failures in CI (up to 5 min)
         "connect_retries": 5,
-        "connect_timeout": 60, 
-        "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)),
-        "user": os.getenv('DBT_DATABRICKS_USER'),
+        "connect_timeout": 60,
+        "retry_all": bool(os.getenv("DBT_DATABRICKS_RETRY_ALL", False)),
+        "user": os.getenv("DBT_DATABRICKS_USER"),
     }
 
 
diff --git a/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py b/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
index e3b07f03..7560b25c 100644
--- a/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
+++ b/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
@@ -1,14 +1,12 @@
 import pytest
 
-from dbt.tests.util import run_dbt
-from dbt.tests.adapter.incremental.test_incremental_merge_exclude_columns import BaseMergeExcludeColumns
+from dbt.tests.adapter.incremental.test_incremental_merge_exclude_columns import (
+    BaseMergeExcludeColumns,
+)
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestMergeExcludeColumns(BaseMergeExcludeColumns):
     @pytest.fixture(scope="class")
     def project_config_update(self):
-        return {
-            "models": {
-                "+file_format": "delta"
-            }
-        }
+        return {"models": {"+file_format": "delta"}}
diff --git a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
index 974edd26..47832966 100644
--- a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
+++ b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
@@ -2,7 +2,9 @@
 
 from dbt.tests.util import run_dbt
 
-from dbt.tests.adapter.incremental.test_incremental_on_schema_change import BaseIncrementalOnSchemaChangeSetup
+from dbt.tests.adapter.incremental.test_incremental_on_schema_change import (
+    BaseIncrementalOnSchemaChangeSetup,
+)
 
 
 class IncrementalOnSchemaChangeIgnoreFail(BaseIncrementalOnSchemaChangeSetup):
@@ -57,20 +59,16 @@ def project_config_update(self):
 
     def run_incremental_sync_all_columns(self, project):
         select = "model_a incremental_sync_all_columns incremental_sync_all_columns_target"
-        compare_source = "incremental_sync_all_columns"
-        compare_target = "incremental_sync_all_columns_target"
         run_dbt(["run", "--models", select, "--full-refresh"])
         # Delta Lake doesn't support removing columns -- show a nice compilation error
-        results = run_dbt(["run", "--models", select], expect_pass = False)
+        results = run_dbt(["run", "--models", select], expect_pass=False)
         assert "Compilation Error" in results[1].message
-        
+
     def run_incremental_sync_remove_only(self, project):
         select = "model_a incremental_sync_remove_only incremental_sync_remove_only_target"
-        compare_source = "incremental_sync_remove_only"
-        compare_target = "incremental_sync_remove_only_target"
         run_dbt(["run", "--models", select, "--full-refresh"])
         # Delta Lake doesn't support removing columns -- show a nice compilation error
-        results = run_dbt(["run", "--models", select], expect_pass = False)
+        results = run_dbt(["run", "--models", select], expect_pass=False)
         assert "Compilation Error" in results[1].message
 
     def test_run_incremental_append_new_columns(self, project):
diff --git a/tests/functional/adapter/incremental/test_incremental_predicates.py b/tests/functional/adapter/incremental/test_incremental_predicates.py
index 59c6b353..52c01a74 100644
--- a/tests/functional/adapter/incremental/test_incremental_predicates.py
+++ b/tests/functional/adapter/incremental/test_incremental_predicates.py
@@ -26,42 +26,40 @@
 {% endif %}
 """
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestIncrementalPredicatesMergeSpark(BaseIncrementalPredicates):
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
-            "models": { 
-                "+incremental_predicates": [
-                    "dbt_internal_dest.id != 2"
-                ],
+            "models": {
+                "+incremental_predicates": ["dbt_internal_dest.id != 2"],
                 "+incremental_strategy": "merge",
-                "+file_format": "delta"
+                "+file_format": "delta",
             }
         }
-        
+
     @pytest.fixture(scope="class")
     def models(self):
         return {
             "delete_insert_incremental_predicates.sql": models__spark_incremental_predicates_sql
         }
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestPredicatesMergeSpark(BaseIncrementalPredicates):
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
-            "models": { 
-                "+predicates": [
-                    "dbt_internal_dest.id != 2"
-                ],
+            "models": {
+                "+predicates": ["dbt_internal_dest.id != 2"],
                 "+incremental_strategy": "merge",
-                "+file_format": "delta"
+                "+file_format": "delta",
             }
         }
-        
+
     @pytest.fixture(scope="class")
     def models(self):
         return {
             "delete_insert_incremental_predicates.sql": models__spark_incremental_predicates_sql
-        }
\ No newline at end of file
+        }
diff --git a/tests/functional/adapter/incremental/test_incremental_unique_id.py b/tests/functional/adapter/incremental/test_incremental_unique_id.py
index 18bac3f3..de8cb652 100644
--- a/tests/functional/adapter/incremental/test_incremental_unique_id.py
+++ b/tests/functional/adapter/incremental/test_incremental_unique_id.py
@@ -1,7 +1,8 @@
 import pytest
 from dbt.tests.adapter.incremental.test_incremental_unique_id import BaseIncrementalUniqueKey
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestUniqueKeySpark(BaseIncrementalUniqueKey):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -10,4 +11,4 @@ def project_config_update(self):
                 "+file_format": "delta",
                 "+incremental_strategy": "merge",
             }
-        }
\ No newline at end of file
+        }
diff --git a/tests/functional/adapter/incremental_strategies/fixtures.py b/tests/functional/adapter/incremental_strategies/fixtures.py
index f82decf2..50fba51d 100644
--- a/tests/functional/adapter/incremental_strategies/fixtures.py
+++ b/tests/functional/adapter/incremental_strategies/fixtures.py
@@ -395,4 +395,4 @@
 select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
 
 {% endif %}
-""".lstrip()
\ No newline at end of file
+""".lstrip()
diff --git a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
index 202eeb44..516f3da7 100644
--- a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
+++ b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
@@ -2,8 +2,26 @@
 
 from dbt.tests.util import run_dbt, check_relations_equal
 from dbt.tests.adapter.simple_seed.test_seed import SeedConfigBase
-from tests.functional.adapter.incremental_strategies.seeds import *
-from tests.functional.adapter.incremental_strategies.fixtures import *
+from tests.functional.adapter.incremental_strategies.seeds import (
+    expected_append_csv,
+    expected_overwrite_csv,
+    expected_upsert_csv,
+    expected_partial_upsert_csv,
+)
+from tests.functional.adapter.incremental_strategies.fixtures import (
+    bad_file_format_sql,
+    bad_insert_overwrite_delta_sql,
+    bad_merge_not_delta_sql,
+    bad_strategy_sql,
+    default_append_sql,
+    insert_overwrite_no_partitions_sql,
+    insert_overwrite_partitions_sql,
+    append_delta_sql,
+    delta_merge_no_key_sql,
+    delta_merge_unique_key_sql,
+    delta_merge_update_columns_sql,
+)
+
 
 class BaseIncrementalStrategies(SeedConfigBase):
     @pytest.fixture(scope="class")
@@ -12,7 +30,7 @@ def seeds(self):
             "expected_append.csv": expected_append_csv,
             "expected_overwrite.csv": expected_overwrite_csv,
             "expected_upsert.csv": expected_upsert_csv,
-            "expected_partial_upsert.csv": expected_partial_upsert_csv
+            "expected_partial_upsert.csv": expected_partial_upsert_csv,
         }
 
     @staticmethod
@@ -26,19 +44,19 @@ def seed_and_run_twice():
         run_dbt(["run"])
         run_dbt(["run"])
 
+
 class TestDefaultAppend(BaseIncrementalStrategies):
     @pytest.fixture(scope="class")
     def models(self):
-        return {
-            "default_append.sql" : default_append_sql
-        }
+        return {"default_append.sql": default_append_sql}
 
     def run_and_test(self, project):
         self.seed_and_run_twice()
         check_relations_equal(project.adapter, ["default_append", "expected_append"])
 
-
-    @pytest.mark.skip_profile("databricks_http_cluster", "databricks_sql_endpoint", "spark_session")
+    @pytest.mark.skip_profile(
+        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+    )
     def test_default_append(self, project):
         self.run_and_test(project)
 
@@ -48,18 +66,23 @@ class TestInsertOverwrite(BaseIncrementalStrategies):
     def models(self):
         return {
             "insert_overwrite_no_partitions.sql": insert_overwrite_no_partitions_sql,
-            "insert_overwrite_partitions.sql": insert_overwrite_partitions_sql
+            "insert_overwrite_partitions.sql": insert_overwrite_partitions_sql,
         }
 
     def run_and_test(self, project):
         self.seed_and_run_twice()
-        check_relations_equal(project.adapter, ["insert_overwrite_no_partitions", "expected_overwrite"])
+        check_relations_equal(
+            project.adapter, ["insert_overwrite_no_partitions", "expected_overwrite"]
+        )
         check_relations_equal(project.adapter, ["insert_overwrite_partitions", "expected_upsert"])
 
-    @pytest.mark.skip_profile("databricks_http_cluster", "databricks_sql_endpoint", "spark_session")
+    @pytest.mark.skip_profile(
+        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+    )
     def test_insert_overwrite(self, project):
         self.run_and_test(project)
 
+
 class TestDeltaStrategies(BaseIncrementalStrategies):
     @pytest.fixture(scope="class")
     def models(self):
@@ -77,36 +100,12 @@ def run_and_test(self, project):
         check_relations_equal(project.adapter, ["merge_unique_key", "expected_upsert"])
         check_relations_equal(project.adapter, ["merge_update_columns", "expected_partial_upsert"])
 
-    @pytest.mark.skip_profile("apache_spark", "databricks_http_cluster", "databricks_sql_endpoint",
-                              "spark_session")
+    @pytest.mark.skip_profile(
+        "apache_spark", "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+    )
     def test_delta_strategies(self, project):
         self.run_and_test(project)
 
-# class TestHudiStrategies(BaseIncrementalStrategies):
-#     @pytest.fixture(scope="class")
-#     def models(self):
-#         return {
-#             "append.sql": append_hudi_sql,
-#             "insert_overwrite_no_partitions.sql": hudi_insert_overwrite_no_partitions_sql,
-#             "insert_overwrite_partitions.sql": hudi_insert_overwrite_partitions_sql,
-#             "merge_no_key.sql": hudi_merge_no_key_sql,
-#             "merge_unique_key.sql": hudi_merge_unique_key_sql,
-#             "merge_update_columns.sql": hudi_update_columns_sql,
-#         }
-#
-#     def run_and_test(self, project):
-#         self.seed_and_run_twice()
-#         check_relations_equal(project.adapter, ["append", "expected_append"])
-#         check_relations_equal(project.adapter, ["merge_no_key", "expected_append"])
-#         check_relations_equal(project.adapter, ["merge_unique_key", "expected_upsert"])
-#         check_relations_equal(project.adapter, ["insert_overwrite_no_partitions", "expected_overwrite"])
-#         check_relations_equal(project.adapter, ["insert_overwrite_partitions", "expected_upsert"])
-#
-#     @pytest.mark.skip_profile("databricks_http_cluster", "databricks_cluster",
-#                               "databricks_sql_endpoint", "spark_session")
-#     def test_hudi_strategies(self, project):
-#         self.run_and_test(project)
-
 
 class TestBadStrategies(BaseIncrementalStrategies):
     @pytest.fixture(scope="class")
@@ -115,7 +114,7 @@ def models(self):
             "bad_file_format.sql": bad_file_format_sql,
             "bad_insert_overwrite_delta.sql": bad_insert_overwrite_delta_sql,
             "bad_merge_not_delta.sql": bad_merge_not_delta_sql,
-            "bad_strategy.sql": bad_strategy_sql
+            "bad_strategy.sql": bad_strategy_sql,
         }
 
     @staticmethod
diff --git a/tests/functional/adapter/persist_docs/test_persist_docs.py b/tests/functional/adapter/persist_docs/test_persist_docs.py
index 3ddab6df..0e3d102d 100644
--- a/tests/functional/adapter/persist_docs/test_persist_docs.py
+++ b/tests/functional/adapter/persist_docs/test_persist_docs.py
@@ -1,5 +1,3 @@
-import json
-import os
 import pytest
 
 from dbt.tests.util import run_dbt
@@ -23,61 +21,59 @@ def models(self):
             "incremental_delta_model.sql": _MODELS__INCREMENTAL_DELTA,
             "my_fun_docs.md": _MODELS__MY_FUN_DOCS,
             "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL,
-            "schema.yml": _PROPERTIES__MODELS
+            "schema.yml": _PROPERTIES__MODELS,
         }
 
     @pytest.fixture(scope="class")
     def seeds(self):
-        return {
-            "seed.csv": _SEEDS__BASIC,
-            "seed.yml": _PROPERTIES__SEEDS
-        }
-
+        return {"seed.csv": _SEEDS__BASIC, "seed.yml": _PROPERTIES__SEEDS}
 
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
-            'models': {
-                'test': {
-                    '+persist_docs': {
+            "models": {
+                "test": {
+                    "+persist_docs": {
                         "relation": True,
                         "columns": True,
                     },
                 }
             },
-            'seeds': {
-                'test': {
-                    '+persist_docs': {
+            "seeds": {
+                "test": {
+                    "+persist_docs": {
                         "relation": True,
                         "columns": True,
                     },
-                    '+file_format': 'delta',
-                    '+quote_columns': True
+                    "+file_format": "delta",
+                    "+quote_columns": True,
                 }
             },
         }
 
     def test_delta_comments(self, project):
-        run_dbt(['seed'])
-        run_dbt(['run'])
+        run_dbt(["seed"])
+        run_dbt(["run"])
 
         for table, whatis in [
-            ('table_delta_model', 'Table'),
-            ('seed', 'Seed'),
-            ('incremental_delta_model', 'Incremental')
+            ("table_delta_model", "Table"),
+            ("seed", "Seed"),
+            ("incremental_delta_model", "Incremental"),
         ]:
             results = project.run_sql(
-                'describe extended {schema}.{table}'.format(schema=project.test_schema, table=table),
-                fetch='all'
+                "describe extended {schema}.{table}".format(
+                    schema=project.test_schema, table=table
+                ),
+                fetch="all",
             )
 
             for result in results:
-                if result[0] == 'Comment':
-                    assert result[1].startswith(f'{whatis} model description')
-                if result[0] == 'id':
-                    assert result[2].startswith('id Column description')
-                if result[0] == 'name':
-                    assert result[2].startswith('Some stuff here and then a call to')
+                if result[0] == "Comment":
+                    assert result[1].startswith(f"{whatis} model description")
+                if result[0] == "id":
+                    assert result[2].startswith("id Column description")
+                if result[0] == "name":
+                    assert result[2].startswith("Some stuff here and then a call to")
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session")
@@ -96,24 +92,21 @@ def project_config_update(self):
 
     @pytest.fixture(scope="class")
     def seeds(self):
-        return {
-            "seed.csv": _SEEDS__BASIC,
-            "seed.yml": _PROPERTIES__SEEDS
-        }
+        return {"seed.csv": _SEEDS__BASIC, "seed.yml": _PROPERTIES__SEEDS}
 
     @pytest.fixture(scope="class")
     def models(self):
         return {
             "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL_MISSING_COLUMN,
             "my_fun_docs.md": _MODELS__MY_FUN_DOCS,
-    }
+        }
 
     @pytest.fixture(scope="class")
     def properties(self):
         return {"schema.yml": _PROPERTIES__MODELS}
 
     def test_missing_column(self, project):
-        '''spark will use our schema to verify all columns exist rather than fail silently'''
+        """spark will use our schema to verify all columns exist rather than fail silently"""
         run_dbt(["seed"])
         res = run_dbt(["run"], expect_pass=False)
         assert "Missing field name in table" in res[0].message
diff --git a/tests/functional/adapter/seed_column_types/fixtures.py b/tests/functional/adapter/seed_column_types/fixtures.py
index fb53bd99..e002d57b 100644
--- a/tests/functional/adapter/seed_column_types/fixtures.py
+++ b/tests/functional/adapter/seed_column_types/fixtures.py
@@ -46,21 +46,21 @@
     {% if not execute %}
         {{ return(None) }}
     {% endif %}
-    
+
     {% set columns = adapter.get_columns_in_relation(model) %}
     {% if (column_map | length) != (columns | length) %}
         {% set column_map_keys = (column_map | list | string) %}
         {% set column_names = (columns | map(attribute='name') | list | string) %}
         {% do exceptions.raise_compiler_error('did not get all the columns/all columns not specified:\n' ~ column_map_keys ~ '\nvs\n' ~ column_names) %}
     {% endif %}
-    
+
     {% set bad_columns = [] %}
     {% for column in columns %}
         {% if is_bad_column(column, column_map) %}
             {% do bad_columns.append(column.name) %}
         {% endif %}
     {% endfor %}
-    
+
     {% set num_bad_columns = (bad_columns | length) %}
 
     select '{{ num_bad_columns }}' as bad_column
diff --git a/tests/functional/adapter/seed_column_types/test_seed_column_types.py b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
index b818092f..3326490f 100644
--- a/tests/functional/adapter/seed_column_types/test_seed_column_types.py
+++ b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
@@ -3,23 +3,19 @@
 from tests.functional.adapter.seed_column_types.fixtures import (
     _MACRO_TEST_IS_TYPE_SQL,
     _SEED_CSV,
-    _SEED_YML
+    _SEED_YML,
 )
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestSeedColumnTypesCast:
-
     @pytest.fixture(scope="class")
     def macros(self):
         return {"test_is_type.sql": _MACRO_TEST_IS_TYPE_SQL}
 
     @pytest.fixture(scope="class")
     def seeds(self):
-        return {
-            "payments.csv": _SEED_CSV,
-            "schema.yml": _SEED_YML
-        }
+        return {"payments.csv": _SEED_CSV, "schema.yml": _SEED_YML}
 
     #  We want to test seed types because hive would cause all fields to be strings.
     # setting column_types in project.yml should change them and pass.
diff --git a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
index 86c15a55..c445fe67 100644
--- a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
+++ b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
@@ -1,31 +1,29 @@
 import pytest
 
-from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import StoreTestFailuresBase, \
-    TEST_AUDIT_SCHEMA_SUFFIX
+from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import (
+    StoreTestFailuresBase,
+    TEST_AUDIT_SCHEMA_SUFFIX,
+)
 
 
-@pytest.mark.skip_profile('spark_session', 'databricks_cluster', 'databricks_sql_endpoint')
+@pytest.mark.skip_profile("spark_session", "databricks_cluster", "databricks_sql_endpoint")
 class TestSparkStoreTestFailures(StoreTestFailuresBase):
-
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
             "seeds": {
                 "quote_columns": True,
             },
-            'tests': {
-                "+schema": TEST_AUDIT_SCHEMA_SUFFIX,
-                '+store_failures': True
-            }
+            "tests": {"+schema": TEST_AUDIT_SCHEMA_SUFFIX, "+store_failures": True},
         }
+
     def test_store_and_assert(self, project):
         self.run_tests_store_one_failure(project)
         self.run_tests_store_failures_and_assert(project)
 
 
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestSparkStoreTestFailuresWithDelta(StoreTestFailuresBase):
-
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
@@ -34,11 +32,11 @@ def project_config_update(self):
                 "test": self.column_type_overrides(),
                 "+file_format": "delta",
             },
-            'tests': {
+            "tests": {
                 "+schema": TEST_AUDIT_SCHEMA_SUFFIX,
-                '+store_failures': True,
-                '+file_format': 'delta',
-            }
+                "+store_failures": True,
+                "+file_format": "delta",
+            },
         }
 
     def test_store_and_assert_failure_with_delta(self, project):
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index bdccf169..072d211d 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -14,7 +14,7 @@
 from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestSimpleMaterializationsSpark(BaseSimpleMaterializations):
     pass
 
@@ -25,7 +25,7 @@ class TestSingularTestsSpark(BaseSingularTests):
 
 # The local cluster currently tests on spark 2.x, which does not support this
 # if we upgrade it to 3.x, we can enable this test
-@pytest.mark.skip_profile('apache_spark')
+@pytest.mark.skip_profile("apache_spark")
 class TestSingularTestsEphemeralSpark(BaseSingularTestsEphemeral):
     pass
 
@@ -34,12 +34,12 @@ class TestEmptySpark(BaseEmpty):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestEphemeralSpark(BaseEphemeral):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestIncrementalSpark(BaseIncremental):
     pass
 
@@ -50,7 +50,7 @@ class TestGenericTestsSpark(BaseGenericTests):
 
 # These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestSnapshotCheckColsSpark(BaseSnapshotCheckCols):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -60,13 +60,13 @@ def project_config_update(self):
             },
             "snapshots": {
                 "+file_format": "delta",
-            }
+            },
         }
 
 
 # These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestSnapshotTimestampSpark(BaseSnapshotTimestamp):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -76,9 +76,10 @@ def project_config_update(self):
             },
             "snapshots": {
                 "+file_format": "delta",
-            }
+            },
         }
 
-@pytest.mark.skip_profile('spark_session')
+
+@pytest.mark.skip_profile("spark_session")
 class TestBaseAdapterMethod(BaseAdapterMethod):
     pass
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 27cf59f1..a5fee2e7 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -3,7 +3,7 @@
 from dbt.tests.adapter.constraints.test_constraints import (
     BaseTableConstraintsColumnsEqual,
     BaseViewConstraintsColumnsEqual,
-    BaseConstraintsRuntimeEnforcement
+    BaseConstraintsRuntimeEnforcement,
 )
 from dbt.tests.adapter.constraints.fixtures import (
     my_model_sql,
@@ -60,7 +60,7 @@ def data_types(self, int_type, schema_int_type, string_type):
             ['"1"', "string", string_type],
             ["true", "boolean", "BOOL"],
             ['array("1","2","3")', "string", string_type],
-            ['array(1,2,3)', "string", string_type],
+            ["array(1,2,3)", "string", string_type],
             ["6.45", "decimal", "DECIMAL"],
             # TODO: test__constraints_correct_column_data_type isn't able to run the following statements in create table statements with pyodbc
             # ["cast('2019-01-01' as date)", "date", "DATE"],
@@ -99,34 +99,42 @@ def data_types(self, int_type, schema_int_type, string_type):
             ['"1"', "string", string_type],
             ["true", "boolean", "BOOLEAN_TYPE"],
             ['array("1","2","3")', "array<string>", "ARRAY_TYPE"],
-            ['array(1,2,3)', "array<int>", "ARRAY_TYPE"],
+            ["array(1,2,3)", "array<int>", "ARRAY_TYPE"],
             ["cast('2019-01-01' as date)", "date", "DATE_TYPE"],
             ["cast('2019-01-01' as timestamp)", "timestamp", "TIMESTAMP_TYPE"],
             ["cast(1.0 AS DECIMAL(4, 2))", "decimal", "DECIMAL_TYPE"],
         ]
 
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_http_cluster')
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
 class TestSparkTableConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseTableConstraintsColumnsEqual):
     pass
 
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_http_cluster')
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
 class TestSparkViewConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseViewConstraintsColumnsEqual):
     pass
 
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_sql_endpoint', 'databricks_cluster')
-class TestSparkTableConstraintsColumnsEqualDatabricksHTTP(DatabricksHTTPSetup, BaseTableConstraintsColumnsEqual):
+@pytest.mark.skip_profile(
+    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+)
+class TestSparkTableConstraintsColumnsEqualDatabricksHTTP(
+    DatabricksHTTPSetup, BaseTableConstraintsColumnsEqual
+):
     pass
 
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_sql_endpoint', 'databricks_cluster')
-class TestSparkViewConstraintsColumnsEqualDatabricksHTTP(DatabricksHTTPSetup, BaseViewConstraintsColumnsEqual):
+@pytest.mark.skip_profile(
+    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+)
+class TestSparkViewConstraintsColumnsEqualDatabricksHTTP(
+    DatabricksHTTPSetup, BaseViewConstraintsColumnsEqual
+):
     pass
 
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestSparkConstraintsRuntimeEnforcement(BaseConstraintsRuntimeEnforcement):
     @pytest.fixture(scope="class")
     def models(self):
diff --git a/tests/functional/adapter/test_get_columns_in_relation.py b/tests/functional/adapter/test_get_columns_in_relation.py
index da21be18..a037bb1c 100644
--- a/tests/functional/adapter/test_get_columns_in_relation.py
+++ b/tests/functional/adapter/test_get_columns_in_relation.py
@@ -18,7 +18,6 @@
 
 
 class TestColumnsInRelation:
-
     @pytest.fixture(scope="class")
     def models(self):
         return {
diff --git a/tests/functional/adapter/test_grants.py b/tests/functional/adapter/test_grants.py
index 8e0341df..1b1a005a 100644
--- a/tests/functional/adapter/test_grants.py
+++ b/tests/functional/adapter/test_grants.py
@@ -55,6 +55,6 @@ def project_config_update(self):
 class TestInvalidGrantsSpark(BaseInvalidGrants):
     def grantee_does_not_exist_error(self):
         return "RESOURCE_DOES_NOT_EXIST"
-        
+
     def privilege_does_not_exist_error(self):
         return "Action Unknown"
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index e5e7debe..1195cbd3 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -1,16 +1,23 @@
 import os
 import pytest
-from dbt.tests.util import run_dbt, write_file, run_dbt_and_capture
-from dbt.tests.adapter.python_model.test_python_model import BasePythonModelTests, BasePythonIncrementalTests
+from dbt.tests.util import run_dbt, write_file
+from dbt.tests.adapter.python_model.test_python_model import (
+    BasePythonModelTests,
+    BasePythonIncrementalTests,
+)
 from dbt.tests.adapter.python_model.test_spark import BasePySparkTests
+
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonModelSpark(BasePythonModelTests):
     pass
 
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
     pass
 
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     @pytest.fixture(scope="class")
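The Python model tests enabled above exercise dbt's Python model contract on Spark. A minimal sketch of what such a model looks like, assuming the documented def model(dbt, session) interface (the model name, filter, and configs here are placeholders):

    def model(dbt, session):
        # session is the active SparkSession; dbt.ref returns a Spark DataFrame
        dbt.config(materialized="table", file_format="delta")
        upstream = dbt.ref("my_upstream_model")  # placeholder upstream model
        return upstream.where("id is not null")
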
diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py
index ce608580..8ca38ab1 100644
--- a/tests/functional/adapter/utils/test_data_types.py
+++ b/tests/functional/adapter/utils/test_data_types.py
@@ -1,10 +1,12 @@
 import pytest
 from dbt.tests.adapter.utils.data_types.test_type_bigint import BaseTypeBigInt
 from dbt.tests.adapter.utils.data_types.test_type_float import (
-    BaseTypeFloat, seeds__expected_csv as seeds__float_expected_csv
+    BaseTypeFloat,
+    seeds__expected_csv as seeds__float_expected_csv,
 )
 from dbt.tests.adapter.utils.data_types.test_type_int import (
-    BaseTypeInt, seeds__expected_csv as seeds__int_expected_csv
+    BaseTypeInt,
+    seeds__expected_csv as seeds__int_expected_csv,
 )
 from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric
 from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
@@ -27,6 +29,7 @@ class TestTypeBigInt(BaseTypeBigInt):
         float_col: float
 """
 
+
 class TestTypeFloat(BaseTypeFloat):
     @pytest.fixture(scope="class")
     def seeds(self):
@@ -46,6 +49,7 @@ def seeds(self):
         int_col: int
 """
 
+
 class TestTypeInt(BaseTypeInt):
     @pytest.fixture(scope="class")
     def seeds(self):
@@ -54,16 +58,16 @@ def seeds(self):
             "expected.yml": seeds__int_expected_yml,
         }
 
-    
+
 class TestTypeNumeric(BaseTypeNumeric):
     def numeric_fixture_type(self):
         return "decimal(28,6)"
 
-    
+
 class TestTypeString(BaseTypeString):
     pass
 
-    
+
 class TestTypeTimestamp(BaseTypeTimestamp):
     pass
 
diff --git a/tests/functional/adapter/utils/test_timestamps.py b/tests/functional/adapter/utils/test_timestamps.py
index 8507c0a6..d05d2399 100644
--- a/tests/functional/adapter/utils/test_timestamps.py
+++ b/tests/functional/adapter/utils/test_timestamps.py
@@ -5,13 +5,13 @@
 class TestCurrentTimestampSpark(BaseCurrentTimestamps):
     @pytest.fixture(scope="class")
     def models(self):
-        return {"get_current_timestamp.sql": "select {{ current_timestamp() }} as current_timestamp"}
+        return {
+            "get_current_timestamp.sql": "select {{ current_timestamp() }} as current_timestamp"
+        }
 
     @pytest.fixture(scope="class")
     def expected_schema(self):
-        return {
-            "current_timestamp": "timestamp"
-        }
+        return {"current_timestamp": "timestamp"}
 
     @pytest.fixture(scope="class")
     def expected_sql(self):
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 6fd6a912..8f222b9a 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -11,7 +11,6 @@
 from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
 from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
 from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
-from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesQuote
 from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesBackslash
 from dbt.tests.adapter.utils.test_except import BaseExcept
 from dbt.tests.adapter.utils.test_hash import BaseHash
@@ -55,7 +54,7 @@ class TestCastBoolToText(BaseCastBoolToText):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestConcat(BaseConcat):
     pass
 
@@ -70,7 +69,7 @@ class TestDateAdd(BaseDateAdd):
 
 
 # this generates too much SQL to run successfully in our testing environments :(
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestDateDiff(BaseDateDiff):
     pass
 
@@ -87,7 +86,7 @@ class TestExcept(BaseExcept):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestHash(BaseHash):
     pass
 
@@ -121,12 +120,12 @@ class TestPosition(BasePosition):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestReplace(BaseReplace):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestRight(BaseRight):
     pass
 
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/integration/base.py b/tests/integration/base.py
deleted file mode 100644
index 7e557217..00000000
--- a/tests/integration/base.py
+++ /dev/null
@@ -1,965 +0,0 @@
-import json
-import os
-import io
-import random
-import shutil
-import sys
-import tempfile
-import traceback
-import unittest
-from contextlib import contextmanager
-from datetime import datetime
-from functools import wraps
-
-import pyodbc
-import pytest
-import yaml
-from unittest.mock import patch
-
-import dbt.main as dbt
-from dbt import flags
-from dbt.deprecations import reset_deprecations
-from dbt.adapters.factory import get_adapter, reset_adapters, register_adapter
-from dbt.clients.jinja import template_cache
-from dbt.config import RuntimeConfig
-from dbt.context import providers
-from dbt.logger import log_manager
-from dbt.events.functions import (
-    capture_stdout_logs, fire_event, setup_event_logger, stop_capture_stdout_logs
-)
-from dbt.events import AdapterLogger
-from dbt.contracts.graph.manifest import Manifest
-
-logger = AdapterLogger("Spark")
-
-INITIAL_ROOT = os.getcwd()
-
-
-def normalize(path):
-    """On windows, neither is enough on its own:
-
-    >>> normcase('C:\\documents/ALL CAPS/subdir\\..')
-    'c:\\documents\\all caps\\subdir\\..'
-    >>> normpath('C:\\documents/ALL CAPS/subdir\\..')
-    'C:\\documents\\ALL CAPS'
-    >>> normpath(normcase('C:\\documents/ALL CAPS/subdir\\..'))
-    'c:\\documents\\all caps'
-    """
-    return os.path.normcase(os.path.normpath(path))
-
-
-class Normalized:
-    def __init__(self, value):
-        self.value = value
-
-    def __repr__(self):
-        return f'Normalized({self.value!r})'
-
-    def __str__(self):
-        return f'Normalized({self.value!s})'
-
-    def __eq__(self, other):
-        return normalize(self.value) == normalize(other)
-
-
-class FakeArgs:
-    def __init__(self):
-        self.threads = 1
-        self.defer = False
-        self.full_refresh = False
-        self.models = None
-        self.select = None
-        self.exclude = None
-        self.single_threaded = False
-        self.selector_name = None
-        self.state = None
-        self.defer = None
-
-
-class TestArgs:
-    __test__ = False
-
-    def __init__(self, kwargs):
-        self.which = 'run'
-        self.single_threaded = False
-        self.profiles_dir = None
-        self.project_dir = None
-        self.__dict__.update(kwargs)
-
-
-def _profile_from_test_name(test_name):
-    adapter_names = ('apache_spark', 'databricks_cluster',
-                     'databricks_sql_endpoint')
-    adapters_in_name = sum(x in test_name for x in adapter_names)
-    if adapters_in_name != 1:
-        raise ValueError(
-            'test names must have exactly 1 profile choice embedded, {} has {}'
-            .format(test_name, adapters_in_name)
-        )
-
-    for adapter_name in adapter_names:
-        if adapter_name in test_name:
-            return adapter_name
-
-    raise ValueError(
-        'could not find adapter name in test name {}'.format(test_name)
-    )
-
-
-def _pytest_test_name():
-    return os.environ['PYTEST_CURRENT_TEST'].split()[0]
-
-
-def _pytest_get_test_root():
-    test_path = _pytest_test_name().split('::')[0]
-    relative_to = INITIAL_ROOT
-    head = os.path.relpath(test_path, relative_to)
-
-    path_parts = []
-    while head:
-        head, tail = os.path.split(head)
-        path_parts.append(tail)
-    path_parts.reverse()
-    # dbt tests are all of the form 'tests/integration/suite_name'
-    target = os.path.join(*path_parts[:3])  # TODO: try to not hard code this
-    return os.path.join(relative_to, target)
-
-
-def _really_makedirs(path):
-    while not os.path.exists(path):
-        try:
-            os.makedirs(path)
-        except EnvironmentError:
-            raise
-
-
-class DBTIntegrationTest(unittest.TestCase):
-    CREATE_SCHEMA_STATEMENT = 'CREATE SCHEMA {}'
-    DROP_SCHEMA_STATEMENT = 'DROP SCHEMA IF EXISTS {} CASCADE'
-
-    _randint = random.randint(0, 9999)
-    _runtime_timedelta = (datetime.utcnow() - datetime(1970, 1, 1, 0, 0, 0))
-    _runtime = (
-        (int(_runtime_timedelta.total_seconds() * 1e6)) +
-        _runtime_timedelta.microseconds
-    )
-
-    prefix = f'test{_runtime}{_randint:04}'
-    setup_alternate_db = False
-
-    def apache_spark_profile(self):
-        return {
-            'config': {
-                'send_anonymous_usage_stats': False
-            },
-            'test': {
-                'outputs': {
-                    'thrift': {
-                        'type': 'spark',
-                        'host': 'localhost',
-                        'user': 'dbt',
-                        'method': 'thrift',
-                        'port': 10000,
-                        'connect_retries': 3,
-                        'connect_timeout': 5,
-                        'retry_all': True,
-                        'schema': self.unique_schema()
-                    },
-                },
-                'target': 'thrift'
-            }
-        }
-
-    def databricks_cluster_profile(self):
-        return {
-            'config': {
-                'send_anonymous_usage_stats': False
-            },
-            'test': {
-                'outputs': {
-                    'cluster': {
-                        'type': 'spark',
-                        'method': 'odbc',
-                        'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
-                        'cluster': os.getenv('DBT_DATABRICKS_CLUSTER_NAME'),
-                        'token': os.getenv('DBT_DATABRICKS_TOKEN'),
-                        'driver': os.getenv('ODBC_DRIVER'),
-                        'port': 443,
-                        'connect_retries': 3,
-                        'connect_timeout': 5,
-                        'retry_all': True,
-                        'schema': self.unique_schema()
-                    },
-                },
-                'target': 'cluster'
-            }
-        }
-
-    def databricks_sql_endpoint_profile(self):
-        return {
-            'config': {
-                'send_anonymous_usage_stats': False
-            },
-            'test': {
-                'outputs': {
-                    'endpoint': {
-                        'type': 'spark',
-                        'method': 'odbc',
-                        'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
-                        'endpoint': os.getenv('DBT_DATABRICKS_ENDPOINT'),
-                        'token': os.getenv('DBT_DATABRICKS_TOKEN'),
-                        'driver': os.getenv('ODBC_DRIVER'),
-                        'port': 443,
-                        'connect_retries': 3,
-                        'connect_timeout': 5,
-                        'retry_all': True,
-                        'schema': self.unique_schema()
-                    },
-                },
-                'target': 'endpoint'
-            }
-        }
-
-    @property
-    def packages_config(self):
-        return None
-
-    @property
-    def selectors_config(self):
-        return None
-
-    def unique_schema(self):
-        schema = self.schema
-
-        to_return = "{}_{}".format(self.prefix, schema)
-
-        return to_return.lower()
-
-    @property
-    def default_database(self):
-        database = self.config.credentials.database
-        return database
-
-    @property
-    def alternative_database(self):
-        return None
-
-    def get_profile(self, adapter_type):
-        if adapter_type == 'apache_spark':
-            return self.apache_spark_profile()
-        elif adapter_type == 'databricks_cluster':
-            return self.databricks_cluster_profile()
-        elif adapter_type == 'databricks_sql_endpoint':
-            return self.databricks_sql_endpoint_profile()
-        else:
-            raise ValueError('invalid adapter type {}'.format(adapter_type))
-
-    def _pick_profile(self):
-        test_name = self.id().split('.')[-1]
-        return _profile_from_test_name(test_name)
-
-    def _symlink_test_folders(self):
-        for entry in os.listdir(self.test_original_source_path):
-            src = os.path.join(self.test_original_source_path, entry)
-            tst = os.path.join(self.test_root_dir, entry)
-            if os.path.isdir(src) or src.endswith('.sql'):
-                # symlink all sql files and all directories.
-                os.symlink(src, tst)
-        os.symlink(self._logs_dir, os.path.join(self.test_root_dir, 'logs'))
-
-    @property
-    def test_root_realpath(self):
-        if sys.platform == 'darwin':
-            return os.path.realpath(self.test_root_dir)
-        else:
-            return self.test_root_dir
-
-    def _generate_test_root_dir(self):
-        return normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))
-
-    def setUp(self):
-        self.dbt_core_install_root = os.path.dirname(dbt.__file__)
-        log_manager.reset_handlers()
-        self.initial_dir = INITIAL_ROOT
-        os.chdir(self.initial_dir)
-        # before we go anywhere, collect the initial path info
-        self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix)
-        setup_event_logger(self._logs_dir)
-        _really_makedirs(self._logs_dir)
-        self.test_original_source_path = _pytest_get_test_root()
-        self.test_root_dir = self._generate_test_root_dir()
-
-        os.chdir(self.test_root_dir)
-        try:
-            self._symlink_test_folders()
-        except Exception as exc:
-            msg = '\n\t'.join((
-                'Failed to symlink test folders!',
-                'initial_dir={0.initial_dir}',
-                'test_original_source_path={0.test_original_source_path}',
-                'test_root_dir={0.test_root_dir}'
-            )).format(self)
-            logger.exception(msg)
-
-            # if logging isn't set up, I still really want this message.
-            print(msg)
-            traceback.print_exc()
-
-            raise
-
-        self._created_schemas = set()
-        reset_deprecations()
-        template_cache.clear()
-
-        self.use_profile(self._pick_profile())
-        self.use_default_project()
-        self.set_packages()
-        self.set_selectors()
-        self.load_config()
-
-    def use_default_project(self, overrides=None):
-        # create a dbt_project.yml
-        base_project_config = {
-            'name': 'test',
-            'version': '1.0',
-            'config-version': 2,
-            'test-paths': [],
-            'source-paths': [self.models],
-            'profile': 'test',
-        }
-
-        project_config = {}
-        project_config.update(base_project_config)
-        project_config.update(self.project_config)
-        project_config.update(overrides or {})
-
-        with open("dbt_project.yml", 'w') as f:
-            yaml.safe_dump(project_config, f, default_flow_style=True)
-
-    def use_profile(self, adapter_type):
-        self.adapter_type = adapter_type
-
-        profile_config = {}
-        default_profile_config = self.get_profile(adapter_type)
-
-        profile_config.update(default_profile_config)
-        profile_config.update(self.profile_config)
-
-        if not os.path.exists(self.test_root_dir):
-            os.makedirs(self.test_root_dir)
-
-        flags.PROFILES_DIR = self.test_root_dir
-        profiles_path = os.path.join(self.test_root_dir, 'profiles.yml')
-        with open(profiles_path, 'w') as f:
-            yaml.safe_dump(profile_config, f, default_flow_style=True)
-        self._profile_config = profile_config
-
-    def set_packages(self):
-        if self.packages_config is not None:
-            with open('packages.yml', 'w') as f:
-                yaml.safe_dump(self.packages_config, f, default_flow_style=True)
-
-    def set_selectors(self):
-        if self.selectors_config is not None:
-            with open('selectors.yml', 'w') as f:
-                yaml.safe_dump(self.selectors_config, f, default_flow_style=True)
-
-    def load_config(self):
-        # we've written our profile and project. Now we want to instantiate a
-        # fresh adapter for the tests.
-        # it's important to use a different connection handle here so
-        # we don't look into an incomplete transaction
-        kwargs = {
-            'profile': None,
-            'profiles_dir': self.test_root_dir,
-            'target': None,
-        }
-
-        config = RuntimeConfig.from_args(TestArgs(kwargs))
-
-        register_adapter(config)
-        adapter = get_adapter(config)
-        adapter.cleanup_connections()
-        self.adapter_type = adapter.type()
-        self.adapter = adapter
-        self.config = config
-
-        self._drop_schemas()
-        self._create_schemas()
-
-    def quote_as_configured(self, value, quote_key):
-        return self.adapter.quote_as_configured(value, quote_key)
-
-    def tearDown(self):
-        # get any current run adapter and clean up its connections before we
-        # reset them. It'll probably be different from ours because
-        # handle_and_check() calls reset_adapters().
-        register_adapter(self.config)
-        adapter = get_adapter(self.config)
-        if adapter is not self.adapter:
-            adapter.cleanup_connections()
-        if not hasattr(self, 'adapter'):
-            self.adapter = adapter
-
-        self._drop_schemas()
-
-        self.adapter.cleanup_connections()
-        reset_adapters()
-        os.chdir(INITIAL_ROOT)
-        try:
-            shutil.rmtree(self.test_root_dir)
-        except EnvironmentError:
-            logger.exception('Could not clean up after test - {} not removable'
-                             .format(self.test_root_dir))
-
-    def _get_schema_fqn(self, database, schema):
-        schema_fqn = self.quote_as_configured(schema, 'schema')
-        return schema_fqn
-
-    def _create_schema_named(self, database, schema):
-        self.run_sql('CREATE SCHEMA {schema}')
-
-    def _drop_schema_named(self, database, schema):
-        self.run_sql('DROP SCHEMA IF EXISTS {schema} CASCADE')
-
-    def _create_schemas(self):
-        schema = self.unique_schema()
-        with self.adapter.connection_named('__test'):
-            self._create_schema_named(self.default_database, schema)
-
-    def _drop_schemas(self):
-        with self.adapter.connection_named('__test'):
-            schema = self.unique_schema()
-            self._drop_schema_named(self.default_database, schema)
-            if self.setup_alternate_db and self.alternative_database:
-                self._drop_schema_named(self.alternative_database, schema)
-
-    @property
-    def project_config(self):
-        return {
-            'config-version': 2,
-        }
-
-    @property
-    def profile_config(self):
-        return {}
-
-    def run_dbt(self, args=None, expect_pass=True, profiles_dir=True):
-        res, success = self.run_dbt_and_check(args=args, profiles_dir=profiles_dir)
-        self.assertEqual(
-            success, expect_pass,
-            "dbt exit state did not match expected")
-
-        return res
-
-
-    def run_dbt_and_capture(self, *args, **kwargs):
-        try:
-            stringbuf = capture_stdout_logs()
-            res = self.run_dbt(*args, **kwargs)
-            stdout = stringbuf.getvalue()
-
-        finally:
-            stop_capture_stdout_logs()
-
-        return res, stdout
-
-    def run_dbt_and_check(self, args=None, profiles_dir=True):
-        log_manager.reset_handlers()
-        if args is None:
-            args = ["run"]
-
-        final_args = []
-
-        if os.getenv('DBT_TEST_SINGLE_THREADED') in ('y', 'Y', '1'):
-            final_args.append('--single-threaded')
-
-        final_args.extend(args)
-
-        if profiles_dir:
-            final_args.extend(['--profiles-dir', self.test_root_dir])
-        final_args.append('--log-cache-events')
-
-        logger.info("Invoking dbt with {}".format(final_args))
-        return dbt.handle_and_check(final_args)
-
-    def run_sql_file(self, path, kwargs=None):
-        with open(path, 'r') as f:
-            statements = f.read().split(";")
-            for statement in statements:
-                self.run_sql(statement, kwargs=kwargs)
-
-    def transform_sql(self, query, kwargs=None):
-        to_return = query
-
-        base_kwargs = {
-            'schema': self.unique_schema(),
-            'database': self.adapter.quote(self.default_database),
-        }
-        if kwargs is None:
-            kwargs = {}
-        base_kwargs.update(kwargs)
-
-        to_return = to_return.format(**base_kwargs)
-
-        return to_return
-
-    def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
-        if connection_name is None:
-            connection_name = '__test'
-
-        if query.strip() == "":
-            return
-
-        sql = self.transform_sql(query, kwargs=kwargs)
-
-        with self.get_connection(connection_name) as conn:
-            cursor = conn.handle.cursor()
-            try:
-                cursor.execute(sql)
-                if fetch == 'one':
-                    return cursor.fetchall()[0]
-                elif fetch == 'all':
-                    return cursor.fetchall()
-                else:
-                    # we have to fetch.
-                    cursor.fetchall()
-            except pyodbc.ProgrammingError as e:
-                # hacks for dropping schema
-                if "No results.  Previous SQL was not a query." not in str(e):
-                    raise e
-            except Exception as e:
-                conn.handle.rollback()
-                conn.transaction_open = False
-                print(sql)
-                print(e)
-                raise
-            else:
-                conn.transaction_open = False
-
-    def _ilike(self, target, value):
-        return "{} ilike '{}'".format(target, value)
-
-    def get_many_table_columns_bigquery(self, tables, schema, database=None):
-        result = []
-        for table in tables:
-            relation = self._make_relation(table, schema, database)
-            columns = self.adapter.get_columns_in_relation(relation)
-            for col in columns:
-                result.append((table, col.column, col.dtype, col.char_size))
-        return result
-
-    def get_many_table_columns(self, tables, schema, database=None):
-        result = self.get_many_table_columns_bigquery(tables, schema, database)
-        result.sort(key=lambda x: '{}.{}'.format(x[0], x[1]))
-        return result
-
-    def filter_many_columns(self, column):
-        if len(column) == 3:
-            table_name, column_name, data_type = column
-            char_size = None
-        else:
-            table_name, column_name, data_type, char_size = column
-        return (table_name, column_name, data_type, char_size)
-
-    @contextmanager
-    def get_connection(self, name=None):
-        """Create a test connection context where all executed macros, etc will
-        get self.adapter as the adapter.
-
-        This allows tests to run normal adapter macros as if reset_adapters()
-        were not called by handle_and_check (for asserts, etc)
-        """
-        if name is None:
-            name = '__test'
-        with patch.object(providers, 'get_adapter', return_value=self.adapter):
-            with self.adapter.connection_named(name):
-                conn = self.adapter.connections.get_thread_connection()
-                yield conn
-
-    def get_relation_columns(self, relation):
-        with self.get_connection():
-            columns = self.adapter.get_columns_in_relation(relation)
-
-        return sorted(((c.name, c.dtype, c.char_size) for c in columns),
-                      key=lambda x: x[0])
-
-    def get_table_columns(self, table, schema=None, database=None):
-        schema = self.unique_schema() if schema is None else schema
-        database = self.default_database if database is None else database
-        relation = self.adapter.Relation.create(
-            database=database,
-            schema=schema,
-            identifier=table,
-            type='table',
-            quote_policy=self.config.quoting
-        )
-        return self.get_relation_columns(relation)
-
-    def get_table_columns_as_dict(self, tables, schema=None):
-        col_matrix = self.get_many_table_columns(tables, schema)
-        res = {}
-        for row in col_matrix:
-            table_name = row[0]
-            col_def = row[1:]
-            if table_name not in res:
-                res[table_name] = []
-            res[table_name].append(col_def)
-        return res
-
-    def get_models_in_schema(self, schema=None):
-        schema = self.unique_schema() if schema is None else schema
-        sql = """
-                select table_name,
-                        case when table_type = 'BASE TABLE' then 'table'
-                             when table_type = 'VIEW' then 'view'
-                             else table_type
-                        end as materialization
-                from information_schema.tables
-                where {}
-                order by table_name
-                """
-
-        sql = sql.format(self._ilike('table_schema', schema))
-        result = self.run_sql(sql, fetch='all')
-
-        return {model_name: materialization for (model_name, materialization) in result}
-
-    def _assertTablesEqualSql(self, relation_a, relation_b, columns=None):
-        if columns is None:
-            columns = self.get_relation_columns(relation_a)
-        column_names = [c[0] for c in columns]
-
-        sql = self.adapter.get_rows_different_sql(
-            relation_a, relation_b, column_names
-        )
-
-        return sql
-
-    def assertTablesEqual(self, table_a, table_b,
-                          table_a_schema=None, table_b_schema=None,
-                          table_a_db=None, table_b_db=None):
-        if table_a_schema is None:
-            table_a_schema = self.unique_schema()
-
-        if table_b_schema is None:
-            table_b_schema = self.unique_schema()
-
-        if table_a_db is None:
-            table_a_db = self.default_database
-
-        if table_b_db is None:
-            table_b_db = self.default_database
-
-        relation_a = self._make_relation(table_a, table_a_schema, table_a_db)
-        relation_b = self._make_relation(table_b, table_b_schema, table_b_db)
-
-        self._assertTableColumnsEqual(relation_a, relation_b)
-
-        sql = self._assertTablesEqualSql(relation_a, relation_b)
-        result = self.run_sql(sql, fetch='one')
-
-        self.assertEqual(
-            result[0],
-            0,
-            'row_count_difference nonzero: ' + sql
-        )
-        self.assertEqual(
-            result[1],
-            0,
-            'num_mismatched nonzero: ' + sql
-        )
-
-    def _make_relation(self, identifier, schema=None, database=None):
-        if schema is None:
-            schema = self.unique_schema()
-        if database is None:
-            database = self.default_database
-        return self.adapter.Relation.create(
-            database=database,
-            schema=schema,
-            identifier=identifier,
-            quote_policy=self.config.quoting
-        )
-
-    def get_many_relation_columns(self, relations):
-        """Returns a dict of (datbase, schema) -> (dict of (table_name -> list of columns))
-        """
-        schema_fqns = {}
-        for rel in relations:
-            this_schema = schema_fqns.setdefault((rel.database, rel.schema), [])
-            this_schema.append(rel.identifier)
-
-        column_specs = {}
-        for key, tables in schema_fqns.items():
-            database, schema = key
-            columns = self.get_many_table_columns(tables, schema, database=database)
-            table_columns = {}
-            for col in columns:
-                table_columns.setdefault(col[0], []).append(col[1:])
-            for rel_name, columns in table_columns.items():
-                key = (database, schema, rel_name)
-                column_specs[key] = columns
-
-        return column_specs
-
-    def assertManyRelationsEqual(self, relations, default_schema=None, default_database=None):
-        if default_schema is None:
-            default_schema = self.unique_schema()
-        if default_database is None:
-            default_database = self.default_database
-
-        specs = []
-        for relation in relations:
-            if not isinstance(relation, (tuple, list)):
-                relation = [relation]
-
-            assert len(relation) <= 3
-
-            if len(relation) == 3:
-                relation = self._make_relation(*relation)
-            elif len(relation) == 2:
-                relation = self._make_relation(relation[0], relation[1], default_database)
-            elif len(relation) == 1:
-                relation = self._make_relation(relation[0], default_schema, default_database)
-            else:
-                raise ValueError('relation must be a sequence of 1, 2, or 3 values')
-
-            specs.append(relation)
-
-        with self.get_connection():
-            column_specs = self.get_many_relation_columns(specs)
-
-        # make sure everyone has equal column definitions
-        first_columns = None
-        for relation in specs:
-            key = (relation.database, relation.schema, relation.identifier)
-            # get a good error here instead of a hard-to-diagnose KeyError
-            self.assertIn(key, column_specs, f'No columns found for {key}')
-            columns = column_specs[key]
-            if first_columns is None:
-                first_columns = columns
-            else:
-                self.assertEqual(
-                    first_columns, columns,
-                    '{} did not match {}'.format(str(specs[0]), str(relation))
-                )
-
-        # make sure everyone has the same data. if we got here, everyone had
-        # the same column specs!
-        first_relation = None
-        for relation in specs:
-            if first_relation is None:
-                first_relation = relation
-            else:
-                sql = self._assertTablesEqualSql(first_relation, relation,
-                                                 columns=first_columns)
-                result = self.run_sql(sql, fetch='one')
-
-                self.assertEqual(
-                    result[0],
-                    0,
-                    'row_count_difference nonzero: ' + sql
-                )
-                self.assertEqual(
-                    result[1],
-                    0,
-                    'num_mismatched nonzero: ' + sql
-                )
-
-    def assertManyTablesEqual(self, *args):
-        schema = self.unique_schema()
-
-        all_tables = []
-        for table_equivalencies in args:
-            all_tables += list(table_equivalencies)
-
-        all_cols = self.get_table_columns_as_dict(all_tables, schema)
-
-        for table_equivalencies in args:
-            first_table = table_equivalencies[0]
-            first_relation = self._make_relation(first_table)
-
-            # assert that all tables have the same columns
-            base_result = all_cols[first_table]
-            self.assertTrue(len(base_result) > 0)
-
-            for other_table in table_equivalencies[1:]:
-                other_result = all_cols[other_table]
-                self.assertTrue(len(other_result) > 0)
-                self.assertEqual(base_result, other_result)
-
-                other_relation = self._make_relation(other_table)
-                sql = self._assertTablesEqualSql(first_relation,
-                                                 other_relation,
-                                                 columns=base_result)
-                result = self.run_sql(sql, fetch='one')
-
-                self.assertEqual(
-                    result[0],
-                    0,
-                    'row_count_difference nonzero: ' + sql
-                )
-                self.assertEqual(
-                    result[1],
-                    0,
-                    'num_mismatched nonzero: ' + sql
-                )
-
-
-    def _assertTableRowCountsEqual(self, relation_a, relation_b):
-        cmp_query = """
-            with table_a as (
-
-                select count(*) as num_rows from {}
-
-            ), table_b as (
-
-                select count(*) as num_rows from {}
-
-            )
-
-            select table_a.num_rows - table_b.num_rows as difference
-            from table_a, table_b
-
-        """.format(str(relation_a), str(relation_b))
-
-        res = self.run_sql(cmp_query, fetch='one')
-
-        self.assertEqual(int(res[0]), 0, "Row count of table {} doesn't match row count of table {}. ({} rows different)".format(
-                relation_a.identifier,
-                relation_b.identifier,
-                res[0]
-            )
-        )
-
-    def assertTableDoesNotExist(self, table, schema=None, database=None):
-        columns = self.get_table_columns(table, schema, database)
-
-        self.assertEqual(
-            len(columns),
-            0
-        )
-
-    def assertTableDoesExist(self, table, schema=None, database=None):
-        columns = self.get_table_columns(table, schema, database)
-
-        self.assertGreater(
-            len(columns),
-            0
-        )
-
-    def _assertTableColumnsEqual(self, relation_a, relation_b):
-        table_a_result = self.get_relation_columns(relation_a)
-        table_b_result = self.get_relation_columns(relation_b)
-
-        text_types = {'text', 'character varying', 'character', 'varchar'}
-
-        self.assertEqual(len(table_a_result), len(table_b_result))
-        for a_column, b_column in zip(table_a_result, table_b_result):
-            a_name, a_type, a_size = a_column
-            b_name, b_type, b_size = b_column
-            self.assertEqual(a_name, b_name,
-                '{} vs {}: column "{}" != "{}"'.format(
-                    relation_a, relation_b, a_name, b_name
-                ))
-
-            self.assertEqual(a_type, b_type,
-                '{} vs {}: column "{}" has type "{}" != "{}"'.format(
-                    relation_a, relation_b, a_name, a_type, b_type
-                ))
-
-            self.assertEqual(a_size, b_size,
-                '{} vs {}: column "{}" has size "{}" != "{}"'.format(
-                    relation_a, relation_b, a_name, a_size, b_size
-                ))
-
-    def assertEquals(self, *args, **kwargs):
-        # assertEquals is deprecated. This makes the warnings less chatty
-        self.assertEqual(*args, **kwargs)
-
-    def assertBetween(self, timestr, start, end=None):
-        datefmt = '%Y-%m-%dT%H:%M:%S.%fZ'
-        if end is None:
-            end = datetime.utcnow()
-
-        parsed = datetime.strptime(timestr, datefmt)
-
-        self.assertLessEqual(start, parsed,
-            'parsed date {} happened before {}'.format(
-                parsed,
-                start.strftime(datefmt))
-        )
-        self.assertGreaterEqual(end, parsed,
-            'parsed date {} happened after {}'.format(
-                parsed,
-                end.strftime(datefmt))
-        )
-
-
-def use_profile(profile_name):
-    """A decorator to declare a test method as using a particular profile.
-    Handles both setting the nose attr and calling self.use_profile.
-
-    Use like this:
-
-    class TestSomething(DBTIntegrationTest):
-        @use_profile('postgres')
-        def test_postgres_thing(self):
-            self.assertEqual(self.adapter_type, 'postgres')
-
-        @use_profile('snowflake')
-        def test_snowflake_thing(self):
-            self.assertEqual(self.adapter_type, 'snowflake')
-    """
-    def outer(wrapped):
-        @getattr(pytest.mark, 'profile_'+profile_name)
-        @wraps(wrapped)
-        def func(self, *args, **kwargs):
-            return wrapped(self, *args, **kwargs)
-        # sanity check at import time
-        assert _profile_from_test_name(wrapped.__name__) == profile_name
-        return func
-    return outer
-
-
-class AnyFloat:
-    """Any float. Use this in assertEqual() calls to assert that it is a float.
-    """
-    def __eq__(self, other):
-        return isinstance(other, float)
-
-
-class AnyString:
-    """Any string. Use this in assertEqual() calls to assert that it is a string.
-    """
-    def __eq__(self, other):
-        return isinstance(other, str)
-
-
-class AnyStringWith:
-    def __init__(self, contains=None):
-        self.contains = contains
-
-    def __eq__(self, other):
-        if not isinstance(other, str):
-            return False
-
-        if self.contains is None:
-            return True
-
-        return self.contains in other
-
-    def __repr__(self):
-        return 'AnyStringWith<{!r}>'.format(self.contains)
-
-
-def get_manifest():
-    path = './target/partial_parse.msgpack'
-    if os.path.exists(path):
-        with open(path, 'rb') as fp:
-            manifest_mp = fp.read()
-        manifest: Manifest = Manifest.from_msgpack(manifest_mp)
-        return manifest
-    else:
-        return None
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
deleted file mode 100644
index b76bc6c3..00000000
--- a/tests/integration/conftest.py
+++ /dev/null
@@ -1,4 +0,0 @@
-def pytest_configure(config):
-    config.addinivalue_line("markers", "profile_databricks_cluster")
-    config.addinivalue_line("markers", "profile_databricks_sql_endpoint")
-    config.addinivalue_line("markers", "profile_apache_spark")
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index d24bc8a2..3c7fccd3 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -10,144 +10,161 @@
 
 
 class TestSparkAdapter(unittest.TestCase):
-
     def setUp(self):
         flags.STRICT_MODE = False
 
         self.project_cfg = {
-            'name': 'X',
-            'version': '0.1',
-            'profile': 'test',
-            'project-root': '/tmp/dbt/does-not-exist',
-            'quoting': {
-                'identifier': False,
-                'schema': False,
+            "name": "X",
+            "version": "0.1",
+            "profile": "test",
+            "project-root": "/tmp/dbt/does-not-exist",
+            "quoting": {
+                "identifier": False,
+                "schema": False,
             },
-            'config-version': 2
+            "config-version": 2,
         }
 
     def _get_target_http(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'http',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "http",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 443,
+                        "token": "abc123",
+                        "organization": "0123456789",
+                        "cluster": "01234-23423-coffeetime",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_thrift(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'thrift',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 10001,
-                    'user': 'dbt'
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "thrift",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 10001,
+                        "user": "dbt",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_thrift_kerberos(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'thrift',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 10001,
-                    'user': 'dbt',
-                    'auth': 'KERBEROS',
-                    'kerberos_service_name': 'hive'
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "thrift",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 10001,
+                        "user": "dbt",
+                        "auth": "KERBEROS",
+                        "kerberos_service_name": "hive",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_use_ssl_thrift(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'thrift',
-                    'use_ssl': True,
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 10001,
-                    'user': 'dbt'
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "thrift",
+                        "use_ssl": True,
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 10001,
+                        "user": "dbt",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_odbc_cluster(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'odbc',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
-                    'driver': 'Simba',
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "odbc",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 443,
+                        "token": "abc123",
+                        "organization": "0123456789",
+                        "cluster": "01234-23423-coffeetime",
+                        "driver": "Simba",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_odbc_sql_endpoint(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'odbc',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'endpoint': '012342342393920a',
-                    'driver': 'Simba',
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "odbc",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 443,
+                        "token": "abc123",
+                        "endpoint": "012342342393920a",
+                        "driver": "Simba",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def test_http_connection(self):
         config = self._get_target_http(self.project_cfg)
         adapter = SparkAdapter(config)
 
         def hive_http_connect(thrift_transport):
-            self.assertEqual(thrift_transport.scheme, 'https')
+            self.assertEqual(thrift_transport.scheme, "https")
             self.assertEqual(thrift_transport.port, 443)
-            self.assertEqual(thrift_transport.host, 'myorg.sparkhost.com')
+            self.assertEqual(thrift_transport.host, "myorg.sparkhost.com")
             self.assertEqual(
-                thrift_transport.path, '/sql/protocolv1/o/0123456789/01234-23423-coffeetime')
+                thrift_transport.path, "/sql/protocolv1/o/0123456789/01234-23423-coffeetime"
+            )
 
         # with mock.patch.object(hive, 'connect', new=hive_http_connect):
-        with mock.patch('dbt.adapters.spark.connections.hive.connect', new=hive_http_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch("dbt.adapters.spark.connections.hive.connect", new=hive_http_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.cluster,
-                             '01234-23423-coffeetime')
-            self.assertEqual(connection.credentials.token, 'abc123')
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.cluster, "01234-23423-coffeetime")
+            self.assertEqual(connection.credentials.token, "abc123")
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_connection(self):
@@ -155,20 +172,20 @@ def test_thrift_connection(self):
         adapter = SparkAdapter(config)
 
         def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
-            self.assertEqual(host, 'myorg.sparkhost.com')
+            self.assertEqual(host, "myorg.sparkhost.com")
             self.assertEqual(port, 10001)
-            self.assertEqual(username, 'dbt')
+            self.assertEqual(username, "dbt")
             self.assertIsNone(auth)
             self.assertIsNone(kerberos_service_name)
             self.assertIsNone(password)
 
-        with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch.object(hive, "connect", new=hive_thrift_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_ssl_connection(self):
@@ -178,16 +195,16 @@ def test_thrift_ssl_connection(self):
         def hive_thrift_connect(thrift_transport):
             self.assertIsNotNone(thrift_transport)
             transport = thrift_transport._trans
-            self.assertEqual(transport.host, 'myorg.sparkhost.com')
+            self.assertEqual(transport.host, "myorg.sparkhost.com")
             self.assertEqual(transport.port, 10001)
 
-        with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch.object(hive, "connect", new=hive_thrift_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_connection_kerberos(self):
@@ -195,20 +212,20 @@ def test_thrift_connection_kerberos(self):
         adapter = SparkAdapter(config)
 
         def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
-            self.assertEqual(host, 'myorg.sparkhost.com')
+            self.assertEqual(host, "myorg.sparkhost.com")
             self.assertEqual(port, 10001)
-            self.assertEqual(username, 'dbt')
-            self.assertEqual(auth, 'KERBEROS')
-            self.assertEqual(kerberos_service_name, 'hive')
+            self.assertEqual(username, "dbt")
+            self.assertEqual(auth, "KERBEROS")
+            self.assertEqual(kerberos_service_name, "hive")
             self.assertIsNone(password)
 
-        with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch.object(hive, "connect", new=hive_thrift_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_odbc_cluster_connection(self):
@@ -217,23 +234,25 @@ def test_odbc_cluster_connection(self):
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
-            self.assertIn('driver=simba;', connection_str.lower())
-            self.assertIn('port=443;', connection_str.lower())
-            self.assertIn('host=myorg.sparkhost.com;',
-                          connection_str.lower())
+            self.assertIn("driver=simba;", connection_str.lower())
+            self.assertIn("port=443;", connection_str.lower())
+            self.assertIn("host=myorg.sparkhost.com;", connection_str.lower())
             self.assertIn(
-                'httppath=/sql/protocolv1/o/0123456789/01234-23423-coffeetime;', connection_str.lower())  # noqa
-
-        with mock.patch('dbt.adapters.spark.connections.pyodbc.connect', new=pyodbc_connect):  # noqa
-            connection = adapter.acquire_connection('dummy')
+                "httppath=/sql/protocolv1/o/0123456789/01234-23423-coffeetime;",
+                connection_str.lower(),
+            )  # noqa
+
+        with mock.patch(
+            "dbt.adapters.spark.connections.pyodbc.connect", new=pyodbc_connect
+        ):  # noqa
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.cluster,
-                             '01234-23423-coffeetime')
-            self.assertEqual(connection.credentials.token, 'abc123')
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.cluster, "01234-23423-coffeetime")
+            self.assertEqual(connection.credentials.token, "abc123")
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_odbc_endpoint_connection(self):
@@ -242,23 +261,24 @@ def test_odbc_endpoint_connection(self):
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
-            self.assertIn('driver=simba;', connection_str.lower())
-            self.assertIn('port=443;', connection_str.lower())
-            self.assertIn('host=myorg.sparkhost.com;',
-                          connection_str.lower())
+            self.assertIn("driver=simba;", connection_str.lower())
+            self.assertIn("port=443;", connection_str.lower())
+            self.assertIn("host=myorg.sparkhost.com;", connection_str.lower())
             self.assertIn(
-                'httppath=/sql/1.0/endpoints/012342342393920a;', connection_str.lower())  # noqa
+                "httppath=/sql/1.0/endpoints/012342342393920a;", connection_str.lower()
+            )  # noqa
 
-        with mock.patch('dbt.adapters.spark.connections.pyodbc.connect', new=pyodbc_connect):  # noqa
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch(
+            "dbt.adapters.spark.connections.pyodbc.connect", new=pyodbc_connect
+        ):  # noqa
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.endpoint,
-                             '012342342393920a')
-            self.assertEqual(connection.credentials.token, 'abc123')
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.endpoint, "012342342393920a")
+            self.assertEqual(connection.credentials.token, "abc123")
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_parse_relation(self):
@@ -266,235 +286,238 @@ def test_parse_relation(self):
         rel_type = SparkRelation.get_relation_type.Table
 
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type
+            schema="default_schema", identifier="mytable", type=rel_type
         )
         assert relation.database is None
 
         # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED
         plain_rows = [
-            ('col1', 'decimal(22,0)'),
-            ('col2', 'string',),
-            ('dt', 'date'),
-            ('struct_col', 'struct<struct_inner_col:string>'),
-            ('# Partition Information', 'data_type'),
-            ('# col_name', 'data_type'),
-            ('dt', 'date'),
+            ("col1", "decimal(22,0)"),
+            (
+                "col2",
+                "string",
+            ),
+            ("dt", "date"),
+            ("struct_col", "struct<struct_inner_col:string>"),
+            ("# Partition Information", "data_type"),
+            ("# col_name", "data_type"),
+            ("dt", "date"),
             (None, None),
-            ('# Detailed Table Information', None),
-            ('Database', None),
-            ('Owner', 'root'),
-            ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'),
-            ('Last Access', 'Wed May 20 19:25:00 UTC 1925'),
-            ('Type', 'MANAGED'),
-            ('Provider', 'delta'),
-            ('Location', '/mnt/vo'),
-            ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'),
-            ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'),
-            ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'),
-            ('Partition Provider', 'Catalog')
+            ("# Detailed Table Information", None),
+            ("Database", None),
+            ("Owner", "root"),
+            ("Created Time", "Wed Feb 04 18:15:00 UTC 1815"),
+            ("Last Access", "Wed May 20 19:25:00 UTC 1925"),
+            ("Type", "MANAGED"),
+            ("Provider", "delta"),
+            ("Location", "/mnt/vo"),
+            ("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"),
+            ("InputFormat", "org.apache.hadoop.mapred.SequenceFileInputFormat"),
+            ("OutputFormat", "org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"),
+            ("Partition Provider", "Catalog"),
         ]
 
-        input_cols = [Row(keys=['col_name', 'data_type'], values=r)
-                      for r in plain_rows]
+        input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(
-            relation, input_cols)
+        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
         self.assertEqual(len(rows), 4)
-        self.assertEqual(rows[0].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col1',
-            'column_index': 0,
-            'dtype': 'decimal(22,0)',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
-
-        self.assertEqual(rows[1].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col2',
-            'column_index': 1,
-            'dtype': 'string',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
-
-        self.assertEqual(rows[2].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'dt',
-            'column_index': 2,
-            'dtype': 'date',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
-
-        self.assertEqual(rows[3].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'struct_col',
-            'column_index': 3,
-            'dtype': 'struct<struct_inner_col:string>',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            rows[0].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col1",
+                "column_index": 0,
+                "dtype": "decimal(22,0)",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
+
+        self.assertEqual(
+            rows[1].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col2",
+                "column_index": 1,
+                "dtype": "string",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
+
+        self.assertEqual(
+            rows[2].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "dt",
+                "column_index": 2,
+                "dtype": "date",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
+
+        self.assertEqual(
+            rows[3].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "struct_col",
+                "column_index": 3,
+                "dtype": "struct<struct_inner_col:string>",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
     def test_parse_relation_with_integer_owner(self):
         self.maxDiff = None
         rel_type = SparkRelation.get_relation_type.Table
 
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type
+            schema="default_schema", identifier="mytable", type=rel_type
         )
         assert relation.database is None
 
         # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED
         plain_rows = [
-            ('col1', 'decimal(22,0)'),
-            ('# Detailed Table Information', None),
-            ('Owner', 1234)
+            ("col1", "decimal(22,0)"),
+            ("# Detailed Table Information", None),
+            ("Owner", 1234),
         ]
 
-        input_cols = [Row(keys=['col_name', 'data_type'], values=r)
-                      for r in plain_rows]
+        input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(
-            relation, input_cols)
+        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
 
-        self.assertEqual(rows[0].to_column_dict().get('table_owner'), '1234')
+        self.assertEqual(rows[0].to_column_dict().get("table_owner"), "1234")
 
     def test_parse_relation_with_statistics(self):
         self.maxDiff = None
         rel_type = SparkRelation.get_relation_type.Table
 
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type
+            schema="default_schema", identifier="mytable", type=rel_type
         )
         assert relation.database is None
 
         # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED
         plain_rows = [
-            ('col1', 'decimal(22,0)'),
-            ('# Partition Information', 'data_type'),
+            ("col1", "decimal(22,0)"),
+            ("# Partition Information", "data_type"),
             (None, None),
-            ('# Detailed Table Information', None),
-            ('Database', None),
-            ('Owner', 'root'),
-            ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'),
-            ('Last Access', 'Wed May 20 19:25:00 UTC 1925'),
-            ('Statistics', '1109049927 bytes, 14093476 rows'),
-            ('Type', 'MANAGED'),
-            ('Provider', 'delta'),
-            ('Location', '/mnt/vo'),
-            ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'),
-            ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'),
-            ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'),
-            ('Partition Provider', 'Catalog')
+            ("# Detailed Table Information", None),
+            ("Database", None),
+            ("Owner", "root"),
+            ("Created Time", "Wed Feb 04 18:15:00 UTC 1815"),
+            ("Last Access", "Wed May 20 19:25:00 UTC 1925"),
+            ("Statistics", "1109049927 bytes, 14093476 rows"),
+            ("Type", "MANAGED"),
+            ("Provider", "delta"),
+            ("Location", "/mnt/vo"),
+            ("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"),
+            ("InputFormat", "org.apache.hadoop.mapred.SequenceFileInputFormat"),
+            ("OutputFormat", "org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"),
+            ("Partition Provider", "Catalog"),
         ]
 
-        input_cols = [Row(keys=['col_name', 'data_type'], values=r)
-                      for r in plain_rows]
+        input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(
-            relation, input_cols)
+        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
         self.assertEqual(len(rows), 1)
-        self.assertEqual(rows[0].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col1',
-            'column_index': 0,
-            'dtype': 'decimal(22,0)',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
-
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 1109049927,
-
-            'stats:rows:description': '',
-            'stats:rows:include': True,
-            'stats:rows:label': 'rows',
-            'stats:rows:value': 14093476,
-        })
+        self.assertEqual(
+            rows[0].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col1",
+                "column_index": 0,
+                "dtype": "decimal(22,0)",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 1109049927,
+                "stats:rows:description": "",
+                "stats:rows:include": True,
+                "stats:rows:label": "rows",
+                "stats:rows:value": 14093476,
+            },
+        )
 
     def test_relation_with_database(self):
         config = self._get_target_http(self.project_cfg)
         adapter = SparkAdapter(config)
         # fine
-        adapter.Relation.create(schema='different', identifier='table')
+        adapter.Relation.create(schema="different", identifier="table")
         with self.assertRaises(DbtRuntimeError):
             # not fine - database set
-            adapter.Relation.create(
-                database='something', schema='different', identifier='table')
+            adapter.Relation.create(database="something", schema="different", identifier="table")
 
     def test_profile_with_database(self):
         profile = {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'http',
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "http",
                     # not allowed
-                    'database': 'analytics2',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
+                    "database": "analytics2",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 443,
+                    "token": "abc123",
+                    "organization": "0123456789",
+                    "cluster": "01234-23423-coffeetime",
                 }
             },
-            'target': 'test'
+            "target": "test",
         }
         with self.assertRaises(DbtRuntimeError):
             config_from_parts_or_dicts(self.project_cfg, profile)
 
     def test_profile_with_cluster_and_sql_endpoint(self):
         profile = {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'odbc',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
-                    'endpoint': '0123412341234e',
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "odbc",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 443,
+                    "token": "abc123",
+                    "organization": "0123456789",
+                    "cluster": "01234-23423-coffeetime",
+                    "endpoint": "0123412341234e",
                 }
             },
-            'target': 'test'
+            "target": "test",
         }
         with self.assertRaises(DbtRuntimeError):
             config_from_parts_or_dicts(self.project_cfg, profile)
@@ -528,53 +551,53 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self)
             " |    |-- struct_inner_col: string (nullable = true)\n"
         )
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type,
-            information=information
+            schema="default_schema", identifier="mytable", type=rel_type, information=information
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(
-            relation)
+        columns = SparkAdapter(config).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
-        self.assertEqual(columns[0].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col1',
-            'column_index': 0,
-            'dtype': 'decimal(22,0)',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
-
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 123456789,
-        })
-
-        self.assertEqual(columns[3].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'struct_col',
-            'column_index': 3,
-            'dtype': 'struct',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
-
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 123456789,
-        })
+        self.assertEqual(
+            columns[0].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col1",
+                "column_index": 0,
+                "dtype": "decimal(22,0)",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 123456789,
+            },
+        )
+
+        self.assertEqual(
+            columns[3].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "struct_col",
+                "column_index": 3,
+                "dtype": "struct",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 123456789,
+            },
+        )
 
     def test_parse_columns_from_information_with_view_type(self):
         self.maxDiff = None
@@ -613,43 +636,45 @@ def test_parse_columns_from_information_with_view_type(self):
             " |    |-- struct_inner_col: string (nullable = true)\n"
         )
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='myview',
-            type=rel_type,
-            information=information
+            schema="default_schema", identifier="myview", type=rel_type, information=information
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(
-            relation)
+        columns = SparkAdapter(config).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
-        self.assertEqual(columns[1].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col2',
-            'column_index': 1,
-            'dtype': 'string',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
-
-        self.assertEqual(columns[3].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'struct_col',
-            'column_index': 3,
-            'dtype': 'struct',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            columns[1].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col2",
+                "column_index": 1,
+                "dtype": "string",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
+
+        self.assertEqual(
+            columns[3].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "struct_col",
+                "column_index": 3,
+                "dtype": "struct",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
     def test_parse_columns_from_information_with_table_type_and_parquet_provider(self):
         self.maxDiff = None
@@ -677,61 +702,58 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
             " |    |-- struct_inner_col: string (nullable = true)\n"
         )
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type,
-            information=information
+            schema="default_schema", identifier="mytable", type=rel_type, information=information
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(
-            relation)
+        columns = SparkAdapter(config).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
-        self.assertEqual(columns[2].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'dt',
-            'column_index': 2,
-            'dtype': 'date',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
-
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 1234567890,
-
-            'stats:rows:description': '',
-            'stats:rows:include': True,
-            'stats:rows:label': 'rows',
-            'stats:rows:value': 12345678
-        })
-
-        self.assertEqual(columns[3].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'struct_col',
-            'column_index': 3,
-            'dtype': 'struct',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
-
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 1234567890,
-
-            'stats:rows:description': '',
-            'stats:rows:include': True,
-            'stats:rows:label': 'rows',
-            'stats:rows:value': 12345678
-        })
+        self.assertEqual(
+            columns[2].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "dt",
+                "column_index": 2,
+                "dtype": "date",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 1234567890,
+                "stats:rows:description": "",
+                "stats:rows:include": True,
+                "stats:rows:label": "rows",
+                "stats:rows:value": 12345678,
+            },
+        )
 
+        self.assertEqual(
+            columns[3].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "struct_col",
+                "column_index": 3,
+                "dtype": "struct",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 1234567890,
+                "stats:rows:description": "",
+                "stats:rows:include": True,
+                "stats:rows:label": "rows",
+                "stats:rows:value": 12345678,
+            },
+        )
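
The assertions above describe the column dictionaries produced by SparkAdapter.parse_columns_from_information. As a rough, standalone sketch of the parsing those tests exercise (not the adapter's implementation), the snippet below pulls top-level column names and dtypes out of the schema portion of an `information` string; the first column name, `col1`, is a placeholder, since it is not shown in the hunk above.

import re

# Schema portion of an `information` string like the ones built in these tests.
# `col1` is a placeholder for the first column, which is not shown above.
information = (
    "Schema: root\n"
    " |-- col1: decimal(22,0) (nullable = true)\n"
    " |-- col2: string (nullable = true)\n"
    " |-- dt: date (nullable = true)\n"
    " |-- struct_col: struct (nullable = true)\n"
    " |    |-- struct_inner_col: string (nullable = true)\n"
)

# Only top-level columns (a single " |-- " prefix) become adapter columns;
# nested struct fields are skipped, matching len(columns) == 4 above.
pattern = re.compile(r"^ \|-- (\S+): (\w+)", re.MULTILINE)
columns = [
    {"column": name, "column_index": index, "dtype": dtype}
    for index, (name, dtype) in enumerate(pattern.findall(information))
]
assert [c["column"] for c in columns] == ["col1", "col2", "dt", "struct_col"]
assert columns[3]["dtype"] == "struct"
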
diff --git a/tests/unit/test_column.py b/tests/unit/test_column.py
index f7f8d877..0132ad88 100644
--- a/tests/unit/test_column.py
+++ b/tests/unit/test_column.py
@@ -4,35 +4,31 @@
 
 
 class TestSparkColumn(unittest.TestCase):
-
     def test_convert_table_stats_with_no_statistics(self):
-        self.assertDictEqual(
-            SparkColumn.convert_table_stats(None),
-            {}
-        )
+        self.assertDictEqual(SparkColumn.convert_table_stats(None), {})
 
     def test_convert_table_stats_with_bytes(self):
         self.assertDictEqual(
             SparkColumn.convert_table_stats("123456789 bytes"),
             {
-                'stats:bytes:description': '',
-                'stats:bytes:include': True,
-                'stats:bytes:label': 'bytes',
-                'stats:bytes:value': 123456789
-            }
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 123456789,
+            },
         )
 
     def test_convert_table_stats_with_bytes_and_rows(self):
         self.assertDictEqual(
             SparkColumn.convert_table_stats("1234567890 bytes, 12345678 rows"),
             {
-                'stats:bytes:description': '',
-                'stats:bytes:include': True,
-                'stats:bytes:label': 'bytes',
-                'stats:bytes:value': 1234567890,
-                'stats:rows:description': '',
-                'stats:rows:include': True,
-                'stats:rows:label': 'rows',
-                'stats:rows:value': 12345678
-            }
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 1234567890,
+                "stats:rows:description": "",
+                "stats:rows:include": True,
+                "stats:rows:label": "rows",
+                "stats:rows:value": 12345678,
+            },
         )
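
The stats dictionaries asserted above follow a flat stats:<label>:<field> naming scheme. A minimal standalone sketch that reproduces that shape from a Spark statistics string, shown only to illustrate the behavior under test rather than SparkColumn.convert_table_stats itself:

from typing import Dict, Optional, Union


def convert_table_stats_sketch(raw_stats: Optional[str]) -> Dict[str, Union[int, str, bool]]:
    """Parse a statistics string such as '1234567890 bytes, 12345678 rows'
    into the flat stats:* dictionary shape asserted in the tests above."""
    stats: Dict[str, Union[int, str, bool]] = {}
    if not raw_stats:
        return stats
    for part in raw_stats.split(","):
        value, label = part.strip().split(" ", 1)
        stats[f"stats:{label}:description"] = ""
        stats[f"stats:{label}:include"] = True
        stats[f"stats:{label}:label"] = label
        stats[f"stats:{label}:value"] = int(value)
    return stats


assert convert_table_stats_sketch(None) == {}
assert convert_table_stats_sketch("123456789 bytes") == {
    "stats:bytes:description": "",
    "stats:bytes:include": True,
    "stats:bytes:label": "bytes",
    "stats:bytes:value": 123456789,
}
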
diff --git a/tests/unit/test_macros.py b/tests/unit/test_macros.py
index 220a74db..5b648a07 100644
--- a/tests/unit/test_macros.py
+++ b/tests/unit/test_macros.py
@@ -5,153 +5,206 @@
 
 
 class TestSparkMacros(unittest.TestCase):
-
     def setUp(self):
-        self.jinja_env = Environment(loader=FileSystemLoader('dbt/include/spark/macros'),
-                                     extensions=['jinja2.ext.do', ])
+        self.jinja_env = Environment(
+            loader=FileSystemLoader("dbt/include/spark/macros"),
+            extensions=[
+                "jinja2.ext.do",
+            ],
+        )
 
         self.config = {}
         self.default_context = {
-            'validation': mock.Mock(),
-            'model': mock.Mock(),
-            'exceptions': mock.Mock(),
-            'config': mock.Mock(),
-            'adapter': mock.Mock(),
-            'return': lambda r: r,
+            "validation": mock.Mock(),
+            "model": mock.Mock(),
+            "exceptions": mock.Mock(),
+            "config": mock.Mock(),
+            "adapter": mock.Mock(),
+            "return": lambda r: r,
         }
-        self.default_context['config'].get = lambda key, default=None, **kwargs: self.config.get(key, default)
+        self.default_context["config"].get = lambda key, default=None, **kwargs: self.config.get(
+            key, default
+        )
 
     def __get_template(self, template_filename):
         return self.jinja_env.get_template(template_filename, globals=self.default_context)
 
     def __run_macro(self, template, name, temporary, relation, sql):
-        self.default_context['model'].alias = relation
+        self.default_context["model"].alias = relation
 
         def dispatch(macro_name, macro_namespace=None, packages=None):
-            return getattr(template.module, f'spark__{macro_name}')
-        self.default_context['adapter'].dispatch = dispatch
+            return getattr(template.module, f"spark__{macro_name}")
+
+        self.default_context["adapter"].dispatch = dispatch
 
         value = getattr(template.module, name)(temporary, relation, sql)
-        return re.sub(r'\s\s+', ' ', value)
+        return re.sub(r"\s\s+", " ", value)
 
     def test_macros_load(self):
-        self.jinja_env.get_template('adapters.sql')
+        self.jinja_env.get_template("adapters.sql")
 
     def test_macros_create_table_as(self):
-        template = self.__get_template('adapters.sql')
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        template = self.__get_template("adapters.sql")
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
 
         self.assertEqual(sql, "create table my_table as select 1")
 
     def test_macros_create_table_as_file_format(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['file_format'] = 'delta'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["file_format"] = "delta"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create or replace table my_table using delta as select 1")
 
-        self.config['file_format'] = 'hudi'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["file_format"] = "hudi"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table using hudi as select 1")
 
     def test_macros_create_table_as_options(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['file_format'] = 'delta'
-        self.config['options'] = {"compression": "gzip"}
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, 'create or replace table my_table using delta options (compression "gzip" ) as select 1')
+        self.config["file_format"] = "delta"
+        self.config["options"] = {"compression": "gzip"}
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql,
+            'create or replace table my_table using delta options (compression "gzip" ) as select 1',
+        )
 
-        self.config['file_format'] = 'hudi'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, 'create table my_table using hudi options (compression "gzip" ) as select 1')
+        self.config["file_format"] = "hudi"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql, 'create table my_table using hudi options (compression "gzip" ) as select 1'
+        )
 
     def test_macros_create_table_as_hudi_options(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['file_format'] = 'hudi'
-        self.config['unique_key'] = 'id'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id').strip()
-        self.assertEqual(sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id')
+        self.config["file_format"] = "hudi"
+        self.config["unique_key"] = "id"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1 as id"
+        ).strip()
+        self.assertEqual(
+            sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id'
+        )
 
-        self.config['file_format'] = 'hudi'
-        self.config['unique_key'] = 'id'
-        self.config['options'] = {'primaryKey': 'id'}
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id').strip()
-        self.assertEqual(sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id')
+        self.config["file_format"] = "hudi"
+        self.config["unique_key"] = "id"
+        self.config["options"] = {"primaryKey": "id"}
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1 as id"
+        ).strip()
+        self.assertEqual(
+            sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id'
+        )
 
-        self.config['file_format'] = 'hudi'
-        self.config['unique_key'] = 'uuid'
-        self.config['options'] = {'primaryKey': 'id'}
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id')
-        self.assertIn('mock.raise_compiler_error()', sql)
+        self.config["file_format"] = "hudi"
+        self.config["unique_key"] = "uuid"
+        self.config["options"] = {"primaryKey": "id"}
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1 as id"
+        )
+        self.assertIn("mock.raise_compiler_error()", sql)
 
     def test_macros_create_table_as_partition(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['partition_by'] = 'partition_1'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["partition_by"] = "partition_1"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table partitioned by (partition_1) as select 1")
 
     def test_macros_create_table_as_partitions(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['partition_by'] = ['partition_1', 'partition_2']
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql,
-                         "create table my_table partitioned by (partition_1,partition_2) as select 1")
+        self.config["partition_by"] = ["partition_1", "partition_2"]
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql, "create table my_table partitioned by (partition_1,partition_2) as select 1"
+        )
 
     def test_macros_create_table_as_cluster(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['clustered_by'] = 'cluster_1'
-        self.config['buckets'] = '1'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, "create table my_table clustered by (cluster_1) into 1 buckets as select 1")
+        self.config["clustered_by"] = "cluster_1"
+        self.config["buckets"] = "1"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql, "create table my_table clustered by (cluster_1) into 1 buckets as select 1"
+        )
 
     def test_macros_create_table_as_clusters(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['clustered_by'] = ['cluster_1', 'cluster_2']
-        self.config['buckets'] = '1'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, "create table my_table clustered by (cluster_1,cluster_2) into 1 buckets as select 1")
+        self.config["clustered_by"] = ["cluster_1", "cluster_2"]
+        self.config["buckets"] = "1"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql,
+            "create table my_table clustered by (cluster_1,cluster_2) into 1 buckets as select 1",
+        )
 
     def test_macros_create_table_as_location(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['location_root'] = '/mnt/root'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["location_root"] = "/mnt/root"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table location '/mnt/root/my_table' as select 1")
 
     def test_macros_create_table_as_comment(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['persist_docs'] = {'relation': True}
-        self.default_context['model'].description = 'Description Test'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["persist_docs"] = {"relation": True}
+        self.default_context["model"].description = "Description Test"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table comment 'Description Test' as select 1")
 
     def test_macros_create_table_as_all(self):
-        template = self.__get_template('adapters.sql')
-
-        self.config['file_format'] = 'delta'
-        self.config['location_root'] = '/mnt/root'
-        self.config['partition_by'] = ['partition_1', 'partition_2']
-        self.config['clustered_by'] = ['cluster_1', 'cluster_2']
-        self.config['buckets'] = '1'
-        self.config['persist_docs'] = {'relation': True}
-        self.default_context['model'].description = 'Description Test'
-
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        template = self.__get_template("adapters.sql")
+
+        self.config["file_format"] = "delta"
+        self.config["location_root"] = "/mnt/root"
+        self.config["partition_by"] = ["partition_1", "partition_2"]
+        self.config["clustered_by"] = ["cluster_1", "cluster_2"]
+        self.config["buckets"] = "1"
+        self.config["persist_docs"] = {"relation": True}
+        self.default_context["model"].description = "Description Test"
+
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(
             sql,
-            "create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
+            "create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1",
         )
 
-        self.config['file_format'] = 'hudi'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["file_format"] = "hudi"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(
             sql,
-            "create table my_table using hudi partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
+            "create table my_table using hudi partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1",
         )
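
The macro tests above render adapters.sql through a bare jinja2.Environment with a mocked dbt context. A minimal self-contained sketch of that pattern, using a simplified stand-in macro rather than the real spark__create_table_as:

from unittest import mock

from jinja2 import Environment

# A simplified stand-in macro; the real tests load adapters.sql from the package.
macro_source = (
    "{% macro demo_create_table_as(relation, sql) %}"
    "create table {{ relation }}"
    "{% if config.get('file_format') %} using {{ config.get('file_format') }}{% endif %}"
    " as {{ sql }}"
    "{% endmacro %}"
)

config_values = {"file_format": "delta"}
context = {"config": mock.Mock()}
context["config"].get = lambda key, default=None, **kwargs: config_values.get(key, default)

template = Environment().from_string(macro_source, globals=context)
rendered = template.module.demo_create_table_as("my_table", "select 1")
assert rendered == "create table my_table using delta as select 1"
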
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 585f1f61..ac8c6224 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -26,22 +26,22 @@ def normalize(path):
 
 
 class Obj:
-    which = 'blah'
+    which = "blah"
     single_threaded = False
 
 
-def mock_connection(name, state='open'):
+def mock_connection(name, state="open"):
     conn = mock.MagicMock()
     conn.name = name
     conn.state = state
     return conn
 
 
-def profile_from_dict(profile, profile_name, cli_vars='{}'):
+def profile_from_dict(profile, profile_name, cli_vars="{}"):
     from dbt.config import Profile
     from dbt.config.renderer import ProfileRenderer
-    from dbt.context.base import generate_base_context
     from dbt.config.utils import parse_cli_vars
+
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
@@ -51,6 +51,7 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'):
     # flags global. This is a bit of a hack, but it's the best way to do it.
     from dbt.flags import set_from_args
     from argparse import Namespace
+
     set_from_args(Namespace(), None)
     return Profile.from_raw_profile_info(
         profile,
@@ -59,16 +60,16 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'):
     )
 
 
-def project_from_dict(project, profile, packages=None, selectors=None, cli_vars='{}'):
-    from dbt.config import Project
+def project_from_dict(project, profile, packages=None, selectors=None, cli_vars="{}"):
     from dbt.config.renderer import DbtProjectYamlRenderer
     from dbt.config.utils import parse_cli_vars
+
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
     renderer = DbtProjectYamlRenderer(profile, cli_vars)
 
-    project_root = project.pop('project-root', os.getcwd())
+    project_root = project.pop("project-root", os.getcwd())
 
     partial = PartialProject.from_dicts(
         project_root=project_root,
@@ -79,19 +80,18 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars=
     return partial.render(renderer)
 
 
-
-def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars='{}'):
+def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars="{}"):
     from dbt.config import Project, Profile, RuntimeConfig
     from dbt.config.utils import parse_cli_vars
     from copy import deepcopy
+
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
-
     if isinstance(project, Project):
         profile_name = project.profile_name
     else:
-        profile_name = project.get('profile')
+        profile_name = project.get("profile")
 
     if not isinstance(profile, Profile):
         profile = profile_from_dict(
@@ -111,16 +111,13 @@ def config_from_parts_or_dicts(project, profile, packages=None, selectors=None,
 
     args = Obj()
     args.vars = cli_vars
-    args.profile_dir = '/dev/null'
-    return RuntimeConfig.from_parts(
-        project=project,
-        profile=profile,
-        args=args
-    )
+    args.profile_dir = "/dev/null"
+    return RuntimeConfig.from_parts(project=project, profile=profile, args=args)
 
 
 def inject_plugin(plugin):
     from dbt.adapters.factory import FACTORY
+
     key = plugin.adapter.type()
     FACTORY.plugins[key] = plugin
 
@@ -128,6 +125,7 @@ def inject_plugin(plugin):
 def inject_plugin_for(config):
     # from dbt.adapters.postgres import Plugin, PostgresAdapter
     from dbt.adapters.factory import FACTORY
+
     FACTORY.load_plugin(config.credentials.type)
     adapter = FACTORY.get_adapter(config)
     return adapter
@@ -139,12 +137,14 @@ def inject_adapter(value, plugin):
     """
     inject_plugin(plugin)
     from dbt.adapters.factory import FACTORY
+
     key = value.type()
     FACTORY.adapters[key] = value
 
 
 def clear_plugin(plugin):
     from dbt.adapters.factory import FACTORY
+
     key = plugin.adapter.type()
     FACTORY.plugins.pop(key, None)
     FACTORY.adapters.pop(key, None)
@@ -164,7 +164,7 @@ def assert_from_dict(self, obj, dct, cls=None):
         if cls is None:
             cls = self.ContractType
         cls.validate(dct)
-        self.assertEqual(cls.from_dict(dct),  obj)
+        self.assertEqual(cls.from_dict(dct), obj)
 
     def assert_symmetric(self, obj, dct, cls=None):
         self.assert_to_dict(obj, dct)
@@ -187,7 +187,7 @@ def compare_dicts(dict1, dict2):
     common_keys = set(first_set).intersection(set(second_set))
     found_differences = False
     for key in common_keys:
-        if dict1[key] != dict2[key] :
+        if dict1[key] != dict2[key]:
             print(f"--- --- first dict: {key}: {str(dict1[key])}")
             print(f"--- --- second dict: {key}: {str(dict2[key])}")
             found_differences = True
@@ -202,7 +202,7 @@ def assert_from_dict(obj, dct, cls=None):
         cls = obj.__class__
     cls.validate(dct)
     obj_from_dict = cls.from_dict(dct)
-    if hasattr(obj, 'created_at'):
+    if hasattr(obj, "created_at"):
         obj_from_dict.created_at = 1
         obj.created_at = 1
     assert obj_from_dict == obj
@@ -210,10 +210,10 @@ def assert_from_dict(obj, dct, cls=None):
 
 def assert_to_dict(obj, dct):
     obj_to_dict = obj.to_dict(omit_none=True)
-    if 'created_at' in obj_to_dict:
-        obj_to_dict['created_at'] = 1
-    if 'created_at' in dct:
-        dct['created_at'] = 1
+    if "created_at" in obj_to_dict:
+        obj_to_dict["created_at"] = 1
+    if "created_at" in dct:
+        dct["created_at"] = 1
     assert obj_to_dict == dct
 
 
@@ -231,25 +231,26 @@ def assert_fails_validation(dct, cls):
 def generate_name_macros(package):
     from dbt.contracts.graph.parsed import ParsedMacro
     from dbt.node_types import NodeType
+
     name_sql = {}
-    for component in ('database', 'schema', 'alias'):
-        if component == 'alias':
-            source = 'node.name'
+    for component in ("database", "schema", "alias"):
+        if component == "alias":
+            source = "node.name"
         else:
-            source = f'target.{component}'
-        name = f'generate_{component}_name'
-        sql = f'{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}'
+            source = f"target.{component}"
+        name = f"generate_{component}_name"
+        sql = f"{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}"
         name_sql[name] = sql
 
     for name, sql in name_sql.items():
         pm = ParsedMacro(
             name=name,
             resource_type=NodeType.Macro,
-            unique_id=f'macro.{package}.{name}',
+            unique_id=f"macro.{package}.{name}",
             package_name=package,
-            original_file_path=normalize('macros/macro.sql'),
-            root_path='./dbt_packages/root',
-            path=normalize('macros/macro.sql'),
+            original_file_path=normalize("macros/macro.sql"),
+            root_path="./dbt_packages/root",
+            path=normalize("macros/macro.sql"),
             macro_sql=sql,
         )
         yield pm
@@ -258,6 +259,7 @@ def generate_name_macros(package):
 class TestAdapterConversions(TestCase):
     def _get_tester_for(self, column_type):
         from dbt.clients import agate_helper
+
         if column_type is agate.TimeDelta:  # dbt never makes this!
             return agate.TimeDelta()
 
@@ -265,10 +267,10 @@ def _get_tester_for(self, column_type):
             if type(instance) is column_type:
                 return instance
 
-        raise ValueError(f'no tester for {column_type}')
+        raise ValueError(f"no tester for {column_type}")
 
     def _make_table_of(self, rows, column_types):
-        column_names = list(string.ascii_letters[:len(rows[0])])
+        column_names = list(string.ascii_letters[: len(rows[0])])
         if isinstance(column_types, type):
             column_types = [self._get_tester_for(column_types) for _ in column_names]
         else:
@@ -277,50 +279,48 @@ def _make_table_of(self, rows, column_types):
         return table
 
 
-def MockMacro(package, name='my_macro', **kwargs):
+def MockMacro(package, name="my_macro", **kwargs):
     from dbt.contracts.graph.parsed import ParsedMacro
     from dbt.node_types import NodeType
 
     mock_kwargs = dict(
         resource_type=NodeType.Macro,
         package_name=package,
-        unique_id=f'macro.{package}.{name}',
-        original_file_path='/dev/null',
+        unique_id=f"macro.{package}.{name}",
+        original_file_path="/dev/null",
     )
 
     mock_kwargs.update(kwargs)
 
-    macro = mock.MagicMock(
-        spec=ParsedMacro,
-        **mock_kwargs
-    )
+    macro = mock.MagicMock(spec=ParsedMacro, **mock_kwargs)
     macro.name = name
     return macro
 
 
-def MockMaterialization(package, name='my_materialization', adapter_type=None, **kwargs):
+def MockMaterialization(package, name="my_materialization", adapter_type=None, **kwargs):
     if adapter_type is None:
-        adapter_type = 'default'
-    kwargs['adapter_type'] = adapter_type
-    return MockMacro(package, f'materialization_{name}_{adapter_type}', **kwargs)
+        adapter_type = "default"
+    kwargs["adapter_type"] = adapter_type
+    return MockMacro(package, f"materialization_{name}_{adapter_type}", **kwargs)
 
 
-def MockGenerateMacro(package, component='some_component', **kwargs):
-    name = f'generate_{component}_name'
+def MockGenerateMacro(package, component="some_component", **kwargs):
+    name = f"generate_{component}_name"
     return MockMacro(package, name=name, **kwargs)
 
 
 def MockSource(package, source_name, name, **kwargs):
     from dbt.node_types import NodeType
     from dbt.contracts.graph.parsed import ParsedSourceDefinition
+
     src = mock.MagicMock(
         __class__=ParsedSourceDefinition,
         resource_type=NodeType.Source,
         source_name=source_name,
         package_name=package,
-        unique_id=f'source.{package}.{source_name}.{name}',
-        search_name=f'{source_name}.{name}',
-        **kwargs
+        unique_id=f"source.{package}.{source_name}.{name}",
+        search_name=f"{source_name}.{name}",
+        **kwargs,
     )
     src.name = name
     return src
@@ -329,6 +329,7 @@ def MockSource(package, source_name, name, **kwargs):
 def MockNode(package, name, resource_type=None, **kwargs):
     from dbt.node_types import NodeType
     from dbt.contracts.graph.parsed import ParsedModelNode, ParsedSeedNode
+
     if resource_type is None:
         resource_type = NodeType.Model
     if resource_type == NodeType.Model:
@@ -336,14 +337,14 @@ def MockNode(package, name, resource_type=None, **kwargs):
     elif resource_type == NodeType.Seed:
         cls = ParsedSeedNode
     else:
-        raise ValueError(f'I do not know how to handle {resource_type}')
+        raise ValueError(f"I do not know how to handle {resource_type}")
     node = mock.MagicMock(
         __class__=cls,
         resource_type=resource_type,
         package_name=package,
-        unique_id=f'{str(resource_type)}.{package}.{name}',
+        unique_id=f"{str(resource_type)}.{package}.{name}",
         search_name=name,
-        **kwargs
+        **kwargs,
     )
     node.name = name
     return node
@@ -352,22 +353,23 @@ def MockNode(package, name, resource_type=None, **kwargs):
 def MockDocumentation(package, name, **kwargs):
     from dbt.node_types import NodeType
     from dbt.contracts.graph.parsed import ParsedDocumentation
+
     doc = mock.MagicMock(
         __class__=ParsedDocumentation,
         resource_type=NodeType.Documentation,
         package_name=package,
         search_name=name,
-        unique_id=f'{package}.{name}',
-        **kwargs
+        unique_id=f"{package}.{name}",
+        **kwargs,
     )
     doc.name = name
     return doc
 
 
-def load_internal_manifest_macros(config, macro_hook = lambda m: None):
+def load_internal_manifest_macros(config, macro_hook=lambda m: None):
     from dbt.parser.manifest import ManifestLoader
-    return ManifestLoader.load_macros(config, macro_hook)
 
+    return ManifestLoader.load_macros(config, macro_hook)
 
 
 def dict_replace(dct, **kwargs):

From 53c586a4b803fc731d6cde3651edc4c283cc8c25 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 16 Mar 2023 17:33:05 -0700
Subject: [PATCH 415/603] [create-pull-request] automated change (#686)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                                 |  2 +-
 .changes/1.5.0-b3.md                             | 13 +++++++++++++
 .../Features-20230223-180923.yaml                |  0
 .../Fixes-20221124-125501.yaml                   |  0
 .../Fixes-20230303-200542.yaml                   |  0
 CHANGELOG.md                                     | 16 +++++++++++++++-
 dbt/adapters/spark/__version__.py                |  2 +-
 setup.py                                         |  2 +-
 8 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.5.0-b3.md
 rename .changes/{unreleased => 1.5.0}/Features-20230223-180923.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Fixes-20221124-125501.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Fixes-20230303-200542.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 1b1f4e67..adead0c6 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.5.0b2
+current_version = 1.5.0b3
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.5.0-b3.md b/.changes/1.5.0-b3.md
new file mode 100644
index 00000000..5d721823
--- /dev/null
+++ b/.changes/1.5.0-b3.md
@@ -0,0 +1,13 @@
+## dbt-spark 1.5.0-b3 - March 16, 2023
+
+### Features
+
+- Enforce contracts on models materialized as tables and views ([#639](https://github.com/dbt-labs/dbt-spark/issues/639), [#654](https://github.com/dbt-labs/dbt-spark/issues/654))
+
+### Fixes
+
+- Use take() instead of collect on dataframe to improve the performance ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
+- Fix pyodbc type_code -> data_type conversion  ([#665](https://github.com/dbt-labs/dbt-spark/issues/665))
+
+### Contributors
+- [@akashrn5](https://github.com/akashrn5) ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
diff --git a/.changes/unreleased/Features-20230223-180923.yaml b/.changes/1.5.0/Features-20230223-180923.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230223-180923.yaml
rename to .changes/1.5.0/Features-20230223-180923.yaml
diff --git a/.changes/unreleased/Fixes-20221124-125501.yaml b/.changes/1.5.0/Fixes-20221124-125501.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20221124-125501.yaml
rename to .changes/1.5.0/Fixes-20221124-125501.yaml
diff --git a/.changes/unreleased/Fixes-20230303-200542.yaml b/.changes/1.5.0/Fixes-20230303-200542.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20230303-200542.yaml
rename to .changes/1.5.0/Fixes-20230303-200542.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed67043f..fc7262eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,21 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.5.0-b3 - March 16, 2023
+
+### Features
+
+- Enforce contracts on models materialized as tables and views ([#639](https://github.com/dbt-labs/dbt-spark/issues/639), [#654](https://github.com/dbt-labs/dbt-spark/issues/654))
+
+### Fixes
+
+- Use take() instead of collect on dataframe to improve the performance ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
+- Fix pyodbc type_code -> data_type conversion  ([#665](https://github.com/dbt-labs/dbt-spark/issues/665))
+
+### Contributors
+- [@akashrn5](https://github.com/akashrn5) ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
+
+
 ## dbt-spark 1.5.0-b2 - March 03, 2023
 
 ### Fixes
@@ -18,7 +33,6 @@
 ### Contributors
 - [@dparent1](https://github.com/dparent1) ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
 
-
 ## dbt-spark 1.5.0-b1 - February 22, 2023
 
 ### Features
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 4f8b1531..649c005a 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.5.0b2"
+version = "1.5.0b3"
diff --git a/setup.py b/setup.py
index cf5e1c43..8e7b6ab0 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.5.0b2"
+package_version = "1.5.0b3"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From d8ed842e52610c65fadee7b016f0bd280fb8037b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 19 Mar 2023 15:56:36 -0400
Subject: [PATCH 416/603] Update wheel requirement from ~=0.38 to ~=0.40 (#680)

* Update wheel requirement from ~=0.38 to ~=0.40

Updates the requirements on [wheel](https://github.com/pypa/wheel) to permit the latest version.
- [Release notes](https://github.com/pypa/wheel/releases)
- [Changelog](https://github.com/pypa/wheel/blob/main/docs/news.rst)
- [Commits](https://github.com/pypa/wheel/compare/0.38.0...0.40.0)

---
updated-dependencies:
- dependency-name: wheel
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230314-232721.yaml | 6 ++++++
 .changes/unreleased/Dependencies-20230319-190421.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230314-232721.yaml
 create mode 100644 .changes/unreleased/Dependencies-20230319-190421.yaml

diff --git a/.changes/unreleased/Dependencies-20230314-232721.yaml b/.changes/unreleased/Dependencies-20230314-232721.yaml
new file mode 100644
index 00000000..e3e57d9c
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230314-232721.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update wheel requirement from ~=0.38 to ~=0.40"
+time: 2023-03-14T23:27:21.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 680
diff --git a/.changes/unreleased/Dependencies-20230319-190421.yaml b/.changes/unreleased/Dependencies-20230319-190421.yaml
new file mode 100644
index 00000000..5651f47a
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230319-190421.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update wheel requirement from ~=0.38 to ~=0.40"
+time: 2023-03-19T19:04:21.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 680
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 780bf5e8..1c97fd85 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -29,7 +29,7 @@ tox~=4.4;python_version>="3.8"
 types-pytz~=2022.7
 types-requests~=2.28
 twine~=4.0
-wheel~=0.38
+wheel~=0.40
 
 # Adapter specific dependencies
 mock~=5.0

From 941768d66001bb3435b367aea0f641315e21957f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 19 Mar 2023 16:47:26 -0400
Subject: [PATCH 417/603] Update ipdb requirement from ~=0.13.11 to ~=0.13.13
 (#677)

* Update ipdb requirement from ~=0.13.11 to ~=0.13.13

Updates the requirements on [ipdb](https://github.com/gotcha/ipdb) to permit the latest version.
- [Release notes](https://github.com/gotcha/ipdb/releases)
- [Changelog](https://github.com/gotcha/ipdb/blob/master/HISTORY.txt)
- [Commits](https://github.com/gotcha/ipdb/compare/0.13.11...0.13.13)

---
updated-dependencies:
- dependency-name: ipdb
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230309-230322.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230309-230322.yaml

diff --git a/.changes/unreleased/Dependencies-20230309-230322.yaml b/.changes/unreleased/Dependencies-20230309-230322.yaml
new file mode 100644
index 00000000..b02c62d5
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230309-230322.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update ipdb requirement from ~=0.13.11 to ~=0.13.13"
+time: 2023-03-09T23:03:22.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 677
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 1c97fd85..dd2589d2 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=5.0;python_version=="3.7"
 flake8~=6.0;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
-ipdb~=0.13.11
+ipdb~=0.13.13
 mypy==1.0.1  # patch updates have historically introduced breaking changes
 pip-tools~=6.12
 pre-commit~=2.21;python_version=="3.7"

From a867b996469ab8124af560c84e014b97d0f0da7f Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Sun, 19 Mar 2023 21:06:36 -0400
Subject: [PATCH 418/603] ADAP 364 use constraints column order in
 create_table_as (#684)

---
 .../unreleased/Features-20230315-103748.yaml  |  6 +++++
 dbt/include/spark/macros/adapters.sql         |  3 ++-
 tests/functional/adapter/test_constraints.py  | 24 ++++++++++++++-----
 3 files changed, 26 insertions(+), 7 deletions(-)
 create mode 100644 .changes/unreleased/Features-20230315-103748.yaml

diff --git a/.changes/unreleased/Features-20230315-103748.yaml b/.changes/unreleased/Features-20230315-103748.yaml
new file mode 100644
index 00000000..84a3704b
--- /dev/null
+++ b/.changes/unreleased/Features-20230315-103748.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Modify order of columns in create_table_as to match contract
+time: 2023-03-15T10:37:48.541043-04:00
+custom:
+  Author: gshank
+  Issue: "671"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 725277b3..f4bd09c2 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -150,7 +150,8 @@
         create table {{ relation }}
       {% endif %}
       {% if config.get('contract', False) %}
-        {{ get_assert_columns_equivalent(sql) }}
+        {{ get_assert_columns_equivalent(compiled_code) }}
+        {%- set compiled_code = get_select_subquery(compiled_code) %}
       {% endif %}
       {{ file_format_clause() }}
       {{ options_clause() }}
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index a5fee2e7..42154251 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -17,11 +17,16 @@
 create or replace table {0}
     using delta
     as
-
 select
-    1 as id,
+  id,
+  color,
+  date_day
+from
+
+( select
     'blue' as color,
-    cast('2019-01-01' as date) as date_day
+    1 as id,
+    '2019-01-01' as date_day ) as model_subq
 """
 
 # Different on Spark:
@@ -38,6 +43,14 @@ def models(self):
             "constraints_schema.yml": constraints_yml,
         }
 
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
+
     @pytest.fixture
     def string_type(self):
         return "STR"
@@ -62,9 +75,8 @@ def data_types(self, int_type, schema_int_type, string_type):
             ['array("1","2","3")', "string", string_type],
             ["array(1,2,3)", "string", string_type],
             ["6.45", "decimal", "DECIMAL"],
-            # TODO: test__constraints_correct_column_data_type isn't able to run the following statements in create table statements with pyodbc
-            # ["cast('2019-01-01' as date)", "date", "DATE"],
-            # ["cast('2019-01-01' as timestamp)", "date", "DATE"],
+            ["cast('2019-01-01' as date)", "date", "DATE"],
+            ["cast('2019-01-01' as timestamp)", "timestamp", "DATETIME"],
         ]
 
 

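The expected SQL fixture above shows the effect of this patch: when a contract is enforced, the compiled model SQL is wrapped in a subquery that selects the contracted columns in their declared order. A hedged sketch of that wrapping; the helper name below is illustrative, and in the adapter the actual rewrite is delegated to dbt-core's get_select_subquery:

def wrap_in_contract_subquery(compiled_sql, contracted_columns):
    # Reorder the model's output by selecting the contracted columns, in their
    # declared order, from the original compiled SQL as a subquery.
    column_list = ",\n  ".join(contracted_columns)
    return f"select\n  {column_list}\nfrom\n( {compiled_sql} ) as model_subq"


print(
    wrap_in_contract_subquery(
        "select 'blue' as color, 1 as id, '2019-01-01' as date_day",
        ["id", "color", "date_day"],
    )
)
# select
#   id,
#   color,
#   date_day
# from
# ( select 'blue' as color, 1 as id, '2019-01-01' as date_day ) as model_subq
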
From 7e1fd69b33f7abf00126fceaa3fd4f0eddca024a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 21 Mar 2023 02:04:42 -0700
Subject: [PATCH 419/603] Bump mypy from 1.0.1 to 1.1.1 (#675)

* Bump mypy from 1.0.1 to 1.1.1

Bumps [mypy](https://github.com/python/mypy) from 1.0.1 to 1.1.1.
- [Release notes](https://github.com/python/mypy/releases)
- [Commits](https://github.com/python/mypy/compare/v1.0.1...v1.1.1)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* Update pre commit config

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230321-064511.yaml | 6 ++++++
 .pre-commit-config.yaml                               | 2 +-
 dev-requirements.txt                                  | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20230321-064511.yaml

diff --git a/.changes/unreleased/Dependencies-20230321-064511.yaml b/.changes/unreleased/Dependencies-20230321-064511.yaml
new file mode 100644
index 00000000..cbfd080c
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230321-064511.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.0.1 to 1.1.1"
+time: 2023-03-21T06:45:11.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 675
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b748e03e..3d80b955 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,7 +39,7 @@ repos:
     alias: flake8-check
     stages: [manual]
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.0.1
+  rev: v1.1.1
   hooks:
   - id: mypy
     # N.B.: Mypy is... a bit fragile.
diff --git a/dev-requirements.txt b/dev-requirements.txt
index dd2589d2..be54f170 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -13,7 +13,7 @@ flake8~=6.0;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.0.1  # patch updates have historically introduced breaking changes
+mypy==1.1.1  # patch updates have historically introduced breaking changes
 pip-tools~=6.12
 pre-commit~=2.21;python_version=="3.7"
 pre-commit~=3.1;python_version>="3.8"

From 28e449377f5505ec02b5e3ae0c0a23f71fa6773c Mon Sep 17 00:00:00 2001
From: Peter Webb <peter.webb@dbtlabs.com>
Date: Wed, 22 Mar 2023 13:35:32 -0400
Subject: [PATCH 420/603] CT-2215: Modify adapter to support unified constraint
 fields (#681)

* CT-2215: Modify adapter to support unified constraint fields

* CT-2215: Fix up not null constraint issue

* CT-2215: Revert requirements in preparation for merge
---
 .../unreleased/Features-20230314-223919.yaml  |  6 +++++
 dbt/include/spark/macros/adapters.sql         | 22 ++++++++++---------
 2 files changed, 18 insertions(+), 10 deletions(-)
 create mode 100644 .changes/unreleased/Features-20230314-223919.yaml

diff --git a/.changes/unreleased/Features-20230314-223919.yaml b/.changes/unreleased/Features-20230314-223919.yaml
new file mode 100644
index 00000000..fd47767b
--- /dev/null
+++ b/.changes/unreleased/Features-20230314-223919.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Modify adapter to support unified constraint fields
+time: 2023-03-14T22:39:19.183649-04:00
+custom:
+  Author: peterallenwebb
+  Issue: "655"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index f4bd09c2..dbd301c3 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -193,13 +193,15 @@
 {% macro spark__alter_table_add_constraints(relation, column_dict) %}
 
   {% for column_name in column_dict %}
-    {% set constraints_check = column_dict[column_name]['constraints_check'] %}
-    {% if constraints_check and not is_incremental() %}
-      {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint_check) -%}
-      {% call statement() %}
-        alter table {{ relation }} add constraint {{ constraint_hash }} check {{ constraints_check }};
-      {% endcall %}
-    {% endif %}
+    {% set constraints = column_dict[column_name]['constraints'] %}
+    {% for constraint in constraints %}
+      {% if constraint.type == 'check' and not is_incremental() %}
+        {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint.expression ~ ";" ~ loop.index) -%}
+        {% call statement() %}
+          alter table {{ relation }} add constraint {{ constraint_hash }} check {{ constraint.expression }};
+        {% endcall %}
+      {% endif %}
+    {% endfor %}
   {% endfor %}
 {% endmacro %}
 
@@ -211,12 +213,12 @@
   {% for column_name in column_dict %}
     {% set constraints = column_dict[column_name]['constraints'] %}
     {% for constraint in constraints %}
-      {% if constraint != 'not null' %}
-        {{ exceptions.warn('Invalid constraint for column ' ~ column_name ~ '. Only `not null` is supported.') }}
+      {% if constraint.type != 'not_null' %}
+        {{ exceptions.warn('Invalid constraint for column ' ~ column_name ~ '. Only `not_null` is supported.') }}
       {% else %}
         {% set quoted_name = adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name %}
         {% call statement() %}
-          alter table {{ relation }} change column {{ quoted_name }} set {{ constraint }};
+          alter table {{ relation }} change column {{ quoted_name }} set not null {{ constraint.expression or "" }};
         {% endcall %}
       {% endif %}
     {% endfor %}

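Under the unified constraint fields, each column carries a list of constraint objects with a type and an optional expression, and the macros above translate them into alter table statements. A rough Python illustration of the shapes involved; the table name and constraint name below are placeholders, not the macro's local_md5 output:

column_dict = {
    "id": {
        "quote": False,
        "constraints": [
            {"type": "not_null", "expression": ""},
            {"type": "check", "expression": "(id > 0)"},
        ],
    }
}

statements = []
for column_name, column in column_dict.items():
    for index, constraint in enumerate(column["constraints"], start=1):
        if constraint["type"] == "check":
            # The macro derives the constraint name from
            # local_md5(column_name ~ ";" ~ expression ~ ";" ~ loop.index);
            # "id_check_<index>" is used here only as a readable placeholder.
            statements.append(
                f"alter table my_table add constraint id_check_{index} "
                f"check {constraint['expression']};"
            )
        elif constraint["type"] == "not_null":
            statements.append(
                f"alter table my_table change column {column_name} set not null;"
            )

for statement in statements:
    print(statement)
# alter table my_table change column id set not null;
# alter table my_table add constraint id_check_2 check (id > 0);
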
From 3b3b2a0ff8a0cffa32e0154f33084178e8f831c2 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Tue, 28 Mar 2023 17:20:47 -0500
Subject: [PATCH 421/603] add tests for enforcing contracts for incremental
 materializations (#685)

* add tests for enforcing contracts for incremental materializations

* remove changelog

* modify test

* add new test value

* add another error msg

* use the right models

* fix model definition

* reorganize tests

* persist constraints for incremental mats

* fix expected color fixture

* move constraints

* move do persist_constraints

* reset dev reqs

* stringify relation (#698)
---
 dbt/adapters/spark/impl.py                    |   2 +-
 .../incremental/incremental.sql               |   4 +-
 tests/functional/adapter/test_constraints.py  | 165 ++++++++++++++----
 3 files changed, 135 insertions(+), 36 deletions(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 4d4caa1b..37de188c 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -394,7 +394,7 @@ def _get_one_catalog(
 
         columns: List[Dict[str, Any]] = []
         for relation in self.list_relations(database, schema):
-            logger.debug("Getting table schema for relation {}", relation)
+            logger.debug("Getting table schema for relation {}", str(relation))
             columns.extend(self._get_columns_for_catalog(relation))
         return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER)
 
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index d2c1f5e4..10d4f3ed 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -39,6 +39,7 @@
     {%- call statement('main', language=language) -%}
       {{ create_table_as(False, target_relation, compiled_code, language) }}
     {%- endcall -%}
+    {% do persist_constraints(target_relation, model) %}
   {%- elif existing_relation.is_view or should_full_refresh() -%}
     {#-- Relation must be dropped & recreated --#}
     {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %}
@@ -48,6 +49,7 @@
     {%- call statement('main', language=language) -%}
       {{ create_table_as(False, target_relation, compiled_code, language) }}
     {%- endcall -%}
+    {% do persist_constraints(target_relation, model) %}
   {%- else -%}
     {#-- Relation must be merged --#}
     {%- call statement('create_tmp_relation', language=language) -%}
@@ -63,7 +65,7 @@
       See note in dbt-spark/dbt/include/spark/macros/adapters.sql
       re: python models and temporary views.
 
-      Also, why doesn't either drop_relation or adapter.drop_relation work here?!
+      Also, why does neither drop_relation nor adapter.drop_relation work here?!
       --#}
       {% call statement('drop_relation') -%}
         drop table if exists {{ tmp_relation }}
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 42154251..1287dfd3 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -1,20 +1,28 @@
 import pytest
-from dbt.tests.util import relation_from_name
 from dbt.tests.adapter.constraints.test_constraints import (
     BaseTableConstraintsColumnsEqual,
     BaseViewConstraintsColumnsEqual,
-    BaseConstraintsRuntimeEnforcement,
+    BaseIncrementalConstraintsColumnsEqual,
+    BaseConstraintsRuntimeDdlEnforcement,
+    BaseConstraintsRollback,
+    BaseIncrementalConstraintsRuntimeDdlEnforcement,
+    BaseIncrementalConstraintsRollback,
 )
 from dbt.tests.adapter.constraints.fixtures import (
     my_model_sql,
     my_model_wrong_order_sql,
     my_model_wrong_name_sql,
     model_schema_yml,
+    my_model_view_wrong_order_sql,
+    my_model_view_wrong_name_sql,
+    my_model_incremental_wrong_order_sql,
+    my_model_incremental_wrong_name_sql,
+    my_incremental_model_sql,
 )
 
 # constraints are enforced via 'alter' statements that run after table creation
 _expected_sql_spark = """
-create or replace table {0}
+create or replace table <model_identifier>
     using delta
     as
 select
@@ -35,14 +43,6 @@
 
 
 class PyodbcSetup:
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model_wrong_order.sql": my_model_wrong_order_sql,
-            "my_model_wrong_name.sql": my_model_wrong_name_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
@@ -81,14 +81,6 @@ def data_types(self, int_type, schema_int_type, string_type):
 
 
 class DatabricksHTTPSetup:
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model_wrong_order.sql": my_model_wrong_order_sql,
-            "my_model_wrong_name.sql": my_model_wrong_name_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
     @pytest.fixture
     def string_type(self):
         return "STRING_TYPE"
@@ -120,12 +112,37 @@ def data_types(self, int_type, schema_int_type, string_type):
 
 @pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
 class TestSparkTableConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseTableConstraintsColumnsEqual):
-    pass
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
 
 
 @pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
 class TestSparkViewConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseViewConstraintsColumnsEqual):
-    pass
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_view_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_view_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
+class TestSparkIncrementalConstraintsColumnsEqualPyodbc(
+    PyodbcSetup, BaseIncrementalConstraintsColumnsEqual
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_incremental_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_incremental_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
 
 
 @pytest.mark.skip_profile(
@@ -134,7 +151,13 @@ class TestSparkViewConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseViewConstraint
 class TestSparkTableConstraintsColumnsEqualDatabricksHTTP(
     DatabricksHTTPSetup, BaseTableConstraintsColumnsEqual
 ):
-    pass
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
 
 
 @pytest.mark.skip_profile(
@@ -143,18 +166,31 @@ class TestSparkTableConstraintsColumnsEqualDatabricksHTTP(
 class TestSparkViewConstraintsColumnsEqualDatabricksHTTP(
     DatabricksHTTPSetup, BaseViewConstraintsColumnsEqual
 ):
-    pass
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_view_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_view_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
 
 
-@pytest.mark.skip_profile("spark_session", "apache_spark")
-class TestSparkConstraintsRuntimeEnforcement(BaseConstraintsRuntimeEnforcement):
+@pytest.mark.skip_profile(
+    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+)
+class TestSparkIncrementalConstraintsColumnsEqualDatabricksHTTP(
+    DatabricksHTTPSetup, BaseIncrementalConstraintsColumnsEqual
+):
     @pytest.fixture(scope="class")
     def models(self):
         return {
-            "my_model.sql": my_model_sql,
+            "my_model_wrong_order.sql": my_model_incremental_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_incremental_wrong_name_sql,
             "constraints_schema.yml": constraints_yml,
         }
 
+
+class BaseSparkConstraintsDdlEnforcementSetup:
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
@@ -164,16 +200,42 @@ def project_config_update(self):
         }
 
     @pytest.fixture(scope="class")
-    def expected_sql(self, project):
-        relation = relation_from_name(project.adapter, "my_model")
-        return _expected_sql_spark.format(relation)
+    def expected_sql(self):
+        return _expected_sql_spark
 
-    # On Spark/Databricks, constraints are applied *after* the table is replaced.
-    # We don't have any way to "rollback" the table to its previous happy state.
-    # So the 'color' column will be updated to 'red', instead of 'blue'.
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkTableConstraintsDdlEnforcement(
+    BaseSparkConstraintsDdlEnforcementSetup, BaseConstraintsRuntimeDdlEnforcement
+):
     @pytest.fixture(scope="class")
-    def expected_color(self):
-        return "red"
+    def models(self):
+        return {
+            "my_model.sql": my_model_wrong_order_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkIncrementalConstraintsDdlEnforcement(
+    BaseSparkConstraintsDdlEnforcementSetup, BaseIncrementalConstraintsRuntimeDdlEnforcement
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_incremental_wrong_order_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+class BaseSparkConstraintsRollbackSetup:
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
 
     @pytest.fixture(scope="class")
     def expected_error_messages(self):
@@ -181,6 +243,8 @@ def expected_error_messages(self):
             "violate the new CHECK constraint",
             "DELTA_NEW_CHECK_CONSTRAINT_VIOLATION",
             "violate the new NOT NULL constraint",
+            "(id > 0) violated by row with values:",  # incremental mats
+            "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES",  # incremental mats
         ]
 
     def assert_expected_error_messages(self, error_message, expected_error_messages):
@@ -188,3 +252,36 @@ def assert_expected_error_messages(self, error_message, expected_error_messages)
         # The CHECK constraint is added before the NOT NULL constraint
         # and different connection types display/truncate the error message in different ways...
         assert any(msg in error_message for msg in expected_error_messages)
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkTableConstraintsRollback(
+    BaseSparkConstraintsRollbackSetup, BaseConstraintsRollback
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+    # On Spark/Databricks, constraints are applied *after* the table is replaced.
+    # We don't have any way to "rollback" the table to its previous happy state.
+    # So the 'color' column will be updated to 'red', instead of 'blue'.
+    @pytest.fixture(scope="class")
+    def expected_color(self):
+        return "red"
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkIncrementalConstraintsRollback(
+    BaseSparkConstraintsRollbackSetup, BaseIncrementalConstraintsRollback
+):
+    # color stays blue for incremental models since it's a new row that just
+    # doesn't get inserted
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_incremental_model_sql,
+            "constraints_schema.yml": constraints_yml,
+        }

From 57a13957f42b9ae8609da378f7df4c51ac168ec6 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Tue, 28 Mar 2023 17:52:43 -0500
Subject: [PATCH 422/603] update to retrieve contract enforced from dict (#694)

* update to retrieve contract enforced from dict

* add dependency on branch

* change ref

* Update Under the Hood-20230223-105838.yaml

* revert branch dependency
---
 .changes/1.5.0/Under the Hood-20230223-105838.yaml      | 6 ------
 .changes/unreleased/Under the Hood-20230223-105838.yaml | 6 ++++++
 dbt/include/spark/macros/adapters.sql                   | 9 ++++++---
 3 files changed, 12 insertions(+), 9 deletions(-)
 delete mode 100644 .changes/1.5.0/Under the Hood-20230223-105838.yaml
 create mode 100644 .changes/unreleased/Under the Hood-20230223-105838.yaml

diff --git a/.changes/1.5.0/Under the Hood-20230223-105838.yaml b/.changes/1.5.0/Under the Hood-20230223-105838.yaml
deleted file mode 100644
index 8b882f3b..00000000
--- a/.changes/1.5.0/Under the Hood-20230223-105838.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Rename constraints_enabled to contract
-time: 2023-02-23T10:58:38.092981-05:00
-custom:
-  Author: gshank
-  Issue: "645"
diff --git a/.changes/unreleased/Under the Hood-20230223-105838.yaml b/.changes/unreleased/Under the Hood-20230223-105838.yaml
new file mode 100644
index 00000000..f6821d78
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230223-105838.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Treat contract config as a python object
+time: 2023-02-23T10:58:38.092981-05:00
+custom:
+  Author: gshank emmyoop
+  Issue: 645 693
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index dbd301c3..0d397e6d 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -149,7 +149,8 @@
       {% else %}
         create table {{ relation }}
       {% endif %}
-      {% if config.get('contract', False) %}
+      {%- set contract_config = config.get('contract') -%}
+      {%- if contract_config.enforced -%}
         {{ get_assert_columns_equivalent(compiled_code) }}
         {%- set compiled_code = get_select_subquery(compiled_code) %}
       {% endif %}
@@ -180,7 +181,8 @@
 {% endmacro %}
 
 {% macro spark__persist_constraints(relation, model) %}
-  {% if config.get('contract', False) and config.get('file_format', 'delta') == 'delta' %}
+  {%- set contract_config = config.get('contract') -%}
+  {% if contract_config.enforced and config.get('file_format', 'delta') == 'delta' %}
     {% do alter_table_add_constraints(relation, model.columns) %}
     {% do alter_column_set_constraints(relation, model.columns) %}
   {% endif %}
@@ -229,7 +231,8 @@
 {% macro spark__create_view_as(relation, sql) -%}
   create or replace view {{ relation }}
   {{ comment_clause() }}
-  {% if config.get('contract', False) -%}
+  {%- set contract_config = config.get('contract') -%}
+  {%- if contract_config.enforced -%}
     {{ get_assert_columns_equivalent(sql) }}
   {%- endif %}
   as
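
For context on the macro change above: in dbt-core 1.5 the contract setting reaches adapters as a small config object rather than a bare boolean, which is why the macros now read `contract_config.enforced` instead of treating `config.get('contract', False)` as truthy. A rough Python sketch of the shape involved (the real class lives in dbt-core and may carry additional fields):

    from dataclasses import dataclass


    @dataclass
    class ContractConfig:  # simplified stand-in for dbt-core's contract config
        enforced: bool = False


    contract = ContractConfig(enforced=False)
    # Old check: `config.get('contract', False)` is truthy for any non-empty object,
    # even when enforcement is off; the new check reads the flag explicitly.
    assert not contract.enforced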

From ccf9a094286bfceb1f7dd7550f65e310398fc9d5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 29 Mar 2023 03:19:09 -0700
Subject: [PATCH 423/603] Update types-pytz requirement from ~=2022.7 to
 ~=2023.2 (#697)

* Update types-pytz requirement from ~=2022.7 to ~=2023.2

Updates the requirements on [types-pytz](https://github.com/python/typeshed) to permit the latest version.
- [Release notes](https://github.com/python/typeshed/releases)
- [Commits](https://github.com/python/typeshed/commits)

---
updated-dependencies:
- dependency-name: types-pytz
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mila Page <67295367+VersusFacit@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230327-230653.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230327-230653.yaml

diff --git a/.changes/unreleased/Dependencies-20230327-230653.yaml b/.changes/unreleased/Dependencies-20230327-230653.yaml
new file mode 100644
index 00000000..0f4ee1fd
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230327-230653.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update types-pytz requirement from ~=2022.7 to ~=2023.2"
+time: 2023-03-27T23:06:53.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 697
diff --git a/dev-requirements.txt b/dev-requirements.txt
index be54f170..d20da2b5 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -26,7 +26,7 @@ pytest-xdist~=3.2
 pytz~=2022.7
 tox~=3.0;python_version=="3.7"
 tox~=4.4;python_version>="3.8"
-types-pytz~=2022.7
+types-pytz~=2023.2
 types-requests~=2.28
 twine~=4.0
 wheel~=0.40

From ee5ca9f36560fa51200385c196ff445633de17cd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 29 Mar 2023 03:19:18 -0700
Subject: [PATCH 424/603] Update pytz requirement from ~=2022.7 to ~=2023.2
 (#696)

* Update pytz requirement from ~=2022.7 to ~=2023.2

Updates the requirements on [pytz](https://github.com/stub42/pytz) to permit the latest version.
- [Release notes](https://github.com/stub42/pytz/releases)
- [Commits](https://github.com/stub42/pytz/compare/release_2022.7...release_2023.2)

---
updated-dependencies:
- dependency-name: pytz
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* Add automated changelog yaml from template for bot PR

* Delete Dependencies-20230327-230640.yaml

Duplicate Changelog

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mila Page <67295367+VersusFacit@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230329-022830.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230329-022830.yaml

diff --git a/.changes/unreleased/Dependencies-20230329-022830.yaml b/.changes/unreleased/Dependencies-20230329-022830.yaml
new file mode 100644
index 00000000..26b2f9f1
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230329-022830.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytz requirement from ~=2022.7 to ~=2023.2"
+time: 2023-03-29T02:28:30.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 696
diff --git a/dev-requirements.txt b/dev-requirements.txt
index d20da2b5..f6a3a215 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -23,7 +23,7 @@ pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.2
-pytz~=2022.7
+pytz~=2023.2
 tox~=3.0;python_version=="3.7"
 tox~=4.4;python_version>="3.8"
 types-pytz~=2023.2

From 74cf095388257362248e2c2aa01dbb1f7f6540e0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 29 Mar 2023 03:19:50 -0700
Subject: [PATCH 425/603] Update pre-commit requirement from ~=2.21 to ~=3.2
 (#687)

* Update pre-commit requirement from ~=2.21 to ~=3.2

Updates the requirements on [pre-commit](https://github.com/pre-commit/pre-commit) to permit the latest version.
- [Release notes](https://github.com/pre-commit/pre-commit/releases)
- [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pre-commit/pre-commit/commits/v3.2.0)

---
updated-dependencies:
- dependency-name: pre-commit
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mila Page <67295367+VersusFacit@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230317-230215.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230317-230215.yaml

diff --git a/.changes/unreleased/Dependencies-20230317-230215.yaml b/.changes/unreleased/Dependencies-20230317-230215.yaml
new file mode 100644
index 00000000..0c72fdb4
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230317-230215.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pre-commit requirement from ~=2.21 to ~=3.2"
+time: 2023-03-17T23:02:15.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 687
diff --git a/dev-requirements.txt b/dev-requirements.txt
index f6a3a215..446da331 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -16,7 +16,7 @@ ipdb~=0.13.13
 mypy==1.1.1  # patch updates have historically introduced breaking changes
 pip-tools~=6.12
 pre-commit~=2.21;python_version=="3.7"
-pre-commit~=3.1;python_version>="3.8"
+pre-commit~=3.2;python_version>="3.8"
 pre-commit-hooks~=4.4
 pytest~=7.2
 pytest-csv~=3.0

From a0562e001d88e3af518623c0d424858ebdb99cee Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 30 Mar 2023 15:46:48 -0500
Subject: [PATCH 426/603] [create-pull-request] automated change (#703)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.5.0-b4.md                          | 20 ++++++++++++++++
 .../Dependencies-20230309-230322.yaml         |  0
 .../Dependencies-20230314-232721.yaml         |  0
 .../Dependencies-20230317-230215.yaml         |  0
 .../Dependencies-20230319-190421.yaml         |  0
 .../Dependencies-20230321-064511.yaml         |  0
 .../Dependencies-20230327-230653.yaml         |  0
 .../Dependencies-20230329-022830.yaml         |  0
 .../Features-20230314-223919.yaml             |  0
 .../Features-20230315-103748.yaml             |  0
 .../Under the Hood-20230223-105838.yaml       |  0
 CHANGELOG.md                                  | 24 ++++++++++++++++++-
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 15 files changed, 46 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.5.0-b4.md
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230309-230322.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230314-232721.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230317-230215.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230319-190421.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230321-064511.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230327-230653.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Dependencies-20230329-022830.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Features-20230314-223919.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Features-20230315-103748.yaml (100%)
 rename .changes/{unreleased => 1.5.0}/Under the Hood-20230223-105838.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index adead0c6..eea391a3 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.5.0b3
+current_version = 1.5.0b4
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.5.0-b4.md b/.changes/1.5.0-b4.md
new file mode 100644
index 00000000..2728735d
--- /dev/null
+++ b/.changes/1.5.0-b4.md
@@ -0,0 +1,20 @@
+## dbt-spark 1.5.0-b4 - March 30, 2023
+
+### Features
+
+- Modify adapter to support unified constraint fields ([#655](https://github.com/dbt-labs/dbt-spark/issues/655))
+- Modify order of columns in create_table_as to match contract ([#671](https://github.com/dbt-labs/dbt-spark/issues/671))
+
+### Under the Hood
+
+- Treat contract config as a python object ([#645](https://github.com/dbt-labs/dbt-spark/issues/645), [#693](https://github.com/dbt-labs/dbt-spark/issues/693))
+
+### Dependencies
+
+- Update ipdb requirement from ~=0.13.11 to ~=0.13.13 ([#677](https://github.com/dbt-labs/dbt-spark/pull/677))
+- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
+- Update pre-commit requirement from ~=2.21 to ~=3.2 ([#687](https://github.com/dbt-labs/dbt-spark/pull/687))
+- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
+- Bump mypy from 1.0.1 to 1.1.1 ([#675](https://github.com/dbt-labs/dbt-spark/pull/675))
+- Update types-pytz requirement from ~=2022.7 to ~=2023.2 ([#697](https://github.com/dbt-labs/dbt-spark/pull/697))
+- Update pytz requirement from ~=2022.7 to ~=2023.2 ([#696](https://github.com/dbt-labs/dbt-spark/pull/696))
diff --git a/.changes/unreleased/Dependencies-20230309-230322.yaml b/.changes/1.5.0/Dependencies-20230309-230322.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230309-230322.yaml
rename to .changes/1.5.0/Dependencies-20230309-230322.yaml
diff --git a/.changes/unreleased/Dependencies-20230314-232721.yaml b/.changes/1.5.0/Dependencies-20230314-232721.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230314-232721.yaml
rename to .changes/1.5.0/Dependencies-20230314-232721.yaml
diff --git a/.changes/unreleased/Dependencies-20230317-230215.yaml b/.changes/1.5.0/Dependencies-20230317-230215.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230317-230215.yaml
rename to .changes/1.5.0/Dependencies-20230317-230215.yaml
diff --git a/.changes/unreleased/Dependencies-20230319-190421.yaml b/.changes/1.5.0/Dependencies-20230319-190421.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230319-190421.yaml
rename to .changes/1.5.0/Dependencies-20230319-190421.yaml
diff --git a/.changes/unreleased/Dependencies-20230321-064511.yaml b/.changes/1.5.0/Dependencies-20230321-064511.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230321-064511.yaml
rename to .changes/1.5.0/Dependencies-20230321-064511.yaml
diff --git a/.changes/unreleased/Dependencies-20230327-230653.yaml b/.changes/1.5.0/Dependencies-20230327-230653.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230327-230653.yaml
rename to .changes/1.5.0/Dependencies-20230327-230653.yaml
diff --git a/.changes/unreleased/Dependencies-20230329-022830.yaml b/.changes/1.5.0/Dependencies-20230329-022830.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230329-022830.yaml
rename to .changes/1.5.0/Dependencies-20230329-022830.yaml
diff --git a/.changes/unreleased/Features-20230314-223919.yaml b/.changes/1.5.0/Features-20230314-223919.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230314-223919.yaml
rename to .changes/1.5.0/Features-20230314-223919.yaml
diff --git a/.changes/unreleased/Features-20230315-103748.yaml b/.changes/1.5.0/Features-20230315-103748.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230315-103748.yaml
rename to .changes/1.5.0/Features-20230315-103748.yaml
diff --git a/.changes/unreleased/Under the Hood-20230223-105838.yaml b/.changes/1.5.0/Under the Hood-20230223-105838.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230223-105838.yaml
rename to .changes/1.5.0/Under the Hood-20230223-105838.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc7262eb..3bec7cef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,29 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.5.0-b4 - March 30, 2023
+
+### Features
+
+- Modify adapter to support unified constraint fields ([#655](https://github.com/dbt-labs/dbt-spark/issues/655))
+- Modify order of columns in create_table_as to match contract ([#671](https://github.com/dbt-labs/dbt-spark/issues/671))
+
+### Under the Hood
+
+- Treat contract config as a python object ([#645](https://github.com/dbt-labs/dbt-spark/issues/645), [#693](https://github.com/dbt-labs/dbt-spark/issues/693))
+
+### Dependencies
+
+- Update ipdb requirement from ~=0.13.11 to ~=0.13.13 ([#677](https://github.com/dbt-labs/dbt-spark/pull/677))
+- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
+- Update pre-commit requirement from ~=2.21 to ~=3.2 ([#687](https://github.com/dbt-labs/dbt-spark/pull/687))
+- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
+- Bump mypy from 1.0.1 to 1.1.1 ([#675](https://github.com/dbt-labs/dbt-spark/pull/675))
+- Update types-pytz requirement from ~=2022.7 to ~=2023.2 ([#697](https://github.com/dbt-labs/dbt-spark/pull/697))
+- Update pytz requirement from ~=2022.7 to ~=2023.2 ([#696](https://github.com/dbt-labs/dbt-spark/pull/696))
+
+
+
 ## dbt-spark 1.5.0-b3 - March 16, 2023
 
 ### Features
@@ -19,7 +42,6 @@
 ### Contributors
 - [@akashrn5](https://github.com/akashrn5) ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
 
-
 ## dbt-spark 1.5.0-b2 - March 03, 2023
 
 ### Fixes
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 649c005a..4a7a8147 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.5.0b3"
+version = "1.5.0b4"
diff --git a/setup.py b/setup.py
index 8e7b6ab0..9760d925 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.5.0b3"
+package_version = "1.5.0b4"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 414c1a02552afe14b66fff8c6efb667d4bf5ac35 Mon Sep 17 00:00:00 2001
From: FishtownBuildBot <77737458+FishtownBuildBot@users.noreply.github.com>
Date: Mon, 17 Apr 2023 16:40:08 -0500
Subject: [PATCH 427/603] Cleanup main after cutting new 1.5.latest branch
 (#711)

* Bumping version to 1.5.0rc1

* Clean up changelog on main

* created 1.5.0rc1 changelog

---------

Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.5.0-b1.md                          | 21 ------
 .changes/1.5.0-b2.md                          | 12 ----
 .changes/1.5.0-b3.md                          | 13 ----
 .changes/1.5.0-b4.md                          | 20 ------
 .../1.5.0/Dependencies-20230126-152319.yaml   |  7 --
 .../1.5.0/Dependencies-20230309-230322.yaml   |  6 --
 .../1.5.0/Dependencies-20230314-232721.yaml   |  6 --
 .../1.5.0/Dependencies-20230317-230215.yaml   |  6 --
 .../1.5.0/Dependencies-20230319-190421.yaml   |  6 --
 .../1.5.0/Dependencies-20230321-064511.yaml   |  6 --
 .../1.5.0/Dependencies-20230327-230653.yaml   |  6 --
 .../1.5.0/Dependencies-20230329-022830.yaml   |  6 --
 .changes/1.5.0/Features-20230130-125855.yaml  |  8 ---
 .changes/1.5.0/Features-20230223-180923.yaml  |  6 --
 .changes/1.5.0/Features-20230314-223919.yaml  |  6 --
 .changes/1.5.0/Features-20230315-103748.yaml  |  6 --
 .changes/1.5.0/Fixes-20220819-141350.yaml     |  8 ---
 .changes/1.5.0/Fixes-20221124-125501.yaml     |  7 --
 .changes/1.5.0/Fixes-20230123-134955.yaml     |  7 --
 .changes/1.5.0/Fixes-20230303-200542.yaml     |  6 --
 .../1.5.0/Under the Hood-20230130-170310.yaml |  6 --
 .../1.5.0/Under the Hood-20230223-105838.yaml |  6 --
 CHANGELOG.md                                  | 72 -------------------
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 26 files changed, 3 insertions(+), 256 deletions(-)
 delete mode 100644 .changes/1.5.0-b1.md
 delete mode 100644 .changes/1.5.0-b2.md
 delete mode 100644 .changes/1.5.0-b3.md
 delete mode 100644 .changes/1.5.0-b4.md
 delete mode 100644 .changes/1.5.0/Dependencies-20230126-152319.yaml
 delete mode 100644 .changes/1.5.0/Dependencies-20230309-230322.yaml
 delete mode 100644 .changes/1.5.0/Dependencies-20230314-232721.yaml
 delete mode 100644 .changes/1.5.0/Dependencies-20230317-230215.yaml
 delete mode 100644 .changes/1.5.0/Dependencies-20230319-190421.yaml
 delete mode 100644 .changes/1.5.0/Dependencies-20230321-064511.yaml
 delete mode 100644 .changes/1.5.0/Dependencies-20230327-230653.yaml
 delete mode 100644 .changes/1.5.0/Dependencies-20230329-022830.yaml
 delete mode 100644 .changes/1.5.0/Features-20230130-125855.yaml
 delete mode 100644 .changes/1.5.0/Features-20230223-180923.yaml
 delete mode 100644 .changes/1.5.0/Features-20230314-223919.yaml
 delete mode 100644 .changes/1.5.0/Features-20230315-103748.yaml
 delete mode 100644 .changes/1.5.0/Fixes-20220819-141350.yaml
 delete mode 100644 .changes/1.5.0/Fixes-20221124-125501.yaml
 delete mode 100644 .changes/1.5.0/Fixes-20230123-134955.yaml
 delete mode 100644 .changes/1.5.0/Fixes-20230303-200542.yaml
 delete mode 100644 .changes/1.5.0/Under the Hood-20230130-170310.yaml
 delete mode 100644 .changes/1.5.0/Under the Hood-20230223-105838.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index eea391a3..3130d395 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.5.0b4
+current_version = 1.5.0rc1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.5.0-b1.md b/.changes/1.5.0-b1.md
deleted file mode 100644
index 693f8154..00000000
--- a/.changes/1.5.0-b1.md
+++ /dev/null
@@ -1,21 +0,0 @@
-## dbt-spark 1.5.0-b1 - February 22, 2023
-
-### Features
-
-- Support for data types constraints in Spark following the dbt Core feature #6271 ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
-
-### Fixes
-
-- add merge_exclude_columns tests ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
-
-### Under the Hood
-
-- remove tox call to integration tests ([#584](https://github.com/dbt-labs/dbt-spark/issues/584))
-
-### Dependencies
-
-- Allow thrift 0.16.0 ([#605](https://github.com/dbt-labs/dbt-spark/pull/605))
-
-### Contributors
-- [@b-per](https://github.com/b-per) ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
-- [@dave-connors-3](https://github.com/dave-connors-3) ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
diff --git a/.changes/1.5.0-b2.md b/.changes/1.5.0-b2.md
deleted file mode 100644
index 7e248bf4..00000000
--- a/.changes/1.5.0-b2.md
+++ /dev/null
@@ -1,12 +0,0 @@
-## dbt-spark 1.5.0-b2 - March 03, 2023
-
-### Fixes
-
-- Support for iceberg v2 tables.  Added ability to use multiple join conditions to allow for multiple columns to make a row distinct. ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
-
-### Under the Hood
-
-- Rename constraints_enabled to contract ([#645](https://github.com/dbt-labs/dbt-spark/issues/645))
-
-### Contributors
-- [@dparent1](https://github.com/dparent1) ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
diff --git a/.changes/1.5.0-b3.md b/.changes/1.5.0-b3.md
deleted file mode 100644
index 5d721823..00000000
--- a/.changes/1.5.0-b3.md
+++ /dev/null
@@ -1,13 +0,0 @@
-## dbt-spark 1.5.0-b3 - March 16, 2023
-
-### Features
-
-- Enforce contracts on models materialized as tables and views ([#639](https://github.com/dbt-labs/dbt-spark/issues/639), [#654](https://github.com/dbt-labs/dbt-spark/issues/654))
-
-### Fixes
-
-- Use take() instead of collect on dataframe to improve the performance ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
-- Fix pyodbc type_code -> data_type conversion  ([#665](https://github.com/dbt-labs/dbt-spark/issues/665))
-
-### Contributors
-- [@akashrn5](https://github.com/akashrn5) ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
diff --git a/.changes/1.5.0-b4.md b/.changes/1.5.0-b4.md
deleted file mode 100644
index 2728735d..00000000
--- a/.changes/1.5.0-b4.md
+++ /dev/null
@@ -1,20 +0,0 @@
-## dbt-spark 1.5.0-b4 - March 30, 2023
-
-### Features
-
-- Modify adapter to support unified constraint fields ([#655](https://github.com/dbt-labs/dbt-spark/issues/655))
-- Modify order of columns in create_table_as to match contract ([#671](https://github.com/dbt-labs/dbt-spark/issues/671))
-
-### Under the Hood
-
-- Treat contract config as a python object ([#645](https://github.com/dbt-labs/dbt-spark/issues/645), [#693](https://github.com/dbt-labs/dbt-spark/issues/693))
-
-### Dependencies
-
-- Update ipdb requirement from ~=0.13.11 to ~=0.13.13 ([#677](https://github.com/dbt-labs/dbt-spark/pull/677))
-- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
-- Update pre-commit requirement from ~=2.21 to ~=3.2 ([#687](https://github.com/dbt-labs/dbt-spark/pull/687))
-- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
-- Bump mypy from 1.0.1 to 1.1.1 ([#675](https://github.com/dbt-labs/dbt-spark/pull/675))
-- Update types-pytz requirement from ~=2022.7 to ~=2023.2 ([#697](https://github.com/dbt-labs/dbt-spark/pull/697))
-- Update pytz requirement from ~=2022.7 to ~=2023.2 ([#696](https://github.com/dbt-labs/dbt-spark/pull/696))
diff --git a/.changes/1.5.0/Dependencies-20230126-152319.yaml b/.changes/1.5.0/Dependencies-20230126-152319.yaml
deleted file mode 100644
index 7fe5d0c7..00000000
--- a/.changes/1.5.0/Dependencies-20230126-152319.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Dependencies
-body: Allow thrift 0.16.0
-time: 2023-01-26T15:23:19.978823-08:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "606"
-  PR: "605"
diff --git a/.changes/1.5.0/Dependencies-20230309-230322.yaml b/.changes/1.5.0/Dependencies-20230309-230322.yaml
deleted file mode 100644
index b02c62d5..00000000
--- a/.changes/1.5.0/Dependencies-20230309-230322.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update ipdb requirement from ~=0.13.11 to ~=0.13.13"
-time: 2023-03-09T23:03:22.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 677
diff --git a/.changes/1.5.0/Dependencies-20230314-232721.yaml b/.changes/1.5.0/Dependencies-20230314-232721.yaml
deleted file mode 100644
index e3e57d9c..00000000
--- a/.changes/1.5.0/Dependencies-20230314-232721.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update wheel requirement from ~=0.38 to ~=0.40"
-time: 2023-03-14T23:27:21.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 680
diff --git a/.changes/1.5.0/Dependencies-20230317-230215.yaml b/.changes/1.5.0/Dependencies-20230317-230215.yaml
deleted file mode 100644
index 0c72fdb4..00000000
--- a/.changes/1.5.0/Dependencies-20230317-230215.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pre-commit requirement from ~=2.21 to ~=3.2"
-time: 2023-03-17T23:02:15.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 687
diff --git a/.changes/1.5.0/Dependencies-20230319-190421.yaml b/.changes/1.5.0/Dependencies-20230319-190421.yaml
deleted file mode 100644
index 5651f47a..00000000
--- a/.changes/1.5.0/Dependencies-20230319-190421.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update wheel requirement from ~=0.38 to ~=0.40"
-time: 2023-03-19T19:04:21.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 680
diff --git a/.changes/1.5.0/Dependencies-20230321-064511.yaml b/.changes/1.5.0/Dependencies-20230321-064511.yaml
deleted file mode 100644
index cbfd080c..00000000
--- a/.changes/1.5.0/Dependencies-20230321-064511.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.0.1 to 1.1.1"
-time: 2023-03-21T06:45:11.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 675
diff --git a/.changes/1.5.0/Dependencies-20230327-230653.yaml b/.changes/1.5.0/Dependencies-20230327-230653.yaml
deleted file mode 100644
index 0f4ee1fd..00000000
--- a/.changes/1.5.0/Dependencies-20230327-230653.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update types-pytz requirement from ~=2022.7 to ~=2023.2"
-time: 2023-03-27T23:06:53.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 697
diff --git a/.changes/1.5.0/Dependencies-20230329-022830.yaml b/.changes/1.5.0/Dependencies-20230329-022830.yaml
deleted file mode 100644
index 26b2f9f1..00000000
--- a/.changes/1.5.0/Dependencies-20230329-022830.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytz requirement from ~=2022.7 to ~=2023.2"
-time: 2023-03-29T02:28:30.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 696
diff --git a/.changes/1.5.0/Features-20230130-125855.yaml b/.changes/1.5.0/Features-20230130-125855.yaml
deleted file mode 100644
index 1f27a388..00000000
--- a/.changes/1.5.0/Features-20230130-125855.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Features
-body: 'Support for data types constraints in Spark following the dbt Core feature
-  #6271'
-time: 2023-01-30T12:58:55.972992+01:00
-custom:
-  Author: b-per
-  Issue: "558"
-  PR: "574"
diff --git a/.changes/1.5.0/Features-20230223-180923.yaml b/.changes/1.5.0/Features-20230223-180923.yaml
deleted file mode 100644
index de98ef41..00000000
--- a/.changes/1.5.0/Features-20230223-180923.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Enforce contracts on models materialized as tables and views
-time: 2023-02-23T18:09:23.787675-05:00
-custom:
-  Author: michelleark emmyoop
-  Issue: 639 654
diff --git a/.changes/1.5.0/Features-20230314-223919.yaml b/.changes/1.5.0/Features-20230314-223919.yaml
deleted file mode 100644
index fd47767b..00000000
--- a/.changes/1.5.0/Features-20230314-223919.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Modify adapter to support unified constraint fields
-time: 2023-03-14T22:39:19.183649-04:00
-custom:
-  Author: peterallenwebb
-  Issue: "655"
diff --git a/.changes/1.5.0/Features-20230315-103748.yaml b/.changes/1.5.0/Features-20230315-103748.yaml
deleted file mode 100644
index 84a3704b..00000000
--- a/.changes/1.5.0/Features-20230315-103748.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Modify order of columns in create_table_as to match contract
-time: 2023-03-15T10:37:48.541043-04:00
-custom:
-  Author: gshank
-  Issue: "671"
diff --git a/.changes/1.5.0/Fixes-20220819-141350.yaml b/.changes/1.5.0/Fixes-20220819-141350.yaml
deleted file mode 100644
index 594c5216..00000000
--- a/.changes/1.5.0/Fixes-20220819-141350.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Fixes
-body: Support for iceberg v2 tables.  Added ability to use multiple join conditions
-  to allow for multiple columns to make a row distinct.
-time: 2022-08-19T14:13:50.3145273-04:00
-custom:
-  Author: dparent1
-  Issue: "294"
-  PR: "432"
diff --git a/.changes/1.5.0/Fixes-20221124-125501.yaml b/.changes/1.5.0/Fixes-20221124-125501.yaml
deleted file mode 100644
index 2badb1dc..00000000
--- a/.changes/1.5.0/Fixes-20221124-125501.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: Use take() instead of collect on dataframe to improve the performance
-time: 2022-11-24T12:55:01.928275+05:30
-custom:
-  Author: akashrn5
-  Issue: "526"
-  PR: "527"
diff --git a/.changes/1.5.0/Fixes-20230123-134955.yaml b/.changes/1.5.0/Fixes-20230123-134955.yaml
deleted file mode 100644
index a40d912b..00000000
--- a/.changes/1.5.0/Fixes-20230123-134955.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: add merge_exclude_columns tests
-time: 2023-01-23T13:49:55.74249-06:00
-custom:
-  Author: dave-connors-3
-  Issue: "00"
-  PR: "600"
diff --git a/.changes/1.5.0/Fixes-20230303-200542.yaml b/.changes/1.5.0/Fixes-20230303-200542.yaml
deleted file mode 100644
index 948f98bb..00000000
--- a/.changes/1.5.0/Fixes-20230303-200542.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: 'Fix pyodbc type_code -> data_type conversion '
-time: 2023-03-03T20:05:42.400255-05:00
-custom:
-  Author: michelleark
-  Issue: "665"
diff --git a/.changes/1.5.0/Under the Hood-20230130-170310.yaml b/.changes/1.5.0/Under the Hood-20230130-170310.yaml
deleted file mode 100644
index c9131b6b..00000000
--- a/.changes/1.5.0/Under the Hood-20230130-170310.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: 'remove tox call to integration tests'
-time: 2023-01-30T17:03:10.031843-08:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "584"
diff --git a/.changes/1.5.0/Under the Hood-20230223-105838.yaml b/.changes/1.5.0/Under the Hood-20230223-105838.yaml
deleted file mode 100644
index f6821d78..00000000
--- a/.changes/1.5.0/Under the Hood-20230223-105838.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Treat contract config as a python object
-time: 2023-02-23T10:58:38.092981-05:00
-custom:
-  Author: gshank emmyoop
-  Issue: 645 693
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3bec7cef..c08ed34a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,78 +5,6 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-## dbt-spark 1.5.0-b4 - March 30, 2023
-
-### Features
-
-- Modify adapter to support unified constraint fields ([#655](https://github.com/dbt-labs/dbt-spark/issues/655))
-- Modify order of columns in create_table_as to match contract ([#671](https://github.com/dbt-labs/dbt-spark/issues/671))
-
-### Under the Hood
-
-- Treat contract config as a python object ([#645](https://github.com/dbt-labs/dbt-spark/issues/645), [#693](https://github.com/dbt-labs/dbt-spark/issues/693))
-
-### Dependencies
-
-- Update ipdb requirement from ~=0.13.11 to ~=0.13.13 ([#677](https://github.com/dbt-labs/dbt-spark/pull/677))
-- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
-- Update pre-commit requirement from ~=2.21 to ~=3.2 ([#687](https://github.com/dbt-labs/dbt-spark/pull/687))
-- Update wheel requirement from ~=0.38 to ~=0.40 ([#680](https://github.com/dbt-labs/dbt-spark/pull/680))
-- Bump mypy from 1.0.1 to 1.1.1 ([#675](https://github.com/dbt-labs/dbt-spark/pull/675))
-- Update types-pytz requirement from ~=2022.7 to ~=2023.2 ([#697](https://github.com/dbt-labs/dbt-spark/pull/697))
-- Update pytz requirement from ~=2022.7 to ~=2023.2 ([#696](https://github.com/dbt-labs/dbt-spark/pull/696))
-
-
-
-## dbt-spark 1.5.0-b3 - March 16, 2023
-
-### Features
-
-- Enforce contracts on models materialized as tables and views ([#639](https://github.com/dbt-labs/dbt-spark/issues/639), [#654](https://github.com/dbt-labs/dbt-spark/issues/654))
-
-### Fixes
-
-- Use take() instead of collect on dataframe to improve the performance ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
-- Fix pyodbc type_code -> data_type conversion  ([#665](https://github.com/dbt-labs/dbt-spark/issues/665))
-
-### Contributors
-- [@akashrn5](https://github.com/akashrn5) ([#526](https://github.com/dbt-labs/dbt-spark/issues/526))
-
-## dbt-spark 1.5.0-b2 - March 03, 2023
-
-### Fixes
-
-- Support for iceberg v2 tables.  Added ability to use multiple join conditions to allow for multiple columns to make a row distinct. ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
-
-### Under the Hood
-
-- Rename constraints_enabled to contract ([#645](https://github.com/dbt-labs/dbt-spark/issues/645))
-
-### Contributors
-- [@dparent1](https://github.com/dparent1) ([#294](https://github.com/dbt-labs/dbt-spark/issues/294))
-
-## dbt-spark 1.5.0-b1 - February 22, 2023
-
-### Features
-
-- Support for data types constraints in Spark following the dbt Core feature #6271 ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
-
-### Fixes
-
-- add merge_exclude_columns tests ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
-
-### Under the Hood
-
-- remove tox call to integration tests ([#584](https://github.com/dbt-labs/dbt-spark/issues/584))
-
-### Dependencies
-
-- Allow thrift 0.16.0 ([#605](https://github.com/dbt-labs/dbt-spark/pull/605))
-
-### Contributors
-- [@b-per](https://github.com/b-per) ([#558](https://github.com/dbt-labs/dbt-spark/issues/558))
-- [@dave-connors-3](https://github.com/dave-connors-3) ([#00](https://github.com/dbt-labs/dbt-spark/issues/00))
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 4a7a8147..fa6c5a1a 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.5.0b4"
+version = "1.5.0rc1"
diff --git a/setup.py b/setup.py
index 9760d925..478dc3e8 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.5.0b4"
+package_version = "1.5.0rc1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 81325545303d36037ebb71391f85c88d6fff8666 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 18 Apr 2023 11:29:36 -0400
Subject: [PATCH 428/603] [create-pull-request] automated change (#717)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                  | 2 +-
 .changes/1.6.0-a1.md              | 1 +
 CHANGELOG.md                      | 4 ++++
 dbt/adapters/spark/__version__.py | 2 +-
 setup.py                          | 2 +-
 5 files changed, 8 insertions(+), 3 deletions(-)
 create mode 100644 .changes/1.6.0-a1.md

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 3130d395..874e5b68 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.5.0rc1
+current_version = 1.6.0a1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.6.0-a1.md b/.changes/1.6.0-a1.md
new file mode 100644
index 00000000..a748db5a
--- /dev/null
+++ b/.changes/1.6.0-a1.md
@@ -0,0 +1 @@
+## dbt-spark 1.6.0-a1 - April 17, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c08ed34a..8561e246 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,10 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.6.0-a1 - April 17, 2023
+
+
+
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index fa6c5a1a..07fc02ee 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.5.0rc1"
+version = "1.6.0a1"
diff --git a/setup.py b/setup.py
index 478dc3e8..07c96ee2 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.5.0rc1"
+package_version = "1.6.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 44b10f92a8ea9956c455dd96cfd6388325c4da91 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 21 Apr 2023 01:04:35 -0400
Subject: [PATCH 429/603] ADAP-473: Table materialization not properly dropping
 existing relation on refresh (#724)

* update `drop_relation()` to drop `target_relation` instead of `old_relation`; these should be the same anyway, but `old_relation` doesn't always get populated

* add in old relation type in cases where the relation type changes
---
 .../unreleased/Fixes-20230420-214433.yaml     |  6 +++++
 .../spark/macros/materializations/table.sql   | 22 ++++++++++++-------
 2 files changed, 20 insertions(+), 8 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230420-214433.yaml

diff --git a/.changes/unreleased/Fixes-20230420-214433.yaml b/.changes/unreleased/Fixes-20230420-214433.yaml
new file mode 100644
index 00000000..57a3fe3a
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230420-214433.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks
+time: 2023-04-20T21:44:33.343598-04:00
+custom:
+  Author: mikealfare
+  Issue: "725"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 927816de..39a9caba 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -14,16 +14,22 @@
   -- setup: if the target relation already exists, drop it
   -- in case if the existing and future table is delta or iceberg, we want to do a
   -- create or replace table instead of dropping, so we don't have the table unavailable
-  {% if old_relation and not (old_relation.is_delta and config.get('file_format', validator=validation.any[basestring]) == 'delta') -%}
-    {{ adapter.drop_relation(old_relation) }}
-  {%- endif %}
-
-  {% if old_relation and not (old_relation.is_iceberg and config.get('file_format', validator=validation.any[basestring]) == 'iceberg') -%}
-    {{ adapter.drop_relation(old_relation) }}
-  {%- endif %}
+  {% if old_relation is not none %}
+    {% set is_delta = (old_relation.is_delta and config.get('file_format', validator=validation.any[basestring]) == 'delta') %}
+    {% set is_iceberg = (old_relation.is_iceberg and config.get('file_format', validator=validation.any[basestring]) == 'iceberg') %}
+    {% set old_relation_type = old_relation.type %}
+  {% else %}
+    {% set is_delta = false %}
+    {% set is_iceberg = false %}
+    {% set old_relation_type = target_relation.type %}
+  {% endif %}
+
+  {% if not is_delta and not is_iceberg %}
+    {% set existing_relation = target_relation %}
+    {{ adapter.drop_relation(existing_relation.incorporate(type=old_relation_type)) }}
+  {% endif %}
 
   -- build model
-
   {%- call statement('main', language=language) -%}
     {{ create_table_as(False, target_relation, compiled_code, language) }}
   {%- endcall -%}
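
The reworked block above reduces to one decision: skip the pre-build drop only when the existing relation already uses the same delta or iceberg file format (those are rebuilt in place with `create or replace`); in every other case drop the target relation, carrying over the old relation's type when it is known. A hedged Python paraphrase of that Jinja logic, for readability only, where `old_relation` is any object exposing the `is_delta`/`is_iceberg` flags used in the diff:

    def should_drop_before_build(old_relation, file_format: str) -> bool:
        """Mirror of the Jinja branch above; the materialization itself runs in Jinja."""
        if old_relation is None:
            # Nothing exists yet, so the guarded "drop ... if exists" is effectively a no-op.
            return True
        keep_in_place = (old_relation.is_delta and file_format == "delta") or (
            old_relation.is_iceberg and file_format == "iceberg"
        )
        # Delta/iceberg relations are replaced atomically by create_table_as,
        # so dropping first would only make the table briefly unavailable.
        return not keep_in_place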

From 6d3dd65a73142d0e2977344a15353b0ab272bda8 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 21 Apr 2023 12:24:59 -0400
Subject: [PATCH 430/603] Update pytest requirement from ~=7.2 to ~=7.3 (#726)

* upgrade pytest to 7.3

* Add automated changelog yaml from template for bot PR

---------

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230421-041623.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230421-041623.yaml

diff --git a/.changes/unreleased/Dependencies-20230421-041623.yaml b/.changes/unreleased/Dependencies-20230421-041623.yaml
new file mode 100644
index 00000000..bd05eb90
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230421-041623.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytest requirement from ~=7.2 to ~=7.3"
+time: 2023-04-21T04:16:23.00000Z
+custom:
+  Author: mikealfare
+  PR: 726
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 446da331..c5ec28bb 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -18,7 +18,7 @@ pip-tools~=6.12
 pre-commit~=2.21;python_version=="3.7"
 pre-commit~=3.2;python_version>="3.8"
 pre-commit-hooks~=4.4
-pytest~=7.2
+pytest~=7.3
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2

From b13fa3b313fbe9f9742324e5c082de6f826db9c8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 21 Apr 2023 14:00:22 -0400
Subject: [PATCH 431/603] Bump mypy from 1.1.1 to 1.2.0 (#708)

* Bump mypy from 1.1.1 to 1.2.0

Bumps [mypy](https://github.com/python/mypy) from 1.1.1 to 1.2.0.
- [Release notes](https://github.com/python/mypy/releases)
- [Commits](https://github.com/python/mypy/compare/v1.1.1...v1.2.0)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* Add automated changelog yaml from template for bot PR

* removed duplicate changie

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependencies-20230406-230203.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230406-230203.yaml

diff --git a/.changes/unreleased/Dependencies-20230406-230203.yaml b/.changes/unreleased/Dependencies-20230406-230203.yaml
new file mode 100644
index 00000000..969d2102
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230406-230203.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.1.1 to 1.2.0"
+time: 2023-04-06T23:02:03.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 708
diff --git a/dev-requirements.txt b/dev-requirements.txt
index c5ec28bb..30cd7862 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -13,7 +13,7 @@ flake8~=6.0;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.1.1  # patch updates have historically introduced breaking changes
+mypy==1.2.0  # patch updates have historically introduced breaking changes
 pip-tools~=6.12
 pre-commit~=2.21;python_version=="3.7"
 pre-commit~=3.2;python_version>="3.8"

From c2e1a62d9c0f9436b7b3ff3080372944c89834cb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 23 Apr 2023 17:53:33 -0400
Subject: [PATCH 432/603] Update types-pytz requirement from ~=2023.2 to
 ~=2023.3 (#704)

* Update types-pytz requirement from ~=2023.2 to ~=2023.3

Updates the requirements on [types-pytz](https://github.com/python/typeshed) to permit the latest version.
- [Release notes](https://github.com/python/typeshed/releases)
- [Commits](https://github.com/python/typeshed/commits)

---
updated-dependencies:
- dependency-name: types-pytz
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependencies-20230421-180054.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230421-180054.yaml

diff --git a/.changes/unreleased/Dependencies-20230421-180054.yaml b/.changes/unreleased/Dependencies-20230421-180054.yaml
new file mode 100644
index 00000000..c5522ef6
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230421-180054.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update types-pytz requirement from ~=2023.2 to ~=2023.3"
+time: 2023-04-21T18:00:54.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 704
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 30cd7862..b4035b5a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -26,7 +26,7 @@ pytest-xdist~=3.2
 pytz~=2023.2
 tox~=3.0;python_version=="3.7"
 tox~=4.4;python_version>="3.8"
-types-pytz~=2023.2
+types-pytz~=2023.3
 types-requests~=2.28
 twine~=4.0
 wheel~=0.40

From 92ce52a3098ae3a6603dbb247e9ab9fd3bf576fb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 23 Apr 2023 19:23:51 -0400
Subject: [PATCH 433/603] Update pytz requirement from ~=2023.2 to ~=2023.3
 (#702)

* Update pytz requirement from ~=2023.2 to ~=2023.3

Updates the requirements on [pytz](https://github.com/stub42/pytz) to permit the latest version.
- [Release notes](https://github.com/stub42/pytz/releases)
- [Commits](https://github.com/stub42/pytz/compare/release_2023.2...release_2023.3)

---
updated-dependencies:
- dependency-name: pytz
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230423-215745.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230423-215745.yaml

diff --git a/.changes/unreleased/Dependencies-20230423-215745.yaml b/.changes/unreleased/Dependencies-20230423-215745.yaml
new file mode 100644
index 00000000..a6f733f7
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230423-215745.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytz requirement from ~=2023.2 to ~=2023.3"
+time: 2023-04-23T21:57:45.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 702
diff --git a/dev-requirements.txt b/dev-requirements.txt
index b4035b5a..d37b2485 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -23,7 +23,7 @@ pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.2
-pytz~=2023.2
+pytz~=2023.3
 tox~=3.0;python_version=="3.7"
 tox~=4.4;python_version>="3.8"
 types-pytz~=2023.3

From 48b50a9da09094ace5ed33bd8d09d5ff8cf7af7f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 23 Apr 2023 23:27:52 -0400
Subject: [PATCH 434/603] Update black requirement from ~=23.1 to ~=23.3 (#701)

* Update black requirement from ~=23.1 to ~=23.3

Updates the requirements on [black](https://github.com/psf/black) to permit the latest version.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/23.1.0...23.3.0)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependencies-20230329-102021.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230329-102021.yaml

diff --git a/.changes/unreleased/Dependencies-20230329-102021.yaml b/.changes/unreleased/Dependencies-20230329-102021.yaml
new file mode 100644
index 00000000..c60c4e69
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230329-102021.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update black requirement from ~=23.1 to ~=23.3"
+time: 2023-03-29T10:20:21.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 701
diff --git a/dev-requirements.txt b/dev-requirements.txt
index d37b2485..87796aac 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.1
+black~=23.3
 bumpversion~=0.6.0
 click~=8.1
 flake8~=5.0;python_version=="3.7"

From 535e2a49ae7bc47f94cdda7e5d980d72432fe80f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 24 Apr 2023 12:37:58 -0400
Subject: [PATCH 435/603] Update pip-tools requirement from ~=6.12 to ~=6.13
 (#737)

* Update pip-tools requirement from ~=6.12 to ~=6.13

Updates the requirements on [pip-tools](https://github.com/jazzband/pip-tools) to permit the latest version.
- [Release notes](https://github.com/jazzband/pip-tools/releases)
- [Changelog](https://github.com/jazzband/pip-tools/blob/main/CHANGELOG.md)
- [Commits](https://github.com/jazzband/pip-tools/compare/6.12.0...6.13.0)

---
updated-dependencies:
- dependency-name: pip-tools
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230421-230051.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230421-230051.yaml

diff --git a/.changes/unreleased/Dependencies-20230421-230051.yaml b/.changes/unreleased/Dependencies-20230421-230051.yaml
new file mode 100644
index 00000000..2bcd4125
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230421-230051.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pip-tools requirement from ~=6.12 to ~=6.13"
+time: 2023-04-21T23:00:51.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 737
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 87796aac..1bc886f7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -14,7 +14,7 @@ flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
 mypy==1.2.0  # patch updates have historically introduced breaking changes
-pip-tools~=6.12
+pip-tools~=6.13
 pre-commit~=2.21;python_version=="3.7"
 pre-commit~=3.2;python_version>="3.8"
 pre-commit-hooks~=4.4

From 8bc8c2c5f1a818c3b1eade83f422565dc8e32f86 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 24 Apr 2023 15:45:35 -0400
Subject: [PATCH 436/603] Bump pyodbc from 4.0.34 to 4.0.35 (#519)

* Bump pyodbc from 4.0.34 to 4.0.35

Bumps [pyodbc](https://github.com/mkleehammer/pyodbc) from 4.0.34 to 4.0.35.
- [Release notes](https://github.com/mkleehammer/pyodbc/releases)
- [Commits](https://github.com/mkleehammer/pyodbc/compare/4.0.34...4.0.35)

---
updated-dependencies:
- dependency-name: pyodbc
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* sync `requirements.txt` and `setup.py`

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Neelesh Salian <nssalian@users.noreply.github.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependency-20221116-221906.yaml | 7 +++++++
 requirements.txt                                    | 2 +-
 setup.py                                            | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Dependency-20221116-221906.yaml

diff --git a/.changes/unreleased/Dependency-20221116-221906.yaml b/.changes/unreleased/Dependency-20221116-221906.yaml
new file mode 100644
index 00000000..5e126d07
--- /dev/null
+++ b/.changes/unreleased/Dependency-20221116-221906.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump pyodbc from 4.0.34 to 4.0.35"
+time: 2022-11-16T22:19:06.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 519
diff --git a/requirements.txt b/requirements.txt
index ce359bec..4132645f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 PyHive[hive]>=0.6.0,<0.7.0
 requests[python]>=2.28.1
 
-pyodbc==4.0.34
+pyodbc~=4.0.30
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/setup.py b/setup.py
index 07c96ee2..266f35b1 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ def _get_dbt_core_version():
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 
-odbc_extras = ["pyodbc>=4.0.30"]
+odbc_extras = ["pyodbc~=4.0.30"]
 pyhive_extras = [
     "PyHive[hive]>=0.6.0,<0.7.0",
     "thrift>=0.11.0,<0.17.0",

From cb41ab049481bc458871d5c37fad47e59d6b759c Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Fri, 28 Apr 2023 09:19:01 -0700
Subject: [PATCH 437/603] CT-1873: Support insert_overwrite (#700)

* Remove thrown exception when using insert_overwrite with delta

* Add changelog entry

* Update integration tests

* Fix missing comma in model config

* Fix the typo

* Tweaking the skip profiles for tests

* re-add the other validate

* disabling test but leaving feature

* disabling test but leaving feature

* commit for ci

* fix test

---------

Co-authored-by: flvndh <17010377+flvndh@users.noreply.github.com>
Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
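
For reference, a minimal sketch of a model that exercises the strategy this patch unblocks (insert_overwrite on the delta file format), mirroring the insert_overwrite_partitions_delta_sql fixture added in the diff below; the partition column and literal values are illustrative placeholders, and actual partition-replacement behavior depends on the cluster's partition-overwrite settings:

{{ config(
    materialized='incremental',
    incremental_strategy='insert_overwrite',
    partition_by='id',
    file_format='delta'
) }}

-- the body is a placeholder; real models would select from a ref() or source()
select cast(1 as bigint) as id, 'hello' as msg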
---
 .../unreleased/Features-20220812-091652.yaml  |  7 ++++
 .../materializations/incremental/validate.sql |  4 ---
 .../incremental_strategies/fixtures.py        | 34 ++++++++++---------
 .../test_incremental_strategies.py            | 14 ++++++--
 4 files changed, 37 insertions(+), 22 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220812-091652.yaml

diff --git a/.changes/unreleased/Features-20220812-091652.yaml b/.changes/unreleased/Features-20220812-091652.yaml
new file mode 100644
index 00000000..58c60aab
--- /dev/null
+++ b/.changes/unreleased/Features-20220812-091652.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Support insert_overwrite strategy with delta
+time: 2022-08-12T09:16:52.7995122+02:00
+custom:
+  Author: flvndh
+  Issue: "1013"
+  PR: "430"
diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql
index 71ec0182..88b851ca 100644
--- a/dbt/include/spark/macros/materializations/incremental/validate.sql
+++ b/dbt/include/spark/macros/materializations/incremental/validate.sql
@@ -40,16 +40,12 @@
     You cannot use this strategy when connecting via endpoint
     Use the 'append' or 'merge' strategy instead
   {%- endset %}
-
   {% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %}
     {% do exceptions.raise_compiler_error(invalid_strategy_msg) %}
   {%-else %}
     {% if raw_strategy == 'merge' and file_format not in ['delta', 'iceberg', 'hudi'] %}
       {% do exceptions.raise_compiler_error(invalid_merge_msg) %}
     {% endif %}
-    {% if raw_strategy == 'insert_overwrite' and file_format == 'delta' %}
-      {% do exceptions.raise_compiler_error(invalid_insert_overwrite_delta_msg) %}
-    {% endif %}
     {% if raw_strategy == 'insert_overwrite' and target.endpoint %}
       {% do exceptions.raise_compiler_error(invalid_insert_overwrite_endpoint_msg) %}
     {% endif %}
diff --git a/tests/functional/adapter/incremental_strategies/fixtures.py b/tests/functional/adapter/incremental_strategies/fixtures.py
index 50fba51d..9cee477d 100644
--- a/tests/functional/adapter/incremental_strategies/fixtures.py
+++ b/tests/functional/adapter/incremental_strategies/fixtures.py
@@ -47,11 +47,10 @@
 {% endif %}
 """.lstrip()
 
-bad_insert_overwrite_delta_sql = """
+bad_merge_not_delta_sql = """
 {{ config(
     materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    file_format = 'delta',
+    incremental_strategy = 'merge',
 ) }}
 
 {% if not is_incremental() %}
@@ -69,10 +68,10 @@
 {% endif %}
 """.lstrip()
 
-bad_merge_not_delta_sql = """
+bad_strategy_sql = """
 {{ config(
     materialized = 'incremental',
-    incremental_strategy = 'merge',
+    incremental_strategy = 'something_else',
 ) }}
 
 {% if not is_incremental() %}
@@ -90,10 +89,15 @@
 {% endif %}
 """.lstrip()
 
-bad_strategy_sql = """
+#
+# Delta Models
+#
+
+append_delta_sql = """
 {{ config(
     materialized = 'incremental',
-    incremental_strategy = 'something_else',
+    incremental_strategy = 'append',
+    file_format = 'delta',
 ) }}
 
 {% if not is_incremental() %}
@@ -111,15 +115,12 @@
 {% endif %}
 """.lstrip()
 
-#
-# Delta Models
-#
-
-append_delta_sql = """
+insert_overwrite_partitions_delta_sql = """
 {{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'append',
-    file_format = 'delta',
+    materialized='incremental',
+    incremental_strategy='insert_overwrite',
+    partition_by='id',
+    file_format='delta'
 ) }}
 
 {% if not is_incremental() %}
@@ -135,7 +136,8 @@
 select cast(3 as bigint) as id, 'anyway' as msg
 
 {% endif %}
-""".lstrip()
+"""
+
 
 delta_merge_no_key_sql = """
 {{ config(
diff --git a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
index 516f3da7..b05fcb27 100644
--- a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
+++ b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
@@ -10,7 +10,6 @@
 )
 from tests.functional.adapter.incremental_strategies.fixtures import (
     bad_file_format_sql,
-    bad_insert_overwrite_delta_sql,
     bad_merge_not_delta_sql,
     bad_strategy_sql,
     default_append_sql,
@@ -20,6 +19,7 @@
     delta_merge_no_key_sql,
     delta_merge_unique_key_sql,
     delta_merge_update_columns_sql,
+    # Skip: CT-1873 insert_overwrite_partitions_delta_sql,
 )
 
 
@@ -91,6 +91,8 @@ def models(self):
             "merge_no_key.sql": delta_merge_no_key_sql,
             "merge_unique_key.sql": delta_merge_unique_key_sql,
             "merge_update_columns.sql": delta_merge_update_columns_sql,
+            # Skip: cannot be active on any endpoint with grants
+            # "insert_overwrite_partitions_delta.sql": insert_overwrite_partitions_delta_sql,
         }
 
     def run_and_test(self, project):
@@ -106,13 +108,21 @@ def run_and_test(self, project):
     def test_delta_strategies(self, project):
         self.run_and_test(project)
 
+    @pytest.mark.skip(
+        reason="this feature is incompatible with databricks settings required for grants"
+    )
+    def test_delta_strategies_overwrite(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(
+            project.adapter, ["insert_overwrite_partitions_delta", "expected_upsert"]
+        )
+
 
 class TestBadStrategies(BaseIncrementalStrategies):
     @pytest.fixture(scope="class")
     def models(self):
         return {
             "bad_file_format.sql": bad_file_format_sql,
-            "bad_insert_overwrite_delta.sql": bad_insert_overwrite_delta_sql,
             "bad_merge_not_delta.sql": bad_merge_not_delta_sql,
             "bad_strategy.sql": bad_strategy_sql,
         }

From 142c7a67c6a2c989ed07b640abf753f79d97dd9a Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@tabular.io>
Date: Mon, 8 May 2023 23:53:34 +0200
Subject: [PATCH 438/603] Remove unused `# type: ignore` (#754)

---
 .changes/unreleased/Under the Hood-20230508-222118.yaml | 6 ++++++
 .pre-commit-config.yaml                                 | 2 +-
 dbt/adapters/spark/__init__.py                          | 2 +-
 dbt/adapters/spark/impl.py                              | 4 ++--
 4 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230508-222118.yaml

diff --git a/.changes/unreleased/Under the Hood-20230508-222118.yaml b/.changes/unreleased/Under the Hood-20230508-222118.yaml
new file mode 100644
index 00000000..daf66ad1
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230508-222118.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Remove unneeded type ignore
+time: 2023-05-08T22:21:18.093232+02:00
+custom:
+  Author: Fokko
+  Issue: "753"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3d80b955..ddec9b66 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,7 +52,7 @@ repos:
     # of our control to the mix.  Unfortunately, there's nothing we can
     # do about per pre-commit's author.
     # See https://github.com/pre-commit/pre-commit/issues/730 for details.
-    args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases]
+    args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases, --warn-unused-ignores]
     files: ^dbt/adapters/.*
     language: system
   - id: mypy
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py
index 31588019..c25ba40d 100644
--- a/dbt/adapters/spark/__init__.py
+++ b/dbt/adapters/spark/__init__.py
@@ -5,7 +5,7 @@
 from dbt.adapters.spark.impl import SparkAdapter
 
 from dbt.adapters.base import AdapterPlugin
-from dbt.include import spark  # type: ignore
+from dbt.include import spark
 
 Plugin = AdapterPlugin(
     adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH  # type: ignore
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 37de188c..b28d2c77 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -131,7 +131,7 @@ def add_schema_to_cache(self, schema) -> str:
             raise dbt.exceptions.CompilationError(
                 "Attempted to cache a null schema for {}".format(name)
             )
-        if get_flags().USE_CACHE:  # type: ignore
+        if get_flags().USE_CACHE:
             self.cache.add_schema(None, schema)
         # so jinja doesn't render things
         return ""
@@ -190,7 +190,7 @@ def _build_spark_relation_list(
             is_hudi: bool = "Provider: hudi" in information
             is_iceberg: bool = "Provider: iceberg" in information
 
-            relation: BaseRelation = self.Relation.create(  # type: ignore
+            relation: BaseRelation = self.Relation.create(
                 schema=_schema,
                 identifier=name,
                 type=rel_type,

From 41304128f642555acee26762f14ed4890f529eca Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Thu, 11 May 2023 10:32:44 -0700
Subject: [PATCH 439/603] override schema_string_type fixture and test
 constraints for int type (#695)

---
 tests/functional/adapter/test_constraints.py | 24 ++++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 1287dfd3..0a71b928 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -59,18 +59,20 @@ def string_type(self):
     def int_type(self):
         return "INT"
 
+    @pytest.fixture
+    def schema_string_type(self):
+        return "STRING"
+
     @pytest.fixture
     def schema_int_type(self):
         return "INT"
 
     @pytest.fixture
-    def data_types(self, int_type, schema_int_type, string_type):
+    def data_types(self, int_type, schema_int_type, string_type, schema_string_type):
         # sql_column_value, schema_data_type, error_data_type
         return [
-            # TODO: the int type is tricky to test in test__constraints_wrong_column_data_type without a schema_string_type to override.
-            # uncomment the line below once https://github.com/dbt-labs/dbt-core/issues/7121 is resolved
-            # ['1', schema_int_type, int_type],
-            ['"1"', "string", string_type],
+            ["1", schema_int_type, int_type],
+            ['"1"', schema_string_type, string_type],
             ["true", "boolean", "BOOL"],
             ['array("1","2","3")', "string", string_type],
             ["array(1,2,3)", "string", string_type],
@@ -89,18 +91,20 @@ def string_type(self):
     def int_type(self):
         return "INT_TYPE"
 
+    @pytest.fixture
+    def schema_string_type(self):
+        return "STRING"
+
     @pytest.fixture
     def schema_int_type(self):
         return "INT"
 
     @pytest.fixture
-    def data_types(self, int_type, schema_int_type, string_type):
+    def data_types(self, int_type, schema_int_type, string_type, schema_string_type):
         # sql_column_value, schema_data_type, error_data_type
         return [
-            # TODO: the int type is tricky to test in test__constraints_wrong_column_data_type without a schema_string_type to override.
-            # uncomment the line below once https://github.com/dbt-labs/dbt-core/issues/7121 is resolved
-            # ['1', schema_int_type, int_type],
-            ['"1"', "string", string_type],
+            ["1", schema_int_type, int_type],
+            ['"1"', schema_string_type, string_type],
             ["true", "boolean", "BOOLEAN_TYPE"],
             ['array("1","2","3")', "array<string>", "ARRAY_TYPE"],
             ["array(1,2,3)", "array<int>", "ARRAY_TYPE"],

From 5ebff0db40247977a4ec6804898fdc7a77f2cedd Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@tabular.io>
Date: Fri, 12 May 2023 17:44:48 +0200
Subject: [PATCH 440/603] Remove dead code 💀 (#759)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolves #758

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .../unreleased/Fixes-20230510-154735.yaml     |  6 +++++
 dbt/adapters/spark/impl.py                    | 23 +------------------
 2 files changed, 7 insertions(+), 22 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230510-154735.yaml

diff --git a/.changes/unreleased/Fixes-20230510-154735.yaml b/.changes/unreleased/Fixes-20230510-154735.yaml
new file mode 100644
index 00000000..d3078564
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230510-154735.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: "Remove dead code \U0001F480"
+time: 2023-05-10T15:47:35.848176+02:00
+custom:
+  Author: Fokko
+  Issue: "758"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index b28d2c77..46b435fd 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -24,7 +24,6 @@
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
 from dbt.events import AdapterLogger
-from dbt.flags import get_flags
 from dbt.utils import executor, AttrDict
 
 logger = AdapterLogger("Spark")
@@ -34,8 +33,6 @@
 LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
 LIST_RELATIONS_SHOW_TABLES_MACRO_NAME = "list_relations_show_tables_without_caching"
 DESCRIBE_TABLE_EXTENDED_MACRO_NAME = "describe_table_extended_without_caching"
-DROP_RELATION_MACRO_NAME = "drop_relation"
-FETCH_TBL_PROPERTIES_MACRO_NAME = "fetch_tbl_properties"
 
 KEY_TABLE_OWNER = "Owner"
 KEY_TABLE_STATISTICS = "Statistics"
@@ -124,18 +121,6 @@ def convert_datetime_type(cls, agate_table, col_idx):
     def quote(self, identifier):
         return "`{}`".format(identifier)
 
-    def add_schema_to_cache(self, schema) -> str:
-        """Cache a new schema in dbt. It will show up in `list relations`."""
-        if schema is None:
-            name = self.nice_connection_name()
-            raise dbt.exceptions.CompilationError(
-                "Attempted to cache a null schema for {}".format(name)
-            )
-        if get_flags().USE_CACHE:
-            self.cache.add_schema(None, schema)
-        # so jinja doesn't render things
-        return ""
-
     def _get_relation_information(self, row: agate.Row) -> RelationInfo:
         """relation info was fetched with SHOW TABLES EXTENDED"""
         try:
@@ -348,12 +333,6 @@ def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str,
             as_dict["table_database"] = None
             yield as_dict
 
-    def get_properties(self, relation: Relation) -> Dict[str, str]:
-        properties = self.execute_macro(
-            FETCH_TBL_PROPERTIES_MACRO_NAME, kwargs={"relation": relation}
-        )
-        return dict(properties)
-
     def get_catalog(self, manifest):
         schema_map = self._get_catalog_schemas(manifest)
         if len(schema_map) > 1:
@@ -411,7 +390,7 @@ def get_rows_different_sql(
         column_names: Optional[List[str]] = None,
         except_operator: str = "EXCEPT",
     ) -> str:
-        """Generate SQL for a query that returns a single row with a two
+        """Generate SQL for a query that returns a single row with two
         columns: the number of rows that are different between the two
         relations and the number of mismatched rows.
         """

From 5e9bf97cca5cb537e78048bd15ef0b835a1fea54 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 12 May 2023 13:48:23 -0700
Subject: [PATCH 441/603] [create-pull-request] automated change (#769)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.6.0-b1.md                          | 31 +++++++++++++++++
 .../Dependencies-20230329-102021.yaml         |  0
 .../Dependencies-20230406-230203.yaml         |  0
 .../Dependencies-20230421-041623.yaml         |  0
 .../Dependencies-20230421-180054.yaml         |  0
 .../Dependencies-20230421-230051.yaml         |  0
 .../Dependencies-20230423-215745.yaml         |  0
 .../Dependency-20221116-221906.yaml           |  0
 .../Features-20220812-091652.yaml             |  0
 .../Fixes-20230420-214433.yaml                |  0
 .../Fixes-20230510-154735.yaml                |  0
 .../Under the Hood-20230508-222118.yaml       |  0
 CHANGELOG.md                                  | 33 ++++++++++++++++++-
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 16 files changed, 66 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.6.0-b1.md
 rename .changes/{unreleased => 1.6.0}/Dependencies-20230329-102021.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Dependencies-20230406-230203.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Dependencies-20230421-041623.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Dependencies-20230421-180054.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Dependencies-20230421-230051.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Dependencies-20230423-215745.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Dependency-20221116-221906.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Features-20220812-091652.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Fixes-20230420-214433.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Fixes-20230510-154735.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Under the Hood-20230508-222118.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 874e5b68..7540a87b 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.6.0a1
+current_version = 1.6.0b1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.6.0-b1.md b/.changes/1.6.0-b1.md
new file mode 100644
index 00000000..5c385a7f
--- /dev/null
+++ b/.changes/1.6.0-b1.md
@@ -0,0 +1,31 @@
+## dbt-spark 1.6.0-b1 - May 12, 2023
+
+### Features
+
+- Support insert_overwrite strategy with delta ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
+
+### Fixes
+
+- Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks ([#725](https://github.com/dbt-labs/dbt-spark/issues/725))
+- Remove dead code 💀 ([#758](https://github.com/dbt-labs/dbt-spark/issues/758))
+
+### Under the Hood
+
+- Remove unneeded type ignore ([#753](https://github.com/dbt-labs/dbt-spark/issues/753))
+
+### Dependencies
+
+- Update black requirement from ~=23.1 to ~=23.3 ([#701](https://github.com/dbt-labs/dbt-spark/pull/701))
+- Bump mypy from 1.1.1 to 1.2.0 ([#708](https://github.com/dbt-labs/dbt-spark/pull/708))
+- Update pytest requirement from ~=7.2 to ~=7.3 ([#726](https://github.com/dbt-labs/dbt-spark/pull/726))
+- Update types-pytz requirement from ~=2023.2 to ~=2023.3 ([#704](https://github.com/dbt-labs/dbt-spark/pull/704))
+- Update pip-tools requirement from ~=6.12 to ~=6.13 ([#737](https://github.com/dbt-labs/dbt-spark/pull/737))
+- Update pytz requirement from ~=2023.2 to ~=2023.3 ([#702](https://github.com/dbt-labs/dbt-spark/pull/702))
+
+### Dependency
+
+- Bump pyodbc from 4.0.34 to 4.0.35 ([#417](https://github.com/dbt-labs/dbt-spark/issues/417))
+
+### Contributors
+- [@Fokko](https://github.com/Fokko) ([#758](https://github.com/dbt-labs/dbt-spark/issues/758), [#753](https://github.com/dbt-labs/dbt-spark/issues/753))
+- [@flvndh](https://github.com/flvndh) ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
diff --git a/.changes/unreleased/Dependencies-20230329-102021.yaml b/.changes/1.6.0/Dependencies-20230329-102021.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230329-102021.yaml
rename to .changes/1.6.0/Dependencies-20230329-102021.yaml
diff --git a/.changes/unreleased/Dependencies-20230406-230203.yaml b/.changes/1.6.0/Dependencies-20230406-230203.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230406-230203.yaml
rename to .changes/1.6.0/Dependencies-20230406-230203.yaml
diff --git a/.changes/unreleased/Dependencies-20230421-041623.yaml b/.changes/1.6.0/Dependencies-20230421-041623.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230421-041623.yaml
rename to .changes/1.6.0/Dependencies-20230421-041623.yaml
diff --git a/.changes/unreleased/Dependencies-20230421-180054.yaml b/.changes/1.6.0/Dependencies-20230421-180054.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230421-180054.yaml
rename to .changes/1.6.0/Dependencies-20230421-180054.yaml
diff --git a/.changes/unreleased/Dependencies-20230421-230051.yaml b/.changes/1.6.0/Dependencies-20230421-230051.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230421-230051.yaml
rename to .changes/1.6.0/Dependencies-20230421-230051.yaml
diff --git a/.changes/unreleased/Dependencies-20230423-215745.yaml b/.changes/1.6.0/Dependencies-20230423-215745.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230423-215745.yaml
rename to .changes/1.6.0/Dependencies-20230423-215745.yaml
diff --git a/.changes/unreleased/Dependency-20221116-221906.yaml b/.changes/1.6.0/Dependency-20221116-221906.yaml
similarity index 100%
rename from .changes/unreleased/Dependency-20221116-221906.yaml
rename to .changes/1.6.0/Dependency-20221116-221906.yaml
diff --git a/.changes/unreleased/Features-20220812-091652.yaml b/.changes/1.6.0/Features-20220812-091652.yaml
similarity index 100%
rename from .changes/unreleased/Features-20220812-091652.yaml
rename to .changes/1.6.0/Features-20220812-091652.yaml
diff --git a/.changes/unreleased/Fixes-20230420-214433.yaml b/.changes/1.6.0/Fixes-20230420-214433.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20230420-214433.yaml
rename to .changes/1.6.0/Fixes-20230420-214433.yaml
diff --git a/.changes/unreleased/Fixes-20230510-154735.yaml b/.changes/1.6.0/Fixes-20230510-154735.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20230510-154735.yaml
rename to .changes/1.6.0/Fixes-20230510-154735.yaml
diff --git a/.changes/unreleased/Under the Hood-20230508-222118.yaml b/.changes/1.6.0/Under the Hood-20230508-222118.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230508-222118.yaml
rename to .changes/1.6.0/Under the Hood-20230508-222118.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8561e246..d141dcb3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,9 +5,40 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-## dbt-spark 1.6.0-a1 - April 17, 2023
+## dbt-spark 1.6.0-b1 - May 12, 2023
+
+### Features
+
+- Support insert_overwrite strategy with delta ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
+
+### Fixes
+
+- Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks ([#725](https://github.com/dbt-labs/dbt-spark/issues/725))
+- Remove dead code 💀 ([#758](https://github.com/dbt-labs/dbt-spark/issues/758))
+
+### Under the Hood
+
+- Remove unneeded type ignore ([#753](https://github.com/dbt-labs/dbt-spark/issues/753))
 
+### Dependencies
 
+- Update black requirement from ~=23.1 to ~=23.3 ([#701](https://github.com/dbt-labs/dbt-spark/pull/701))
+- Bump mypy from 1.1.1 to 1.2.0 ([#708](https://github.com/dbt-labs/dbt-spark/pull/708))
+- Update pytest requirement from ~=7.2 to ~=7.3 ([#726](https://github.com/dbt-labs/dbt-spark/pull/726))
+- Update types-pytz requirement from ~=2023.2 to ~=2023.3 ([#704](https://github.com/dbt-labs/dbt-spark/pull/704))
+- Update pip-tools requirement from ~=6.12 to ~=6.13 ([#737](https://github.com/dbt-labs/dbt-spark/pull/737))
+- Update pytz requirement from ~=2023.2 to ~=2023.3 ([#702](https://github.com/dbt-labs/dbt-spark/pull/702))
+
+### Dependency
+
+- Bump pyodbc from 4.0.34 to 4.0.35 ([#417](https://github.com/dbt-labs/dbt-spark/issues/417))
+
+### Contributors
+- [@Fokko](https://github.com/Fokko) ([#758](https://github.com/dbt-labs/dbt-spark/issues/758), [#753](https://github.com/dbt-labs/dbt-spark/issues/753))
+- [@flvndh](https://github.com/flvndh) ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
+
+
+## dbt-spark 1.6.0-a1 - April 17, 2023
 
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 07fc02ee..cafa9196 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.6.0a1"
+version = "1.6.0b1"
diff --git a/setup.py b/setup.py
index 266f35b1..15cf8efa 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.6.0a1"
+package_version = "1.6.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From cce8975906ff5693d4c761ac32dc761f3f097a36 Mon Sep 17 00:00:00 2001
From: David Bloss <david.bloss@dbtlabs.com>
Date: Tue, 16 May 2023 09:29:43 -0500
Subject: [PATCH 442/603] update used gh actions ahead of set-output, node12
 deprecation (#745)

* update used gh actions ahead of set-output, node12 deprecation

* replace triage label with github script

* use swap-labels for updating labels

---------

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .github/workflows/bot-changelog.yml |  2 +-
 .github/workflows/main.yml          | 16 ++++++++--------
 .github/workflows/triage-labels.yml | 12 +++++-------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
index 92aff8eb..2bbd1fa1 100644
--- a/.github/workflows/bot-changelog.yml
+++ b/.github/workflows/bot-changelog.yml
@@ -49,7 +49,7 @@ jobs:
     - name: Create and commit changelog on bot PR
       if: ${{ contains(github.event.pull_request.labels.*.name, matrix.label) }}
       id: bot_changelog
-      uses: emmyoop/changie_bot@v1.0.1
+      uses: emmyoop/changie_bot@v1.1.0
       with:
         GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }}
         commit_author_name: "Github Build Bot"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d399960a..0dccff84 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -43,7 +43,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           persist-credentials: false
 
@@ -87,7 +87,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4.3.0
@@ -108,9 +108,9 @@ jobs:
       - name: Get current date
         if: always()
         id: date
-        run: echo "::set-output name=date::$(date +'%Y-%m-%dT%H_%M_%S')" #no colons allowed for artifacts
+        run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts
 
-      - uses: actions/upload-artifact@v2
+      - uses: actions/upload-artifact@v3
         if: always()
         with:
           name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv
@@ -126,7 +126,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Set up Python
         uses: actions/setup-python@v4.3.0
@@ -157,9 +157,9 @@ jobs:
         run: |
           export is_alpha=0
           if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi
-          echo "::set-output name=is_alpha::$is_alpha"
+          echo "is_alpha=$is_alpha" >> $GITHUB_OUTPUT
 
-      - uses: actions/upload-artifact@v2
+      - uses: actions/upload-artifact@v3
         with:
           name: dist
           path: dist/
@@ -190,7 +190,7 @@ jobs:
           python -m pip install --user --upgrade pip
           python -m pip install --upgrade wheel
           python -m pip --version
-      - uses: actions/download-artifact@v2
+      - uses: actions/download-artifact@v3
         with:
           name: dist
           path: dist/
diff --git a/.github/workflows/triage-labels.yml b/.github/workflows/triage-labels.yml
index a71dc5e1..91f529e3 100644
--- a/.github/workflows/triage-labels.yml
+++ b/.github/workflows/triage-labels.yml
@@ -24,10 +24,8 @@ permissions:
 jobs:
   triage_label:
     if: contains(github.event.issue.labels.*.name, 'awaiting_response')
-    runs-on: ubuntu-latest
-    steps:
-      - name: initial labeling
-        uses: andymckay/labeler@master
-        with:
-          add-labels: "triage"
-          remove-labels: "awaiting_response"
+    uses: dbt-labs/actions/.github/workflows/swap-labels.yml@main
+    with:
+      add_label: "triage"
+      remove_label: "awaiting_response"
+    secrets: inherit # this is only acceptable because we own the action we're calling

From 933a9def6209612aa83c10e2775786ff448b3999 Mon Sep 17 00:00:00 2001
From: David Bloss <david.bloss@dbtlabs.com>
Date: Wed, 17 May 2023 15:46:55 -0500
Subject: [PATCH 443/603] update used gh actions ahead of node12 deprecation
 (#783)

Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>
---
 .github/workflows/backport.yml      | 2 +-
 .github/workflows/bot-changelog.yml | 2 +-
 .github/workflows/main.yml          | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml
index 8c0355bd..ea80cbc2 100644
--- a/.github/workflows/backport.yml
+++ b/.github/workflows/backport.yml
@@ -37,6 +37,6 @@ jobs:
       github.event.pull_request.merged
       && contains(github.event.label.name, 'backport')
     steps:
-      - uses: tibdex/backport@v2.0.2
+      - uses: tibdex/backport@v2
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
index 2bbd1fa1..89972070 100644
--- a/.github/workflows/bot-changelog.yml
+++ b/.github/workflows/bot-changelog.yml
@@ -49,7 +49,7 @@ jobs:
     - name: Create and commit changelog on bot PR
       if: ${{ contains(github.event.pull_request.labels.*.name, matrix.label) }}
       id: bot_changelog
-      uses: emmyoop/changie_bot@v1.1.0
+      uses: emmyoop/changie_bot@v1
       with:
         GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }}
         commit_author_name: "Github Build Bot"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 0dccff84..f24aa74f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -48,7 +48,7 @@ jobs:
           persist-credentials: false
 
       - name: Set up Python
-        uses: actions/setup-python@v4.3.0
+        uses: actions/setup-python@v4
         with:
           python-version: '3.8'
 
@@ -90,7 +90,7 @@ jobs:
         uses: actions/checkout@v3
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4.3.0
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -129,7 +129,7 @@ jobs:
         uses: actions/checkout@v3
 
       - name: Set up Python
-        uses: actions/setup-python@v4.3.0
+        uses: actions/setup-python@v4
         with:
           python-version: '3.8'
 
@@ -181,7 +181,7 @@ jobs:
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4.3.0
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 

From 4f1039d2f0e26eb99516a2803d43fe0083c2f82e Mon Sep 17 00:00:00 2001
From: Peter Webb <peter.webb@dbtlabs.com>
Date: Wed, 24 May 2023 13:41:06 -0400
Subject: [PATCH 444/603] Finish Constraint Support for Spark (#747)

* CT-2223: Add CONSTRAINT_SUPPORT mapping to adapter implementation

* CT-2223: Model constraints implemented for spark

* CT-2223: Add unit test

* CT-2223: Exclude test not applicable to spark

* CT-2223: Another unit test fix.

* CT-2223: Final (?) unit test fix

* CT-2223: Final (??) unit test fix

* Update .changes/unreleased/Features-20230427-123135.yaml

Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>

---------

Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
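
For context, a sketch of the statement the reworked spark__alter_table_add_constraints macro renders for a single model-level check constraint; the relation name, constraint name, and expression here are hypothetical, and the statement is only emitted when the model's contract is enforced and the file format is delta:

-- named check constraints use constraint.name; unnamed ones fall back to an md5 hash
alter table my_schema.my_model add constraint valid_id check (id > 0);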
---
 .../unreleased/Features-20230427-123135.yaml  |  6 +++
 dbt/adapters/spark/impl.py                    | 17 +++++--
 dbt/include/spark/macros/adapters.sql         | 22 ++++-----
 tests/functional/adapter/test_constraints.py  | 46 +++++++++++++++++++
 4 files changed, 75 insertions(+), 16 deletions(-)
 create mode 100644 .changes/unreleased/Features-20230427-123135.yaml

diff --git a/.changes/unreleased/Features-20230427-123135.yaml b/.changes/unreleased/Features-20230427-123135.yaml
new file mode 100644
index 00000000..a1cf88be
--- /dev/null
+++ b/.changes/unreleased/Features-20230427-123135.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: All constraint types are supported, but not enforced.
+time: 2023-04-27T12:31:35.011284-04:00
+custom:
+  Author: peterallenwebb
+  Issue: 656 657
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 46b435fd..bfe890eb 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -2,17 +2,16 @@
 from concurrent.futures import Future
 from dataclasses import dataclass
 from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable
+
 from typing_extensions import TypeAlias
 
 import agate
-from dbt.contracts.relation import RelationType
 
 import dbt
 import dbt.exceptions
 
 from dbt.adapters.base import AdapterConfig, PythonJobHelper
-from dbt.adapters.base.impl import catch_as_completed
-from dbt.contracts.connection import AdapterResponse
+from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport
 from dbt.adapters.sql import SQLAdapter
 from dbt.adapters.spark import SparkConnectionManager
 from dbt.adapters.spark import SparkRelation
@@ -23,6 +22,9 @@
 )
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
+from dbt.contracts.connection import AdapterResponse
+from dbt.contracts.graph.nodes import ConstraintType
+from dbt.contracts.relation import RelationType
 from dbt.events import AdapterLogger
 from dbt.utils import executor, AttrDict
 
@@ -79,6 +81,7 @@ class SparkAdapter(SQLAdapter):
     INFORMATION_COLUMNS_REGEX = re.compile(r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE)
     INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE)
     INFORMATION_STATISTICS_REGEX = re.compile(r"^Statistics: (.*)$", re.MULTILINE)
+
     HUDI_METADATA_COLUMNS = [
         "_hoodie_commit_time",
         "_hoodie_commit_seqno",
@@ -87,6 +90,14 @@ class SparkAdapter(SQLAdapter):
         "_hoodie_file_name",
     ]
 
+    CONSTRAINT_SUPPORT = {
+        ConstraintType.check: ConstraintSupport.NOT_ENFORCED,
+        ConstraintType.not_null: ConstraintSupport.NOT_ENFORCED,
+        ConstraintType.unique: ConstraintSupport.NOT_ENFORCED,
+        ConstraintType.primary_key: ConstraintSupport.NOT_ENFORCED,
+        ConstraintType.foreign_key: ConstraintSupport.NOT_ENFORCED,
+    }
+
     Relation: TypeAlias = SparkRelation
     RelationInfo = Tuple[str, str, str]
     Column: TypeAlias = SparkColumn
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 0d397e6d..1ef5d79e 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -183,7 +183,7 @@
 {% macro spark__persist_constraints(relation, model) %}
   {%- set contract_config = config.get('contract') -%}
   {% if contract_config.enforced and config.get('file_format', 'delta') == 'delta' %}
-    {% do alter_table_add_constraints(relation, model.columns) %}
+    {% do alter_table_add_constraints(relation, model.constraints) %}
     {% do alter_column_set_constraints(relation, model.columns) %}
   {% endif %}
 {% endmacro %}
@@ -192,18 +192,14 @@
   {{ return(adapter.dispatch('alter_table_add_constraints', 'dbt')(relation, constraints)) }}
 {% endmacro %}
 
-{% macro spark__alter_table_add_constraints(relation, column_dict) %}
-
-  {% for column_name in column_dict %}
-    {% set constraints = column_dict[column_name]['constraints'] %}
-    {% for constraint in constraints %}
-      {% if constraint.type == 'check' and not is_incremental() %}
-        {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint.expression ~ ";" ~ loop.index) -%}
-        {% call statement() %}
-          alter table {{ relation }} add constraint {{ constraint_hash }} check {{ constraint.expression }};
-        {% endcall %}
-      {% endif %}
-    {% endfor %}
+{% macro spark__alter_table_add_constraints(relation, constraints) %}
+  {% for constraint in constraints %}
+    {% if constraint.type == 'check' and not is_incremental() %}
+      {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint.expression ~ ";" ~ loop.index) -%}
+      {% call statement() %}
+        alter table {{ relation }} add constraint {{ constraint.name if constraint.name else constraint_hash }} check {{ constraint.expression }};
+      {% endcall %}
+    {% endif %}
   {% endfor %}
 {% endmacro %}
 
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 0a71b928..3bfa888a 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -1,5 +1,6 @@
 import pytest
 from dbt.tests.adapter.constraints.test_constraints import (
+    BaseModelConstraintsRuntimeEnforcement,
     BaseTableConstraintsColumnsEqual,
     BaseViewConstraintsColumnsEqual,
     BaseIncrementalConstraintsColumnsEqual,
@@ -9,6 +10,7 @@
     BaseIncrementalConstraintsRollback,
 )
 from dbt.tests.adapter.constraints.fixtures import (
+    constrained_model_schema_yml,
     my_model_sql,
     my_model_wrong_order_sql,
     my_model_wrong_name_sql,
@@ -37,9 +39,26 @@
     '2019-01-01' as date_day ) as model_subq
 """
 
+_expected_sql_spark_model_constraints = """
+create or replace table <model_identifier>
+    using delta
+    as
+select
+  id,
+  color,
+  date_day
+from
+
+( select
+    1 as id,
+    'blue' as color,
+    '2019-01-01' as date_day ) as model_subq
+"""
+
 # Different on Spark:
 # - does not support a data type named 'text' (TODO handle this in the base test classes using string_type
 constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "")
+model_constraints_yml = constrained_model_schema_yml.replace("text", "string")
 
 
 class PyodbcSetup:
@@ -246,9 +265,11 @@ def expected_error_messages(self):
         return [
             "violate the new CHECK constraint",
             "DELTA_NEW_CHECK_CONSTRAINT_VIOLATION",
+            "DELTA_NEW_NOT_NULL_VIOLATION",
             "violate the new NOT NULL constraint",
             "(id > 0) violated by row with values:",  # incremental mats
             "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES",  # incremental mats
+            "NOT NULL constraint violated for column",
         ]
 
     def assert_expected_error_messages(self, error_message, expected_error_messages):
@@ -289,3 +310,28 @@ def models(self):
             "my_model.sql": my_incremental_model_sql,
             "constraints_schema.yml": constraints_yml,
         }
+
+
+# TODO: Like the tests above, this does test that model-level constraints don't
+# result in errors, but it does not verify that they are actually present in
+# Spark and that the ALTER TABLE statement actually ran.
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkModelConstraintsRuntimeEnforcement(BaseModelConstraintsRuntimeEnforcement):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_incremental_model_sql,
+            "constraints_schema.yml": model_constraints_yml,
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_sql(self):
+        return _expected_sql_spark_model_constraints

From ed3189286d6005f84e504b7568e29ab8f2de0c79 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 25 May 2023 14:19:08 -0500
Subject: [PATCH 445/603] [create-pull-request] automated change (#786)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                                         | 2 +-
 .changes/1.6.0-b2.md                                     | 5 +++++
 .../{unreleased => 1.6.0}/Features-20230427-123135.yaml  | 0
 CHANGELOG.md                                             | 9 ++++++++-
 dbt/adapters/spark/__version__.py                        | 2 +-
 setup.py                                                 | 2 +-
 6 files changed, 16 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.6.0-b2.md
 rename .changes/{unreleased => 1.6.0}/Features-20230427-123135.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 7540a87b..c218e1c7 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.6.0b1
+current_version = 1.6.0b2
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.6.0-b2.md b/.changes/1.6.0-b2.md
new file mode 100644
index 00000000..ea758062
--- /dev/null
+++ b/.changes/1.6.0-b2.md
@@ -0,0 +1,5 @@
+## dbt-spark 1.6.0-b2 - May 25, 2023
+
+### Features
+
+- All constraint types are supported, but not enforced. ([#656](https://github.com/dbt-labs/dbt-spark/issues/656), [#657](https://github.com/dbt-labs/dbt-spark/issues/657))
diff --git a/.changes/unreleased/Features-20230427-123135.yaml b/.changes/1.6.0/Features-20230427-123135.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230427-123135.yaml
rename to .changes/1.6.0/Features-20230427-123135.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d141dcb3..ec26d106 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,14 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.6.0-b2 - May 25, 2023
+
+### Features
+
+- All constraint types are supported, but not enforced. ([#656](https://github.com/dbt-labs/dbt-spark/issues/656), [#657](https://github.com/dbt-labs/dbt-spark/issues/657))
+
+
+
 ## dbt-spark 1.6.0-b1 - May 12, 2023
 
 ### Features
@@ -37,7 +45,6 @@
 - [@Fokko](https://github.com/Fokko) ([#758](https://github.com/dbt-labs/dbt-spark/issues/758), [#753](https://github.com/dbt-labs/dbt-spark/issues/753))
 - [@flvndh](https://github.com/flvndh) ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
 
-
 ## dbt-spark 1.6.0-a1 - April 17, 2023
 
 ## Previous Releases
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index cafa9196..21c2b283 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.6.0b1"
+version = "1.6.0b2"
diff --git a/setup.py b/setup.py
index 15cf8efa..914c553c 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.6.0b1"
+package_version = "1.6.0b2"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 1ecd8e902d0451c3a0c0db8393eeeef13f933516 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Tue, 30 May 2023 16:47:13 -0500
Subject: [PATCH 446/603] drop support for python 3.7 (#793)

* drop support for python 3.7

* add changelog entry

* missed change
---
 .changes/unreleased/Under the Hood-20230530-162533.yaml | 6 ++++++
 .github/workflows/main.yml                              | 4 ++--
 CONTRIBUTING.md                                         | 2 +-
 dev-requirements.txt                                    | 3 ---
 setup.py                                                | 9 ++++-----
 tox.ini                                                 | 2 +-
 6 files changed, 14 insertions(+), 12 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230530-162533.yaml

diff --git a/.changes/unreleased/Under the Hood-20230530-162533.yaml b/.changes/unreleased/Under the Hood-20230530-162533.yaml
new file mode 100644
index 00000000..857db085
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230530-162533.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: drop support for python 3.7
+time: 2023-05-30T16:25:33.109326-05:00
+custom:
+  Author: McKnight-42
+  Issue: "792"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f24aa74f..6b3d93b6 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -79,7 +79,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10"]
 
     env:
       TOXENV: "unit"
@@ -177,7 +177,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10"]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 57d6ee47..a61306ea 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -66,7 +66,7 @@ $EDITOR test.env
 There are a few methods for running tests locally.
 
 #### `tox`
-`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.7, Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py37`. The configuration of these tests are located in `tox.ini`.
+`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`.
 
 #### `pytest`
 Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like:
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 1bc886f7..fd8c6813 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -8,14 +8,12 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 black~=23.3
 bumpversion~=0.6.0
 click~=8.1
-flake8~=5.0;python_version=="3.7"
 flake8~=6.0;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
 mypy==1.2.0  # patch updates have historically introduced breaking changes
 pip-tools~=6.13
-pre-commit~=2.21;python_version=="3.7"
 pre-commit~=3.2;python_version>="3.8"
 pre-commit-hooks~=4.4
 pytest~=7.3
@@ -24,7 +22,6 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.2
 pytz~=2023.3
-tox~=3.0;python_version=="3.7"
 tox~=4.4;python_version>="3.8"
 types-pytz~=2023.3
 types-requests~=2.28
diff --git a/setup.py b/setup.py
index 914c553c..33bc7e4e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,10 +3,10 @@
 import sys
 import re
 
-# require python 3.7 or newer
-if sys.version_info < (3, 7):
+# require python 3.8 or newer
+if sys.version_info < (3, 8):
     print("Error: dbt does not support this version of Python.")
-    print("Please upgrade to Python 3.7 or higher.")
+    print("Please upgrade to Python 3.8 or higher.")
     sys.exit(1)
 
 
@@ -90,10 +90,9 @@ def _get_dbt_core_version():
         "Operating System :: Microsoft :: Windows",
         "Operating System :: MacOS :: MacOS X",
         "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
     ],
-    python_requires=">=3.7",
+    python_requires=">=3.8",
 )
diff --git a/tox.ini b/tox.ini
index 0747b61f..1bba38b6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,7 +2,7 @@
 skipsdist = True
 envlist = unit, flake8, integration-spark-thrift
 
-[testenv:{unit,py37,py38,py39,py310,py}]
+[testenv:{unit,py38,py39,py310,py}]
 allowlist_externals =
     /bin/bash
 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'

From 973f5385aebc5de6922e749e3db9bd1d27e7bc44 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 1 Jun 2023 11:04:41 -0500
Subject: [PATCH 447/603] adding link to 1.5 release notes (#788)

* adding link to 1.5 release notes

* changie merge

* rerun pre-commit
---
 .changes/0.0.0.md | 1 +
 CHANGELOG.md      | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
index bed77333..87fa376a 100644
--- a/.changes/0.0.0.md
+++ b/.changes/0.0.0.md
@@ -1,5 +1,6 @@
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md)
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
 - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ec26d106..6f95141b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,8 +11,6 @@
 
 - All constraint types are supported, but not enforced. ([#656](https://github.com/dbt-labs/dbt-spark/issues/656), [#657](https://github.com/dbt-labs/dbt-spark/issues/657))
 
-
-
 ## dbt-spark 1.6.0-b1 - May 12, 2023
 
 ### Features
@@ -49,6 +47,7 @@
 
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md)
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
 - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)

From b4f6b2ca26b2705ce489d38a53ed04bd41c1ddcf Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Fri, 2 Jun 2023 13:15:37 -0700
Subject: [PATCH 448/603] [Fix] Wrap constraint type 'check' expression in
 parentheses  (#750)

---
 .../unreleased/Fixes-20230512-151453.yaml     |  6 ++++
 dbt/include/spark/macros/adapters.sql         |  2 +-
 tests/functional/adapter/test_constraints.py  | 32 +++++++++++++------
 3 files changed, 30 insertions(+), 10 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230512-151453.yaml

diff --git a/.changes/unreleased/Fixes-20230512-151453.yaml b/.changes/unreleased/Fixes-20230512-151453.yaml
new file mode 100644
index 00000000..cb8c2b76
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230512-151453.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: wrap expression for check constraints in parentheses
+time: 2023-05-12T15:14:53.151149-04:00
+custom:
+  Author: michelleark
+  Issue: "7480"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 1ef5d79e..202564e4 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -197,7 +197,7 @@
     {% if constraint.type == 'check' and not is_incremental() %}
       {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint.expression ~ ";" ~ loop.index) -%}
       {% call statement() %}
-        alter table {{ relation }} add constraint {{ constraint.name if constraint.name else constraint_hash }} check {{ constraint.expression }};
+        alter table {{ relation }} add constraint {{ constraint.name if constraint.name else constraint_hash }} check ({{ constraint.expression }});
       {% endcall %}
     {% endif %}
   {% endfor %}
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 3bfa888a..541bdabc 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -20,6 +20,10 @@
     my_model_incremental_wrong_order_sql,
     my_model_incremental_wrong_name_sql,
     my_incremental_model_sql,
+    model_fk_constraint_schema_yml,
+    my_model_wrong_order_depends_on_fk_sql,
+    foreign_key_model_sql,
+    my_model_incremental_wrong_order_depends_on_fk_sql,
 )
 
 # constraints are enforced via 'alter' statements that run after table creation
@@ -33,7 +37,9 @@
   date_day
 from
 
-( select
+(
+    -- depends_on: <foreign_key_model_identifier>
+    select
     'blue' as color,
     1 as id,
     '2019-01-01' as date_day ) as model_subq
@@ -49,15 +55,20 @@
   date_day
 from
 
-( select
-    1 as id,
+(
+    -- depends_on: <foreign_key_model_identifier>
+    select
     'blue' as color,
+    1 as id,
     '2019-01-01' as date_day ) as model_subq
 """
 
 # Different on Spark:
 # - does not support a data type named 'text' (TODO handle this in the base test classes using string_type
 constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "")
+model_fk_constraint_schema_yml = model_fk_constraint_schema_yml.replace("text", "string").replace(
+    "primary key", ""
+)
 model_constraints_yml = constrained_model_schema_yml.replace("text", "string")
 
 
@@ -234,8 +245,9 @@ class TestSparkTableConstraintsDdlEnforcement(
     @pytest.fixture(scope="class")
     def models(self):
         return {
-            "my_model.sql": my_model_wrong_order_sql,
-            "constraints_schema.yml": constraints_yml,
+            "my_model.sql": my_model_wrong_order_depends_on_fk_sql,
+            "foreign_key_model.sql": foreign_key_model_sql,
+            "constraints_schema.yml": model_fk_constraint_schema_yml,
         }
 
 
@@ -246,8 +258,9 @@ class TestSparkIncrementalConstraintsDdlEnforcement(
     @pytest.fixture(scope="class")
     def models(self):
         return {
-            "my_model.sql": my_model_incremental_wrong_order_sql,
-            "constraints_schema.yml": constraints_yml,
+            "my_model.sql": my_model_incremental_wrong_order_depends_on_fk_sql,
+            "foreign_key_model.sql": foreign_key_model_sql,
+            "constraints_schema.yml": model_fk_constraint_schema_yml,
         }
 
 
@@ -328,8 +341,9 @@ def project_config_update(self):
     @pytest.fixture(scope="class")
     def models(self):
         return {
-            "my_model.sql": my_incremental_model_sql,
-            "constraints_schema.yml": model_constraints_yml,
+            "my_model.sql": my_model_wrong_order_depends_on_fk_sql,
+            "foreign_key_model.sql": foreign_key_model_sql,
+            "constraints_schema.yml": model_fk_constraint_schema_yml,
         }
 
     @pytest.fixture(scope="class")

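For illustration, a minimal Python sketch of the statement the updated macro renders once the check expression is wrapped in parentheses; the relation name, constraint name, and expression below are made up, and the md5-hash fallback for unnamed constraints is only noted in a comment:

    # Sketch of the rendered DDL; all names and the expression are illustrative.
    relation = "analytics.my_model"
    constraint_name = "my_model_id_check"   # the macro falls back to an md5 hash when unnamed
    expression = "id > 0 or id is null"

    ddl = f"alter table {relation} add constraint {constraint_name} check ({expression});"
    print(ddl)
    # alter table analytics.my_model add constraint my_model_id_check check (id > 0 or id is null);

Wrapping the expression keeps a compound condition like the one above grouped as a single check predicate in the generated statement.
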
From c22d19ab083c7aac6453a82e172bb03d9eeac432 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Wed, 7 Jun 2023 09:57:38 -0700
Subject: [PATCH 449/603] Standardize the adapter for core debug changes (#794)

* Standardize the adapter for core debug changes

* Review tweaks

* Fix tests caused by core merge of equal macro changes.

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .../unreleased/Features-20230604-043421.yaml  |  6 +++++
 dbt/adapters/spark/connections.py             |  3 ++-
 dbt/adapters/spark/impl.py                    |  4 ++++
 tests/functional/adapter/utils/test_utils.py  | 22 +++++++++++++++++--
 4 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 .changes/unreleased/Features-20230604-043421.yaml

diff --git a/.changes/unreleased/Features-20230604-043421.yaml b/.changes/unreleased/Features-20230604-043421.yaml
new file mode 100644
index 00000000..153b43e5
--- /dev/null
+++ b/.changes/unreleased/Features-20230604-043421.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Standardize the _connection_keys and debug_query for `dbt debug`.
+time: 2023-06-04T04:34:21.968669-07:00
+custom:
+  Author: versusfacit
+  Issue: PR754
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index df0dcb5e..9d3e385b 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,4 +1,5 @@
 from contextlib import contextmanager
+from typing import Tuple
 
 import dbt.exceptions
 from dbt.adapters.base import Credentials
@@ -147,7 +148,7 @@ def type(self):
     def unique_field(self):
         return self.host
 
-    def _connection_keys(self):
+    def _connection_keys(self) -> Tuple[str, ...]:
         return ("host", "port", "cluster", "endpoint", "schema", "organization")
 
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index bfe890eb..1d4a6497 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -477,6 +477,10 @@ def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
                     grants_dict.update({privilege: [grantee]})
         return grants_dict
 
+    def debug_query(self):
+        """Override for DebugTask method"""
+        self.execute("select 1 as id")
+
 
 # spark does something interesting with joins when both tables have the same
 # static values for the join condition and complains that the join condition is
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 8f222b9a..3be3ff2a 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -21,6 +21,7 @@
 from dbt.tests.adapter.utils.test_replace import BaseReplace
 from dbt.tests.adapter.utils.test_right import BaseRight
 from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast
+
 from dbt.tests.adapter.utils.test_split_part import BaseSplitPart
 from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral
 
@@ -29,6 +30,19 @@
 from dbt.tests.adapter.utils.fixture_listagg import models__test_listagg_yml
 from tests.functional.adapter.utils.fixture_listagg import models__test_listagg_no_order_by_sql
 
+seeds__data_split_part_csv = """parts,split_on,result_1,result_2,result_3
+a|b|c,|,a,b,c
+1|2|3,|,1,2,3
+EMPTY|EMPTY|EMPTY,|,EMPTY,EMPTY,EMPTY
+"""
+
+seeds__data_last_day_csv = """date_day,date_part,result
+2018-01-02,month,2018-01-31
+2018-01-02,quarter,2018-03-31
+2018-01-02,year,2018-12-31
+"""
+# skipped: ,month,
+
 
 class TestAnyValue(BaseAnyValue):
     pass
@@ -96,7 +110,9 @@ class TestIntersect(BaseIntersect):
 
 
 class TestLastDay(BaseLastDay):
-    pass
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"data_last_day.csv": seeds__data_last_day_csv}
 
 
 class TestLength(BaseLength):
@@ -135,7 +151,9 @@ class TestSafeCast(BaseSafeCast):
 
 
 class TestSplitPart(BaseSplitPart):
-    pass
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"data_split_part.csv": seeds__data_split_part_csv}
 
 
 class TestStringLiteral(BaseStringLiteral):

From e741034160444eb7aa06aef7550a366cdcacc913 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 8 Jun 2023 18:03:39 -0700
Subject: [PATCH 450/603] [create-pull-request] automated change (#800)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                                 |  2 +-
 .changes/1.6.0-b3.md                             | 13 +++++++++++++
 .../Features-20230604-043421.yaml                |  0
 .../Fixes-20230512-151453.yaml                   |  0
 .../Under the Hood-20230530-162533.yaml          |  0
 CHANGELOG.md                                     | 16 ++++++++++++++++
 dbt/adapters/spark/__version__.py                |  2 +-
 setup.py                                         |  2 +-
 8 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 .changes/1.6.0-b3.md
 rename .changes/{unreleased => 1.6.0}/Features-20230604-043421.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Fixes-20230512-151453.yaml (100%)
 rename .changes/{unreleased => 1.6.0}/Under the Hood-20230530-162533.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index c218e1c7..f0c90061 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.6.0b2
+current_version = 1.6.0b3
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.6.0-b3.md b/.changes/1.6.0-b3.md
new file mode 100644
index 00000000..3379fe50
--- /dev/null
+++ b/.changes/1.6.0-b3.md
@@ -0,0 +1,13 @@
+## dbt-spark 1.6.0-b3 - June 09, 2023
+
+### Features
+
+- Standardize the _connection_keys and debug_query for `dbt debug`. ([#PR754](https://github.com/dbt-labs/dbt-spark/issues/PR754))
+
+### Fixes
+
+- wrap expression for check constraints in parentheses ([#7480](https://github.com/dbt-labs/dbt-spark/issues/7480))
+
+### Under the Hood
+
+- drop support for python 3.7 ([#792](https://github.com/dbt-labs/dbt-spark/issues/792))
diff --git a/.changes/unreleased/Features-20230604-043421.yaml b/.changes/1.6.0/Features-20230604-043421.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230604-043421.yaml
rename to .changes/1.6.0/Features-20230604-043421.yaml
diff --git a/.changes/unreleased/Fixes-20230512-151453.yaml b/.changes/1.6.0/Fixes-20230512-151453.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20230512-151453.yaml
rename to .changes/1.6.0/Fixes-20230512-151453.yaml
diff --git a/.changes/unreleased/Under the Hood-20230530-162533.yaml b/.changes/1.6.0/Under the Hood-20230530-162533.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230530-162533.yaml
rename to .changes/1.6.0/Under the Hood-20230530-162533.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f95141b..9f525b3b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,22 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.6.0-b3 - June 09, 2023
+
+### Features
+
+- Standardize the _connection_keys and debug_query for `dbt debug`. ([#PR754](https://github.com/dbt-labs/dbt-spark/issues/PR754))
+
+### Fixes
+
+- wrap expression for check constraints in parentheses ([#7480](https://github.com/dbt-labs/dbt-spark/issues/7480))
+
+### Under the Hood
+
+- drop support for python 3.7 ([#792](https://github.com/dbt-labs/dbt-spark/issues/792))
+
+
+
 ## dbt-spark 1.6.0-b2 - May 25, 2023
 
 ### Features
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 21c2b283..0c2870f8 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.6.0b2"
+version = "1.6.0b3"
diff --git a/setup.py b/setup.py
index 33bc7e4e..2cd78c3a 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.6.0b2"
+package_version = "1.6.0b3"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 4e193c03d262976936585b1993f05fb0591a92c9 Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Tue, 13 Jun 2023 09:52:54 -0700
Subject: [PATCH 451/603] TestSparkConstraintQuotedColumn (#798)

* TestSparkConstraintQuotedColumn

* test on pyodbc

* linting

* test quoting char

* linting
---
 dev-requirements.txt                         |  4 +--
 tests/functional/adapter/test_constraints.py | 34 ++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index fd8c6813..7873b041 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git@jerco/7370-model-contracts-respect-quoting#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@jerco/7370-model-contracts-respect-quoting#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 541bdabc..41b50ef8 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -8,6 +8,7 @@
     BaseConstraintsRollback,
     BaseIncrementalConstraintsRuntimeDdlEnforcement,
     BaseIncrementalConstraintsRollback,
+    BaseConstraintQuotedColumn,
 )
 from dbt.tests.adapter.constraints.fixtures import (
     constrained_model_schema_yml,
@@ -24,6 +25,8 @@
     my_model_wrong_order_depends_on_fk_sql,
     foreign_key_model_sql,
     my_model_incremental_wrong_order_depends_on_fk_sql,
+    my_model_with_quoted_column_name_sql,
+    model_quoted_column_schema_yml,
 )
 
 # constraints are enforced via 'alter' statements that run after table creation
@@ -264,6 +267,37 @@ def models(self):
         }
 
 
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
+class TestSparkConstraintQuotedColumn(PyodbcSetup, BaseConstraintQuotedColumn):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_with_quoted_column_name_sql,
+            "constraints_schema.yml": model_quoted_column_schema_yml.replace(
+                "text", "string"
+            ).replace('"from"', "`from`"),
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_sql(self):
+        return """
+create or replace table <model_identifier>
+    using delta
+    as
+select
+  id,
+  `from`,
+  date_day
+from
+
+(
+    select
+    'blue' as `from`,
+    1 as id,
+    '2019-01-01' as date_day ) as model_subq
+"""
+
+
 class BaseSparkConstraintsRollbackSetup:
     @pytest.fixture(scope="class")
     def project_config_update(self):

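The `.replace('"from"', "`from`")` in the fixture above reflects the Spark adapter's identifier quoting, which uses backticks rather than ANSI double quotes. A minimal sketch, with the reserved column name `from` used purely as an example:

    # Sketch of the adapter's backtick quoting for identifiers.
    def spark_quote(identifier: str) -> str:
        return "`{}`".format(identifier)

    assert spark_quote("from") == "`from`"
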
From 3faffd8ee48c9d70cd1fbd124c71f4824d8c1b9f Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 13 Jun 2023 10:11:46 -0700
Subject: [PATCH 452/603] revert change to dev-requirements.txt (#805)

---
 dev-requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 7873b041..fd8c6813 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git@jerco/7370-model-contracts-respect-quoting#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git@jerco/7370-model-contracts-respect-quoting#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor

From d88bd35b11bc734eedc5ef4244e9e4979014b8f1 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 15 Jun 2023 09:51:36 -0700
Subject: [PATCH 453/603] skip TestLastDay in spark session (#807)

---
 tests/functional/adapter/utils/test_utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 3be3ff2a..ba03c5f6 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -41,6 +41,8 @@
 2018-01-02,quarter,2018-03-31
 2018-01-02,year,2018-12-31
 """
+
+
 # skipped: ,month,
 
 
@@ -109,6 +111,7 @@ class TestIntersect(BaseIntersect):
     pass
 
 
+@pytest.mark.skip_profile("spark_session")  # spark session crashes in CI
 class TestLastDay(BaseLastDay):
     @pytest.fixture(scope="class")
     def seeds(self):

From a1d161c76bc1a9e113d27b944e4fa3c64d7e7087 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Fri, 16 Jun 2023 10:05:35 -0700
Subject: [PATCH 454/603] fix utils.py (#809)

---
 tests/unit/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index ac8c6224..722f4030 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -75,6 +75,7 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars=
         project_root=project_root,
         project_dict=project,
         packages_dict=packages,
+        dependent_projects_dict={},
         selectors_dict=selectors,
     )
     return partial.render(renderer)

From 8ea1597f3757adef4c940e7b0cf88622c46b5322 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@tabular.io>
Date: Sat, 24 Jun 2023 20:41:07 +0200
Subject: [PATCH 455/603] Disallow untyped `def`'s (#767)

* Disallow untyped `def`'s

---------

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .../unreleased/Fixes-20230510-163110.yaml     |  6 ++
 .pre-commit-config.yaml                       |  4 +-
 dbt/adapters/spark/column.py                  |  2 +-
 dbt/adapters/spark/connections.py             | 80 +++++++++++--------
 dbt/adapters/spark/impl.py                    | 31 +++----
 dbt/adapters/spark/python_submissions.py      | 18 ++---
 dbt/adapters/spark/relation.py                |  4 +-
 dbt/adapters/spark/session.py                 | 28 ++++---
 8 files changed, 101 insertions(+), 72 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230510-163110.yaml

diff --git a/.changes/unreleased/Fixes-20230510-163110.yaml b/.changes/unreleased/Fixes-20230510-163110.yaml
new file mode 100644
index 00000000..06672ac9
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230510-163110.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Disallow untyped `def`'s
+time: 2023-05-10T16:31:10.593358+02:00
+custom:
+  Author: Fokko
+  Issue: "760"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ddec9b66..5e7fdbd0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,7 +39,7 @@ repos:
     alias: flake8-check
     stages: [manual]
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.1.1
+  rev: v1.2.0
   hooks:
   - id: mypy
     # N.B.: Mypy is... a bit fragile.
@@ -52,7 +52,7 @@ repos:
     # of our control to the mix.  Unfortunately, there's nothing we can
     # do about per pre-commit's author.
     # See https://github.com/pre-commit/pre-commit/issues/730 for details.
-    args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases, --warn-unused-ignores]
+    args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases, --warn-unused-ignores, --disallow-untyped-defs]
     files: ^dbt/adapters/.*
     language: system
   - id: mypy
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index 8100fa45..bde49a49 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -26,7 +26,7 @@ def can_expand_to(self: Self, other_column: Self) -> bool:  # type: ignore
         """returns True if both columns are strings"""
         return self.is_string() and other_column.is_string()
 
-    def literal(self, value):
+    def literal(self, value: Any) -> str:
         return "cast({} as {})".format(value, self.dtype)
 
     @property
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 9d3e385b..bde614fa 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,5 +1,4 @@
 from contextlib import contextmanager
-from typing import Tuple
 
 import dbt.exceptions
 from dbt.adapters.base import Credentials
@@ -23,10 +22,10 @@
     pyodbc = None
 from datetime import datetime
 import sqlparams
-
+from dbt.contracts.connection import Connection
 from hologram.helpers import StrEnum
 from dataclasses import dataclass, field
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable
 
 try:
     from thrift.transport.TSSLSocket import TSSLSocket
@@ -45,7 +44,7 @@
 NUMBERS = DECIMALS + (int, float)
 
 
-def _build_odbc_connnection_string(**kwargs) -> str:
+def _build_odbc_connnection_string(**kwargs: Any) -> str:
     return ";".join([f"{k}={v}" for k, v in kwargs.items()])
 
 
@@ -78,17 +77,17 @@ class SparkCredentials(Credentials):
     retry_all: bool = False
 
     @classmethod
-    def __pre_deserialize__(cls, data):
+    def __pre_deserialize__(cls, data: Any) -> Any:
         data = super().__pre_deserialize__(data)
         if "database" not in data:
             data["database"] = None
         return data
 
     @property
-    def cluster_id(self):
+    def cluster_id(self) -> Optional[str]:
         return self.cluster
 
-    def __post_init__(self):
+    def __post_init__(self) -> None:
         # spark classifies database and schema as the same thing
         if self.database is not None and self.database != self.schema:
             raise dbt.exceptions.DbtRuntimeError(
@@ -141,15 +140,15 @@ def __post_init__(self):
                 ) from e
 
     @property
-    def type(self):
+    def type(self) -> str:
         return "spark"
 
     @property
-    def unique_field(self):
+    def unique_field(self) -> str:
         return self.host
 
     def _connection_keys(self) -> Tuple[str, ...]:
-        return ("host", "port", "cluster", "endpoint", "schema", "organization")
+        return "host", "port", "cluster", "endpoint", "schema", "organization"
 
 
 class PyhiveConnectionWrapper(object):
@@ -157,15 +156,18 @@ class PyhiveConnectionWrapper(object):
 
     # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html  # noqa
 
-    def __init__(self, handle):
+    handle: "pyodbc.Connection"
+    _cursor: "Optional[pyodbc.Cursor]"
+
+    def __init__(self, handle: "pyodbc.Connection") -> None:
         self.handle = handle
         self._cursor = None
 
-    def cursor(self):
+    def cursor(self) -> "PyhiveConnectionWrapper":
         self._cursor = self.handle.cursor()
         return self
 
-    def cancel(self):
+    def cancel(self) -> None:
         if self._cursor:
             # Handle bad response in the pyhive lib when
             # the connection is cancelled
@@ -174,7 +176,7 @@ def cancel(self):
             except EnvironmentError as exc:
                 logger.debug("Exception while cancelling query: {}".format(exc))
 
-    def close(self):
+    def close(self) -> None:
         if self._cursor:
             # Handle bad response in the pyhive lib when
             # the connection is cancelled
@@ -184,13 +186,14 @@ def close(self):
                 logger.debug("Exception while closing cursor: {}".format(exc))
         self.handle.close()
 
-    def rollback(self, *args, **kwargs):
+    def rollback(self, *args: Any, **kwargs: Any) -> None:
         logger.debug("NotImplemented: rollback")
 
-    def fetchall(self):
+    def fetchall(self) -> List["pyodbc.Row"]:
+        assert self._cursor, "Cursor not available"
         return self._cursor.fetchall()
 
-    def execute(self, sql, bindings=None):
+    def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
         if sql.strip().endswith(";"):
             sql = sql.strip()[:-1]
 
@@ -212,6 +215,8 @@ def execute(self, sql, bindings=None):
         if bindings is not None:
             bindings = [self._fix_binding(binding) for binding in bindings]
 
+        assert self._cursor, "Cursor not available"
+
         self._cursor.execute(sql, bindings, async_=True)
         poll_state = self._cursor.poll()
         state = poll_state.operationState
@@ -245,7 +250,7 @@ def execute(self, sql, bindings=None):
         logger.debug("Poll status: {}, query complete".format(state))
 
     @classmethod
-    def _fix_binding(cls, value):
+    def _fix_binding(cls, value: Any) -> Union[float, str]:
         """Convert complex datatypes to primitives that can be loaded by
         the Spark driver"""
         if isinstance(value, NUMBERS):
@@ -256,12 +261,14 @@ def _fix_binding(cls, value):
             return value
 
     @property
-    def description(self):
+    def description(self) -> Tuple[Tuple[str, Any, int, int, int, int, bool]]:
+        assert self._cursor, "Cursor not available"
         return self._cursor.description
 
 
 class PyodbcConnectionWrapper(PyhiveConnectionWrapper):
-    def execute(self, sql, bindings=None):
+    def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
+        assert self._cursor, "Cursor not available"
         if sql.strip().endswith(";"):
             sql = sql.strip()[:-1]
         # pyodbc does not handle a None type binding!
@@ -282,7 +289,7 @@ class SparkConnectionManager(SQLConnectionManager):
     SPARK_CONNECTION_URL = "{host}:{port}" + SPARK_CLUSTER_HTTP_PATH
 
     @contextmanager
-    def exception_handler(self, sql):
+    def exception_handler(self, sql: str) -> Generator[None, None, None]:
         try:
             yield
 
@@ -299,30 +306,30 @@ def exception_handler(self, sql):
             else:
                 raise dbt.exceptions.DbtRuntimeError(str(exc))
 
-    def cancel(self, connection):
+    def cancel(self, connection: Connection) -> None:
         connection.handle.cancel()
 
     @classmethod
-    def get_response(cls, cursor) -> AdapterResponse:
+    def get_response(cls, cursor: Any) -> AdapterResponse:
         # https://github.com/dbt-labs/dbt-spark/issues/142
         message = "OK"
         return AdapterResponse(_message=message)
 
     # No transactions on Spark....
-    def add_begin_query(self, *args, **kwargs):
+    def add_begin_query(self, *args: Any, **kwargs: Any) -> None:
         logger.debug("NotImplemented: add_begin_query")
 
-    def add_commit_query(self, *args, **kwargs):
+    def add_commit_query(self, *args: Any, **kwargs: Any) -> None:
         logger.debug("NotImplemented: add_commit_query")
 
-    def commit(self, *args, **kwargs):
+    def commit(self, *args: Any, **kwargs: Any) -> None:
         logger.debug("NotImplemented: commit")
 
-    def rollback(self, *args, **kwargs):
+    def rollback(self, *args: Any, **kwargs: Any) -> None:
         logger.debug("NotImplemented: rollback")
 
     @classmethod
-    def validate_creds(cls, creds, required):
+    def validate_creds(cls, creds: Any, required: Iterable[str]) -> None:
         method = creds.method
 
         for key in required:
@@ -333,7 +340,7 @@ def validate_creds(cls, creds, required):
                 )
 
     @classmethod
-    def open(cls, connection):
+    def open(cls, connection: Connection) -> Connection:
         if connection.state == ConnectionState.OPEN:
             logger.debug("Connection is already open, skipping open.")
             return connection
@@ -450,7 +457,7 @@ def open(cls, connection):
                         SessionConnectionWrapper,
                     )
 
-                    handle = SessionConnectionWrapper(Connection())
+                    handle = SessionConnectionWrapper(Connection())  # type: ignore
                 else:
                     raise dbt.exceptions.DbtProfileError(
                         f"invalid credential method: {creds.method}"
@@ -487,7 +494,7 @@ def open(cls, connection):
                 else:
                     raise dbt.exceptions.FailedToConnectError("failed to connect") from e
         else:
-            raise exc
+            raise exc  # type: ignore
 
         connection.handle = handle
         connection.state = ConnectionState.OPEN
@@ -507,7 +514,14 @@ def data_type_code_to_name(cls, type_code: Union[type, str]) -> str:  # type: ig
         return type_code.__name__.upper()
 
 
-def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None):
+def build_ssl_transport(
+    host: str,
+    port: int,
+    username: str,
+    auth: str,
+    kerberos_service_name: str,
+    password: Optional[str] = None,
+) -> "thrift_sasl.TSaslClientTransport":
     transport = None
     if port is None:
         port = 10000
@@ -531,7 +545,7 @@ def build_ssl_transport(host, port, username, auth, kerberos_service_name, passw
                 # to be nonempty.
                 password = "x"
 
-        def sasl_factory():
+        def sasl_factory() -> sasl.Client:
             sasl_client = sasl.Client()
             sasl_client.setAttr("host", host)
             if sasl_auth == "GSSAPI":
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 1d4a6497..2864c4f3 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,7 +1,10 @@
 import re
 from concurrent.futures import Future
 from dataclasses import dataclass
-from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable
+from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable, Set
+
+from dbt.adapters.base.relation import InformationSchema
+from dbt.contracts.graph.manifest import Manifest
 
 from typing_extensions import TypeAlias
 
@@ -109,27 +112,27 @@ def date_function(cls) -> str:
         return "current_timestamp()"
 
     @classmethod
-    def convert_text_type(cls, agate_table, col_idx):
+    def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         return "string"
 
     @classmethod
-    def convert_number_type(cls, agate_table, col_idx):
+    def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
         return "double" if decimals else "bigint"
 
     @classmethod
-    def convert_date_type(cls, agate_table, col_idx):
+    def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         return "date"
 
     @classmethod
-    def convert_time_type(cls, agate_table, col_idx):
+    def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         return "time"
 
     @classmethod
-    def convert_datetime_type(cls, agate_table, col_idx):
+    def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         return "timestamp"
 
-    def quote(self, identifier):
+    def quote(self, identifier: str) -> str:  # type: ignore
         return "`{}`".format(identifier)
 
     def _get_relation_information(self, row: agate.Row) -> RelationInfo:
@@ -344,7 +347,7 @@ def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str,
             as_dict["table_database"] = None
             yield as_dict
 
-    def get_catalog(self, manifest):
+    def get_catalog(self, manifest: Manifest) -> Tuple[agate.Table, List[Exception]]:
         schema_map = self._get_catalog_schemas(manifest)
         if len(schema_map) > 1:
             raise dbt.exceptions.CompilationError(
@@ -370,9 +373,9 @@ def get_catalog(self, manifest):
 
     def _get_one_catalog(
         self,
-        information_schema,
-        schemas,
-        manifest,
+        information_schema: InformationSchema,
+        schemas: Set[str],
+        manifest: Manifest,
     ) -> agate.Table:
         if len(schemas) != 1:
             raise dbt.exceptions.CompilationError(
@@ -388,7 +391,7 @@ def _get_one_catalog(
             columns.extend(self._get_columns_for_catalog(relation))
         return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER)
 
-    def check_schema_exists(self, database, schema):
+    def check_schema_exists(self, database: str, schema: str) -> bool:
         results = self.execute_macro(LIST_SCHEMAS_MACRO_NAME, kwargs={"database": database})
 
         exists = True if schema in [row[0] for row in results] else False
@@ -425,7 +428,7 @@ def get_rows_different_sql(
     # This is for use in the test suite
     # Spark doesn't have 'commit' and 'rollback', so this override
     # doesn't include those commands.
-    def run_sql_for_tests(self, sql, fetch, conn):
+    def run_sql_for_tests(self, sql, fetch, conn):  # type: ignore
         cursor = conn.handle.cursor()
         try:
             cursor.execute(sql)
@@ -477,7 +480,7 @@ def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
                     grants_dict.update({privilege: [grantee]})
         return grants_dict
 
-    def debug_query(self):
+    def debug_query(self) -> None:
         """Override for DebugTask method"""
         self.execute("select 1 as id")
 
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index 47529e07..89831ca7 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -1,7 +1,7 @@
 import base64
 import time
 import requests
-from typing import Any, Dict
+from typing import Any, Dict, Callable, Iterable
 import uuid
 
 import dbt.exceptions
@@ -149,18 +149,18 @@ def submit(self, compiled_code: str) -> None:
 
     def polling(
         self,
-        status_func,
-        status_func_kwargs,
-        get_state_func,
-        terminal_states,
-        expected_end_state,
-        get_state_msg_func,
+        status_func: Callable,
+        status_func_kwargs: Dict,
+        get_state_func: Callable,
+        terminal_states: Iterable[str],
+        expected_end_state: str,
+        get_state_msg_func: Callable,
     ) -> Dict:
         state = None
         start = time.time()
         exceeded_timeout = False
-        response = {}
-        while state not in terminal_states:
+        response: Dict = {}
+        while state is None or state not in terminal_states:
             if time.time() - start > self.timeout:
                 exceeded_timeout = True
                 break
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index f5a3e3e1..e80f2623 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -36,11 +36,11 @@ class SparkRelation(BaseRelation):
     # TODO: make this a dict everywhere
     information: Optional[str] = None
 
-    def __post_init__(self):
+    def __post_init__(self) -> None:
         if self.database != self.schema and self.database:
             raise DbtRuntimeError("Cannot set database in spark!")
 
-    def render(self):
+    def render(self) -> str:
         if self.include_policy.database and self.include_policy.schema:
             raise DbtRuntimeError(
                 "Got a spark relation with schema and database set to "
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index d275c73c..5e4bcc49 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -4,7 +4,7 @@
 
 import datetime as dt
 from types import TracebackType
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Optional, Tuple, Union
 
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
@@ -172,33 +172,38 @@ def cursor(self) -> Cursor:
 
 
 class SessionConnectionWrapper(object):
-    """Connection wrapper for the sessoin connection method."""
+    """Connection wrapper for the session connection method."""
 
-    def __init__(self, handle):
+    handle: Connection
+    _cursor: Optional[Cursor]
+
+    def __init__(self, handle: Connection) -> None:
         self.handle = handle
         self._cursor = None
 
-    def cursor(self):
+    def cursor(self) -> "SessionConnectionWrapper":
         self._cursor = self.handle.cursor()
         return self
 
-    def cancel(self):
+    def cancel(self) -> None:
         logger.debug("NotImplemented: cancel")
 
-    def close(self):
+    def close(self) -> None:
         if self._cursor:
             self._cursor.close()
 
-    def rollback(self, *args, **kwargs):
+    def rollback(self, *args: Any, **kwargs: Any) -> None:
         logger.debug("NotImplemented: rollback")
 
-    def fetchall(self):
+    def fetchall(self) -> Optional[List[Row]]:
+        assert self._cursor, "Cursor not available"
         return self._cursor.fetchall()
 
-    def execute(self, sql, bindings=None):
+    def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
         if sql.strip().endswith(";"):
             sql = sql.strip()[:-1]
 
+        assert self._cursor, "Cursor not available"
         if bindings is None:
             self._cursor.execute(sql)
         else:
@@ -206,11 +211,12 @@ def execute(self, sql, bindings=None):
             self._cursor.execute(sql, *bindings)
 
     @property
-    def description(self):
+    def description(self) -> List[Tuple[str, str, None, None, None, None, bool]]:
+        assert self._cursor, "Cursor not available"
         return self._cursor.description
 
     @classmethod
-    def _fix_binding(cls, value):
+    def _fix_binding(cls, value: Any) -> Union[str, float]:
         """Convert complex datatypes to primitives that can be loaded by
         the Spark driver"""
         if isinstance(value, NUMBERS):

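A minimal sketch of what `--disallow-untyped-defs` enforces: mypy reports any function definition that lacks parameter or return annotations, which is why the adapter signatures above gained explicit types. The standalone `execute` below is illustrative, not the adapter's method:

    from typing import Any, List, Optional

    # Under --disallow-untyped-defs, mypy rejects a signature like
    #     def execute(self, sql, bindings=None): ...
    # The annotated form passes:
    def execute(sql: str, bindings: Optional[List[Any]] = None) -> None:
        ...
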
From b4a2f94c1601bb762a8be8a44085d23b3188b3cb Mon Sep 17 00:00:00 2001
From: tim-steinkuhler <tim.steinkuhler@gmail.com>
Date: Mon, 26 Jun 2023 19:56:05 +0200
Subject: [PATCH 456/603] Allow hostname to be provided with or without a
 trailing slash (#784)

* Ignore trailing / in host name

The trailing / is often added automatically when copy-pasting.
Before this change, you would get "Database Error - failed to connect".

After this change, a trailing slash won't prevent your connection.

* Update changelog

---------

Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .changes/unreleased/Fixes-20230623-112100.yaml | 6 ++++++
 dbt/adapters/spark/connections.py              | 3 +++
 2 files changed, 9 insertions(+)
 create mode 100644 .changes/unreleased/Fixes-20230623-112100.yaml

diff --git a/.changes/unreleased/Fixes-20230623-112100.yaml b/.changes/unreleased/Fixes-20230623-112100.yaml
new file mode 100644
index 00000000..d6129c90
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230623-112100.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Allow hostname to be provided with or without trailing slash
+time: 2023-06-23T11:21:00.901430172Z
+custom:
+  Author: tim-steinkuhler
+  Issue: "302"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index bde614fa..2a7f8188 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -139,6 +139,9 @@ def __post_init__(self) -> None:
                     f"ImportError({e.msg})"
                 ) from e
 
+        if self.method != SparkConnectionMethod.SESSION:
+            self.host = self.host.rstrip("/")
+
     @property
     def type(self) -> str:
         return "spark"

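A minimal sketch of the normalization added in `__post_init__`: for every connection method other than `session`, the host now has any trailing slash stripped, so both spellings below resolve to the same endpoint (the hostname is illustrative):

    def normalize_host(host: str) -> str:
        # mirrors `self.host = self.host.rstrip("/")` in SparkCredentials.__post_init__
        return host.rstrip("/")

    assert normalize_host("my-workspace.cloud.databricks.com/") == "my-workspace.cloud.databricks.com"
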
From 682c3a314737e062860c1df6314c9e67df7cfd4d Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 29 Jun 2023 10:57:41 -0500
Subject: [PATCH 457/603] remove dependent_projects_dict from utils.py (#819)

---
 .changes/unreleased/Fixes-20230628-162413.yaml | 6 ++++++
 tests/unit/utils.py                            | 1 -
 2 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20230628-162413.yaml

diff --git a/.changes/unreleased/Fixes-20230628-162413.yaml b/.changes/unreleased/Fixes-20230628-162413.yaml
new file mode 100644
index 00000000..ffa86652
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230628-162413.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Remove dependent_projects argument from PartialProject call in unit tests
+time: 2023-06-28T16:24:13.288246-05:00
+custom:
+  Author: McKnight-42
+  Issue: "7955"
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 722f4030..ac8c6224 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -75,7 +75,6 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars=
         project_root=project_root,
         project_dict=project,
         packages_dict=packages,
-        dependent_projects_dict={},
         selectors_dict=selectors,
     )
     return partial.render(renderer)

From 46e991aeed00f9bdf055aa2feab8d40d70bcada0 Mon Sep 17 00:00:00 2001
From: Nathaniel May <nathaniel.may@fishtownanalytics.com>
Date: Mon, 10 Jul 2023 13:47:28 -0400
Subject: [PATCH 458/603] update pr template (#831)

---
 .github/pull_request_template.md | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 11381456..a3c340cc 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,26 +1,35 @@
 resolves #
+[docs](https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose) dbt-labs/docs.getdbt.com/#
 
 <!---
   Include the number of the issue addressed by this PR above if applicable.
   PRs for code changes without an associated issue *will not be merged*.
   See CONTRIBUTING.md for more information.
 
-  Example:
-    resolves #1234
+  Include the number of the docs issue that was opened for this PR. If
+  this change has no user-facing implications, "N/A" suffices instead. New
+  docs tickets can be created by clicking the link above or by going to
+  https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose.
 -->
 
-### Description
+### Problem
 
 <!---
-  Describe the Pull Request here. Add any references and info to help reviewers
-  understand your changes. Include any tradeoffs you considered.
+  Describe the problem this PR is solving. What is the application state
+  before this PR is merged?
+-->
+
+### Solution
+
+<!---
+  Describe the way this PR solves the above problem. Add as much detail as you
+  can to help reviewers understand your changes. Include any alternatives and
+  tradeoffs you considered.
 -->
 
 ### Checklist
 
-- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md) and understand what's expected of me
-- [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
+- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-core/blob/main/CONTRIBUTING.md) and understand what's expected of me
 - [ ] I have run this code in development and it appears to resolve the stated issue
 - [ ] This PR includes tests, or tests are not required/relevant for this PR
-- [ ] I have [opened an issue to add/update docs](https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose), or docs changes are not required/relevant for this PR
-- [ ] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#Adding-CHANGELOG-Entry)
+- [ ] This PR has no interface changes (e.g. macros, cli, logs, json artifacts, config files, adapter interface, etc) or this PR has already received feedback and approval from Product or DX

From b685297b1dd33307dbc6bacdddb98c7b947345bb Mon Sep 17 00:00:00 2001
From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com>
Date: Tue, 11 Jul 2023 10:29:06 -0500
Subject: [PATCH 459/603] Fix split part negative part number (#689)

* pin dev branch for tests

* update split part

* changie

* update to delimiter text for replace

* whitespace

* update to new core branch

* Add a negative number (rather than subtract a positive number)

* Expected negative test case

* Restore original dev-requirements.txt

---------

Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
---
 .../unreleased/Fixes-20230319-155618.yaml     |  6 ++++
 dbt/include/spark/macros/utils/split_part.sql | 32 +++++++++++++++----
 tests/functional/adapter/utils/test_utils.py  |  8 ++---
 3 files changed, 36 insertions(+), 10 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230319-155618.yaml

diff --git a/.changes/unreleased/Fixes-20230319-155618.yaml b/.changes/unreleased/Fixes-20230319-155618.yaml
new file mode 100644
index 00000000..58c88727
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230319-155618.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: allow negative args for spark split part
+time: 2023-03-19T15:56:18.630146-05:00
+custom:
+  Author: dave-connors-3
+  Issue: "688"
diff --git a/dbt/include/spark/macros/utils/split_part.sql b/dbt/include/spark/macros/utils/split_part.sql
index d5ae3092..2da03376 100644
--- a/dbt/include/spark/macros/utils/split_part.sql
+++ b/dbt/include/spark/macros/utils/split_part.sql
@@ -9,14 +9,34 @@
 
     {% endset %}
 
-    {% set split_part_expr %}
+    {% if part_number >= 0 %}
 
-    split(
-        {{ string_text }},
-        {{ delimiter_expr }}
-        )[({{ part_number - 1 }})]
+        {% set split_part_expr %}
 
-    {% endset %}
+        split(
+            {{ string_text }},
+            {{ delimiter_expr }}
+            )[({{ part_number - 1 if part_number > 0 else part_number }})]
+
+        {% endset %}
+
+    {% else %}
+
+        {% set split_part_expr %}
+
+        split(
+            {{ string_text }},
+            {{ delimiter_expr }}
+            )[(
+                length({{ string_text }})
+                - length(
+                    replace({{ string_text }},  {{ delimiter_text }}, '')
+                ) + 1 + {{ part_number }}
+            )]
+
+        {% endset %}
+
+    {% endif %}
 
     {{ return(split_part_expr) }}
 
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index ba03c5f6..0dc52656 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -30,10 +30,10 @@
 from dbt.tests.adapter.utils.fixture_listagg import models__test_listagg_yml
 from tests.functional.adapter.utils.fixture_listagg import models__test_listagg_no_order_by_sql
 
-seeds__data_split_part_csv = """parts,split_on,result_1,result_2,result_3
-a|b|c,|,a,b,c
-1|2|3,|,1,2,3
-EMPTY|EMPTY|EMPTY,|,EMPTY,EMPTY,EMPTY
+seeds__data_split_part_csv = """parts,split_on,result_1,result_2,result_3,result_4
+a|b|c,|,a,b,c,c
+1|2|3,|,1,2,3,3
+EMPTY|EMPTY|EMPTY,|,EMPTY,EMPTY,EMPTY,EMPTY
 """
 
 seeds__data_last_day_csv = """date_day,date_part,result

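A worked check, in Python, of the index arithmetic the updated macro emits: positive part numbers map directly to a zero-based index, while negative ones count delimiters to address the element from the end. The sample string matches the seed rows above; the helper name is illustrative:

    def spark_split_index(string: str, delimiter: str, part_number: int) -> int:
        """Mirror of the zero-based index used in the generated Spark SQL."""
        if part_number >= 0:
            return part_number - 1 if part_number > 0 else part_number
        # length(s) - length(replace(s, delimiter, '')) + 1 + part_number
        return len(string) - len(string.replace(delimiter, "")) + 1 + part_number

    parts, split_on = "a|b|c", "|"
    elements = parts.split(split_on)
    assert elements[spark_split_index(parts, split_on, 3)] == "c"   # positive part number
    assert elements[spark_split_index(parts, split_on, -1)] == "c"  # negative part number selects from the end
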
From a6dc99f102843afbef83dd02b7b7ffa6f400c5f3 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Tue, 11 Jul 2023 15:35:19 -0500
Subject: [PATCH 460/603] dbt_clone macros, materialization and tests to
 dbt-spark (#816)

* dbt_clone macros, materialization and tests to dbt-spark

* slight reorg of macro

* add file_format to profile_config_update

* change pointer back to main

* add commonly used pytest skip
---
 .../unreleased/Features-20230627-155913.yaml  |   6 ++
 .../spark/macros/materializations/clone.sql   |  76 +++++++++++++
 .../functional/adapter/dbt_clone/fixtures.py  | 101 ++++++++++++++++++
 .../adapter/dbt_clone/test_dbt_clone.py       |  80 ++++++++++++++
 4 files changed, 263 insertions(+)
 create mode 100644 .changes/unreleased/Features-20230627-155913.yaml
 create mode 100644 dbt/include/spark/macros/materializations/clone.sql
 create mode 100644 tests/functional/adapter/dbt_clone/fixtures.py
 create mode 100644 tests/functional/adapter/dbt_clone/test_dbt_clone.py

diff --git a/.changes/unreleased/Features-20230627-155913.yaml b/.changes/unreleased/Features-20230627-155913.yaml
new file mode 100644
index 00000000..af12abfd
--- /dev/null
+++ b/.changes/unreleased/Features-20230627-155913.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: add dbt-spark portion of dbt_clone functionality
+time: 2023-06-27T15:59:13.605278-05:00
+custom:
+  Author: McKnight-42 aranke
+  Issue: "815"
diff --git a/dbt/include/spark/macros/materializations/clone.sql b/dbt/include/spark/macros/materializations/clone.sql
new file mode 100644
index 00000000..a5b80a5c
--- /dev/null
+++ b/dbt/include/spark/macros/materializations/clone.sql
@@ -0,0 +1,76 @@
+{% macro spark__can_clone_table() %}
+    {{ return(True) }}
+{% endmacro %}
+
+{% macro spark__create_or_replace_clone(this_relation, defer_relation) %}
+    create or replace table {{ this_relation }} shallow clone {{ defer_relation }}
+{% endmacro %}
+
+{%- materialization clone, adapter='spark' -%}
+
+  {%- set relations = {'relations': []} -%}
+
+  {%- if not defer_relation -%}
+      -- nothing to do
+      {{ log("No relation found in state manifest for " ~ model.unique_id, info=True) }}
+      {{ return(relations) }}
+  {%- endif -%}
+
+  {%- set existing_relation = load_cached_relation(this) -%}
+
+  {%- if existing_relation and not flags.FULL_REFRESH -%}
+      -- noop!
+      {{ log("Relation " ~ existing_relation ~ " already exists", info=True) }}
+      {{ return(relations) }}
+  {%- endif -%}
+
+  {%- set other_existing_relation = load_cached_relation(defer_relation) -%}
+  {%- set file_format = config.get('file_format', validator=validation.any[basestring]) -%}
+
+  -- If this is a database that can do zero-copy cloning of tables, and the other relation is a table, then this will be a table
+  -- Otherwise, this will be a view
+
+  {% set can_clone_table = can_clone_table() %}
+
+  {%- if file_format != 'delta' -%}
+    {% set invalid_format_msg -%}
+      Invalid file format: {{ file_format }}
+      shallow clone requires file_format be set to 'delta'
+    {%- endset %}
+    {% do exceptions.raise_compiler_error(invalid_format_msg) %}
+  {%- elif other_existing_relation and other_existing_relation.type == 'table' and can_clone_table -%}
+
+      {%- set target_relation = this.incorporate(type='table') -%}
+      {% if existing_relation is not none and not existing_relation.is_table %}
+        {{ log("Dropping relation " ~ existing_relation ~ " because it is of type " ~ existing_relation.type) }}
+        {{ drop_relation_if_exists(existing_relation) }}
+      {% endif %}
+
+      -- as a general rule, data platforms that can clone tables can also do atomic 'create or replace'
+      {% call statement('main') %}
+          {{ create_or_replace_clone(target_relation, defer_relation) }}
+      {% endcall %}
+
+      {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}
+      {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}
+      {% do persist_docs(target_relation, model) %}
+
+      {{ return({'relations': [target_relation]}) }}
+
+  {%- else -%}
+
+      {%- set target_relation = this.incorporate(type='view') -%}
+
+      -- reuse the view materialization
+      -- TODO: support actual dispatch for materialization macros
+      -- Tracking ticket: https://github.com/dbt-labs/dbt-core/issues/7799
+      {% set search_name = "materialization_view_" ~ adapter.type() %}
+      {% if not search_name in context %}
+          {% set search_name = "materialization_view_default" %}
+      {% endif %}
+      {% set materialization_macro = context[search_name] %}
+      {% set relations = materialization_macro() %}
+      {{ return(relations) }}
+  {% endif %}
+
+{%- endmaterialization -%}
diff --git a/tests/functional/adapter/dbt_clone/fixtures.py b/tests/functional/adapter/dbt_clone/fixtures.py
new file mode 100644
index 00000000..a4bb12a4
--- /dev/null
+++ b/tests/functional/adapter/dbt_clone/fixtures.py
@@ -0,0 +1,101 @@
+seed_csv = """id,name
+1,Alice
+2,Bob
+"""
+
+table_model_sql = """
+{{ config(materialized='table') }}
+select * from {{ ref('ephemeral_model') }}
+-- establish a macro dependency to trigger state:modified.macros
+-- depends on: {{ my_macro() }}
+"""
+
+view_model_sql = """
+{{ config(materialized='view') }}
+select * from {{ ref('seed') }}
+-- establish a macro dependency that trips infinite recursion if not handled
+-- depends on: {{ my_infinitely_recursive_macro() }}
+"""
+
+ephemeral_model_sql = """
+{{ config(materialized='ephemeral') }}
+select * from {{ ref('view_model') }}
+"""
+
+exposures_yml = """
+version: 2
+exposures:
+  - name: my_exposure
+    type: application
+    depends_on:
+      - ref('view_model')
+    owner:
+      email: test@example.com
+"""
+
+schema_yml = """
+version: 2
+models:
+  - name: view_model
+    columns:
+      - name: id
+        tests:
+          - unique:
+              severity: error
+          - not_null
+      - name: name
+"""
+
+get_schema_name_sql = """
+{% macro generate_schema_name(custom_schema_name, node) -%}
+    {%- set default_schema = target.schema -%}
+    {%- if custom_schema_name is not none -%}
+        {{ return(default_schema ~ '_' ~ custom_schema_name|trim) }}
+    -- put seeds into a separate schema in "prod", to verify that cloning in "dev" still works
+    {%- elif target.name == 'default' and node.resource_type == 'seed' -%}
+        {{ return(default_schema ~ '_' ~ 'seeds') }}
+    {%- else -%}
+        {{ return(default_schema) }}
+    {%- endif -%}
+{%- endmacro %}
+"""
+
+snapshot_sql = """
+{% snapshot my_cool_snapshot %}
+    {{
+        config(
+            target_database=database,
+            target_schema=schema,
+            unique_key='id',
+            strategy='check',
+            check_cols=['id'],
+        )
+    }}
+    select * from {{ ref('view_model') }}
+{% endsnapshot %}
+"""
+macros_sql = """
+{% macro my_macro() %}
+    {% do log('in a macro' ) %}
+{% endmacro %}
+"""
+
+infinite_macros_sql = """
+{# trigger infinite recursion if not handled #}
+{% macro my_infinitely_recursive_macro() %}
+  {{ return(adapter.dispatch('my_infinitely_recursive_macro')()) }}
+{% endmacro %}
+{% macro default__my_infinitely_recursive_macro() %}
+    {% if unmet_condition %}
+        {{ my_infinitely_recursive_macro() }}
+    {% else %}
+        {{ return('') }}
+    {% endif %}
+{% endmacro %}
+"""
+
+custom_can_clone_tables_false_macros_sql = """
+{% macro can_clone_table() %}
+    {{ return(False) }}
+{% endmacro %}
+"""
diff --git a/tests/functional/adapter/dbt_clone/test_dbt_clone.py b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
new file mode 100644
index 00000000..a5e8d70e
--- /dev/null
+++ b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
@@ -0,0 +1,80 @@
+import pytest
+from dbt.tests.adapter.dbt_clone.test_dbt_clone import BaseClonePossible
+from tests.functional.adapter.dbt_clone.fixtures import (
+    seed_csv,
+    table_model_sql,
+    view_model_sql,
+    ephemeral_model_sql,
+    exposures_yml,
+    schema_yml,
+    snapshot_sql,
+    get_schema_name_sql,
+    macros_sql,
+    infinite_macros_sql,
+)
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestSparkBigqueryClonePossible(BaseClonePossible):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "table_model.sql": table_model_sql,
+            "view_model.sql": view_model_sql,
+            "ephemeral_model.sql": ephemeral_model_sql,
+            "schema.yml": schema_yml,
+            "exposures.yml": exposures_yml,
+        }
+
+    @pytest.fixture(scope="class")
+    def macros(self):
+        return {
+            "macros.sql": macros_sql,
+            "infinite_macros.sql": infinite_macros_sql,
+            "get_schema_name.sql": get_schema_name_sql,
+        }
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "seed.csv": seed_csv,
+        }
+
+    @pytest.fixture(scope="class")
+    def snapshots(self):
+        return {
+            "snapshot.sql": snapshot_sql,
+        }
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            },
+            "seeds": {
+                "test": {
+                    "quote_columns": False,
+                },
+                "+file_format": "delta",
+            },
+            "snapshots": {
+                "+file_format": "delta",
+            },
+        }
+
+    @pytest.fixture(autouse=True)
+    def clean_up(self, project):
+        yield
+        with project.adapter.connection_named("__test"):
+            relation = project.adapter.Relation.create(
+                database=project.database, schema=f"{project.test_schema}_seeds"
+            )
+            project.adapter.drop_schema(relation)
+
+            relation = project.adapter.Relation.create(
+                database=project.database, schema=project.test_schema
+            )
+            project.adapter.drop_schema(relation)
+
+    pass

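When the deferred relation is a Delta table, the clone materialization above reduces to a single zero-copy statement. A minimal PySpark sketch with hypothetical relation names (dev.my_model standing in for this_relation, prod.my_model for defer_relation):

    # Shallow clone: the new table points at the source table's data files
    # instead of copying them; requires file_format = 'delta'.
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.enableHiveSupport().getOrCreate()
    spark.sql("create or replace table dev.my_model shallow clone prod.my_model")

If the file format is not delta the materialization raises a compiler error, and if the source relation is not a table it falls back to the view materialization.
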
From 6bbc5a7149fdf51cb4a5f4d22da8857f337b389f Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Mon, 17 Jul 2023 17:19:38 -0500
Subject: [PATCH 461/603] add datadog env vars (#836)

---
 .circleci/config.yml | 10 +++++-----
 dev-requirements.txt |  1 +
 tox.ini              | 10 ++++++++++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5fea78c3..de76a063 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -23,7 +23,7 @@ jobs:
       - run: python3 -m pip install tox
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-session
+          command: tox -e integration-spark-session -- --ddtrace
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -54,7 +54,7 @@ jobs:
 
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-thrift
+          command: tox -e integration-spark-thrift -- --ddtrace
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -72,7 +72,7 @@ jobs:
       - checkout
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-databricks-http
+          command: tox -e integration-spark-databricks-http -- --ddtrace
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -94,7 +94,7 @@ jobs:
       - checkout
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-databricks-odbc-cluster
+          command: tox -e integration-spark-databricks-odbc-cluster -- --ddtrace
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -105,7 +105,7 @@ jobs:
       - checkout
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-databricks-odbc-sql-endpoint
+          command: tox -e integration-spark-databricks-odbc-sql-endpoint -- --ddtrace
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
diff --git a/dev-requirements.txt b/dev-requirements.txt
index fd8c6813..c62d6ed7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -8,6 +8,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 black~=23.3
 bumpversion~=0.6.0
 click~=8.1
+ddtrace~=1.16
 flake8~=6.0;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
diff --git a/tox.ini b/tox.ini
index 1bba38b6..9302e91b 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,6 +21,8 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_clus
 passenv =
     DBT_*
     PYTEST_ADDOPTS
+    DD_SERVICE
+    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -35,6 +37,8 @@ passenv =
     DBT_*
     PYTEST_ADDOPTS
     ODBC_DRIVER
+    DD_SERVICE
+    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -49,6 +53,8 @@ passenv =
     DBT_*
     PYTEST_ADDOPTS
     ODBC_DRIVER
+    DD_SERVICE
+    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -63,6 +69,8 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posarg
 passenv =
     DBT_*
     PYTEST_ADDOPTS
+    DD_SERVICE
+    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -77,6 +85,8 @@ passenv =
     DBT_*
     PYTEST_*
     PIP_CACHE_DIR
+    DD_SERVICE
+    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt

From e335c76627b5807e30853780551691cacded14c4 Mon Sep 17 00:00:00 2001
From: Emily Rockman <emily.rockman@dbtlabs.com>
Date: Mon, 17 Jul 2023 18:51:27 -0500
Subject: [PATCH 462/603] revert dd (#840)

---
 .circleci/config.yml | 10 +++++-----
 dev-requirements.txt |  1 -
 tox.ini              | 10 ----------
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index de76a063..5fea78c3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -23,7 +23,7 @@ jobs:
       - run: python3 -m pip install tox
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-session -- --ddtrace
+          command: tox -e integration-spark-session
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -54,7 +54,7 @@ jobs:
 
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-thrift -- --ddtrace
+          command: tox -e integration-spark-thrift
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -72,7 +72,7 @@ jobs:
       - checkout
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-databricks-http -- --ddtrace
+          command: tox -e integration-spark-databricks-http
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -94,7 +94,7 @@ jobs:
       - checkout
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-databricks-odbc-cluster -- --ddtrace
+          command: tox -e integration-spark-databricks-odbc-cluster
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
@@ -105,7 +105,7 @@ jobs:
       - checkout
       - run:
           name: Run integration tests
-          command: tox -e integration-spark-databricks-odbc-sql-endpoint -- --ddtrace
+          command: tox -e integration-spark-databricks-odbc-sql-endpoint
           no_output_timeout: 1h
       - store_artifacts:
           path: ./logs
diff --git a/dev-requirements.txt b/dev-requirements.txt
index c62d6ed7..fd8c6813 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -8,7 +8,6 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 black~=23.3
 bumpversion~=0.6.0
 click~=8.1
-ddtrace~=1.16
 flake8~=6.0;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
diff --git a/tox.ini b/tox.ini
index 9302e91b..1bba38b6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,8 +21,6 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_clus
 passenv =
     DBT_*
     PYTEST_ADDOPTS
-    DD_SERVICE
-    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -37,8 +35,6 @@ passenv =
     DBT_*
     PYTEST_ADDOPTS
     ODBC_DRIVER
-    DD_SERVICE
-    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -53,8 +49,6 @@ passenv =
     DBT_*
     PYTEST_ADDOPTS
     ODBC_DRIVER
-    DD_SERVICE
-    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -69,8 +63,6 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posarg
 passenv =
     DBT_*
     PYTEST_ADDOPTS
-    DD_SERVICE
-    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt
@@ -85,8 +77,6 @@ passenv =
     DBT_*
     PYTEST_*
     PIP_CACHE_DIR
-    DD_SERVICE
-    DD_ENV
 deps =
     -r{toxinidir}/requirements.txt
     -r{toxinidir}/dev-requirements.txt

From 98f42767cff052f1c8696aaee53eaed433149768 Mon Sep 17 00:00:00 2001
From: FishtownBuildBot <77737458+FishtownBuildBot@users.noreply.github.com>
Date: Tue, 18 Jul 2023 15:55:14 -0500
Subject: [PATCH 463/603] Cleanup main after cutting new 1.6.latest branch
 (#838)

* Clean up changelog on main

* Bumping version to 1.7.0a1

* pre-commit

* changie merge

* changie merge

* re trigger tests

* lower sasl bound

* revert to 0.3.1 for sasl

* hard pin sasl requirement

* reset pin sasl requirement

* reset pin sasl requirement

* add pin for cython

* lower cython requirement

* remove cython pin

* update config.yml

* repin to 38

* specify python in tox

* specify 3.10

* commenting out session runs, and leaving a note

---------

Co-authored-by: Matthew McKnight <matthew.mcknight@dbtlabs.com>
Co-authored-by: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/0.0.0.md                             |  1 +
 .changes/1.6.0-a1.md                          |  1 -
 .changes/1.6.0-b1.md                          | 31 ----------
 .changes/1.6.0-b2.md                          |  5 --
 .changes/1.6.0-b3.md                          | 13 -----
 .../1.6.0/Dependencies-20230329-102021.yaml   |  6 --
 .../1.6.0/Dependencies-20230406-230203.yaml   |  6 --
 .../1.6.0/Dependencies-20230421-041623.yaml   |  6 --
 .../1.6.0/Dependencies-20230421-180054.yaml   |  6 --
 .../1.6.0/Dependencies-20230421-230051.yaml   |  6 --
 .../1.6.0/Dependencies-20230423-215745.yaml   |  6 --
 .../1.6.0/Dependency-20221116-221906.yaml     |  7 ---
 .changes/1.6.0/Features-20220812-091652.yaml  |  7 ---
 .changes/1.6.0/Features-20230427-123135.yaml  |  6 --
 .changes/1.6.0/Features-20230604-043421.yaml  |  6 --
 .changes/1.6.0/Fixes-20230420-214433.yaml     |  6 --
 .changes/1.6.0/Fixes-20230510-154735.yaml     |  6 --
 .changes/1.6.0/Fixes-20230512-151453.yaml     |  6 --
 .../1.6.0/Under the Hood-20230508-222118.yaml |  6 --
 .../1.6.0/Under the Hood-20230530-162533.yaml |  6 --
 .../unreleased/Features-20230627-155913.yaml  |  6 --
 .../unreleased/Fixes-20230319-155618.yaml     |  6 --
 .../unreleased/Fixes-20230510-163110.yaml     |  6 --
 .../unreleased/Fixes-20230623-112100.yaml     |  6 --
 .../unreleased/Fixes-20230628-162413.yaml     |  6 --
 .circleci/config.yml                          | 41 ++++++-------
 CHANGELOG.md                                  | 57 +------------------
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 tox.ini                                       |  2 +-
 31 files changed, 27 insertions(+), 252 deletions(-)
 delete mode 100644 .changes/1.6.0-a1.md
 delete mode 100644 .changes/1.6.0-b1.md
 delete mode 100644 .changes/1.6.0-b2.md
 delete mode 100644 .changes/1.6.0-b3.md
 delete mode 100644 .changes/1.6.0/Dependencies-20230329-102021.yaml
 delete mode 100644 .changes/1.6.0/Dependencies-20230406-230203.yaml
 delete mode 100644 .changes/1.6.0/Dependencies-20230421-041623.yaml
 delete mode 100644 .changes/1.6.0/Dependencies-20230421-180054.yaml
 delete mode 100644 .changes/1.6.0/Dependencies-20230421-230051.yaml
 delete mode 100644 .changes/1.6.0/Dependencies-20230423-215745.yaml
 delete mode 100644 .changes/1.6.0/Dependency-20221116-221906.yaml
 delete mode 100644 .changes/1.6.0/Features-20220812-091652.yaml
 delete mode 100644 .changes/1.6.0/Features-20230427-123135.yaml
 delete mode 100644 .changes/1.6.0/Features-20230604-043421.yaml
 delete mode 100644 .changes/1.6.0/Fixes-20230420-214433.yaml
 delete mode 100644 .changes/1.6.0/Fixes-20230510-154735.yaml
 delete mode 100644 .changes/1.6.0/Fixes-20230512-151453.yaml
 delete mode 100644 .changes/1.6.0/Under the Hood-20230508-222118.yaml
 delete mode 100644 .changes/1.6.0/Under the Hood-20230530-162533.yaml
 delete mode 100644 .changes/unreleased/Features-20230627-155913.yaml
 delete mode 100644 .changes/unreleased/Fixes-20230319-155618.yaml
 delete mode 100644 .changes/unreleased/Fixes-20230510-163110.yaml
 delete mode 100644 .changes/unreleased/Fixes-20230623-112100.yaml
 delete mode 100644 .changes/unreleased/Fixes-20230628-162413.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f0c90061..3b310155 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.6.0b3
+current_version = 1.7.0a1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
index 87fa376a..0bec014d 100644
--- a/.changes/0.0.0.md
+++ b/.changes/0.0.0.md
@@ -1,5 +1,6 @@
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
 - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md)
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
 - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
diff --git a/.changes/1.6.0-a1.md b/.changes/1.6.0-a1.md
deleted file mode 100644
index a748db5a..00000000
--- a/.changes/1.6.0-a1.md
+++ /dev/null
@@ -1 +0,0 @@
-## dbt-spark 1.6.0-a1 - April 17, 2023
diff --git a/.changes/1.6.0-b1.md b/.changes/1.6.0-b1.md
deleted file mode 100644
index 5c385a7f..00000000
--- a/.changes/1.6.0-b1.md
+++ /dev/null
@@ -1,31 +0,0 @@
-## dbt-spark 1.6.0-b1 - May 12, 2023
-
-### Features
-
-- Support insert_overwrite strategy with delta ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
-
-### Fixes
-
-- Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks ([#725](https://github.com/dbt-labs/dbt-spark/issues/725))
-- Remove dead code 💀 ([#758](https://github.com/dbt-labs/dbt-spark/issues/758))
-
-### Under the Hood
-
-- Remove unneeded type ignore ([#753](https://github.com/dbt-labs/dbt-spark/issues/753))
-
-### Dependencies
-
-- Update black requirement from ~=23.1 to ~=23.3 ([#701](https://github.com/dbt-labs/dbt-spark/pull/701))
-- Bump mypy from 1.1.1 to 1.2.0 ([#708](https://github.com/dbt-labs/dbt-spark/pull/708))
-- Update pytest requirement from ~=7.2 to ~=7.3 ([#726](https://github.com/dbt-labs/dbt-spark/pull/726))
-- Update types-pytz requirement from ~=2023.2 to ~=2023.3 ([#704](https://github.com/dbt-labs/dbt-spark/pull/704))
-- Update pip-tools requirement from ~=6.12 to ~=6.13 ([#737](https://github.com/dbt-labs/dbt-spark/pull/737))
-- Update pytz requirement from ~=2023.2 to ~=2023.3 ([#702](https://github.com/dbt-labs/dbt-spark/pull/702))
-
-### Dependency
-
-- Bump pyodbc from 4.0.34 to 4.0.35 ([#417](https://github.com/dbt-labs/dbt-spark/issues/417))
-
-### Contributors
-- [@Fokko](https://github.com/Fokko) ([#758](https://github.com/dbt-labs/dbt-spark/issues/758), [#753](https://github.com/dbt-labs/dbt-spark/issues/753))
-- [@flvndh](https://github.com/flvndh) ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
diff --git a/.changes/1.6.0-b2.md b/.changes/1.6.0-b2.md
deleted file mode 100644
index ea758062..00000000
--- a/.changes/1.6.0-b2.md
+++ /dev/null
@@ -1,5 +0,0 @@
-## dbt-spark 1.6.0-b2 - May 25, 2023
-
-### Features
-
-- All constraint types are supported, but not enforced. ([#656](https://github.com/dbt-labs/dbt-spark/issues/656), [#657](https://github.com/dbt-labs/dbt-spark/issues/657))
diff --git a/.changes/1.6.0-b3.md b/.changes/1.6.0-b3.md
deleted file mode 100644
index 3379fe50..00000000
--- a/.changes/1.6.0-b3.md
+++ /dev/null
@@ -1,13 +0,0 @@
-## dbt-spark 1.6.0-b3 - June 09, 2023
-
-### Features
-
-- Standardize the _connection_keys and debug_query for `dbt debug`. ([#PR754](https://github.com/dbt-labs/dbt-spark/issues/PR754))
-
-### Fixes
-
-- wrap expression for check constraints in parentheses ([#7480](https://github.com/dbt-labs/dbt-spark/issues/7480))
-
-### Under the Hood
-
-- drop support for python 3.7 ([#792](https://github.com/dbt-labs/dbt-spark/issues/792))
diff --git a/.changes/1.6.0/Dependencies-20230329-102021.yaml b/.changes/1.6.0/Dependencies-20230329-102021.yaml
deleted file mode 100644
index c60c4e69..00000000
--- a/.changes/1.6.0/Dependencies-20230329-102021.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.1 to ~=23.3"
-time: 2023-03-29T10:20:21.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 701
diff --git a/.changes/1.6.0/Dependencies-20230406-230203.yaml b/.changes/1.6.0/Dependencies-20230406-230203.yaml
deleted file mode 100644
index 969d2102..00000000
--- a/.changes/1.6.0/Dependencies-20230406-230203.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.1.1 to 1.2.0"
-time: 2023-04-06T23:02:03.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 708
diff --git a/.changes/1.6.0/Dependencies-20230421-041623.yaml b/.changes/1.6.0/Dependencies-20230421-041623.yaml
deleted file mode 100644
index bd05eb90..00000000
--- a/.changes/1.6.0/Dependencies-20230421-041623.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest requirement from ~=7.2 to ~=7.3"
-time: 2023-04-21T04:16:23.00000Z
-custom:
-  Author: mikealfare
-  PR: 726
diff --git a/.changes/1.6.0/Dependencies-20230421-180054.yaml b/.changes/1.6.0/Dependencies-20230421-180054.yaml
deleted file mode 100644
index c5522ef6..00000000
--- a/.changes/1.6.0/Dependencies-20230421-180054.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update types-pytz requirement from ~=2023.2 to ~=2023.3"
-time: 2023-04-21T18:00:54.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 704
diff --git a/.changes/1.6.0/Dependencies-20230421-230051.yaml b/.changes/1.6.0/Dependencies-20230421-230051.yaml
deleted file mode 100644
index 2bcd4125..00000000
--- a/.changes/1.6.0/Dependencies-20230421-230051.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pip-tools requirement from ~=6.12 to ~=6.13"
-time: 2023-04-21T23:00:51.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 737
diff --git a/.changes/1.6.0/Dependencies-20230423-215745.yaml b/.changes/1.6.0/Dependencies-20230423-215745.yaml
deleted file mode 100644
index a6f733f7..00000000
--- a/.changes/1.6.0/Dependencies-20230423-215745.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytz requirement from ~=2023.2 to ~=2023.3"
-time: 2023-04-23T21:57:45.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 702
diff --git a/.changes/1.6.0/Dependency-20221116-221906.yaml b/.changes/1.6.0/Dependency-20221116-221906.yaml
deleted file mode 100644
index 5e126d07..00000000
--- a/.changes/1.6.0/Dependency-20221116-221906.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Bump pyodbc from 4.0.34 to 4.0.35"
-time: 2022-11-16T22:19:06.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 519
diff --git a/.changes/1.6.0/Features-20220812-091652.yaml b/.changes/1.6.0/Features-20220812-091652.yaml
deleted file mode 100644
index 58c60aab..00000000
--- a/.changes/1.6.0/Features-20220812-091652.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: Support insert_overwrite strategy with delta
-time: 2022-08-12T09:16:52.7995122+02:00
-custom:
-  Author: flvndh
-  Issue: "1013"
-  PR: "430"
diff --git a/.changes/1.6.0/Features-20230427-123135.yaml b/.changes/1.6.0/Features-20230427-123135.yaml
deleted file mode 100644
index a1cf88be..00000000
--- a/.changes/1.6.0/Features-20230427-123135.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: All constraint types are supported, but not enforced.
-time: 2023-04-27T12:31:35.011284-04:00
-custom:
-  Author: peterallenwebb
-  Issue: 656 657
diff --git a/.changes/1.6.0/Features-20230604-043421.yaml b/.changes/1.6.0/Features-20230604-043421.yaml
deleted file mode 100644
index 153b43e5..00000000
--- a/.changes/1.6.0/Features-20230604-043421.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Standardize the _connection_keys and debug_query for `dbt debug`.
-time: 2023-06-04T04:34:21.968669-07:00
-custom:
-  Author: versusfacit
-  Issue: PR754
diff --git a/.changes/1.6.0/Fixes-20230420-214433.yaml b/.changes/1.6.0/Fixes-20230420-214433.yaml
deleted file mode 100644
index 57a3fe3a..00000000
--- a/.changes/1.6.0/Fixes-20230420-214433.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks
-time: 2023-04-20T21:44:33.343598-04:00
-custom:
-  Author: mikealfare
-  Issue: "725"
diff --git a/.changes/1.6.0/Fixes-20230510-154735.yaml b/.changes/1.6.0/Fixes-20230510-154735.yaml
deleted file mode 100644
index d3078564..00000000
--- a/.changes/1.6.0/Fixes-20230510-154735.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: "Remove dead code \U0001F480"
-time: 2023-05-10T15:47:35.848176+02:00
-custom:
-  Author: Fokko
-  Issue: "758"
diff --git a/.changes/1.6.0/Fixes-20230512-151453.yaml b/.changes/1.6.0/Fixes-20230512-151453.yaml
deleted file mode 100644
index cb8c2b76..00000000
--- a/.changes/1.6.0/Fixes-20230512-151453.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: wrap expression for check constraints in parentheses
-time: 2023-05-12T15:14:53.151149-04:00
-custom:
-  Author: michelleark
-  Issue: "7480"
diff --git a/.changes/1.6.0/Under the Hood-20230508-222118.yaml b/.changes/1.6.0/Under the Hood-20230508-222118.yaml
deleted file mode 100644
index daf66ad1..00000000
--- a/.changes/1.6.0/Under the Hood-20230508-222118.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Remove unneeded type ignore
-time: 2023-05-08T22:21:18.093232+02:00
-custom:
-  Author: Fokko
-  Issue: "753"
diff --git a/.changes/1.6.0/Under the Hood-20230530-162533.yaml b/.changes/1.6.0/Under the Hood-20230530-162533.yaml
deleted file mode 100644
index 857db085..00000000
--- a/.changes/1.6.0/Under the Hood-20230530-162533.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: drop support for python 3.7
-time: 2023-05-30T16:25:33.109326-05:00
-custom:
-  Author: McKnight-42
-  Issue: "792"
diff --git a/.changes/unreleased/Features-20230627-155913.yaml b/.changes/unreleased/Features-20230627-155913.yaml
deleted file mode 100644
index af12abfd..00000000
--- a/.changes/unreleased/Features-20230627-155913.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: add dbt-spark portion of dbt_clone functionality
-time: 2023-06-27T15:59:13.605278-05:00
-custom:
-  Author: McKnight-42 aranke
-  Issue: "815"
diff --git a/.changes/unreleased/Fixes-20230319-155618.yaml b/.changes/unreleased/Fixes-20230319-155618.yaml
deleted file mode 100644
index 58c88727..00000000
--- a/.changes/unreleased/Fixes-20230319-155618.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: allow negative args for spark split part
-time: 2023-03-19T15:56:18.630146-05:00
-custom:
-  Author: dave-connors-3
-  Issue: "688"
diff --git a/.changes/unreleased/Fixes-20230510-163110.yaml b/.changes/unreleased/Fixes-20230510-163110.yaml
deleted file mode 100644
index 06672ac9..00000000
--- a/.changes/unreleased/Fixes-20230510-163110.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Disallow untyped `def`'s
-time: 2023-05-10T16:31:10.593358+02:00
-custom:
-  Author: Fokko
-  Issue: "760"
diff --git a/.changes/unreleased/Fixes-20230623-112100.yaml b/.changes/unreleased/Fixes-20230623-112100.yaml
deleted file mode 100644
index d6129c90..00000000
--- a/.changes/unreleased/Fixes-20230623-112100.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Allow hostname to be provided with or without trailing slash
-time: 2023-06-23T11:21:00.901430172Z
-custom:
-  Author: tim-steinkuhler
-  Issue: "302"
diff --git a/.changes/unreleased/Fixes-20230628-162413.yaml b/.changes/unreleased/Fixes-20230628-162413.yaml
deleted file mode 100644
index ffa86652..00000000
--- a/.changes/unreleased/Fixes-20230628-162413.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Remove dependent_projects argument from PartialProject call in unit tests
-time: 2023-06-28T16:24:13.288246-05:00
-custom:
-  Author: McKnight-42
-  Issue: "7955"
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5fea78c3..71ca356c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,23 +10,24 @@ jobs:
       - checkout
       - run: tox -e flake8,unit
 
-  integration-spark-session:
-    environment:
-      DBT_INVOCATION_ENV: circle
-    docker:
-      - image: godatadriven/pyspark:3.1
-    steps:
-      - checkout
-      - run: apt-get update
-      - run: python3 -m pip install --upgrade pip
-      - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev
-      - run: python3 -m pip install tox
-      - run:
-          name: Run integration tests
-          command: tox -e integration-spark-session
-          no_output_timeout: 1h
-      - store_artifacts:
-          path: ./logs
+# Turning off for now due to flaky test runs; will turn back on at a later date.
+  # integration-spark-session:
+  #   environment:
+  #     DBT_INVOCATION_ENV: circle
+  #   docker:
+  #     - image: godatadriven/pyspark:3.1
+  #   steps:
+  #     - checkout
+  #     - run: apt-get update
+  #     - run: python3 -m pip install --upgrade pip
+  #     - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev
+  #     - run: python3 -m pip install tox
+  #     - run:
+  #         name: Run integration tests
+  #         command: tox -e integration-spark-session
+  #         no_output_timeout: 1h
+  #     - store_artifacts:
+  #         path: ./logs
 
   integration-spark-thrift:
     environment:
@@ -115,9 +116,9 @@ workflows:
   test-everything:
     jobs:
       - unit
-      - integration-spark-session:
-          requires:
-            - unit
+      # - integration-spark-session:
+      #     requires:
+      #       - unit
       - integration-spark-thrift:
           requires:
             - unit
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f525b3b..902db37f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,64 +5,9 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-## dbt-spark 1.6.0-b3 - June 09, 2023
-
-### Features
-
-- Standardize the _connection_keys and debug_query for `dbt debug`. ([#PR754](https://github.com/dbt-labs/dbt-spark/issues/PR754))
-
-### Fixes
-
-- wrap expression for check constraints in parentheses ([#7480](https://github.com/dbt-labs/dbt-spark/issues/7480))
-
-### Under the Hood
-
-- drop support for python 3.7 ([#792](https://github.com/dbt-labs/dbt-spark/issues/792))
-
-
-
-## dbt-spark 1.6.0-b2 - May 25, 2023
-
-### Features
-
-- All constraint types are supported, but not enforced. ([#656](https://github.com/dbt-labs/dbt-spark/issues/656), [#657](https://github.com/dbt-labs/dbt-spark/issues/657))
-
-## dbt-spark 1.6.0-b1 - May 12, 2023
-
-### Features
-
-- Support insert_overwrite strategy with delta ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
-
-### Fixes
-
-- Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks ([#725](https://github.com/dbt-labs/dbt-spark/issues/725))
-- Remove dead code 💀 ([#758](https://github.com/dbt-labs/dbt-spark/issues/758))
-
-### Under the Hood
-
-- Remove unneeded type ignore ([#753](https://github.com/dbt-labs/dbt-spark/issues/753))
-
-### Dependencies
-
-- Update black requirement from ~=23.1 to ~=23.3 ([#701](https://github.com/dbt-labs/dbt-spark/pull/701))
-- Bump mypy from 1.1.1 to 1.2.0 ([#708](https://github.com/dbt-labs/dbt-spark/pull/708))
-- Update pytest requirement from ~=7.2 to ~=7.3 ([#726](https://github.com/dbt-labs/dbt-spark/pull/726))
-- Update types-pytz requirement from ~=2023.2 to ~=2023.3 ([#704](https://github.com/dbt-labs/dbt-spark/pull/704))
-- Update pip-tools requirement from ~=6.12 to ~=6.13 ([#737](https://github.com/dbt-labs/dbt-spark/pull/737))
-- Update pytz requirement from ~=2023.2 to ~=2023.3 ([#702](https://github.com/dbt-labs/dbt-spark/pull/702))
-
-### Dependency
-
-- Bump pyodbc from 4.0.34 to 4.0.35 ([#417](https://github.com/dbt-labs/dbt-spark/issues/417))
-
-### Contributors
-- [@Fokko](https://github.com/Fokko) ([#758](https://github.com/dbt-labs/dbt-spark/issues/758), [#753](https://github.com/dbt-labs/dbt-spark/issues/753))
-- [@flvndh](https://github.com/flvndh) ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013))
-
-## dbt-spark 1.6.0-a1 - April 17, 2023
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
 - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md)
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
 - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 0c2870f8..874bd74c 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.6.0b3"
+version = "1.7.0a1"
diff --git a/setup.py b/setup.py
index 2cd78c3a..c6713e89 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.6.0b3"
+package_version = "1.7.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 
diff --git a/tox.ini b/tox.ini
index 1bba38b6..97017a92 100644
--- a/tox.ini
+++ b/tox.ini
@@ -71,7 +71,7 @@ deps =
 [testenv:integration-spark-session]
 allowlist_externals =
     /bin/bash
-basepython = python3
+basepython = python3.10
 commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
 passenv =
     DBT_*

From d91bd17fc919536820131181ce959110920da063 Mon Sep 17 00:00:00 2001
From: Cor <jczuurmond@protonmail.com>
Date: Mon, 24 Jul 2023 20:07:58 +0200
Subject: [PATCH 464/603] Add server side parameters to session connection
 method (#823)

* Pass existing server_side_parameters to the session connection wrapper and use them to configure the SparkSession.

* Incorporating feedback: moved server side parameters to Connection and pass them to the cursor from there.

* Add changie

* Add type hint

* Write out loop

* Add type hint

* Remove server_side_parameters from connection wrapper

* Add handle type hint

* Make server_side_parameters optional

---------

Co-authored-by: Anthony LaRocca <alarocca@apixio.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .../unreleased/Features-20230707-104150.yaml    |  6 ++++++
 dbt/adapters/spark/connections.py               |  5 ++++-
 dbt/adapters/spark/session.py                   | 17 +++++++++++++----
 3 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 .changes/unreleased/Features-20230707-104150.yaml

diff --git a/.changes/unreleased/Features-20230707-104150.yaml b/.changes/unreleased/Features-20230707-104150.yaml
new file mode 100644
index 00000000..183a37b4
--- /dev/null
+++ b/.changes/unreleased/Features-20230707-104150.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Support server_side_parameters for Spark session connection method
+time: 2023-07-07T10:41:50.01541+02:00
+custom:
+  Author: alarocca-apixio
+  Issue: "690"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 2a7f8188..5d3e99a6 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -350,6 +350,7 @@ def open(cls, connection: Connection) -> Connection:
 
         creds = connection.credentials
         exc = None
+        handle: Any
 
         for i in range(1 + creds.connect_retries):
             try:
@@ -460,7 +461,9 @@ def open(cls, connection: Connection) -> Connection:
                         SessionConnectionWrapper,
                     )
 
-                    handle = SessionConnectionWrapper(Connection())  # type: ignore
+                    handle = SessionConnectionWrapper(
+                        Connection(server_side_parameters=creds.server_side_parameters)
+                    )
                 else:
                     raise dbt.exceptions.DbtProfileError(
                         f"invalid credential method: {creds.method}"
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index 5e4bcc49..0e371717 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -4,7 +4,7 @@
 
 import datetime as dt
 from types import TracebackType
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
@@ -24,9 +24,10 @@ class Cursor:
     https://github.com/mkleehammer/pyodbc/wiki/Cursor
     """
 
-    def __init__(self) -> None:
+    def __init__(self, *, server_side_parameters: Optional[Dict[str, Any]] = None) -> None:
         self._df: Optional[DataFrame] = None
         self._rows: Optional[List[Row]] = None
+        self.server_side_parameters = server_side_parameters or {}
 
     def __enter__(self) -> Cursor:
         return self
@@ -106,7 +107,12 @@ def execute(self, sql: str, *parameters: Any) -> None:
         """
         if len(parameters) > 0:
             sql = sql % parameters
-        spark_session = SparkSession.builder.enableHiveSupport().getOrCreate()
+        builder = SparkSession.builder.enableHiveSupport()
+
+        for parameter, value in self.server_side_parameters.items():
+            builder = builder.config(parameter, value)
+
+        spark_session = builder.getOrCreate()
         self._df = spark_session.sql(sql)
 
     def fetchall(self) -> Optional[List[Row]]:
@@ -159,6 +165,9 @@ class Connection:
     https://github.com/mkleehammer/pyodbc/wiki/Connection
     """
 
+    def __init__(self, *, server_side_parameters: Optional[Dict[Any, str]] = None) -> None:
+        self.server_side_parameters = server_side_parameters or {}
+
     def cursor(self) -> Cursor:
         """
         Get a cursor.
@@ -168,7 +177,7 @@ def cursor(self) -> Cursor:
         out : Cursor
             The cursor.
         """
-        return Cursor()
+        return Cursor(server_side_parameters=self.server_side_parameters)
 
 
 class SessionConnectionWrapper(object):

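With this change, the session connection method threads server_side_parameters from the credentials through Connection and Cursor into the SparkSession builder. A standalone sketch of the loop in Cursor.execute (the parameter value below is illustrative, not a default):

    # Each server-side parameter becomes a builder.config() call before the
    # session is created (or an existing one is reused).
    from pyspark.sql import SparkSession

    server_side_parameters = {"spark.sql.shuffle.partitions": "8"}  # illustrative

    builder = SparkSession.builder.enableHiveSupport()
    for parameter, value in server_side_parameters.items():
        builder = builder.config(parameter, value)
    spark_session = builder.getOrCreate()
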
From 2dd6553cd4b6c1d2986e4d60b7699dd388545ad0 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 25 Jul 2023 10:10:44 -0400
Subject: [PATCH 465/603] update stale workflow to use centralized version
 (#842)

---
 .../Under the Hood-20230724-165508.yaml         |  6 ++++++
 .github/workflows/stale.yml                     | 17 +++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230724-165508.yaml

diff --git a/.changes/unreleased/Under the Hood-20230724-165508.yaml b/.changes/unreleased/Under the Hood-20230724-165508.yaml
new file mode 100644
index 00000000..88948464
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230724-165508.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Update stale workflow to use centralized version
+time: 2023-07-24T16:55:08.096947-04:00
+custom:
+  Author: mikealfare
+  Issue: "842"
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index a56455d5..d902340a 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -1,15 +1,12 @@
-name: 'Close stale issues and PRs'
+name: "Close stale issues and PRs"
 on:
   schedule:
     - cron: "30 1 * * *"
+
+permissions:
+  issues: write
+  pull-requests: write
+
 jobs:
   stale:
-    runs-on: ubuntu-latest
-    steps:
-      # pinned at v4 (https://github.com/actions/stale/releases/tag/v4.0.0)
-      - uses: actions/stale@cdf15f641adb27a71842045a94023bef6945e3aa
-        with:
-          stale-issue-message: "This issue has been marked as Stale because it has been open for 180 days with no activity. If you would like the issue to remain open, please remove the stale label or comment on the issue, or it will be closed in 7 days."
-          stale-pr-message: "This PR has been marked as Stale because it has been open for 180 days with no activity. If you would like the PR to remain open, please remove the stale label or comment on the PR, or it will be closed in 7 days."
-          # mark issues/PRs stale when they haven't seen activity in 180 days
-          days-before-stale: 180
+    uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main

From 955564de33f24d5bfc1b10064570e07a930021cc Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 25 Jul 2023 11:17:44 -0400
Subject: [PATCH 466/603] add new line at the end of the dependabot changie to
 pass linter (#841)

---
 .github/workflows/bot-changelog.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
index 89972070..52f50b1e 100644
--- a/.github/workflows/bot-changelog.yml
+++ b/.github/workflows/bot-changelog.yml
@@ -57,4 +57,4 @@ jobs:
         commit_message: "Add automated changelog yaml from template for bot PR"
         changie_kind: ${{ matrix.changie_kind }}
         label: ${{ matrix.label }}
-        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  PR: ${{ github.event.pull_request.number }}"
+        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  PR: ${{ github.event.pull_request.number }}\n"

From 53809c38a04ba70ea4d8197a08c04ed3c5bc1505 Mon Sep 17 00:00:00 2001
From: Cor <jczuurmond@protonmail.com>
Date: Wed, 26 Jul 2023 20:06:56 +0200
Subject: [PATCH 467/603] Enforce server side parameters to be strings (#827)

* Enforce server side parameters to be strings

* Add changie

* Iterate over items

* Test for casting server_side_parameters to strings

---------

Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .changes/unreleased/Features-20230707-114650.yaml |  6 ++++++
 dbt/adapters/spark/connections.py                 |  6 +++++-
 tests/unit/test_credentials.py                    | 12 ++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Features-20230707-114650.yaml
 create mode 100644 tests/unit/test_credentials.py

diff --git a/.changes/unreleased/Features-20230707-114650.yaml b/.changes/unreleased/Features-20230707-114650.yaml
new file mode 100644
index 00000000..6f1b3d38
--- /dev/null
+++ b/.changes/unreleased/Features-20230707-114650.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Enforce server side parameters keys and values to be strings
+time: 2023-07-07T11:46:50.390918+02:00
+custom:
+  Author: Fokko,JCZuurmond
+  Issue: "826"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 5d3e99a6..7efa2a07 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -73,7 +73,7 @@ class SparkCredentials(Credentials):
     connect_retries: int = 0
     connect_timeout: int = 10
     use_ssl: bool = False
-    server_side_parameters: Dict[str, Any] = field(default_factory=dict)
+    server_side_parameters: Dict[str, str] = field(default_factory=dict)
     retry_all: bool = False
 
     @classmethod
@@ -142,6 +142,10 @@ def __post_init__(self) -> None:
         if self.method != SparkConnectionMethod.SESSION:
             self.host = self.host.rstrip("/")
 
+        self.server_side_parameters = {
+            str(key): str(value) for key, value in self.server_side_parameters.items()
+        }
+
     @property
     def type(self) -> str:
         return "spark"
diff --git a/tests/unit/test_credentials.py b/tests/unit/test_credentials.py
new file mode 100644
index 00000000..7a81fdbb
--- /dev/null
+++ b/tests/unit/test_credentials.py
@@ -0,0 +1,12 @@
+from dbt.adapters.spark.connections import SparkConnectionMethod, SparkCredentials
+
+
+def test_credentials_server_side_parameters_keys_and_values_are_strings() -> None:
+    credentials = SparkCredentials(
+        host="localhost",
+        method=SparkConnectionMethod.THRIFT,
+        database="tests",
+        schema="tests",
+        server_side_parameters={"spark.configuration": 10},
+    )
+    assert credentials.server_side_parameters["spark.configuration"] == "10"

From 2680d88e290cb75a2aad46f788d96e0472f5ff45 Mon Sep 17 00:00:00 2001
From: Cor <jczuurmond@protonmail.com>
Date: Sat, 29 Jul 2023 00:42:25 +0200
Subject: [PATCH 468/603] Add server side parameters to http connection method
 (#825)

* Pass server side parameters to hive connect

* Force server_side_parameters keys and values to strings

* Undo changes to thrift

* Add changie

* Revert enforcing server side parameters to be strings

* Update hive http test to check server_side_parameters

* Format test config

---------

Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .changes/unreleased/Features-20230707-113337.yaml | 6 ++++++
 dbt/adapters/spark/connections.py                 | 5 ++++-
 tests/unit/test_adapter.py                        | 4 +++-
 3 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Features-20230707-113337.yaml

diff --git a/.changes/unreleased/Features-20230707-113337.yaml b/.changes/unreleased/Features-20230707-113337.yaml
new file mode 100644
index 00000000..de0a50fe
--- /dev/null
+++ b/.changes/unreleased/Features-20230707-113337.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add server_side_parameters to HTTP connection method
+time: 2023-07-07T11:33:37.794112+02:00
+custom:
+  Author: Fokko,JCZuurmond
+  Issue: "824"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 7efa2a07..6c7899ad 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -381,7 +381,10 @@ def open(cls, connection: Connection) -> Connection:
                     token = base64.standard_b64encode(raw_token).decode()
                     transport.setCustomHeaders({"Authorization": "Basic {}".format(token)})
 
-                    conn = hive.connect(thrift_transport=transport)
+                    conn = hive.connect(
+                        thrift_transport=transport,
+                        configuration=creds.server_side_parameters,
+                    )
                     handle = PyhiveConnectionWrapper(conn)
                 elif creds.method == SparkConnectionMethod.THRIFT:
                     cls.validate_creds(creds, ["host", "port", "user", "schema"])
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index 3c7fccd3..1eb81824 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -39,6 +39,7 @@ def _get_target_http(self, project):
                         "token": "abc123",
                         "organization": "0123456789",
                         "cluster": "01234-23423-coffeetime",
+                        "server_side_parameters": {"spark.driver.memory": "4g"},
                     }
                 },
                 "target": "test",
@@ -147,13 +148,14 @@ def test_http_connection(self):
         config = self._get_target_http(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_http_connect(thrift_transport):
+        def hive_http_connect(thrift_transport, configuration):
             self.assertEqual(thrift_transport.scheme, "https")
             self.assertEqual(thrift_transport.port, 443)
             self.assertEqual(thrift_transport.host, "myorg.sparkhost.com")
             self.assertEqual(
                 thrift_transport.path, "/sql/protocolv1/o/0123456789/01234-23423-coffeetime"
             )
+            self.assertEqual(configuration["spark.driver.memory"], "4g")
 
         # with mock.patch.object(hive, 'connect', new=hive_http_connect):
         with mock.patch("dbt.adapters.spark.connections.hive.connect", new=hive_http_connect):

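For the HTTP method, the same server_side_parameters now reach PyHive through the configuration argument of hive.connect, which applies them as session settings when the connection is opened. A hedged sketch with a placeholder endpoint and token (the adapter derives the real path and auth header from the credentials):

    from pyhive import hive
    from thrift.transport import THttpClient

    # Placeholder endpoint and token; the adapter builds these from the profile.
    transport = THttpClient.THttpClient(
        "https://myorg.sparkhost.com/sql/protocolv1/o/0123456789/01234-23423-coffeetime"
    )
    transport.setCustomHeaders({"Authorization": "Basic <token>"})

    conn = hive.connect(
        thrift_transport=transport,
        configuration={"spark.driver.memory": "4g"},  # mirrors the updated unit test
    )
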
From 82ab52aff5f706097f85f7ec6530c400caad4bdd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 3 Aug 2023 09:11:07 -0400
Subject: [PATCH 469/603] Bump mypy from 1.2.0 to 1.3.0 (#768)

* Bump mypy from 1.2.0 to 1.3.0

Bumps [mypy](https://github.com/python/mypy) from 1.2.0 to 1.3.0.
- [Commits](https://github.com/python/mypy/compare/v1.2.0...v1.3.0)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230510-230725.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230510-230725.yaml

diff --git a/.changes/unreleased/Dependencies-20230510-230725.yaml b/.changes/unreleased/Dependencies-20230510-230725.yaml
new file mode 100644
index 00000000..dfd04ad3
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230510-230725.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.2.0 to 1.3.0"
+time: 2023-05-10T23:07:25.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 768
diff --git a/dev-requirements.txt b/dev-requirements.txt
index fd8c6813..56c29e4a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.0;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.2.0  # patch updates have historically introduced breaking changes
+mypy==1.3.0  # patch updates have historically introduced breaking changes
 pip-tools~=6.13
 pre-commit~=3.2;python_version>="3.8"
 pre-commit-hooks~=4.4

From 4ce348a24d341976c094acc2c4e5123082eed49d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 3 Aug 2023 11:23:47 -0400
Subject: [PATCH 470/603] Update pre-commit requirement from ~=2.21 to ~=3.3
 (#748)

* Update pre-commit requirement from ~=2.21 to ~=3.3

Updates the requirements on [pre-commit](https://github.com/pre-commit/pre-commit) to permit the latest version.
- [Release notes](https://github.com/pre-commit/pre-commit/releases)
- [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pre-commit/pre-commit/commits/v3.3.0)

---
updated-dependencies:
- dependency-name: pre-commit
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230501-231003.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230501-231003.yaml

diff --git a/.changes/unreleased/Dependencies-20230501-231003.yaml b/.changes/unreleased/Dependencies-20230501-231003.yaml
new file mode 100644
index 00000000..b3e3a750
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230501-231003.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pre-commit requirement from ~=2.21 to ~=3.3"
+time: 2023-05-01T23:10:03.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 748
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 56c29e4a..ccc5b116 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -14,7 +14,7 @@ freezegun~=1.2
 ipdb~=0.13.13
 mypy==1.3.0  # patch updates have historically introduced breaking changes
 pip-tools~=6.13
-pre-commit~=3.2;python_version>="3.8"
+pre-commit~=3.3
 pre-commit-hooks~=4.4
 pytest~=7.3
 pytest-csv~=3.0

From 8a4572cfc2a2c18a3a6f8b31ba08636baad55433 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 3 Aug 2023 14:11:14 -0400
Subject: [PATCH 471/603] Update types-requests requirement from ~=2.28 to
 ~=2.29 (#749)

* Update types-requests requirement from ~=2.28 to ~=2.29

Updates the requirements on [types-requests](https://github.com/python/typeshed) to permit the latest version.
- [Release notes](https://github.com/python/typeshed/releases)
- [Commits](https://github.com/python/typeshed/commits)

---
updated-dependencies:
- dependency-name: types-requests
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230501-231035.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230501-231035.yaml

diff --git a/.changes/unreleased/Dependencies-20230501-231035.yaml b/.changes/unreleased/Dependencies-20230501-231035.yaml
new file mode 100644
index 00000000..7bbf9820
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230501-231035.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update types-requests requirement from ~=2.28 to ~=2.29"
+time: 2023-05-01T23:10:35.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 749
diff --git a/dev-requirements.txt b/dev-requirements.txt
index ccc5b116..b61700e3 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -24,7 +24,7 @@ pytest-xdist~=3.2
 pytz~=2023.3
 tox~=4.4;python_version>="3.8"
 types-pytz~=2023.3
-types-requests~=2.28
+types-requests~=2.29
 twine~=4.0
 wheel~=0.40
 

From 7d0da163441f117f25b7304fbd56ba77b93d1864 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 3 Aug 2023 16:34:44 -0400
Subject: [PATCH 472/603] Update tox requirement from ~=3.0 to ~=4.5 (#741)

* Update tox requirement from ~=3.0 to ~=4.5

Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/commits/4.5.0)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230424-230630.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230424-230630.yaml

diff --git a/.changes/unreleased/Dependencies-20230424-230630.yaml b/.changes/unreleased/Dependencies-20230424-230630.yaml
new file mode 100644
index 00000000..1f96daad
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230424-230630.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update tox requirement from ~=3.0 to ~=4.5"
+time: 2023-04-24T23:06:30.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 741
diff --git a/dev-requirements.txt b/dev-requirements.txt
index b61700e3..b1eb92d1 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,7 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.2
 pytz~=2023.3
-tox~=4.4;python_version>="3.8"
+tox~=4.5
 types-pytz~=2023.3
 types-requests~=2.29
 twine~=4.0

From f0d33db3eedecfbaafca074a35c031921f6ad250 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 4 Aug 2023 11:35:54 -0400
Subject: [PATCH 473/603] Update pyodbc requirement from ~=4.0.30 to ~=4.0.39
 (#742)

* Update pyodbc requirement from ~=4.0.30 to ~=4.0.39

Updates the requirements on [pyodbc](https://github.com/mkleehammer/pyodbc) to permit the latest version.
- [Release notes](https://github.com/mkleehammer/pyodbc/releases)
- [Commits](https://github.com/mkleehammer/pyodbc/compare/4.0.30...4.0.39)

---
updated-dependencies:
- dependency-name: pyodbc
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230424-230645.yaml | 6 ++++++
 requirements.txt                                      | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230424-230645.yaml

diff --git a/.changes/unreleased/Dependencies-20230424-230645.yaml b/.changes/unreleased/Dependencies-20230424-230645.yaml
new file mode 100644
index 00000000..83e1bb44
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230424-230645.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pyodbc requirement from ~=4.0.30 to ~=4.0.39"
+time: 2023-04-24T23:06:45.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 742
diff --git a/requirements.txt b/requirements.txt
index 4132645f..e58ecdd4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 PyHive[hive]>=0.6.0,<0.7.0
 requests[python]>=2.28.1
 
-pyodbc~=4.0.30
+pyodbc~=4.0.39
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability

From 6e81502564d3dec738add7cd15024167320b26ae Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 4 Aug 2023 12:29:58 -0400
Subject: [PATCH 474/603] Update flake8 requirement from ~=6.0 to ~=6.1 (#849)

* Update flake8 requirement from ~=6.0 to ~=6.1

Updates the requirements on [flake8](https://github.com/pycqa/flake8) to permit the latest version.
- [Commits](https://github.com/pycqa/flake8/compare/6.0.0...6.1.0)

---
updated-dependencies:
- dependency-name: flake8
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* remove extra newline from changie and workflow that generates the changie

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependencies-20230803-224622.yaml | 6 ++++++
 .github/workflows/bot-changelog.yml                   | 2 +-
 dev-requirements.txt                                  | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20230803-224622.yaml

diff --git a/.changes/unreleased/Dependencies-20230803-224622.yaml b/.changes/unreleased/Dependencies-20230803-224622.yaml
new file mode 100644
index 00000000..119a08e5
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230803-224622.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update flake8 requirement from ~=6.0 to ~=6.1"
+time: 2023-08-03T22:46:22.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 849
diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
index 52f50b1e..89972070 100644
--- a/.github/workflows/bot-changelog.yml
+++ b/.github/workflows/bot-changelog.yml
@@ -57,4 +57,4 @@ jobs:
         commit_message: "Add automated changelog yaml from template for bot PR"
         changie_kind: ${{ matrix.changie_kind }}
         label: ${{ matrix.label }}
-        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  PR: ${{ github.event.pull_request.number }}\n"
+        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  PR: ${{ github.event.pull_request.number }}"
diff --git a/dev-requirements.txt b/dev-requirements.txt
index b1eb92d1..6b5634de 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -8,7 +8,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 black~=23.3
 bumpversion~=0.6.0
 click~=8.1
-flake8~=6.0;python_version>="3.8"
+flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13

From 257853cbf9785166fe239540984dcd1115dd464a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 4 Aug 2023 15:20:23 -0400
Subject: [PATCH 475/603] Update pytest requirement from ~=7.3 to ~=7.4 (#852)

* Update pytest requirement from ~=7.3 to ~=7.4

Updates the requirements on [pytest](https://github.com/pytest-dev/pytest) to permit the latest version.
- [Release notes](https://github.com/pytest-dev/pytest/releases)
- [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest/compare/7.3.0...7.4.0)

---
updated-dependencies:
- dependency-name: pytest
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* remove extra newline from changie

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependencies-20230803-224629.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230803-224629.yaml

diff --git a/.changes/unreleased/Dependencies-20230803-224629.yaml b/.changes/unreleased/Dependencies-20230803-224629.yaml
new file mode 100644
index 00000000..6865c7c7
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230803-224629.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytest requirement from ~=7.3 to ~=7.4"
+time: 2023-08-03T22:46:29.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 852
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 6b5634de..a1d3050c 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -16,7 +16,7 @@ mypy==1.3.0  # patch updates have historically introduced breaking changes
 pip-tools~=6.13
 pre-commit~=3.3
 pre-commit-hooks~=4.4
-pytest~=7.3
+pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2

From 5d58150d11b1910cd22d732a3a0349494bfa1312 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 4 Aug 2023 17:15:14 -0400
Subject: [PATCH 476/603] Update pytest-xdist requirement from ~=3.2 to ~=3.3
 (#851)

* Update pytest-xdist requirement from ~=3.2 to ~=3.3

Updates the requirements on [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) to permit the latest version.
- [Changelog](https://github.com/pytest-dev/pytest-xdist/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest-xdist/compare/v3.2.0...v3.3.1)

---
updated-dependencies:
- dependency-name: pytest-xdist
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* remove extra newline from changie

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230803-224626.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230803-224626.yaml

diff --git a/.changes/unreleased/Dependencies-20230803-224626.yaml b/.changes/unreleased/Dependencies-20230803-224626.yaml
new file mode 100644
index 00000000..c8b9ef04
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230803-224626.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytest-xdist requirement from ~=3.2 to ~=3.3"
+time: 2023-08-03T22:46:26.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 851
diff --git a/dev-requirements.txt b/dev-requirements.txt
index a1d3050c..fcd7dc65 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -20,7 +20,7 @@ pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
-pytest-xdist~=3.2
+pytest-xdist~=3.3
 pytz~=2023.3
 tox~=4.5
 types-pytz~=2023.3

From d917bfaf0f29e123633d48e5089abdfd8f2911ac Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 7 Aug 2023 11:42:06 -0400
Subject: [PATCH 477/603] Update pip-tools requirement from ~=6.13 to ~=7.2
 (#856)

* Update pip-tools requirement from ~=6.13 to ~=7.2

Updates the requirements on [pip-tools](https://github.com/jazzband/pip-tools) to permit the latest version.
- [Release notes](https://github.com/jazzband/pip-tools/releases)
- [Changelog](https://github.com/jazzband/pip-tools/blob/main/CHANGELOG.md)
- [Commits](https://github.com/jazzband/pip-tools/compare/6.13.0...7.2.0)

---
updated-dependencies:
- dependency-name: pip-tools
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230804-225232.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230804-225232.yaml

diff --git a/.changes/unreleased/Dependencies-20230804-225232.yaml b/.changes/unreleased/Dependencies-20230804-225232.yaml
new file mode 100644
index 00000000..f4a09b6b
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230804-225232.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pip-tools requirement from ~=6.13 to ~=7.2"
+time: 2023-08-04T22:52:32.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 856
diff --git a/dev-requirements.txt b/dev-requirements.txt
index fcd7dc65..11fc038f 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -13,7 +13,7 @@ flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
 mypy==1.3.0  # patch updates have historically introduced breaking changes
-pip-tools~=6.13
+pip-tools~=7.2
 pre-commit~=3.3
 pre-commit-hooks~=4.4
 pytest~=7.4

From ce265ba7f39e41221e5a01e5d6f706e1cb05e9bc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 7 Aug 2023 15:51:42 -0400
Subject: [PATCH 478/603] Update wheel requirement from ~=0.40 to ~=0.41 (#858)

* Update wheel requirement from ~=0.40 to ~=0.41

Updates the requirements on [wheel](https://github.com/pypa/wheel) to permit the latest version.
- [Changelog](https://github.com/pypa/wheel/blob/main/docs/news.rst)
- [Commits](https://github.com/pypa/wheel/compare/0.40.0...0.41.0)

---
updated-dependencies:
- dependency-name: wheel
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230804-225243.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230804-225243.yaml

diff --git a/.changes/unreleased/Dependencies-20230804-225243.yaml b/.changes/unreleased/Dependencies-20230804-225243.yaml
new file mode 100644
index 00000000..07b9bdb4
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230804-225243.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update wheel requirement from ~=0.40 to ~=0.41"
+time: 2023-08-04T22:52:43.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 858
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 11fc038f..a95ccd0a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -26,7 +26,7 @@ tox~=4.5
 types-pytz~=2023.3
 types-requests~=2.29
 twine~=4.0
-wheel~=0.40
+wheel~=0.41
 
 # Adapter specific dependencies
 mock~=5.0

From 33e86d6fefb761840453ce66adbd826dcc14da25 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 9 Aug 2023 00:37:09 -0400
Subject: [PATCH 479/603] Update tox requirement from ~=4.5 to ~=4.6 (#861)

* Update tox requirement from ~=4.5 to ~=4.6

Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/compare/4.5.0...4.6.4)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230807-221037.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230807-221037.yaml

diff --git a/.changes/unreleased/Dependencies-20230807-221037.yaml b/.changes/unreleased/Dependencies-20230807-221037.yaml
new file mode 100644
index 00000000..daa1e3ba
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230807-221037.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update tox requirement from ~=4.5 to ~=4.6"
+time: 2023-08-07T22:10:37.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 861
diff --git a/dev-requirements.txt b/dev-requirements.txt
index a95ccd0a..705b3555 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,7 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.3
 pytz~=2023.3
-tox~=4.5
+tox~=4.6
 types-pytz~=2023.3
 types-requests~=2.29
 twine~=4.0

From 6000540c4325a111998fc299d0c945ea00cf8e9a Mon Sep 17 00:00:00 2001
From: etheleon <picy2k@gmail.com>
Date: Thu, 10 Aug 2023 02:47:50 +0800
Subject: [PATCH 480/603] Update adapters.sql to use `tblproperties` macro
 (#848)

* Update seed.sql to include tblproperties

* Update seed.sql

fix a typo in the macro name

* update macro name and follow pattern from other use cases

* changed macro `dbt_spark_tblproperties_clause()` to `spark__tblproperties_clause()`
* create new macro `tblproperties_clause()` which references the above

* move the macro call from seed.sql to adapters.sql

* include changelog

* Update Fixes-20230810-014122.yaml

* Update Fixes-20230810-014122.yaml

---------

Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .changes/unreleased/Fixes-20230810-014122.yaml | 6 ++++++
 dbt/include/spark/macros/adapters.sql          | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20230810-014122.yaml

diff --git a/.changes/unreleased/Fixes-20230810-014122.yaml b/.changes/unreleased/Fixes-20230810-014122.yaml
new file mode 100644
index 00000000..fcb34237
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230810-014122.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: include tblproperties macro in adapters.sql create table
+time: 2023-08-10T01:41:22.782982+08:00
+custom:
+  Author: etheleon
+  Issue: "865"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 202564e4..471d1dee 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -1,4 +1,8 @@
-{% macro dbt_spark_tblproperties_clause() -%}
+{% macro tblproperties_clause() %}
+  {{ return(adapter.dispatch('tblproperties_clause', 'dbt')()) }}
+{%- endmacro -%}
+
+{% macro spark__tblproperties_clause() -%}
   {%- set tblproperties = config.get('tblproperties') -%}
   {%- if tblproperties is not none %}
     tblproperties (
@@ -156,10 +160,12 @@
       {% endif %}
       {{ file_format_clause() }}
       {{ options_clause() }}
+      {{ tblproperties_clause() }}
       {{ partition_cols(label="partitioned by") }}
       {{ clustered_cols(label="clustered by") }}
       {{ location_clause() }}
       {{ comment_clause() }}
+
       as
       {{ compiled_code }}
     {%- endif -%}

From 0b80b47f6c8c323ee1409b54768d68972dd91ef9 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 10 Aug 2023 13:45:43 -0700
Subject: [PATCH 481/603] test adding lib deps to spark session docker (#845)

* test adding lib deps to spark session docker

* Updated config.yml

* downgrade spark-sessions docker image

* force python to 3.10

* force python to 3.10

* force python to 3.10

* force python to 3.10

* force python to 3.10
---
 .circleci/config.yml | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 71ca356c..f2a3b635 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,23 +11,24 @@ jobs:
       - run: tox -e flake8,unit
 
 # Turning off for now due to flaky runs of tests will turn back on at later date.
-  # integration-spark-session:
-  #   environment:
-  #     DBT_INVOCATION_ENV: circle
-  #   docker:
-  #     - image: godatadriven/pyspark:3.1
-  #   steps:
-  #     - checkout
-  #     - run: apt-get update
-  #     - run: python3 -m pip install --upgrade pip
-  #     - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev
-  #     - run: python3 -m pip install tox
-  #     - run:
-  #         name: Run integration tests
-  #         command: tox -e integration-spark-session
-  #         no_output_timeout: 1h
-  #     - store_artifacts:
-  #         path: ./logs
+  integration-spark-session:
+     environment:
+       DBT_INVOCATION_ENV: circle
+     docker:
+       - image: godatadriven/pyspark:3.1
+     steps:
+       - checkout
+       - run: apt-get update
+       - run: conda install python=3.10
+       - run: python3 -m pip install --upgrade pip
+       - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev libxml2-dev libxslt-dev
+       - run: python3 -m pip install tox
+       - run:
+           name: Run integration tests
+           command: tox -e integration-spark-session
+           no_output_timeout: 1h
+       - store_artifacts:
+           path: ./logs
 
   integration-spark-thrift:
     environment:
@@ -116,9 +117,9 @@ workflows:
   test-everything:
     jobs:
       - unit
-      # - integration-spark-session:
-      #     requires:
-      #       - unit
+      - integration-spark-session:
+          requires:
+            - unit
       - integration-spark-thrift:
           requires:
             - unit

From 5ed503ace5e9e6fc82603f595d158739c27657ff Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@tabular.io>
Date: Thu, 10 Aug 2023 23:23:31 +0200
Subject: [PATCH 482/603] Add ConnectionWrapper base class (#828)

* Add ConnectionWrapper base class

* Changie

* Rename to SparkConnectionWrapper

* Cleanup
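
The new abstract base gives the PyHive and session wrappers a single contract to implement. As a rough illustration only (not part of this patch), a concrete wrapper would look something like the Python sketch below; the class name `MyConnectionWrapper` and the `handle`/`_cursor` attributes are placeholders, and only the methods required by the ABC in the diff further down are shown.

```
# Illustrative sketch: what a concrete wrapper must provide to satisfy
# the new SparkConnectionWrapper ABC introduced in this patch.
from typing import Any, List, Optional, Sequence, Tuple

from dbt.adapters.spark.connections import SparkConnectionWrapper


class MyConnectionWrapper(SparkConnectionWrapper):
    def __init__(self, handle: Any) -> None:
        self.handle = handle
        self._cursor: Any = None

    def cursor(self) -> "MyConnectionWrapper":
        self._cursor = self.handle.cursor()
        return self

    def cancel(self) -> None:
        if self._cursor is not None:
            self._cursor.cancel()

    def close(self) -> None:
        if self._cursor is not None:
            self._cursor.close()
        self.handle.close()

    def rollback(self) -> None:
        pass  # transactions are no-ops for Spark connections

    def fetchall(self) -> Optional[List]:
        return self._cursor.fetchall()

    def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
        self._cursor.execute(sql, bindings)

    @property
    def description(
        self,
    ) -> Sequence[
        Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool]
    ]:
        return self._cursor.description
```
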
---
 .../unreleased/Features-20230707-135442.yaml  |  6 +++
 dbt/adapters/spark/connections.py             | 49 +++++++++++++++++--
 dbt/adapters/spark/session.py                 | 17 +++++--
 3 files changed, 63 insertions(+), 9 deletions(-)
 create mode 100644 .changes/unreleased/Features-20230707-135442.yaml

diff --git a/.changes/unreleased/Features-20230707-135442.yaml b/.changes/unreleased/Features-20230707-135442.yaml
new file mode 100644
index 00000000..39b11952
--- /dev/null
+++ b/.changes/unreleased/Features-20230707-135442.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add SessionConnectionWrapper
+time: 2023-07-07T13:54:42.41341+02:00
+custom:
+  Author: Fokko
+  Issue: "829"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 6c7899ad..5756aba3 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -25,7 +25,9 @@
 from dbt.contracts.connection import Connection
 from hologram.helpers import StrEnum
 from dataclasses import dataclass, field
-from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable
+from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable, Sequence
+
+from abc import ABC, abstractmethod
 
 try:
     from thrift.transport.TSSLSocket import TSSLSocket
@@ -158,7 +160,42 @@ def _connection_keys(self) -> Tuple[str, ...]:
         return "host", "port", "cluster", "endpoint", "schema", "organization"
 
 
-class PyhiveConnectionWrapper(object):
+class SparkConnectionWrapper(ABC):
+    @abstractmethod
+    def cursor(self) -> "SparkConnectionWrapper":
+        pass
+
+    @abstractmethod
+    def cancel(self) -> None:
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        pass
+
+    @abstractmethod
+    def rollback(self) -> None:
+        pass
+
+    @abstractmethod
+    def fetchall(self) -> Optional[List]:
+        pass
+
+    @abstractmethod
+    def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
+        pass
+
+    @property
+    @abstractmethod
+    def description(
+        self,
+    ) -> Sequence[
+        Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool]
+    ]:
+        pass
+
+
+class PyhiveConnectionWrapper(SparkConnectionWrapper):
     """Wrap a Spark connection in a way that no-ops transactions"""
 
     # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html  # noqa
@@ -268,7 +305,11 @@ def _fix_binding(cls, value: Any) -> Union[float, str]:
             return value
 
     @property
-    def description(self) -> Tuple[Tuple[str, Any, int, int, int, int, bool]]:
+    def description(
+        self,
+    ) -> Sequence[
+        Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool]
+    ]:
         assert self._cursor, "Cursor not available"
         return self._cursor.description
 
@@ -354,7 +395,7 @@ def open(cls, connection: Connection) -> Connection:
 
         creds = connection.credentials
         exc = None
-        handle: Any
+        handle: SparkConnectionWrapper
 
         for i in range(1 + creds.connect_retries):
             try:
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index 0e371717..eeb01115 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -4,11 +4,12 @@
 
 import datetime as dt
 from types import TracebackType
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union, Sequence
 
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
 from pyspark.sql import DataFrame, Row, SparkSession
+from dbt.adapters.spark.connections import SparkConnectionWrapper
 
 
 logger = AdapterLogger("Spark")
@@ -44,13 +45,15 @@ def __exit__(
     @property
     def description(
         self,
-    ) -> List[Tuple[str, str, None, None, None, None, bool]]:
+    ) -> Sequence[
+        Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool]
+    ]:
         """
         Get the description.
 
         Returns
         -------
-        out : List[Tuple[str, str, None, None, None, None, bool]]
+        out : Sequence[Tuple[str, str, None, None, None, None, bool]]
             The description.
 
         Source
@@ -180,7 +183,7 @@ def cursor(self) -> Cursor:
         return Cursor(server_side_parameters=self.server_side_parameters)
 
 
-class SessionConnectionWrapper(object):
+class SessionConnectionWrapper(SparkConnectionWrapper):
     """Connection wrapper for the session connection method."""
 
     handle: Connection
@@ -220,7 +223,11 @@ def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
             self._cursor.execute(sql, *bindings)
 
     @property
-    def description(self) -> List[Tuple[str, str, None, None, None, None, bool]]:
+    def description(
+        self,
+    ) -> Sequence[
+        Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool]
+    ]:
         assert self._cursor, "Cursor not available"
         return self._cursor.description
 

From a4ffc6a3243278f99e213c278df7ba8a32af3cac Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@tabular.io>
Date: Fri, 11 Aug 2023 01:33:00 +0200
Subject: [PATCH 483/603] [ADAP-558] Wrap `AnalysisException` into
 `DbtRuntimeError` (#782)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a query fails Spark's analysis, PySpark raises an
AnalysisException; this change wraps that exception in a
DbtRuntimeError so dbt reports it as a runtime error.

Resolves #781
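
Concretely, the fix amounts to the wrapping pattern below, a condensed Python sketch of the change to `Cursor.execute` in `dbt/adapters/spark/session.py` shown in the diff at the end of this message (the `run_sql` helper is illustrative):

```
# Condensed sketch of the error handling this patch adds in
# dbt/adapters/spark/session.py: PySpark analysis failures are re-raised
# as dbt's DbtRuntimeError instead of leaking AnalysisException.
from dbt.exceptions import DbtRuntimeError
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.utils import AnalysisException


def run_sql(spark_session: SparkSession, sql: str) -> DataFrame:
    try:
        return spark_session.sql(sql)
    except AnalysisException as exc:
        # Surface the Spark analysis error through dbt's own exception type
        raise DbtRuntimeError(str(exc)) from exc
```

The log below shows the unwrapped AnalysisException a user currently hits: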

```
➜  dbt-tabular git:(fd-fix) ✗ dbt run
09:56:58  Running with dbt=1.6.0-b1
09:56:59  Found 4 models, 3 tests, 0 snapshots, 0 analyses, 356 macros, 1 operation, 0 seed files, 0 sources, 0 exposures, 0 metrics, 0 groups
09:56:59
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/05/17 11:57:00 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/05/17 11:57:00 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
23/05/17 11:57:00 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.
09:57:04
09:57:04  Finished running  in 0 hours 0 minutes and 4.92 seconds (4.92s).
09:57:04  Encountered an error:
SHOW TABLE EXTENDED is not supported for v2 tables.;
ShowTableExtended *, [namespace#6, tableName#7, isTemporary#8, information#9]
+- ResolvedNamespace org.apache.iceberg.spark.SparkCatalog@7d95f416, [dbt_tabular]

09:57:04  Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/cli/requires.py", line 86, in wrapper
    result, success = func(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/cli/requires.py", line 71, in wrapper
    return func(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/cli/requires.py", line 142, in wrapper
    return func(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/cli/requires.py", line 168, in wrapper
    return func(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/cli/requires.py", line 215, in wrapper
    return func(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/cli/requires.py", line 250, in wrapper
    return func(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/cli/main.py", line 566, in run
    results = task.run()
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/task/runnable.py", line 443, in run
    result = self.execute_with_hooks(selected_uids)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/task/runnable.py", line 408, in execute_with_hooks
    self.before_run(adapter, selected_uids)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/task/run.py", line 447, in before_run
    self.populate_adapter_cache(adapter, required_schemas)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/task/runnable.py", line 386, in populate_adapter_cache
    adapter.set_relations_cache(self.manifest)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/adapters/base/impl.py", line 462, in set_relations_cache
    self._relations_cache_for_schemas(manifest, required_schemas)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/adapters/base/impl.py", line 439, in _relations_cache_for_schemas
    for relation in future.result():
  File "/opt/homebrew/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py", line 439, in result
    return self.__get_result()
  File "/opt/homebrew/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py", line 391, in __get_result
    raise self._exception
  File "/opt/homebrew/Cellar/python@3.9/3.9.16/Frameworks/Python.framework/Versions/3.9/lib/python3.9/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/utils.py", line 464, in connected
    return func(*args, **kwargs)
  File "/Users/fokkodriesprong/Desktop/dbt-spark/dbt/adapters/spark/impl.py", line 199, in list_relations_without_caching
    show_table_extended_rows = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/adapters/base/impl.py", line 1044, in execute_macro
    result = macro_function(**kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/clients/jinja.py", line 330, in __call__
    return self.call_macro(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/clients/jinja.py", line 257, in call_macro
    return macro(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 763, in __call__
    return self._invoke(arguments, autoescape)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 777, in _invoke
    rv = self._func(*arguments)
  File "<template>", line 21, in macro
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/sandbox.py", line 393, in call
    return __context.call(__obj, *args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 298, in call
    return __obj(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/clients/jinja.py", line 330, in __call__
    return self.call_macro(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/clients/jinja.py", line 257, in call_macro
    return macro(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 763, in __call__
    return self._invoke(arguments, autoescape)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 777, in _invoke
    rv = self._func(*arguments)
  File "<template>", line 33, in macro
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/sandbox.py", line 393, in call
    return __context.call(__obj, *args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 298, in call
    return __obj(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/clients/jinja.py", line 330, in __call__
    return self.call_macro(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/clients/jinja.py", line 257, in call_macro
    return macro(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 763, in __call__
    return self._invoke(arguments, autoescape)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 777, in _invoke
    rv = self._func(*arguments)
  File "<template>", line 52, in macro
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/sandbox.py", line 393, in call
    return __context.call(__obj, *args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/jinja2/runtime.py", line 298, in call
    return __obj(*args, **kwargs)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/adapters/base/impl.py", line 290, in execute
    return self.connections.execute(sql=sql, auto_begin=auto_begin, fetch=fetch, limit=limit)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/adapters/sql/connections.py", line 147, in execute
    _, cursor = self.add_query(sql, auto_begin)
  File "/opt/homebrew/lib/python3.9/site-packages/dbt/adapters/sql/connections.py", line 81, in add_query
    cursor.execute(sql, bindings)
  File "/Users/fokkodriesprong/Desktop/dbt-spark/dbt/adapters/spark/session.py", line 212, in execute
    self._cursor.execute(sql)
  File "/Users/fokkodriesprong/Desktop/dbt-spark/dbt/adapters/spark/session.py", line 116, in execute
    self._df = spark_session.sql(sql)
  File "/opt/homebrew/lib/python3.9/site-packages/pyspark/sql/session.py", line 1034, in sql
    return DataFrame(self._jsparkSession.sql(sqlQuery), self)
  File "/opt/homebrew/lib/python3.9/site-packages/py4j/java_gateway.py", line 1321, in __call__
    return_value = get_return_value(
  File "/opt/homebrew/lib/python3.9/site-packages/pyspark/sql/utils.py", line 196, in deco
    raise converted from None
pyspark.sql.utils.AnalysisException: SHOW TABLE EXTENDED is not supported for v2 tables.;
ShowTableExtended *, [namespace#6, tableName#7, isTemporary#8, information#9]
+- ResolvedNamespace org.apache.iceberg.spark.SparkCatalog@7d95f416, [dbt_tabular]

```
---
 .changes/unreleased/Fixes-20230517-142331.yaml |  6 ++++++
 dbt/adapters/spark/session.py                  | 11 +++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20230517-142331.yaml

diff --git a/.changes/unreleased/Fixes-20230517-142331.yaml b/.changes/unreleased/Fixes-20230517-142331.yaml
new file mode 100644
index 00000000..9f90e48b
--- /dev/null
+++ b/.changes/unreleased/Fixes-20230517-142331.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Wrap AnalysisException into DbtRuntimeError
+time: 2023-05-17T14:23:31.263799+02:00
+custom:
+  Author: Fokko
+  Issue: "782"
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index eeb01115..b5b2bebd 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -6,10 +6,12 @@
 from types import TracebackType
 from typing import Any, Dict, List, Optional, Tuple, Union, Sequence
 
+from dbt.adapters.spark.connections import SparkConnectionWrapper
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
+from dbt.exceptions import DbtRuntimeError
 from pyspark.sql import DataFrame, Row, SparkSession
-from dbt.adapters.spark.connections import SparkConnectionWrapper
+from pyspark.sql.utils import AnalysisException
 
 
 logger = AdapterLogger("Spark")
@@ -110,13 +112,18 @@ def execute(self, sql: str, *parameters: Any) -> None:
         """
         if len(parameters) > 0:
             sql = sql % parameters
+
         builder = SparkSession.builder.enableHiveSupport()
 
         for parameter, value in self.server_side_parameters.items():
             builder = builder.config(parameter, value)
 
         spark_session = builder.getOrCreate()
-        self._df = spark_session.sql(sql)
+
+        try:
+            self._df = spark_session.sql(sql)
+        except AnalysisException as exc:
+            raise DbtRuntimeError(str(exc)) from exc
 
     def fetchall(self) -> Optional[List[Row]]:
         """

From c40184eccaa8805679db09db867f273d9801ca60 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 11 Aug 2023 16:35:04 +0000
Subject: [PATCH 484/603] Update pip-tools requirement from ~=7.2 to ~=7.3
 (#863)

* Update pip-tools requirement from ~=7.2 to ~=7.3

Updates the requirements on [pip-tools](https://github.com/jazzband/pip-tools) to permit the latest version.
- [Release notes](https://github.com/jazzband/pip-tools/releases)
- [Changelog](https://github.com/jazzband/pip-tools/blob/main/CHANGELOG.md)
- [Commits](https://github.com/jazzband/pip-tools/compare/7.2.0...7.3.0)

---
updated-dependencies:
- dependency-name: pip-tools
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230809-043913.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230809-043913.yaml

diff --git a/.changes/unreleased/Dependencies-20230809-043913.yaml b/.changes/unreleased/Dependencies-20230809-043913.yaml
new file mode 100644
index 00000000..28432003
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230809-043913.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pip-tools requirement from ~=7.2 to ~=7.3"
+time: 2023-08-09T04:39:13.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 863
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 705b3555..f34d98fd 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -13,7 +13,7 @@ flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
 mypy==1.3.0  # patch updates have historically introduced breaking changes
-pip-tools~=7.2
+pip-tools~=7.3
 pre-commit~=3.3
 pre-commit-hooks~=4.4
 pytest~=7.4

From c94891e7fd4032d171de46bbe347b6521938ed50 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sat, 12 Aug 2023 13:02:56 -0400
Subject: [PATCH 485/603] Update tox requirement from ~=4.6 to ~=4.7 (#867)

* Update tox requirement from ~=4.6 to ~=4.7

Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/compare/4.6.0...4.7.0)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230811-221135.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230811-221135.yaml

diff --git a/.changes/unreleased/Dependencies-20230811-221135.yaml b/.changes/unreleased/Dependencies-20230811-221135.yaml
new file mode 100644
index 00000000..4fd2e4f5
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230811-221135.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update tox requirement from ~=4.6 to ~=4.7"
+time: 2023-08-11T22:11:35.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 867
diff --git a/dev-requirements.txt b/dev-requirements.txt
index f34d98fd..c08ace38 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,7 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.3
 pytz~=2023.3
-tox~=4.6
+tox~=4.7
 types-pytz~=2023.3
 types-requests~=2.29
 twine~=4.0

From 097534b9962a8ff2aec098584924b43d6f77c4f2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sat, 12 Aug 2023 17:05:50 +0000
Subject: [PATCH 486/603] Bump mypy from 1.3.0 to 1.4.1 (#860)

* Bump mypy from 1.3.0 to 1.4.1

Bumps [mypy](https://github.com/python/mypy) from 1.3.0 to 1.4.1.
- [Commits](https://github.com/python/mypy/compare/v1.3.0...v1.4.1)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230807-221033.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230807-221033.yaml

diff --git a/.changes/unreleased/Dependencies-20230807-221033.yaml b/.changes/unreleased/Dependencies-20230807-221033.yaml
new file mode 100644
index 00000000..94a26114
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230807-221033.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.3.0 to 1.4.1"
+time: 2023-08-07T22:10:33.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 860
diff --git a/dev-requirements.txt b/dev-requirements.txt
index c08ace38..3abb54fc 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.3.0  # patch updates have historically introduced breaking changes
+mypy==1.4.1  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.3
 pre-commit-hooks~=4.4

From 88011bcc238ad1b189bc60bb8bc9fdbfd9609720 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 15 Aug 2023 02:25:01 -0400
Subject: [PATCH 487/603] Update tox requirement from ~=4.7 to ~=4.8 (#871)

* Update tox requirement from ~=4.7 to ~=4.8

Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/compare/4.7.0...4.8.0)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230814-224754.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230814-224754.yaml

diff --git a/.changes/unreleased/Dependencies-20230814-224754.yaml b/.changes/unreleased/Dependencies-20230814-224754.yaml
new file mode 100644
index 00000000..4cd4a01d
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230814-224754.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update tox requirement from ~=4.7 to ~=4.8"
+time: 2023-08-14T22:47:54.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 871
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 3abb54fc..64c73657 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,7 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.3
 pytz~=2023.3
-tox~=4.7
+tox~=4.8
 types-pytz~=2023.3
 types-requests~=2.29
 twine~=4.0

From 7c4228859c5f07f165c14f16c19f026f2edd7d8f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 15 Aug 2023 06:28:34 +0000
Subject: [PATCH 488/603] Bump mypy from 1.4.1 to 1.5.0 (#872)

* Bump mypy from 1.4.1 to 1.5.0

Bumps [mypy](https://github.com/python/mypy) from 1.4.1 to 1.5.0.
- [Commits](https://github.com/python/mypy/compare/v1.4.1...v1.5.0)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230814-224757.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230814-224757.yaml

diff --git a/.changes/unreleased/Dependencies-20230814-224757.yaml b/.changes/unreleased/Dependencies-20230814-224757.yaml
new file mode 100644
index 00000000..7014382b
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230814-224757.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.4.1 to 1.5.0"
+time: 2023-08-14T22:47:57.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 872
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 64c73657..11369654 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.4.1  # patch updates have historically introduced breaking changes
+mypy==1.5.0  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.3
 pre-commit-hooks~=4.4

From 537b4265ae535fa2c284a766be56b602d655cea8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 15 Aug 2023 18:13:11 +0000
Subject: [PATCH 489/603] Update black requirement from ~=23.3 to ~=23.7 (#857)

* Update black requirement from ~=23.3 to ~=23.7

Updates the requirements on [black](https://github.com/psf/black) to permit the latest version.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/23.3.0...23.7.0)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230804-225236.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230804-225236.yaml

diff --git a/.changes/unreleased/Dependencies-20230804-225236.yaml b/.changes/unreleased/Dependencies-20230804-225236.yaml
new file mode 100644
index 00000000..d45b9551
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230804-225236.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update black requirement from ~=23.3 to ~=23.7"
+time: 2023-08-04T22:52:36.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 857
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 11369654..53260e89 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.3
+black~=23.7
 bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"

From cf0db2a568d697b60f8903785da34f89be7bca66 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 16 Aug 2023 01:30:07 +0000
Subject: [PATCH 490/603] Update mock requirement from ~=5.0 to ~=5.1 (#859)

* Update mock requirement from ~=5.0 to ~=5.1

Updates the requirements on [mock](https://github.com/testing-cabal/mock) to permit the latest version.
- [Changelog](https://github.com/testing-cabal/mock/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/testing-cabal/mock/compare/5.0.0...5.1.0)

---
updated-dependencies:
- dependency-name: mock
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230804-225249.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230804-225249.yaml

diff --git a/.changes/unreleased/Dependencies-20230804-225249.yaml b/.changes/unreleased/Dependencies-20230804-225249.yaml
new file mode 100644
index 00000000..68b3a248
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230804-225249.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update mock requirement from ~=5.0 to ~=5.1"
+time: 2023-08-04T22:52:49.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 859
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 53260e89..4dff7106 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -29,6 +29,6 @@ twine~=4.0
 wheel~=0.41
 
 # Adapter specific dependencies
-mock~=5.0
+mock~=5.1
 sasl~=0.3.1
 thrift_sasl~=0.4.3

From 62e87eaf3cafb8aee1d5e0bc85e3198fd784025f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 16 Aug 2023 21:01:56 +0000
Subject: [PATCH 491/603] Update types-requests requirement from ~=2.29 to
 ~=2.31 (#850)

* Update types-requests requirement from ~=2.29 to ~=2.31

Updates the requirements on [types-requests](https://github.com/python/typeshed) to permit the latest version.
- [Commits](https://github.com/python/typeshed/commits)

---
updated-dependencies:
- dependency-name: types-requests
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* remove extra newline from changie

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230803-224623.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230803-224623.yaml

diff --git a/.changes/unreleased/Dependencies-20230803-224623.yaml b/.changes/unreleased/Dependencies-20230803-224623.yaml
new file mode 100644
index 00000000..97ffd555
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230803-224623.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update types-requests requirement from ~=2.29 to ~=2.31"
+time: 2023-08-03T22:46:23.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 850
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 4dff7106..8c50d55a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -24,7 +24,7 @@ pytest-xdist~=3.3
 pytz~=2023.3
 tox~=4.8
 types-pytz~=2023.3
-types-requests~=2.29
+types-requests~=2.31
 twine~=4.0
 wheel~=0.41
 

From cb39fb106eb9d0813e4b547d978922afbbbf536f Mon Sep 17 00:00:00 2001
From: hanna-liashchuk <47921651+hanna-liashchuk@users.noreply.github.com>
Date: Thu, 17 Aug 2023 01:40:24 +0300
Subject: [PATCH 492/603] Added server side parameters for thrift connection
 type (#577)

* Added server side parameters for thrift connection type

* Adding CHANGELOG Entry

* Minor change: line length and commas.

* Added server side parameters for thrift connection type

* trailing commas

* fix formatting
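
In effect, both thrift code paths now forward the profile's `server_side_parameters` to PyHive's `configuration` argument. The Python sketch below condenses the change from the diff that follows; `creds` stands in for the credentials object built from the profile, and `connect_thrift` is an illustrative helper, not a function in the codebase.

```
# Condensed sketch of the behavior this patch adds: the thrift connection
# now passes the profile's server_side_parameters through to PyHive's
# `configuration` argument.
from pyhive import hive


def connect_thrift(creds, transport=None):
    if transport is not None:
        # SSL / custom-transport path
        return hive.connect(
            thrift_transport=transport,
            configuration=creds.server_side_parameters,
        )
    # plain host/port path
    return hive.connect(
        host=creds.host,
        port=creds.port,
        username=creds.username,
        auth=creds.auth,
        kerberos_service_name=creds.kerberos_service_name,
        password=creds.password,
        configuration=creds.server_side_parameters,
    )
```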

---------

Co-authored-by: VShkaberda <v.shkaberda@gmail.com>
Co-authored-by: Florian Eiden <florian.eiden@fleid.fr>
Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Co-authored-by: Colin <colin.rogers@dbtlabs.com>
---
 .changes/unreleased/Features-20221229-200956.yaml |  7 +++++++
 dbt/adapters/spark/connections.py                 |  6 +++++-
 tests/unit/test_adapter.py                        | 14 +++++++++++---
 3 files changed, 23 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Features-20221229-200956.yaml

diff --git a/.changes/unreleased/Features-20221229-200956.yaml b/.changes/unreleased/Features-20221229-200956.yaml
new file mode 100644
index 00000000..1add9bf7
--- /dev/null
+++ b/.changes/unreleased/Features-20221229-200956.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Support server side parameters in thrift connection
+time: 2022-12-29T20:09:56.457776+02:00
+custom:
+  Author: ' hanna-liashchuk'
+  Issue: "387"
+  PR: "577"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 5756aba3..a939ae75 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -439,7 +439,10 @@ def open(cls, connection: Connection) -> Connection:
                             kerberos_service_name=creds.kerberos_service_name,
                             password=creds.password,
                         )
-                        conn = hive.connect(thrift_transport=transport)
+                        conn = hive.connect(
+                            thrift_transport=transport,
+                            configuration=creds.server_side_parameters,
+                        )
                     else:
                         conn = hive.connect(
                             host=creds.host,
@@ -448,6 +451,7 @@ def open(cls, connection: Connection) -> Connection:
                             auth=creds.auth,
                             kerberos_service_name=creds.kerberos_service_name,
                             password=creds.password,
+                            configuration=creds.server_side_parameters,
                         )  # noqa
                     handle = PyhiveConnectionWrapper(conn)
                 elif creds.method == SparkConnectionMethod.ODBC:
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index 1eb81824..a7da6330 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -173,13 +173,16 @@ def test_thrift_connection(self):
         config = self._get_target_thrift(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
+        def hive_thrift_connect(
+            host, port, username, auth, kerberos_service_name, password, configuration
+        ):
             self.assertEqual(host, "myorg.sparkhost.com")
             self.assertEqual(port, 10001)
             self.assertEqual(username, "dbt")
             self.assertIsNone(auth)
             self.assertIsNone(kerberos_service_name)
             self.assertIsNone(password)
+            self.assertDictEqual(configuration, {})
 
         with mock.patch.object(hive, "connect", new=hive_thrift_connect):
             connection = adapter.acquire_connection("dummy")
@@ -194,11 +197,12 @@ def test_thrift_ssl_connection(self):
         config = self._get_target_use_ssl_thrift(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_thrift_connect(thrift_transport):
+        def hive_thrift_connect(thrift_transport, configuration):
             self.assertIsNotNone(thrift_transport)
             transport = thrift_transport._trans
             self.assertEqual(transport.host, "myorg.sparkhost.com")
             self.assertEqual(transport.port, 10001)
+            self.assertDictEqual(configuration, {})
 
         with mock.patch.object(hive, "connect", new=hive_thrift_connect):
             connection = adapter.acquire_connection("dummy")
@@ -213,13 +217,16 @@ def test_thrift_connection_kerberos(self):
         config = self._get_target_thrift_kerberos(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
+        def hive_thrift_connect(
+            host, port, username, auth, kerberos_service_name, password, configuration
+        ):
             self.assertEqual(host, "myorg.sparkhost.com")
             self.assertEqual(port, 10001)
             self.assertEqual(username, "dbt")
             self.assertEqual(auth, "KERBEROS")
             self.assertEqual(kerberos_service_name, "hive")
             self.assertIsNone(password)
+            self.assertDictEqual(configuration, {})
 
         with mock.patch.object(hive, "connect", new=hive_thrift_connect):
             connection = adapter.acquire_connection("dummy")
@@ -710,6 +717,7 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
         config = self._get_target_http(self.project_cfg)
         columns = SparkAdapter(config).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
+
         self.assertEqual(
             columns[2].to_column_dict(omit_none=False),
             {
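
The hunks above thread the profile's `server_side_parameters` mapping through to PyHive's `configuration` argument, so the values are applied to the Hive/Spark session when the Thrift connection is opened. A minimal standalone sketch of that call path, assuming PyHive is installed and a Thrift server is reachable; the host, port, and parameter values below are placeholders, not anything taken from this patch:

    from pyhive import hive

    # Hypothetical session-level settings; dbt-spark reads these from the
    # `server_side_parameters` key of a `method: thrift` profile target.
    server_side_parameters = {"spark.sql.shuffle.partitions": "8"}

    conn = hive.connect(
        host="spark-thrift.example.com",  # placeholder host
        port=10000,
        username="dbt",
        configuration=server_side_parameters,  # applied to the session server-side
    )
    cursor = conn.cursor()
    cursor.execute("select 1")
    print(cursor.fetchall())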

From 6ffd7671ede857a44f718272da175428d567a11c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 17 Aug 2023 17:15:40 -0500
Subject: [PATCH 493/603] [create-pull-request] automated change (#877)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.7.0-b1.md                          | 47 ++++++++++++++++++
 .../Dependencies-20230424-230630.yaml         |  0
 .../Dependencies-20230424-230645.yaml         |  0
 .../Dependencies-20230501-231003.yaml         |  0
 .../Dependencies-20230501-231035.yaml         |  0
 .../Dependencies-20230510-230725.yaml         |  0
 .../Dependencies-20230803-224622.yaml         |  0
 .../Dependencies-20230803-224623.yaml         |  0
 .../Dependencies-20230803-224626.yaml         |  0
 .../Dependencies-20230803-224629.yaml         |  0
 .../Dependencies-20230804-225232.yaml         |  0
 .../Dependencies-20230804-225236.yaml         |  0
 .../Dependencies-20230804-225243.yaml         |  0
 .../Dependencies-20230804-225249.yaml         |  0
 .../Dependencies-20230807-221033.yaml         |  0
 .../Dependencies-20230807-221037.yaml         |  0
 .../Dependencies-20230809-043913.yaml         |  0
 .../Dependencies-20230811-221135.yaml         |  0
 .../Dependencies-20230814-224754.yaml         |  0
 .../Dependencies-20230814-224757.yaml         |  0
 .../Features-20221229-200956.yaml             |  0
 .../Features-20230707-104150.yaml             |  0
 .../Features-20230707-113337.yaml             |  0
 .../Features-20230707-114650.yaml             |  0
 .../Features-20230707-135442.yaml             |  0
 .../Fixes-20230517-142331.yaml                |  0
 .../Fixes-20230810-014122.yaml                |  0
 .../Under the Hood-20230724-165508.yaml       |  0
 CHANGELOG.md                                  | 49 +++++++++++++++++++
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 32 files changed, 99 insertions(+), 3 deletions(-)
 create mode 100644 .changes/1.7.0-b1.md
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230424-230630.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230424-230645.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230501-231003.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230501-231035.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230510-230725.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230803-224622.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230803-224623.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230803-224626.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230803-224629.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230804-225232.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230804-225236.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230804-225243.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230804-225249.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230807-221033.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230807-221037.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230809-043913.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230811-221135.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230814-224754.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230814-224757.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Features-20221229-200956.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Features-20230707-104150.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Features-20230707-113337.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Features-20230707-114650.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Features-20230707-135442.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Fixes-20230517-142331.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Fixes-20230810-014122.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Under the Hood-20230724-165508.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 3b310155..2d884a7e 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.7.0a1
+current_version = 1.7.0b1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.7.0-b1.md b/.changes/1.7.0-b1.md
new file mode 100644
index 00000000..c2ee24af
--- /dev/null
+++ b/.changes/1.7.0-b1.md
@@ -0,0 +1,47 @@
+## dbt-spark 1.7.0-b1 - August 17, 2023
+
+### Features
+
+- Support server side parameters in thrift connection ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
+- Support server_side_parameters for Spark session connection method ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
+- Add server_side_parameters to HTTP connection method ([#824](https://github.com/dbt-labs/dbt-spark/issues/824))
+- Enforce server side parameters keys and values to be strings ([#826](https://github.com/dbt-labs/dbt-spark/issues/826))
+- Add SessionConnectionWrapper ([#829](https://github.com/dbt-labs/dbt-spark/issues/829))
+
+### Fixes
+
+- Wrap AnalysisException into DbtRuntimeError ([#782](https://github.com/dbt-labs/dbt-spark/issues/782))
+- include tblproperties macro in adapters.sql create table ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
+
+### Under the Hood
+
+- Update stale workflow to use centralized version ([#842](https://github.com/dbt-labs/dbt-spark/issues/842))
+
+### Dependencies
+
+- Update tox requirement from ~=3.0 to ~=4.5 ([#741](https://github.com/dbt-labs/dbt-spark/pull/741))
+- Update pyodbc requirement from ~=4.0.30 to ~=4.0.39 ([#742](https://github.com/dbt-labs/dbt-spark/pull/742))
+- Update pre-commit requirement from ~=2.21 to ~=3.3 ([#748](https://github.com/dbt-labs/dbt-spark/pull/748))
+- Update types-requests requirement from ~=2.28 to ~=2.29 ([#749](https://github.com/dbt-labs/dbt-spark/pull/749))
+- Bump mypy from 1.2.0 to 1.3.0 ([#768](https://github.com/dbt-labs/dbt-spark/pull/768))
+- Update flake8 requirement from ~=6.0 to ~=6.1 ([#849](https://github.com/dbt-labs/dbt-spark/pull/849))
+- Update types-requests requirement from ~=2.29 to ~=2.31 ([#850](https://github.com/dbt-labs/dbt-spark/pull/850))
+- Update pytest-xdist requirement from ~=3.2 to ~=3.3 ([#851](https://github.com/dbt-labs/dbt-spark/pull/851))
+- Update pytest requirement from ~=7.3 to ~=7.4 ([#852](https://github.com/dbt-labs/dbt-spark/pull/852))
+- Update pip-tools requirement from ~=6.13 to ~=7.2 ([#856](https://github.com/dbt-labs/dbt-spark/pull/856))
+- Update black requirement from ~=23.3 to ~=23.7 ([#857](https://github.com/dbt-labs/dbt-spark/pull/857))
+- Update wheel requirement from ~=0.40 to ~=0.41 ([#858](https://github.com/dbt-labs/dbt-spark/pull/858))
+- Update mock requirement from ~=5.0 to ~=5.1 ([#859](https://github.com/dbt-labs/dbt-spark/pull/859))
+- Bump mypy from 1.3.0 to 1.4.1 ([#860](https://github.com/dbt-labs/dbt-spark/pull/860))
+- Update tox requirement from ~=4.5 to ~=4.6 ([#861](https://github.com/dbt-labs/dbt-spark/pull/861))
+- Update pip-tools requirement from ~=7.2 to ~=7.3 ([#863](https://github.com/dbt-labs/dbt-spark/pull/863))
+- Update tox requirement from ~=4.6 to ~=4.7 ([#867](https://github.com/dbt-labs/dbt-spark/pull/867))
+- Update tox requirement from ~=4.7 to ~=4.8 ([#871](https://github.com/dbt-labs/dbt-spark/pull/871))
+- Bump mypy from 1.4.1 to 1.5.0 ([#872](https://github.com/dbt-labs/dbt-spark/pull/872))
+
+### Contributors
+- [@Fokko](https://github.com/Fokko) ([#829](https://github.com/dbt-labs/dbt-spark/issues/829), [#782](https://github.com/dbt-labs/dbt-spark/issues/782))
+- [@Fokko,JCZuurmond](https://github.com/Fokko,JCZuurmond) ([#824](https://github.com/dbt-labs/dbt-spark/issues/824), [#826](https://github.com/dbt-labs/dbt-spark/issues/826))
+- [@alarocca-apixio](https://github.com/alarocca-apixio) ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
+- [@etheleon](https://github.com/etheleon) ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
+- [@hanna-liashchuk](https://github.com/hanna-liashchuk) ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
diff --git a/.changes/unreleased/Dependencies-20230424-230630.yaml b/.changes/1.7.0/Dependencies-20230424-230630.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230424-230630.yaml
rename to .changes/1.7.0/Dependencies-20230424-230630.yaml
diff --git a/.changes/unreleased/Dependencies-20230424-230645.yaml b/.changes/1.7.0/Dependencies-20230424-230645.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230424-230645.yaml
rename to .changes/1.7.0/Dependencies-20230424-230645.yaml
diff --git a/.changes/unreleased/Dependencies-20230501-231003.yaml b/.changes/1.7.0/Dependencies-20230501-231003.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230501-231003.yaml
rename to .changes/1.7.0/Dependencies-20230501-231003.yaml
diff --git a/.changes/unreleased/Dependencies-20230501-231035.yaml b/.changes/1.7.0/Dependencies-20230501-231035.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230501-231035.yaml
rename to .changes/1.7.0/Dependencies-20230501-231035.yaml
diff --git a/.changes/unreleased/Dependencies-20230510-230725.yaml b/.changes/1.7.0/Dependencies-20230510-230725.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230510-230725.yaml
rename to .changes/1.7.0/Dependencies-20230510-230725.yaml
diff --git a/.changes/unreleased/Dependencies-20230803-224622.yaml b/.changes/1.7.0/Dependencies-20230803-224622.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230803-224622.yaml
rename to .changes/1.7.0/Dependencies-20230803-224622.yaml
diff --git a/.changes/unreleased/Dependencies-20230803-224623.yaml b/.changes/1.7.0/Dependencies-20230803-224623.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230803-224623.yaml
rename to .changes/1.7.0/Dependencies-20230803-224623.yaml
diff --git a/.changes/unreleased/Dependencies-20230803-224626.yaml b/.changes/1.7.0/Dependencies-20230803-224626.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230803-224626.yaml
rename to .changes/1.7.0/Dependencies-20230803-224626.yaml
diff --git a/.changes/unreleased/Dependencies-20230803-224629.yaml b/.changes/1.7.0/Dependencies-20230803-224629.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230803-224629.yaml
rename to .changes/1.7.0/Dependencies-20230803-224629.yaml
diff --git a/.changes/unreleased/Dependencies-20230804-225232.yaml b/.changes/1.7.0/Dependencies-20230804-225232.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230804-225232.yaml
rename to .changes/1.7.0/Dependencies-20230804-225232.yaml
diff --git a/.changes/unreleased/Dependencies-20230804-225236.yaml b/.changes/1.7.0/Dependencies-20230804-225236.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230804-225236.yaml
rename to .changes/1.7.0/Dependencies-20230804-225236.yaml
diff --git a/.changes/unreleased/Dependencies-20230804-225243.yaml b/.changes/1.7.0/Dependencies-20230804-225243.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230804-225243.yaml
rename to .changes/1.7.0/Dependencies-20230804-225243.yaml
diff --git a/.changes/unreleased/Dependencies-20230804-225249.yaml b/.changes/1.7.0/Dependencies-20230804-225249.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230804-225249.yaml
rename to .changes/1.7.0/Dependencies-20230804-225249.yaml
diff --git a/.changes/unreleased/Dependencies-20230807-221033.yaml b/.changes/1.7.0/Dependencies-20230807-221033.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230807-221033.yaml
rename to .changes/1.7.0/Dependencies-20230807-221033.yaml
diff --git a/.changes/unreleased/Dependencies-20230807-221037.yaml b/.changes/1.7.0/Dependencies-20230807-221037.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230807-221037.yaml
rename to .changes/1.7.0/Dependencies-20230807-221037.yaml
diff --git a/.changes/unreleased/Dependencies-20230809-043913.yaml b/.changes/1.7.0/Dependencies-20230809-043913.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230809-043913.yaml
rename to .changes/1.7.0/Dependencies-20230809-043913.yaml
diff --git a/.changes/unreleased/Dependencies-20230811-221135.yaml b/.changes/1.7.0/Dependencies-20230811-221135.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230811-221135.yaml
rename to .changes/1.7.0/Dependencies-20230811-221135.yaml
diff --git a/.changes/unreleased/Dependencies-20230814-224754.yaml b/.changes/1.7.0/Dependencies-20230814-224754.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230814-224754.yaml
rename to .changes/1.7.0/Dependencies-20230814-224754.yaml
diff --git a/.changes/unreleased/Dependencies-20230814-224757.yaml b/.changes/1.7.0/Dependencies-20230814-224757.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230814-224757.yaml
rename to .changes/1.7.0/Dependencies-20230814-224757.yaml
diff --git a/.changes/unreleased/Features-20221229-200956.yaml b/.changes/1.7.0/Features-20221229-200956.yaml
similarity index 100%
rename from .changes/unreleased/Features-20221229-200956.yaml
rename to .changes/1.7.0/Features-20221229-200956.yaml
diff --git a/.changes/unreleased/Features-20230707-104150.yaml b/.changes/1.7.0/Features-20230707-104150.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230707-104150.yaml
rename to .changes/1.7.0/Features-20230707-104150.yaml
diff --git a/.changes/unreleased/Features-20230707-113337.yaml b/.changes/1.7.0/Features-20230707-113337.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230707-113337.yaml
rename to .changes/1.7.0/Features-20230707-113337.yaml
diff --git a/.changes/unreleased/Features-20230707-114650.yaml b/.changes/1.7.0/Features-20230707-114650.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230707-114650.yaml
rename to .changes/1.7.0/Features-20230707-114650.yaml
diff --git a/.changes/unreleased/Features-20230707-135442.yaml b/.changes/1.7.0/Features-20230707-135442.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230707-135442.yaml
rename to .changes/1.7.0/Features-20230707-135442.yaml
diff --git a/.changes/unreleased/Fixes-20230517-142331.yaml b/.changes/1.7.0/Fixes-20230517-142331.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20230517-142331.yaml
rename to .changes/1.7.0/Fixes-20230517-142331.yaml
diff --git a/.changes/unreleased/Fixes-20230810-014122.yaml b/.changes/1.7.0/Fixes-20230810-014122.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20230810-014122.yaml
rename to .changes/1.7.0/Fixes-20230810-014122.yaml
diff --git a/.changes/unreleased/Under the Hood-20230724-165508.yaml b/.changes/1.7.0/Under the Hood-20230724-165508.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230724-165508.yaml
rename to .changes/1.7.0/Under the Hood-20230724-165508.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 902db37f..583cdd61 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,55 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.7.0-b1 - August 17, 2023
+
+### Features
+
+- Support server side parameters in thrift connection ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
+- Support server_side_parameters for Spark session connection method ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
+- Add server_side_parameters to HTTP connection method ([#824](https://github.com/dbt-labs/dbt-spark/issues/824))
+- Enforce server side parameters keys and values to be strings ([#826](https://github.com/dbt-labs/dbt-spark/issues/826))
+- Add SessionConnectionWrapper ([#829](https://github.com/dbt-labs/dbt-spark/issues/829))
+
+### Fixes
+
+- Wrap AnalysisException into DbtRuntimeError ([#782](https://github.com/dbt-labs/dbt-spark/issues/782))
+- include tblproperties macro in adapters.sql create table ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
+
+### Under the Hood
+
+- Update stale workflow to use centralized version ([#842](https://github.com/dbt-labs/dbt-spark/issues/842))
+
+### Dependencies
+
+- Update tox requirement from ~=3.0 to ~=4.5 ([#741](https://github.com/dbt-labs/dbt-spark/pull/741))
+- Update pyodbc requirement from ~=4.0.30 to ~=4.0.39 ([#742](https://github.com/dbt-labs/dbt-spark/pull/742))
+- Update pre-commit requirement from ~=2.21 to ~=3.3 ([#748](https://github.com/dbt-labs/dbt-spark/pull/748))
+- Update types-requests requirement from ~=2.28 to ~=2.29 ([#749](https://github.com/dbt-labs/dbt-spark/pull/749))
+- Bump mypy from 1.2.0 to 1.3.0 ([#768](https://github.com/dbt-labs/dbt-spark/pull/768))
+- Update flake8 requirement from ~=6.0 to ~=6.1 ([#849](https://github.com/dbt-labs/dbt-spark/pull/849))
+- Update types-requests requirement from ~=2.29 to ~=2.31 ([#850](https://github.com/dbt-labs/dbt-spark/pull/850))
+- Update pytest-xdist requirement from ~=3.2 to ~=3.3 ([#851](https://github.com/dbt-labs/dbt-spark/pull/851))
+- Update pytest requirement from ~=7.3 to ~=7.4 ([#852](https://github.com/dbt-labs/dbt-spark/pull/852))
+- Update pip-tools requirement from ~=6.13 to ~=7.2 ([#856](https://github.com/dbt-labs/dbt-spark/pull/856))
+- Update black requirement from ~=23.3 to ~=23.7 ([#857](https://github.com/dbt-labs/dbt-spark/pull/857))
+- Update wheel requirement from ~=0.40 to ~=0.41 ([#858](https://github.com/dbt-labs/dbt-spark/pull/858))
+- Update mock requirement from ~=5.0 to ~=5.1 ([#859](https://github.com/dbt-labs/dbt-spark/pull/859))
+- Bump mypy from 1.3.0 to 1.4.1 ([#860](https://github.com/dbt-labs/dbt-spark/pull/860))
+- Update tox requirement from ~=4.5 to ~=4.6 ([#861](https://github.com/dbt-labs/dbt-spark/pull/861))
+- Update pip-tools requirement from ~=7.2 to ~=7.3 ([#863](https://github.com/dbt-labs/dbt-spark/pull/863))
+- Update tox requirement from ~=4.6 to ~=4.7 ([#867](https://github.com/dbt-labs/dbt-spark/pull/867))
+- Update tox requirement from ~=4.7 to ~=4.8 ([#871](https://github.com/dbt-labs/dbt-spark/pull/871))
+- Bump mypy from 1.4.1 to 1.5.0 ([#872](https://github.com/dbt-labs/dbt-spark/pull/872))
+
+### Contributors
+- [@Fokko](https://github.com/Fokko) ([#829](https://github.com/dbt-labs/dbt-spark/issues/829), [#782](https://github.com/dbt-labs/dbt-spark/issues/782))
+- [@Fokko,JCZuurmond](https://github.com/Fokko,JCZuurmond) ([#824](https://github.com/dbt-labs/dbt-spark/issues/824), [#826](https://github.com/dbt-labs/dbt-spark/issues/826))
+- [@alarocca-apixio](https://github.com/alarocca-apixio) ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
+- [@etheleon](https://github.com/etheleon) ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
+- [@hanna-liashchuk](https://github.com/hanna-liashchuk) ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
+
+
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 874bd74c..48607b01 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.7.0a1"
+version = "1.7.0b1"
diff --git a/setup.py b/setup.py
index c6713e89..482c0fd3 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.7.0a1"
+package_version = "1.7.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 07383d906f5ab97e9f4db72926d401d45e47c2c2 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Mon, 21 Aug 2023 13:10:23 -0700
Subject: [PATCH 494/603] Use PyHive pure-sasl import (#818)

* Use PyHive pure-sasl import

* Test PyHive pure-sasl import

* update requirements.txt

* use github fix for >3.11

* use github fix for >3.11

* fix pyhive spec

* remove unnecessary requests extra

* remove unnecessary sasl install

* add changie

* specify no-binary for pyodbc install

* conditionally import sasl vs puresasl

* conditionally import sasl vs pure-sasl

* experiment with pure sasl SASLClient

* file formatting

* test using only puresasl

* test using only puresasl

* update requirements.txt

* remove sasl from req

* remove unused sys dependency

* test spark-session

* fix pure-sasl

* allow pre-release installs

* allow pre-release installs

* use pyhive 0.7.0

* add pyhive back to reqs

* fix setup.py and remove sasl from dev-requirements.txt

* Update Dependencies-20230628-121341.yaml

* remove unnecessary pip_pre arg from tox.ini

---------

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .../unreleased/Dependencies-20230628-121341.yaml   |  6 ++++++
 .github/workflows/main.yml                         |  4 ++--
 dbt/adapters/spark/connections.py                  | 14 ++++++--------
 dev-requirements.txt                               |  1 -
 requirements.txt                                   |  4 ++--
 setup.py                                           |  6 +++---
 6 files changed, 19 insertions(+), 16 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20230628-121341.yaml

diff --git a/.changes/unreleased/Dependencies-20230628-121341.yaml b/.changes/unreleased/Dependencies-20230628-121341.yaml
new file mode 100644
index 00000000..5569c885
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230628-121341.yaml
@@ -0,0 +1,6 @@
+kind: Dependencies
+body: Replace sasl with pure-sasl for PyHive
+time: 2023-06-28T12:13:41.141588-07:00
+custom:
+  Author: colin-rogers-dbt
+  PR: "818"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 6b3d93b6..30126325 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -79,7 +79,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     env:
       TOXENV: "unit"
@@ -177,7 +177,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index a939ae75..beb2b269 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -33,8 +33,8 @@
     from thrift.transport.TSSLSocket import TSSLSocket
     import thrift
     import ssl
-    import sasl
     import thrift_sasl
+    from puresasl.client import SASLClient
 except ImportError:
     pass  # done deliberately: setting modules to None explicitly violates MyPy contracts by degrading type semantics
 
@@ -603,17 +603,15 @@ def build_ssl_transport(
                 # to be nonempty.
                 password = "x"
 
-        def sasl_factory() -> sasl.Client:
-            sasl_client = sasl.Client()
-            sasl_client.setAttr("host", host)
+        def sasl_factory() -> SASLClient:
             if sasl_auth == "GSSAPI":
-                sasl_client.setAttr("service", kerberos_service_name)
+                sasl_client = SASLClient(host, kerberos_service_name, mechanism=sasl_auth)
             elif sasl_auth == "PLAIN":
-                sasl_client.setAttr("username", username)
-                sasl_client.setAttr("password", password)
+                sasl_client = SASLClient(
+                    host, mechanism=sasl_auth, username=username, password=password
+                )
             else:
                 raise AssertionError
-            sasl_client.init()
             return sasl_client
 
         transport = thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8c50d55a..cb5ea2cd 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -30,5 +30,4 @@ wheel~=0.41
 
 # Adapter specific dependencies
 mock~=5.1
-sasl~=0.3.1
 thrift_sasl~=0.4.3
diff --git a/requirements.txt b/requirements.txt
index e58ecdd4..ea5d1ad2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-PyHive[hive]>=0.6.0,<0.7.0
-requests[python]>=2.28.1
+pyhive[hive_pure_sasl]~=0.7.0
+requests>=2.28.1
 
 pyodbc~=4.0.39
 sqlparams>=3.0.0
diff --git a/setup.py b/setup.py
index 482c0fd3..2ac9e63c 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,6 @@
     print("Please upgrade to Python 3.8 or higher.")
     sys.exit(1)
 
-
 # require version of setuptools that supports find_namespace_packages
 from setuptools import setup
 
@@ -54,9 +53,9 @@ def _get_dbt_core_version():
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 
-odbc_extras = ["pyodbc~=4.0.30"]
+odbc_extras = ["pyodbc~=4.0.39"]
 pyhive_extras = [
-    "PyHive[hive]>=0.6.0,<0.7.0",
+    "PyHive[hive_pure_sasl]~=0.7.0",
     "thrift>=0.11.0,<0.17.0",
 ]
 session_extras = ["pyspark>=3.0.0,<4.0.0"]
@@ -93,6 +92,7 @@ def _get_dbt_core_version():
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
     ],
     python_requires=">=3.8",
 )
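
The functional change above is that the SASL client used by the Thrift transport now comes from pure-sasl rather than the unmaintained sasl package, and PyHive is installed with the `hive_pure_sasl` extra. A hedged sketch of the factory shape the diff adopts, assuming `pure-sasl` and `thrift_sasl` are installed and a TSocket/TSSLSocket already exists; the helper function below is illustrative and not part of the adapter:

    import thrift_sasl
    from puresasl.client import SASLClient


    def build_sasl_transport(socket, host, sasl_auth, kerberos_service_name=None,
                             username=None, password=None):
        # GSSAPI authenticates against the Kerberos service name; PLAIN passes
        # username/password through to the SASL mechanism.
        def sasl_factory() -> SASLClient:
            if sasl_auth == "GSSAPI":
                return SASLClient(host, kerberos_service_name, mechanism=sasl_auth)
            if sasl_auth == "PLAIN":
                return SASLClient(host, mechanism=sasl_auth, username=username, password=password)
            raise AssertionError(f"Unsupported SASL mechanism: {sasl_auth}")

        return thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket)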

From 91f98879b9de863f9c6eb20fd0a752f7a859d5b3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 25 Aug 2023 10:44:16 -0500
Subject: [PATCH 495/603] Update tox requirement from ~=4.8 to ~=4.9 (#874)

* Update tox requirement from ~=4.8 to ~=4.9

Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/compare/4.8.0...4.9.0)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230816-221452.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230816-221452.yaml

diff --git a/.changes/unreleased/Dependencies-20230816-221452.yaml b/.changes/unreleased/Dependencies-20230816-221452.yaml
new file mode 100644
index 00000000..8a5d3f0c
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230816-221452.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update tox requirement from ~=4.8 to ~=4.9"
+time: 2023-08-16T22:14:52.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 874
diff --git a/dev-requirements.txt b/dev-requirements.txt
index cb5ea2cd..6a4aec12 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,7 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.3
 pytz~=2023.3
-tox~=4.8
+tox~=4.9
 types-pytz~=2023.3
 types-requests~=2.31
 twine~=4.0

From 01c9fd020ea3e92b35a1ebc9225bf9babf3d16ba Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 31 Aug 2023 16:12:25 -0700
Subject: [PATCH 496/603] remove hologram and supply defaults/validation logic
 (#880)

* remove hologram and supply defaults/validation logic

* add changie

* mypy fixes
---
 .../Under the Hood-20230830-160616.yaml        |  6 ++++++
 dbt/adapters/spark/column.py                   |  5 ++---
 dbt/adapters/spark/connections.py              | 18 +++++++++++++-----
 3 files changed, 21 insertions(+), 8 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230830-160616.yaml

diff --git a/.changes/unreleased/Under the Hood-20230830-160616.yaml b/.changes/unreleased/Under the Hood-20230830-160616.yaml
new file mode 100644
index 00000000..018a9403
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230830-160616.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Remove dependency on hologram
+time: 2023-08-30T16:06:16.444881-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "881"
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index bde49a49..a57fa056 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -3,13 +3,12 @@
 
 from dbt.adapters.base.column import Column
 from dbt.dataclass_schema import dbtClassMixin
-from hologram import JsonDict
 
 Self = TypeVar("Self", bound="SparkColumn")
 
 
 @dataclass
-class SparkColumn(dbtClassMixin, Column):  # type: ignore
+class SparkColumn(dbtClassMixin, Column):
     table_database: Optional[str] = None
     table_schema: Optional[str] = None
     table_name: Optional[str] = None
@@ -63,7 +62,7 @@ def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]:
                 table_stats[f"stats:{key}:include"] = True
         return table_stats
 
-    def to_column_dict(self, omit_none: bool = True, validate: bool = False) -> JsonDict:
+    def to_column_dict(self, omit_none: bool = True, validate: bool = False) -> Dict[str, Any]:
         original_dict = self.to_dict(omit_none=omit_none)
         # If there are stats, merge them into the root of the dict
         original_stats = original_dict.pop("table_stats", None)
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index beb2b269..966f5584 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -23,7 +23,7 @@
 from datetime import datetime
 import sqlparams
 from dbt.contracts.connection import Connection
-from hologram.helpers import StrEnum
+from dbt.dataclass_schema import StrEnum
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable, Sequence
 
@@ -59,9 +59,10 @@ class SparkConnectionMethod(StrEnum):
 
 @dataclass
 class SparkCredentials(Credentials):
-    host: str
-    method: SparkConnectionMethod
-    database: Optional[str]  # type: ignore
+    host: Optional[str] = None
+    schema: Optional[str] = None  # type: ignore
+    method: SparkConnectionMethod = None  # type: ignore
+    database: Optional[str] = None  # type: ignore
     driver: Optional[str] = None
     cluster: Optional[str] = None
     endpoint: Optional[str] = None
@@ -90,6 +91,13 @@ def cluster_id(self) -> Optional[str]:
         return self.cluster
 
     def __post_init__(self) -> None:
+        if self.method is None:
+            raise dbt.exceptions.DbtRuntimeError("Must specify `method` in profile")
+        if self.host is None:
+            raise dbt.exceptions.DbtRuntimeError("Must specify `host` in profile")
+        if self.schema is None:
+            raise dbt.exceptions.DbtRuntimeError("Must specify `schema` in profile")
+
         # spark classifies database and schema as the same thing
         if self.database is not None and self.database != self.schema:
             raise dbt.exceptions.DbtRuntimeError(
@@ -154,7 +162,7 @@ def type(self) -> str:
 
     @property
     def unique_field(self) -> str:
-        return self.host
+        return self.host  # type: ignore
 
     def _connection_keys(self) -> Tuple[str, ...]:
         return "host", "port", "cluster", "endpoint", "schema", "organization"
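
With hologram removed, required profile fields are no longer enforced by JSON-schema validation; they now default to `None` and are checked explicitly in `__post_init__`, which is why `host`, `schema`, and `method` become Optional in the dataclass above. A small self-contained sketch of that pattern, using illustrative names and a stand-in exception rather than the adapter's actual classes:

    from dataclasses import dataclass
    from typing import Optional


    class ProfileError(Exception):
        """Stand-in for dbt's DbtRuntimeError."""


    @dataclass
    class ExampleCredentials:
        host: Optional[str] = None
        schema: Optional[str] = None
        method: Optional[str] = None

        def __post_init__(self) -> None:
            # Validate explicitly instead of relying on a JSON-schema layer.
            for name in ("method", "host", "schema"):
                if getattr(self, name) is None:
                    raise ProfileError(f"Must specify `{name}` in profile")


    creds = ExampleCredentials(host="example.com", schema="analytics", method="thrift")
    print(creds)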

From efa1b1868424f80534ba6ee5d20a72f9e2343947 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@tabular.io>
Date: Fri, 1 Sep 2023 01:38:27 +0200
Subject: [PATCH 497/603] Add docker image to the repo (#876)

I think it makes sense to have the Dockerfile
in the repository itself.

Resolves #739

Co-authored-by: Anders <anders.swanson@dbtlabs.com>
Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .../unreleased/Security-20230817-145626.yaml  |  6 ++++
 docker-compose.yml                            |  4 +--
 docker/Dockerfile                             | 30 +++++++++++++++++++
 docker/entrypoint.sh                          | 15 ++++++++++
 4 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Security-20230817-145626.yaml
 create mode 100644 docker/Dockerfile
 create mode 100644 docker/entrypoint.sh

diff --git a/.changes/unreleased/Security-20230817-145626.yaml b/.changes/unreleased/Security-20230817-145626.yaml
new file mode 100644
index 00000000..4add88cb
--- /dev/null
+++ b/.changes/unreleased/Security-20230817-145626.yaml
@@ -0,0 +1,6 @@
+kind: Security
+body: Add docker image to the repo
+time: 2023-08-17T14:56:26.361208+02:00
+custom:
+  Author: Fokko
+  PR: "876"
diff --git a/docker-compose.yml b/docker-compose.yml
index 9bc9e509..ad083eaf 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,7 @@ version: "3.7"
 services:
 
   dbt-spark3-thrift:
-    image: godatadriven/spark:3.1.1
+    build: docker/
     ports:
       - "10000:10000"
       - "4040:4040"
@@ -19,7 +19,7 @@ services:
       - WAIT_FOR=dbt-hive-metastore:5432
 
   dbt-hive-metastore:
-    image: postgres:9.6.17-alpine
+    image: postgres:9-alpine
     volumes:
       - ./.hive-metastore/:/var/lib/postgresql/data
     environment:
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 00000000..bb4d378e
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,30 @@
+ARG OPENJDK_VERSION=8
+FROM eclipse-temurin:${OPENJDK_VERSION}-jre
+
+ARG BUILD_DATE
+ARG SPARK_VERSION=3.3.2
+ARG HADOOP_VERSION=3
+
+LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
+      org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.version=$SPARK_VERSION
+
+ENV SPARK_HOME /usr/spark
+ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
+
+RUN apt-get update && \
+    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+    wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
+    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
+    apt-get remove -y wget && \
+    apt-get autoremove -y && \
+    apt-get clean
+
+COPY entrypoint.sh /scripts/
+RUN chmod +x /scripts/entrypoint.sh
+
+ENTRYPOINT ["/scripts/entrypoint.sh"]
+CMD ["--help"]
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 00000000..6a759138
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+if [ -n "$WAIT_FOR" ]; then
+  IFS=';' read -a HOSTPORT_ARRAY <<< "$WAIT_FOR"
+  for HOSTPORT in "${HOSTPORT_ARRAY[@]}"
+  do
+    WAIT_FOR_HOST=${HOSTPORT%:*}
+    WAIT_FOR_PORT=${HOSTPORT#*:}
+
+    echo Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT...
+    while ! nc -z $WAIT_FOR_HOST $WAIT_FOR_PORT; do echo sleeping; sleep 2; done
+  done
+fi
+
+exec spark-submit "$@"

From 9a4fc6faeabecb88bdb5604ec127219827d90e82 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 11 Sep 2023 20:16:06 -0400
Subject: [PATCH 498/603] Update black requirement from ~=23.7 to ~=23.9 (#886)

* Update black requirement from ~=23.7 to ~=23.9

Updates the requirements on [black](https://github.com/psf/black) to permit the latest version.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/23.7.0...23.9.1)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230911-222120.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230911-222120.yaml

diff --git a/.changes/unreleased/Dependencies-20230911-222120.yaml b/.changes/unreleased/Dependencies-20230911-222120.yaml
new file mode 100644
index 00000000..b1b2b68b
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230911-222120.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update black requirement from ~=23.7 to ~=23.9"
+time: 2023-09-11T22:21:20.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 886
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 6a4aec12..fc0072b5 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.7
+black~=23.9
 bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"

From ec16f32c344a71fade3b00631d96ed219621b84c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Sep 2023 00:18:44 +0000
Subject: [PATCH 499/603] Update pre-commit requirement from ~=3.3 to ~=3.4
 (#884)

* Update pre-commit requirement from ~=3.3 to ~=3.4

Updates the requirements on [pre-commit](https://github.com/pre-commit/pre-commit) to permit the latest version.
- [Release notes](https://github.com/pre-commit/pre-commit/releases)
- [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pre-commit/pre-commit/compare/v3.3.0...v3.4.0)

---
updated-dependencies:
- dependency-name: pre-commit
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230904-221612.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230904-221612.yaml

diff --git a/.changes/unreleased/Dependencies-20230904-221612.yaml b/.changes/unreleased/Dependencies-20230904-221612.yaml
new file mode 100644
index 00000000..50d6dbf5
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230904-221612.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pre-commit requirement from ~=3.3 to ~=3.4"
+time: 2023-09-04T22:16:12.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 884
diff --git a/dev-requirements.txt b/dev-requirements.txt
index fc0072b5..44f65016 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -14,7 +14,7 @@ freezegun~=1.2
 ipdb~=0.13.13
 mypy==1.5.0  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
-pre-commit~=3.3
+pre-commit~=3.4
 pre-commit-hooks~=4.4
 pytest~=7.4
 pytest-csv~=3.0

From 833950395d2cb90261e819be7189cc9af7db0506 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Sep 2023 00:22:56 +0000
Subject: [PATCH 500/603] Update tox requirement from ~=4.9 to ~=4.10 (#879)

* Update tox requirement from ~=4.9 to ~=4.10

Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/compare/4.9.0...4.10.0)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230825-154517.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230825-154517.yaml

diff --git a/.changes/unreleased/Dependencies-20230825-154517.yaml b/.changes/unreleased/Dependencies-20230825-154517.yaml
new file mode 100644
index 00000000..3afae44e
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230825-154517.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update tox requirement from ~=4.9 to ~=4.10"
+time: 2023-08-25T15:45:17.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 879
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 44f65016..11e08f1e 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,7 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.3
 pytz~=2023.3
-tox~=4.9
+tox~=4.10
 types-pytz~=2023.3
 types-requests~=2.31
 twine~=4.0

From af4674c6d76f9b72abd933f2a61af0fc4182ce99 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Sep 2023 02:06:23 +0000
Subject: [PATCH 501/603] Bump mypy from 1.5.0 to 1.5.1 (#875)

* Bump mypy from 1.5.0 to 1.5.1

Bumps [mypy](https://github.com/python/mypy) from 1.5.0 to 1.5.1.
- [Commits](https://github.com/python/mypy/compare/v1.5.0...v1.5.1)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20230816-221455.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230816-221455.yaml

diff --git a/.changes/unreleased/Dependencies-20230816-221455.yaml b/.changes/unreleased/Dependencies-20230816-221455.yaml
new file mode 100644
index 00000000..01a28c48
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230816-221455.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.5.0 to 1.5.1"
+time: 2023-08-16T22:14:55.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 875
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 11e08f1e..b0c873c0 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.5.0  # patch updates have historically introduced breaking changes
+mypy==1.5.1  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.4
 pre-commit-hooks~=4.4

From b63b9b95e348b8fd9df79d542fa57efd2a29614b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 13 Sep 2023 18:00:31 -0400
Subject: [PATCH 502/603] Update tox requirement from ~=4.10 to ~=4.11 (#887)

* Update tox requirement from ~=4.10 to ~=4.11

Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/compare/4.10.0...4.11.3)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20230912-222718.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20230912-222718.yaml

diff --git a/.changes/unreleased/Dependencies-20230912-222718.yaml b/.changes/unreleased/Dependencies-20230912-222718.yaml
new file mode 100644
index 00000000..560c9176
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20230912-222718.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update tox requirement from ~=4.10 to ~=4.11"
+time: 2023-09-12T22:27:18.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 887
diff --git a/dev-requirements.txt b/dev-requirements.txt
index b0c873c0..6ea7b16a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -22,7 +22,7 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.3
 pytz~=2023.3
-tox~=4.10
+tox~=4.11
 types-pytz~=2023.3
 types-requests~=2.31
 twine~=4.0

From 1672efd4fff26f884decec22b8f02924e7a91976 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 26 Sep 2023 13:12:27 -0700
Subject: [PATCH 503/603] persist view column comments (#893)

* persist view column comments

* format: whitespace

* extracted get_matched_column macro

* move parenthesis to the calling macro

* changelog

* fix: remove matching column in different case

* fix: remove get_matched_column macro - not much logic left there.

* escape column comments and add functional test

* Update Features-20230817-130731.yaml

* remove unneeded f string

* add test fixture to view test

* fix fixtures for TestPersistDocsDeltaView

* fix fixtures for TestPersistDocsDeltaView

* formatting

* fix tests

---------

Co-authored-by: Juri Krainjukov <juri.krainjukov@gmail.com>
---
 .../unreleased/Features-20230817-130731.yaml  |  6 +++
 dbt/include/spark/macros/adapters.sql         | 21 +++++++++
 .../adapter/persist_docs/fixtures.py          | 28 ++++++++++++
 .../adapter/persist_docs/test_persist_docs.py | 44 +++++++++++++++++++
 4 files changed, 99 insertions(+)
 create mode 100644 .changes/unreleased/Features-20230817-130731.yaml

diff --git a/.changes/unreleased/Features-20230817-130731.yaml b/.changes/unreleased/Features-20230817-130731.yaml
new file mode 100644
index 00000000..e88deb7b
--- /dev/null
+++ b/.changes/unreleased/Features-20230817-130731.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Persist Column level comments when creating views
+time: 2023-08-17T13:07:31.6812862Z
+custom:
+  Author: jurasan
+  Issue: 372
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 471d1dee..9e277dd6 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -229,9 +229,30 @@
   {% endfor %}
 {% endmacro %}
 
+{% macro get_column_comment_sql(column_name, column_dict) -%}
+  {% if column_name in column_dict and column_dict[column_name]["description"] -%}
+    {% set escaped_description = column_dict[column_name]["description"] | replace("'", "\\'") %}
+    {% set column_comment_clause = "comment '" ~ escaped_description ~ "'" %}
+  {%- endif -%}
+  {{ adapter.quote(column_name) }} {{ column_comment_clause }}
+{% endmacro %}
+
+{% macro get_persist_docs_column_list(model_columns, query_columns) %}
+  {% for column_name in query_columns %}
+    {{ get_column_comment_sql(column_name, model_columns) }}
+    {{- ", " if not loop.last else "" }}
+  {% endfor %}
+{% endmacro %}
 
 {% macro spark__create_view_as(relation, sql) -%}
   create or replace view {{ relation }}
+  {% if config.persist_column_docs() -%}
+    {% set model_columns = model.columns %}
+    {% set query_columns = get_columns_in_query(sql) %}
+    (
+    {{ get_persist_docs_column_list(model_columns, query_columns) }}
+    )
+  {% endif %}
   {{ comment_clause() }}
   {%- set contract_config = config.get('contract') -%}
   {%- if contract_config.enforced -%}
diff --git a/tests/functional/adapter/persist_docs/fixtures.py b/tests/functional/adapter/persist_docs/fixtures.py
index 3c351ab5..b884b7de 100644
--- a/tests/functional/adapter/persist_docs/fixtures.py
+++ b/tests/functional/adapter/persist_docs/fixtures.py
@@ -21,11 +21,39 @@
 select 1 as id, 'Joe' as name
 """
 
+_MODELS__VIEW_DELTA_MODEL = """
+{{ config(materialized='view') }}
+select id, count(*) as count from {{ ref('table_delta_model') }} group by id
+"""
+
 _MODELS__TABLE_DELTA_MODEL_MISSING_COLUMN = """
 {{ config(materialized='table', file_format='delta') }}
 select 1 as id, 'Joe' as different_name
 """
+_VIEW_PROPERTIES_MODELS = """
+version: 2
 
+models:
+  - name: view_delta_model
+    description: |
+      View model description "with double quotes"
+      and with 'single  quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single  quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+"""
 _PROPERTIES__MODELS = """
 version: 2
 
diff --git a/tests/functional/adapter/persist_docs/test_persist_docs.py b/tests/functional/adapter/persist_docs/test_persist_docs.py
index 0e3d102d..ee02e5ef 100644
--- a/tests/functional/adapter/persist_docs/test_persist_docs.py
+++ b/tests/functional/adapter/persist_docs/test_persist_docs.py
@@ -10,6 +10,8 @@
     _PROPERTIES__MODELS,
     _PROPERTIES__SEEDS,
     _SEEDS__BASIC,
+    _MODELS__VIEW_DELTA_MODEL,
+    _VIEW_PROPERTIES_MODELS,
 )
 
 
@@ -76,6 +78,48 @@ def test_delta_comments(self, project):
                     assert result[2].startswith("Some stuff here and then a call to")
 
 
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestPersistDocsDeltaView:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL,
+            "view_delta_model.sql": _MODELS__VIEW_DELTA_MODEL,
+            "schema.yml": _VIEW_PROPERTIES_MODELS,
+        }
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "test": {
+                    "+persist_docs": {
+                        "relation": True,
+                        "columns": True,
+                    },
+                }
+            },
+        }
+
+    def test_delta_comments(self, project):
+        run_dbt(["run"])
+
+        results = project.run_sql(
+            "describe extended {schema}.{table}".format(
+                schema=project.test_schema, table="view_delta_model"
+            ),
+            fetch="all",
+        )
+
+        for result in results:
+            if result[0] == "Comment":
+                assert result[1].startswith("View model description")
+            if result[0] == "id":
+                assert result[2].startswith("id Column description")
+            if result[0] == "count":
+                assert result[2] is None
+
+
 @pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestPersistDocsMissingColumn:
     @pytest.fixture(scope="class")

From 992de28f26300a469cabdf2812d809d1c1e0a985 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 2 Oct 2023 12:50:25 -0700
Subject: [PATCH 504/603] [create-pull-request] automated change (#898)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.7.0-b2.md                          | 27 +++++++++++++++++
 .../Dependencies-20230628-121341.yaml         |  0
 .../Dependencies-20230816-221452.yaml         |  0
 .../Dependencies-20230816-221455.yaml         |  0
 .../Dependencies-20230825-154517.yaml         |  0
 .../Dependencies-20230904-221612.yaml         |  0
 .../Dependencies-20230911-222120.yaml         |  0
 .../Dependencies-20230912-222718.yaml         |  0
 .../Features-20230817-130731.yaml             |  0
 .../Security-20230817-145626.yaml             |  0
 .../Under the Hood-20230830-160616.yaml       |  0
 CHANGELOG.md                                  | 30 ++++++++++++++++++-
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 15 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.7.0-b2.md
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230628-121341.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230816-221452.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230816-221455.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230825-154517.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230904-221612.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230911-222120.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Dependencies-20230912-222718.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Features-20230817-130731.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Security-20230817-145626.yaml (100%)
 rename .changes/{unreleased => 1.7.0}/Under the Hood-20230830-160616.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 2d884a7e..dbd792cd 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.7.0b1
+current_version = 1.7.0b2
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.7.0-b2.md b/.changes/1.7.0-b2.md
new file mode 100644
index 00000000..1f267653
--- /dev/null
+++ b/.changes/1.7.0-b2.md
@@ -0,0 +1,27 @@
+## dbt-spark 1.7.0-b2 - October 02, 2023
+
+### Features
+
+- Persist Column level comments when creating views ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
+
+### Under the Hood
+
+- Remove dependency on hologram ([#881](https://github.com/dbt-labs/dbt-spark/issues/881))
+
+### Dependencies
+
+- Replace sasl with pure-sasl for PyHive ([#818](https://github.com/dbt-labs/dbt-spark/pull/818))
+- Update tox requirement from ~=4.8 to ~=4.9 ([#874](https://github.com/dbt-labs/dbt-spark/pull/874))
+- Bump mypy from 1.5.0 to 1.5.1 ([#875](https://github.com/dbt-labs/dbt-spark/pull/875))
+- Update tox requirement from ~=4.9 to ~=4.10 ([#879](https://github.com/dbt-labs/dbt-spark/pull/879))
+- Update pre-commit requirement from ~=3.3 to ~=3.4 ([#884](https://github.com/dbt-labs/dbt-spark/pull/884))
+- Update black requirement from ~=23.7 to ~=23.9 ([#886](https://github.com/dbt-labs/dbt-spark/pull/886))
+- Update tox requirement from ~=4.10 to ~=4.11 ([#887](https://github.com/dbt-labs/dbt-spark/pull/887))
+
+### Security
+
+- Add docker image to the repo ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
+
+### Contributors
+- [@Fokko](https://github.com/Fokko) ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
+- [@jurasan](https://github.com/jurasan) ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
diff --git a/.changes/unreleased/Dependencies-20230628-121341.yaml b/.changes/1.7.0/Dependencies-20230628-121341.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230628-121341.yaml
rename to .changes/1.7.0/Dependencies-20230628-121341.yaml
diff --git a/.changes/unreleased/Dependencies-20230816-221452.yaml b/.changes/1.7.0/Dependencies-20230816-221452.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230816-221452.yaml
rename to .changes/1.7.0/Dependencies-20230816-221452.yaml
diff --git a/.changes/unreleased/Dependencies-20230816-221455.yaml b/.changes/1.7.0/Dependencies-20230816-221455.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230816-221455.yaml
rename to .changes/1.7.0/Dependencies-20230816-221455.yaml
diff --git a/.changes/unreleased/Dependencies-20230825-154517.yaml b/.changes/1.7.0/Dependencies-20230825-154517.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230825-154517.yaml
rename to .changes/1.7.0/Dependencies-20230825-154517.yaml
diff --git a/.changes/unreleased/Dependencies-20230904-221612.yaml b/.changes/1.7.0/Dependencies-20230904-221612.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230904-221612.yaml
rename to .changes/1.7.0/Dependencies-20230904-221612.yaml
diff --git a/.changes/unreleased/Dependencies-20230911-222120.yaml b/.changes/1.7.0/Dependencies-20230911-222120.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230911-222120.yaml
rename to .changes/1.7.0/Dependencies-20230911-222120.yaml
diff --git a/.changes/unreleased/Dependencies-20230912-222718.yaml b/.changes/1.7.0/Dependencies-20230912-222718.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20230912-222718.yaml
rename to .changes/1.7.0/Dependencies-20230912-222718.yaml
diff --git a/.changes/unreleased/Features-20230817-130731.yaml b/.changes/1.7.0/Features-20230817-130731.yaml
similarity index 100%
rename from .changes/unreleased/Features-20230817-130731.yaml
rename to .changes/1.7.0/Features-20230817-130731.yaml
diff --git a/.changes/unreleased/Security-20230817-145626.yaml b/.changes/1.7.0/Security-20230817-145626.yaml
similarity index 100%
rename from .changes/unreleased/Security-20230817-145626.yaml
rename to .changes/1.7.0/Security-20230817-145626.yaml
diff --git a/.changes/unreleased/Under the Hood-20230830-160616.yaml b/.changes/1.7.0/Under the Hood-20230830-160616.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230830-160616.yaml
rename to .changes/1.7.0/Under the Hood-20230830-160616.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 583cdd61..362976a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,35 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.7.0-b2 - October 02, 2023
+
+### Features
+
+- Persist Column level comments when creating views ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
+
+### Under the Hood
+
+- Remove dependency on hologram ([#881](https://github.com/dbt-labs/dbt-spark/issues/881))
+
+### Dependencies
+
+- Replace sasl with pure-sasl for PyHive ([#818](https://github.com/dbt-labs/dbt-spark/pull/818))
+- Update tox requirement from ~=4.8 to ~=4.9 ([#874](https://github.com/dbt-labs/dbt-spark/pull/874))
+- Bump mypy from 1.5.0 to 1.5.1 ([#875](https://github.com/dbt-labs/dbt-spark/pull/875))
+- Update tox requirement from ~=4.9 to ~=4.10 ([#879](https://github.com/dbt-labs/dbt-spark/pull/879))
+- Update pre-commit requirement from ~=3.3 to ~=3.4 ([#884](https://github.com/dbt-labs/dbt-spark/pull/884))
+- Update black requirement from ~=23.7 to ~=23.9 ([#886](https://github.com/dbt-labs/dbt-spark/pull/886))
+- Update tox requirement from ~=4.10 to ~=4.11 ([#887](https://github.com/dbt-labs/dbt-spark/pull/887))
+
+### Security
+
+- Add docker image to the repo ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
+
+### Contributors
+- [@Fokko](https://github.com/Fokko) ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
+- [@jurasan](https://github.com/jurasan) ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
+
+
 ## dbt-spark 1.7.0-b1 - August 17, 2023
 
 ### Features
@@ -53,7 +82,6 @@
 - [@etheleon](https://github.com/etheleon) ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
 - [@hanna-liashchuk](https://github.com/hanna-liashchuk) ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
 
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 48607b01..3f5d3c0b 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.7.0b1"
+version = "1.7.0b2"
diff --git a/setup.py b/setup.py
index 2ac9e63c..088e5f87 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.7.0b1"
+package_version = "1.7.0b2"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 04d01af7567500c26495b1f1aab3f8c246aeb0ce Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 11 Oct 2023 13:05:36 -0400
Subject: [PATCH 505/603] ADAP-894: Support test results as views (#889)

* implement tests for persist test results functionality

* updated project config for tests

* revert config changes

* skip Spark, which needs different delete sql

* add Spark config

* add a skip marker indicating the issue that needs to be resolved

* pull the test case in from core to show that everything passes except for the deletion step

* update changelog and test names to reflect renamed parameter

* correct fixture names in test

* updated the name of the overridden test so that it actually overrides the test

* update the row count method to support Spark requirements

* added --store-failures parameter to the dbt invocation

* implement store-failures-as tests

* skip spark-session from store-failures-as tests

* revert dev requirements to point back to main on dbt-core

* revert dev requirements to point back to main on dbt-core

* update signature of get_catalog to match dbt-core and appease mypy
---
 .../unreleased/Features-20230921-180958.yaml  |  6 ++++
 Makefile                                      |  1 +
 dbt/adapters/spark/impl.py                    |  4 ++-
 .../test_store_test_failures.py               | 31 +++++++++++++++++++
 4 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Features-20230921-180958.yaml
 rename tests/functional/adapter/{store_test_failures_tests => }/test_store_test_failures.py (62%)

diff --git a/.changes/unreleased/Features-20230921-180958.yaml b/.changes/unreleased/Features-20230921-180958.yaml
new file mode 100644
index 00000000..66141eb5
--- /dev/null
+++ b/.changes/unreleased/Features-20230921-180958.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Support storing test failures as views
+time: 2023-09-21T18:09:58.174136-04:00
+custom:
+  Author: mikealfare
+  Issue: "6914"
diff --git a/Makefile b/Makefile
index 876440a0..cc1d9f75 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@ dev: ## Installs adapter in develop mode along with development dependencies
 dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment
                ## Useful when updating versions, or if you accidentally installed into the system interpreter
 	pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y
+	pip uninstall -y dbt-spark
 
 .PHONY: mypy
 mypy: ## Runs mypy against staged changes for static type checking.
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 2864c4f3..feae3412 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -347,7 +347,9 @@ def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str,
             as_dict["table_database"] = None
             yield as_dict
 
-    def get_catalog(self, manifest: Manifest) -> Tuple[agate.Table, List[Exception]]:
+    def get_catalog(
+        self, manifest: Manifest, selected_nodes: Optional[Set] = None
+    ) -> Tuple[agate.Table, List[Exception]]:
         schema_map = self._get_catalog_schemas(manifest)
         if len(schema_map) > 1:
             raise dbt.exceptions.CompilationError(
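The widened `get_catalog` signature above tracks a base-class change in dbt-core; the extra `selected_nodes` argument is optional, so existing callers keep working while mypy's override check passes. A minimal, self-contained illustration of the pattern (hypothetical class names, not dbt-core's real API):

```python
# Hypothetical illustration of keeping an override compatible with a widened base signature.
from typing import Any, List, Optional, Set, Tuple


class BaseAdapterSketch:
    def get_catalog(
        self, manifest: Any, selected_nodes: Optional[Set] = None
    ) -> Tuple[List[dict], List[Exception]]:
        return [], []


class SparkAdapterSketch(BaseAdapterSketch):
    # Accepting selected_nodes (even if unused for now) keeps the override's signature
    # compatible with the base class, so strict type checkers are satisfied and callers
    # that pass only `manifest` are unaffected.
    def get_catalog(
        self, manifest: Any, selected_nodes: Optional[Set] = None
    ) -> Tuple[List[dict], List[Exception]]:
        return super().get_catalog(manifest, selected_nodes)
```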
diff --git a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py b/tests/functional/adapter/test_store_test_failures.py
similarity index 62%
rename from tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
rename to tests/functional/adapter/test_store_test_failures.py
index c445fe67..e27cb9b9 100644
--- a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
+++ b/tests/functional/adapter/test_store_test_failures.py
@@ -1,5 +1,6 @@
 import pytest
 
+from dbt.tests.adapter.store_test_failures_tests import basic
 from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import (
     StoreTestFailuresBase,
     TEST_AUDIT_SCHEMA_SUFFIX,
@@ -42,3 +43,33 @@ def project_config_update(self):
     def test_store_and_assert_failure_with_delta(self, project):
         self.run_tests_store_one_failure(project)
         self.run_tests_store_failures_and_assert(project)
+
+
+@pytest.mark.skip_profile("spark_session")
+class TestStoreTestFailuresAsInteractions(basic.StoreTestFailuresAsInteractions):
+    pass
+
+
+@pytest.mark.skip_profile("spark_session")
+class TestStoreTestFailuresAsProjectLevelOff(basic.StoreTestFailuresAsProjectLevelOff):
+    pass
+
+
+@pytest.mark.skip_profile("spark_session")
+class TestStoreTestFailuresAsProjectLevelView(basic.StoreTestFailuresAsProjectLevelView):
+    pass
+
+
+@pytest.mark.skip_profile("spark_session")
+class TestStoreTestFailuresAsGeneric(basic.StoreTestFailuresAsGeneric):
+    pass
+
+
+@pytest.mark.skip_profile("spark_session")
+class TestStoreTestFailuresAsProjectLevelEphemeral(basic.StoreTestFailuresAsProjectLevelEphemeral):
+    pass
+
+
+@pytest.mark.skip_profile("spark_session")
+class TestStoreTestFailuresAsExceptions(basic.StoreTestFailuresAsExceptions):
+    pass
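The inherited classes above exercise dbt-core's `store_failures_as` behavior against Spark. As a rough sketch of how a project opts in (the config key comes from the dbt-core 1.7 feature this patch references; the fixture shape mirrors the tests above and is illustrative, not copied from this repo):

```python
# Illustrative sketch: store failing test rows as views for the whole project.
import pytest
from dbt.tests.util import run_dbt


class TestStoreFailuresAsViewSketch:
    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Project-level default; individual tests can still override store_failures_as.
        return {"tests": {"+store_failures_as": "view"}}

    def test_failing_tests_still_materialize(self, project):
        # A failing `dbt build` run should leave one view per failing test
        # in the audit schema, rather than a table.
        run_dbt(["build"], expect_pass=False)
```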

From 7bf19341f6219db5df8bee052558b8c7b77af3e3 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 11 Oct 2023 11:22:18 -0700
Subject: [PATCH 506/603] Create temporary views with 'or replace' (#906)

* Create temporary views with 'or replace'

* Add changie

---------

Co-authored-by: annaazizyan <anna.azizyan@picsart.com>
---
 .changes/unreleased/Features-20231011-094718.yaml | 6 ++++++
 dbt/include/spark/macros/adapters.sql             | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Features-20231011-094718.yaml

diff --git a/.changes/unreleased/Features-20231011-094718.yaml b/.changes/unreleased/Features-20231011-094718.yaml
new file mode 100644
index 00000000..8503a70b
--- /dev/null
+++ b/.changes/unreleased/Features-20231011-094718.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Create temporary views with 'or replace'
+time: 2023-10-11T09:47:18.485764-07:00
+custom:
+  Author: annazizian
+  Issue: "350"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 9e277dd6..bfc1f198 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -138,7 +138,7 @@
 
 {#-- We can't use temporary tables with `create ... as ()` syntax --#}
 {% macro spark__create_temporary_view(relation, compiled_code) -%}
-    create temporary view {{ relation }} as
+    create or replace temporary view {{ relation }} as
       {{ compiled_code }}
 {%- endmacro -%}
 

From cd1783af2181e142a9f3fd2693c61a5fb696fd7d Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@tabular.io>
Date: Thu, 12 Oct 2023 00:33:28 +0200
Subject: [PATCH 507/603] Update docs on Spark version (#897)

* Bump to Spark 3.4 and update docs

* Update docker/Dockerfile

* Update README.md

---------

Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 README.md | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index fa286b1f..2d258679 100644
--- a/README.md
+++ b/README.md
@@ -26,18 +26,20 @@ more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark)
 
 ## Running locally
 A `docker-compose` environment starts a Spark Thrift server and a Postgres database as a Hive Metastore backend.
-Note: dbt-spark now supports Spark 3.1.1 (formerly on Spark 2.x).
+Note: dbt-spark now supports Spark 3.3.2.
 
-The following command would start two docker containers
-```
+The following command starts two docker containers:
+
+```sh
 docker-compose up -d
 ```
+
 It will take a bit of time for the instance to start; you can check the logs of the two containers.
 If the instance doesn't start correctly, try the complete reset command listed below and then try to start again.
 
 Create a profile like this one:
 
-```
+```yaml
 spark_testing:
   target: local
   outputs:
@@ -60,7 +62,7 @@ Connecting to the local spark instance:
 
 Note that the Hive metastore data is persisted under `./.hive-metastore/`, and the Spark-produced data under `./.spark-warehouse/`. To completely reset your environment, run the following:
 
-```
+```sh
 docker-compose down
 rm -rf ./.hive-metastore/
 rm -rf ./.spark-warehouse/

From ff6bddbafd6ad0720e6be4ffe72682a82a19e23d Mon Sep 17 00:00:00 2001
From: FishtownBuildBot <77737458+FishtownBuildBot@users.noreply.github.com>
Date: Thu, 12 Oct 2023 12:50:36 -0400
Subject: [PATCH 508/603] Cleanup main after cutting new 1.7.latest branch
 (#907)

* Clean up changelog on main

* Bumping version to 1.8.0a1

* Code quality cleanup
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.7.0-b1.md                          | 47 -----------
 .changes/1.7.0-b2.md                          | 27 -------
 .../1.7.0/Dependencies-20230424-230630.yaml   |  6 --
 .../1.7.0/Dependencies-20230424-230645.yaml   |  6 --
 .../1.7.0/Dependencies-20230501-231003.yaml   |  6 --
 .../1.7.0/Dependencies-20230501-231035.yaml   |  6 --
 .../1.7.0/Dependencies-20230510-230725.yaml   |  6 --
 .../1.7.0/Dependencies-20230628-121341.yaml   |  6 --
 .../1.7.0/Dependencies-20230803-224622.yaml   |  6 --
 .../1.7.0/Dependencies-20230803-224623.yaml   |  6 --
 .../1.7.0/Dependencies-20230803-224626.yaml   |  6 --
 .../1.7.0/Dependencies-20230803-224629.yaml   |  6 --
 .../1.7.0/Dependencies-20230804-225232.yaml   |  6 --
 .../1.7.0/Dependencies-20230804-225236.yaml   |  6 --
 .../1.7.0/Dependencies-20230804-225243.yaml   |  6 --
 .../1.7.0/Dependencies-20230804-225249.yaml   |  6 --
 .../1.7.0/Dependencies-20230807-221033.yaml   |  6 --
 .../1.7.0/Dependencies-20230807-221037.yaml   |  6 --
 .../1.7.0/Dependencies-20230809-043913.yaml   |  6 --
 .../1.7.0/Dependencies-20230811-221135.yaml   |  6 --
 .../1.7.0/Dependencies-20230814-224754.yaml   |  6 --
 .../1.7.0/Dependencies-20230814-224757.yaml   |  6 --
 .../1.7.0/Dependencies-20230816-221452.yaml   |  6 --
 .../1.7.0/Dependencies-20230816-221455.yaml   |  6 --
 .../1.7.0/Dependencies-20230825-154517.yaml   |  6 --
 .../1.7.0/Dependencies-20230904-221612.yaml   |  6 --
 .../1.7.0/Dependencies-20230911-222120.yaml   |  6 --
 .../1.7.0/Dependencies-20230912-222718.yaml   |  6 --
 .changes/1.7.0/Features-20221229-200956.yaml  |  7 --
 .changes/1.7.0/Features-20230707-104150.yaml  |  6 --
 .changes/1.7.0/Features-20230707-113337.yaml  |  6 --
 .changes/1.7.0/Features-20230707-114650.yaml  |  6 --
 .changes/1.7.0/Features-20230707-135442.yaml  |  6 --
 .changes/1.7.0/Features-20230817-130731.yaml  |  6 --
 .changes/1.7.0/Fixes-20230517-142331.yaml     |  6 --
 .changes/1.7.0/Fixes-20230810-014122.yaml     |  6 --
 .changes/1.7.0/Security-20230817-145626.yaml  |  6 --
 .../1.7.0/Under the Hood-20230724-165508.yaml |  6 --
 .../1.7.0/Under the Hood-20230830-160616.yaml |  6 --
 .../unreleased/Features-20230921-180958.yaml  |  6 --
 .../unreleased/Features-20231011-094718.yaml  |  6 --
 CHANGELOG.md                                  | 77 -------------------
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 45 files changed, 3 insertions(+), 389 deletions(-)
 delete mode 100644 .changes/1.7.0-b1.md
 delete mode 100644 .changes/1.7.0-b2.md
 delete mode 100644 .changes/1.7.0/Dependencies-20230424-230630.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230424-230645.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230501-231003.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230501-231035.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230510-230725.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230628-121341.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230803-224622.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230803-224623.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230803-224626.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230803-224629.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230804-225232.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230804-225236.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230804-225243.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230804-225249.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230807-221033.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230807-221037.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230809-043913.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230811-221135.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230814-224754.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230814-224757.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230816-221452.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230816-221455.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230825-154517.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230904-221612.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230911-222120.yaml
 delete mode 100644 .changes/1.7.0/Dependencies-20230912-222718.yaml
 delete mode 100644 .changes/1.7.0/Features-20221229-200956.yaml
 delete mode 100644 .changes/1.7.0/Features-20230707-104150.yaml
 delete mode 100644 .changes/1.7.0/Features-20230707-113337.yaml
 delete mode 100644 .changes/1.7.0/Features-20230707-114650.yaml
 delete mode 100644 .changes/1.7.0/Features-20230707-135442.yaml
 delete mode 100644 .changes/1.7.0/Features-20230817-130731.yaml
 delete mode 100644 .changes/1.7.0/Fixes-20230517-142331.yaml
 delete mode 100644 .changes/1.7.0/Fixes-20230810-014122.yaml
 delete mode 100644 .changes/1.7.0/Security-20230817-145626.yaml
 delete mode 100644 .changes/1.7.0/Under the Hood-20230724-165508.yaml
 delete mode 100644 .changes/1.7.0/Under the Hood-20230830-160616.yaml
 delete mode 100644 .changes/unreleased/Features-20230921-180958.yaml
 delete mode 100644 .changes/unreleased/Features-20231011-094718.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index dbd792cd..48676867 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.7.0b2
+current_version = 1.8.0a1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.7.0-b1.md b/.changes/1.7.0-b1.md
deleted file mode 100644
index c2ee24af..00000000
--- a/.changes/1.7.0-b1.md
+++ /dev/null
@@ -1,47 +0,0 @@
-## dbt-spark 1.7.0-b1 - August 17, 2023
-
-### Features
-
-- Support server side parameters in thrift connection ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
-- Support server_side_parameters for Spark session connection method ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
-- Add server_side_parameters to HTTP connection method ([#824](https://github.com/dbt-labs/dbt-spark/issues/824))
-- Enforce server side parameters keys and values to be strings ([#826](https://github.com/dbt-labs/dbt-spark/issues/826))
-- Add SessionConnectionWrapper ([#829](https://github.com/dbt-labs/dbt-spark/issues/829))
-
-### Fixes
-
-- Wrap AnalysisException into DbtRuntimeError ([#782](https://github.com/dbt-labs/dbt-spark/issues/782))
-- include tblproperties macro in adapters.sql create table ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
-
-### Under the Hood
-
-- Update stale workflow to use centralized version ([#842](https://github.com/dbt-labs/dbt-spark/issues/842))
-
-### Dependencies
-
-- Update tox requirement from ~=3.0 to ~=4.5 ([#741](https://github.com/dbt-labs/dbt-spark/pull/741))
-- Update pyodbc requirement from ~=4.0.30 to ~=4.0.39 ([#742](https://github.com/dbt-labs/dbt-spark/pull/742))
-- Update pre-commit requirement from ~=2.21 to ~=3.3 ([#748](https://github.com/dbt-labs/dbt-spark/pull/748))
-- Update types-requests requirement from ~=2.28 to ~=2.29 ([#749](https://github.com/dbt-labs/dbt-spark/pull/749))
-- Bump mypy from 1.2.0 to 1.3.0 ([#768](https://github.com/dbt-labs/dbt-spark/pull/768))
-- Update flake8 requirement from ~=6.0 to ~=6.1 ([#849](https://github.com/dbt-labs/dbt-spark/pull/849))
-- Update types-requests requirement from ~=2.29 to ~=2.31 ([#850](https://github.com/dbt-labs/dbt-spark/pull/850))
-- Update pytest-xdist requirement from ~=3.2 to ~=3.3 ([#851](https://github.com/dbt-labs/dbt-spark/pull/851))
-- Update pytest requirement from ~=7.3 to ~=7.4 ([#852](https://github.com/dbt-labs/dbt-spark/pull/852))
-- Update pip-tools requirement from ~=6.13 to ~=7.2 ([#856](https://github.com/dbt-labs/dbt-spark/pull/856))
-- Update black requirement from ~=23.3 to ~=23.7 ([#857](https://github.com/dbt-labs/dbt-spark/pull/857))
-- Update wheel requirement from ~=0.40 to ~=0.41 ([#858](https://github.com/dbt-labs/dbt-spark/pull/858))
-- Update mock requirement from ~=5.0 to ~=5.1 ([#859](https://github.com/dbt-labs/dbt-spark/pull/859))
-- Bump mypy from 1.3.0 to 1.4.1 ([#860](https://github.com/dbt-labs/dbt-spark/pull/860))
-- Update tox requirement from ~=4.5 to ~=4.6 ([#861](https://github.com/dbt-labs/dbt-spark/pull/861))
-- Update pip-tools requirement from ~=7.2 to ~=7.3 ([#863](https://github.com/dbt-labs/dbt-spark/pull/863))
-- Update tox requirement from ~=4.6 to ~=4.7 ([#867](https://github.com/dbt-labs/dbt-spark/pull/867))
-- Update tox requirement from ~=4.7 to ~=4.8 ([#871](https://github.com/dbt-labs/dbt-spark/pull/871))
-- Bump mypy from 1.4.1 to 1.5.0 ([#872](https://github.com/dbt-labs/dbt-spark/pull/872))
-
-### Contributors
-- [@Fokko](https://github.com/Fokko) ([#829](https://github.com/dbt-labs/dbt-spark/issues/829), [#782](https://github.com/dbt-labs/dbt-spark/issues/782))
-- [@Fokko,JCZuurmond](https://github.com/Fokko,JCZuurmond) ([#824](https://github.com/dbt-labs/dbt-spark/issues/824), [#826](https://github.com/dbt-labs/dbt-spark/issues/826))
-- [@alarocca-apixio](https://github.com/alarocca-apixio) ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
-- [@etheleon](https://github.com/etheleon) ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
-- [@hanna-liashchuk](https://github.com/hanna-liashchuk) ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
diff --git a/.changes/1.7.0-b2.md b/.changes/1.7.0-b2.md
deleted file mode 100644
index 1f267653..00000000
--- a/.changes/1.7.0-b2.md
+++ /dev/null
@@ -1,27 +0,0 @@
-## dbt-spark 1.7.0-b2 - October 02, 2023
-
-### Features
-
-- Persist Column level comments when creating views ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
-
-### Under the Hood
-
-- Remove dependency on hologram ([#881](https://github.com/dbt-labs/dbt-spark/issues/881))
-
-### Dependencies
-
-- Replace sasl with pure-sasl for PyHive ([#818](https://github.com/dbt-labs/dbt-spark/pull/818))
-- Update tox requirement from ~=4.8 to ~=4.9 ([#874](https://github.com/dbt-labs/dbt-spark/pull/874))
-- Bump mypy from 1.5.0 to 1.5.1 ([#875](https://github.com/dbt-labs/dbt-spark/pull/875))
-- Update tox requirement from ~=4.9 to ~=4.10 ([#879](https://github.com/dbt-labs/dbt-spark/pull/879))
-- Update pre-commit requirement from ~=3.3 to ~=3.4 ([#884](https://github.com/dbt-labs/dbt-spark/pull/884))
-- Update black requirement from ~=23.7 to ~=23.9 ([#886](https://github.com/dbt-labs/dbt-spark/pull/886))
-- Update tox requirement from ~=4.10 to ~=4.11 ([#887](https://github.com/dbt-labs/dbt-spark/pull/887))
-
-### Security
-
-- Add docker image to the repo ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
-
-### Contributors
-- [@Fokko](https://github.com/Fokko) ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
-- [@jurasan](https://github.com/jurasan) ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
diff --git a/.changes/1.7.0/Dependencies-20230424-230630.yaml b/.changes/1.7.0/Dependencies-20230424-230630.yaml
deleted file mode 100644
index 1f96daad..00000000
--- a/.changes/1.7.0/Dependencies-20230424-230630.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update tox requirement from ~=3.0 to ~=4.5"
-time: 2023-04-24T23:06:30.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 741
diff --git a/.changes/1.7.0/Dependencies-20230424-230645.yaml b/.changes/1.7.0/Dependencies-20230424-230645.yaml
deleted file mode 100644
index 83e1bb44..00000000
--- a/.changes/1.7.0/Dependencies-20230424-230645.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pyodbc requirement from ~=4.0.30 to ~=4.0.39"
-time: 2023-04-24T23:06:45.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 742
diff --git a/.changes/1.7.0/Dependencies-20230501-231003.yaml b/.changes/1.7.0/Dependencies-20230501-231003.yaml
deleted file mode 100644
index b3e3a750..00000000
--- a/.changes/1.7.0/Dependencies-20230501-231003.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pre-commit requirement from ~=2.21 to ~=3.3"
-time: 2023-05-01T23:10:03.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 748
diff --git a/.changes/1.7.0/Dependencies-20230501-231035.yaml b/.changes/1.7.0/Dependencies-20230501-231035.yaml
deleted file mode 100644
index 7bbf9820..00000000
--- a/.changes/1.7.0/Dependencies-20230501-231035.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update types-requests requirement from ~=2.28 to ~=2.29"
-time: 2023-05-01T23:10:35.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 749
diff --git a/.changes/1.7.0/Dependencies-20230510-230725.yaml b/.changes/1.7.0/Dependencies-20230510-230725.yaml
deleted file mode 100644
index dfd04ad3..00000000
--- a/.changes/1.7.0/Dependencies-20230510-230725.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.2.0 to 1.3.0"
-time: 2023-05-10T23:07:25.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 768
diff --git a/.changes/1.7.0/Dependencies-20230628-121341.yaml b/.changes/1.7.0/Dependencies-20230628-121341.yaml
deleted file mode 100644
index 5569c885..00000000
--- a/.changes/1.7.0/Dependencies-20230628-121341.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Dependencies
-body: Replace sasl with pure-sasl for PyHive
-time: 2023-06-28T12:13:41.141588-07:00
-custom:
-  Author: colin-rogers-dbt
-  PR: "818"
diff --git a/.changes/1.7.0/Dependencies-20230803-224622.yaml b/.changes/1.7.0/Dependencies-20230803-224622.yaml
deleted file mode 100644
index 119a08e5..00000000
--- a/.changes/1.7.0/Dependencies-20230803-224622.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update flake8 requirement from ~=6.0 to ~=6.1"
-time: 2023-08-03T22:46:22.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 849
diff --git a/.changes/1.7.0/Dependencies-20230803-224623.yaml b/.changes/1.7.0/Dependencies-20230803-224623.yaml
deleted file mode 100644
index 97ffd555..00000000
--- a/.changes/1.7.0/Dependencies-20230803-224623.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update types-requests requirement from ~=2.29 to ~=2.31"
-time: 2023-08-03T22:46:23.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 850
diff --git a/.changes/1.7.0/Dependencies-20230803-224626.yaml b/.changes/1.7.0/Dependencies-20230803-224626.yaml
deleted file mode 100644
index c8b9ef04..00000000
--- a/.changes/1.7.0/Dependencies-20230803-224626.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest-xdist requirement from ~=3.2 to ~=3.3"
-time: 2023-08-03T22:46:26.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 851
diff --git a/.changes/1.7.0/Dependencies-20230803-224629.yaml b/.changes/1.7.0/Dependencies-20230803-224629.yaml
deleted file mode 100644
index 6865c7c7..00000000
--- a/.changes/1.7.0/Dependencies-20230803-224629.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest requirement from ~=7.3 to ~=7.4"
-time: 2023-08-03T22:46:29.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 852
diff --git a/.changes/1.7.0/Dependencies-20230804-225232.yaml b/.changes/1.7.0/Dependencies-20230804-225232.yaml
deleted file mode 100644
index f4a09b6b..00000000
--- a/.changes/1.7.0/Dependencies-20230804-225232.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pip-tools requirement from ~=6.13 to ~=7.2"
-time: 2023-08-04T22:52:32.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 856
diff --git a/.changes/1.7.0/Dependencies-20230804-225236.yaml b/.changes/1.7.0/Dependencies-20230804-225236.yaml
deleted file mode 100644
index d45b9551..00000000
--- a/.changes/1.7.0/Dependencies-20230804-225236.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.3 to ~=23.7"
-time: 2023-08-04T22:52:36.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 857
diff --git a/.changes/1.7.0/Dependencies-20230804-225243.yaml b/.changes/1.7.0/Dependencies-20230804-225243.yaml
deleted file mode 100644
index 07b9bdb4..00000000
--- a/.changes/1.7.0/Dependencies-20230804-225243.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update wheel requirement from ~=0.40 to ~=0.41"
-time: 2023-08-04T22:52:43.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 858
diff --git a/.changes/1.7.0/Dependencies-20230804-225249.yaml b/.changes/1.7.0/Dependencies-20230804-225249.yaml
deleted file mode 100644
index 68b3a248..00000000
--- a/.changes/1.7.0/Dependencies-20230804-225249.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update mock requirement from ~=5.0 to ~=5.1"
-time: 2023-08-04T22:52:49.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 859
diff --git a/.changes/1.7.0/Dependencies-20230807-221033.yaml b/.changes/1.7.0/Dependencies-20230807-221033.yaml
deleted file mode 100644
index 94a26114..00000000
--- a/.changes/1.7.0/Dependencies-20230807-221033.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.3.0 to 1.4.1"
-time: 2023-08-07T22:10:33.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 860
diff --git a/.changes/1.7.0/Dependencies-20230807-221037.yaml b/.changes/1.7.0/Dependencies-20230807-221037.yaml
deleted file mode 100644
index daa1e3ba..00000000
--- a/.changes/1.7.0/Dependencies-20230807-221037.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update tox requirement from ~=4.5 to ~=4.6"
-time: 2023-08-07T22:10:37.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 861
diff --git a/.changes/1.7.0/Dependencies-20230809-043913.yaml b/.changes/1.7.0/Dependencies-20230809-043913.yaml
deleted file mode 100644
index 28432003..00000000
--- a/.changes/1.7.0/Dependencies-20230809-043913.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pip-tools requirement from ~=7.2 to ~=7.3"
-time: 2023-08-09T04:39:13.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 863
diff --git a/.changes/1.7.0/Dependencies-20230811-221135.yaml b/.changes/1.7.0/Dependencies-20230811-221135.yaml
deleted file mode 100644
index 4fd2e4f5..00000000
--- a/.changes/1.7.0/Dependencies-20230811-221135.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update tox requirement from ~=4.6 to ~=4.7"
-time: 2023-08-11T22:11:35.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 867
diff --git a/.changes/1.7.0/Dependencies-20230814-224754.yaml b/.changes/1.7.0/Dependencies-20230814-224754.yaml
deleted file mode 100644
index 4cd4a01d..00000000
--- a/.changes/1.7.0/Dependencies-20230814-224754.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update tox requirement from ~=4.7 to ~=4.8"
-time: 2023-08-14T22:47:54.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 871
diff --git a/.changes/1.7.0/Dependencies-20230814-224757.yaml b/.changes/1.7.0/Dependencies-20230814-224757.yaml
deleted file mode 100644
index 7014382b..00000000
--- a/.changes/1.7.0/Dependencies-20230814-224757.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.4.1 to 1.5.0"
-time: 2023-08-14T22:47:57.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 872
diff --git a/.changes/1.7.0/Dependencies-20230816-221452.yaml b/.changes/1.7.0/Dependencies-20230816-221452.yaml
deleted file mode 100644
index 8a5d3f0c..00000000
--- a/.changes/1.7.0/Dependencies-20230816-221452.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update tox requirement from ~=4.8 to ~=4.9"
-time: 2023-08-16T22:14:52.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 874
diff --git a/.changes/1.7.0/Dependencies-20230816-221455.yaml b/.changes/1.7.0/Dependencies-20230816-221455.yaml
deleted file mode 100644
index 01a28c48..00000000
--- a/.changes/1.7.0/Dependencies-20230816-221455.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.5.0 to 1.5.1"
-time: 2023-08-16T22:14:55.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 875
diff --git a/.changes/1.7.0/Dependencies-20230825-154517.yaml b/.changes/1.7.0/Dependencies-20230825-154517.yaml
deleted file mode 100644
index 3afae44e..00000000
--- a/.changes/1.7.0/Dependencies-20230825-154517.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update tox requirement from ~=4.9 to ~=4.10"
-time: 2023-08-25T15:45:17.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 879
diff --git a/.changes/1.7.0/Dependencies-20230904-221612.yaml b/.changes/1.7.0/Dependencies-20230904-221612.yaml
deleted file mode 100644
index 50d6dbf5..00000000
--- a/.changes/1.7.0/Dependencies-20230904-221612.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pre-commit requirement from ~=3.3 to ~=3.4"
-time: 2023-09-04T22:16:12.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 884
diff --git a/.changes/1.7.0/Dependencies-20230911-222120.yaml b/.changes/1.7.0/Dependencies-20230911-222120.yaml
deleted file mode 100644
index b1b2b68b..00000000
--- a/.changes/1.7.0/Dependencies-20230911-222120.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.7 to ~=23.9"
-time: 2023-09-11T22:21:20.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 886
diff --git a/.changes/1.7.0/Dependencies-20230912-222718.yaml b/.changes/1.7.0/Dependencies-20230912-222718.yaml
deleted file mode 100644
index 560c9176..00000000
--- a/.changes/1.7.0/Dependencies-20230912-222718.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update tox requirement from ~=4.10 to ~=4.11"
-time: 2023-09-12T22:27:18.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 887
diff --git a/.changes/1.7.0/Features-20221229-200956.yaml b/.changes/1.7.0/Features-20221229-200956.yaml
deleted file mode 100644
index 1add9bf7..00000000
--- a/.changes/1.7.0/Features-20221229-200956.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: Support server side parameters in thrift connection
-time: 2022-12-29T20:09:56.457776+02:00
-custom:
-  Author: ' hanna-liashchuk'
-  Issue: "387"
-  PR: "577"
diff --git a/.changes/1.7.0/Features-20230707-104150.yaml b/.changes/1.7.0/Features-20230707-104150.yaml
deleted file mode 100644
index 183a37b4..00000000
--- a/.changes/1.7.0/Features-20230707-104150.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Support server_side_parameters for Spark session connection method
-time: 2023-07-07T10:41:50.01541+02:00
-custom:
-  Author: alarocca-apixio
-  Issue: "690"
diff --git a/.changes/1.7.0/Features-20230707-113337.yaml b/.changes/1.7.0/Features-20230707-113337.yaml
deleted file mode 100644
index de0a50fe..00000000
--- a/.changes/1.7.0/Features-20230707-113337.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Add server_side_parameters to HTTP connection method
-time: 2023-07-07T11:33:37.794112+02:00
-custom:
-  Author: Fokko,JCZuurmond
-  Issue: "824"
diff --git a/.changes/1.7.0/Features-20230707-114650.yaml b/.changes/1.7.0/Features-20230707-114650.yaml
deleted file mode 100644
index 6f1b3d38..00000000
--- a/.changes/1.7.0/Features-20230707-114650.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Enforce server side parameters keys and values to be strings
-time: 2023-07-07T11:46:50.390918+02:00
-custom:
-  Author: Fokko,JCZuurmond
-  Issue: "826"
diff --git a/.changes/1.7.0/Features-20230707-135442.yaml b/.changes/1.7.0/Features-20230707-135442.yaml
deleted file mode 100644
index 39b11952..00000000
--- a/.changes/1.7.0/Features-20230707-135442.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Add SessionConnectionWrapper
-time: 2023-07-07T13:54:42.41341+02:00
-custom:
-  Author: Fokko
-  Issue: "829"
diff --git a/.changes/1.7.0/Features-20230817-130731.yaml b/.changes/1.7.0/Features-20230817-130731.yaml
deleted file mode 100644
index e88deb7b..00000000
--- a/.changes/1.7.0/Features-20230817-130731.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Persist Column level comments when creating views
-time: 2023-08-17T13:07:31.6812862Z
-custom:
-  Author: jurasan
-  Issue: 372
diff --git a/.changes/1.7.0/Fixes-20230517-142331.yaml b/.changes/1.7.0/Fixes-20230517-142331.yaml
deleted file mode 100644
index 9f90e48b..00000000
--- a/.changes/1.7.0/Fixes-20230517-142331.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Wrap AnalysisException into DbtRuntimeError
-time: 2023-05-17T14:23:31.263799+02:00
-custom:
-  Author: Fokko
-  Issue: "782"
diff --git a/.changes/1.7.0/Fixes-20230810-014122.yaml b/.changes/1.7.0/Fixes-20230810-014122.yaml
deleted file mode 100644
index fcb34237..00000000
--- a/.changes/1.7.0/Fixes-20230810-014122.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: include tblproperties macro in adapters.sql create table
-time: 2023-08-10T01:41:22.782982+08:00
-custom:
-  Author: etheleon
-  Issue: "865"
diff --git a/.changes/1.7.0/Security-20230817-145626.yaml b/.changes/1.7.0/Security-20230817-145626.yaml
deleted file mode 100644
index 4add88cb..00000000
--- a/.changes/1.7.0/Security-20230817-145626.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Security
-body: Add docker image to the repo
-time: 2023-08-17T14:56:26.361208+02:00
-custom:
-  Author: Fokko
-  PR: "876"
diff --git a/.changes/1.7.0/Under the Hood-20230724-165508.yaml b/.changes/1.7.0/Under the Hood-20230724-165508.yaml
deleted file mode 100644
index 88948464..00000000
--- a/.changes/1.7.0/Under the Hood-20230724-165508.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Update stale workflow to use centralized version
-time: 2023-07-24T16:55:08.096947-04:00
-custom:
-  Author: mikealfare
-  Issue: "842"
diff --git a/.changes/1.7.0/Under the Hood-20230830-160616.yaml b/.changes/1.7.0/Under the Hood-20230830-160616.yaml
deleted file mode 100644
index 018a9403..00000000
--- a/.changes/1.7.0/Under the Hood-20230830-160616.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Remove dependency on hologram
-time: 2023-08-30T16:06:16.444881-07:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "881"
diff --git a/.changes/unreleased/Features-20230921-180958.yaml b/.changes/unreleased/Features-20230921-180958.yaml
deleted file mode 100644
index 66141eb5..00000000
--- a/.changes/unreleased/Features-20230921-180958.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Support storing test failures as views
-time: 2023-09-21T18:09:58.174136-04:00
-custom:
-  Author: mikealfare
-  Issue: "6914"
diff --git a/.changes/unreleased/Features-20231011-094718.yaml b/.changes/unreleased/Features-20231011-094718.yaml
deleted file mode 100644
index 8503a70b..00000000
--- a/.changes/unreleased/Features-20231011-094718.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Create temporary views with 'or replace'
-time: 2023-10-11T09:47:18.485764-07:00
-custom:
-  Author: annazizian
-  Issue: "350"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 362976a1..902db37f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,83 +5,6 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-## dbt-spark 1.7.0-b2 - October 02, 2023
-
-### Features
-
-- Persist Column level comments when creating views ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
-
-### Under the Hood
-
-- Remove dependency on hologram ([#881](https://github.com/dbt-labs/dbt-spark/issues/881))
-
-### Dependencies
-
-- Replace sasl with pure-sasl for PyHive ([#818](https://github.com/dbt-labs/dbt-spark/pull/818))
-- Update tox requirement from ~=4.8 to ~=4.9 ([#874](https://github.com/dbt-labs/dbt-spark/pull/874))
-- Bump mypy from 1.5.0 to 1.5.1 ([#875](https://github.com/dbt-labs/dbt-spark/pull/875))
-- Update tox requirement from ~=4.9 to ~=4.10 ([#879](https://github.com/dbt-labs/dbt-spark/pull/879))
-- Update pre-commit requirement from ~=3.3 to ~=3.4 ([#884](https://github.com/dbt-labs/dbt-spark/pull/884))
-- Update black requirement from ~=23.7 to ~=23.9 ([#886](https://github.com/dbt-labs/dbt-spark/pull/886))
-- Update tox requirement from ~=4.10 to ~=4.11 ([#887](https://github.com/dbt-labs/dbt-spark/pull/887))
-
-### Security
-
-- Add docker image to the repo ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
-
-### Contributors
-- [@Fokko](https://github.com/Fokko) ([#876](https://github.com/dbt-labs/dbt-spark/pull/876))
-- [@jurasan](https://github.com/jurasan) ([#372](https://github.com/dbt-labs/dbt-spark/issues/372))
-
-
-## dbt-spark 1.7.0-b1 - August 17, 2023
-
-### Features
-
-- Support server side parameters in thrift connection ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
-- Support server_side_parameters for Spark session connection method ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
-- Add server_side_parameters to HTTP connection method ([#824](https://github.com/dbt-labs/dbt-spark/issues/824))
-- Enforce server side parameters keys and values to be strings ([#826](https://github.com/dbt-labs/dbt-spark/issues/826))
-- Add SessionConnectionWrapper ([#829](https://github.com/dbt-labs/dbt-spark/issues/829))
-
-### Fixes
-
-- Wrap AnalysisException into DbtRuntimeError ([#782](https://github.com/dbt-labs/dbt-spark/issues/782))
-- include tblproperties macro in adapters.sql create table ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
-
-### Under the Hood
-
-- Update stale workflow to use centralized version ([#842](https://github.com/dbt-labs/dbt-spark/issues/842))
-
-### Dependencies
-
-- Update tox requirement from ~=3.0 to ~=4.5 ([#741](https://github.com/dbt-labs/dbt-spark/pull/741))
-- Update pyodbc requirement from ~=4.0.30 to ~=4.0.39 ([#742](https://github.com/dbt-labs/dbt-spark/pull/742))
-- Update pre-commit requirement from ~=2.21 to ~=3.3 ([#748](https://github.com/dbt-labs/dbt-spark/pull/748))
-- Update types-requests requirement from ~=2.28 to ~=2.29 ([#749](https://github.com/dbt-labs/dbt-spark/pull/749))
-- Bump mypy from 1.2.0 to 1.3.0 ([#768](https://github.com/dbt-labs/dbt-spark/pull/768))
-- Update flake8 requirement from ~=6.0 to ~=6.1 ([#849](https://github.com/dbt-labs/dbt-spark/pull/849))
-- Update types-requests requirement from ~=2.29 to ~=2.31 ([#850](https://github.com/dbt-labs/dbt-spark/pull/850))
-- Update pytest-xdist requirement from ~=3.2 to ~=3.3 ([#851](https://github.com/dbt-labs/dbt-spark/pull/851))
-- Update pytest requirement from ~=7.3 to ~=7.4 ([#852](https://github.com/dbt-labs/dbt-spark/pull/852))
-- Update pip-tools requirement from ~=6.13 to ~=7.2 ([#856](https://github.com/dbt-labs/dbt-spark/pull/856))
-- Update black requirement from ~=23.3 to ~=23.7 ([#857](https://github.com/dbt-labs/dbt-spark/pull/857))
-- Update wheel requirement from ~=0.40 to ~=0.41 ([#858](https://github.com/dbt-labs/dbt-spark/pull/858))
-- Update mock requirement from ~=5.0 to ~=5.1 ([#859](https://github.com/dbt-labs/dbt-spark/pull/859))
-- Bump mypy from 1.3.0 to 1.4.1 ([#860](https://github.com/dbt-labs/dbt-spark/pull/860))
-- Update tox requirement from ~=4.5 to ~=4.6 ([#861](https://github.com/dbt-labs/dbt-spark/pull/861))
-- Update pip-tools requirement from ~=7.2 to ~=7.3 ([#863](https://github.com/dbt-labs/dbt-spark/pull/863))
-- Update tox requirement from ~=4.6 to ~=4.7 ([#867](https://github.com/dbt-labs/dbt-spark/pull/867))
-- Update tox requirement from ~=4.7 to ~=4.8 ([#871](https://github.com/dbt-labs/dbt-spark/pull/871))
-- Bump mypy from 1.4.1 to 1.5.0 ([#872](https://github.com/dbt-labs/dbt-spark/pull/872))
-
-### Contributors
-- [@Fokko](https://github.com/Fokko) ([#829](https://github.com/dbt-labs/dbt-spark/issues/829), [#782](https://github.com/dbt-labs/dbt-spark/issues/782))
-- [@Fokko,JCZuurmond](https://github.com/Fokko,JCZuurmond) ([#824](https://github.com/dbt-labs/dbt-spark/issues/824), [#826](https://github.com/dbt-labs/dbt-spark/issues/826))
-- [@alarocca-apixio](https://github.com/alarocca-apixio) ([#690](https://github.com/dbt-labs/dbt-spark/issues/690))
-- [@etheleon](https://github.com/etheleon) ([#865](https://github.com/dbt-labs/dbt-spark/issues/865))
-- [@hanna-liashchuk](https://github.com/hanna-liashchuk) ([#387](https://github.com/dbt-labs/dbt-spark/issues/387))
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 3f5d3c0b..f15b401d 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.7.0b2"
+version = "1.8.0a1"
diff --git a/setup.py b/setup.py
index 088e5f87..301b4a41 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.7.0b2"
+package_version = "1.8.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 7ac4a7eb0515365e3959d1e3f54b29925ed487e3 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Fri, 13 Oct 2023 15:48:35 -0500
Subject: [PATCH 509/603] add docs-issue workflow to dbt-spark (#913)

---
 .github/workflows/docs-issues.yml | 43 +++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 .github/workflows/docs-issues.yml

diff --git a/.github/workflows/docs-issues.yml b/.github/workflows/docs-issues.yml
new file mode 100644
index 00000000..00a098df
--- /dev/null
+++ b/.github/workflows/docs-issues.yml
@@ -0,0 +1,43 @@
+# **what?**
+# Open an issue in docs.getdbt.com when a PR is labeled `user docs`
+
+# **why?**
+# To reduce barriers for keeping docs up to date
+
+# **when?**
+# When a PR is labeled `user docs` and is merged.  Runs on pull_request_target so it uses the workflow
+# definition already merged to the default branch, not the one that existed on the PR branch.  This allows old PRs to get comments.
+
+
+name: Open issues in docs.getdbt.com repo when a PR is labeled
+run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}"
+
+on:
+  pull_request_target:
+    types: [labeled, closed]
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+    issues: write # opens new issues
+    pull-requests: write # comments on PRs
+
+
+jobs:
+  open_issues:
+    # we only want to run this when the PR has been merged or the label in the labeled event is `user docs`.  Otherwise it runs the
+    # risk of duplicate issues being created when the merge and the label both trigger this workflow and neither run has
+    # created the comment before the other finishes.  This lives here instead of the shared workflow because this is where we
+    # decide if it should run or not.
+    if: |
+      (github.event.pull_request.merged == true) &&
+      ((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) ||
+      (github.event.action == 'labeled' && github.event.label.name == 'user docs'))
+    uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
+    with:
+        issue_repository: "dbt-labs/docs.getdbt.com"
+        issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}"
+        issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
+    secrets: inherit

From 99c5c5ad4cad053e556af50afb77603c821ce538 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Tue, 17 Oct 2023 11:35:01 -0500
Subject: [PATCH 510/603] Mcknight/fix test store test (#832)

* add teardown_method for store_test_fail test

* create pr

* add teardown to delta test class as it also builds tests on alt schema
---
 .../adapter/test_store_test_failures.py       | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/functional/adapter/test_store_test_failures.py b/tests/functional/adapter/test_store_test_failures.py
index e27cb9b9..e78bd4f7 100644
--- a/tests/functional/adapter/test_store_test_failures.py
+++ b/tests/functional/adapter/test_store_test_failures.py
@@ -18,6 +18,17 @@ def project_config_update(self):
             "tests": {"+schema": TEST_AUDIT_SCHEMA_SUFFIX, "+store_failures": True},
         }
 
+    @pytest.fixture(scope="function", autouse=True)
+    def teardown_method(self, project):
+        yield
+        with project.adapter.connection_named("__test"):
+            relation = project.adapter.Relation.create(
+                database=project.database,
+                schema=f"{project.test_schema}_{TEST_AUDIT_SCHEMA_SUFFIX}",
+            )
+
+            project.adapter.drop_schema(relation)
+
     def test_store_and_assert(self, project):
         self.run_tests_store_one_failure(project)
         self.run_tests_store_failures_and_assert(project)
@@ -40,6 +51,17 @@ def project_config_update(self):
             },
         }
 
+    @pytest.fixture(scope="function", autouse=True)
+    def teardown_method(self, project):
+        yield
+        with project.adapter.connection_named("__test"):
+            relation = project.adapter.Relation.create(
+                database=project.database,
+                schema=f"{project.test_schema}_{TEST_AUDIT_SCHEMA_SUFFIX}",
+            )
+
+            project.adapter.drop_schema(relation)
+
     def test_store_and_assert_failure_with_delta(self, project):
         self.run_tests_store_one_failure(project)
         self.run_tests_store_failures_and_assert(project)

From 13c42060b8c16d871f664302a2b1782f622e6c2a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 27 Oct 2023 03:25:03 +0000
Subject: [PATCH 511/603] Update pre-commit requirement from ~=3.4 to ~=3.5
 (#914)

* Update pre-commit requirement from ~=3.4 to ~=3.5

Updates the requirements on [pre-commit](https://github.com/pre-commit/pre-commit) to permit the latest version.
- [Release notes](https://github.com/pre-commit/pre-commit/releases)
- [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pre-commit/pre-commit/compare/v3.4.0...v3.5.0)

---
updated-dependencies:
- dependency-name: pre-commit
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231013-223750.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231013-223750.yaml

diff --git a/.changes/unreleased/Dependencies-20231013-223750.yaml b/.changes/unreleased/Dependencies-20231013-223750.yaml
new file mode 100644
index 00000000..2cea8c6d
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231013-223750.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pre-commit requirement from ~=3.4 to ~=3.5"
+time: 2023-10-13T22:37:50.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 914
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 6ea7b16a..1cbd0d73 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -14,7 +14,7 @@ freezegun~=1.2
 ipdb~=0.13.13
 mypy==1.5.1  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
-pre-commit~=3.4
+pre-commit~=3.5
 pre-commit-hooks~=4.4
 pytest~=7.4
 pytest-csv~=3.0

From d81cb27daf4e12e7bc98d30bbc105fdb2f3a030b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 27 Oct 2023 09:37:34 -0400
Subject: [PATCH 512/603] Update pre-commit-hooks requirement from ~=4.4 to
 ~=4.5 (#903)

* Update pre-commit-hooks requirement from ~=4.4 to ~=4.5

Updates the requirements on [pre-commit-hooks](https://github.com/pre-commit/pre-commit-hooks) to permit the latest version.
- [Release notes](https://github.com/pre-commit/pre-commit-hooks/releases)
- [Changelog](https://github.com/pre-commit/pre-commit-hooks/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0)

---
updated-dependencies:
- dependency-name: pre-commit-hooks
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231009-220732.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231009-220732.yaml

diff --git a/.changes/unreleased/Dependencies-20231009-220732.yaml b/.changes/unreleased/Dependencies-20231009-220732.yaml
new file mode 100644
index 00000000..4b9073fa
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231009-220732.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pre-commit-hooks requirement from ~=4.4 to ~=4.5"
+time: 2023-10-09T22:07:32.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 903
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 1cbd0d73..6aefeb6c 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -15,7 +15,7 @@ ipdb~=0.13.13
 mypy==1.5.1  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.5
-pre-commit-hooks~=4.4
+pre-commit-hooks~=4.5
 pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2

From c4b3705539cf03ca4a461e6979987fa9b3f002f6 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 27 Oct 2023 10:31:30 -0400
Subject: [PATCH 513/603] Bump mypy from 1.5.1 to 1.6.0 (#904)

* Bump mypy from 1.5.1 to 1.6.0

Bumps [mypy](https://github.com/python/mypy) from 1.5.1 to 1.6.0.
- [Commits](https://github.com/python/mypy/compare/v1.5.1...v1.6.0)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231010-222853.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231010-222853.yaml

diff --git a/.changes/unreleased/Dependencies-20231010-222853.yaml b/.changes/unreleased/Dependencies-20231010-222853.yaml
new file mode 100644
index 00000000..43b481ed
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231010-222853.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.5.1 to 1.6.0"
+time: 2023-10-10T22:28:53.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 904
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 6aefeb6c..8bed9b79 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.5.1  # patch updates have historically introduced breaking changes
+mypy==1.6.0  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.5
 pre-commit-hooks~=4.5

From 76f890ebdaf3e10d5495c501aa255dff020dbfdb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 27 Oct 2023 13:04:39 -0400
Subject: [PATCH 514/603] Update pyodbc requirement from ~=4.0.39 to ~=5.0.0
 (#905)

* Update pyodbc requirement from ~=4.0.39 to ~=5.0.0

Updates the requirements on [pyodbc](https://github.com/mkleehammer/pyodbc) to permit the latest version.
- [Release notes](https://github.com/mkleehammer/pyodbc/releases)
- [Commits](https://github.com/mkleehammer/pyodbc/compare/4.0.39...5.0.0)

---
updated-dependencies:
- dependency-name: pyodbc
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231010-222910.yaml | 6 ++++++
 requirements.txt                                      | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231010-222910.yaml

diff --git a/.changes/unreleased/Dependencies-20231010-222910.yaml b/.changes/unreleased/Dependencies-20231010-222910.yaml
new file mode 100644
index 00000000..c6dbe582
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231010-222910.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pyodbc requirement from ~=4.0.39 to ~=5.0.0"
+time: 2023-10-10T22:29:10.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 905
diff --git a/requirements.txt b/requirements.txt
index ea5d1ad2..afd16277 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pyhive[hive_pure_sasl]~=0.7.0
 requests>=2.28.1
 
-pyodbc~=4.0.39
+pyodbc~=5.0.0
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability

From fed3d5f23a90943a2fd675c6b104358cc5179f43 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 31 Oct 2023 17:10:55 -0400
Subject: [PATCH 515/603] Update black requirement from ~=23.9 to ~=23.10
 (#927)

* Update black requirement from ~=23.9 to ~=23.10

Updates the requirements on [black](https://github.com/psf/black) to permit the latest version.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/23.9.0...23.10.1)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20231027-230301.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231027-230301.yaml

diff --git a/.changes/unreleased/Dependencies-20231027-230301.yaml b/.changes/unreleased/Dependencies-20231027-230301.yaml
new file mode 100644
index 00000000..74548dde
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231027-230301.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update black requirement from ~=23.9 to ~=23.10"
+time: 2023-10-27T23:03:01.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 927
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8bed9b79..8f94d509 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.9
+black~=23.10
 bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"

From 36c28b9b3e9ae79a68c3c0ea8f15d0489a8e2569 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 1 Nov 2023 20:03:28 -0400
Subject: [PATCH 516/603] Update pyodbc requirement from ~=5.0.0 to ~=5.0.1
 (#925)

* Update pyodbc requirement from ~=5.0.0 to ~=5.0.1

Updates the requirements on [pyodbc](https://github.com/mkleehammer/pyodbc) to permit the latest version.
- [Release notes](https://github.com/mkleehammer/pyodbc/releases)
- [Commits](https://github.com/mkleehammer/pyodbc/compare/5.0.0...5.0.1)

---
updated-dependencies:
- dependency-name: pyodbc
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231027-230251.yaml | 6 ++++++
 requirements.txt                                      | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231027-230251.yaml

diff --git a/.changes/unreleased/Dependencies-20231027-230251.yaml b/.changes/unreleased/Dependencies-20231027-230251.yaml
new file mode 100644
index 00000000..4ad0d65f
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231027-230251.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pyodbc requirement from ~=5.0.0 to ~=5.0.1"
+time: 2023-10-27T23:02:51.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 925
diff --git a/requirements.txt b/requirements.txt
index afd16277..18ccc77f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pyhive[hive_pure_sasl]~=0.7.0
 requests>=2.28.1
 
-pyodbc~=5.0.0
+pyodbc~=5.0.1
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability

From dffea968188cf87f1eb1be48c01f5fcb609e5bb2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 1 Nov 2023 23:58:15 -0400
Subject: [PATCH 517/603] Bump mypy from 1.6.0 to 1.6.1 (#926)

* Bump mypy from 1.6.0 to 1.6.1

Bumps [mypy](https://github.com/python/mypy) from 1.6.0 to 1.6.1.
- [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md)
- [Commits](https://github.com/python/mypy/compare/v1.6.0...v1.6.1)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231027-230254.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231027-230254.yaml

diff --git a/.changes/unreleased/Dependencies-20231027-230254.yaml b/.changes/unreleased/Dependencies-20231027-230254.yaml
new file mode 100644
index 00000000..f1b74521
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231027-230254.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.6.0 to 1.6.1"
+time: 2023-10-27T23:02:54.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 926
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8f94d509..a6b3dca0 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.6.0  # patch updates have historically introduced breaking changes
+mypy==1.6.1  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.5
 pre-commit-hooks~=4.5

From 0f39bd1410d7cc95cfae8b661c6080e7fd99cc01 Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Tue, 7 Nov 2023 16:49:38 -0500
Subject: [PATCH 518/603] Support new agate Integer type and test with empty
 seed (#936)

* Support new agate Integer type and test with empty seed

* add changie, remove branch reference
---
 .changes/unreleased/Fixes-20231107-134141.yaml | 6 ++++++
 dbt/adapters/spark/impl.py                     | 4 ++++
 tests/functional/adapter/test_simple_seed.py   | 5 +++++
 3 files changed, 15 insertions(+)
 create mode 100644 .changes/unreleased/Fixes-20231107-134141.yaml
 create mode 100644 tests/functional/adapter/test_simple_seed.py

diff --git a/.changes/unreleased/Fixes-20231107-134141.yaml b/.changes/unreleased/Fixes-20231107-134141.yaml
new file mode 100644
index 00000000..d4f56eac
--- /dev/null
+++ b/.changes/unreleased/Fixes-20231107-134141.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Support new agate Integer type and empty seed test
+time: 2023-11-07T13:41:41.033441-05:00
+custom:
+  Author: gshank
+  Issue: "935"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index feae3412..16c3a3cb 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -120,6 +120,10 @@ def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
         return "double" if decimals else "bigint"
 
+    @classmethod
+    def convert_integer_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+        return "bigint"
+
     @classmethod
     def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         return "date"
diff --git a/tests/functional/adapter/test_simple_seed.py b/tests/functional/adapter/test_simple_seed.py
new file mode 100644
index 00000000..c610967c
--- /dev/null
+++ b/tests/functional/adapter/test_simple_seed.py
@@ -0,0 +1,5 @@
+from dbt.tests.adapter.simple_seed.test_seed import BaseTestEmptySeed
+
+
+class TestSparkEmptySeed(BaseTestEmptySeed):
+    pass

From 5bbf67379c6ee0f8e13819ff736ef9fc90c3f27c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 8 Nov 2023 18:50:52 -0500
Subject: [PATCH 519/603] Update black requirement from ~=23.10 to ~=23.11
 (#942)

* Update black requirement from ~=23.10 to ~=23.11

Updates the requirements on [black](https://github.com/psf/black) to permit the latest version.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/23.10.0...23.11.0)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20231108-222326.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231108-222326.yaml

diff --git a/.changes/unreleased/Dependencies-20231108-222326.yaml b/.changes/unreleased/Dependencies-20231108-222326.yaml
new file mode 100644
index 00000000..fdd35004
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231108-222326.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update black requirement from ~=23.10 to ~=23.11"
+time: 2023-11-08T22:23:26.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 942
diff --git a/dev-requirements.txt b/dev-requirements.txt
index a6b3dca0..a520f150 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.10
+black~=23.11
 bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"

From 8d08b769e8619c94e558c3924a187c17f5b65cdf Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 Nov 2023 13:27:32 -0500
Subject: [PATCH 520/603] Bump mypy from 1.6.1 to 1.7.0 (#946)

* Bump mypy from 1.6.1 to 1.7.0

Bumps [mypy](https://github.com/python/mypy) from 1.6.1 to 1.7.0.
- [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md)
- [Commits](https://github.com/python/mypy/compare/v1.6.1...v1.7.0)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20231110-224056.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231110-224056.yaml

diff --git a/.changes/unreleased/Dependencies-20231110-224056.yaml b/.changes/unreleased/Dependencies-20231110-224056.yaml
new file mode 100644
index 00000000..efdf9a54
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231110-224056.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.6.1 to 1.7.0"
+time: 2023-11-10T22:40:56.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 946
diff --git a/dev-requirements.txt b/dev-requirements.txt
index a520f150..4b4f9939 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.6.1  # patch updates have historically introduced breaking changes
+mypy==1.7.0  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.5
 pre-commit-hooks~=4.5

From 1207e2c25e40d450e96cba0c22d7987b4e660b0c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 Nov 2023 15:29:28 -0500
Subject: [PATCH 521/603] Update pytest-xdist requirement from ~=3.3 to ~=3.4
 (#947)

* Update pytest-xdist requirement from ~=3.3 to ~=3.4

Updates the requirements on [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) to permit the latest version.
- [Changelog](https://github.com/pytest-dev/pytest-xdist/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest-xdist/compare/v3.3.0...v3.4.0)

---
updated-dependencies:
- dependency-name: pytest-xdist
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231113-224111.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231113-224111.yaml

diff --git a/.changes/unreleased/Dependencies-20231113-224111.yaml b/.changes/unreleased/Dependencies-20231113-224111.yaml
new file mode 100644
index 00000000..b2fb306c
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231113-224111.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytest-xdist requirement from ~=3.3 to ~=3.4"
+time: 2023-11-13T22:41:11.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 947
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 4b4f9939..4abe824e 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -20,7 +20,7 @@ pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
-pytest-xdist~=3.3
+pytest-xdist~=3.4
 pytz~=2023.3
 tox~=4.11
 types-pytz~=2023.3

From 1d47a3d58983fad91c6c1133a1970dfc99d0b3ac Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 28 Nov 2023 18:19:16 -0500
Subject: [PATCH 522/603] Bump mypy from 1.7.0 to 1.7.1 (#953)

* Bump mypy from 1.7.0 to 1.7.1

Bumps [mypy](https://github.com/python/mypy) from 1.7.0 to 1.7.1.
- [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md)
- [Commits](https://github.com/python/mypy/compare/v1.7.0...v1.7.1)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20231127-220741.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231127-220741.yaml

diff --git a/.changes/unreleased/Dependencies-20231127-220741.yaml b/.changes/unreleased/Dependencies-20231127-220741.yaml
new file mode 100644
index 00000000..63d572f2
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231127-220741.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.7.0 to 1.7.1"
+time: 2023-11-27T22:07:41.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 953
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 4abe824e..e5d0f370 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -12,7 +12,7 @@ flake8~=6.1;python_version>="3.8"
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.7.0  # patch updates have historically introduced breaking changes
+mypy==1.7.1  # patch updates have historically introduced breaking changes
 pip-tools~=7.3
 pre-commit~=3.5
 pre-commit-hooks~=4.5

From ba2085f3e660c9062b4564213f0d8fb8e46ea123 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 28 Nov 2023 22:46:32 -0500
Subject: [PATCH 523/603] Update wheel requirement from ~=0.41 to ~=0.42 (#952)

* Update wheel requirement from ~=0.41 to ~=0.42

Updates the requirements on [wheel](https://github.com/pypa/wheel) to permit the latest version.
- [Release notes](https://github.com/pypa/wheel/releases)
- [Changelog](https://github.com/pypa/wheel/blob/main/docs/news.rst)
- [Commits](https://github.com/pypa/wheel/compare/0.41.0...0.42.0)

---
updated-dependencies:
- dependency-name: wheel
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231127-220737.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231127-220737.yaml

diff --git a/.changes/unreleased/Dependencies-20231127-220737.yaml b/.changes/unreleased/Dependencies-20231127-220737.yaml
new file mode 100644
index 00000000..60e2be67
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231127-220737.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update wheel requirement from ~=0.41 to ~=0.42"
+time: 2023-11-27T22:07:37.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 952
diff --git a/dev-requirements.txt b/dev-requirements.txt
index e5d0f370..4259c661 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -26,7 +26,7 @@ tox~=4.11
 types-pytz~=2023.3
 types-requests~=2.31
 twine~=4.0
-wheel~=0.41
+wheel~=0.42
 
 # Adapter specific dependencies
 mock~=5.1

From da258f8042650801c9d365f27157d0f39c54573c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 29 Nov 2023 00:46:43 -0500
Subject: [PATCH 524/603] Update pytest-xdist requirement from ~=3.4 to ~=3.5
 (#951)

* Update pytest-xdist requirement from ~=3.4 to ~=3.5

Updates the requirements on [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) to permit the latest version.
- [Release notes](https://github.com/pytest-dev/pytest-xdist/releases)
- [Changelog](https://github.com/pytest-dev/pytest-xdist/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest-xdist/compare/v3.4.0...v3.5.0)

---
updated-dependencies:
- dependency-name: pytest-xdist
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231127-220733.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231127-220733.yaml

diff --git a/.changes/unreleased/Dependencies-20231127-220733.yaml b/.changes/unreleased/Dependencies-20231127-220733.yaml
new file mode 100644
index 00000000..be23a48f
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231127-220733.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytest-xdist requirement from ~=3.4 to ~=3.5"
+time: 2023-11-27T22:07:33.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 951
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 4259c661..1d8aea7b 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -20,7 +20,7 @@ pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
-pytest-xdist~=3.4
+pytest-xdist~=3.5
 pytz~=2023.3
 tox~=4.11
 types-pytz~=2023.3

From d0a60a71984c2fc3121806c20b661e31709eab93 Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Fri, 1 Dec 2023 17:28:40 -0500
Subject: [PATCH 525/603] add tests for --empty flag (#948)

* add tests for --empty flag

* changelog entry

* restore dev-requirements
---
 .changes/unreleased/Under the Hood-20231119-132050.yaml | 6 ++++++
 tests/functional/adapter/empty/test_empty.py            | 5 +++++
 2 files changed, 11 insertions(+)
 create mode 100644 .changes/unreleased/Under the Hood-20231119-132050.yaml
 create mode 100644 tests/functional/adapter/empty/test_empty.py

diff --git a/.changes/unreleased/Under the Hood-20231119-132050.yaml b/.changes/unreleased/Under the Hood-20231119-132050.yaml
new file mode 100644
index 00000000..61c7f8ab
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20231119-132050.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Add tests for --empty flag
+time: 2023-11-19T13:20:50.076459-05:00
+custom:
+  Author: michelleark
+  Issue: "949"
diff --git a/tests/functional/adapter/empty/test_empty.py b/tests/functional/adapter/empty/test_empty.py
new file mode 100644
index 00000000..1ea2c886
--- /dev/null
+++ b/tests/functional/adapter/empty/test_empty.py
@@ -0,0 +1,5 @@
+from dbt.tests.adapter.empty.test_empty import BaseTestEmpty
+
+
+class TestSparkEmpty(BaseTestEmpty):
+    pass

From a813e400b50d9e80223d1eacdd3339b06681550e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 21:25:06 -0800
Subject: [PATCH 526/603] Update freezegun requirement from ~=1.2 to ~=1.3
 (#956)

* Update freezegun requirement from ~=1.2 to ~=1.3

Updates the requirements on [freezegun](https://github.com/spulec/freezegun) to permit the latest version.
- [Release notes](https://github.com/spulec/freezegun/releases)
- [Changelog](https://github.com/spulec/freezegun/blob/master/CHANGELOG)
- [Commits](https://github.com/spulec/freezegun/compare/1.2.0...1.3.1)

---
updated-dependencies:
- dependency-name: freezegun
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependencies-20231204-224210.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231204-224210.yaml

diff --git a/.changes/unreleased/Dependencies-20231204-224210.yaml b/.changes/unreleased/Dependencies-20231204-224210.yaml
new file mode 100644
index 00000000..c415934d
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231204-224210.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update freezegun requirement from ~=1.2 to ~=1.3"
+time: 2023-12-04T22:42:10.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 956
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 1d8aea7b..76c38b10 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -10,7 +10,7 @@ bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"
 flaky~=3.7
-freezegun~=1.2
+freezegun~=1.3
 ipdb~=0.13.13
 mypy==1.7.1  # patch updates have historically introduced breaking changes
 pip-tools~=7.3

From ffc24ba30aa87e38e42ced897ccb19961425ad81 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 22:47:38 -0800
Subject: [PATCH 527/603] Update black requirement from ~=23.11 to ~=23.12
 (#959)

* Update black requirement from ~=23.11 to ~=23.12

Updates the requirements on [black](https://github.com/psf/black) to permit the latest version.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/23.11.0...23.12.0)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mila Page <67295367+VersusFacit@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231212-223929.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231212-223929.yaml

diff --git a/.changes/unreleased/Dependencies-20231212-223929.yaml b/.changes/unreleased/Dependencies-20231212-223929.yaml
new file mode 100644
index 00000000..65f308f7
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231212-223929.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update black requirement from ~=23.11 to ~=23.12"
+time: 2023-12-12T22:39:29.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 959
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 76c38b10..fe960ed0 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.11
+black~=23.12
 bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"

From f774bcbd181fcdd7bb0ce8295e9f93aedfcfe32a Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Thu, 14 Dec 2023 16:45:14 -0700
Subject: [PATCH 528/603] Remove unused `invalid_insert_overwrite_delta_msg`
 variable (#963)

* Remove `invalid_insert_overwrite_delta_msg` message

* Changelog entry
---
 .changes/unreleased/Under the Hood-20231214-134728.yaml    | 6 ++++++
 .../spark/macros/materializations/incremental/validate.sql | 7 +------
 2 files changed, 7 insertions(+), 6 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20231214-134728.yaml

diff --git a/.changes/unreleased/Under the Hood-20231214-134728.yaml b/.changes/unreleased/Under the Hood-20231214-134728.yaml
new file mode 100644
index 00000000..b1de2ddb
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20231214-134728.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Remove unused `invalid_insert_overwrite_delta_msg` message
+time: 2023-12-14T13:47:28.444107-07:00
+custom:
+  Author: dbeatty10
+  Issue: "962"
diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql
index 88b851ca..0d4c4d8b 100644
--- a/dbt/include/spark/macros/materializations/incremental/validate.sql
+++ b/dbt/include/spark/macros/materializations/incremental/validate.sql
@@ -29,17 +29,12 @@
     You can only choose this strategy when file_format is set to 'delta' or 'iceberg' or 'hudi'
   {%- endset %}
 
-  {% set invalid_insert_overwrite_delta_msg -%}
-    Invalid incremental strategy provided: {{ raw_strategy }}
-    You cannot use this strategy when file_format is set to 'delta' or 'iceberg'
-    Use the 'append' or 'merge' strategy instead
-  {%- endset %}
-
   {% set invalid_insert_overwrite_endpoint_msg -%}
     Invalid incremental strategy provided: {{ raw_strategy }}
     You cannot use this strategy when connecting via endpoint
     Use the 'append' or 'merge' strategy instead
   {%- endset %}
+
   {% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %}
     {% do exceptions.raise_compiler_error(invalid_strategy_msg) %}
   {%-else %}

From 5210d0a9746443c8a2cf217c16c919a73f3ba543 Mon Sep 17 00:00:00 2001
From: Ben Schreiber <74134279+ben-schreiber@users.noreply.github.com>
Date: Thu, 21 Dec 2023 19:45:14 +0200
Subject: [PATCH 529/603] Fix hardcoded file format in python materializations
 (#955)

* Fix hardcoded file format in python materializations

* Add changelog
---
 .changes/unreleased/Fixes-20231221-081949.yaml      | 6 ++++++
 dbt/include/spark/macros/materializations/table.sql | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20231221-081949.yaml

diff --git a/.changes/unreleased/Fixes-20231221-081949.yaml b/.changes/unreleased/Fixes-20231221-081949.yaml
new file mode 100644
index 00000000..b10c8141
--- /dev/null
+++ b/.changes/unreleased/Fixes-20231221-081949.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix hardcoded file format for python models
+time: 2023-12-21T08:19:49.630806+02:00
+custom:
+  Author: ben-schreiber
+  Issue: "803"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 39a9caba..b7c56c42 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -98,7 +98,7 @@ else:
   msg = f"{type(df)} is not a supported type for dbt Python materialization"
   raise Exception(msg)
 
-df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}")
+df.write.mode("overwrite").format("{{ config.get('file_format', 'delta') }}").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}")
 {%- endmacro -%}
 
 {%macro py_script_comment()%}

From f9f75e92a3177979cd745440297d33536c93a348 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 10 Jan 2024 13:36:17 -0800
Subject: [PATCH 530/603] Migrate Off Circle CI / To Github Actions + dagger.io
 (#923)

* Add Github action for integration test

* Update tox

* Fetch spark from https link

* Use Spark version 3.1.2

* Separate running Spark session and thrift

* Use Spark 3.1.2 and Hadoop 3.2

* Reset tox.ini

* Remove base pythons in tox.ini

* Fix reference to Docker compose file

* Remove timeout

* Remove artifact steps

* Bump Spark and Hadoop versions

* Reset Spark and Hadoop version

* Update comment

* Add changie

* add databricks and PR execution protections

* use single quotes

* remove `_target` suffix

* add comment to test

* specify container user as root

* formatting

* remove python setup for pre-existing container

* download simba

* fix curl call

* fix curl call

* fix curl call

* fix curl call

* fix curl call

* fix curl call

* fix db test naming

* confirm ODBC driver installed

* add odbc driver env var

* add odbc driver env var

* specify platform

* check odbc driver integrity

* add dbt user env var

* add dbt user env var

* fix host_name env var

* try removing architecture arg

* swap back to pull_request_target

* try running on host instead of container

* Update .github/workflows/integration.yml

Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>

* try running odbcinst -j

* remove bash

* add sudo

* add sudo

* update odbc.ini

* install libsasl2-modules-gssapi-mit

* install libsasl2-modules-gssapi-mit

* set -e on odbc install

* set -e on odbc install

* set -e on odbc install

* sudo echo odbc.inst

* remove postgres components

* remove release related items

* remove irrelevant output

* move long bash script into its own file

* update integration.yml to align with other adapters

* revert name change

* revert name change

* combine databricks and spark tests

* combine databricks and spark tests

* Add dagger

* remove platform

* add dagger setup

* add dagger setup

* set env vars

* install requirements

* install requirements

* add DEFAULT_ENV_VARS and test_path arg

* remove circle ci

* formatting

* update changie

* Update .changes/unreleased/Under the Hood-20230929-161218.yaml

Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>

* formatting fixes and simplify env_var handling

* remove tox, update CONTRIBUTING.md and cleanup GHA workflows

* remove tox, update CONTRIBUTING.md and cleanup GHA workflows

* install test reqs in main.yml

* install test reqs in main.yml

* formatting

* remove tox from dev-requirements.txt and Makefile

* clarify spark crt instantiation

* add comments on python-version

---------

Co-authored-by: Cor Zuurmond <jczuurmond@protonmail.com>
Co-authored-by: Florian Eiden <florian.eiden@fleid.fr>
Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .../Under the Hood-20230929-161218.yaml       |   6 +
 .circleci/config.yml                          | 136 ------------------
 .github/scripts/update_dbt_core_branch.sh     |  17 +++
 .github/workflows/integration.yml             | 112 +++++++++++++++
 .github/workflows/main.yml                    |  15 +-
 .gitignore                                    |   2 +
 CONTRIBUTING.md                               |  24 +++-
 Makefile                                      |   7 +-
 README.md                                     |   3 -
 dagger/requirements.txt                       |   2 +
 dagger/run_dbt_spark_tests.py                 | 130 +++++++++++++++++
 dagger/scripts/configure_odbc.sh              |  11 ++
 dagger/scripts/install_os_reqs.sh             |  10 ++
 dagger/spark-container/entrypoint.sh          |  15 ++
 dagger/spark-container/hive-site.xml          |  46 ++++++
 dagger/spark-container/install_spark.sh       |  15 ++
 dagger/spark-container/spark-defaults.conf    |   9 ++
 dev-requirements.txt                          |   5 +-
 tests/conftest.py                             |   2 +-
 tox.ini                                       |  83 -----------
 20 files changed, 408 insertions(+), 242 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20230929-161218.yaml
 delete mode 100644 .circleci/config.yml
 create mode 100755 .github/scripts/update_dbt_core_branch.sh
 create mode 100644 .github/workflows/integration.yml
 create mode 100644 dagger/requirements.txt
 create mode 100644 dagger/run_dbt_spark_tests.py
 create mode 100755 dagger/scripts/configure_odbc.sh
 create mode 100755 dagger/scripts/install_os_reqs.sh
 create mode 100644 dagger/spark-container/entrypoint.sh
 create mode 100644 dagger/spark-container/hive-site.xml
 create mode 100755 dagger/spark-container/install_spark.sh
 create mode 100644 dagger/spark-container/spark-defaults.conf
 delete mode 100644 tox.ini

diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml
new file mode 100644
index 00000000..9b5c6818
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow.
+time: 2023-09-29T16:12:18.968755+02:00
+custom:
+  Author: JCZuurmond, colin-rogers-dbt
+  Issue: "719"
diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index f2a3b635..00000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,136 +0,0 @@
-version: 2.1
-
-jobs:
-  unit:
-    environment:
-      DBT_INVOCATION_ENV: circle
-    docker:
-      - image: fishtownanalytics/test-container:10
-    steps:
-      - checkout
-      - run: tox -e flake8,unit
-
-# Turning off for now due to flaky runs of tests will turn back on at later date.
-  integration-spark-session:
-     environment:
-       DBT_INVOCATION_ENV: circle
-     docker:
-       - image: godatadriven/pyspark:3.1
-     steps:
-       - checkout
-       - run: apt-get update
-       - run: conda install python=3.10
-       - run: python3 -m pip install --upgrade pip
-       - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev libxml2-dev libxslt-dev
-       - run: python3 -m pip install tox
-       - run:
-           name: Run integration tests
-           command: tox -e integration-spark-session
-           no_output_timeout: 1h
-       - store_artifacts:
-           path: ./logs
-
-  integration-spark-thrift:
-    environment:
-      DBT_INVOCATION_ENV: circle
-    docker:
-      - image: fishtownanalytics/test-container:10
-      - image: godatadriven/spark:3.1.1
-        environment:
-          WAIT_FOR: localhost:5432
-        command: >
-          --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
-          --name Thrift JDBC/ODBC Server
-      - image: postgres:9.6.17-alpine
-        environment:
-          POSTGRES_USER: dbt
-          POSTGRES_PASSWORD: dbt
-          POSTGRES_DB: metastore
-
-    steps:
-      - checkout
-
-      - run:
-          name: Wait for Spark-Thrift
-          command: dockerize -wait tcp://localhost:10000 -timeout 15m -wait-retry-interval 5s
-
-      - run:
-          name: Run integration tests
-          command: tox -e integration-spark-thrift
-          no_output_timeout: 1h
-      - store_artifacts:
-          path: ./logs
-
-  integration-spark-databricks-http:
-    environment:
-      DBT_INVOCATION_ENV: circle
-      DBT_DATABRICKS_RETRY_ALL: True
-      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
-      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
-      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
-    docker:
-      - image: fishtownanalytics/test-container:10
-    steps:
-      - checkout
-      - run:
-          name: Run integration tests
-          command: tox -e integration-spark-databricks-http
-          no_output_timeout: 1h
-      - store_artifacts:
-          path: ./logs
-
-  integration-spark-databricks-odbc-cluster: &databricks-odbc
-    environment:
-      DBT_INVOCATION_ENV: circle
-      ODBC_DRIVER: Simba # TODO: move env var to Docker image
-      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
-      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
-      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
-    docker:
-      # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed
-      - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest
-        aws_auth:
-          aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING
-          aws_secret_access_key: $AWS_SECRET_ACCESS_KEY_STAGING
-    steps:
-      - checkout
-      - run:
-          name: Run integration tests
-          command: tox -e integration-spark-databricks-odbc-cluster
-          no_output_timeout: 1h
-      - store_artifacts:
-          path: ./logs
-
-  integration-spark-databricks-odbc-endpoint:
-    <<: *databricks-odbc
-    steps:
-      - checkout
-      - run:
-          name: Run integration tests
-          command: tox -e integration-spark-databricks-odbc-sql-endpoint
-          no_output_timeout: 1h
-      - store_artifacts:
-          path: ./logs
-
-workflows:
-  version: 2
-  test-everything:
-    jobs:
-      - unit
-      - integration-spark-session:
-          requires:
-            - unit
-      - integration-spark-thrift:
-          requires:
-            - unit
-      - integration-spark-databricks-http:
-          requires:
-            - integration-spark-thrift
-      - integration-spark-databricks-odbc-cluster:
-          context: aws-credentials
-          requires:
-            - integration-spark-thrift
-      - integration-spark-databricks-odbc-endpoint:
-          context: aws-credentials
-          requires:
-            - integration-spark-thrift
diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh
new file mode 100755
index 00000000..1a5a5c2d
--- /dev/null
+++ b/.github/scripts/update_dbt_core_branch.sh
@@ -0,0 +1,17 @@
+#!/bin/bash -e
+set -e
+
+git_branch=$1
+target_req_file="dev-requirements.txt"
+core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g"
+tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g"
+if [[ "$OSTYPE" == darwin* ]]; then
+ # macOS ships with BSD sed, which requires an explicit (empty) backup-suffix argument for -i
+ sed -i "" "$core_req_sed_pattern" $target_req_file
+ sed -i "" "$tests_req_sed_pattern" $target_req_file
+else
+ sed -i "$core_req_sed_pattern" $target_req_file
+ sed -i "$tests_req_sed_pattern" $target_req_file
+fi
+core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2)
+bumpversion --allow-dirty --new-version "$core_version" major
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
new file mode 100644
index 00000000..94dece35
--- /dev/null
+++ b/.github/workflows/integration.yml
@@ -0,0 +1,112 @@
+# **what?**
+# Runs integration tests.
+
+# **why?**
+# Ensure code runs as expected.
+
+# **when?**
+# This will run for all PRs, when code is pushed to a release
+# branch, and when manually triggered.
+
+name: Adapter Integration Tests
+
+on:
+  push:
+    branches:
+      - "main"
+      - "*.latest"
+
+  pull_request_target:
+    paths-ignore:
+      - ".changes/**"
+      - ".flake8"
+      - ".gitignore"
+      - "**.md"
+
+  workflow_dispatch:
+    inputs:
+      dbt-core-branch:
+        description: "branch of dbt-core to use in dev-requirements.txt"
+        required: false
+        type: string
+
+# explicitly turn off permissions for `GITHUB_TOKEN`
+permissions: read-all
+
+# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request_target') && github.event.pull_request.head.ref || github.sha }}
+  cancel-in-progress: true
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+
+  test:
+    name: ${{ matrix.test }}
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        test:
+          - "apache_spark"
+          - "spark_session"
+          - "databricks_sql_endpoint"
+          - "databricks_cluster"
+          - "databricks_http_cluster"
+
+    env:
+      DBT_INVOCATION_ENV: github-actions
+      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
+      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+      DD_SITE: datadoghq.com
+      DD_ENV: ci
+      DD_SERVICE: ${{ github.event.repository.name }}
+      DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
+      DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
+      DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
+      DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
+      DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
+      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
+
+    steps:
+      - name: Check out the repository
+        if: github.event_name != 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      # explicitly checkout the branch for the PR,
+      # this is necessary for the `pull_request` event
+      - name: Check out the repository (PR)
+        if: github.event_name == 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      # the python version used here is not what is used in the tests themselves
+      - name: Set up Python for dagger
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: Install python dependencies
+        run: |
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install -r dagger/requirements.txt
+
+      - name: Update dev_requirements.txt
+        if: inputs.dbt-core-branch != ''
+        run: |
+          pip install bumpversion
+          ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
+
+      - name: Run tests for ${{ matrix.test }}
+        run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 30126325..20f3f88f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -19,7 +19,6 @@ on:
     branches:
       - "main"
       - "*.latest"
-      - "releases/*"
   pull_request:
   workflow_dispatch:
 
@@ -81,10 +80,6 @@ jobs:
       matrix:
         python-version: ["3.8", "3.9", "3.10", "3.11"]
 
-    env:
-      TOXENV: "unit"
-      PYTEST_ADDOPTS: "-v --color=yes --csv unit_results.csv"
-
     steps:
       - name: Check out the repository
         uses: actions/checkout@v3
@@ -100,10 +95,12 @@ jobs:
           sudo apt-get install libsasl2-dev
           python -m pip install --user --upgrade pip
           python -m pip --version
-          python -m pip install tox
-          tox --version
-      - name: Run tox
-        run: tox
+          python -m pip install -r requirements.txt
+          python -m pip install -r dev-requirements.txt
+          python -m pip install -e .
+
+      - name: Run unit tests
+        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
 
       - name: Get current date
         if: always()
diff --git a/.gitignore b/.gitignore
index 33a83848..1e8ff741 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,5 @@ test.env
 .hive-metastore/
 .spark-warehouse/
 dbt-integration-tests
+/.tool-versions
+/.hypothesis/*
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a61306ea..6fcaacea 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -65,11 +65,27 @@ $EDITOR test.env
 ### Test commands
 There are a few methods for running tests locally.
 
-#### `tox`
-`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`.
+#### dagger
+To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against.
 
-#### `pytest`
-Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like:
+```sh
+pip install -r dagger/requirements.txt
+python dagger/run_dbt_spark_tests.py --profile databricks_sql_endpoint --test-path tests/functional/adapter/test_basic.py::TestSimpleMaterializationsSpark::test_base
+```
+
+`--profile`: required, this is the kind of spark connection to test against
+
+_options_:
+  - "apache_spark"
+  - "spark_session"
+  - "databricks_sql_endpoint"
+  - "databricks_cluster"
+  - "databricks_http_cluster"
+
+`--test-path`: optional, this is the path to the test file you want to run. If not specified, all tests will be run.
+
+#### pytest
+Finally, you can also run a specific test or group of tests using `pytest` directly (if you have all the dependencies set up on your machine). With a Python virtualenv active and dev dependencies installed you can do things like:
 
 ```sh
 # run all functional tests
diff --git a/Makefile b/Makefile
index cc1d9f75..2bd1055f 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 .PHONY: dev
 dev: ## Installs adapter in develop mode along with development dependencies
 	@\
-	pip install -e . -r requirements.txt -r dev-requirements.txt && pre-commit install
+	pip install -e . -r requirements.txt -r dev-requirements.txt -r dagger/requirements.txt && pre-commit install
 
 .PHONY: dev-uninstall
 dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment
@@ -40,12 +40,13 @@ linecheck: ## Checks for all Python lines 100 characters or more
 .PHONY: unit
 unit: ## Runs unit tests with py38.
 	@\
-	tox -e py38
+	python -m pytest tests/unit
 
 .PHONY: test
 test: ## Runs unit tests with py38 and code checks against staged changes.
 	@\
-	tox -p -e py38; \
+	python -m pytest tests/unit; \
+	python dagger/run_dbt_spark_tests.py --profile spark_session; \
 	pre-commit run black-check --hook-stage manual | grep -v "INFO"; \
 	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
 	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
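With tox removed, the Makefile targets above call pytest and dagger directly; a typical local loop, assuming an active virtualenv, looks like:

```sh
make dev    # installs the adapter plus dev and dagger requirements, and the pre-commit hooks
make unit   # runs python -m pytest tests/unit
```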
diff --git a/README.md b/README.md
index 2d258679..7e95b1fc 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,6 @@
   <a href="https://github.com/dbt-labs/dbt-spark/actions/workflows/main.yml">
     <img src="https://github.com/dbt-labs/dbt-spark/actions/workflows/main.yml/badge.svg?event=push" alt="Unit Tests Badge"/>
   </a>
-  <a href="https://circleci.com/gh/dbt-labs/dbt-spark/?branch=main">
-    <img src="https://circleci.com/gh/dbt-labs/dbt-spark/tree/main.svg?style=shield" alt="Integration Tests Badge"/>
-  </a>
 </p>
 
 **[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
diff --git a/dagger/requirements.txt b/dagger/requirements.txt
new file mode 100644
index 00000000..df36543c
--- /dev/null
+++ b/dagger/requirements.txt
@@ -0,0 +1,2 @@
+dagger-io~=0.8.0
+python-dotenv
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
new file mode 100644
index 00000000..71851990
--- /dev/null
+++ b/dagger/run_dbt_spark_tests.py
@@ -0,0 +1,130 @@
+import os
+
+import argparse
+import sys
+
+import anyio as anyio
+import dagger as dagger
+from dotenv import find_dotenv, load_dotenv
+
+PG_PORT = 5432
+load_dotenv(find_dotenv("test.env"))
+# if env vars aren't specified in test.env (e.g. in a GitHub Actions worker), use the ones from the host
+TESTING_ENV_VARS = {
+    env_name: os.environ[env_name]
+    for env_name in os.environ
+    if env_name.startswith(("DD_", "DBT_"))
+}
+
+TESTING_ENV_VARS.update({"ODBC_DRIVER": "Simba"})
+
+
+def env_variables(envs: dict[str, str]):
+    def env_variables_inner(ctr: dagger.Container):
+        for key, value in envs.items():
+            ctr = ctr.with_env_variable(key, value)
+        return ctr
+
+    return env_variables_inner
+
+
+async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
+    ctr = await (
+        client.container()
+        .from_("postgres:13")
+        .with_env_variable("POSTGRES_PASSWORD", "postgres")
+        .with_exposed_port(PG_PORT)
+    )
+
+    return ctr, "postgres_db"
+
+
+async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
+    spark_dir = client.host().directory("./dagger/spark-container")
+    spark_ctr_base = (
+        client.container()
+        .from_("eclipse-temurin:8-jre")
+        .with_directory("/spark_setup", spark_dir)
+        .with_env_variable("SPARK_HOME", "/usr/spark")
+        .with_env_variable("PATH", "/usr/spark/bin:/usr/spark/sbin:$PATH", expand=True)
+        .with_file(
+            "/scripts/entrypoint.sh",
+            client.host().file("./dagger/spark-container/entrypoint.sh"),
+            permissions=755,
+        )
+        .with_file(
+            "/scripts/install_spark.sh",
+            client.host().file("./dagger/spark-container/install_spark.sh"),
+            permissions=755,
+        )
+        .with_exec(["./spark_setup/install_spark.sh"])
+        .with_file("/usr/spark/conf/hive-site.xml", spark_dir.file("/hive-site.xml"))
+        .with_file("/usr/spark/conf/spark-defaults.conf", spark_dir.file("spark-defaults.conf"))
+    )
+
+    # postgres is the metastore here
+    pg_ctr, pg_host = await get_postgres_container(client)
+
+    spark_ctr = (
+        spark_ctr_base.with_service_binding(alias=pg_host, service=pg_ctr)
+        .with_exec(
+            [
+                "/scripts/entrypoint.sh",
+                "--class",
+                "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2",
+                "--name",
+                "Thrift JDBC/ODBC Server",
+            ]
+        )
+        .with_exposed_port(10000)
+    )
+
+    return spark_ctr, "spark_db"
+
+
+async def test_spark(test_args):
+    async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
+        test_profile = test_args.profile
+        req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"])
+        dbt_spark_dir = client.host().directory("./dbt")
+        test_dir = client.host().directory("./tests")
+        scripts = client.host().directory("./dagger/scripts")
+        platform = dagger.Platform("linux/amd64")
+        tst_container = (
+            client.container(platform=platform)
+            .from_("python:3.8-slim")
+            .with_directory("/.", req_files)
+            .with_directory("/dbt", dbt_spark_dir)
+            .with_directory("/tests", test_dir)
+            .with_directory("/scripts", scripts)
+            .with_exec("./scripts/install_os_reqs.sh")
+            .with_exec(["pip", "install", "-r", "requirements.txt"])
+            .with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+        )
+
+        if test_profile == "apache_spark":
+            spark_ctr, spark_host = await get_spark_container(client)
+            tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
+
+        elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]:
+            tst_container = tst_container.with_exec("./scripts/configure_odbc.sh")
+
+        elif test_profile == "spark_session":
+            tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
+            tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"])
+
+        tst_container = tst_container.with_(env_variables(TESTING_ENV_VARS))
+        test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter"
+        result = await tst_container.with_exec(
+            ["pytest", "-v", "--profile", test_profile, "-n", "auto", test_path]
+        ).stdout()
+
+        return result
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--profile", required=True, type=str)
+parser.add_argument("--test-path", required=False, type=str)
+args = parser.parse_args()
+
+anyio.run(test_spark, args)
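As the runner above shows, any `DBT_`- or `DD_`-prefixed variables found on the host (or loaded from `test.env`) are forwarded into the test container. A sketch of driving a Databricks profile this way, with placeholder values only:

```sh
# placeholder credentials for illustration only -- substitute real workspace values
export DBT_DATABRICKS_USER="someone@example.com"
export DBT_DATABRICKS_TOKEN="<redacted>"
python dagger/run_dbt_spark_tests.py --profile databricks_cluster
```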
diff --git a/dagger/scripts/configure_odbc.sh b/dagger/scripts/configure_odbc.sh
new file mode 100755
index 00000000..ddf020ad
--- /dev/null
+++ b/dagger/scripts/configure_odbc.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -eo pipefail
+rm -rf /tmp && mkdir /tmp
+
+curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip"
+unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/
+dpkg -i /tmp/SimbaSparkODBC-2.6.16.1019-Debian-64bit/simbaspark_2.6.16.1019-2_amd64.deb
+echo "--------------------------------------------"
+printf "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so\n" >> /etc/odbcinst.ini
+dpkg -l | grep Simba # confirm that the driver is installed
+rm -rf /tmp
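A quick way to double-check the registration from inside the container (an optional, illustrative check that is not part of the script above) is to ask unixODBC what it sees:

```sh
odbcinst -j        # show which odbcinst.ini / odbc.ini files unixODBC reads
odbcinst -q -d     # list registered driver sections, e.g. [Simba]
```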
diff --git a/dagger/scripts/install_os_reqs.sh b/dagger/scripts/install_os_reqs.sh
new file mode 100755
index 00000000..b50027f5
--- /dev/null
+++ b/dagger/scripts/install_os_reqs.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -eo pipefail
+apt-get update && apt-get install -y --no-install-recommends \
+    g++ \
+    git \
+    curl \
+    unixodbc \
+    unixodbc-dev \
+    libsasl2-modules-gssapi-mit \
+    unzip
diff --git a/dagger/spark-container/entrypoint.sh b/dagger/spark-container/entrypoint.sh
new file mode 100644
index 00000000..4b15cab6
--- /dev/null
+++ b/dagger/spark-container/entrypoint.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+if [ -n "$WAIT_FOR" ]; then
+  IFS=';' read -a HOSTPORT_ARRAY <<< "$WAIT_FOR"
+  for HOSTPORT in "${HOSTPORT_ARRAY[@]}"
+  do
+    WAIT_FOR_HOST=${HOSTPORT%:*}
+    WAIT_FOR_PORT=${HOSTPORT#*:}
+
+    echo Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT...
+    while ! nc -z $WAIT_FOR_HOST $WAIT_FOR_PORT; do echo sleeping; sleep 2; done
+  done
+fi
+echo "$PATH"
+exec spark-submit "$@"
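The `WAIT_FOR` hook above accepts one or more semicolon-separated `host:port` pairs and blocks until each is reachable before handing off to `spark-submit`. A sketch of how it could be invoked against the postgres metastore bound elsewhere in this change (alias `postgres_db`, port 5432):

```sh
# illustrative only: block until the metastore answers, then start the thrift server
WAIT_FOR="postgres_db:5432" /scripts/entrypoint.sh \
  --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 \
  --name "Thrift JDBC/ODBC Server"
```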
diff --git a/dagger/spark-container/hive-site.xml b/dagger/spark-container/hive-site.xml
new file mode 100644
index 00000000..93e966fb
--- /dev/null
+++ b/dagger/spark-container/hive-site.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+     Licensed to the Apache Software Foundation (ASF) under one or more
+     contributor license agreements.  See the NOTICE file distributed with
+     this work for additional information regarding copyright ownership.
+     The ASF licenses this file to You under the Apache License, Version 2.0
+     (the "License"); you may not use this file except in compliance with
+     the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+
+<configuration>
+
+    <property>
+        <name>javax.jdo.option.ConnectionURL</name>
+        <value>jdbc:postgresql://postgres_db/postgres</value>
+    </property>
+
+    <property>
+        <name>javax.jdo.option.ConnectionDriverName</name>
+        <value>org.postgresql.Driver</value>
+    </property>
+
+    <property>
+        <name>javax.jdo.option.ConnectionUserName</name>
+        <value>postgres</value>
+    </property>
+
+    <property>
+        <name>javax.jdo.option.ConnectionPassword</name>
+        <value>postgres</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.schema.verification</name>
+        <value>false</value>
+    </property>
+</configuration>
diff --git a/dagger/spark-container/install_spark.sh b/dagger/spark-container/install_spark.sh
new file mode 100755
index 00000000..476f362a
--- /dev/null
+++ b/dagger/spark-container/install_spark.sh
@@ -0,0 +1,15 @@
+set -e
+
+SPARK_VERSION=3.1.3
+HADOOP_VERSION=3.2
+
+apt-get update && \
+apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
+ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
+apt-get remove -y wget && \
+apt-get autoremove -y && \
+apt-get clean
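Once the archive is unpacked to `/usr/spark`, a hypothetical sanity check (not part of the install script) is simply:

```sh
/usr/spark/bin/spark-submit --version   # prints the Spark 3.1.3 / Scala build info
```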
diff --git a/dagger/spark-container/spark-defaults.conf b/dagger/spark-container/spark-defaults.conf
new file mode 100644
index 00000000..30ec5959
--- /dev/null
+++ b/dagger/spark-container/spark-defaults.conf
@@ -0,0 +1,9 @@
+spark.driver.memory 2g
+spark.executor.memory 2g
+spark.hadoop.datanucleus.autoCreateTables	true
+spark.hadoop.datanucleus.schema.autoCreateTables	true
+spark.hadoop.datanucleus.fixedDatastore 	false
+spark.serializer	org.apache.spark.serializer.KryoSerializer
+spark.jars.packages	org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0
+spark.sql.extensions	org.apache.spark.sql.hudi.HoodieSparkSessionExtension
+spark.driver.userClassPathFirst true
diff --git a/dev-requirements.txt b/dev-requirements.txt
index fe960ed0..765482e2 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,7 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
@@ -22,7 +22,6 @@ pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.5
 pytz~=2023.3
-tox~=4.11
 types-pytz~=2023.3
 types-requests~=2.31
 twine~=4.0
diff --git a/tests/conftest.py b/tests/conftest.py
index 94969e40..700ade4d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -38,7 +38,7 @@ def dbt_profile_target(request):
 def apache_spark_target():
     return {
         "type": "spark",
-        "host": "localhost",
+        "host": "spark_db",
         "user": "dbt",
         "method": "thrift",
         "port": 10000,
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 97017a92..00000000
--- a/tox.ini
+++ /dev/null
@@ -1,83 +0,0 @@
-[tox]
-skipsdist = True
-envlist = unit, flake8, integration-spark-thrift
-
-[testenv:{unit,py38,py39,py310,py}]
-allowlist_externals =
-    /bin/bash
-commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-
-[testenv:integration-spark-databricks-http]
-allowlist_externals =
-    /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-
-[testenv:integration-spark-databricks-odbc-cluster]
-allowlist_externals =
-    /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-    ODBC_DRIVER
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-
-[testenv:integration-spark-databricks-odbc-sql-endpoint]
-allowlist_externals =
-    /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-    ODBC_DRIVER
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-
-
-[testenv:integration-spark-thrift]
-allowlist_externals =
-    /bin/bash
-basepython = python3.8
-commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_ADDOPTS
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.
-
-[testenv:integration-spark-session]
-allowlist_externals =
-    /bin/bash
-basepython = python3.10
-commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
-passenv =
-    DBT_*
-    PYTEST_*
-    PIP_CACHE_DIR
-deps =
-    -r{toxinidir}/requirements.txt
-    -r{toxinidir}/dev-requirements.txt
-    -e.[session]

From e97918b826d5dfc7853d1c559201171026f1d125 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 11 Jan 2024 10:51:46 -0800
Subject: [PATCH 531/603] fix ODBC_DRIVER env var (#971)

---
 dagger/run_dbt_spark_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 71851990..91b9988c 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -16,7 +16,7 @@
     if env_name.startswith(("DD_", "DBT_"))
 }
 
-TESTING_ENV_VARS.update({"ODBC_DRIVER": "Simba"})
+TESTING_ENV_VARS.update({"ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so"})
 
 
 def env_variables(envs: dict[str, str]):

From 5d90ff9ab4c9c0eb5137f606b843aec3bf35c6d2 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 24 Jan 2024 18:48:08 -0800
Subject: [PATCH 532/603] feature/decouple adapters from core (#972)

* Add Github action for integration test

* Update tox

* Fetch spark from https link

* Use Spark version 3.1.2

* Separate running Spark session and thrift

* Use Spark 3.1.2 and Hadoop 3.2

* Reset tox.ini

* Remove base pythons in tox.ini

* Fix reference to Docker compose file

* Remove timeout

* Remove artifact steps

* Bump Spark and Hadoop versions

* Reset Spark and Hadoop version

* Update comment

* Add changie

* add databricks and PR execution protections

* use single quotes

* remove `_target` suffix

* add comment to test

* specify container user as root

* formatting

* remove python setup for pre-existing container

* download simba

* fix curl call

* fix curl call

* fix curl call

* fix curl call

* fix curl call

* fix curl call

* fix db test naming

* confirm ODBC driver installed

* add odbc driver env var

* add odbc driver env var

* specify platform

* check odbc driver integrity

* add dbt user env var

* add dbt user env var

* fix host_name env var

* try removing architecture arg

* swap back to pull_request_target

* try running on host instead of container

* Update .github/workflows/integration.yml

Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>

* try running odbcinst -j

* remove bash

* add sudo

* add sudo

* update odbc.ini

* install libsasl2-modules-gssapi-mit

* install libsasl2-modules-gssapi-mit

* set -e on odbc install

* set -e on odbc install

* set -e on odbc install

* sudo echo odbc.inst

* remove postgres components

* remove release related items

* remove irrelevant output

* move long bash script into its own file

* update integration.yml to align with other adapters

* revert name change

* revert name change

* combine databricks and spark tests

* combine databricks and spark tests

* Add dagger

* remove platform

* add dagger setup

* add dagger setup

* set env vars

* install requirements

* install requirements

* add DEFAULT_ENV_VARS and test_path arg

* remove circle ci

* formatting

* update changie

* Update .changes/unreleased/Under the Hood-20230929-161218.yaml

Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>

* formatting fixes and simplify env_var handling

* remove tox, update CONTRIBUTING.md and cleanup GHA workflows

* remove tox, update CONTRIBUTING.md and cleanup GHA workflows

* install test reqs in main.yml

* install test reqs in main.yml

* formatting

* remove tox from dev-requirements.txt and Makefile

* clarify spark ctr instantiation

* add comments on python-version

* initial migration changes

* unpin

* implement core / adapters decoupling

* fix list_relations

* fix typing and exception imports

* fix typing and exception imports

* add changie

* replace dbt.common with dbt_common

* update setup.py

* add dbt-adapters

* update setup.py

* fix credentials import

* fix dev-requirements.txt

* dagger improvements to caching and installing package under test

* update requirements

* add cluster start fixture

* update conftest.py

* re-order dagger setup to reduce cache invalidation

* remove dbt-core version dependency check

---------

Co-authored-by: Cor Zuurmond <jczuurmond@protonmail.com>
Co-authored-by: Florian Eiden <florian.eiden@fleid.fr>
Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .../Under the Hood-20240111-114806.yaml       |  6 ++
 dagger/requirements.txt                       |  2 +-
 dagger/run_dbt_spark_tests.py                 | 47 ++++++++++----
 dbt/adapters/spark/column.py                  |  2 +-
 dbt/adapters/spark/connections.py             | 57 ++++++++---------
 dbt/adapters/spark/impl.py                    | 63 ++++++++++++-------
 dbt/adapters/spark/python_submissions.py      | 37 ++++-------
 dbt/adapters/spark/relation.py                |  4 +-
 dbt/adapters/spark/session.py                 |  6 +-
 dbt/include/spark/macros/adapters.sql         |  4 +-
 dev-requirements.txt                          |  3 +-
 setup.py                                      | 13 +---
 tests/conftest.py                             | 16 +++--
 tests/functional/conftest.py                  | 19 ++++++
 tests/unit/test_adapter.py                    | 39 ++++++++----
 tests/unit/utils.py                           |  2 +-
 16 files changed, 188 insertions(+), 132 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240111-114806.yaml
 create mode 100644 tests/functional/conftest.py

diff --git a/.changes/unreleased/Under the Hood-20240111-114806.yaml b/.changes/unreleased/Under the Hood-20240111-114806.yaml
new file mode 100644
index 00000000..31705f46
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240111-114806.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Update import paths and list_relations to support decoupling adapters/core
+time: 2024-01-11T11:48:06.120111-08:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "972"
diff --git a/dagger/requirements.txt b/dagger/requirements.txt
index df36543c..b50c448d 100644
--- a/dagger/requirements.txt
+++ b/dagger/requirements.txt
@@ -1,2 +1,2 @@
-dagger-io~=0.8.0
+dagger-io~=0.9.7
 python-dotenv
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 91b9988c..436cb1e9 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -2,6 +2,7 @@
 
 import argparse
 import sys
+from typing import Dict
 
 import anyio as anyio
 import dagger as dagger
@@ -19,7 +20,7 @@
 TESTING_ENV_VARS.update({"ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so"})
 
 
-def env_variables(envs: dict[str, str]):
+def env_variables(envs: Dict[str, str]):
     def env_variables_inner(ctr: dagger.Container):
         for key, value in envs.items():
             ctr = ctr.with_env_variable(key, value)
@@ -28,18 +29,19 @@ def env_variables_inner(ctr: dagger.Container):
     return env_variables_inner
 
 
-async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
-    ctr = await (
+def get_postgres_container(client: dagger.Client) -> (dagger.Container, str):
+    ctr = (
         client.container()
         .from_("postgres:13")
         .with_env_variable("POSTGRES_PASSWORD", "postgres")
         .with_exposed_port(PG_PORT)
+        .as_service()
     )
 
     return ctr, "postgres_db"
 
 
-async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
+def get_spark_container(client: dagger.Client) -> (dagger.Service, str):
     spark_dir = client.host().directory("./dagger/spark-container")
     spark_ctr_base = (
         client.container()
@@ -63,7 +65,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
     )
 
     # postgres is the metastore here
-    pg_ctr, pg_host = await get_postgres_container(client)
+    pg_ctr, pg_host = get_postgres_container(client)
 
     spark_ctr = (
         spark_ctr_base.with_service_binding(alias=pg_host, service=pg_ctr)
@@ -77,6 +79,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
             ]
         )
         .with_exposed_port(10000)
+        .as_service()
     )
 
     return spark_ctr, "spark_db"
@@ -85,29 +88,49 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str):
 async def test_spark(test_args):
     async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
         test_profile = test_args.profile
-        req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"])
+
+        # create cache volumes, these are persisted between runs saving time when developing locally
+        os_reqs_cache = client.cache_volume("os_reqs")
+        pip_cache = client.cache_volume("pip")
+
+        # setup directories as we don't want to copy the whole repo into the container
+        req_files = client.host().directory(
+            "./", include=["*.txt", "*.env", "*.ini", "*.md", "setup.py"]
+        )
         dbt_spark_dir = client.host().directory("./dbt")
         test_dir = client.host().directory("./tests")
         scripts = client.host().directory("./dagger/scripts")
+
         platform = dagger.Platform("linux/amd64")
         tst_container = (
             client.container(platform=platform)
             .from_("python:3.8-slim")
-            .with_directory("/.", req_files)
-            .with_directory("/dbt", dbt_spark_dir)
-            .with_directory("/tests", test_dir)
+            .with_mounted_cache("/var/cache/apt/archives", os_reqs_cache)
+            .with_mounted_cache("/root/.cache/pip", pip_cache)
+            # install OS deps first so any local changes don't invalidate the cache
             .with_directory("/scripts", scripts)
-            .with_exec("./scripts/install_os_reqs.sh")
+            .with_exec(["./scripts/install_os_reqs.sh"])
+            # install dbt-spark + python deps
+            .with_directory("/src", req_files)
+            .with_directory("src/dbt", dbt_spark_dir)
+            .with_directory("src/tests", test_dir)
+            .with_workdir("/src")
+            .with_exec(["pip", "install", "-U", "pip"])
             .with_exec(["pip", "install", "-r", "requirements.txt"])
             .with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+            .with_exec(["pip", "install", "-e", "."])
         )
 
         if test_profile == "apache_spark":
-            spark_ctr, spark_host = await get_spark_container(client)
+            spark_ctr, spark_host = get_spark_container(client)
             tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
 
         elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]:
-            tst_container = tst_container.with_exec("./scripts/configure_odbc.sh")
+            tst_container = (
+                tst_container.with_workdir("/")
+                .with_exec(["./scripts/configure_odbc.sh"])
+                .with_workdir("/src")
+            )
 
         elif test_profile == "spark_session":
             tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index a57fa056..39f6f529 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -2,7 +2,7 @@
 from typing import Any, Dict, Optional, TypeVar, Union
 
 from dbt.adapters.base.column import Column
-from dbt.dataclass_schema import dbtClassMixin
+from dbt_common.dataclass_schema import dbtClassMixin
 
 Self = TypeVar("Self", bound="SparkColumn")
 
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 966f5584..83048f92 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,11 +1,17 @@
 from contextlib import contextmanager
 
-import dbt.exceptions
-from dbt.adapters.base import Credentials
+from dbt.adapters.contracts.connection import (
+    AdapterResponse,
+    ConnectionState,
+    Connection,
+    Credentials,
+)
+from dbt.adapters.events.logging import AdapterLogger
+from dbt.adapters.exceptions import FailedToConnectError
 from dbt.adapters.sql import SQLConnectionManager
-from dbt.contracts.connection import ConnectionState, AdapterResponse
-from dbt.events import AdapterLogger
-from dbt.utils import DECIMALS
+from dbt_common.exceptions import DbtConfigError, DbtRuntimeError, DbtDatabaseError
+
+from dbt_common.utils.encoding import DECIMALS
 from dbt.adapters.spark import __version__
 
 try:
@@ -22,8 +28,7 @@
     pyodbc = None
 from datetime import datetime
 import sqlparams
-from dbt.contracts.connection import Connection
-from dbt.dataclass_schema import StrEnum
+from dbt_common.dataclass_schema import StrEnum
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable, Sequence
 
@@ -92,15 +97,15 @@ def cluster_id(self) -> Optional[str]:
 
     def __post_init__(self) -> None:
         if self.method is None:
-            raise dbt.exceptions.DbtRuntimeError("Must specify `method` in profile")
+            raise DbtRuntimeError("Must specify `method` in profile")
         if self.host is None:
-            raise dbt.exceptions.DbtRuntimeError("Must specify `host` in profile")
+            raise DbtRuntimeError("Must specify `host` in profile")
         if self.schema is None:
-            raise dbt.exceptions.DbtRuntimeError("Must specify `schema` in profile")
+            raise DbtRuntimeError("Must specify `schema` in profile")
 
         # spark classifies database and schema as the same thing
         if self.database is not None and self.database != self.schema:
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 f"    schema: {self.schema} \n"
                 f"    database: {self.database} \n"
                 f"On Spark, database must be omitted or have the same value as"
@@ -112,7 +117,7 @@ def __post_init__(self) -> None:
             try:
                 import pyodbc  # noqa: F401
             except ImportError as e:
-                raise dbt.exceptions.DbtRuntimeError(
+                raise DbtRuntimeError(
                     f"{self.method} connection method requires "
                     "additional dependencies. \n"
                     "Install the additional required dependencies with "
@@ -121,7 +126,7 @@ def __post_init__(self) -> None:
                 ) from e
 
         if self.method == SparkConnectionMethod.ODBC and self.cluster and self.endpoint:
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 "`cluster` and `endpoint` cannot both be set when"
                 f" using {self.method} method to connect to Spark"
             )
@@ -130,7 +135,7 @@ def __post_init__(self) -> None:
             self.method == SparkConnectionMethod.HTTP
             or self.method == SparkConnectionMethod.THRIFT
         ) and not (ThriftState and THttpClient and hive):
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 f"{self.method} connection method requires "
                 "additional dependencies. \n"
                 "Install the additional required dependencies with "
@@ -141,7 +146,7 @@ def __post_init__(self) -> None:
             try:
                 import pyspark  # noqa: F401
             except ImportError as e:
-                raise dbt.exceptions.DbtRuntimeError(
+                raise DbtRuntimeError(
                     f"{self.method} connection method requires "
                     "additional dependencies. \n"
                     "Install the additional required dependencies with "
@@ -291,13 +296,11 @@ def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None:
         if poll_state.errorMessage:
             logger.debug("Poll response: {}".format(poll_state))
             logger.debug("Poll status: {}".format(state))
-            raise dbt.exceptions.DbtDatabaseError(poll_state.errorMessage)
+            raise DbtDatabaseError(poll_state.errorMessage)
 
         elif state not in STATE_SUCCESS:
             status_type = ThriftState._VALUES_TO_NAMES.get(state, "Unknown<{!r}>".format(state))
-            raise dbt.exceptions.DbtDatabaseError(
-                "Query failed with status: {}".format(status_type)
-            )
+            raise DbtDatabaseError("Query failed with status: {}".format(status_type))
 
         logger.debug("Poll status: {}, query complete".format(state))
 
@@ -358,9 +361,9 @@ def exception_handler(self, sql: str) -> Generator[None, None, None]:
             thrift_resp = exc.args[0]
             if hasattr(thrift_resp, "status"):
                 msg = thrift_resp.status.errorMessage
-                raise dbt.exceptions.DbtRuntimeError(msg)
+                raise DbtRuntimeError(msg)
             else:
-                raise dbt.exceptions.DbtRuntimeError(str(exc))
+                raise DbtRuntimeError(str(exc))
 
     def cancel(self, connection: Connection) -> None:
         connection.handle.cancel()
@@ -390,7 +393,7 @@ def validate_creds(cls, creds: Any, required: Iterable[str]) -> None:
 
         for key in required:
             if not hasattr(creds, key):
-                raise dbt.exceptions.DbtProfileError(
+                raise DbtConfigError(
                     "The config '{}' is required when using the {} method"
                     " to connect to Spark".format(key, method)
                 )
@@ -481,7 +484,7 @@ def open(cls, connection: Connection) -> Connection:
                             endpoint=creds.endpoint
                         )
                     else:
-                        raise dbt.exceptions.DbtProfileError(
+                        raise DbtConfigError(
                             "Either `cluster` or `endpoint` must set when"
                             " using the odbc method to connect to Spark"
                         )
@@ -525,9 +528,7 @@ def open(cls, connection: Connection) -> Connection:
                         Connection(server_side_parameters=creds.server_side_parameters)
                     )
                 else:
-                    raise dbt.exceptions.DbtProfileError(
-                        f"invalid credential method: {creds.method}"
-                    )
+                    raise DbtConfigError(f"invalid credential method: {creds.method}")
                 break
             except Exception as e:
                 exc = e
@@ -537,7 +538,7 @@ def open(cls, connection: Connection) -> Connection:
                     msg = "Failed to connect"
                     if creds.token is not None:
                         msg += ", is your token valid?"
-                    raise dbt.exceptions.FailedToConnectError(msg) from e
+                    raise FailedToConnectError(msg) from e
                 retryable_message = _is_retryable_error(e)
                 if retryable_message and creds.connect_retries > 0:
                     msg = (
@@ -558,7 +559,7 @@ def open(cls, connection: Connection) -> Connection:
                     logger.warning(msg)
                     time.sleep(creds.connect_timeout)
                 else:
-                    raise dbt.exceptions.FailedToConnectError("failed to connect") from e
+                    raise FailedToConnectError("failed to connect") from e
         else:
             raise exc  # type: ignore
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 16c3a3cb..9a1a7ec0 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,18 +1,31 @@
+import os
 import re
 from concurrent.futures import Future
 from dataclasses import dataclass
-from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable, Set
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Union,
+    Type,
+    Tuple,
+    Callable,
+    Set,
+    FrozenSet,
+)
 
 from dbt.adapters.base.relation import InformationSchema
-from dbt.contracts.graph.manifest import Manifest
+from dbt.adapters.contracts.connection import AdapterResponse
+from dbt.adapters.events.logging import AdapterLogger
+from dbt_common.exceptions import DbtRuntimeError, CompilationError
+from dbt_common.utils import AttrDict, executor
 
 from typing_extensions import TypeAlias
 
 import agate
 
-import dbt
-import dbt.exceptions
-
 from dbt.adapters.base import AdapterConfig, PythonJobHelper
 from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport
 from dbt.adapters.sql import SQLAdapter
@@ -24,14 +37,16 @@
     AllPurposeClusterPythonJobHelper,
 )
 from dbt.adapters.base import BaseRelation
-from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
-from dbt.contracts.connection import AdapterResponse
-from dbt.contracts.graph.nodes import ConstraintType
-from dbt.contracts.relation import RelationType
-from dbt.events import AdapterLogger
-from dbt.utils import executor, AttrDict
+from dbt.adapters.contracts.relation import RelationType, RelationConfig
+from dbt_common.clients.agate_helper import DEFAULT_TYPE_TESTER
+from dbt_common.contracts.constraints import ConstraintType
 
 logger = AdapterLogger("Spark")
+packages = ["pyhive.hive", "thrift.transport", "thrift.protocol"]
+log_level = os.getenv("DBT_SPARK_LOG_LEVEL", "ERROR")
+for package in packages:
+    logger.debug(f"Setting {package} logging to {log_level}")
+    logger.set_adapter_dependency_log_level(package, log_level)
 
 GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "get_columns_in_relation_raw"
 LIST_SCHEMAS_MACRO_NAME = "list_schemas"
@@ -144,7 +159,7 @@ def _get_relation_information(self, row: agate.Row) -> RelationInfo:
         try:
             _schema, name, _, information = row
         except ValueError:
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 f'Invalid value from "show tables extended ...", got {len(row)} values, expected 4'
             )
 
@@ -155,7 +170,7 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn
         try:
             _schema, name, _ = row
         except ValueError:
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 f'Invalid value from "show tables ...", got {len(row)} values, expected 3'
             )
 
@@ -164,7 +179,7 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn
             table_results = self.execute_macro(
                 DESCRIBE_TABLE_EXTENDED_MACRO_NAME, kwargs={"table_name": table_name}
             )
-        except dbt.exceptions.DbtRuntimeError as e:
+        except DbtRuntimeError as e:
             logger.debug(f"Error while retrieving information about {table_name}: {e.msg}")
             table_results = AttrDict()
 
@@ -219,7 +234,7 @@ def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[
                 row_list=show_table_extended_rows,
                 relation_info_func=self._get_relation_information,
             )
-        except dbt.exceptions.DbtRuntimeError as e:
+        except DbtRuntimeError as e:
             errmsg = getattr(e, "msg", "")
             if f"Database '{schema_relation}' not found" in errmsg:
                 return []
@@ -236,7 +251,7 @@ def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[
                         row_list=show_table_rows,
                         relation_info_func=self._get_relation_information_using_describe,
                     )
-                except dbt.exceptions.DbtRuntimeError as e:
+                except DbtRuntimeError as e:
                     description = "Error while retrieving information about"
                     logger.debug(f"{description} {schema_relation}: {e.msg}")
                     return []
@@ -298,7 +313,7 @@ def get_columns_in_relation(self, relation: BaseRelation) -> List[SparkColumn]:
                 GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
             )
             columns = self.parse_describe_extended(relation, rows)
-        except dbt.exceptions.DbtRuntimeError as e:
+        except DbtRuntimeError as e:
             # spark would throw an error when the table doesn't exist, whereas other
             # CDWs would just return an empty list, so we normalize the behavior here
             errmsg = getattr(e, "msg", "")
@@ -352,11 +367,13 @@ def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str,
             yield as_dict
 
     def get_catalog(
-        self, manifest: Manifest, selected_nodes: Optional[Set] = None
+        self,
+        relation_configs: Iterable[RelationConfig],
+        used_schemas: FrozenSet[Tuple[str, str]],
     ) -> Tuple[agate.Table, List[Exception]]:
-        schema_map = self._get_catalog_schemas(manifest)
+        schema_map = self._get_catalog_schemas(relation_configs)
         if len(schema_map) > 1:
-            raise dbt.exceptions.CompilationError(
+            raise CompilationError(
                 f"Expected only one database in get_catalog, found " f"{list(schema_map)}"
             )
 
@@ -371,7 +388,7 @@ def get_catalog(
                             self._get_one_catalog,
                             info,
                             [schema],
-                            manifest,
+                            relation_configs,
                         )
                     )
             catalogs, exceptions = catch_as_completed(futures)
@@ -381,10 +398,10 @@ def _get_one_catalog(
         self,
         information_schema: InformationSchema,
         schemas: Set[str],
-        manifest: Manifest,
+        used_schemas: FrozenSet[Tuple[str, str]],
     ) -> agate.Table:
         if len(schemas) != 1:
-            raise dbt.exceptions.CompilationError(
+            raise CompilationError(
                 f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}"
             )
 
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index 89831ca7..e3e7cb37 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -4,8 +4,9 @@
 from typing import Any, Dict, Callable, Iterable
 import uuid
 
-import dbt.exceptions
 from dbt.adapters.base import PythonJobHelper
+from dbt_common.exceptions import DbtRuntimeError
+
 from dbt.adapters.spark import SparkCredentials
 from dbt.adapters.spark import __version__
 
@@ -53,7 +54,7 @@ def _create_work_dir(self, path: str) -> None:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 f"Error creating work_dir for python notebooks\n {response.content!r}"
             )
 
@@ -71,9 +72,7 @@ def _upload_notebook(self, path: str, compiled_code: str) -> None:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.DbtRuntimeError(
-                f"Error creating python notebook.\n {response.content!r}"
-            )
+            raise DbtRuntimeError(f"Error creating python notebook.\n {response.content!r}")
 
     def _submit_job(self, path: str, cluster_spec: dict) -> str:
         job_spec = {
@@ -99,9 +98,7 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str:
             json=job_spec,
         )
         if submit_response.status_code != 200:
-            raise dbt.exceptions.DbtRuntimeError(
-                f"Error creating python run.\n {submit_response.content!r}"
-            )
+            raise DbtRuntimeError(f"Error creating python run.\n {submit_response.content!r}")
         return submit_response.json()["run_id"]
 
     def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> None:
@@ -135,7 +132,7 @@ def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> No
         json_run_output = run_output.json()
         result_state = json_run_output["metadata"]["state"]["result_state"]
         if result_state != "SUCCESS":
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 "Python model failed with traceback as:\n"
                 "(Note that the line number here does not "
                 "match the line number in your code due to dbt templating)\n"
@@ -169,9 +166,9 @@ def polling(
             response = status_func(**status_func_kwargs)
             state = get_state_func(response)
         if exceeded_timeout:
-            raise dbt.exceptions.DbtRuntimeError("python model run timed out")
+            raise DbtRuntimeError("python model run timed out")
         if state != expected_end_state:
-            raise dbt.exceptions.DbtRuntimeError(
+            raise DbtRuntimeError(
                 "python model run ended in state"
                 f"{state} with state_message\n{get_state_msg_func(response)}"
             )
@@ -205,9 +202,7 @@ def create(self) -> str:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.DbtRuntimeError(
-                f"Error creating an execution context.\n {response.content!r}"
-            )
+            raise DbtRuntimeError(f"Error creating an execution context.\n {response.content!r}")
         return response.json()["id"]
 
     def destroy(self, context_id: str) -> str:
@@ -221,9 +216,7 @@ def destroy(self, context_id: str) -> str:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.DbtRuntimeError(
-                f"Error deleting an execution context.\n {response.content!r}"
-            )
+            raise DbtRuntimeError(f"Error deleting an execution context.\n {response.content!r}")
         return response.json()["id"]
 
 
@@ -246,9 +239,7 @@ def execute(self, context_id: str, command: str) -> str:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.DbtRuntimeError(
-                f"Error creating a command.\n {response.content!r}"
-            )
+            raise DbtRuntimeError(f"Error creating a command.\n {response.content!r}")
         return response.json()["id"]
 
     def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
@@ -263,9 +254,7 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
             },
         )
         if response.status_code != 200:
-            raise dbt.exceptions.DbtRuntimeError(
-                f"Error getting status of command.\n {response.content!r}"
-            )
+            raise DbtRuntimeError(f"Error getting status of command.\n {response.content!r}")
         return response.json()
 
 
@@ -298,7 +287,7 @@ def submit(self, compiled_code: str) -> None:
                     get_state_msg_func=lambda response: response.json()["results"]["data"],
                 )
                 if response["results"]["resultType"] == "error":
-                    raise dbt.exceptions.DbtRuntimeError(
+                    raise DbtRuntimeError(
                         f"Python model failed with traceback as:\n"
                         f"{response['results']['cause']}"
                     )
diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py
index e80f2623..860935cb 100644
--- a/dbt/adapters/spark/relation.py
+++ b/dbt/adapters/spark/relation.py
@@ -2,9 +2,9 @@
 from dataclasses import dataclass, field
 
 from dbt.adapters.base.relation import BaseRelation, Policy
+from dbt.adapters.events.logging import AdapterLogger
 
-from dbt.exceptions import DbtRuntimeError
-from dbt.events import AdapterLogger
+from dbt_common.exceptions import DbtRuntimeError
 
 logger = AdapterLogger("Spark")
 
diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py
index b5b2bebd..7a6982e5 100644
--- a/dbt/adapters/spark/session.py
+++ b/dbt/adapters/spark/session.py
@@ -7,9 +7,9 @@
 from typing import Any, Dict, List, Optional, Tuple, Union, Sequence
 
 from dbt.adapters.spark.connections import SparkConnectionWrapper
-from dbt.events import AdapterLogger
-from dbt.utils import DECIMALS
-from dbt.exceptions import DbtRuntimeError
+from dbt.adapters.events.logging import AdapterLogger
+from dbt_common.utils.encoding import DECIMALS
+from dbt_common.exceptions import DbtRuntimeError
 from pyspark.sql import DataFrame, Row, SparkSession
 from pyspark.sql.utils import AnalysisException
 
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index bfc1f198..bf9f63cf 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -294,7 +294,7 @@
 
 {% macro spark__list_relations_without_caching(relation) %}
   {% call statement('list_relations_without_caching', fetch_result=True) -%}
-    show table extended in {{ relation }} like '*'
+    show table extended in {{ relation.schema }} like '*'
   {% endcall %}
 
   {% do return(load_result('list_relations_without_caching').table) %}
@@ -305,7 +305,7 @@
   {#-- V2 iceberg tables #}
   {#-- https://issues.apache.org/jira/browse/SPARK-33393 #}
   {% call statement('list_relations_without_caching_show_tables', fetch_result=True) -%}
-    show tables in {{ schema_relation }} like '*'
+    show tables in {{ schema_relation.schema }} like '*'
   {% endcall %}
 
   {% do return(load_result('list_relations_without_caching_show_tables').table) %}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 765482e2..28a626fc 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,7 +1,6 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
diff --git a/setup.py b/setup.py
index 301b4a41..2d6e00e5 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,6 @@
     print('Please upgrade setuptools with "pip install --upgrade setuptools" ' "and try again")
     sys.exit(1)
 
-
 # pull long description from README
 this_directory = os.path.abspath(os.path.dirname(__file__))
 with open(os.path.join(this_directory, "README.md"), "r", encoding="utf8") as f:
@@ -40,17 +39,8 @@ def _get_plugin_version_dict():
         return match.groupdict()
 
 
-# require a compatible minor version (~=), prerelease if this is a prerelease
-def _get_dbt_core_version():
-    parts = _get_plugin_version_dict()
-    minor = "{major}.{minor}.0".format(**parts)
-    pre = parts["prekind"] + "1" if parts["prekind"] else ""
-    return f"{minor}{pre}"
-
-
 package_name = "dbt-spark"
 package_version = "1.8.0a1"
-dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=4.0.39"]
@@ -73,8 +63,9 @@ def _get_dbt_core_version():
     packages=find_namespace_packages(include=["dbt", "dbt.*"]),
     include_package_data=True,
     install_requires=[
-        "dbt-core~={}".format(dbt_core_version),
         "sqlparams>=3.0.0",
+        "dbt-common<1.0",
+        "dbt-adapters~=0.1.0a1",
     ],
     extras_require={
         "ODBC": odbc_extras,
diff --git a/tests/conftest.py b/tests/conftest.py
index 700ade4d..efba41a5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -42,9 +42,9 @@ def apache_spark_target():
         "user": "dbt",
         "method": "thrift",
         "port": 10000,
-        "connect_retries": 3,
-        "connect_timeout": 5,
-        "retry_all": True,
+        "connect_retries": 2,
+        "connect_timeout": 3,
+        "retry_all": False,
     }
 
 
@@ -59,7 +59,7 @@ def databricks_cluster_target():
         "port": 443,
         "connect_retries": 3,
         "connect_timeout": 5,
-        "retry_all": True,
+        "retry_all": False,
         "user": os.getenv("DBT_DATABRICKS_USER"),
     }
 
@@ -87,11 +87,9 @@ def databricks_http_cluster_target():
         "token": os.getenv("DBT_DATABRICKS_TOKEN"),
         "method": "http",
         "port": 443,
-        # more retries + longer timout to handle unavailability while cluster is restarting
-        # return failures quickly in dev, retry all failures in CI (up to 5 min)
-        "connect_retries": 5,
-        "connect_timeout": 60,
-        "retry_all": bool(os.getenv("DBT_DATABRICKS_RETRY_ALL", False)),
+        "connect_retries": 3,
+        "connect_timeout": 5,
+        "retry_all": False,
         "user": os.getenv("DBT_DATABRICKS_USER"),
     }
 
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
new file mode 100644
index 00000000..c1a0397b
--- /dev/null
+++ b/tests/functional/conftest.py
@@ -0,0 +1,19 @@
+from multiprocessing import Lock
+
+import pytest
+
+_db_start_lock = Lock()
+_DB_CLUSTER_STARTED = False
+
+
+# Running this should prevent tests from needing to be retried because the Databricks cluster isn't available
+@pytest.fixture(scope="class", autouse=True)
+def start_databricks_cluster(project, request):
+    global _DB_CLUSTER_STARTED
+    profile_type = request.config.getoption("--profile")
+    with _db_start_lock:
+        if "databricks" in profile_type and not _DB_CLUSTER_STARTED:
+            print("Starting Databricks cluster")
+            project.run_sql("SELECT 1")
+
+            _DB_CLUSTER_STARTED = True
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index a7da6330..54e9f015 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -1,4 +1,5 @@
 import unittest
+from multiprocessing import get_context
 from unittest import mock
 
 import dbt.flags as flags
@@ -146,7 +147,7 @@ def _get_target_odbc_sql_endpoint(self, project):
 
     def test_http_connection(self):
         config = self._get_target_http(self.project_cfg)
-        adapter = SparkAdapter(config)
+        adapter = SparkAdapter(config, get_context("spawn"))
 
         def hive_http_connect(thrift_transport, configuration):
             self.assertEqual(thrift_transport.scheme, "https")
@@ -171,7 +172,7 @@ def hive_http_connect(thrift_transport, configuration):
 
     def test_thrift_connection(self):
         config = self._get_target_thrift(self.project_cfg)
-        adapter = SparkAdapter(config)
+        adapter = SparkAdapter(config, get_context("spawn"))
 
         def hive_thrift_connect(
             host, port, username, auth, kerberos_service_name, password, configuration
@@ -195,7 +196,7 @@ def hive_thrift_connect(
 
     def test_thrift_ssl_connection(self):
         config = self._get_target_use_ssl_thrift(self.project_cfg)
-        adapter = SparkAdapter(config)
+        adapter = SparkAdapter(config, get_context("spawn"))
 
         def hive_thrift_connect(thrift_transport, configuration):
             self.assertIsNotNone(thrift_transport)
@@ -215,7 +216,7 @@ def hive_thrift_connect(thrift_transport, configuration):
 
     def test_thrift_connection_kerberos(self):
         config = self._get_target_thrift_kerberos(self.project_cfg)
-        adapter = SparkAdapter(config)
+        adapter = SparkAdapter(config, get_context("spawn"))
 
         def hive_thrift_connect(
             host, port, username, auth, kerberos_service_name, password, configuration
@@ -239,7 +240,7 @@ def hive_thrift_connect(
 
     def test_odbc_cluster_connection(self):
         config = self._get_target_odbc_cluster(self.project_cfg)
-        adapter = SparkAdapter(config)
+        adapter = SparkAdapter(config, get_context("spawn"))
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
@@ -266,7 +267,7 @@ def pyodbc_connect(connection_str, autocommit):
 
     def test_odbc_endpoint_connection(self):
         config = self._get_target_odbc_sql_endpoint(self.project_cfg)
-        adapter = SparkAdapter(config)
+        adapter = SparkAdapter(config, get_context("spawn"))
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
@@ -329,7 +330,9 @@ def test_parse_relation(self):
         input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
+        rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+            relation, input_cols
+        )
         self.assertEqual(len(rows), 4)
         self.assertEqual(
             rows[0].to_column_dict(omit_none=False),
@@ -418,7 +421,9 @@ def test_parse_relation_with_integer_owner(self):
         input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
+        rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+            relation, input_cols
+        )
 
         self.assertEqual(rows[0].to_column_dict().get("table_owner"), "1234")
 
@@ -454,7 +459,9 @@ def test_parse_relation_with_statistics(self):
         input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
+        rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+            relation, input_cols
+        )
         self.assertEqual(len(rows), 1)
         self.assertEqual(
             rows[0].to_column_dict(omit_none=False),
@@ -483,7 +490,7 @@ def test_parse_relation_with_statistics(self):
 
     def test_relation_with_database(self):
         config = self._get_target_http(self.project_cfg)
-        adapter = SparkAdapter(config)
+        adapter = SparkAdapter(config, get_context("spawn"))
         # fine
         adapter.Relation.create(schema="different", identifier="table")
         with self.assertRaises(DbtRuntimeError):
@@ -564,7 +571,9 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self)
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(relation)
+        columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
+            relation
+        )
         self.assertEqual(len(columns), 4)
         self.assertEqual(
             columns[0].to_column_dict(omit_none=False),
@@ -649,7 +658,9 @@ def test_parse_columns_from_information_with_view_type(self):
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(relation)
+        columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
+            relation
+        )
         self.assertEqual(len(columns), 4)
         self.assertEqual(
             columns[1].to_column_dict(omit_none=False),
@@ -715,7 +726,9 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(relation)
+        columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
+            relation
+        )
         self.assertEqual(len(columns), 4)
 
         self.assertEqual(
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index ac8c6224..17cd3ee7 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -9,7 +9,7 @@
 
 import agate
 import pytest
-from dbt.dataclass_schema import ValidationError
+from dbt_common.dataclass_schema import ValidationError
 from dbt.config.project import PartialProject
 
 

From 613fa58ff9d1f06877ad8790a145d91f5913f862 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 20 Feb 2024 07:53:05 -0800
Subject: [PATCH 533/603] fix spark cluster start mechanism and add extra dev
 requirements (#986)

---
 dagger/run_dbt_spark_tests.py | 18 +++++++++++++++---
 dev-requirements.txt          |  3 +++
 tests/functional/conftest.py  | 28 ++++++++++++++++++----------
 3 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 436cb1e9..15f9cf2c 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -112,15 +112,27 @@ async def test_spark(test_args):
             .with_exec(["./scripts/install_os_reqs.sh"])
             # install dbt-spark + python deps
             .with_directory("/src", req_files)
-            .with_directory("src/dbt", dbt_spark_dir)
-            .with_directory("src/tests", test_dir)
-            .with_workdir("/src")
             .with_exec(["pip", "install", "-U", "pip"])
+            .with_workdir("/src")
             .with_exec(["pip", "install", "-r", "requirements.txt"])
             .with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+        )
+
+        # install local dbt-spark changes
+        tst_container = (
+            tst_container.with_workdir("/")
+            .with_directory("src/dbt", dbt_spark_dir)
+            .with_workdir("/src")
             .with_exec(["pip", "install", "-e", "."])
         )
 
+        # install local test changes
+        tst_container = (
+            tst_container.with_workdir("/")
+            .with_directory("src/tests", test_dir)
+            .with_workdir("/src")
+        )
+
         if test_profile == "apache_spark":
             spark_ctr, spark_host = get_spark_container(client)
             tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 28a626fc..8f674d84 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,5 +1,8 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-common.git
+git+https://github.com/dbt-labs/dbt-adapters.git
 git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # if version 1.x or greater -> pin to major version
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index c1a0397b..476ffb47 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -1,19 +1,27 @@
-from multiprocessing import Lock
-
+import time
 import pytest
 
-_db_start_lock = Lock()
-_DB_CLUSTER_STARTED = False
+
+def _wait_for_databricks_cluster(project):
+    """
+    It takes roughly three minutes for the cluster to start; to be safe, poll for up to ten minutes (60 attempts, 10 seconds apart)
+    """
+    for _ in range(60):
+        try:
+            project.run_sql("SELECT 1", fetch=True)
+            return
+        except Exception:
+            time.sleep(10)
+
+    raise Exception("Databricks cluster did not start in time")
 
 
 # Running this should prevent tests from needing to be retried because the Databricks cluster isn't available
 @pytest.fixture(scope="class", autouse=True)
 def start_databricks_cluster(project, request):
-    global _DB_CLUSTER_STARTED
     profile_type = request.config.getoption("--profile")
-    with _db_start_lock:
-        if "databricks" in profile_type and not _DB_CLUSTER_STARTED:
-            print("Starting Databricks cluster")
-            project.run_sql("SELECT 1")
 
-            _DB_CLUSTER_STARTED = True
+    if "databricks" in profile_type:
+        _wait_for_databricks_cluster(project)
+
+    yield 1

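The conftest change above replaces the lock-and-flag approach with a plain poll-until-success loop: a cheap `SELECT 1` is retried, sleeping ten seconds after each failure, until the Databricks cluster answers or the attempts run out. As a minimal, self-contained sketch of that pattern (the `wait_until` name, signature, and defaults are illustrative, not part of the patch):

import time

def wait_until(check, timeout=600, interval=10):
    """Poll `check` until it returns without raising, or fail after ~`timeout` seconds.

    Mirrors the retry loop in `_wait_for_databricks_cluster`, where `check` is a
    cheap query such as `project.run_sql("SELECT 1", fetch=True)`.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            return check()
        except Exception:
            time.sleep(interval)
    raise TimeoutError("resource did not become available in time")

# usage sketch: wait_until(lambda: project.run_sql("SELECT 1", fetch=True))
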
From ef91425004d58948532af5176be9d18af41d0b87 Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Wed, 21 Feb 2024 11:56:09 -0500
Subject: [PATCH 534/603] Add functional tests for unit testing (#976)

---
 .../unreleased/Features-20240220-195925.yaml  |  6 ++++
 dbt/include/spark/macros/adapters.sql         |  1 +
 dbt/include/spark/macros/utils/safe_cast.sql  |  8 +++++
 .../adapter/unit_testing/test_unit_testing.py | 34 +++++++++++++++++++
 4 files changed, 49 insertions(+)
 create mode 100644 .changes/unreleased/Features-20240220-195925.yaml
 create mode 100644 dbt/include/spark/macros/utils/safe_cast.sql
 create mode 100644 tests/functional/adapter/unit_testing/test_unit_testing.py

diff --git a/.changes/unreleased/Features-20240220-195925.yaml b/.changes/unreleased/Features-20240220-195925.yaml
new file mode 100644
index 00000000..c5d86ab7
--- /dev/null
+++ b/.changes/unreleased/Features-20240220-195925.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Implement spark__safe_cast and add functional tests for unit testing
+time: 2024-02-20T19:59:25.907821-05:00
+custom:
+  Author: michelleark
+  Issue: "987"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index bf9f63cf..a6404a2d 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -387,6 +387,7 @@
         "identifier": tmp_identifier
     }) -%}
 
+    {%- set tmp_relation = tmp_relation.include(database=false, schema=false) -%}
     {% do return(tmp_relation) %}
 {% endmacro %}
 
diff --git a/dbt/include/spark/macros/utils/safe_cast.sql b/dbt/include/spark/macros/utils/safe_cast.sql
new file mode 100644
index 00000000..3ce5820a
--- /dev/null
+++ b/dbt/include/spark/macros/utils/safe_cast.sql
@@ -0,0 +1,8 @@
+{% macro spark__safe_cast(field, type) %}
+{%- set field_clean = field.strip('"').strip("'") if (cast_from_string_unsupported_for(type) and field is string) else field -%}
+cast({{field_clean}} as {{type}})
+{% endmacro %}
+
+{% macro cast_from_string_unsupported_for(type) %}
+    {{ return(type.lower().startswith('struct') or type.lower().startswith('array') or type.lower().startswith('map')) }}
+{% endmacro %}
diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py
new file mode 100644
index 00000000..b70c581d
--- /dev/null
+++ b/tests/functional/adapter/unit_testing/test_unit_testing.py
@@ -0,0 +1,34 @@
+import pytest
+
+from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes
+from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity
+from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput
+
+
+class TestSparkUnitTestingTypes(BaseUnitTestingTypes):
+    @pytest.fixture
+    def data_types(self):
+        # sql_value, yaml_value
+        return [
+            ["1", "1"],
+            ["2.0", "2.0"],
+            ["'12345'", "12345"],
+            ["'string'", "string"],
+            ["true", "true"],
+            ["date '2011-11-11'", "2011-11-11"],
+            ["timestamp '2013-11-03 00:00:00-0'", "2013-11-03 00:00:00-0"],
+            ["array(1, 2, 3)", "'array(1, 2, 3)'"],
+            [
+                "map('10', 't', '15', 'f', '20', NULL)",
+                """'map("10", "t", "15", "f", "20", NULL)'""",
+            ],
+            ['named_struct("a", 1, "b", 2, "c", 3)', """'named_struct("a", 1, "b", 2, "c", 3)'"""],
+        ]
+
+
+class TestSparkUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity):
+    pass
+
+
+class TestSparkUnitTestInvalidInput(BaseUnitTestInvalidInput):
+    pass

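The `spark__safe_cast` macro added above special-cases complex types: unit-test fixture values for arrays, maps, and structs arrive as quoted strings (see the `data_types` fixture), and Spark cannot cast a string literal to those types, so the macro strips the surrounding quotes before emitting the cast. A plain-Python mirror of that logic, for illustration only (the `safe_cast_sql` helper is not part of the patch):

def safe_cast_sql(field, type_):
    """Illustrative mirror of the spark__safe_cast macro's quote-stripping logic."""
    # struct/array/map types cannot be cast from a string literal
    complex_type = type_.lower().startswith(("struct", "array", "map"))
    field_clean = field.strip('"').strip("'") if complex_type and isinstance(field, str) else field
    return f"cast({field_clean} as {type_})"

# safe_cast_sql("'array(1, 2, 3)'", "array<int>")  -> "cast(array(1, 2, 3) as array<int>)"
# safe_cast_sql("'12345'", "bigint")               -> "cast('12345' as bigint)"
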
From 5d600086746d75781838ed71aa266f18c1bb37f1 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 27 Feb 2024 14:27:34 -0500
Subject: [PATCH 535/603] remove actions that sync github and jira (#977)

The failing checks are from the retired CircleCI jobs, which have since been reimplemented in GHA.
---
 .github/workflows/jira-creation.yml   | 28 --------------------------
 .github/workflows/jira-label.yml      | 28 --------------------------
 .github/workflows/jira-transition.yml | 29 ---------------------------
 3 files changed, 85 deletions(-)
 delete mode 100644 .github/workflows/jira-creation.yml
 delete mode 100644 .github/workflows/jira-label.yml
 delete mode 100644 .github/workflows/jira-transition.yml

diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml
deleted file mode 100644
index 2611a8bd..00000000
--- a/.github/workflows/jira-creation.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-# **what?**
-# Mirrors issues into Jira. Includes the information: title,
-# GitHub Issue ID and URL
-
-# **why?**
-# Jira is our tool for tracking and we need to see these issues in there
-
-# **when?**
-# On issue creation or when an issue is labeled `Jira`
-
-name: Jira Issue Creation
-
-on:
-  issues:
-    types: [opened, labeled]
-
-permissions:
-  issues: write
-
-jobs:
-  call-label-action:
-    uses: dbt-labs/actions/.github/workflows/jira-creation.yml@main
-    with:
-      project_key: ADAP
-    secrets:
-      JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
-      JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
-      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml
deleted file mode 100644
index 1637cbe3..00000000
--- a/.github/workflows/jira-label.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-# **what?**
-# Calls mirroring Jira label Action. Includes adding a new label
-# to an existing issue or removing a label as well
-
-# **why?**
-# Jira is our tool for tracking and we need to see these labels in there
-
-# **when?**
-# On labels being added or removed from issues
-
-name: Jira Label Mirroring
-
-on:
-  issues:
-    types: [labeled, unlabeled]
-
-permissions:
-  issues: read
-
-jobs:
-  call-label-action:
-    uses: dbt-labs/actions/.github/workflows/jira-label.yml@main
-    with:
-      project_key: ADAP
-    secrets:
-      JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
-      JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
-      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml
deleted file mode 100644
index 99158a15..00000000
--- a/.github/workflows/jira-transition.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-# **what?**
-# Transition a Jira issue to a new state
-# Only supports these GitHub Issue transitions:
-#   closed, deleted, reopened
-
-# **why?**
-# Jira needs to be kept up-to-date
-
-# **when?**
-# On issue closing, deletion, reopened
-
-name: Jira Issue Transition
-
-on:
-  issues:
-    types: [closed, deleted, reopened]
-
-# no special access is needed
-permissions: read-all
-
-jobs:
-  call-label-action:
-    uses: dbt-labs/actions/.github/workflows/jira-transition.yml@main
-    with:
-      project_key: ADAP
-    secrets:
-      JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
-      JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
-      JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}

From 5246f8207799ccd8d35cf0693fc0eb931b2aa3f9 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 27 Feb 2024 18:41:13 -0800
Subject: [PATCH 536/603] update dbt-common dependency to <2.0 (#992)

* update dbt-common dependency to <2.0

* update dbt-adapters dependency to <2.0
---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 2d6e00e5..f87cc321 100644
--- a/setup.py
+++ b/setup.py
@@ -64,8 +64,8 @@ def _get_plugin_version_dict():
     include_package_data=True,
     install_requires=[
         "sqlparams>=3.0.0",
-        "dbt-common<1.0",
-        "dbt-adapters~=0.1.0a1",
+        "dbt-common<2.0",
+        "dbt-adapters<2.0",
     ],
     extras_require={
         "ODBC": odbc_extras,

From c56b9ce02000bd4e7a88f9fd0ac913a3f47ff2f3 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 29 Feb 2024 18:24:24 -0500
Subject: [PATCH 537/603] include a pre-release in the dbt-adapters pin to
 allow pre-releases to be installed on main (#993)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f87cc321..7342c866 100644
--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,7 @@ def _get_plugin_version_dict():
     install_requires=[
         "sqlparams>=3.0.0",
         "dbt-common<2.0",
-        "dbt-adapters<2.0",
+        "dbt-adapters>=0.1.0a1,<2.0",
     ],
     extras_require={
         "ODBC": odbc_extras,

From 0aee01e7d36754736840008a8fb29c1450752c83 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Fri, 1 Mar 2024 15:27:35 -0800
Subject: [PATCH 538/603] update install_requires to allow for pre-release
 common/adapters (#995)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7342c866..5f6290d1 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@ def _get_plugin_version_dict():
     include_package_data=True,
     install_requires=[
         "sqlparams>=3.0.0",
-        "dbt-common<2.0",
+        "dbt-common>=0.1.0a1,<2.0",
         "dbt-adapters>=0.1.0a1,<2.0",
     ],
     extras_require={

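Taken together, patches 536 through 538 relax the dbt-common and dbt-adapters pins from `<1.0` / `~=0.1.0a1` to `>=0.1.0a1,<2.0`, so pre-releases of either package can be installed on main. The resulting setup.py fragment reads:

    install_requires=[
        "sqlparams>=3.0.0",
        "dbt-common>=0.1.0a1,<2.0",
        "dbt-adapters>=0.1.0a1,<2.0",
    ],
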
From 480355936d44353859f5bdbf80863803495c047f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 1 Mar 2024 16:03:21 -0800
Subject: [PATCH 539/603] [create-pull-request] automated change (#994)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.8.0-b1.md                          | 39 ++++++++++++++++++
 .../Dependencies-20231009-220732.yaml         |  0
 .../Dependencies-20231010-222853.yaml         |  0
 .../Dependencies-20231010-222910.yaml         |  0
 .../Dependencies-20231013-223750.yaml         |  0
 .../Dependencies-20231027-230251.yaml         |  0
 .../Dependencies-20231027-230254.yaml         |  0
 .../Dependencies-20231027-230301.yaml         |  0
 .../Dependencies-20231108-222326.yaml         |  0
 .../Dependencies-20231110-224056.yaml         |  0
 .../Dependencies-20231113-224111.yaml         |  0
 .../Dependencies-20231127-220733.yaml         |  0
 .../Dependencies-20231127-220737.yaml         |  0
 .../Dependencies-20231127-220741.yaml         |  0
 .../Dependencies-20231204-224210.yaml         |  0
 .../Dependencies-20231212-223929.yaml         |  0
 .../Features-20240220-195925.yaml             |  0
 .../Fixes-20231107-134141.yaml                |  0
 .../Fixes-20231221-081949.yaml                |  0
 .../Under the Hood-20230929-161218.yaml       |  0
 .../Under the Hood-20231119-132050.yaml       |  0
 .../Under the Hood-20231214-134728.yaml       |  0
 .../Under the Hood-20240111-114806.yaml       |  0
 CHANGELOG.md                                  | 41 +++++++++++++++++++
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 27 files changed, 83 insertions(+), 3 deletions(-)
 create mode 100644 .changes/1.8.0-b1.md
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231009-220732.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231010-222853.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231010-222910.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231013-223750.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231027-230251.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231027-230254.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231027-230301.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231108-222326.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231110-224056.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231113-224111.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231127-220733.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231127-220737.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231127-220741.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231204-224210.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Dependencies-20231212-223929.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Features-20240220-195925.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Fixes-20231107-134141.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Fixes-20231221-081949.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Under the Hood-20230929-161218.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Under the Hood-20231119-132050.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Under the Hood-20231214-134728.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Under the Hood-20240111-114806.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 48676867..595914b2 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.8.0a1
+current_version = 1.8.0b1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.8.0-b1.md b/.changes/1.8.0-b1.md
new file mode 100644
index 00000000..4f4091a9
--- /dev/null
+++ b/.changes/1.8.0-b1.md
@@ -0,0 +1,39 @@
+## dbt-spark 1.8.0-b1 - March 01, 2024
+
+### Features
+
+- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987))
+
+### Fixes
+
+- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935))
+- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
+
+### Under the Hood
+
+- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
+- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949))
+- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962))
+- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972))
+
+### Dependencies
+
+- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903))
+- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904))
+- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905))
+- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914))
+- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925))
+- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926))
+- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927))
+- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942))
+- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946))
+- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947))
+- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951))
+- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952))
+- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953))
+- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956))
+- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959))
+
+### Contributors
+- [@JCZuurmond](https://github.com/JCZuurmond) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
+- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
diff --git a/.changes/unreleased/Dependencies-20231009-220732.yaml b/.changes/1.8.0/Dependencies-20231009-220732.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231009-220732.yaml
rename to .changes/1.8.0/Dependencies-20231009-220732.yaml
diff --git a/.changes/unreleased/Dependencies-20231010-222853.yaml b/.changes/1.8.0/Dependencies-20231010-222853.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231010-222853.yaml
rename to .changes/1.8.0/Dependencies-20231010-222853.yaml
diff --git a/.changes/unreleased/Dependencies-20231010-222910.yaml b/.changes/1.8.0/Dependencies-20231010-222910.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231010-222910.yaml
rename to .changes/1.8.0/Dependencies-20231010-222910.yaml
diff --git a/.changes/unreleased/Dependencies-20231013-223750.yaml b/.changes/1.8.0/Dependencies-20231013-223750.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231013-223750.yaml
rename to .changes/1.8.0/Dependencies-20231013-223750.yaml
diff --git a/.changes/unreleased/Dependencies-20231027-230251.yaml b/.changes/1.8.0/Dependencies-20231027-230251.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231027-230251.yaml
rename to .changes/1.8.0/Dependencies-20231027-230251.yaml
diff --git a/.changes/unreleased/Dependencies-20231027-230254.yaml b/.changes/1.8.0/Dependencies-20231027-230254.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231027-230254.yaml
rename to .changes/1.8.0/Dependencies-20231027-230254.yaml
diff --git a/.changes/unreleased/Dependencies-20231027-230301.yaml b/.changes/1.8.0/Dependencies-20231027-230301.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231027-230301.yaml
rename to .changes/1.8.0/Dependencies-20231027-230301.yaml
diff --git a/.changes/unreleased/Dependencies-20231108-222326.yaml b/.changes/1.8.0/Dependencies-20231108-222326.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231108-222326.yaml
rename to .changes/1.8.0/Dependencies-20231108-222326.yaml
diff --git a/.changes/unreleased/Dependencies-20231110-224056.yaml b/.changes/1.8.0/Dependencies-20231110-224056.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231110-224056.yaml
rename to .changes/1.8.0/Dependencies-20231110-224056.yaml
diff --git a/.changes/unreleased/Dependencies-20231113-224111.yaml b/.changes/1.8.0/Dependencies-20231113-224111.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231113-224111.yaml
rename to .changes/1.8.0/Dependencies-20231113-224111.yaml
diff --git a/.changes/unreleased/Dependencies-20231127-220733.yaml b/.changes/1.8.0/Dependencies-20231127-220733.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231127-220733.yaml
rename to .changes/1.8.0/Dependencies-20231127-220733.yaml
diff --git a/.changes/unreleased/Dependencies-20231127-220737.yaml b/.changes/1.8.0/Dependencies-20231127-220737.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231127-220737.yaml
rename to .changes/1.8.0/Dependencies-20231127-220737.yaml
diff --git a/.changes/unreleased/Dependencies-20231127-220741.yaml b/.changes/1.8.0/Dependencies-20231127-220741.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231127-220741.yaml
rename to .changes/1.8.0/Dependencies-20231127-220741.yaml
diff --git a/.changes/unreleased/Dependencies-20231204-224210.yaml b/.changes/1.8.0/Dependencies-20231204-224210.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231204-224210.yaml
rename to .changes/1.8.0/Dependencies-20231204-224210.yaml
diff --git a/.changes/unreleased/Dependencies-20231212-223929.yaml b/.changes/1.8.0/Dependencies-20231212-223929.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231212-223929.yaml
rename to .changes/1.8.0/Dependencies-20231212-223929.yaml
diff --git a/.changes/unreleased/Features-20240220-195925.yaml b/.changes/1.8.0/Features-20240220-195925.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240220-195925.yaml
rename to .changes/1.8.0/Features-20240220-195925.yaml
diff --git a/.changes/unreleased/Fixes-20231107-134141.yaml b/.changes/1.8.0/Fixes-20231107-134141.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20231107-134141.yaml
rename to .changes/1.8.0/Fixes-20231107-134141.yaml
diff --git a/.changes/unreleased/Fixes-20231221-081949.yaml b/.changes/1.8.0/Fixes-20231221-081949.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20231221-081949.yaml
rename to .changes/1.8.0/Fixes-20231221-081949.yaml
diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/1.8.0/Under the Hood-20230929-161218.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20230929-161218.yaml
rename to .changes/1.8.0/Under the Hood-20230929-161218.yaml
diff --git a/.changes/unreleased/Under the Hood-20231119-132050.yaml b/.changes/1.8.0/Under the Hood-20231119-132050.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20231119-132050.yaml
rename to .changes/1.8.0/Under the Hood-20231119-132050.yaml
diff --git a/.changes/unreleased/Under the Hood-20231214-134728.yaml b/.changes/1.8.0/Under the Hood-20231214-134728.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20231214-134728.yaml
rename to .changes/1.8.0/Under the Hood-20231214-134728.yaml
diff --git a/.changes/unreleased/Under the Hood-20240111-114806.yaml b/.changes/1.8.0/Under the Hood-20240111-114806.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20240111-114806.yaml
rename to .changes/1.8.0/Under the Hood-20240111-114806.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 902db37f..d65c50be 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,47 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.8.0-b1 - March 01, 2024
+
+### Features
+
+- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987))
+
+### Fixes
+
+- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935))
+- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
+
+### Under the Hood
+
+- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
+- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949))
+- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962))
+- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972))
+
+### Dependencies
+
+- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903))
+- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904))
+- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905))
+- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914))
+- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925))
+- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926))
+- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927))
+- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942))
+- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946))
+- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947))
+- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951))
+- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952))
+- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953))
+- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956))
+- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959))
+
+### Contributors
+- [@JCZuurmond](https://github.com/JCZuurmond) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
+- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
+
+
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index f15b401d..6496f3e2 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.8.0a1"
+version = "1.8.0b1"
diff --git a/setup.py b/setup.py
index 5f6290d1..067ca41b 100644
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@ def _get_plugin_version_dict():
 
 
 package_name = "dbt-spark"
-package_version = "1.8.0a1"
+package_version = "1.8.0b1"
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=4.0.39"]

From d568d21736059aa06fd760672f681010e4ae252c Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 1 Mar 2024 19:36:56 -0500
Subject: [PATCH 540/603] Vendor release-prep from centralized dbt-release,
 replace tox testing with spark repo testing (#997)

* vendor release-prep from centralized dbt-release, replace tox testing with spark repo testing

* replace dbt --version
---
 .github/workflows/main.yml         |   6 +-
 .github/workflows/release-prep.yml | 650 +++++++++++++++++++++++++++++
 .github/workflows/release.yml      |   4 +-
 3 files changed, 655 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/release-prep.yml

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 20f3f88f..68911710 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -64,7 +64,7 @@ jobs:
           mypy --version
           python -m pip install -r requirements.txt
           python -m pip install -r dev-requirements.txt
-          dbt --version
+          python -c "import dbt.adapters.spark"
 
       - name: Run pre-commit hooks
         run: pre-commit run --all-files --show-diff-on-failure
@@ -200,10 +200,10 @@ jobs:
           find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
       - name: Check wheel distributions
         run: |
-          dbt --version
+          python -c "import dbt.adapters.spark"
       - name: Install source distributions
         run: |
           find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
       - name: Check source distributions
         run: |
-          dbt --version
+          python -c "import dbt.adapters.spark"
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
new file mode 100644
index 00000000..1a6e450c
--- /dev/null
+++ b/.github/workflows/release-prep.yml
@@ -0,0 +1,650 @@
+# **what?**
+# Perform the version bump, generate the changelog and run tests.
+#
+# Inputs:
+#  sha:                   The commit to attach to this release
+#  version_number:        The release version number (i.e. 1.0.0b1, 1.2.3rc2, 1.0.0)
+#  target_branch:         The branch that we will release from
+#  env_setup_script_path: Path to the environment setup script
+#  test_run:              Test run (The temp branch will be used for release)
+#  nightly_release:       Identifier that this is nightly release
+#
+# Outputs:
+#   final_sha:      The sha that will actually be released.  This can differ from the
+#                   input sha if adding a version bump and/or changelog
+#   changelog_path: Path to the changelog file (ex .changes/1.2.3-rc1.md)
+#
+# Branching strategy:
+#  - During workflow execution a temp branch will be generated.
+#  - For normal runs the temp branch will be removed once changes have been merged to the target branch;
+#  - For test runs we will keep the temp branch and use it for the release;
+#  Naming strategy:
+#  - For normal runs:      prep-release/${{ inputs.version_number }}_$GITHUB_RUN_ID
+#  - For test runs:        prep-release/test-run/${{ inputs.version_number }}_$GITHUB_RUN_ID
+#  - For nightly releases: prep-release/nightly-release/${{ inputs.version_number }}_$GITHUB_RUN_ID
+#
+# **why?**
+# Reusable and consistent GitHub release process.
+#
+# **when?**
+# Call when ready to kick off a build and release
+#
+# Validation Checks
+#
+#  1. Bump the version if it has not been bumped
+#  2. Generate the changelog (via changie) if there is no markdown file for this version
+#
+
+name: Version Bump and Changelog Generation
+
+on:
+  workflow_call:
+    inputs:
+      sha:
+        required: true
+        type: string
+      version_number:
+        required: true
+        type: string
+      target_branch:
+        required: true
+        type: string
+      env_setup_script_path:
+        required: false
+        type: string
+        default: ""
+      test_run:
+        required: false
+        default: true
+        type: boolean
+      nightly_release:
+        type: boolean
+        default: false
+        required: false
+    outputs:
+      final_sha:
+        description: The new commit that includes the changelog and version bump.
+        value: ${{ jobs.determine-release-sha.outputs.final_sha }}
+      changelog_path:
+        description: The path to the changelog for this version
+        value: ${{ jobs.audit-changelog.outputs.changelog_path }}
+    secrets:
+      FISHTOWN_BOT_PAT:
+        description: "Token to commit/merge changes into branches"
+        required: true
+      IT_TEAM_MEMBERSHIP:
+        description: "Token that can view org level teams"
+        required: true
+
+permissions:
+  contents: write
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  PYTHON_TARGET_VERSION: 3.8
+  NOTIFICATION_PREFIX: "[Release Preparation]"
+
+jobs:
+  log-inputs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: "[DEBUG] Print Variables"
+        run: |
+          # WORKFLOW INPUTS
+          echo The last commit sha in the release:   ${{ inputs.sha }}
+          echo The release version number:           ${{ inputs.version_number }}
+          echo The branch that we will release from: ${{ inputs.target_branch }}
+          echo Path to the environment setup script: ${{ inputs.env_setup_script_path }}
+          echo Test run:                             ${{ inputs.test_run }}
+          echo Nightly release:                      ${{ inputs.nightly_release }}
+          # ENVIRONMENT VARIABLES
+          echo Python target version:                ${{ env.PYTHON_TARGET_VERSION }}
+          echo Notification prefix:                  ${{ env.NOTIFICATION_PREFIX }}
+
+  audit-changelog:
+    runs-on: ubuntu-latest
+
+    outputs:
+      changelog_path: ${{ steps.set_path.outputs.changelog_path }}
+      exists: ${{ steps.set_existence.outputs.exists }}
+      base_version: ${{ steps.semver.outputs.base-version }}
+      prerelease: ${{ steps.semver.outputs.pre-release }}
+      is_prerelease: ${{ steps.semver.outputs.is-pre-release }}
+
+    steps:
+      - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}"
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.sha }}
+
+      - name: "Audit Version And Parse Into Parts"
+        id: semver
+        uses: dbt-labs/actions/parse-semver@v1.1.0
+        with:
+          version: ${{ inputs.version_number }}
+
+      - name: "Set Changelog Path"
+        id: set_path
+        run: |
+          path=".changes/"
+          if [[ ${{ steps.semver.outputs.is-pre-release }} -eq 1 ]]
+          then
+            path+="${{ steps.semver.outputs.base-version }}-${{ steps.semver.outputs.pre-release }}.md"
+          else
+            path+="${{ steps.semver.outputs.base-version }}.md"
+          fi
+          # Send notification
+          echo "changelog_path=$path" >> $GITHUB_OUTPUT
+          title="Changelog path"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$changelog_path"
+
+      - name: "Set Changelog Existence For Subsequent Jobs"
+        id: set_existence
+        run: |
+          does_exist=false
+          if test -f ${{ steps.set_path.outputs.changelog_path }}
+          then
+            does_exist=true
+          fi
+          echo "exists=$does_exist">> $GITHUB_OUTPUT
+
+      - name: "[Notification] Set Changelog Existence For Subsequent Jobs"
+        run: |
+          title="Changelog exists"
+          if [[ ${{ steps.set_existence.outputs.exists }} == true ]]
+          then
+            message="Changelog file ${{ steps.set_path.outputs.changelog_path }} already exists"
+          else
+            message="Changelog file ${{ steps.set_path.outputs.changelog_path }} doesn't exist"
+          fi
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+      - name: "Spark safety check"
+        if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }}
+        run: |
+          if [[ ${{ steps.set_existence.outputs.exists }} != true ]]
+          then
+            title="Spark version-bump.yml check"
+            message="dbt-spark needs version-bump.yml run before running the release.  The changelog is not up to date."
+            echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+            exit 1
+          fi
+
+      - name: "[DEBUG] Print Outputs"
+        run: |
+          echo changelog_path: ${{ steps.set_path.outputs.changelog_path }}
+          echo exists:         ${{ steps.set_existence.outputs.exists }}
+          echo base_version:   ${{ steps.semver.outputs.base-version }}
+          echo prerelease:     ${{ steps.semver.outputs.pre-release }}
+          echo is_prerelease:  ${{ steps.semver.outputs.is-pre-release }}
+
+  audit-version-in-code:
+    runs-on: ubuntu-latest
+
+    outputs:
+      up_to_date: ${{ steps.version-check.outputs.up_to_date }}
+
+    steps:
+      - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}"
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.sha }}
+
+      - name: "Check Current Version In Code"
+        id: version-check
+        run: |
+          is_updated=false
+          if grep -Fxq "current_version = ${{ inputs.version_number }}" .bumpversion.cfg
+          then
+            is_updated=true
+          fi
+          echo "up_to_date=$is_updated" >> $GITHUB_OUTPUT
+
+      - name: "[Notification] Check Current Version In Code"
+        run: |
+          title="Version check"
+          if [[ ${{ steps.version-check.outputs.up_to_date }} == true ]]
+          then
+            message="The version in the codebase is equal to the provided version"
+          else
+            message="The version in the codebase differs from the provided version"
+          fi
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+      - name: "Spark safety check"
+        if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }}
+        run: |
+          if [[ ${{ steps.version-check.outputs.up_to_date }} != true ]]
+          then
+            title="Spark version-bump.yml check"
+            message="dbt-spark needs version-bump.yml run before running the release.  The version bump is not up to date."
+            echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+            exit 1
+          fi
+
+      - name: "[DEBUG] Print Outputs"
+        run: |
+          echo up_to_date: ${{ steps.version-check.outputs.up_to_date }}
+
+  skip-generate-changelog:
+    runs-on: ubuntu-latest
+    needs: [audit-changelog]
+    if: needs.audit-changelog.outputs.exists == 'true'
+
+    steps:
+      - name: "Changelog Exists, Skip Generating New Changelog"
+        run: |
+          # Send notification
+          title="Skip changelog generation"
+          message="A changelog file already exists at ${{ needs.audit-changelog.outputs.changelog_path }}, skipping generating changelog"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+  skip-version-bump:
+    runs-on: ubuntu-latest
+    needs: [audit-version-in-code]
+    if: needs.audit-version-in-code.outputs.up_to_date == 'true'
+
+    steps:
+      - name: "Version Already Bumped"
+        run: |
+          # Send notification
+          title="Skip version bump"
+          message="The version has already been bumped to ${{ inputs.version_number }}, skipping version bump"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+  create-temp-branch:
+    runs-on: ubuntu-latest
+    needs: [audit-changelog, audit-version-in-code]
+    if: needs.audit-changelog.outputs.exists == 'false' || needs.audit-version-in-code.outputs.up_to_date == 'false'
+
+    outputs:
+      branch_name: ${{ steps.variables.outputs.branch_name }}
+
+    steps:
+      - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}"
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.sha }}
+
+      - name: "Generate Branch Name"
+        id: variables
+        run: |
+          name="prep-release/"
+          if [[ ${{ inputs.nightly_release }} == true  ]]
+          then
+            name+="nightly-release/"
+          elif [[ ${{ inputs.test_run }} == true ]]
+          then
+            name+="test-run/"
+          fi
+          name+="${{ inputs.version_number }}_$GITHUB_RUN_ID"
+          echo "branch_name=$name" >> $GITHUB_OUTPUT
+
+      - name: "Create Branch - ${{ steps.variables.outputs.branch_name }}"
+        run: |
+          git checkout -b ${{ steps.variables.outputs.branch_name }}
+          git push -u origin ${{ steps.variables.outputs.branch_name }}
+
+      - name: "[Notification] Temp branch created"
+        run: |
+          # Send notification
+          title="Temp branch generated"
+          message="The ${{ steps.variables.outputs.branch_name }} branch created"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+      - name: "[DEBUG] Print Outputs"
+        run: |
+          echo branch_name ${{ steps.variables.outputs.branch_name }}
+
+  generate-changelog-bump-version:
+    runs-on: ubuntu-latest
+    needs: [audit-changelog, audit-version-in-code, create-temp-branch]
+
+    steps:
+      - name: "Checkout ${{ github.repository }} Branch ${{ needs.create-temp-branch.outputs.branch_name }}"
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ needs.create-temp-branch.outputs.branch_name }}
+
+      - name: "Install Spark Dependencies"
+        if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }}
+        run: |
+          sudo apt-get update
+          sudo apt-get install libsasl2-dev
+
+      - name: "Add Homebrew To PATH"
+        run: |
+          echo "/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin" >> $GITHUB_PATH
+
+      - name: "Install Homebrew Packages"
+        run: |
+          brew install pre-commit
+          brew tap miniscruff/changie https://github.com/miniscruff/changie
+          brew install changie
+
+      - name: "Set json File Name"
+        id: json_file
+        run: |
+          echo "name=output_$GITHUB_RUN_ID.json" >> $GITHUB_OUTPUT
+
+      - name: "Get Core Team Membership"
+        run: |
+          gh api -H "Accept: application/vnd.github+json" orgs/dbt-labs/teams/core-group/members > ${{ steps.json_file.outputs.name }}
+        env:
+          GH_TOKEN: ${{ secrets.IT_TEAM_MEMBERSHIP }}
+
+      - name: "Set Core Team Membership for Changie Contributors exclusion"
+        id: set_team_membership
+        run: |
+          team_list=$(jq -r '.[].login' ${{ steps.json_file.outputs.name }})
+          echo $team_list
+          team_list_single=$(echo $team_list | tr '\n' ' ')
+          echo "CHANGIE_CORE_TEAM=$team_list_single" >> $GITHUB_ENV
+
+      - name: "Delete the json File"
+        run: |
+          rm ${{ steps.json_file.outputs.name }}
+
+      - name: "Generate Release Changelog"
+        if: needs.audit-changelog.outputs.exists == 'false'
+        run: |
+          if [[ ${{ needs.audit-changelog.outputs.is_prerelease }} -eq 1 ]]
+          then
+            changie batch ${{ needs.audit-changelog.outputs.base_version }} --move-dir '${{ needs.audit-changelog.outputs.base_version }}' --prerelease ${{ needs.audit-changelog.outputs.prerelease }}
+          elif [[ -d ".changes/${{ needs.audit-changelog.outputs.base_version }}" ]]
+          then
+            changie batch ${{ needs.audit-changelog.outputs.base_version }} --include '${{ needs.audit-changelog.outputs.base_version }}' --remove-prereleases
+          else # releasing a final patch with no prereleases
+            changie batch ${{ needs.audit-changelog.outputs.base_version }}
+          fi
+          changie merge
+          git status
+
+      - name: "Check Changelog Created Successfully"
+        if: needs.audit-changelog.outputs.exists == 'false'
+        run: |
+          title="Changelog"
+          if [[ -f ${{ needs.audit-changelog.outputs.changelog_path }} ]]
+          then
+            message="Changelog file created successfully"
+            echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+          else
+            message="Changelog failed to generate"
+            echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+            exit 1
+          fi
+
+      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_TARGET_VERSION }}
+
+      - name: "Install Python Dependencies"
+        if: needs.audit-version-in-code.outputs.up_to_date == 'false'
+        run: |
+          python3 -m venv env
+          source env/bin/activate
+          python -m pip install --upgrade pip
+
+      - name: "Bump Version To ${{ inputs.version_number }}"
+        if: needs.audit-version-in-code.outputs.up_to_date == 'false'
+        # note: bumpversion is no longer supported; the package now points to bump2version
+        run: |
+          source env/bin/activate
+          if [ -f "editable-requirements.txt" ]
+          then
+            python -m pip install -r dev-requirements.txt -r editable-requirements.txt
+          else
+            python -m pip install -r dev-requirements.txt
+          fi
+          env/bin/bumpversion --allow-dirty --new-version ${{ inputs.version_number }} major
+          git status
+
+      - name: "[Notification] Bump Version To ${{ inputs.version_number }}"
+        if: needs.audit-version-in-code.outputs.up_to_date == 'false'
+        run: |
+          title="Version bump"
+          message="Version successfully bumped in codebase to ${{ inputs.version_number }}"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+      # this step will fail on whitespace errors but also correct them
+      - name: "Remove Trailing Whitespace Via Pre-commit"
+        continue-on-error: true
+        run: |
+          pre-commit run trailing-whitespace --files .bumpversion.cfg CHANGELOG.md .changes/*
+          git status
+
+      # this step will fail on newline errors but also correct them
+      - name: "Removing Extra Newlines Via Pre-commit"
+        continue-on-error: true
+        run: |
+          pre-commit run end-of-file-fixer --files .bumpversion.cfg CHANGELOG.md .changes/*
+          git status
+
+      - name: "Commit & Push Changes"
+        run: |
+          #Data for commit
+          user="Github Build Bot"
+          email="buildbot@fishtownanalytics.com"
+          commit_message="Bumping version to ${{ inputs.version_number }} and generate changelog"
+          #Commit changes to branch
+          git config user.name "$user"
+          git config user.email "$email"
+          git pull
+          git add .
+          git commit -m "$commit_message"
+          git push
+
+  run-unit-tests:
+    name: unit test / python ${{ matrix.python-version }}
+
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+
+    steps:
+      - name: Check out the repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install python dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install libsasl2-dev
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install -r requirements.txt
+          python -m pip install -r dev-requirements.txt
+          python -m pip install -e .
+
+      - name: Run unit tests
+        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
+
+  run-integration-tests:
+    name: ${{ matrix.test }}
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        test:
+          - "apache_spark"
+          - "spark_session"
+          - "databricks_sql_endpoint"
+          - "databricks_cluster"
+          - "databricks_http_cluster"
+
+    env:
+      DBT_INVOCATION_ENV: github-actions
+      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
+      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+      DD_SITE: datadoghq.com
+      DD_ENV: ci
+      DD_SERVICE: ${{ github.event.repository.name }}
+      DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
+      DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
+      DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
+      DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
+      DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
+      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
+
+    steps:
+      - name: Check out the repository
+        if: github.event_name != 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      # explicitly checkout the branch for the PR,
+      # this is necessary for the `pull_request` event
+      - name: Check out the repository (PR)
+        if: github.event_name == 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      # the python version used here is not what is used in the tests themselves
+      - name: Set up Python for dagger
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: Install python dependencies
+        run: |
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install -r dagger/requirements.txt
+
+      - name: Run tests for ${{ matrix.test }}
+        run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
+
+  merge-changes-into-target-branch:
+    runs-on: ubuntu-latest
+    needs: [run-unit-tests, run-integration-tests, create-temp-branch, audit-version-in-code, audit-changelog]
+    if: |
+        !failure() && !cancelled() &&
+        inputs.test_run == false &&
+        (
+          needs.audit-changelog.outputs.exists == 'false' ||
+          needs.audit-version-in-code.outputs.up_to_date == 'false'
+        )
+
+    steps:
+      - name: "[Debug] Print Variables"
+        run: |
+          echo target_branch: ${{ inputs.target_branch }}
+          echo branch_name:   ${{ needs.create-temp-branch.outputs.branch_name }}
+          echo inputs.test_run: ${{ inputs.test_run }}
+          echo needs.audit-changelog.outputs.exists: ${{ needs.audit-changelog.outputs.exists }}
+          echo needs.audit-version-in-code.outputs.up_to_date: ${{ needs.audit-version-in-code.outputs.up_to_date }}
+
+      - name: "Checkout Repo ${{ github.repository }}"
+        uses: actions/checkout@v4
+
+      - name: "Merge Changes Into ${{ inputs.target_branch }}"
+        uses: everlytic/branch-merge@1.1.5
+        with:
+          source_ref: ${{ needs.create-temp-branch.outputs.branch_name }}
+          target_branch: ${{ inputs.target_branch }}
+          github_token: ${{ secrets.FISHTOWN_BOT_PAT }}
+          commit_message_template: "[Automated] Merged {source_ref} into target {target_branch} during release process"
+
+      - name: "[Notification] Changes Merged into ${{ inputs.target_branch }}"
+        run: |
+          title="Changelog and Version Bump Branch Merge"
+          message="The ${{ needs.create-temp-branch.outputs.branch_name }} branch was merged into ${{ inputs.target_branch }}"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+  determine-release-sha:
+    runs-on: ubuntu-latest
+    needs:
+      [
+        create-temp-branch,
+        merge-changes-into-target-branch,
+        audit-changelog,
+        audit-version-in-code,
+      ]
+    # always run this job, regardless of whether the dependent jobs were skipped
+    if: ${{ !failure() && !cancelled() }}
+
+    # Get the sha that will be released. If the changelog already exists on the input sha and the version has already
+    # been bumped, that sha is what we release. Otherwise this workflow generated the changelog and bumped the version,
+    # so the merge we just did produced a new sha; grab that one here instead.
+    outputs:
+      final_sha: ${{ steps.resolve_commit_sha.outputs.release_sha }}
+
+    steps:
+      - name: "[Debug] Print Variables"
+        run: |
+          echo target_branch:    ${{ inputs.target_branch }}
+          echo new_branch:       ${{ needs.create-temp-branch.outputs.branch_name }}
+          echo changelog_exists: ${{ needs.audit-changelog.outputs.exists }}
+          echo up_to_date:       ${{ needs.audit-version-in-code.outputs.up_to_date }}
+
+      - name: "Resolve Branch To Checkout"
+        id: resolve_branch
+        run: |
+          branch=""
+          if [[ "${{ inputs.test_run }}" == "true" ]]
+          then
+            branch=${{ needs.create-temp-branch.outputs.branch_name }}
+          else
+            branch=${{ inputs.target_branch }}
+          fi
+          echo "target_branch=$branch" >> $GITHUB_OUTPUT
+
+      - name: "[Notification] Resolve Branch To Checkout"
+        run: |
+          title="Branch pick"
+          message="The ${{ steps.resolve_branch.outputs.target_branch }} branch will be used for release"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+      - name: "Checkout Resolved Branch - ${{ steps.resolve_branch.outputs.target_branch }}"
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.resolve_branch.outputs.target_branch }}
+
+      - name: "[Debug] Log Branch"
+        run: git status
+
+      - name: "Resolve Commit SHA For Release"
+        id: resolve_commit_sha
+        run: |
+          commit_sha=""
+          if [[ ${{ needs.audit-changelog.outputs.exists }} == false ]] || [[ ${{ needs.audit-version-in-code.outputs.up_to_date }} == false ]]
+          then
+            commit_sha=$(git rev-parse HEAD)
+          else
+            commit_sha=${{ inputs.sha }}
+          fi
+          echo "release_sha=$commit_sha" >> $GITHUB_OUTPUT
+
+      - name: "[Notification] Resolve Commit SHA For Release"
+        run: |
+          title="Release commit pick"
+          message="The ${{ steps.resolve_commit_sha.outputs.release_sha }} commit will be used for release"
+          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
+
+      - name: "Remove Temp Branch - ${{ needs.create-temp-branch.outputs.branch_name }}"
+        if: ${{ inputs.test_run == false && needs.create-temp-branch.outputs.branch_name != '' }}
+        run: |
+          git push origin -d ${{ needs.create-temp-branch.outputs.branch_name }}
+
+      - name: "[Debug] Print Outputs"
+        run: |
+          echo release_sha: ${{ steps.resolve_commit_sha.outputs.release_sha }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index df30809a..9b2774f1 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -44,7 +44,7 @@ on:
       package_test_command:
         description: "Package test command"
         type: string
-        default: "dbt --version"
+        default: "python -c \"import dbt.adapters.spark\""
         required: true
       env_setup_script_path:
         description: "Environment setup script path"
@@ -89,7 +89,7 @@ jobs:
   audit-version-and-changelog:
     name: Bump package version, Generate changelog
 
-    uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@main
+    uses: dbt-labs/dbt-spark/.github/workflows/release-prep.yml@main
 
     with:
       sha: ${{ inputs.sha }}
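
The `determine-release-sha` job above reduces to two small decisions: which branch to check out (the temporary changelog/version-bump branch for test runs, otherwise the target branch) and which commit to release (HEAD of that branch when this workflow had to generate the changelog or bump the version, otherwise the provided sha). A minimal Python sketch of that intended decision logic, with hypothetical function and argument names that only mirror the workflow inputs and outputs:

    # Rough sketch only: mirrors the bash in "Resolve Branch To Checkout" and
    # "Resolve Commit SHA For Release"; names are illustrative, not workflow code.
    def resolve_branch(test_run: bool, temp_branch: str, target_branch: str) -> str:
        # Test runs release from the temporary branch created by create-temp-branch;
        # real releases merge it first and release from the target branch.
        return temp_branch if test_run else target_branch

    def resolve_release_sha(changelog_exists: bool, version_up_to_date: bool,
                            head_sha: str, input_sha: str) -> str:
        # If the changelog or version bump was generated in this workflow, the merge
        # produced a new commit, so release HEAD of the resolved branch; otherwise
        # the provided sha already contains everything and is released as-is.
        if not changelog_exists or not version_up_to_date:
            return head_sha
        return input_sha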

From a2c487111d57c9331620613ec83813ff6f0d7d96 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Fri, 1 Mar 2024 17:53:49 -0800
Subject: [PATCH 541/603] add tox file so release workflows succeed (#996)

* add tox file so release workflows succeed

* add empty tox file
---
 tox.ini | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 tox.ini

diff --git a/tox.ini b/tox.ini
new file mode 100644
index 00000000..62bb9c5b
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,3 @@
+[tox]
+skipsdist = True
+envlist = unit, flake8, integration-spark-thrift

From 748c7f615ec482cfd63eb05cbcf7b26f3b3655de Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Mon, 18 Mar 2024 18:15:56 -0700
Subject: [PATCH 542/603] Add workflow for spark and changelog (#998)

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .../unreleased/Features-20240318-033621.yaml  |  6 ++
 .github/workflows/release-internal.yml        | 64 +++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 .changes/unreleased/Features-20240318-033621.yaml
 create mode 100644 .github/workflows/release-internal.yml

diff --git a/.changes/unreleased/Features-20240318-033621.yaml b/.changes/unreleased/Features-20240318-033621.yaml
new file mode 100644
index 00000000..a1a1b9c5
--- /dev/null
+++ b/.changes/unreleased/Features-20240318-033621.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: ': Add new workflow for internal patch releases'
+time: 2024-03-18T03:36:21.634918-07:00
+custom:
+  Author: versusfacit
+  Issue: "38"
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
new file mode 100644
index 00000000..462e1481
--- /dev/null
+++ b/.github/workflows/release-internal.yml
@@ -0,0 +1,64 @@
+name: Release internal patch
+
+on:
+  workflow_dispatch:
+    inputs:
+      version_number:
+        description: "The release version number (i.e. 1.0.0b1)"
+        type: string
+        required: true
+      sha:
+        description: "The sha to use (leave empty to use latest on main)"
+        type: string
+        required: false
+      package_test_command:
+        description: "Package test command"
+        type: string
+        default: "python -c \"import dbt.adapters.spark\""
+        required: true
+      dbms_name:
+        description: "The name of the warehouse the adapter connects to."
+        type: string
+        default: "spark"
+        required: true
+  workflow_call:
+    inputs:
+      version_number:
+        description: "The release version number (i.e. 1.0.0b1)"
+        type: string
+        required: true
+      sha:
+        description: "The sha to use (leave empty to use latest on main)"
+        type: string
+        required: false
+      package_test_command:
+        description: "Package test command"
+        type: string
+        default: "python -c \"import dbt.adapters.spark\""
+        required: true
+      dbms_name:
+        description: "The name of the warehouse the adapter connects to."
+        type: string
+        default: "spark"
+        required: true
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  PYTHON_TARGET_VERSION: 3.11
+
+jobs:
+  invoke-reusable-workflow:
+    name: Build and Release Internally
+
+    uses: VersusFacit/dbt-release/.github/workflows/internal-archive-release.yml@main
+
+    with:
+      version_number: ${{ inputs.version_number }}
+      package_test_command: ${{ inputs.package_test_command }}
+      dbms_name: ${{ inputs.dbms_name }}
+      sha: ${{ inputs.sha }}
+
+    secrets: inherit
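
Both the `workflow_dispatch` and `workflow_call` variants default `package_test_command` to `python -c "import dbt.adapters.spark"`, so the post-build check is nothing more than a bare import of the adapter package. A slightly more verbose, roughly equivalent smoke test, shown only for illustration and not part of the workflow:

    import importlib
    import sys

    try:
        module = importlib.import_module("dbt.adapters.spark")
    except ImportError as exc:  # broken wheel or missing dependency
        print(f"smoke test failed: {exc}", file=sys.stderr)
        sys.exit(1)
    print(f"imported {module.__name__} successfully")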

From 9fe1a0613dac7ccd717783b8d78081f7a3059560 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Tue, 26 Mar 2024 17:15:52 -0700
Subject: [PATCH 543/603] Finish internal build workflow (#999)

* Add workflow for spark

* Shape up workflow.

* Modify range of acceptable semvers to include a build tag.

* Fix action name by making into a string

* add tests to workflow

* Change python version to match Cloud.

* Pare down spark testing.

* Change branch reference of workflow to main.

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .github/workflows/release-internal.yml | 153 +++++++++++++++++++------
 setup.py                               |   3 +-
 2 files changed, 118 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index 462e1481..6334fd28 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -1,4 +1,16 @@
-name: Release internal patch
+# What?
+#
+# Tag and release an arbitrary ref. Uploads to an internal archive for further processing.
+#
+# How?
+#
+# After checking out and testing the provided ref, the image is built and uploaded.
+#
+# When?
+#
+# Manual trigger.
+
+name: "Release internal patch"
 
 on:
   workflow_dispatch:
@@ -7,58 +19,125 @@ on:
         description: "The release version number (i.e. 1.0.0b1)"
         type: string
         required: true
-      sha:
-        description: "The sha to use (leave empty to use latest on main)"
-        type: string
-        required: false
-      package_test_command:
-        description: "Package test command"
-        type: string
-        default: "python -c \"import dbt.adapters.spark\""
-        required: true
-      dbms_name:
-        description: "The name of the warehouse the adapter connects to."
+      ref:
+        description: "The ref (sha or branch name) to use"
         type: string
-        default: "spark"
+        default: "main"
         required: true
-  workflow_call:
-    inputs:
-      version_number:
-        description: "The release version number (i.e. 1.0.0b1)"
-        type: string
-        required: true
-      sha:
-        description: "The sha to use (leave empty to use latest on main)"
-        type: string
-        required: false
       package_test_command:
         description: "Package test command"
         type: string
         default: "python -c \"import dbt.adapters.spark\""
         required: true
-      dbms_name:
-        description: "The name of the warehouse the adapter connects to."
-        type: string
-        default: "spark"
-        required: true
 
 defaults:
   run:
-    shell: bash
+    shell: "bash"
 
 env:
-  PYTHON_TARGET_VERSION: 3.11
+  PYTHON_TARGET_VERSION: 3.8
 
 jobs:
+  run-unit-tests:
+    name: "Unit tests"
+
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - name: "Check out the repository"
+        uses: actions/checkout@v3
+
+      - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}"
+        uses: actions/setup-python@v4
+        with:
+          python-version: "${{ env.PYTHON_TARGET_VERSION }}"
+
+      - name: Install python dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install libsasl2-dev
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install -r requirements.txt
+          python -m pip install -r dev-requirements.txt
+          python -m pip install -e .
+
+      - name: Run unit tests
+        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
+
+  run-integration-tests:
+    name: "${{ matrix.test }}"
+    needs: [run-unit-tests]
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        test:
+          - "apache_spark"
+          - "spark_session"
+          - "databricks_sql_endpoint"
+          - "databricks_cluster"
+          - "databricks_http_cluster"
+
+    env:
+      DBT_INVOCATION_ENV: github-actions
+      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
+      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+      DD_SITE: datadoghq.com
+      DD_ENV: ci
+      DD_SERVICE: ${{ github.event.repository.name }}
+      DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
+      DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
+      DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
+      DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
+      DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
+      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
+
+    steps:
+      - name: Check out the repository
+        if: github.event_name != 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      # explicitly checkout the branch for the PR,
+      # this is necessary for the `pull_request` event
+      - name: Check out the repository (PR)
+        if: github.event_name == 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          ref: ${{ github.event.pull_request.head.ref }}
+
+      # the python version used here is not what is used in the tests themselves
+      - name: Set up Python for dagger
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: Install python dependencies
+        run: |
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install -r dagger/requirements.txt
+
+      - name: "Run tests for ${{ matrix.test }}"
+        run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
+
   invoke-reusable-workflow:
-    name: Build and Release Internally
+    name: "Build and Release Internally"
+    needs: [run-integration-tests]
 
-    uses: VersusFacit/dbt-release/.github/workflows/internal-archive-release.yml@main
+    uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main"
 
     with:
-      version_number: ${{ inputs.version_number }}
-      package_test_command: ${{ inputs.package_test_command }}
-      dbms_name: ${{ inputs.dbms_name }}
-      sha: ${{ inputs.sha }}
+      version_number: "${{ inputs.version_number }}"
+      package_test_command: "${{ inputs.package_test_command }}"
+      dbms_name: "spark"
+      ref: "${{ inputs.ref }}"
 
-    secrets: inherit
+    secrets: "inherit"
diff --git a/setup.py b/setup.py
index 067ca41b..48339e7c 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,8 @@ def _get_plugin_version_dict():
     _version_path = os.path.join(this_directory, "dbt", "adapters", "spark", "__version__.py")
     _semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
     _pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
-    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}["']"""
+    _build = r"""(\+build[0-9]+)?"""
+    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}{_build}["']"""
     with open(_version_path) as f:
         match = re.search(_version_pattern, f.read().strip())
         if match is None:
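
The new `_build` fragment lets the version pattern in `setup.py` accept an optional `+build<N>` suffix in addition to the existing semver and pre-release pieces. A standalone check of the combined pattern (the version strings below are illustrative):

    import re

    # Same fragments that setup.py composes into _version_pattern.
    _semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
    _pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
    _build = r"""(\+build[0-9]+)?"""
    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}{_build}["']"""

    for line in ('version = "1.8.0b2"', 'version = "1.8.0b2+build1"'):
        match = re.search(_version_pattern, line)
        print(line, "->", match.groupdict() if match else "no match")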

From 7129f59e123dc572721840926860dd350105aab7 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 28 Mar 2024 13:44:15 -0400
Subject: [PATCH 544/603] Pin `black>=24.3` (#1000)

---
 .changes/unreleased/Security-20240327-193553.yaml | 6 ++++++
 dev-requirements.txt                              | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Security-20240327-193553.yaml

diff --git a/.changes/unreleased/Security-20240327-193553.yaml b/.changes/unreleased/Security-20240327-193553.yaml
new file mode 100644
index 00000000..daee50fd
--- /dev/null
+++ b/.changes/unreleased/Security-20240327-193553.yaml
@@ -0,0 +1,6 @@
+kind: Security
+body: Pin `black>=24.3` in `dev-requirements.txt`
+time: 2024-03-27T19:35:53.102377-04:00
+custom:
+  Author: mikealfare
+  PR: "1000"
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8f674d84..26522411 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,7 +7,7 @@ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.12
+black>=24.3
 bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"
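
The comment above the pin describes the policy (compatible-release pins for 1.x+ packages), while this change moves black from a compatible-release pin to a plain floor: `~=23.12` means `>=23.12, <24`, whereas `>=24.3` accepts any later major. If it helps to see the difference, the third-party `packaging` library (not something this patch touches) can evaluate both specifiers; the version numbers are examples:

    from packaging.specifiers import SpecifierSet

    compatible_release = SpecifierSet("~=23.12")  # equivalent to >=23.12, <24
    floor_only = SpecifierSet(">=24.3")

    for version in ("23.12.1", "24.3.0", "25.1.0"):
        print(version,
              "~=23.12:", version in compatible_release,
              ">=24.3:", version in floor_only)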

From 45b904cb06179f15c9b04c08b1bb7948ae3d5035 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 2 Apr 2024 14:39:48 -0400
Subject: [PATCH 545/603] Pin `beartype<0.18.0` (#1001)

* Pin `beartype<0.18.0`
---
 dagger/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dagger/requirements.txt b/dagger/requirements.txt
index b50c448d..f150e309 100644
--- a/dagger/requirements.txt
+++ b/dagger/requirements.txt
@@ -1,2 +1,3 @@
+beartype<0.18.0
 dagger-io~=0.9.7
 python-dotenv

From 377c65fb02f57001a52302291653ceef2081b97e Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 3 Apr 2024 15:45:13 -0400
Subject: [PATCH 546/603] Add `dbt-core~=1.8.0a1` as convenience dep (#1002)

* add `dbt-core~=1.8.0a1` as convenience dep
---
 .changes/unreleased/Dependencies-20240403-135436.yaml | 6 ++++++
 setup.py                                              | 2 ++
 2 files changed, 8 insertions(+)
 create mode 100644 .changes/unreleased/Dependencies-20240403-135436.yaml

diff --git a/.changes/unreleased/Dependencies-20240403-135436.yaml b/.changes/unreleased/Dependencies-20240403-135436.yaml
new file mode 100644
index 00000000..345b7074
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240403-135436.yaml
@@ -0,0 +1,6 @@
+kind: Dependencies
+body: Add `dbt-core` as a dependency to preserve backwards compatibility for installation
+time: 2024-04-03T13:54:36.667724-04:00
+custom:
+  Author: mikealfare
+  PR: "1002"
diff --git a/setup.py b/setup.py
index 48339e7c..325d31cc 100644
--- a/setup.py
+++ b/setup.py
@@ -67,6 +67,8 @@ def _get_plugin_version_dict():
         "sqlparams>=3.0.0",
         "dbt-common>=0.1.0a1,<2.0",
         "dbt-adapters>=0.1.0a1,<2.0",
+        # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
+        "dbt-core>=1.8.0a1",
     ],
     extras_require={
         "ODBC": odbc_extras,

From da885394070094b6c632b06dabf7a2a4a50e5c0b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 3 Apr 2024 16:01:05 -0400
Subject: [PATCH 547/603] [create-pull-request] automated change (#1003)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                                |  2 +-
 .changes/1.8.0-b2.md                            | 13 +++++++++++++
 .../Dependencies-20240403-135436.yaml           |  0
 .../Features-20240318-033621.yaml               |  0
 .../Security-20240327-193553.yaml               |  0
 CHANGELOG.md                                    | 17 ++++++++++++++++-
 dbt/adapters/spark/__version__.py               |  2 +-
 setup.py                                        |  2 +-
 8 files changed, 32 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.8.0-b2.md
 rename .changes/{unreleased => 1.8.0}/Dependencies-20240403-135436.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Features-20240318-033621.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Security-20240327-193553.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 595914b2..aa22c821 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.8.0b1
+current_version = 1.8.0b2
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.8.0-b2.md b/.changes/1.8.0-b2.md
new file mode 100644
index 00000000..806d6115
--- /dev/null
+++ b/.changes/1.8.0-b2.md
@@ -0,0 +1,13 @@
+## dbt-spark 1.8.0-b2 - April 03, 2024
+
+### Features
+
+- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
+
+### Dependencies
+
+- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
+
+### Security
+
+- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
diff --git a/.changes/unreleased/Dependencies-20240403-135436.yaml b/.changes/1.8.0/Dependencies-20240403-135436.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240403-135436.yaml
rename to .changes/1.8.0/Dependencies-20240403-135436.yaml
diff --git a/.changes/unreleased/Features-20240318-033621.yaml b/.changes/1.8.0/Features-20240318-033621.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240318-033621.yaml
rename to .changes/1.8.0/Features-20240318-033621.yaml
diff --git a/.changes/unreleased/Security-20240327-193553.yaml b/.changes/1.8.0/Security-20240327-193553.yaml
similarity index 100%
rename from .changes/unreleased/Security-20240327-193553.yaml
rename to .changes/1.8.0/Security-20240327-193553.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d65c50be..81f0575d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,22 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.8.0-b2 - April 03, 2024
+
+### Features
+
+- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
+
+### Dependencies
+
+- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
+
+### Security
+
+- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
+
+
+
 ## dbt-spark 1.8.0-b1 - March 01, 2024
 
 ### Features
@@ -45,7 +61,6 @@
 - [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
 - [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
 
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 6496f3e2..7d16c28f 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.8.0b1"
+version = "1.8.0b2"
diff --git a/setup.py b/setup.py
index 325d31cc..55112e3f 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def _get_plugin_version_dict():
 
 
 package_name = "dbt-spark"
-package_version = "1.8.0b1"
+package_version = "1.8.0b2"
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=4.0.39"]

From 0646c2403b97be2f83e70cac7233f22b35d496d7 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:09:34 -0500
Subject: [PATCH 548/603] test name change (#1005)

---
 tests/functional/adapter/dbt_clone/test_dbt_clone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/functional/adapter/dbt_clone/test_dbt_clone.py b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
index a5e8d70e..80e919a2 100644
--- a/tests/functional/adapter/dbt_clone/test_dbt_clone.py
+++ b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
@@ -15,7 +15,7 @@
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session")
-class TestSparkBigqueryClonePossible(BaseClonePossible):
+class TestSparkClonePossible(BaseClonePossible):
     @pytest.fixture(scope="class")
     def models(self):
         return {

From e93deea64800d9c60a014d5e2a366b19ef630e17 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 18 Apr 2024 22:47:20 -0400
Subject: [PATCH 549/603] Update dependabot config to cover GHA and Docker
 (#1006)

* Update dependabot config to cover GHA and Docker
---
 .../unreleased/Under the Hood-20240410-183535.yaml    |  6 ++++++
 .github/dependabot.yml                                | 11 ++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240410-183535.yaml

diff --git a/.changes/unreleased/Under the Hood-20240410-183535.yaml b/.changes/unreleased/Under the Hood-20240410-183535.yaml
new file mode 100644
index 00000000..54b69f28
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240410-183535.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Update dependabot config to cover GHA and Docker
+time: 2024-04-10T18:35:35.093246-04:00
+custom:
+  Author: mikealfare
+  Issue: "1006"
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 2a6f3449..ae2be43a 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,8 +1,17 @@
 version: 2
 updates:
-  # python dependencies
   - package-ecosystem: "pip"
     directory: "/"
     schedule:
       interval: "daily"
     rebase-strategy: "disabled"
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"
+  - package-ecosystem: "docker"
+    directory: "/docker"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"

From 080ba71b2689a67d9195dcae2cb6e9b0116e76dc Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 19 Apr 2024 12:48:09 -0400
Subject: [PATCH 550/603] add pre-commit check for dbt-core (#1015)

---
 .pre-commit-config.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5e7fdbd0..882a3276 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,6 +14,10 @@ repos:
   - id: end-of-file-fixer
   - id: trailing-whitespace
   - id: check-case-conflict
+- repo: https://github.com/dbt-labs/pre-commit-hooks
+  rev: v0.1.0a1
+  hooks:
+  - id: dbt-core-in-adapters-check
 - repo: https://github.com/psf/black
   rev: 23.1.0
   hooks:

From c5742f225f20938f16c4ce95ba7e71d6874a198c Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 25 Apr 2024 10:20:44 -0400
Subject: [PATCH 551/603] pin macos test runners to macos-12 (#1016)

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 68911710..189a8847 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -173,7 +173,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-12, windows-latest]
         python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:

From c0c3f4f0f81004d49ab7aa7209aae096d6b8eaea Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 25 Apr 2024 23:48:22 +0000
Subject: [PATCH 552/603] Bump actions/checkout from 3 to 4 (#1014)

* Bump actions/checkout from 3 to 4

Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240419-024916.yaml | 6 ++++++
 .github/workflows/integration.yml                     | 4 ++--
 .github/workflows/main.yml                            | 6 +++---
 .github/workflows/release-internal.yml                | 6 +++---
 .github/workflows/release-prep.yml                    | 6 +++---
 5 files changed, 17 insertions(+), 11 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20240419-024916.yaml

diff --git a/.changes/unreleased/Dependencies-20240419-024916.yaml b/.changes/unreleased/Dependencies-20240419-024916.yaml
new file mode 100644
index 00000000..1ef46465
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240419-024916.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/checkout from 3 to 4"
+time: 2024-04-19T02:49:16.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1014
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 94dece35..6e5f42af 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -77,7 +77,7 @@ jobs:
     steps:
       - name: Check out the repository
         if: github.event_name != 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -85,7 +85,7 @@ jobs:
       # this is necessary for the `pull_request` event
       - name: Check out the repository (PR)
         if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
           ref: ${{ github.event.pull_request.head.sha }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 189a8847..ebd0a3bd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -42,7 +42,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -82,7 +82,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
@@ -123,7 +123,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python
         uses: actions/setup-python@v4
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index 6334fd28..d8b1e459 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -46,7 +46,7 @@ jobs:
 
     steps:
       - name: "Check out the repository"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}"
         uses: actions/setup-python@v4
@@ -100,7 +100,7 @@ jobs:
     steps:
       - name: Check out the repository
         if: github.event_name != 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -108,7 +108,7 @@ jobs:
       # this is necessary for the `pull_request` event
       - name: Check out the repository (PR)
         if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
           ref: ${{ github.event.pull_request.head.ref }}
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index 1a6e450c..e214de82 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -452,7 +452,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
@@ -505,7 +505,7 @@ jobs:
     steps:
       - name: Check out the repository
         if: github.event_name != 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -513,7 +513,7 @@ jobs:
       # this is necessary for the `pull_request` event
       - name: Check out the repository (PR)
         if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
           ref: ${{ github.event.pull_request.head.sha }}

From 8dfd12c16b6d0f2a6abf2213dc33cb7cf61ce45c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 25 Apr 2024 23:59:09 +0000
Subject: [PATCH 553/603] Bump actions/setup-python from 4 to 5 (#1013)

* Bump actions/setup-python from 4 to 5

Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240419-024912.yaml | 6 ++++++
 .github/workflows/integration.yml                     | 2 +-
 .github/workflows/main.yml                            | 8 ++++----
 .github/workflows/release-internal.yml                | 4 ++--
 .github/workflows/release-prep.yml                    | 4 ++--
 5 files changed, 15 insertions(+), 9 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20240419-024912.yaml

diff --git a/.changes/unreleased/Dependencies-20240419-024912.yaml b/.changes/unreleased/Dependencies-20240419-024912.yaml
new file mode 100644
index 00000000..ea5b75aa
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240419-024912.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/setup-python from 4 to 5"
+time: 2024-04-19T02:49:12.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1013
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 6e5f42af..e82a83c4 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -92,7 +92,7 @@ jobs:
 
       # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ebd0a3bd..c71df970 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -47,7 +47,7 @@ jobs:
           persist-credentials: false
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.8'
 
@@ -85,7 +85,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -126,7 +126,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.8'
 
@@ -178,7 +178,7 @@ jobs:
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index d8b1e459..eb892415 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -49,7 +49,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "${{ env.PYTHON_TARGET_VERSION }}"
 
@@ -115,7 +115,7 @@ jobs:
 
       # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index e214de82..0061a860 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -455,7 +455,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -520,7 +520,7 @@ jobs:
 
       # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 

From 804567cea3f64af2a3b19bb51db76b637ed38ae5 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 3 May 2024 15:36:45 -0400
Subject: [PATCH 554/603] skip tests on `main` to unblock CI, create issues to
 fix them (#1034)

---
 tests/functional/adapter/test_constraints.py  | 11 +++++++++++
 tests/functional/adapter/test_python_model.py |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 41b50ef8..654bbdb5 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -358,6 +358,17 @@ def models(self):
             "constraints_schema.yml": constraints_yml,
         }
 
+    @pytest.mark.skip(
+        "Databricks now raises an exception, which gets raised prior to the `expected_pass` check."
+        "See https://github.com/dbt-labs/dbt-spark/issues/1009"
+    )
+    def test__constraints_enforcement_rollback(
+        self, project, expected_color, expected_error_messages, null_model_sql
+    ):
+        super().test__constraints_enforcement_rollback(
+            project, expected_color, expected_error_messages, null_model_sql
+        )
+
 
 # TODO: Like the tests above, this does test that model-level constraints don't
 # result in errors, but it does not verify that they are actually present in
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 1195cbd3..05e25c5f 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -24,6 +24,13 @@ class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     def project_config_update(self):
         return {}
 
+    @pytest.mark.skip(
+        "Databricks can't find the transaction log"
+        "See https://github.com/dbt-labs/dbt-spark/issues/1033"
+    )
+    def test_incremental(self, project):
+        super().test_incremental(project)
+
 
 models__simple_python_model = """
 import pandas
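
Both skips in this patch use the same pattern: the adapter test class re-declares a test it inherits from the shared base suite, marks it with `@pytest.mark.skip`, and delegates to `super()`, so the override stays a thin shim that can be deleted once the linked issues are fixed. A stripped-down illustration (class and test names here are made up):

    import pytest

    class BaseExampleTests:
        # Stand-in for a test inherited from the shared dbt test suite.
        def test_something(self):
            assert True

    class TestSparkExample(BaseExampleTests):
        @pytest.mark.skip("Known upstream issue; re-enable once it is resolved.")
        def test_something(self):
            # Delegating to super() keeps the skip a one-line override.
            super().test_something()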

From bec191158fa3a5586c2a8f2005ac3619d3e3b3a9 Mon Sep 17 00:00:00 2001
From: FishtownBuildBot <77737458+FishtownBuildBot@users.noreply.github.com>
Date: Mon, 6 May 2024 17:42:37 -0400
Subject: [PATCH 555/603] Cleanup main after cutting new 1.8.latest branch
 (#1032)

* Clean up changelog on main

* Bumping version to 1.9.0a1

* Code quality cleanup

---------

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.8.0-b1.md                          | 39 -------------
 .changes/1.8.0-b2.md                          | 13 -----
 .../1.8.0/Dependencies-20231009-220732.yaml   |  6 --
 .../1.8.0/Dependencies-20231010-222853.yaml   |  6 --
 .../1.8.0/Dependencies-20231010-222910.yaml   |  6 --
 .../1.8.0/Dependencies-20231013-223750.yaml   |  6 --
 .../1.8.0/Dependencies-20231027-230251.yaml   |  6 --
 .../1.8.0/Dependencies-20231027-230254.yaml   |  6 --
 .../1.8.0/Dependencies-20231027-230301.yaml   |  6 --
 .../1.8.0/Dependencies-20231108-222326.yaml   |  6 --
 .../1.8.0/Dependencies-20231110-224056.yaml   |  6 --
 .../1.8.0/Dependencies-20231113-224111.yaml   |  6 --
 .../1.8.0/Dependencies-20231127-220733.yaml   |  6 --
 .../1.8.0/Dependencies-20231127-220737.yaml   |  6 --
 .../1.8.0/Dependencies-20231127-220741.yaml   |  6 --
 .../1.8.0/Dependencies-20231204-224210.yaml   |  6 --
 .../1.8.0/Dependencies-20231212-223929.yaml   |  6 --
 .../1.8.0/Dependencies-20240403-135436.yaml   |  6 --
 .changes/1.8.0/Features-20240220-195925.yaml  |  6 --
 .changes/1.8.0/Features-20240318-033621.yaml  |  6 --
 .changes/1.8.0/Fixes-20231107-134141.yaml     |  6 --
 .changes/1.8.0/Fixes-20231221-081949.yaml     |  6 --
 .changes/1.8.0/Security-20240327-193553.yaml  |  6 --
 .../1.8.0/Under the Hood-20230929-161218.yaml |  6 --
 .../1.8.0/Under the Hood-20231119-132050.yaml |  6 --
 .../1.8.0/Under the Hood-20231214-134728.yaml |  6 --
 .../1.8.0/Under the Hood-20240111-114806.yaml |  6 --
 .../Dependencies-20240419-024912.yaml         |  6 --
 .../Dependencies-20240419-024916.yaml         |  6 --
 .../Under the Hood-20240410-183535.yaml       |  6 --
 CHANGELOG.md                                  | 56 -------------------
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 34 files changed, 3 insertions(+), 279 deletions(-)
 delete mode 100644 .changes/1.8.0-b1.md
 delete mode 100644 .changes/1.8.0-b2.md
 delete mode 100644 .changes/1.8.0/Dependencies-20231009-220732.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231010-222853.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231010-222910.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231013-223750.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231027-230251.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231027-230254.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231027-230301.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231108-222326.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231110-224056.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231113-224111.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231127-220733.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231127-220737.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231127-220741.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231204-224210.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231212-223929.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20240403-135436.yaml
 delete mode 100644 .changes/1.8.0/Features-20240220-195925.yaml
 delete mode 100644 .changes/1.8.0/Features-20240318-033621.yaml
 delete mode 100644 .changes/1.8.0/Fixes-20231107-134141.yaml
 delete mode 100644 .changes/1.8.0/Fixes-20231221-081949.yaml
 delete mode 100644 .changes/1.8.0/Security-20240327-193553.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20230929-161218.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20231119-132050.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20231214-134728.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20240111-114806.yaml
 delete mode 100644 .changes/unreleased/Dependencies-20240419-024912.yaml
 delete mode 100644 .changes/unreleased/Dependencies-20240419-024916.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20240410-183535.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index aa22c821..77a3f463 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.8.0b2
+current_version = 1.9.0a1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.8.0-b1.md b/.changes/1.8.0-b1.md
deleted file mode 100644
index 4f4091a9..00000000
--- a/.changes/1.8.0-b1.md
+++ /dev/null
@@ -1,39 +0,0 @@
-## dbt-spark 1.8.0-b1 - March 01, 2024
-
-### Features
-
-- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987))
-
-### Fixes
-
-- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935))
-- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
-
-### Under the Hood
-
-- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949))
-- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962))
-- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972))
-
-### Dependencies
-
-- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903))
-- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904))
-- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905))
-- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914))
-- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925))
-- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926))
-- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927))
-- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942))
-- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946))
-- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947))
-- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951))
-- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952))
-- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953))
-- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956))
-- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959))
-
-### Contributors
-- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
diff --git a/.changes/1.8.0-b2.md b/.changes/1.8.0-b2.md
deleted file mode 100644
index 806d6115..00000000
--- a/.changes/1.8.0-b2.md
+++ /dev/null
@@ -1,13 +0,0 @@
-## dbt-spark 1.8.0-b2 - April 03, 2024
-
-### Features
-
-- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
-
-### Dependencies
-
-- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
-
-### Security
-
-- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
diff --git a/.changes/1.8.0/Dependencies-20231009-220732.yaml b/.changes/1.8.0/Dependencies-20231009-220732.yaml
deleted file mode 100644
index 4b9073fa..00000000
--- a/.changes/1.8.0/Dependencies-20231009-220732.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pre-commit-hooks requirement from ~=4.4 to ~=4.5"
-time: 2023-10-09T22:07:32.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 903
diff --git a/.changes/1.8.0/Dependencies-20231010-222853.yaml b/.changes/1.8.0/Dependencies-20231010-222853.yaml
deleted file mode 100644
index 43b481ed..00000000
--- a/.changes/1.8.0/Dependencies-20231010-222853.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.5.1 to 1.6.0"
-time: 2023-10-10T22:28:53.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 904
diff --git a/.changes/1.8.0/Dependencies-20231010-222910.yaml b/.changes/1.8.0/Dependencies-20231010-222910.yaml
deleted file mode 100644
index c6dbe582..00000000
--- a/.changes/1.8.0/Dependencies-20231010-222910.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pyodbc requirement from ~=4.0.39 to ~=5.0.0"
-time: 2023-10-10T22:29:10.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 905
diff --git a/.changes/1.8.0/Dependencies-20231013-223750.yaml b/.changes/1.8.0/Dependencies-20231013-223750.yaml
deleted file mode 100644
index 2cea8c6d..00000000
--- a/.changes/1.8.0/Dependencies-20231013-223750.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pre-commit requirement from ~=3.4 to ~=3.5"
-time: 2023-10-13T22:37:50.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 914
diff --git a/.changes/1.8.0/Dependencies-20231027-230251.yaml b/.changes/1.8.0/Dependencies-20231027-230251.yaml
deleted file mode 100644
index 4ad0d65f..00000000
--- a/.changes/1.8.0/Dependencies-20231027-230251.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pyodbc requirement from ~=5.0.0 to ~=5.0.1"
-time: 2023-10-27T23:02:51.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 925
diff --git a/.changes/1.8.0/Dependencies-20231027-230254.yaml b/.changes/1.8.0/Dependencies-20231027-230254.yaml
deleted file mode 100644
index f1b74521..00000000
--- a/.changes/1.8.0/Dependencies-20231027-230254.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.6.0 to 1.6.1"
-time: 2023-10-27T23:02:54.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 926
diff --git a/.changes/1.8.0/Dependencies-20231027-230301.yaml b/.changes/1.8.0/Dependencies-20231027-230301.yaml
deleted file mode 100644
index 74548dde..00000000
--- a/.changes/1.8.0/Dependencies-20231027-230301.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.9 to ~=23.10"
-time: 2023-10-27T23:03:01.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 927
diff --git a/.changes/1.8.0/Dependencies-20231108-222326.yaml b/.changes/1.8.0/Dependencies-20231108-222326.yaml
deleted file mode 100644
index fdd35004..00000000
--- a/.changes/1.8.0/Dependencies-20231108-222326.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.10 to ~=23.11"
-time: 2023-11-08T22:23:26.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 942
diff --git a/.changes/1.8.0/Dependencies-20231110-224056.yaml b/.changes/1.8.0/Dependencies-20231110-224056.yaml
deleted file mode 100644
index efdf9a54..00000000
--- a/.changes/1.8.0/Dependencies-20231110-224056.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.6.1 to 1.7.0"
-time: 2023-11-10T22:40:56.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 946
diff --git a/.changes/1.8.0/Dependencies-20231113-224111.yaml b/.changes/1.8.0/Dependencies-20231113-224111.yaml
deleted file mode 100644
index b2fb306c..00000000
--- a/.changes/1.8.0/Dependencies-20231113-224111.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest-xdist requirement from ~=3.3 to ~=3.4"
-time: 2023-11-13T22:41:11.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 947
diff --git a/.changes/1.8.0/Dependencies-20231127-220733.yaml b/.changes/1.8.0/Dependencies-20231127-220733.yaml
deleted file mode 100644
index be23a48f..00000000
--- a/.changes/1.8.0/Dependencies-20231127-220733.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest-xdist requirement from ~=3.4 to ~=3.5"
-time: 2023-11-27T22:07:33.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 951
diff --git a/.changes/1.8.0/Dependencies-20231127-220737.yaml b/.changes/1.8.0/Dependencies-20231127-220737.yaml
deleted file mode 100644
index 60e2be67..00000000
--- a/.changes/1.8.0/Dependencies-20231127-220737.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update wheel requirement from ~=0.41 to ~=0.42"
-time: 2023-11-27T22:07:37.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 952
diff --git a/.changes/1.8.0/Dependencies-20231127-220741.yaml b/.changes/1.8.0/Dependencies-20231127-220741.yaml
deleted file mode 100644
index 63d572f2..00000000
--- a/.changes/1.8.0/Dependencies-20231127-220741.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.7.0 to 1.7.1"
-time: 2023-11-27T22:07:41.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 953
diff --git a/.changes/1.8.0/Dependencies-20231204-224210.yaml b/.changes/1.8.0/Dependencies-20231204-224210.yaml
deleted file mode 100644
index c415934d..00000000
--- a/.changes/1.8.0/Dependencies-20231204-224210.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update freezegun requirement from ~=1.2 to ~=1.3"
-time: 2023-12-04T22:42:10.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 956
diff --git a/.changes/1.8.0/Dependencies-20231212-223929.yaml b/.changes/1.8.0/Dependencies-20231212-223929.yaml
deleted file mode 100644
index 65f308f7..00000000
--- a/.changes/1.8.0/Dependencies-20231212-223929.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.11 to ~=23.12"
-time: 2023-12-12T22:39:29.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 959
diff --git a/.changes/1.8.0/Dependencies-20240403-135436.yaml b/.changes/1.8.0/Dependencies-20240403-135436.yaml
deleted file mode 100644
index 345b7074..00000000
--- a/.changes/1.8.0/Dependencies-20240403-135436.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Dependencies
-body: Add `dbt-core` as a dependency to preserve backwards compatibility for installation
-time: 2024-04-03T13:54:36.667724-04:00
-custom:
-  Author: mikealfare
-  PR: "1002"
diff --git a/.changes/1.8.0/Features-20240220-195925.yaml b/.changes/1.8.0/Features-20240220-195925.yaml
deleted file mode 100644
index c5d86ab7..00000000
--- a/.changes/1.8.0/Features-20240220-195925.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Implement spark__safe_cast and add functional tests for unit testing
-time: 2024-02-20T19:59:25.907821-05:00
-custom:
-  Author: michelleark
-  Issue: "987"
diff --git a/.changes/1.8.0/Features-20240318-033621.yaml b/.changes/1.8.0/Features-20240318-033621.yaml
deleted file mode 100644
index a1a1b9c5..00000000
--- a/.changes/1.8.0/Features-20240318-033621.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: ': Add new workflow for internal patch releases'
-time: 2024-03-18T03:36:21.634918-07:00
-custom:
-  Author: versusfacit
-  Issue: "38"
diff --git a/.changes/1.8.0/Fixes-20231107-134141.yaml b/.changes/1.8.0/Fixes-20231107-134141.yaml
deleted file mode 100644
index d4f56eac..00000000
--- a/.changes/1.8.0/Fixes-20231107-134141.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Support new agate Integer type and empty seed test
-time: 2023-11-07T13:41:41.033441-05:00
-custom:
-  Author: gshank
-  Issue: "935"
diff --git a/.changes/1.8.0/Fixes-20231221-081949.yaml b/.changes/1.8.0/Fixes-20231221-081949.yaml
deleted file mode 100644
index b10c8141..00000000
--- a/.changes/1.8.0/Fixes-20231221-081949.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Fix hardcoded file format for python models
-time: 2023-12-21T08:19:49.630806+02:00
-custom:
-  Author: ben-schreiber
-  Issue: "803"
diff --git a/.changes/1.8.0/Security-20240327-193553.yaml b/.changes/1.8.0/Security-20240327-193553.yaml
deleted file mode 100644
index daee50fd..00000000
--- a/.changes/1.8.0/Security-20240327-193553.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Security
-body: Pin `black>=24.3` in `dev-requirements.txt`
-time: 2024-03-27T19:35:53.102377-04:00
-custom:
-  Author: mikealfare
-  PR: "1000"
diff --git a/.changes/1.8.0/Under the Hood-20230929-161218.yaml b/.changes/1.8.0/Under the Hood-20230929-161218.yaml
deleted file mode 100644
index 9b5c6818..00000000
--- a/.changes/1.8.0/Under the Hood-20230929-161218.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow.
-time: 2023-09-29T16:12:18.968755+02:00
-custom:
-  Author: JCZuurmond, colin-rogers-dbt
-  Issue: "719"
diff --git a/.changes/1.8.0/Under the Hood-20231119-132050.yaml b/.changes/1.8.0/Under the Hood-20231119-132050.yaml
deleted file mode 100644
index 61c7f8ab..00000000
--- a/.changes/1.8.0/Under the Hood-20231119-132050.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Add tests for --empty flag
-time: 2023-11-19T13:20:50.076459-05:00
-custom:
-  Author: michelleark
-  Issue: "949"
diff --git a/.changes/1.8.0/Under the Hood-20231214-134728.yaml b/.changes/1.8.0/Under the Hood-20231214-134728.yaml
deleted file mode 100644
index b1de2ddb..00000000
--- a/.changes/1.8.0/Under the Hood-20231214-134728.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Remove unused `invalid_insert_overwrite_delta_msg` message
-time: 2023-12-14T13:47:28.444107-07:00
-custom:
-  Author: dbeatty10
-  Issue: "962"
diff --git a/.changes/1.8.0/Under the Hood-20240111-114806.yaml b/.changes/1.8.0/Under the Hood-20240111-114806.yaml
deleted file mode 100644
index 31705f46..00000000
--- a/.changes/1.8.0/Under the Hood-20240111-114806.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Update import paths and list_relations to support decoupling adapters/core
-time: 2024-01-11T11:48:06.120111-08:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "972"
diff --git a/.changes/unreleased/Dependencies-20240419-024912.yaml b/.changes/unreleased/Dependencies-20240419-024912.yaml
deleted file mode 100644
index ea5b75aa..00000000
--- a/.changes/unreleased/Dependencies-20240419-024912.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump actions/setup-python from 4 to 5"
-time: 2024-04-19T02:49:12.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1013
diff --git a/.changes/unreleased/Dependencies-20240419-024916.yaml b/.changes/unreleased/Dependencies-20240419-024916.yaml
deleted file mode 100644
index 1ef46465..00000000
--- a/.changes/unreleased/Dependencies-20240419-024916.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump actions/checkout from 3 to 4"
-time: 2024-04-19T02:49:16.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1014
diff --git a/.changes/unreleased/Under the Hood-20240410-183535.yaml b/.changes/unreleased/Under the Hood-20240410-183535.yaml
deleted file mode 100644
index 54b69f28..00000000
--- a/.changes/unreleased/Under the Hood-20240410-183535.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Update dependabot config to cover GHA and Docker
-time: 2024-04-10T18:35:35.093246-04:00
-custom:
-  Author: mikealfare
-  Issue: "1006"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 81f0575d..902db37f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,62 +5,6 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-## dbt-spark 1.8.0-b2 - April 03, 2024
-
-### Features
-
-- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
-
-### Dependencies
-
-- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
-
-### Security
-
-- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
-
-
-
-## dbt-spark 1.8.0-b1 - March 01, 2024
-
-### Features
-
-- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987))
-
-### Fixes
-
-- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935))
-- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
-
-### Under the Hood
-
-- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949))
-- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962))
-- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972))
-
-### Dependencies
-
-- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903))
-- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904))
-- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905))
-- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914))
-- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925))
-- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926))
-- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927))
-- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942))
-- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946))
-- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947))
-- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951))
-- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952))
-- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953))
-- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956))
-- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959))
-
-### Contributors
-- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 7d16c28f..6698ed64 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.8.0b2"
+version = "1.9.0a1"
diff --git a/setup.py b/setup.py
index 55112e3f..a7621d58 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def _get_plugin_version_dict():
 
 
 package_name = "dbt-spark"
-package_version = "1.8.0b2"
+package_version = "1.9.0a1"
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=4.0.39"]

From 70f6060a5377da83360b6f6664012b03ca1a23c3 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Tue, 7 May 2024 08:09:45 -0700
Subject: [PATCH 556/603] Add missing changelog backlinks. (#1039)

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 902db37f..36a3ea69 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@
 
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.8](https://github.com/dbt-labs/dbt-spark/blob/1.8.latest/CHANGELOG.md)
+- [1.7](https://github.com/dbt-labs/dbt-spark/blob/1.7.latest/CHANGELOG.md)
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
 - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md)
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)

From b65776724fe552963c7291b94be286cb69366c46 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 8 May 2024 10:37:19 -0400
Subject: [PATCH 557/603] Update CODEOWNERS (#1040)

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index f6283d12..02ed72d4 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1,3 @@
 # This codeowners file is used to ensure all PRs require reviews from the adapters team
 
-* @dbt-labs/core-adapters
+* @dbt-labs/adapters

From 94bfcd942b989c7cba12135ce441edffc0f9c8a2 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 8 May 2024 11:18:05 -0400
Subject: [PATCH 558/603] Move to `pre-commit` only (#1026)

* move linter and typechecker reqs and config to .pre-commit-config.yaml
* update linters and typecheckers
* make updates from running linters and typecheckers
* remove old make recipes
---
 .flake8                           |  14 ----
 .github/workflows/integration.yml |   1 -
 .github/workflows/main.yml        |   3 -
 .pre-commit-config.yaml           | 115 ++++++++++++++----------------
 Makefile                          |  27 +------
 dbt/adapters/spark/__init__.py    |   2 +-
 dbt/adapters/spark/column.py      |   2 +-
 dbt/adapters/spark/connections.py |   6 +-
 dbt/adapters/spark/impl.py        |   2 +-
 dev-requirements.txt              |  30 +++-----
 tests/unit/utils.py               |   1 +
 11 files changed, 73 insertions(+), 130 deletions(-)
 delete mode 100644 .flake8

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index bbc3202a..00000000
--- a/.flake8
+++ /dev/null
@@ -1,14 +0,0 @@
-[flake8]
-select =
-    E
-    W
-    F
-ignore =
-    # makes Flake8 work like black
-    W503,
-    W504,
-    # makes Flake8 work like black
-    E203,
-    E741,
-    E501,
-exclude = test
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index e82a83c4..1e058aa4 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -19,7 +19,6 @@ on:
   pull_request_target:
     paths-ignore:
       - ".changes/**"
-      - ".flake8"
       - ".gitignore"
       - "**.md"
 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c71df970..cbbb7f72 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -59,9 +59,6 @@ jobs:
           python -m pip --version
           python -m pip install pre-commit
           pre-commit --version
-          python -m pip install mypy==0.942
-          python -m pip install types-requests
-          mypy --version
           python -m pip install -r requirements.txt
           python -m pip install -r dev-requirements.txt
           python -c "import dbt.adapters.spark"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 882a3276..e228e7d9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,67 +1,58 @@
 # For more on configuring pre-commit hooks (see https://pre-commit.com/)
-
-# Force all unspecified python hooks to run python 3.8
 default_language_version:
-  python: python3
+    python: python3
 
 repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.4.0
-  hooks:
-  - id: check-yaml
-    args: [--unsafe]
-  - id: check-json
-  - id: end-of-file-fixer
-  - id: trailing-whitespace
-  - id: check-case-conflict
-- repo: https://github.com/dbt-labs/pre-commit-hooks
-  rev: v0.1.0a1
-  hooks:
-  - id: dbt-core-in-adapters-check
-- repo: https://github.com/psf/black
-  rev: 23.1.0
-  hooks:
-  - id: black
-    additional_dependencies: ['click~=8.1']
-    args:
-    - "--line-length=99"
-    - "--target-version=py38"
-  - id: black
-    alias: black-check
-    stages: [manual]
-    additional_dependencies: ['click~=8.1']
-    args:
-    - "--line-length=99"
-    - "--target-version=py38"
-    - "--check"
-    - "--diff"
-- repo: https://github.com/pycqa/flake8
-  rev: 6.0.0
-  hooks:
-  - id: flake8
-  - id: flake8
-    alias: flake8-check
-    stages: [manual]
-- repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.2.0
-  hooks:
-  - id: mypy
-    # N.B.: Mypy is... a bit fragile.
-    #
-    # By using `language: system` we run this hook in the local
-    # environment instead of a pre-commit isolated one.  This is needed
-    # to ensure mypy correctly parses the project.
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+    -   id: check-yaml
+        args: [--unsafe]
+    -   id: check-json
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+    -   id: check-case-conflict
+
+-   repo: https://github.com/dbt-labs/pre-commit-hooks
+    rev: v0.1.0a1
+    hooks:
+    -   id: dbt-core-in-adapters-check
+
+-   repo: https://github.com/psf/black
+    rev: 24.4.2
+    hooks:
+    -   id: black
+        args:
+        -   --line-length=99
+        -   --target-version=py38
+        -   --target-version=py39
+        -   --target-version=py310
+        -   --target-version=py311
+        additional_dependencies: [flaky]
+
+-   repo: https://github.com/pycqa/flake8
+    rev: 7.0.0
+    hooks:
+    -   id: flake8
+        exclude: tests/
+        args:
+        -   --max-line-length=99
+        -   --select=E,F,W
+        -   --ignore=E203,E501,E741,W503,W504
+        -   --per-file-ignores=*/__init__.py:F401
 
-    # It may cause trouble in that it adds environmental variables out
-    # of our control to the mix.  Unfortunately, there's nothing we can
-    # do about per pre-commit's author.
-    # See https://github.com/pre-commit/pre-commit/issues/730 for details.
-    args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases, --warn-unused-ignores, --disallow-untyped-defs]
-    files: ^dbt/adapters/.*
-    language: system
-  - id: mypy
-    alias: mypy-check
-    stages: [manual]
-    args: [--show-error-codes, --pretty, --ignore-missing-imports, --explicit-package-bases]
-    files: ^dbt/adapters
-    language: system
+-   repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.10.0
+    hooks:
+    -   id: mypy
+        args:
+        -   --show-error-codes
+        -   --ignore-missing-imports
+        -   --explicit-package-bases
+        -   --warn-unused-ignores
+        -   --disallow-untyped-defs
+        -   --pretty
+        files: ^dbt/adapters
+        additional_dependencies:
+        -   types-pytz
+        -   types-requests
diff --git a/Makefile b/Makefile
index 2bd1055f..af3a5154 100644
--- a/Makefile
+++ b/Makefile
@@ -11,31 +11,10 @@ dev-uninstall: ## Uninstalls all packages while maintaining the virtual environm
 	pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y
 	pip uninstall -y dbt-spark
 
-.PHONY: mypy
-mypy: ## Runs mypy against staged changes for static type checking.
-	@\
-	pre-commit run --hook-stage manual mypy-check | grep -v "INFO"
-
-.PHONY: flake8
-flake8: ## Runs flake8 against staged changes to enforce style guide.
-	@\
-	pre-commit run --hook-stage manual flake8-check | grep -v "INFO"
-
-.PHONY: black
-black: ## Runs black  against staged changes to enforce style guide.
-	@\
-	pre-commit run --hook-stage manual black-check -v | grep -v "INFO"
-
 .PHONY: lint
 lint: ## Runs flake8 and mypy code checks against staged changes.
 	@\
-	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
-	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
-
-.PHONY: linecheck
-linecheck: ## Checks for all Python lines 100 characters or more
-	@\
-	find dbt -type f -name "*.py" -exec grep -I -r -n '.\{100\}' {} \;
+	pre-commit run --all-files
 
 .PHONY: unit
 unit: ## Runs unit tests with py38.
@@ -47,9 +26,7 @@ test: ## Runs unit tests with py38 and code checks against staged changes.
 	@\
 	python -m pytest tests/unit; \
 	python dagger/run_dbt_spark_tests.py --profile spark_session \
-	pre-commit run black-check --hook-stage manual | grep -v "INFO"; \
-	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
-	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
+	pre-commit run --all-files
 
 .PHONY: clean
 	@echo "cleaning repo"
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py
index c25ba40d..6ecc5ecc 100644
--- a/dbt/adapters/spark/__init__.py
+++ b/dbt/adapters/spark/__init__.py
@@ -8,5 +8,5 @@
 from dbt.include import spark
 
 Plugin = AdapterPlugin(
-    adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH  # type: ignore
+    adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH
 )
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index 39f6f529..98fa24a1 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -21,7 +21,7 @@ class SparkColumn(dbtClassMixin, Column):
     def translate_type(cls, dtype: str) -> str:
         return dtype
 
-    def can_expand_to(self: Self, other_column: Self) -> bool:  # type: ignore
+    def can_expand_to(self: Self, other_column: Self) -> bool:
         """returns True if both columns are strings"""
         return self.is_string() and other_column.is_string()
 
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 83048f92..0405eaf5 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -65,9 +65,9 @@ class SparkConnectionMethod(StrEnum):
 @dataclass
 class SparkCredentials(Credentials):
     host: Optional[str] = None
-    schema: Optional[str] = None  # type: ignore
+    schema: Optional[str] = None
     method: SparkConnectionMethod = None  # type: ignore
-    database: Optional[str] = None  # type: ignore
+    database: Optional[str] = None
     driver: Optional[str] = None
     cluster: Optional[str] = None
     endpoint: Optional[str] = None
@@ -568,7 +568,7 @@ def open(cls, connection: Connection) -> Connection:
         return connection
 
     @classmethod
-    def data_type_code_to_name(cls, type_code: Union[type, str]) -> str:  # type: ignore
+    def data_type_code_to_name(cls, type_code: Union[type, str]) -> str:
         """
         :param Union[type, str] type_code: The sql to execute.
             * type_code is a python type (!) in pyodbc https://github.com/mkleehammer/pyodbc/wiki/Cursor#description, and a string for other spark runtimes.
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 9a1a7ec0..255ab780 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -151,7 +151,7 @@ def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
     def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         return "timestamp"
 
-    def quote(self, identifier: str) -> str:  # type: ignore
+    def quote(self, identifier: str) -> str:
         return "`{}`".format(identifier)
 
     def _get_relation_information(self, row: agate.Row) -> RelationInfo:
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 26522411..f86fb5ef 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,30 +5,22 @@ git+https://github.com/dbt-labs/dbt-common.git
 git+https://github.com/dbt-labs/dbt-adapters.git
 git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
-# if version 1.x or greater -> pin to major version
-# if version 0.x -> pin to minor
-black>=24.3
-bumpversion~=0.6.0
-click~=8.1
-flake8~=6.1;python_version>="3.8"
-flaky~=3.7
-freezegun~=1.3
+# dev
 ipdb~=0.13.13
-mypy==1.7.1  # patch updates have historically introduced breaking changes
-pip-tools~=7.3
-pre-commit~=3.5
-pre-commit-hooks~=4.5
+pre-commit==3.7.0;python_version >="3.9"
+pre-commit==3.5.0;python_version <"3.9"
+
+# test
+freezegun~=1.3
+mock~=5.1
 pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.5
-pytz~=2023.3
-types-pytz~=2023.3
-types-requests~=2.31
+thrift_sasl~=0.4.3
+
+# build
+bumpversion~=0.6.0
 twine~=4.0
 wheel~=0.42
-
-# Adapter specific dependencies
-mock~=5.1
-thrift_sasl~=0.4.3
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 17cd3ee7..d080242c 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -2,6 +2,7 @@
 Note that all imports should be inside the functions to avoid import/mocking
 issues.
 """
+
 import string
 import os
 from unittest import mock

From 1f4442a4dcf9719b1670278760772a07506681fc Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 10 May 2024 11:37:55 -0400
Subject: [PATCH 559/603] [Bug] Fix constraints enforcement rollback test
 (#1041)

* unskip offending test case
* update help in example test env file
* install pyodbc from source to support arm machines
* allow for both col and column
---
 requirements.txt                             |  2 +-
 test.env.example                             | 16 ++++++----------
 tests/functional/adapter/test_constraints.py | 13 +------------
 3 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 18ccc77f..40335fb8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pyhive[hive_pure_sasl]~=0.7.0
 requests>=2.28.1
 
-pyodbc~=5.0.1
+pyodbc~=5.0.1 --no-binary pyodbc
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/test.env.example b/test.env.example
index e69f700b..140da404 100644
--- a/test.env.example
+++ b/test.env.example
@@ -1,13 +1,9 @@
-# Cluster ID
-DBT_DATABRICKS_CLUSTER_NAME=
-# SQL Endpoint
-DBT_DATABRICKS_ENDPOINT=
-# Server Hostname value
-DBT_DATABRICKS_HOST_NAME=
-# personal token
-DBT_DATABRICKS_TOKEN=
-# file path to local ODBC driver
-ODBC_DRIVER=
+# databricks credentials
+DBT_DATABRICKS_HOST_NAME=<{this value}.cloud.databricks.com>.cloud.databricks.com
+DBT_DATABRICKS_TOKEN=<personal token>
+DBT_DATABRICKS_CLUSTER_NAME=<sql/protocolv1/o/{not this}/{this value}>
+ODBC_DRIVER=</file/path/to/local/ODBC.driver>
+DBT_DATABRICKS_ENDPOINT=</sql/1.0/warehouses/{this value}>
 
 # users for testing 'grants' functionality
 DBT_TEST_USER_1=
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 654bbdb5..e35a13a6 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -316,7 +316,7 @@ def expected_error_messages(self):
             "violate the new NOT NULL constraint",
             "(id > 0) violated by row with values:",  # incremental mats
             "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES",  # incremental mats
-            "NOT NULL constraint violated for column",
+            "NOT NULL constraint violated for col",
         ]
 
     def assert_expected_error_messages(self, error_message, expected_error_messages):
@@ -358,17 +358,6 @@ def models(self):
             "constraints_schema.yml": constraints_yml,
         }
 
-    @pytest.mark.skip(
-        "Databricks now raises an exception, which gets raised prior to the `expected_pass` check."
-        "See https://github.com/dbt-labs/dbt-spark/issues/1009"
-    )
-    def test__constraints_enforcement_rollback(
-        self, project, expected_color, expected_error_messages, null_model_sql
-    ):
-        super().test__constraints_enforcement_rollback(
-            project, expected_color, expected_error_messages, null_model_sql
-        )
-
 
 # TODO: Like the tests above, this does test that model-level constraints don't
 # result in errors, but it does not verify that they are actually present in

From 94af50e683c7c4466708e40e29ce38a51ee102b8 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Mon, 13 May 2024 17:24:38 -0600
Subject: [PATCH 560/603] Cross-database `date` macro (#1030)

* Cross-database `date` macro

* Temporary dev requirements for testing

* Update changelog entry

* Revert "Temporary dev requirements for testing"

This reverts commit 529461f475c5a639a6d545ff61ab32d91616fd2a.
---
 .changes/unreleased/Features-20240501-151904.yaml | 6 ++++++
 dbt/include/spark/macros/utils/date.sql           | 5 +++++
 tests/functional/adapter/utils/test_utils.py      | 5 +++++
 3 files changed, 16 insertions(+)
 create mode 100644 .changes/unreleased/Features-20240501-151904.yaml
 create mode 100644 dbt/include/spark/macros/utils/date.sql

diff --git a/.changes/unreleased/Features-20240501-151904.yaml b/.changes/unreleased/Features-20240501-151904.yaml
new file mode 100644
index 00000000..ed08f125
--- /dev/null
+++ b/.changes/unreleased/Features-20240501-151904.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Cross-database `date` macro
+time: 2024-05-01T15:19:04.822157-06:00
+custom:
+  Author: dbeatty10
+  Issue: 1031
diff --git a/dbt/include/spark/macros/utils/date.sql b/dbt/include/spark/macros/utils/date.sql
new file mode 100644
index 00000000..a76e995b
--- /dev/null
+++ b/dbt/include/spark/macros/utils/date.sql
@@ -0,0 +1,5 @@
+{% macro spark__date(year, month, day) -%}
+    {%- set dt = modules.datetime.date(year, month, day) -%}
+    {%- set iso_8601_formatted_date = dt.strftime('%Y-%m-%d') -%}
+    to_date('{{ iso_8601_formatted_date }}', 'yyyy-MM-dd')
+{%- endmacro %}
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 0dc52656..e8ebb087 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -8,6 +8,7 @@
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
 from dbt.tests.adapter.utils.test_concat import BaseConcat
 from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive
+from dbt.tests.adapter.utils.test_date import BaseDate
 from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
 from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
 from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
@@ -80,6 +81,10 @@ class TestCurrentTimestamp(BaseCurrentTimestampNaive):
     pass
 
 
+class TestDate(BaseDate):
+    pass
+
+
 class TestDateAdd(BaseDateAdd):
     pass
 

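A quick note on the macro introduced above: the sketch below is illustrative and not part of the patch. It assumes the macro is reached through the standard cross-database `date()` dispatch (e.g. `dbt.date`), and the surrounding model is invented; the compiled form follows directly from the `spark__date` body added in `dbt/include/spark/macros/utils/date.sql`.

```sql
-- hypothetical model using the new cross-database date macro
select {{ dbt.date(2024, 5, 1) }} as example_date

-- with spark__date, this is expected to compile to:
-- select to_date('2024-05-01', 'yyyy-MM-dd') as example_date
```

Because the literal is built with `modules.datetime.date`, an invalid year/month/day combination should fail during compilation rather than producing a malformed date literal in the generated SQL.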
From 629ea94098cea5f4c182b123048086cd2a48c767 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 14 May 2024 12:28:51 -0400
Subject: [PATCH 561/603] [Bug] Fix incremental python tables - dbt can't find
 temporary table transaction logs (#1042)

* unskip test
* align test pyodbc version with build deps; add pyspark from build deps
* revert change to exclude database and schema from temp relations
* retain existing behavior for unit tests by vendoring temp relation into the incremental materialization
* changelog entry
---
 .changes/unreleased/Fixes-20240513-160121.yaml             | 7 +++++++
 .../macros/materializations/incremental/incremental.sql    | 2 +-
 requirements.txt                                           | 3 ++-
 tests/functional/adapter/test_python_model.py              | 7 -------
 4 files changed, 10 insertions(+), 9 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20240513-160121.yaml

diff --git a/.changes/unreleased/Fixes-20240513-160121.yaml b/.changes/unreleased/Fixes-20240513-160121.yaml
new file mode 100644
index 00000000..fd707295
--- /dev/null
+++ b/.changes/unreleased/Fixes-20240513-160121.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Fix incremental python models error where Databricks could not find the temp
+  table transaction logs
+time: 2024-05-13T16:01:21.255833-04:00
+custom:
+  Author: mikealfare
+  Issue: "1033"
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 10d4f3ed..9a66bab5 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -16,7 +16,7 @@
   {%- set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) -%}
   {%- set target_relation = this -%}
   {%- set existing_relation = load_relation(this) -%}
-  {%- set tmp_relation = make_temp_relation(this) -%}
+  {% set tmp_relation = this.incorporate(path = {"identifier": this.identifier ~ '__dbt_tmp'}) -%}
 
   {#-- for SQL model we will create temp view that doesn't have database and schema --#}
   {%- if language == 'sql'-%}
diff --git a/requirements.txt b/requirements.txt
index 40335fb8..b32884c4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,10 @@
 pyhive[hive_pure_sasl]~=0.7.0
 requests>=2.28.1
 
-pyodbc~=5.0.1 --no-binary pyodbc
+pyodbc~=4.0.39 --no-binary pyodbc
 sqlparams>=3.0.0
 thrift>=0.13.0
+pyspark>=3.0.0,<4.0.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
 
 types-PyYAML
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 05e25c5f..1195cbd3 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -24,13 +24,6 @@ class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     def project_config_update(self):
         return {}
 
-    @pytest.mark.skip(
-        "Databricks can't find the transaction log"
-        "See https://github.com/dbt-labs/dbt-spark/issues/1033"
-    )
-    def test_incremental(self, project):
-        super().test_incremental(project)
-
 
 models__simple_python_model = """
 import pandas

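To make the one-line change in `incremental.sql` above concrete, here is a hedged sketch of what the vendored temp-relation line yields compared to the previous `make_temp_relation(this)` call. The relation names are invented, and the description of the old behavior is taken from the commit message rather than from code shown in this patch.

```sql
{# illustrative: suppose {{ this }} resolves to analytics.dbt_prod.my_python_model #}
{%- set tmp_relation = this.incorporate(path={"identifier": this.identifier ~ '__dbt_tmp'}) -%}

{# tmp_relation keeps the target's database and schema:
     analytics.dbt_prod.my_python_model__dbt_tmp
   so the temp table written by an incremental python model lands where Databricks
   can find its transaction logs. Per the commit message, make_temp_relation(this)
   produced a relation without database/schema, which remains appropriate only for
   SQL models, whose schema-less temp view is handled in the language == 'sql'
   branch of the materialization. #}
```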
From 9b3dd3f7eb6d0f5379239342cdf25052bfb7eb9a Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 14 May 2024 13:04:41 -0600
Subject: [PATCH 562/603] Import relevant pytest(s) for cross-database `cast`
 macro (#1029)

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .changes/unreleased/Features-20240430-185723.yaml | 6 ++++++
 tests/functional/adapter/utils/test_utils.py      | 5 +++++
 2 files changed, 11 insertions(+)
 create mode 100644 .changes/unreleased/Features-20240430-185723.yaml

diff --git a/.changes/unreleased/Features-20240430-185723.yaml b/.changes/unreleased/Features-20240430-185723.yaml
new file mode 100644
index 00000000..cb86e796
--- /dev/null
+++ b/.changes/unreleased/Features-20240430-185723.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add tests for cross-database `cast` macro
+time: 2024-04-30T18:57:23.881246-06:00
+custom:
+  Author: dbeatty10
+  Issue: "1028"
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index e8ebb087..0285f7c4 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -5,6 +5,7 @@
 from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct
 from dbt.tests.adapter.utils.test_any_value import BaseAnyValue
 from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
+from dbt.tests.adapter.utils.test_cast import BaseCast
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
 from dbt.tests.adapter.utils.test_concat import BaseConcat
 from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive
@@ -67,6 +68,10 @@ class TestBoolOr(BaseBoolOr):
     pass
 
 
+class TestCast(BaseCast):
+    pass
+
+
 class TestCastBoolToText(BaseCastBoolToText):
     pass
 

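The patch above only wires Spark into the shared `BaseCast` suite, so for context here is a hedged sketch of how the cross-database `cast` macro being tested is typically used in a model. The column, model, and type are invented, and the compiled output assumes the default `cast(field as type)` implementation dispatched by dbt-adapters.

```sql
-- hypothetical model using the cross-database cast macro
select {{ dbt.cast("order_id", "string") }} as order_id
from {{ ref("orders") }}

-- on Spark this should compile to roughly:
-- select cast(order_id as string) as order_id
-- from <resolved orders relation>
```

Note that `SparkColumn.translate_type` (shown earlier in this series) passes the dtype through unchanged, so plain Spark type names such as `string` work here.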
From 6f8ff606926b5bb3f986a0eb0b6007bff35476da Mon Sep 17 00:00:00 2001
From: Jeremy Cohen <jeremy@dbtlabs.com>
Date: Tue, 14 May 2024 23:53:46 +0200
Subject: [PATCH 563/603] Bump deps on common, adapters, core (#1045)

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index a7621d58..fd985eba 100644
--- a/setup.py
+++ b/setup.py
@@ -65,10 +65,10 @@ def _get_plugin_version_dict():
     include_package_data=True,
     install_requires=[
         "sqlparams>=3.0.0",
-        "dbt-common>=0.1.0a1,<2.0",
-        "dbt-adapters>=0.1.0a1,<2.0",
+        "dbt-common>=1.0.4,<2.0",
+        "dbt-adapters>=1.1.1,<2.0",
         # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
-        "dbt-core>=1.8.0a1",
+        "dbt-core>=1.8.0",
     ],
     extras_require={
         "ODBC": odbc_extras,

From 944dbea08de9a8069a39ad92f4beecb2cbec608c Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 21 May 2024 00:57:20 -0400
Subject: [PATCH 564/603]  Add docker release to the full release process for
 final releases (#1004)

* add docker release to release pipeline
* update docker release to align with other adapters, add dev docker
* remove defaulted input for docker package, override default for docker release image
* fix docker release dependent steps
* only release docker when not testing, allow to only release to docker
* remove dev container
* remove test script
* rename the spark Dockerfile to make space for the release Dockerfile
* move the release Dockerfile into ./docker

---------

Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>
---
 .github/dependabot.yml        |  5 +++
 .github/workflows/release.yml | 64 ++++++++++++++++---------------
 Makefile                      |  4 ++
 docker-compose.yml            |  4 +-
 docker/Dockerfile             | 72 ++++++++++++++++++++---------------
 docker/README.md              | 70 ++++++++++++++++++++++++++++++++++
 docker/spark.Dockerfile       | 30 +++++++++++++++
 7 files changed, 187 insertions(+), 62 deletions(-)
 create mode 100644 docker/README.md
 create mode 100644 docker/spark.Dockerfile

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index ae2be43a..fc44c9fe 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -15,3 +15,8 @@ updates:
     schedule:
       interval: "weekly"
     rebase-strategy: "disabled"
+  - package-ecosystem: "docker"
+    directory: "/docker-dev"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9b2774f1..cdbdaa13 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -13,8 +13,8 @@
 # This will only run manually. Run this workflow only after the
 # version bump workflow is completed and related changes are reviewed and merged.
 #
-
-name: Release to GitHub and PyPI
+name: "Release to GitHub, PyPI, and Docker"
+run-name: "Release ${{ inputs.version_number }} to GitHub, PyPI, and Docker"
 
 on:
   workflow_dispatch:
@@ -56,6 +56,11 @@ on:
         type: boolean
         default: true
         required: false
+      only_docker:
+        description: "Only release Docker image, skip GitHub & PyPI"
+        type: boolean
+        default: false
+        required: false
 
 permissions:
   contents: write # this is the permission that allows creating a new release
@@ -66,7 +71,7 @@ defaults:
 
 jobs:
   log-inputs:
-    name: Log Inputs
+    name: "Log Inputs"
     runs-on: ubuntu-latest
     steps:
       - name: "[DEBUG] Print Variables"
@@ -79,6 +84,7 @@ jobs:
           echo AWS S3 bucket name:                 ${{ inputs.s3_bucket_name }}
           echo Package test command:               ${{ inputs.package_test_command }}
           echo Test run:                           ${{ inputs.test_run }}
+          echo Only Docker:                        ${{ inputs.only_docker }}
 
   # The Spark repository uses CircleCI to run integration tests.
   # Because of this, the process of version bumps will be manual
@@ -87,27 +93,21 @@ jobs:
   # We are passing `env_setup_script_path` as an empty string
   # so that the integration tests stage will be skipped.
   audit-version-and-changelog:
-    name: Bump package version, Generate changelog
-
+    name: "Bump package version, Generate changelog"
     uses: dbt-labs/dbt-spark/.github/workflows/release-prep.yml@main
-
     with:
       sha: ${{ inputs.sha }}
       version_number: ${{ inputs.version_number }}
       target_branch: ${{ inputs.target_branch }}
       env_setup_script_path: ""
       test_run: ${{ inputs.test_run }}
-
     secrets: inherit
 
   log-outputs-audit-version-and-changelog:
     name: "[Log output] Bump package version, Generate changelog"
-    if: ${{ !failure() && !cancelled() }}
-
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [audit-version-and-changelog]
-
     runs-on: ubuntu-latest
-
     steps:
       - name: Print variables
         run: |
@@ -115,12 +115,10 @@ jobs:
           echo Changelog path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
 
   build-test-package:
-    name: Build, Test, Package
-    if: ${{ !failure() && !cancelled() }}
+    name: "Build, Test, Package"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [audit-version-and-changelog]
-
     uses: dbt-labs/dbt-release/.github/workflows/build.yml@main
-
     with:
       sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
       version_number: ${{ inputs.version_number }}
@@ -129,19 +127,15 @@ jobs:
       s3_bucket_name: ${{ inputs.s3_bucket_name }}
       package_test_command: ${{ inputs.package_test_command }}
       test_run: ${{ inputs.test_run }}
-
     secrets:
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
 
   github-release:
-    name: GitHub Release
-    if: ${{ !failure() && !cancelled() }}
-
+    name: "GitHub Release"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [audit-version-and-changelog, build-test-package]
-
     uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main
-
     with:
       sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
       version_number: ${{ inputs.version_number }}
@@ -149,35 +143,43 @@ jobs:
       test_run: ${{ inputs.test_run }}
 
   pypi-release:
-    name: PyPI Release
-
+    name: "PyPI Release"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [github-release]
-
     uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main
-
     with:
       version_number: ${{ inputs.version_number }}
       test_run: ${{ inputs.test_run }}
-
     secrets:
       PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
       TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}
 
+  docker-release:
+    name: "Docker Release"
+    # We cannot release to docker on a test run because it uses the tag in GitHub as
+    # what we need to release but draft releases don't actually tag the commit so it
+    # finds nothing to release
+    if: ${{ !failure() && !cancelled() && (!inputs.test_run || inputs.only_docker) }}
+    needs: [github-release]
+    permissions:
+      packages: write
+    uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main
+    with:
+      version_number: ${{ inputs.version_number }}
+      dockerfile: "docker/Dockerfile"
+      test_run: ${{ inputs.test_run }}
+
   slack-notification:
     name: Slack Notification
     if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }}
-
     needs:
       [
-        audit-version-and-changelog,
-        build-test-package,
         github-release,
         pypi-release,
+        docker-release,
       ]
-
     uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main
     with:
       status: "failure"
-
     secrets:
       SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }}
diff --git a/Makefile b/Makefile
index af3a5154..ff4c0fc1 100644
--- a/Makefile
+++ b/Makefile
@@ -38,3 +38,7 @@ help: ## Show this help message.
 	@echo
 	@echo 'targets:'
 	@grep -E '^[7+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: docker-prod
+docker-prod:
+	docker build -f docker/Dockerfile -t dbt-spark .
diff --git a/docker-compose.yml b/docker-compose.yml
index ad083eaf..cd3e1c77 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,9 @@ version: "3.7"
 services:
 
   dbt-spark3-thrift:
-    build: docker/
+    build:
+      context: ./docker
+      dockerfile: spark.Dockerfile
     ports:
       - "10000:10000"
       - "4040:4040"
diff --git a/docker/Dockerfile b/docker/Dockerfile
index bb4d378e..ef4574dd 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,30 +1,42 @@
-ARG OPENJDK_VERSION=8
-FROM eclipse-temurin:${OPENJDK_VERSION}-jre
-
-ARG BUILD_DATE
-ARG SPARK_VERSION=3.3.2
-ARG HADOOP_VERSION=3
-
-LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
-      org.label-schema.build-date=$BUILD_DATE \
-      org.label-schema.version=$SPARK_VERSION
-
-ENV SPARK_HOME /usr/spark
-ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
-
-RUN apt-get update && \
-    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
-    wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
-    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
-    apt-get remove -y wget && \
-    apt-get autoremove -y && \
-    apt-get clean
-
-COPY entrypoint.sh /scripts/
-RUN chmod +x /scripts/entrypoint.sh
-
-ENTRYPOINT ["/scripts/entrypoint.sh"]
-CMD ["--help"]
+# this image gets published to GHCR for production use
+ARG py_version=3.11.2
+
+FROM python:$py_version-slim-bullseye as base
+
+RUN apt-get update \
+  && apt-get dist-upgrade -y \
+  && apt-get install -y --no-install-recommends \
+    build-essential=12.9 \
+    ca-certificates=20210119 \
+    gcc=4:10.2.1-1 \
+    git=1:2.30.2-1+deb11u2 \
+    libpq-dev=13.14-0+deb11u1 \
+    libsasl2-dev=2.1.27+dfsg-2.1+deb11u1 \
+    make=4.3-4.1 \
+    openssh-client=1:8.4p1-5+deb11u3 \
+    python-dev-is-python2=2.7.18-9 \
+    software-properties-common=0.96.20.2-2.1 \
+    unixodbc-dev=2.3.6-0.1+b1 \
+  && apt-get clean \
+  && rm -rf \
+    /var/lib/apt/lists/* \
+    /tmp/* \
+    /var/tmp/*
+
+ENV PYTHONIOENCODING=utf-8
+ENV LANG=C.UTF-8
+
+RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir
+
+
+FROM base as dbt-spark
+
+ARG commit_ref=main
+ARG extras=all
+
+HEALTHCHECK CMD dbt --version || exit 1
+
+WORKDIR /usr/app/dbt/
+ENTRYPOINT ["dbt"]
+
+RUN python -m pip install --no-cache-dir "dbt-spark[${extras}] @ git+https://github.com/dbt-labs/dbt-spark@${commit_ref}"
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 00000000..42ca5e22
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,70 @@
+# Docker for dbt
+`Dockerfile` is suitable for building dbt Docker images locally or for use with CI/CD to automate populating a container registry.
+
+## Building an image:
+This Dockerfile can create images for the following target: `dbt-spark`
+
+In order to build a new image, run the following docker command.
+```shell
+docker build --tag <your_image_name> --target dbt-spark <path/to/dockerfile>
+```
+---
+> **Note:**  Docker must be configured to use [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) in order for images to build properly!
+
+---
+
+By default the image will be populated with the latest version of `dbt-spark` on `main`.
+If you need to use a different version you can specify it by git ref using the `--build-arg` flag:
+```shell
+docker build --tag <your_image_name> \
+  --target dbt-spark \
+  --build-arg commit_ref=<commit_ref> \
+  <path/to/dockerfile>
+```
+
+### Examples:
+To build an image named "my-dbt" that supports Spark using the latest releases:
+```shell
+cd dbt-spark/docker
+docker build --tag my-dbt --target dbt-spark .
+```
+
+To build an image named "my-other-dbt" that supports Spark using the adapter version 1.0.0b1:
+```shell
+cd dbt-spark/docker
+docker build \
+  --tag my-other-dbt \
+  --target dbt-spark \
+  --build-arg commit_ref=v1.0.0b1 \
+ .
+```
+
+## Special cases
+There are a few special cases worth noting:
+* The `dbt-spark` database adapter comes in three different versions named `PyHive`, `ODBC`, and the default `all`.
+If you wish to override this you can use the `--build-arg` flag with the value of `extras=<extras_name>`.
+See the [docs](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) for more information.
+```shell
+docker build --tag my_dbt \
+  --target dbt-spark \
+  --build-arg commit_ref=v1.0.0b1 \
+  --build-arg extras=PyHive \
+  <path/to/dockerfile>
+```
+
+## Running an image in a container:
+The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal:
+```shell
+docker run \
+  --network=host \
+  --mount type=bind,source=path/to/project,target=/usr/app \
+  --mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \
+  my-dbt \
+  ls
+```
+---
+**Notes:**
+* Bind-mount sources _must_ be an absolute path
+* You may need to make adjustments to the docker networking setting depending on the specifics of your data warehouse/database host.
+
+---
diff --git a/docker/spark.Dockerfile b/docker/spark.Dockerfile
new file mode 100644
index 00000000..bb4d378e
--- /dev/null
+++ b/docker/spark.Dockerfile
@@ -0,0 +1,30 @@
+ARG OPENJDK_VERSION=8
+FROM eclipse-temurin:${OPENJDK_VERSION}-jre
+
+ARG BUILD_DATE
+ARG SPARK_VERSION=3.3.2
+ARG HADOOP_VERSION=3
+
+LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
+      org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.version=$SPARK_VERSION
+
+ENV SPARK_HOME /usr/spark
+ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
+
+RUN apt-get update && \
+    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+    wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
+    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
+    apt-get remove -y wget && \
+    apt-get autoremove -y && \
+    apt-get clean
+
+COPY entrypoint.sh /scripts/
+RUN chmod +x /scripts/entrypoint.sh
+
+ENTRYPOINT ["/scripts/entrypoint.sh"]
+CMD ["--help"]

From 7850da3d542608128ad80c41e03972649943d449 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Fri, 14 Jun 2024 05:00:32 +0000
Subject: [PATCH 565/603] Adap 1049/lazy load agate (#1050)

* Add changelog

* Lazy load agate.

* More comments on types and lint.

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .../Under the Hood-20240612-195629.yaml       |  6 +++
 dbt/adapters/spark/impl.py                    | 39 ++++++++++++-------
 2 files changed, 30 insertions(+), 15 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240612-195629.yaml

diff --git a/.changes/unreleased/Under the Hood-20240612-195629.yaml b/.changes/unreleased/Under the Hood-20240612-195629.yaml
new file mode 100644
index 00000000..c90ebcda
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240612-195629.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Lazy load agate to improve performance
+time: 2024-06-12T19:56:29.943204-07:00
+custom:
+  Author: versusfacit
+  Issue: "1049"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 255ab780..d33ebde2 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -14,6 +14,7 @@
     Callable,
     Set,
     FrozenSet,
+    TYPE_CHECKING,
 )
 
 from dbt.adapters.base.relation import InformationSchema
@@ -24,7 +25,10 @@
 
 from typing_extensions import TypeAlias
 
-import agate
+if TYPE_CHECKING:
+    # Indirectly imported via agate_helper, which is lazy loaded further downfile.
+    # Used by mypy for earlier type hints.
+    import agate
 
 from dbt.adapters.base import AdapterConfig, PythonJobHelper
 from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport
@@ -127,34 +131,36 @@ def date_function(cls) -> str:
         return "current_timestamp()"
 
     @classmethod
-    def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_text_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "string"
 
     @classmethod
-    def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_number_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
+        import agate
+
         decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
         return "double" if decimals else "bigint"
 
     @classmethod
-    def convert_integer_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_integer_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "bigint"
 
     @classmethod
-    def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_date_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "date"
 
     @classmethod
-    def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_time_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "time"
 
     @classmethod
-    def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_datetime_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "timestamp"
 
     def quote(self, identifier: str) -> str:
         return "`{}`".format(identifier)
 
-    def _get_relation_information(self, row: agate.Row) -> RelationInfo:
+    def _get_relation_information(self, row: "agate.Row") -> RelationInfo:
         """relation info was fetched with SHOW TABLES EXTENDED"""
         try:
             _schema, name, _, information = row
@@ -165,7 +171,7 @@ def _get_relation_information(self, row: agate.Row) -> RelationInfo:
 
         return _schema, name, information
 
-    def _get_relation_information_using_describe(self, row: agate.Row) -> RelationInfo:
+    def _get_relation_information_using_describe(self, row: "agate.Row") -> RelationInfo:
         """Relation info fetched using SHOW TABLES and an auxiliary DESCRIBE statement"""
         try:
             _schema, name, _ = row
@@ -193,8 +199,8 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn
 
     def _build_spark_relation_list(
         self,
-        row_list: agate.Table,
-        relation_info_func: Callable[[agate.Row], RelationInfo],
+        row_list: "agate.Table",
+        relation_info_func: Callable[["agate.Row"], RelationInfo],
     ) -> List[BaseRelation]:
         """Aggregate relations with format metadata included."""
         relations = []
@@ -370,7 +376,7 @@ def get_catalog(
         self,
         relation_configs: Iterable[RelationConfig],
         used_schemas: FrozenSet[Tuple[str, str]],
-    ) -> Tuple[agate.Table, List[Exception]]:
+    ) -> Tuple["agate.Table", List[Exception]]:
         schema_map = self._get_catalog_schemas(relation_configs)
         if len(schema_map) > 1:
             raise CompilationError(
@@ -378,7 +384,7 @@ def get_catalog(
             )
 
         with executor(self.config) as tpe:
-            futures: List[Future[agate.Table]] = []
+            futures: List[Future["agate.Table"]] = []
             for info, schemas in schema_map.items():
                 for schema in schemas:
                     futures.append(
@@ -399,7 +405,7 @@ def _get_one_catalog(
         information_schema: InformationSchema,
         schemas: Set[str],
         used_schemas: FrozenSet[Tuple[str, str]],
-    ) -> agate.Table:
+    ) -> "agate.Table":
         if len(schemas) != 1:
             raise CompilationError(
                 f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}"
@@ -412,6 +418,9 @@ def _get_one_catalog(
         for relation in self.list_relations(database, schema):
             logger.debug("Getting table schema for relation {}", str(relation))
             columns.extend(self._get_columns_for_catalog(relation))
+
+        import agate
+
         return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER)
 
     def check_schema_exists(self, database: str, schema: str) -> bool:
@@ -486,7 +495,7 @@ def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]:
             "all_purpose_cluster": AllPurposeClusterPythonJobHelper,
         }
 
-    def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
+    def standardize_grants_dict(self, grants_table: "agate.Table") -> dict:
         grants_dict: Dict[str, List[str]] = {}
         for row in grants_table:
             grantee = row["Principal"]

From ba5b3f05bb699c9be59b39661cb8186ce7175587 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 21 Jun 2024 16:53:38 -0400
Subject: [PATCH 566/603] skip broken tests, link to the issue for resolution
 (#1056)

---
 tests/functional/adapter/test_python_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 1195cbd3..957361cb 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -15,7 +15,9 @@ class TestPythonModelSpark(BasePythonModelTests):
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
-    pass
+    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
+    def test_different_dataframes(self, project):
+        return super().test_different_dataframes(project)
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
@@ -69,6 +71,7 @@ class TestChangingSchemaSpark:
     def models(self):
         return {"simple_python_model.py": models__simple_python_model}
 
+    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_changing_schema_with_log_validation(self, project, logs_dir):
         run_dbt(["run"])
         write_file(

From cd6efba4f006f7bc3de761a02717ff9261b736a1 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Fri, 21 Jun 2024 16:50:48 -0500
Subject: [PATCH 567/603] update user docs-issue workflow (#1051)

* update user docs-issue workflow

* pre-commit fix

* update workflow based on feedback

* whitespace

* update to match bigquery

* pin numpy to below 2.0 new release

* remove numpy pin for its own pr
---
 .github/workflows/docs-issues.yml | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/docs-issues.yml b/.github/workflows/docs-issues.yml
index 00a098df..f49cf517 100644
--- a/.github/workflows/docs-issues.yml
+++ b/.github/workflows/docs-issues.yml
@@ -1,19 +1,18 @@
 # **what?**
-# Open an issue in docs.getdbt.com when a PR is labeled `user docs`
+# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed
 
 # **why?**
 # To reduce barriers for keeping docs up to date
 
 # **when?**
-# When a PR is labeled `user docs` and is merged.  Runs on pull_request_target to run off the workflow already merged,
-# not the workflow that existed on the PR branch.  This allows old PRs to get comments.
+# When an issue is labeled `user docs` and is closed as completed.  Can be labeled before or after the issue is closed.
 
 
-name: Open issues in docs.getdbt.com repo when a PR is labeled
-run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}"
+name: Open issues in docs.getdbt.com repo when an issue is labeled
+run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}"
 
 on:
-  pull_request_target:
+  issues:
     types: [labeled, closed]
 
 defaults:
@@ -21,23 +20,22 @@ defaults:
     shell: bash
 
 permissions:
-    issues: write # opens new issues
-    pull-requests: write # comments on PRs
-
+    issues: write # comments on issues
 
 jobs:
   open_issues:
-    # we only want to run this when the PR has been merged or the label in the labeled event is `user docs`.  Otherwise it runs the
+    # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned.
+    # If this logic does not exist in this workflow, it runs the
     # risk of duplication of issues being created due to merge and label both triggering this workflow to run and neither having
     # generated the comment before the other runs.  This lives here instead of the shared workflow because this is where we
     # decide if it should run or not.
     if: |
-      (github.event.pull_request.merged == true) &&
-      ((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) ||
+      (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && (
+      (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) ||
       (github.event.action == 'labeled' && github.event.label.name == 'user docs'))
     uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
     with:
         issue_repository: "dbt-labs/docs.getdbt.com"
-        issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}"
+        issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
         issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
     secrets: inherit

From 50634b9c6038016f888ea84be542d9a84e52a141 Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Mon, 24 Jun 2024 09:09:25 -0700
Subject: [PATCH 568/603] update spark internal-release workflow (#1052)

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .github/workflows/release-internal.yml | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index eb892415..d4e7a3c9 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -10,15 +10,12 @@
 #
 # Manual trigger.
 
-name: "Release internal patch"
+name: "Release to Cloud"
+run-name: "Release to Cloud off of ${{ inputs.ref }}"
 
 on:
   workflow_dispatch:
     inputs:
-      version_number:
-        description: "The release version number (i.e. 1.0.0b1)"
-        type: string
-        required: true
       ref:
         description: "The ref (sha or branch name) to use"
         type: string
@@ -29,6 +26,11 @@ on:
         type: string
         default: "python -c \"import dbt.adapters.spark\""
         required: true
+      skip_tests:
+        description: "Should the tests be skipped? (defaults to false)"
+        type: boolean
+        required: true
+        default: false
 
 defaults:
   run:
@@ -129,15 +131,14 @@ jobs:
         run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
 
   invoke-reusable-workflow:
-    name: "Build and Release Internally"
+    name: "Create cloud release"
     needs: [run-integration-tests]
-
     uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main"
 
     with:
-      version_number: "${{ inputs.version_number }}"
       package_test_command: "${{ inputs.package_test_command }}"
       dbms_name: "spark"
       ref: "${{ inputs.ref }}"
+      skip_tests: "${{ inputs.skip_tests }}"
 
     secrets: "inherit"

From 824ca0f2249d145234f21d7e4066e033a273e2e2 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 26 Jun 2024 12:06:32 -0400
Subject: [PATCH 569/603] Update the spark version to the current version
 (#1055)

* update the spark version to the current version
* update pin for pydantic to resolve https://github.com/explosion/spaCy/issues/12659
* exclude koalas dataframes from test
---
 tests/functional/adapter/test_python_model.py | 35 ++++++++++++++++---
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 957361cb..cd798d1d 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -15,9 +15,22 @@ class TestPythonModelSpark(BasePythonModelTests):
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
-    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_different_dataframes(self, project):
-        return super().test_different_dataframes(project)
+        """
+        Test that python models are supported using dataframes from:
+        - pandas
+        - pyspark
+        - pyspark.pandas (formerly dataspark.koalas)
+
+        Note:
+            The CI environment is on Apache Spark >3.1, which includes koalas as pyspark.pandas.
+            The only Databricks runtime that supports Apache Spark <=3.1 is 9.1 LTS, which is EOL 2024-09-23.
+            For more information, see:
+            - https://github.com/databricks/koalas
+            - https://docs.databricks.com/en/release-notes/runtime/index.html
+        """
+        results = run_dbt(["run", "--exclude", "koalas_df"])
+        assert len(results) == 3
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
@@ -37,7 +50,7 @@ def model(dbt, spark):
         materialized='table',
         submission_method='job_cluster',
         job_cluster_config={
-            "spark_version": "7.3.x-scala2.12",
+            "spark_version": "12.2.x-scala2.12",
             "node_type_id": "i3.xlarge",
             "num_workers": 0,
             "spark_conf": {
@@ -48,7 +61,7 @@ def model(dbt, spark):
                 "ResourceClass": "SingleNode"
             }
         },
-        packages=['spacy', 'torch', 'pydantic<1.10.3']
+        packages=['spacy', 'torch', 'pydantic>=1.10.8']
     )
     data = [[1,2]] * 10
     return spark.createDataFrame(data, schema=['test', 'test2'])
@@ -67,11 +80,23 @@ def model(dbt, spark):
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestChangingSchemaSpark:
+    """
+    Confirm that we can set up a spot instance and pass required packages into the Databricks job.
+
+    Notes:
+        - This test generates a spot instance on demand using the settings from `job_cluster_config`
+        in `models__simple_python_model` above. It takes several minutes to run due to creating the cluster.
+        The job can be monitored via "Data Engineering > Job Runs" or "Workflows > Job Runs"
+        in the Databricks UI (instead of via the normal cluster).
+        - The `spark_version` argument will need to periodically be updated. It will eventually become
+        unsupported and start experiencing issues.
+        - See https://github.com/explosion/spaCy/issues/12659 for why we're pinning pydantic
+    """
+
     @pytest.fixture(scope="class")
     def models(self):
         return {"simple_python_model.py": models__simple_python_model}
 
-    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_changing_schema_with_log_validation(self, project, logs_dir):
         run_dbt(["run"])
         write_file(

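For orientation, the dataframe flavors named in the docstring for test_different_dataframes above are the types a Spark Python model may return. A minimal illustrative model using pyspark.pandas, with placeholder names that are not part of the test suite:

    import pyspark.pandas as ps

    def model(dbt, spark):
        dbt.config(materialized="table")
        # pyspark.pandas (formerly koalas) mirrors the pandas API on top of Spark,
        # so it can be returned just like a pandas or pyspark dataframe.
        return ps.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})
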
From 034cb6118e808c1c9ad81d3553a136ac94b77781 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Mon, 8 Jul 2024 12:11:09 -0400
Subject: [PATCH 570/603] Base 207/add test (#1057)

* Add test for upstream change.
* Skip the spark_session profile since the test fails there.
* Import pytest to fix skip error.
* Dial in tests to reflect error messages from spark.

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .../adapter/dbt_show/test_dbt_show.py         | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 tests/functional/adapter/dbt_show/test_dbt_show.py

diff --git a/tests/functional/adapter/dbt_show/test_dbt_show.py b/tests/functional/adapter/dbt_show/test_dbt_show.py
new file mode 100644
index 00000000..bc56fd90
--- /dev/null
+++ b/tests/functional/adapter/dbt_show/test_dbt_show.py
@@ -0,0 +1,22 @@
+import pytest
+
+from dbt.tests.adapter.dbt_show.test_dbt_show import (
+    BaseShowSqlHeader,
+    BaseShowLimit,
+    BaseShowDoesNotHandleDoubleLimit,
+)
+
+
+class TestSparkShowLimit(BaseShowLimit):
+    pass
+
+
+class TestSparkShowSqlHeader(BaseShowSqlHeader):
+    pass
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_http_cluster")
+class TestSparkShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit):
+    """The syntax error message varies across clusters, but this substring matches two of them at once."""
+
+    DATABASE_ERROR_MESSAGE = "limit"

From 81a7cf25e9880fe1d8114e7e27cd6eacac01093e Mon Sep 17 00:00:00 2001
From: Colin <colin.rogers@dbtlabs.com>
Date: Wed, 10 Jul 2024 16:54:38 -0700
Subject: [PATCH 571/603] delete duplicate fixture

---
 tests/functional/adapter/test_python_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 1195cbd3..1e40344e 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -28,7 +28,6 @@ def project_config_update(self):
 models__simple_python_model = """
 import pandas
 import torch
-import spacy
 
 def model(dbt, spark):
     dbt.config(

From 2a31d3f28cc5e2fe6542a62e35b8c654c312f1ab Mon Sep 17 00:00:00 2001
From: Colin <colin.rogers@dbtlabs.com>
Date: Wed, 10 Jul 2024 16:56:47 -0700
Subject: [PATCH 572/603] Revert "delete duplicate fixture"

This reverts commit 81a7cf25e9880fe1d8114e7e27cd6eacac01093e.
---
 tests/functional/adapter/test_python_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 99c9fc95..cd798d1d 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -43,6 +43,7 @@ def project_config_update(self):
 models__simple_python_model = """
 import pandas
 import torch
+import spacy
 
 def model(dbt, spark):
     dbt.config(

From a096642e7429da3e9299d9a4ca8cb8c2b09b5d81 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 22:13:55 +0000
Subject: [PATCH 573/603] Update freezegun requirement from ~=1.3 to ~=1.4
 (#966)

* Update freezegun requirement from ~=1.3 to ~=1.4

Updates the requirements on [freezegun](https://github.com/spulec/freezegun) to permit the latest version.
- [Release notes](https://github.com/spulec/freezegun/releases)
- [Changelog](https://github.com/spulec/freezegun/blob/master/CHANGELOG)
- [Commits](https://github.com/spulec/freezegun/compare/1.3.0...1.4.0)

---
updated-dependencies:
- dependency-name: freezegun
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20231219-222211.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20231219-222211.yaml

diff --git a/.changes/unreleased/Dependencies-20231219-222211.yaml b/.changes/unreleased/Dependencies-20231219-222211.yaml
new file mode 100644
index 00000000..39f42509
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20231219-222211.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update freezegun requirement from ~=1.3 to ~=1.4"
+time: 2023-12-19T22:22:11.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 966
diff --git a/dev-requirements.txt b/dev-requirements.txt
index f86fb5ef..3a3e8c8f 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -11,7 +11,7 @@ pre-commit==3.7.0;python_version >="3.9"
 pre-commit==3.5.0;python_version <"3.9"
 
 # test
-freezegun~=1.3
+freezegun~=1.4
 mock~=5.1
 pytest~=7.4
 pytest-csv~=3.0

From e87c93c9d311980dc71adf1061698cc0853aceaf Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 23:04:43 +0000
Subject: [PATCH 574/603] Bump dbt-labs/actions from 1.1.0 to 1.1.1 (#1012)

* Bump dbt-labs/actions from 1.1.0 to 1.1.1

Bumps [dbt-labs/actions](https://github.com/dbt-labs/actions) from 1.1.0 to 1.1.1.
- [Commits](https://github.com/dbt-labs/actions/compare/v1.1.0...v1.1.1)

---
updated-dependencies:
- dependency-name: dbt-labs/actions
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240419-024820.yaml | 6 ++++++
 .github/workflows/release-prep.yml                    | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20240419-024820.yaml

diff --git a/.changes/unreleased/Dependencies-20240419-024820.yaml b/.changes/unreleased/Dependencies-20240419-024820.yaml
new file mode 100644
index 00000000..65caf7c1
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240419-024820.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump dbt-labs/actions from 1.1.0 to 1.1.1"
+time: 2024-04-19T02:48:20.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1012
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index 0061a860..11af8214 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -123,7 +123,7 @@ jobs:
 
       - name: "Audit Version And Parse Into Parts"
         id: semver
-        uses: dbt-labs/actions/parse-semver@v1.1.0
+        uses: dbt-labs/actions/parse-semver@v1.1.1
         with:
           version: ${{ inputs.version_number }}
 

From 1d7ed0ebc2bfe9dfa7a55e1681203be4f49932fa Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 18 Jul 2024 01:00:28 +0000
Subject: [PATCH 575/603] Update wheel requirement from ~=0.42 to ~=0.43
 (#1035)

* Update wheel requirement from ~=0.42 to ~=0.43

Updates the requirements on [wheel](https://github.com/pypa/wheel) to permit the latest version.
- [Release notes](https://github.com/pypa/wheel/releases)
- [Changelog](https://github.com/pypa/wheel/blob/main/docs/news.rst)
- [Commits](https://github.com/pypa/wheel/compare/0.42.0...0.43.0)

---
updated-dependencies:
- dependency-name: wheel
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240503-224157.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20240503-224157.yaml

diff --git a/.changes/unreleased/Dependencies-20240503-224157.yaml b/.changes/unreleased/Dependencies-20240503-224157.yaml
new file mode 100644
index 00000000..9a7f0b8f
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240503-224157.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update wheel requirement from ~=0.42 to ~=0.43"
+time: 2024-05-03T22:41:57.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1035
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 3a3e8c8f..3f00401e 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -23,4 +23,4 @@ thrift_sasl~=0.4.3
 # build
 bumpversion~=0.6.0
 twine~=4.0
-wheel~=0.42
+wheel~=0.43

From d0474c358dcc9e37fbe911a185cd1b2e3bb86b3c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 18 Jul 2024 02:32:15 +0000
Subject: [PATCH 576/603] Bump actions/download-artifact from 3 to 4 (#1010)

* Bump actions/download-artifact from 3 to 4

Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4.
- [Release notes](https://github.com/actions/download-artifact/releases)
- [Commits](https://github.com/actions/download-artifact/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/download-artifact
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240419-024814.yaml | 6 ++++++
 .github/workflows/main.yml                            | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20240419-024814.yaml

diff --git a/.changes/unreleased/Dependencies-20240419-024814.yaml b/.changes/unreleased/Dependencies-20240419-024814.yaml
new file mode 100644
index 00000000..06529952
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240419-024814.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/download-artifact from 3 to 4"
+time: 2024-04-19T02:48:14.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1010
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index cbbb7f72..fd3c6707 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -184,7 +184,7 @@ jobs:
           python -m pip install --user --upgrade pip
           python -m pip install --upgrade wheel
           python -m pip --version
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
           name: dist
           path: dist/

From 0053c000a098f9a2d0f72770083556039a660cc2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 18 Jul 2024 13:02:50 +0000
Subject: [PATCH 577/603] Bump actions/upload-artifact from 3 to 4 (#1011)

* Bump actions/upload-artifact from 3 to 4

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3 to 4.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* add overwrite parameter which is implicitly true in v3 but false in v4

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependencies-20240419-024818.yaml | 6 ++++++
 .github/workflows/main.yml                            | 6 ++++--
 2 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20240419-024818.yaml

diff --git a/.changes/unreleased/Dependencies-20240419-024818.yaml b/.changes/unreleased/Dependencies-20240419-024818.yaml
new file mode 100644
index 00000000..ed51e119
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240419-024818.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/upload-artifact from 3 to 4"
+time: 2024-04-19T02:48:18.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1011
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index fd3c6707..a8908296 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -104,11 +104,12 @@ jobs:
         id: date
         run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts
 
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         if: always()
         with:
           name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv
           path: unit_results.csv
+          overwrite: true
 
   build:
     name: build packages
@@ -153,10 +154,11 @@ jobs:
           if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi
           echo "is_alpha=$is_alpha" >> $GITHUB_OUTPUT
 
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
           name: dist
           path: dist/
+          overwrite: true
 
   test-build:
     name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }}

From eae8d2fc1b500377e2abe7dee2391a4f44dfdc3b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 19 Jul 2024 17:12:51 +0000
Subject: [PATCH 578/603] Bump pre-commit from 3.7.0 to 3.7.1 (#1074)

* Bump pre-commit from 3.7.0 to 3.7.1

Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 3.7.0 to 3.7.1.
- [Release notes](https://github.com/pre-commit/pre-commit/releases)
- [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pre-commit/pre-commit/compare/v3.7.0...v3.7.1)

---
updated-dependencies:
- dependency-name: pre-commit
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* loosen pin on pre-commit

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .changes/unreleased/Dependencies-20240718-223238.yaml | 6 ++++++
 dev-requirements.txt                                  | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20240718-223238.yaml

diff --git a/.changes/unreleased/Dependencies-20240718-223238.yaml b/.changes/unreleased/Dependencies-20240718-223238.yaml
new file mode 100644
index 00000000..0c7fbacc
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240718-223238.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Loosen pin on pre-commit from 3.7.0 to 3.7"
+time: 2024-07-18T22:32:38.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1074
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 3f00401e..585745a0 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,8 +7,8 @@ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # dev
 ipdb~=0.13.13
-pre-commit==3.7.0;python_version >="3.9"
-pre-commit==3.5.0;python_version <"3.9"
+pre-commit~=3.7.0;python_version>="3.9"
+pre-commit~=3.5.0;python_version<"3.9"
 
 # test
 freezegun~=1.4

From 6cec5d53695f69e0ff13bc8331ccc634b4563558 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 19 Jul 2024 18:10:48 +0000
Subject: [PATCH 579/603] Update pytest-xdist requirement from ~=3.5 to ~=3.6
 (#1073)

* Update pytest-xdist requirement from ~=3.5 to ~=3.6

Updates the requirements on [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) to permit the latest version.
- [Release notes](https://github.com/pytest-dev/pytest-xdist/releases)
- [Changelog](https://github.com/pytest-dev/pytest-xdist/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest-xdist/compare/v3.5.0...v3.6.1)

---
updated-dependencies:
- dependency-name: pytest-xdist
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240718-223145.yaml | 6 ++++++
 dev-requirements.txt                                  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependencies-20240718-223145.yaml

diff --git a/.changes/unreleased/Dependencies-20240718-223145.yaml b/.changes/unreleased/Dependencies-20240718-223145.yaml
new file mode 100644
index 00000000..6d8433c5
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240718-223145.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytest-xdist requirement from ~=3.5 to ~=3.6"
+time: 2024-07-18T22:31:45.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1073
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 585745a0..41d8642b 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -17,7 +17,7 @@ pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
-pytest-xdist~=3.5
+pytest-xdist~=3.6
 thrift_sasl~=0.4.3
 
 # build

From a7854b50f6439641d27a59f81ef1398765701846 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 24 Jul 2024 00:23:52 -0400
Subject: [PATCH 580/603] make dependabot less aggressive (#1077)

---
 .github/dependabot.yml | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index fc44c9fe..746dcae2 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,18 +5,25 @@ updates:
     schedule:
       interval: "daily"
     rebase-strategy: "disabled"
+    ignore:
+      - dependency-name: "*"
+        update-types:
+          - version-update:semver-patch
   - package-ecosystem: "github-actions"
     directory: "/"
     schedule:
       interval: "weekly"
     rebase-strategy: "disabled"
+    ignore:
+      - dependency-name: "*"
+        update-types:
+          - version-update:semver-patch
   - package-ecosystem: "docker"
     directory: "/docker"
     schedule:
       interval: "weekly"
     rebase-strategy: "disabled"
-  - package-ecosystem: "docker"
-    directory: "/docker-dev"
-    schedule:
-      interval: "weekly"
-    rebase-strategy: "disabled"
+    ignore:
+      - dependency-name: "*"
+        update-types:
+          - version-update:semver-patch

From a52aac6e45c3d96748a4c58231972eab167208ee Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 24 Jul 2024 12:31:45 -0400
Subject: [PATCH 581/603] remove unused test dependencies (#1078)

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 dev-requirements.txt | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 41d8642b..055cb92f 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -9,16 +9,11 @@ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 ipdb~=0.13.13
 pre-commit~=3.7.0;python_version>="3.9"
 pre-commit~=3.5.0;python_version<"3.9"
-
-# test
-freezegun~=1.4
-mock~=5.1
 pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.6
-thrift_sasl~=0.4.3
 
 # build
 bumpversion~=0.6.0

From 46348bcb8f2582efdae3809360115b0c4e8e1ea7 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Wed, 31 Jul 2024 13:12:27 -0500
Subject: [PATCH 582/603] update netcat install method to fix ci/cd (#1084)

---
 dagger/spark-container/install_spark.sh | 2 +-
 docker/spark.Dockerfile                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dagger/spark-container/install_spark.sh b/dagger/spark-container/install_spark.sh
index 476f362a..a9816020 100755
--- a/dagger/spark-container/install_spark.sh
+++ b/dagger/spark-container/install_spark.sh
@@ -4,7 +4,7 @@ SPARK_VERSION=3.1.3
 HADOOP_VERSION=3.2
 
 apt-get update && \
-apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+apt-get install -y wget netcat-openbsd procps libpostgresql-jdbc-java && \
 wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
 tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
 rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
diff --git a/docker/spark.Dockerfile b/docker/spark.Dockerfile
index bb4d378e..49138af5 100644
--- a/docker/spark.Dockerfile
+++ b/docker/spark.Dockerfile
@@ -13,7 +13,7 @@ ENV SPARK_HOME /usr/spark
 ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
 
 RUN apt-get update && \
-    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+    apt-get install -y wget netcat-openbsd procps libpostgresql-jdbc-java && \
     wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
     tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
     rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \

From 2580ac5098296366a9ca2a4db01fd899ea908924 Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Thu, 1 Aug 2024 14:15:13 -0400
Subject: [PATCH 583/603] Integration workflow update to support all-in-one
 adapter testing (#1082)

---
 .github/scripts/update_dbt_core_branch.sh     | 17 ------
 .../scripts/update_dev_dependency_branches.sh | 21 ++++++++
 .github/workflows/integration.yml             | 53 +++++++++++++++----
 3 files changed, 64 insertions(+), 27 deletions(-)
 delete mode 100755 .github/scripts/update_dbt_core_branch.sh
 create mode 100755 .github/scripts/update_dev_dependency_branches.sh

diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh
deleted file mode 100755
index 1a5a5c2d..00000000
--- a/.github/scripts/update_dbt_core_branch.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash -e
-set -e
-
-git_branch=$1
-target_req_file="dev-requirements.txt"
-core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g"
-tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g"
-if [[ "$OSTYPE" == darwin* ]]; then
- # mac ships with a different version of sed that requires a delimiter arg
- sed -i "" "$core_req_sed_pattern" $target_req_file
- sed -i "" "$tests_req_sed_pattern" $target_req_file
-else
- sed -i "$core_req_sed_pattern" $target_req_file
- sed -i "$tests_req_sed_pattern" $target_req_file
-fi
-core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2)
-bumpversion --allow-dirty --new-version "$core_version" major
diff --git a/.github/scripts/update_dev_dependency_branches.sh b/.github/scripts/update_dev_dependency_branches.sh
new file mode 100755
index 00000000..022df6a8
--- /dev/null
+++ b/.github/scripts/update_dev_dependency_branches.sh
@@ -0,0 +1,21 @@
+#!/bin/bash -e
+set -e
+
+
+dbt_adapters_branch=$1
+dbt_core_branch=$2
+dbt_common_branch=$3
+target_req_file="dev-requirements.txt"
+core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${dbt_core_branch}#egg=dbt-core|g"
+adapters_req_sed_pattern="s|dbt-adapters.git|dbt-adapters.git@${dbt_adapters_branch}|g"
+common_req_sed_pattern="s|dbt-common.git|dbt-common.git@${dbt_common_branch}|g"
+if [[ "$OSTYPE" == darwin* ]]; then
+ # mac ships with a different version of sed that requires a delimiter arg
+ sed -i "" "$adapters_req_sed_pattern" $target_req_file
+ sed -i "" "$core_req_sed_pattern" $target_req_file
+ sed -i "" "$common_req_sed_pattern" $target_req_file
+else
+ sed -i "$adapters_req_sed_pattern" $target_req_file
+ sed -i "$core_req_sed_pattern" $target_req_file
+ sed -i "$common_req_sed_pattern" $target_req_file
+fi
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 1e058aa4..699d4539 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -10,6 +10,8 @@
 
 name: Adapter Integration Tests
 
+run-name: "${{ (contains(github.event_name, 'workflow_') && inputs.name) || github.event_name }}: ${{ (contains(github.event_name, 'workflow_') &&  inputs.adapter_branch) || github.ref_name }} by @${{ github.actor }}"
+
 on:
   push:
     branches:
@@ -24,10 +26,31 @@ on:
 
   workflow_dispatch:
     inputs:
-      dbt-core-branch:
-        description: "branch of dbt-core to use in dev-requirements.txt"
+      name:
+        description: "Name to associate with run (example: 'dbt-adapters-242')"
         required: false
         type: string
+        default: "Adapter Integration Tests"
+      adapter_branch:
+        description: "The branch of this adapter repository to use"
+        type: string
+        required: false
+        default: "main"
+      dbt_adapters_branch:
+        description: "The branch of dbt-adapters to use"
+        type: string
+        required: false
+        default: "main"
+      dbt_core_branch:
+        description: "The branch of dbt-core to use"
+        type: string
+        required: false
+        default: "main"
+      dbt_common_branch:
+        description: "The branch of dbt-common to use"
+        type: string
+        required: false
+        default: "main"
 
 # explicitly turn off permissions for `GITHUB_TOKEN`
 permissions: read-all
@@ -74,12 +97,19 @@ jobs:
       DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
 
     steps:
-      - name: Check out the repository
-        if: github.event_name != 'pull_request_target'
+      - name: Check out the repository (push)
+        if: github.event_name == 'push'
         uses: actions/checkout@v4
         with:
           persist-credentials: false
 
+      - name: Check out the repository (workflow_dispatch)
+        if: github.event_name == 'workflow_dispatch'
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+          ref: ${{ inputs.adapter_branch }}
+
       # explicitly checkout the branch for the PR,
       # this is necessary for the `pull_request` event
       - name: Check out the repository (PR)
@@ -95,17 +125,20 @@ jobs:
         with:
           python-version: "3.11"
 
+      - name: Update Adapters and Core branches (update dev-requirements.txt)
+        if: github.event_name == 'workflow_dispatch'
+        run: |
+          ./.github/scripts/update_dev_dependency_branches.sh \
+            ${{ inputs.dbt_adapters_branch }} \
+            ${{ inputs.dbt_core_branch }} \
+            ${{ inputs.dbt_common_branch }}
+          cat dev-requirements.txt
+
       - name: Install python dependencies
         run: |
           python -m pip install --user --upgrade pip
           python -m pip --version
           python -m pip install -r dagger/requirements.txt
 
-      - name: Update dev_requirements.txt
-        if: inputs.dbt-core-branch != ''
-        run: |
-          pip install bumpversion
-          ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
-
       - name: Run tests for ${{ matrix.test }}
         run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}

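As an aside, the sed calls in update_dev_dependency_branches.sh above simply rewrite the git requirement URLs in dev-requirements.txt to pin specific branches. A rough Python equivalent of the same rewrite, shown for illustration only (it is not part of the repository):

    import re
    import sys

    def pin_branches(text: str, adapters: str, core: str, common: str) -> str:
        # Mirror the script's three sed patterns: append @<branch> to each git requirement.
        text = re.sub(r"dbt-core\.git.*#egg=dbt-core", f"dbt-core.git@{core}#egg=dbt-core", text)
        text = re.sub(r"dbt-adapters\.git", f"dbt-adapters.git@{adapters}", text)
        text = re.sub(r"dbt-common\.git", f"dbt-common.git@{common}", text)
        return text

    if __name__ == "__main__":
        adapters_branch, core_branch, common_branch = sys.argv[1:4]
        with open("dev-requirements.txt") as f:
            contents = f.read()
        with open("dev-requirements.txt", "w") as f:
            f.write(pin_branches(contents, adapters_branch, core_branch, common_branch))
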
From 212442322b509eb34deee2ac000c85c1e4daba73 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Thu, 8 Aug 2024 15:56:10 -0700
Subject: [PATCH 584/603] Add support for 3.12 in workflow files and configs.
 (#1081)

* Add support for 3.12 in workflow files and configs.

* Try updating odbc

* Temporarily flip to 3.12

* Revert to 3.11. Add note about unixodbc

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
---
 .github/workflows/main.yml         | 4 ++--
 .github/workflows/release-prep.yml | 2 +-
 .pre-commit-config.yaml            | 1 +
 README.md                          | 7 +++++++
 requirements.txt                   | 2 +-
 setup.py                           | 3 ++-
 6 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a8908296..e4fc66cc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -75,7 +75,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
       - name: Check out the repository
@@ -173,7 +173,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-12, windows-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index 11af8214..9cb2c3e1 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -448,7 +448,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
       - name: Check out the repository
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e228e7d9..fdb19526 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -28,6 +28,7 @@ repos:
         -   --target-version=py39
         -   --target-version=py310
         -   --target-version=py311
+        -   --target-version=py312
         additional_dependencies: [flaky]
 
 -   repo: https://github.com/pycqa/flake8
diff --git a/README.md b/README.md
index 7e95b1fc..adeb1691 100644
--- a/README.md
+++ b/README.md
@@ -65,6 +65,13 @@ rm -rf ./.hive-metastore/
 rm -rf ./.spark-warehouse/
 ```
 
+#### Additional Configuration for macOS
+
+If installing on macOS, use `homebrew` to install the required dependencies.
+   ```sh
+   brew install unixodbc
+   ```
+
 ### Reporting bugs and contributing code
 
 -   Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt-spark/issues/new).
diff --git a/requirements.txt b/requirements.txt
index b32884c4..41263091 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pyhive[hive_pure_sasl]~=0.7.0
 requests>=2.28.1
 
-pyodbc~=4.0.39 --no-binary pyodbc
+pyodbc~=5.1.0 --no-binary pyodbc
 sqlparams>=3.0.0
 thrift>=0.13.0
 pyspark>=3.0.0,<4.0.0
diff --git a/setup.py b/setup.py
index fd985eba..9e1fa31e 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@ def _get_plugin_version_dict():
 package_version = "1.9.0a1"
 description = """The Apache Spark adapter plugin for dbt"""
 
-odbc_extras = ["pyodbc~=4.0.39"]
+odbc_extras = ["pyodbc~=5.1.0"]
 pyhive_extras = [
     "PyHive[hive_pure_sasl]~=0.7.0",
     "thrift>=0.11.0,<0.17.0",
@@ -87,6 +87,7 @@ def _get_plugin_version_dict():
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
     ],
     python_requires=">=3.8",
 )

From 3fc624cb99488e803956304c9dea2c10facab08d Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:00:36 -0700
Subject: [PATCH 585/603] Feature/http odbc conn extra (#1093)

* add support for extra odbc connection properties

* clean up

* fix typo in test_incremental_on_schema_change.py

* fix formatting

* changelog

* Add unit test and refactor unit test fixtures

* update changie

* update changie

* remove holdover code

* remove dbt-core ref

---------

Co-authored-by: nilan3 <nilanthanb1994@gmail.com>
Co-authored-by: Mike Alfare <mike.alfare@dbtlabs.com>
---
 .../unreleased/Features-20240910-175846.yaml  |   6 +
 .github/workflows/integration.yml             |   1 +
 .github/workflows/release-internal.yml        |   1 +
 .github/workflows/release-prep.yml            |   1 +
 dagger/run_dbt_spark_tests.py                 |   2 +-
 dbt/adapters/spark/connections.py             |  54 +++--
 tests/conftest.py                             |  16 ++
 .../test_incremental_on_schema_change.py      |   4 +-
 .../test_incremental_strategies.py            |  10 +-
 tests/functional/adapter/test_constraints.py  |  18 +-
 tests/functional/adapter/test_python_model.py |  16 +-
 .../adapter/test_store_test_failures.py       |   4 +-
 tests/unit/conftest.py                        |   1 +
 tests/unit/fixtures/__init__.py               |   0
 tests/unit/fixtures/profiles.py               | 174 +++++++++++++
 tests/unit/test_adapter.py                    | 229 +++++-------------
 16 files changed, 336 insertions(+), 201 deletions(-)
 create mode 100644 .changes/unreleased/Features-20240910-175846.yaml
 create mode 100644 tests/unit/conftest.py
 create mode 100644 tests/unit/fixtures/__init__.py
 create mode 100644 tests/unit/fixtures/profiles.py

diff --git a/.changes/unreleased/Features-20240910-175846.yaml b/.changes/unreleased/Features-20240910-175846.yaml
new file mode 100644
index 00000000..68ef8551
--- /dev/null
+++ b/.changes/unreleased/Features-20240910-175846.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Support custom ODBC connection parameters via `connection_string_suffix` config
+time: 2024-09-10T17:58:46.141332-04:00
+custom:
+  Author: colin-rogers-dbt jpoley nilan3
+  Issue: "1092"
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 699d4539..35bd9cae 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -76,6 +76,7 @@ jobs:
         test:
           - "apache_spark"
           - "spark_session"
+          - "spark_http_odbc"
           - "databricks_sql_endpoint"
           - "databricks_cluster"
           - "databricks_http_cluster"
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index d4e7a3c9..1a509031 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -79,6 +79,7 @@ jobs:
         test:
           - "apache_spark"
           - "spark_session"
+          - "spark_http_odbc"
           - "databricks_sql_endpoint"
           - "databricks_cluster"
           - "databricks_http_cluster"
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index 9cb2c3e1..9937463d 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -482,6 +482,7 @@ jobs:
         test:
           - "apache_spark"
           - "spark_session"
+          - "spark_http_odbc"
           - "databricks_sql_endpoint"
           - "databricks_cluster"
           - "databricks_http_cluster"
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 15f9cf2c..67fa5658 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -137,7 +137,7 @@ async def test_spark(test_args):
             spark_ctr, spark_host = get_spark_container(client)
             tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
 
-        elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]:
+        elif test_profile in ["databricks_cluster", "databricks_sql_endpoint", "spark_http_odbc"]:
             tst_container = (
                 tst_container.with_workdir("/")
                 .with_exec(["./scripts/configure_odbc.sh"])
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 0405eaf5..d9b615ec 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -78,6 +78,7 @@ class SparkCredentials(Credentials):
     auth: Optional[str] = None
     kerberos_service_name: Optional[str] = None
     organization: str = "0"
+    connection_string_suffix: Optional[str] = None
     connect_retries: int = 0
     connect_timeout: int = 10
     use_ssl: bool = False
@@ -483,38 +484,51 @@ def open(cls, connection: Connection) -> Connection:
                         http_path = cls.SPARK_SQL_ENDPOINT_HTTP_PATH.format(
                             endpoint=creds.endpoint
                         )
+                    elif creds.connection_string_suffix is not None:
+                        required_fields = ["driver", "host", "port", "connection_string_suffix"]
                     else:
                         raise DbtConfigError(
-                            "Either `cluster` or `endpoint` must set when"
+                            "Either `cluster`, `endpoint`, or `connection_string_suffix` must be set when"
                             " using the odbc method to connect to Spark"
                         )
 
                     cls.validate_creds(creds, required_fields)
-
                     dbt_spark_version = __version__.version
                     user_agent_entry = (
                         f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
                     )
-
                     # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm
                     ssp = {f"SSP_{k}": f"{{{v}}}" for k, v in creds.server_side_parameters.items()}
-
-                    # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm
-                    connection_str = _build_odbc_connnection_string(
-                        DRIVER=creds.driver,
-                        HOST=creds.host,
-                        PORT=creds.port,
-                        UID="token",
-                        PWD=creds.token,
-                        HTTPPath=http_path,
-                        AuthMech=3,
-                        SparkServerType=3,
-                        ThriftTransport=2,
-                        SSL=1,
-                        UserAgentEntry=user_agent_entry,
-                        LCaseSspKeyName=0 if ssp else 1,
-                        **ssp,
-                    )
+                    if creds.token is not None:
+                        # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm
+                        connection_str = _build_odbc_connnection_string(
+                            DRIVER=creds.driver,
+                            HOST=creds.host,
+                            PORT=creds.port,
+                            UID="token",
+                            PWD=creds.token,
+                            HTTPPath=http_path,
+                            AuthMech=3,
+                            SparkServerType=3,
+                            ThriftTransport=2,
+                            SSL=1,
+                            UserAgentEntry=user_agent_entry,
+                            LCaseSspKeyName=0 if ssp else 1,
+                            **ssp,
+                        )
+                    else:
+                        connection_str = _build_odbc_connnection_string(
+                            DRIVER=creds.driver,
+                            HOST=creds.host,
+                            PORT=creds.port,
+                            ThriftTransport=2,
+                            SSL=1,
+                            UserAgentEntry=user_agent_entry,
+                            LCaseSspKeyName=0 if ssp else 1,
+                            **ssp,
+                        )
+                    if creds.connection_string_suffix is not None:
+                        connection_str = connection_str + ";" + creds.connection_string_suffix
 
                     conn = pyodbc.connect(connection_str, autocommit=True)
                     handle = PyodbcConnectionWrapper(conn)
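To make the new branch above concrete: when no `token` is configured, the base connection string carries only driver, host, port, and transport settings, and authentication arrives via `connection_string_suffix`. A rough sketch of the assembled string with placeholder values (the exact key set comes from `_build_odbc_connnection_string` plus whatever suffix the profile supplies):

    # Illustrative only: shape of a suffix-based ODBC connection string.
    base = "DRIVER=<odbc-driver>;HOST=example.cloud.databricks.com;PORT=443;ThriftTransport=2;SSL=1"
    suffix = "UID=token;PWD=<access-token>;HTTPPath=/sql/1.0/endpoints/<endpoint-id>;AuthMech=3;SparkServerType=3"
    connection_str = base + ";" + suffix
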
diff --git a/tests/conftest.py b/tests/conftest.py
index efba41a5..09b31f40 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -30,6 +30,8 @@ def dbt_profile_target(request):
         target = databricks_http_cluster_target()
     elif profile_type == "spark_session":
         target = spark_session_target()
+    elif profile_type == "spark_http_odbc":
+        target = spark_http_odbc_target()
     else:
         raise ValueError(f"Invalid profile type '{profile_type}'")
     return target
@@ -102,6 +104,20 @@ def spark_session_target():
     }
 
 
+def spark_http_odbc_target():
+    return {
+        "type": "spark",
+        "method": "odbc",
+        "host": os.getenv("DBT_DATABRICKS_HOST_NAME"),
+        "port": 443,
+        "driver": os.getenv("ODBC_DRIVER"),
+        "connection_string_suffix": f'UID=token;PWD={os.getenv("DBT_DATABRICKS_TOKEN")};HTTPPath=/sql/1.0/endpoints/{os.getenv("DBT_DATABRICKS_ENDPOINT")};AuthMech=3;SparkServerType=3',
+        "connect_retries": 3,
+        "connect_timeout": 5,
+        "retry_all": True,
+    }
+
+
 @pytest.fixture(autouse=True)
 def skip_by_profile_type(request):
     profile_type = request.config.getoption("--profile")
diff --git a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
index 47832966..6f881697 100644
--- a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
+++ b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
@@ -21,7 +21,7 @@ def test_run_incremental_fail_on_schema_change(self, project):
         assert "Compilation Error" in results_two[1].message
 
 
-@pytest.mark.skip_profile("databricks_sql_endpoint")
+@pytest.mark.skip_profile("databricks_sql_endpoint", "spark_http_odbc")
 class TestAppendOnSchemaChange(IncrementalOnSchemaChangeIgnoreFail):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -32,7 +32,7 @@ def project_config_update(self):
         }
 
 
-@pytest.mark.skip_profile("databricks_sql_endpoint", "spark_session")
+@pytest.mark.skip_profile("databricks_sql_endpoint", "spark_session", "spark_http_odbc")
 class TestInsertOverwriteOnSchemaChange(IncrementalOnSchemaChangeIgnoreFail):
     @pytest.fixture(scope="class")
     def project_config_update(self):
diff --git a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
index b05fcb27..a44a1d23 100644
--- a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
+++ b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
@@ -55,7 +55,7 @@ def run_and_test(self, project):
         check_relations_equal(project.adapter, ["default_append", "expected_append"])
 
     @pytest.mark.skip_profile(
-        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session", "spark_http_odbc"
     )
     def test_default_append(self, project):
         self.run_and_test(project)
@@ -77,7 +77,7 @@ def run_and_test(self, project):
         check_relations_equal(project.adapter, ["insert_overwrite_partitions", "expected_upsert"])
 
     @pytest.mark.skip_profile(
-        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session", "spark_http_odbc"
     )
     def test_insert_overwrite(self, project):
         self.run_and_test(project)
@@ -103,7 +103,11 @@ def run_and_test(self, project):
         check_relations_equal(project.adapter, ["merge_update_columns", "expected_partial_upsert"])
 
     @pytest.mark.skip_profile(
-        "apache_spark", "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+        "apache_spark",
+        "databricks_http_cluster",
+        "databricks_sql_endpoint",
+        "spark_session",
+        "spark_http_odbc",
     )
     def test_delta_strategies(self, project):
         self.run_and_test(project)
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index e35a13a6..f3335926 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -183,7 +183,11 @@ def models(self):
 
 
 @pytest.mark.skip_profile(
-    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+    "spark_session",
+    "apache_spark",
+    "databricks_sql_endpoint",
+    "databricks_cluster",
+    "spark_http_odbc",
 )
 class TestSparkTableConstraintsColumnsEqualDatabricksHTTP(
     DatabricksHTTPSetup, BaseTableConstraintsColumnsEqual
@@ -198,7 +202,11 @@ def models(self):
 
 
 @pytest.mark.skip_profile(
-    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+    "spark_session",
+    "apache_spark",
+    "databricks_sql_endpoint",
+    "databricks_cluster",
+    "spark_http_odbc",
 )
 class TestSparkViewConstraintsColumnsEqualDatabricksHTTP(
     DatabricksHTTPSetup, BaseViewConstraintsColumnsEqual
@@ -213,7 +221,11 @@ def models(self):
 
 
 @pytest.mark.skip_profile(
-    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+    "spark_session",
+    "apache_spark",
+    "databricks_sql_endpoint",
+    "databricks_cluster",
+    "spark_http_odbc",
 )
 class TestSparkIncrementalConstraintsColumnsEqualDatabricksHTTP(
     DatabricksHTTPSetup, BaseIncrementalConstraintsColumnsEqual
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index cd798d1d..50132b88 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -8,12 +8,16 @@
 from dbt.tests.adapter.python_model.test_spark import BasePySparkTests
 
 
-@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+@pytest.mark.skip_profile(
+    "apache_spark", "spark_session", "databricks_sql_endpoint", "spark_http_odbc"
+)
 class TestPythonModelSpark(BasePythonModelTests):
     pass
 
 
-@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+@pytest.mark.skip_profile(
+    "apache_spark", "spark_session", "databricks_sql_endpoint", "spark_http_odbc"
+)
 class TestPySpark(BasePySparkTests):
     def test_different_dataframes(self, project):
         """
@@ -33,7 +37,9 @@ def test_different_dataframes(self, project):
         assert len(results) == 3
 
 
-@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+@pytest.mark.skip_profile(
+    "apache_spark", "spark_session", "databricks_sql_endpoint", "spark_http_odbc"
+)
 class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -78,7 +84,9 @@ def model(dbt, spark):
 """
 
 
-@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+@pytest.mark.skip_profile(
+    "apache_spark", "spark_session", "databricks_sql_endpoint", "spark_http_odbc"
+)
 class TestChangingSchemaSpark:
     """
     Confirm that we can set up a spot instance and parse required packages into the Databricks job.
diff --git a/tests/functional/adapter/test_store_test_failures.py b/tests/functional/adapter/test_store_test_failures.py
index e78bd4f7..3d8a4c19 100644
--- a/tests/functional/adapter/test_store_test_failures.py
+++ b/tests/functional/adapter/test_store_test_failures.py
@@ -7,7 +7,9 @@
 )
 
 
-@pytest.mark.skip_profile("spark_session", "databricks_cluster", "databricks_sql_endpoint")
+@pytest.mark.skip_profile(
+    "spark_session", "databricks_cluster", "databricks_sql_endpoint", "spark_http_odbc"
+)
 class TestSparkStoreTestFailures(StoreTestFailuresBase):
     @pytest.fixture(scope="class")
     def project_config_update(self):
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
new file mode 100644
index 00000000..c3b00035
--- /dev/null
+++ b/tests/unit/conftest.py
@@ -0,0 +1 @@
+from .fixtures.profiles import *
diff --git a/tests/unit/fixtures/__init__.py b/tests/unit/fixtures/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/fixtures/profiles.py b/tests/unit/fixtures/profiles.py
new file mode 100644
index 00000000..c5f24581
--- /dev/null
+++ b/tests/unit/fixtures/profiles.py
@@ -0,0 +1,174 @@
+import pytest
+
+from tests.unit.utils import config_from_parts_or_dicts
+
+
+@pytest.fixture(scope="session", autouse=True)
+def base_project_cfg():
+    return {
+        "name": "X",
+        "version": "0.1",
+        "profile": "test",
+        "project-root": "/tmp/dbt/does-not-exist",
+        "quoting": {
+            "identifier": False,
+            "schema": False,
+        },
+        "config-version": 2,
+    }
+
+
+@pytest.fixture(scope="session", autouse=True)
+def target_http(base_project_cfg):
+    config = config_from_parts_or_dicts(
+        base_project_cfg,
+        {
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "http",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 443,
+                    "token": "abc123",
+                    "organization": "0123456789",
+                    "cluster": "01234-23423-coffeetime",
+                    "server_side_parameters": {"spark.driver.memory": "4g"},
+                }
+            },
+            "target": "test",
+        },
+    )
+    return config
+
+
+@pytest.fixture(scope="session", autouse=True)
+def target_thrift(base_project_cfg):
+    return config_from_parts_or_dicts(
+        base_project_cfg,
+        {
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "thrift",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 10001,
+                    "user": "dbt",
+                }
+            },
+            "target": "test",
+        },
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def target_thrift_kerberos(base_project_cfg):
+    return config_from_parts_or_dicts(
+        base_project_cfg,
+        {
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "thrift",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 10001,
+                    "user": "dbt",
+                    "auth": "KERBEROS",
+                    "kerberos_service_name": "hive",
+                }
+            },
+            "target": "test",
+        },
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def target_use_ssl_thrift(base_project_cfg):
+    return config_from_parts_or_dicts(
+        base_project_cfg,
+        {
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "thrift",
+                    "use_ssl": True,
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 10001,
+                    "user": "dbt",
+                }
+            },
+            "target": "test",
+        },
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def target_odbc_cluster(base_project_cfg):
+    return config_from_parts_or_dicts(
+        base_project_cfg,
+        {
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "odbc",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 443,
+                    "token": "abc123",
+                    "organization": "0123456789",
+                    "cluster": "01234-23423-coffeetime",
+                    "driver": "Simba",
+                }
+            },
+            "target": "test",
+        },
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def target_odbc_sql_endpoint(base_project_cfg):
+    return config_from_parts_or_dicts(
+        base_project_cfg,
+        {
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "odbc",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 443,
+                    "token": "abc123",
+                    "endpoint": "012342342393920a",
+                    "driver": "Simba",
+                }
+            },
+            "target": "test",
+        },
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def target_odbc_with_extra_conn(base_project_cfg):
+    return config_from_parts_or_dicts(
+        base_project_cfg,
+        {
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "odbc",
+                    "host": "myorg.sparkhost.com",
+                    "schema": "analytics",
+                    "port": 443,
+                    "driver": "Simba",
+                    "connection_string_suffix": "someExtraValues",
+                    "connect_retries": 3,
+                    "connect_timeout": 5,
+                    "retry_all": True,
+                }
+            },
+            "target": "test",
+        },
+    )
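These session-scoped fixtures replace the per-test _get_target_* helpers that the following test_adapter.py diff removes. Because the adapter tests are unittest.TestCase classes, they cannot take fixtures as arguments directly; the patch bridges the gap with an autouse fixture that copies the injected configs onto self. A minimal standalone sketch of that pattern (the greeting fixture here is a made-up stand-in for the config fixtures):

    import unittest

    import pytest


    @pytest.fixture(scope="session")
    def greeting():
        # stand-in for a session-scoped config fixture such as target_http
        return "hello"


    class TestGreeting(unittest.TestCase):
        # unittest-style tests cannot declare fixture parameters themselves,
        # but an autouse fixture can request other fixtures and stash them on
        # the test instance before each test runs.
        @pytest.fixture(autouse=True)
        def _inject(self, greeting):
            self.greeting = greeting

        def test_uses_injected_fixture(self):
            self.assertEqual(self.greeting, "hello")

The new target_odbc_with_extra_conn fixture exercises the connection_string_suffix option; the corresponding unit test below only checks that the suffix and the driver, host, and port fragments appear in the generated ODBC connection string and that the connection opens.
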
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index 54e9f015..323e82a1 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -1,8 +1,8 @@
 import unittest
+import pytest
 from multiprocessing import get_context
 from unittest import mock
 
-import dbt.flags as flags
 from dbt.exceptions import DbtRuntimeError
 from agate import Row
 from pyhive import hive
@@ -11,143 +11,29 @@
 
 
 class TestSparkAdapter(unittest.TestCase):
-    def setUp(self):
-        flags.STRICT_MODE = False
-
-        self.project_cfg = {
-            "name": "X",
-            "version": "0.1",
-            "profile": "test",
-            "project-root": "/tmp/dbt/does-not-exist",
-            "quoting": {
-                "identifier": False,
-                "schema": False,
-            },
-            "config-version": 2,
-        }
-
-    def _get_target_http(self, project):
-        return config_from_parts_or_dicts(
-            project,
-            {
-                "outputs": {
-                    "test": {
-                        "type": "spark",
-                        "method": "http",
-                        "schema": "analytics",
-                        "host": "myorg.sparkhost.com",
-                        "port": 443,
-                        "token": "abc123",
-                        "organization": "0123456789",
-                        "cluster": "01234-23423-coffeetime",
-                        "server_side_parameters": {"spark.driver.memory": "4g"},
-                    }
-                },
-                "target": "test",
-            },
-        )
-
-    def _get_target_thrift(self, project):
-        return config_from_parts_or_dicts(
-            project,
-            {
-                "outputs": {
-                    "test": {
-                        "type": "spark",
-                        "method": "thrift",
-                        "schema": "analytics",
-                        "host": "myorg.sparkhost.com",
-                        "port": 10001,
-                        "user": "dbt",
-                    }
-                },
-                "target": "test",
-            },
-        )
-
-    def _get_target_thrift_kerberos(self, project):
-        return config_from_parts_or_dicts(
-            project,
-            {
-                "outputs": {
-                    "test": {
-                        "type": "spark",
-                        "method": "thrift",
-                        "schema": "analytics",
-                        "host": "myorg.sparkhost.com",
-                        "port": 10001,
-                        "user": "dbt",
-                        "auth": "KERBEROS",
-                        "kerberos_service_name": "hive",
-                    }
-                },
-                "target": "test",
-            },
-        )
-
-    def _get_target_use_ssl_thrift(self, project):
-        return config_from_parts_or_dicts(
-            project,
-            {
-                "outputs": {
-                    "test": {
-                        "type": "spark",
-                        "method": "thrift",
-                        "use_ssl": True,
-                        "schema": "analytics",
-                        "host": "myorg.sparkhost.com",
-                        "port": 10001,
-                        "user": "dbt",
-                    }
-                },
-                "target": "test",
-            },
-        )
-
-    def _get_target_odbc_cluster(self, project):
-        return config_from_parts_or_dicts(
-            project,
-            {
-                "outputs": {
-                    "test": {
-                        "type": "spark",
-                        "method": "odbc",
-                        "schema": "analytics",
-                        "host": "myorg.sparkhost.com",
-                        "port": 443,
-                        "token": "abc123",
-                        "organization": "0123456789",
-                        "cluster": "01234-23423-coffeetime",
-                        "driver": "Simba",
-                    }
-                },
-                "target": "test",
-            },
-        )
-
-    def _get_target_odbc_sql_endpoint(self, project):
-        return config_from_parts_or_dicts(
-            project,
-            {
-                "outputs": {
-                    "test": {
-                        "type": "spark",
-                        "method": "odbc",
-                        "schema": "analytics",
-                        "host": "myorg.sparkhost.com",
-                        "port": 443,
-                        "token": "abc123",
-                        "endpoint": "012342342393920a",
-                        "driver": "Simba",
-                    }
-                },
-                "target": "test",
-            },
-        )
+    @pytest.fixture(autouse=True)
+    def set_up_fixtures(
+        self,
+        target_http,
+        target_odbc_with_extra_conn,
+        target_thrift,
+        target_thrift_kerberos,
+        target_odbc_sql_endpoint,
+        target_odbc_cluster,
+        target_use_ssl_thrift,
+        base_project_cfg,
+    ):
+        self.base_project_cfg = base_project_cfg
+        self.target_http = target_http
+        self.target_odbc_with_extra_conn = target_odbc_with_extra_conn
+        self.target_odbc_sql_endpoint = target_odbc_sql_endpoint
+        self.target_odbc_cluster = target_odbc_cluster
+        self.target_thrift = target_thrift
+        self.target_thrift_kerberos = target_thrift_kerberos
+        self.target_use_ssl_thrift = target_use_ssl_thrift
 
     def test_http_connection(self):
-        config = self._get_target_http(self.project_cfg)
-        adapter = SparkAdapter(config, get_context("spawn"))
+        adapter = SparkAdapter(self.target_http, get_context("spawn"))
 
         def hive_http_connect(thrift_transport, configuration):
             self.assertEqual(thrift_transport.scheme, "https")
@@ -171,7 +57,7 @@ def hive_http_connect(thrift_transport, configuration):
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_connection(self):
-        config = self._get_target_thrift(self.project_cfg)
+        config = self.target_thrift
         adapter = SparkAdapter(config, get_context("spawn"))
 
         def hive_thrift_connect(
@@ -195,8 +81,7 @@ def hive_thrift_connect(
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_ssl_connection(self):
-        config = self._get_target_use_ssl_thrift(self.project_cfg)
-        adapter = SparkAdapter(config, get_context("spawn"))
+        adapter = SparkAdapter(self.target_use_ssl_thrift, get_context("spawn"))
 
         def hive_thrift_connect(thrift_transport, configuration):
             self.assertIsNotNone(thrift_transport)
@@ -215,8 +100,7 @@ def hive_thrift_connect(thrift_transport, configuration):
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_connection_kerberos(self):
-        config = self._get_target_thrift_kerberos(self.project_cfg)
-        adapter = SparkAdapter(config, get_context("spawn"))
+        adapter = SparkAdapter(self.target_thrift_kerberos, get_context("spawn"))
 
         def hive_thrift_connect(
             host, port, username, auth, kerberos_service_name, password, configuration
@@ -239,8 +123,7 @@ def hive_thrift_connect(
             self.assertIsNone(connection.credentials.database)
 
     def test_odbc_cluster_connection(self):
-        config = self._get_target_odbc_cluster(self.project_cfg)
-        adapter = SparkAdapter(config, get_context("spawn"))
+        adapter = SparkAdapter(self.target_odbc_cluster, get_context("spawn"))
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
@@ -266,8 +149,7 @@ def pyodbc_connect(connection_str, autocommit):
             self.assertIsNone(connection.credentials.database)
 
     def test_odbc_endpoint_connection(self):
-        config = self._get_target_odbc_sql_endpoint(self.project_cfg)
-        adapter = SparkAdapter(config, get_context("spawn"))
+        adapter = SparkAdapter(self.target_odbc_sql_endpoint, get_context("spawn"))
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
@@ -291,6 +173,26 @@ def pyodbc_connect(connection_str, autocommit):
             self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
+    def test_odbc_with_extra_connection_string(self):
+        adapter = SparkAdapter(self.target_odbc_with_extra_conn, get_context("spawn"))
+
+        def pyodbc_connect(connection_str, autocommit):
+            self.assertTrue(autocommit)
+            self.assertIn("driver=simba;", connection_str.lower())
+            self.assertIn("port=443;", connection_str.lower())
+            self.assertIn("host=myorg.sparkhost.com;", connection_str.lower())
+            self.assertIn("someExtraValues", connection_str)
+
+        with mock.patch(
+            "dbt.adapters.spark.connections.pyodbc.connect", new=pyodbc_connect
+        ):  # noqa
+            connection = adapter.acquire_connection("dummy")
+            connection.handle  # trigger lazy-load
+
+            self.assertEqual(connection.state, "open")
+            self.assertIsNotNone(connection.handle)
+            self.assertIsNone(connection.credentials.database)
+
     def test_parse_relation(self):
         self.maxDiff = None
         rel_type = SparkRelation.get_relation_type.Table
@@ -329,8 +231,7 @@ def test_parse_relation(self):
 
         input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
-        config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+        rows = SparkAdapter(self.target_http, get_context("spawn")).parse_describe_extended(
             relation, input_cols
         )
         self.assertEqual(len(rows), 4)
@@ -420,8 +321,7 @@ def test_parse_relation_with_integer_owner(self):
 
         input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
-        config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+        rows = SparkAdapter(self.target_http, get_context("spawn")).parse_describe_extended(
             relation, input_cols
         )
 
@@ -458,8 +358,7 @@ def test_parse_relation_with_statistics(self):
 
         input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
-        config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended(
+        rows = SparkAdapter(self.target_http, get_context("spawn")).parse_describe_extended(
             relation, input_cols
         )
         self.assertEqual(len(rows), 1)
@@ -489,8 +388,7 @@ def test_parse_relation_with_statistics(self):
         )
 
     def test_relation_with_database(self):
-        config = self._get_target_http(self.project_cfg)
-        adapter = SparkAdapter(config, get_context("spawn"))
+        adapter = SparkAdapter(self.target_http, get_context("spawn"))
         # fine
         adapter.Relation.create(schema="different", identifier="table")
         with self.assertRaises(DbtRuntimeError):
@@ -516,7 +414,7 @@ def test_profile_with_database(self):
             "target": "test",
         }
         with self.assertRaises(DbtRuntimeError):
-            config_from_parts_or_dicts(self.project_cfg, profile)
+            config_from_parts_or_dicts(self.base_project_cfg, profile)
 
     def test_profile_with_cluster_and_sql_endpoint(self):
         profile = {
@@ -536,7 +434,7 @@ def test_profile_with_cluster_and_sql_endpoint(self):
             "target": "test",
         }
         with self.assertRaises(DbtRuntimeError):
-            config_from_parts_or_dicts(self.project_cfg, profile)
+            config_from_parts_or_dicts(self.base_project_cfg, profile)
 
     def test_parse_columns_from_information_with_table_type_and_delta_provider(self):
         self.maxDiff = None
@@ -570,10 +468,9 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self)
             schema="default_schema", identifier="mytable", type=rel_type, information=information
         )
 
-        config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
-            relation
-        )
+        columns = SparkAdapter(
+            self.target_http, get_context("spawn")
+        ).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
         self.assertEqual(
             columns[0].to_column_dict(omit_none=False),
@@ -657,10 +554,9 @@ def test_parse_columns_from_information_with_view_type(self):
             schema="default_schema", identifier="myview", type=rel_type, information=information
         )
 
-        config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
-            relation
-        )
+        columns = SparkAdapter(
+            self.target_http, get_context("spawn")
+        ).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
         self.assertEqual(
             columns[1].to_column_dict(omit_none=False),
@@ -725,10 +621,9 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
             schema="default_schema", identifier="mytable", type=rel_type, information=information
         )
 
-        config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information(
-            relation
-        )
+        columns = SparkAdapter(
+            self.target_http, get_context("spawn")
+        ).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
 
         self.assertEqual(

From 8c6fcb4965cb6f9eb5c39284a94e8fd24e893003 Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Fri, 20 Sep 2024 16:31:16 -0400
Subject: [PATCH 586/603] Allow configuring snapshot column names (#1097)

---
 .../unreleased/Features-20240903-161003.yaml     |  6 ++++++
 .../spark/macros/materializations/snapshot.sql   | 16 +++++++++-------
 setup.py                                         |  2 +-
 3 files changed, 16 insertions(+), 8 deletions(-)
 create mode 100644 .changes/unreleased/Features-20240903-161003.yaml

diff --git a/.changes/unreleased/Features-20240903-161003.yaml b/.changes/unreleased/Features-20240903-161003.yaml
new file mode 100644
index 00000000..57a0f14c
--- /dev/null
+++ b/.changes/unreleased/Features-20240903-161003.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Allow configuring snapshot column names
+time: 2024-09-03T16:10:03.021221-04:00
+custom:
+  Author: gshank
+  Issue: "1096"
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index a397f84e..43c4750f 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -13,6 +13,7 @@
 
 
 {% macro spark__snapshot_merge_sql(target, source, insert_cols) -%}
+    {%- set columns = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() -%}
 
     merge into {{ target }} as DBT_INTERNAL_DEST
     {% if target.is_iceberg %}
@@ -21,12 +22,12 @@
     {% else %}
       using {{ source }} as DBT_INTERNAL_SOURCE
     {% endif %}
-    on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id
+    on DBT_INTERNAL_SOURCE.{{ columns.dbt_scd_id }} = DBT_INTERNAL_DEST.{{ columns.dbt_scd_id }}
     when matched
-     and DBT_INTERNAL_DEST.dbt_valid_to is null
+     and DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} is null
      and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete')
         then update
-        set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to
+        set {{ columns.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ columns.dbt_valid_to }}
 
     when not matched
      and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert'
@@ -81,13 +82,12 @@
 
 
 {% materialization snapshot, adapter='spark' %}
-  {%- set config = model['config'] -%}
 
   {%- set target_table = model.get('alias', model.get('name')) -%}
 
   {%- set strategy_name = config.get('strategy') -%}
   {%- set unique_key = config.get('unique_key') %}
-  {%- set file_format = config.get('file_format', 'parquet') -%}
+  {%- set file_format = config.get('file_format') or 'parquet' -%}
   {%- set grant_config = config.get('grants') -%}
 
   {% set target_relation_exists, target_relation = get_or_create_relation(
@@ -126,7 +126,7 @@
   {{ run_hooks(pre_hooks, inside_transaction=True) }}
 
   {% set strategy_macro = strategy_dispatch(strategy_name) %}
-  {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %}
+  {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", model['config'], target_relation_exists) %}
 
   {% if not target_relation_exists %}
 
@@ -135,7 +135,9 @@
 
   {% else %}
 
-      {{ adapter.valid_snapshot_target(target_relation) }}
+      {% set columns = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() %}
+
+      {{ adapter.valid_snapshot_target(target_relation, columns) }}
 
       {% set staging_table = spark_build_snapshot_staging_table(strategy, sql, target_relation) %}
 
diff --git a/setup.py b/setup.py
index 9e1fa31e..2de68a6f 100644
--- a/setup.py
+++ b/setup.py
@@ -66,7 +66,7 @@ def _get_plugin_version_dict():
     install_requires=[
         "sqlparams>=3.0.0",
         "dbt-common>=1.0.4,<2.0",
-        "dbt-adapters>=1.1.1,<2.0",
+        "dbt-adapters>=1.7.0,<2.0",
         # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
         "dbt-core>=1.8.0",
     ],
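The snapshot changes above make the merge predicates follow a configurable column mapping instead of the hard-coded dbt_scd_id and dbt_valid_to names. A rough Python rendering of the fragments spark__snapshot_merge_sql now emits (the custom names in the example are hypothetical):

    # Illustrative only: the SQL fragments the revised merge macro renders,
    # parameterized by a snapshot column-name mapping.
    DEFAULT_COLUMNS = {"dbt_scd_id": "dbt_scd_id", "dbt_valid_to": "dbt_valid_to"}


    def snapshot_merge_fragments(columns=None):
        cols = columns or DEFAULT_COLUMNS
        on = (
            f"DBT_INTERNAL_SOURCE.{cols['dbt_scd_id']}"
            f" = DBT_INTERNAL_DEST.{cols['dbt_scd_id']}"
        )
        matched = f"DBT_INTERNAL_DEST.{cols['dbt_valid_to']} is null"
        update = f"set {cols['dbt_valid_to']} = DBT_INTERNAL_SOURCE.{cols['dbt_valid_to']}"
        return on, matched, update


    # With custom names, the merge keys follow the configuration:
    print(snapshot_merge_fragments({"dbt_scd_id": "scd_id", "dbt_valid_to": "valid_to"}))

Note also the subtle switch from config.get('file_format', 'parquet') to config.get('file_format') or 'parquet': the latter falls back to parquet even when the key is present but set to an empty or null value, not only when it is missing.
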

From 101aad24eddd87eecb2b02674fd382d4579d98ae Mon Sep 17 00:00:00 2001
From: Michelle Ark <MichelleArk@users.noreply.github.com>
Date: Wed, 25 Sep 2024 22:28:55 +0100
Subject: [PATCH 587/603] Microbatch Strategy (#1108)

---
 .../unreleased/Features-20240925-125242.yaml  |  6 ++++++
 CONTRIBUTING.md                               |  1 +
 .../incremental/incremental.sql               |  2 +-
 .../incremental/strategies.sql                | 11 ++++++++++
 .../materializations/incremental/validate.sql |  6 +++---
 .../incremental_strategies/test_microbatch.py | 21 +++++++++++++++++++
 6 files changed, 43 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Features-20240925-125242.yaml
 create mode 100644 tests/functional/adapter/incremental_strategies/test_microbatch.py

diff --git a/.changes/unreleased/Features-20240925-125242.yaml b/.changes/unreleased/Features-20240925-125242.yaml
new file mode 100644
index 00000000..1cb51c00
--- /dev/null
+++ b/.changes/unreleased/Features-20240925-125242.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add Microbatch Strategy to dbt-spark
+time: 2024-09-25T12:52:42.872017+01:00
+custom:
+  Author: michelleark
+  Issue: "1109"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6fcaacea..903507b7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,6 +78,7 @@ python dagger/run_dbt_spark_tests.py --profile databricks_sql_endpoint --test-pa
 _options_:
   - "apache_spark"
   - "spark_session"
+  - "spark_http_odbc"
   - "databricks_sql_endpoint"
   - "databricks_cluster"
   - "databricks_http_cluster"
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 9a66bab5..77bfc59c 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -24,7 +24,7 @@
   {%- endif -%}
 
   {#-- Set Overwrite Mode --#}
-  {%- if strategy == 'insert_overwrite' and partition_by -%}
+  {%- if strategy in ['insert_overwrite', 'microbatch'] and partition_by -%}
     {%- call statement() -%}
       set spark.sql.sources.partitionOverwriteMode = DYNAMIC
     {%- endcall -%}
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index eeb92049..4ffead6a 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -75,6 +75,17 @@
   {%- elif strategy == 'insert_overwrite' -%}
     {#-- insert statements don't like CTEs, so support them via a temp view #}
     {{ get_insert_overwrite_sql(source, target, existing) }}
+  {%- elif strategy == 'microbatch' -%}
+    {#-- microbatch wraps insert_overwrite, and requires a partition_by config #}
+    {% set missing_partition_key_microbatch_msg -%}
+      dbt-spark 'microbatch' incremental strategy requires a `partition_by` config.
+      Ensure you are using a `partition_by` column that is of grain {{ config.get('batch_size') }}.
+    {%- endset %}
+
+    {%- if not config.get('partition_by') -%}
+      {{ exceptions.raise_compiler_error(missing_partition_key_microbatch_msg) }}
+    {%- endif -%}
+    {{ get_insert_overwrite_sql(source, target, existing) }}
   {%- elif strategy == 'merge' -%}
   {#-- merge all columns for datasources which implement MERGE INTO (e.g. databricks, iceberg) - schema changes are handled for us #}
     {{ get_merge_sql(target, source, unique_key, dest_columns=none, incremental_predicates=incremental_predicates) }}
diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql
index 0d4c4d8b..4a1ac994 100644
--- a/dbt/include/spark/macros/materializations/incremental/validate.sql
+++ b/dbt/include/spark/macros/materializations/incremental/validate.sql
@@ -21,7 +21,7 @@
 
   {% set invalid_strategy_msg -%}
     Invalid incremental strategy provided: {{ raw_strategy }}
-    Expected one of: 'append', 'merge', 'insert_overwrite'
+    Expected one of: 'append', 'merge', 'insert_overwrite', 'microbatch'
   {%- endset %}
 
   {% set invalid_merge_msg -%}
@@ -35,13 +35,13 @@
     Use the 'append' or 'merge' strategy instead
   {%- endset %}
 
-  {% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %}
+  {% if raw_strategy not in ['append', 'merge', 'insert_overwrite', 'microbatch'] %}
     {% do exceptions.raise_compiler_error(invalid_strategy_msg) %}
   {%-else %}
     {% if raw_strategy == 'merge' and file_format not in ['delta', 'iceberg', 'hudi'] %}
       {% do exceptions.raise_compiler_error(invalid_merge_msg) %}
     {% endif %}
-    {% if raw_strategy == 'insert_overwrite' and target.endpoint %}
+    {% if raw_strategy in ['insert_overwrite', 'microbatch'] and target.endpoint %}
       {% do exceptions.raise_compiler_error(invalid_insert_overwrite_endpoint_msg) %}
     {% endif %}
   {% endif %}
diff --git a/tests/functional/adapter/incremental_strategies/test_microbatch.py b/tests/functional/adapter/incremental_strategies/test_microbatch.py
new file mode 100644
index 00000000..088b35ba
--- /dev/null
+++ b/tests/functional/adapter/incremental_strategies/test_microbatch.py
@@ -0,0 +1,21 @@
+import pytest
+
+from dbt.tests.adapter.incremental.test_incremental_microbatch import (
+    BaseMicrobatch,
+)
+
+# No requirement for a unique_id for spark microbatch!
+_microbatch_model_no_unique_id_sql = """
+{{ config(materialized='incremental', incremental_strategy='microbatch', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0), partition_by=['date_day'], file_format='parquet') }}
+select *, cast(event_time as date) as date_day
+from {{ ref('input_model') }}
+"""
+
+
+@pytest.mark.skip_profile(
+    "databricks_http_cluster", "databricks_sql_endpoint", "spark_session", "spark_http_odbc"
+)
+class TestMicrobatch(BaseMicrobatch):
+    @pytest.fixture(scope="class")
+    def microbatch_model_sql(self) -> str:
+        return _microbatch_model_no_unique_id_sql
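The microbatch strategy reuses the insert_overwrite path: it turns on dynamic partition overwrite so each batch replaces only the partitions it contains, and it requires a partition_by config at the batch grain. A minimal PySpark sketch of that underlying Spark behavior (the events table and incoming_batch view are hypothetical):

    from datetime import date

    from pyspark.sql import SparkSession

    # Sketch of dynamic partition overwrite, which microbatch relies on:
    # INSERT OVERWRITE replaces only the partitions present in the new data.
    spark = SparkSession.builder.appName("dynamic-overwrite-sketch").getOrCreate()
    spark.conf.set("spark.sql.sources.partitionOverwriteMode", "DYNAMIC")

    spark.sql(
        "CREATE TABLE IF NOT EXISTS events (id INT, date_day DATE) "
        "USING parquet PARTITIONED BY (date_day)"
    )

    batch = spark.createDataFrame([(1, date(2020, 1, 1))], "id INT, date_day DATE")
    batch.createOrReplaceTempView("incoming_batch")

    # Only the 2020-01-01 partition is rewritten; other partitions are untouched.
    spark.sql("INSERT OVERWRITE TABLE events SELECT id, date_day FROM incoming_batch")
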

From 257e390375d4969656f9d53ebb074004e9654495 Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Thu, 26 Sep 2024 16:49:29 -0700
Subject: [PATCH 588/603] update dbt-common dependency to 1.10 and dbt-adapters
 to 1.7 (#1111)

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 2de68a6f..00aeba60 100644
--- a/setup.py
+++ b/setup.py
@@ -65,8 +65,8 @@ def _get_plugin_version_dict():
     include_package_data=True,
     install_requires=[
         "sqlparams>=3.0.0",
-        "dbt-common>=1.0.4,<2.0",
-        "dbt-adapters>=1.7.0,<2.0",
+        "dbt-common>=1.10,<2.0",
+        "dbt-adapters>=1.7,<2.0",
         # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
         "dbt-core>=1.8.0",
     ],

From e6e1e1039fbbfa9096329dbce12bdb3be191e1dd Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 15 Oct 2024 11:42:52 -0700
Subject: [PATCH 589/603] Set upper bound on numpy import in
 TestChangingSchemaSpark (#1120)

---
 tests/functional/adapter/test_python_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 50132b88..2ca1c221 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -67,7 +67,7 @@ def model(dbt, spark):
                 "ResourceClass": "SingleNode"
             }
         },
-        packages=['spacy', 'torch', 'pydantic>=1.10.8']
+        packages=['spacy', 'torch', 'pydantic>=1.10.8', 'numpy<2']
     )
     data = [[1,2]] * 10
     return spark.createDataFrame(data, schema=['test', 'test2'])

From 805a08e6e0442da43851ae2511d2b1532c8637ea Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Tue, 15 Oct 2024 17:31:32 -0400
Subject: [PATCH 590/603] Enable setting dbt_valid_to snapshot column to new
 setting dbt_valid_to_current (#1113)

---
 .changes/unreleased/Features-20240927-133927.yaml      | 6 ++++++
 dbt/include/spark/macros/materializations/snapshot.sql | 7 ++++++-
 tests/functional/adapter/test_python_model.py          | 6 +++++-
 3 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Features-20240927-133927.yaml

diff --git a/.changes/unreleased/Features-20240927-133927.yaml b/.changes/unreleased/Features-20240927-133927.yaml
new file mode 100644
index 00000000..ce04ac07
--- /dev/null
+++ b/.changes/unreleased/Features-20240927-133927.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Enable setting current value of dbt_valid_to
+time: 2024-09-27T13:39:27.268886-04:00
+custom:
+  Author: gshank
+  Issue: "1112"
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index 43c4750f..b4ef6e5d 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -24,7 +24,12 @@
     {% endif %}
     on DBT_INTERNAL_SOURCE.{{ columns.dbt_scd_id }} = DBT_INTERNAL_DEST.{{ columns.dbt_scd_id }}
     when matched
-     and DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} is null
+     {% if config.get("dbt_valid_to_current") %}
+       and ( DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} = {{ config.get('dbt_valid_to_current') }} or
+             DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} is null )
+     {% else %}
+       and DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} is null
+     {% endif %}
      and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete')
         then update
         set {{ columns.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ columns.dbt_valid_to }}
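With dbt_valid_to_current set, "current" snapshot rows may carry a far-future sentinel in dbt_valid_to instead of null, so the matched clause above accepts either form. A small Python rendering of that condition (the date sentinel shown is just an example value):

    # Illustrative only: the "when matched" condition the macro now renders.
    def snapshot_matched_condition(valid_to_col="dbt_valid_to", valid_to_current=None):
        if valid_to_current:
            return (
                f"( DBT_INTERNAL_DEST.{valid_to_col} = {valid_to_current} or "
                f"DBT_INTERNAL_DEST.{valid_to_col} is null )"
            )
        return f"DBT_INTERNAL_DEST.{valid_to_col} is null"


    print(snapshot_matched_condition("dbt_valid_to", "date('9999-12-31')"))
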
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 2ca1c221..2ecce266 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -85,7 +85,11 @@ def model(dbt, spark):
 
 
 @pytest.mark.skip_profile(
-    "apache_spark", "spark_session", "databricks_sql_endpoint", "spark_http_odbc"
+    "apache_spark",
+    "spark_session",
+    "databricks_sql_endpoint",
+    "spark_http_odbc",
+    "databricks_http_cluster",
 )
 class TestChangingSchemaSpark:
     """

From 45ab21a4cafc8e925061c008da496e96a07269fc Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 17 Oct 2024 15:17:07 -0400
Subject: [PATCH 591/603] Drop support for Python 3.8 (#1121)

* drop support for python 3.8

* drop support for python 3.8
---
 .changes/unreleased/Breaking Changes-20241016-184157.yaml | 6 ++++++
 .github/workflows/main.yml                                | 8 ++++----
 .github/workflows/release-internal.yml                    | 2 +-
 .github/workflows/release-prep.yml                        | 4 ++--
 .pre-commit-config.yaml                                   | 1 -
 Makefile                                                  | 4 ++--
 dagger/run_dbt_spark_tests.py                             | 2 +-
 dev-requirements.txt                                      | 3 +--
 docker/Dockerfile                                         | 4 ++--
 setup.py                                                  | 7 +++----
 10 files changed, 22 insertions(+), 19 deletions(-)
 create mode 100644 .changes/unreleased/Breaking Changes-20241016-184157.yaml

diff --git a/.changes/unreleased/Breaking Changes-20241016-184157.yaml b/.changes/unreleased/Breaking Changes-20241016-184157.yaml
new file mode 100644
index 00000000..0a2104e6
--- /dev/null
+++ b/.changes/unreleased/Breaking Changes-20241016-184157.yaml	
@@ -0,0 +1,6 @@
+kind: Breaking Changes
+body: Drop support for Python 3.8
+time: 2024-10-16T18:41:57.721002-04:00
+custom:
+  Author: mikealfare
+  Issue: "1121"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e4fc66cc..a4e2c7a4 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,7 +49,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.8'
+          python-version: '3.9'
 
       - name: Install python dependencies
         run: |
@@ -75,7 +75,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     steps:
       - name: Check out the repository
@@ -126,7 +126,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.8'
+          python-version: '3.9'
 
       - name: Install python dependencies
         run: |
@@ -173,7 +173,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-12, windows-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index 1a509031..702ef9ae 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -37,7 +37,7 @@ defaults:
     shell: "bash"
 
 env:
-  PYTHON_TARGET_VERSION: 3.8
+  PYTHON_TARGET_VERSION: 3.9
 
 jobs:
   run-unit-tests:
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index 9937463d..d5878ec1 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -84,7 +84,7 @@ defaults:
     shell: bash
 
 env:
-  PYTHON_TARGET_VERSION: 3.8
+  PYTHON_TARGET_VERSION: 3.9
   NOTIFICATION_PREFIX: "[Release Preparation]"
 
 jobs:
@@ -448,7 +448,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     steps:
       - name: Check out the repository
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fdb19526..6697bbeb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,7 +24,6 @@ repos:
     -   id: black
         args:
         -   --line-length=99
-        -   --target-version=py38
         -   --target-version=py39
         -   --target-version=py310
         -   --target-version=py311
diff --git a/Makefile b/Makefile
index ff4c0fc1..46b9af29 100644
--- a/Makefile
+++ b/Makefile
@@ -17,12 +17,12 @@ lint: ## Runs flake8 and mypy code checks against staged changes.
 	pre-commit run --all-files
 
 .PHONY: unit
-unit: ## Runs unit tests with py38.
+unit: ## Runs unit tests with py39.
 	@\
 	python -m pytest tests/unit
 
 .PHONY: test
-test: ## Runs unit tests with py38 and code checks against staged changes.
+test: ## Runs unit tests with py39 and code checks against staged changes.
 	@\
 	python -m pytest tests/unit; \
 	python dagger/run_dbt_spark_tests.py --profile spark_session \
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 67fa5658..6c310a6f 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -104,7 +104,7 @@ async def test_spark(test_args):
         platform = dagger.Platform("linux/amd64")
         tst_container = (
             client.container(platform=platform)
-            .from_("python:3.8-slim")
+            .from_("python:3.9-slim")
             .with_mounted_cache("/var/cache/apt/archives", os_reqs_cache)
             .with_mounted_cache("/root/.cache/pip", pip_cache)
             # install OS deps first so any local changes don't invalidate the cache
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 055cb92f..3947695c 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,8 +7,7 @@ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # dev
 ipdb~=0.13.13
-pre-commit~=3.7.0;python_version>="3.9"
-pre-commit~=3.5.0;python_version<"3.9"
+pre-commit~=3.7.0
 pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
diff --git a/docker/Dockerfile b/docker/Dockerfile
index ef4574dd..81e5e28f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,7 +1,7 @@
 # this image gets published to GHCR for production use
 ARG py_version=3.11.2
 
-FROM python:$py_version-slim-bullseye as base
+FROM python:$py_version-slim-bullseye AS base
 
 RUN apt-get update \
   && apt-get dist-upgrade -y \
@@ -29,7 +29,7 @@ ENV LANG=C.UTF-8
 RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir
 
 
-FROM base as dbt-spark
+FROM base AS dbt-spark
 
 ARG commit_ref=main
 ARG extras=all
diff --git a/setup.py b/setup.py
index 00aeba60..cf8ff569 100644
--- a/setup.py
+++ b/setup.py
@@ -4,9 +4,9 @@
 import re
 
 # require python 3.8 or newer
-if sys.version_info < (3, 8):
+if sys.version_info < (3, 9):
     print("Error: dbt does not support this version of Python.")
-    print("Please upgrade to Python 3.8 or higher.")
+    print("Please upgrade to Python 3.9 or higher.")
     sys.exit(1)
 
 # require version of setuptools that supports find_namespace_packages
@@ -83,11 +83,10 @@ def _get_plugin_version_dict():
         "Operating System :: Microsoft :: Windows",
         "Operating System :: MacOS :: MacOS X",
         "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
         "Programming Language :: Python :: 3.12",
     ],
-    python_requires=">=3.8",
+    python_requires=">=3.9",
 )

From 4863bdde9128910fbfd6b1457854491c11afd922 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 17 Oct 2024 20:33:12 +0000
Subject: [PATCH 592/603] [create-pull-request] automated change (#1116)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.9.0-b1.md                          | 31 ++++++++++++++++
 .../Dependencies-20231219-222211.yaml         |  0
 .../Dependencies-20240419-024814.yaml         |  0
 .../Dependencies-20240419-024818.yaml         |  0
 .../Dependencies-20240419-024820.yaml         |  0
 .../Dependencies-20240503-224157.yaml         |  0
 .../Dependencies-20240718-223145.yaml         |  0
 .../Dependencies-20240718-223238.yaml         |  0
 .../Features-20240430-185723.yaml             |  0
 .../Features-20240501-151904.yaml             |  0
 .../Features-20240903-161003.yaml             |  0
 .../Features-20240910-175846.yaml             |  0
 .../Features-20240925-125242.yaml             |  0
 .../Fixes-20240513-160121.yaml                |  0
 .../Under the Hood-20240612-195629.yaml       |  0
 CHANGELOG.md                                  | 35 +++++++++++++++++--
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 19 files changed, 67 insertions(+), 5 deletions(-)
 create mode 100644 .changes/1.9.0-b1.md
 rename .changes/{unreleased => 1.9.0}/Dependencies-20231219-222211.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Dependencies-20240419-024814.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Dependencies-20240419-024818.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Dependencies-20240419-024820.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Dependencies-20240503-224157.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Dependencies-20240718-223145.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Dependencies-20240718-223238.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Features-20240430-185723.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Features-20240501-151904.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Features-20240903-161003.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Features-20240910-175846.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Features-20240925-125242.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Fixes-20240513-160121.yaml (100%)
 rename .changes/{unreleased => 1.9.0}/Under the Hood-20240612-195629.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 77a3f463..40074e83 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.9.0a1
+current_version = 1.9.0b1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.9.0-b1.md b/.changes/1.9.0-b1.md
new file mode 100644
index 00000000..b6daee41
--- /dev/null
+++ b/.changes/1.9.0-b1.md
@@ -0,0 +1,31 @@
+## dbt-spark 1.9.0-b1 - October 01, 2024
+
+### Features
+
+- Add tests for cross-database `cast` macro ([#1028](https://github.com/dbt-labs/dbt-spark/issues/1028))
+- Cross-database `date` macro ([#1031](https://github.com/dbt-labs/dbt-spark/issues/1031))
+- Allow configuring snapshot column names ([#1096](https://github.com/dbt-labs/dbt-spark/issues/1096))
+- Support custom ODBC connection parameters via `connection_string_suffix` config ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
+- Add Microbatch Strategy to dbt-spark ([#1109](https://github.com/dbt-labs/dbt-spark/issues/1109))
+
+### Fixes
+
+- Fix incremental python models error where Databricks could not find the temp table transaction logs ([#1033](https://github.com/dbt-labs/dbt-spark/issues/1033))
+
+### Under the Hood
+
+- Lazy load agate to improve performance ([#1049](https://github.com/dbt-labs/dbt-spark/issues/1049))
+
+### Dependencies
+
+- Update freezegun requirement from ~=1.3 to ~=1.4 ([#966](https://github.com/dbt-labs/dbt-spark/pull/966))
+- Bump actions/download-artifact from 3 to 4 ([#1010](https://github.com/dbt-labs/dbt-spark/pull/1010))
+- Bump actions/upload-artifact from 3 to 4 ([#1011](https://github.com/dbt-labs/dbt-spark/pull/1011))
+- Bump dbt-labs/actions from 1.1.0 to 1.1.1 ([#1012](https://github.com/dbt-labs/dbt-spark/pull/1012))
+- Update wheel requirement from ~=0.42 to ~=0.43 ([#1035](https://github.com/dbt-labs/dbt-spark/pull/1035))
+- Update pytest-xdist requirement from ~=3.5 to ~=3.6 ([#1073](https://github.com/dbt-labs/dbt-spark/pull/1073))
+- Loosen pin on pre-commit from 3.7.0 to 3.7 ([#1074](https://github.com/dbt-labs/dbt-spark/pull/1074))
+
+### Contributors
+- [@jpoley](https://github.com/jpoley) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
+- [@nilan3](https://github.com/nilan3) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
diff --git a/.changes/unreleased/Dependencies-20231219-222211.yaml b/.changes/1.9.0/Dependencies-20231219-222211.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20231219-222211.yaml
rename to .changes/1.9.0/Dependencies-20231219-222211.yaml
diff --git a/.changes/unreleased/Dependencies-20240419-024814.yaml b/.changes/1.9.0/Dependencies-20240419-024814.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240419-024814.yaml
rename to .changes/1.9.0/Dependencies-20240419-024814.yaml
diff --git a/.changes/unreleased/Dependencies-20240419-024818.yaml b/.changes/1.9.0/Dependencies-20240419-024818.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240419-024818.yaml
rename to .changes/1.9.0/Dependencies-20240419-024818.yaml
diff --git a/.changes/unreleased/Dependencies-20240419-024820.yaml b/.changes/1.9.0/Dependencies-20240419-024820.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240419-024820.yaml
rename to .changes/1.9.0/Dependencies-20240419-024820.yaml
diff --git a/.changes/unreleased/Dependencies-20240503-224157.yaml b/.changes/1.9.0/Dependencies-20240503-224157.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240503-224157.yaml
rename to .changes/1.9.0/Dependencies-20240503-224157.yaml
diff --git a/.changes/unreleased/Dependencies-20240718-223145.yaml b/.changes/1.9.0/Dependencies-20240718-223145.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240718-223145.yaml
rename to .changes/1.9.0/Dependencies-20240718-223145.yaml
diff --git a/.changes/unreleased/Dependencies-20240718-223238.yaml b/.changes/1.9.0/Dependencies-20240718-223238.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240718-223238.yaml
rename to .changes/1.9.0/Dependencies-20240718-223238.yaml
diff --git a/.changes/unreleased/Features-20240430-185723.yaml b/.changes/1.9.0/Features-20240430-185723.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240430-185723.yaml
rename to .changes/1.9.0/Features-20240430-185723.yaml
diff --git a/.changes/unreleased/Features-20240501-151904.yaml b/.changes/1.9.0/Features-20240501-151904.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240501-151904.yaml
rename to .changes/1.9.0/Features-20240501-151904.yaml
diff --git a/.changes/unreleased/Features-20240903-161003.yaml b/.changes/1.9.0/Features-20240903-161003.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240903-161003.yaml
rename to .changes/1.9.0/Features-20240903-161003.yaml
diff --git a/.changes/unreleased/Features-20240910-175846.yaml b/.changes/1.9.0/Features-20240910-175846.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240910-175846.yaml
rename to .changes/1.9.0/Features-20240910-175846.yaml
diff --git a/.changes/unreleased/Features-20240925-125242.yaml b/.changes/1.9.0/Features-20240925-125242.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240925-125242.yaml
rename to .changes/1.9.0/Features-20240925-125242.yaml
diff --git a/.changes/unreleased/Fixes-20240513-160121.yaml b/.changes/1.9.0/Fixes-20240513-160121.yaml
similarity index 100%
rename from .changes/unreleased/Fixes-20240513-160121.yaml
rename to .changes/1.9.0/Fixes-20240513-160121.yaml
diff --git a/.changes/unreleased/Under the Hood-20240612-195629.yaml b/.changes/1.9.0/Under the Hood-20240612-195629.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20240612-195629.yaml
rename to .changes/1.9.0/Under the Hood-20240612-195629.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36a3ea69..7679e37b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,10 +5,41 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.9.0-b1 - October 01, 2024
+
+### Features
+
+- Add tests for cross-database `cast` macro ([#1028](https://github.com/dbt-labs/dbt-spark/issues/1028))
+- Cross-database `date` macro ([#1031](https://github.com/dbt-labs/dbt-spark/issues/1031))
+- Allow configuring snapshot column names ([#1096](https://github.com/dbt-labs/dbt-spark/issues/1096))
+- Support custom ODBC connection parameters via `connection_string_suffix` config ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
+- Add Microbatch Strategy to dbt-spark ([#1109](https://github.com/dbt-labs/dbt-spark/issues/1109))
+
+### Fixes
+
+- Fix incremental python models error where Databricks could not find the temp table transaction logs ([#1033](https://github.com/dbt-labs/dbt-spark/issues/1033))
+
+### Under the Hood
+
+- Lazy load agate to improve performance ([#1049](https://github.com/dbt-labs/dbt-spark/issues/1049))
+
+### Dependencies
+
+- Update freezegun requirement from ~=1.3 to ~=1.4 ([#966](https://github.com/dbt-labs/dbt-spark/pull/966))
+- Bump actions/download-artifact from 3 to 4 ([#1010](https://github.com/dbt-labs/dbt-spark/pull/1010))
+- Bump actions/upload-artifact from 3 to 4 ([#1011](https://github.com/dbt-labs/dbt-spark/pull/1011))
+- Bump dbt-labs/actions from 1.1.0 to 1.1.1 ([#1012](https://github.com/dbt-labs/dbt-spark/pull/1012))
+- Update wheel requirement from ~=0.42 to ~=0.43 ([#1035](https://github.com/dbt-labs/dbt-spark/pull/1035))
+- Update pytest-xdist requirement from ~=3.5 to ~=3.6 ([#1073](https://github.com/dbt-labs/dbt-spark/pull/1073))
+- Loosen pin on pre-commit from 3.7.0 to 3.7 ([#1074](https://github.com/dbt-labs/dbt-spark/pull/1074))
+
+### Contributors
+- [@jpoley](https://github.com/jpoley) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
+- [@nilan3](https://github.com/nilan3) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
+
+
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
-- [1.8](https://github.com/dbt-labs/dbt-spark/blob/1.8.latest/CHANGELOG.md)
-- [1.7](https://github.com/dbt-labs/dbt-spark/blob/1.7.latest/CHANGELOG.md)
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
 - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md)
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 6698ed64..a4077fff 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.9.0a1"
+version = "1.9.0b1"
diff --git a/setup.py b/setup.py
index cf8ff569..406c181d 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def _get_plugin_version_dict():
 
 
 package_name = "dbt-spark"
-package_version = "1.9.0a1"
+package_version = "1.9.0b1"
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=5.1.0"]

From d72693bc675cae2b3ef46973dd707680d8b13382 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Thu, 17 Oct 2024 23:16:58 -0400
Subject: [PATCH 593/603] Isolating distribution testing (#1105)

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .../Under the Hood-20240911-192845.yaml         |  6 ++++++
 .github/workflows/main.yml                      | 17 +++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240911-192845.yaml

diff --git a/.changes/unreleased/Under the Hood-20240911-192845.yaml b/.changes/unreleased/Under the Hood-20240911-192845.yaml
new file mode 100644
index 00000000..0c878f7d
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240911-192845.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Isolating distribution testing
+time: 2024-09-11T19:28:45.653064-04:00
+custom:
+    Author: leahwicz
+    Issue: "1069"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a4e2c7a4..9a9d6612 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -161,7 +161,7 @@ jobs:
           overwrite: true
 
   test-build:
-    name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }}
+    name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }} / ${{ matrix.dist-type }}
 
     if: needs.build.outputs.is_alpha == 0
 
@@ -174,6 +174,7 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-12, windows-latest]
         python-version: ["3.9", "3.10", "3.11", "3.12"]
+        dist-type: ["whl", "gz"]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
@@ -186,6 +187,7 @@ jobs:
           python -m pip install --user --upgrade pip
           python -m pip install --upgrade wheel
           python -m pip --version
+
       - uses: actions/download-artifact@v4
         with:
           name: dist
@@ -194,15 +196,10 @@ jobs:
       - name: Show distributions
         run: ls -lh dist/
 
-      - name: Install wheel distributions
-        run: |
-          find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
-      - name: Check wheel distributions
+      - name: Install ${{ matrix.dist-type }} distributions
         run: |
-          python -c "import dbt.adapters.spark"
-      - name: Install source distributions
-        run: |
-          find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
-      - name: Check source distributions
+          find ./dist/*.${{ matrix.dist-type }} -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
+
+      - name: Check ${{ matrix.dist-type }} distributions
         run: |
           python -c "import dbt.adapters.spark"

From afd1829dd5815a10564ca20f980f7c341709ded3 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 8 Nov 2024 16:17:08 -0500
Subject: [PATCH 594/603] move from macos-12 to macos-14 (#1136)

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 9a9d6612..287e5acb 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -172,7 +172,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-12, windows-latest]
+        os: [ubuntu-latest, macos-14, windows-latest]
         python-version: ["3.9", "3.10", "3.11", "3.12"]
         dist-type: ["whl", "gz"]
 

From 70a78e2a8b83c1affab4c784f5fb974b5ca98535 Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:55:19 -0800
Subject: [PATCH 595/603] move github runner from macos-12 to macos-14 (#1135)


From 0ccce2fdc7f2d6341e3b8fd7630eea516c427b65 Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 4 Dec 2024 09:05:42 -0800
Subject: [PATCH 596/603] update libpq-dev dependency to 13.18-0+deb11u1
 (#1148)

---
 docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 81e5e28f..3699ed9a 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -10,7 +10,7 @@ RUN apt-get update \
     ca-certificates=20210119 \
     gcc=4:10.2.1-1 \
     git=1:2.30.2-1+deb11u2 \
-    libpq-dev=13.14-0+deb11u1 \
+    libpq-dev=13.18-0+deb11u1 \
     libsasl2-dev=2.1.27+dfsg-2.1+deb11u1 \
     make=4.3-4.1 \
     openssh-client=1:8.4p1-5+deb11u3 \

From 268f3af7b16338af93e1fde06c76110a21c8577c Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 11 Dec 2024 10:41:16 -0800
Subject: [PATCH 597/603] Add version bump into release.yml (#1147)

* Add version bump into release.yml
* use removeSparkException branch for release-prep
---
 .github/workflows/release.yml | 40 +++++++++++++++++------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index cdbdaa13..aba99768 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -56,6 +56,11 @@ on:
         type: boolean
         default: true
         required: false
+      nightly_release:
+        description: "Nightly release to dev environment"
+        type: boolean
+        default: false
+        required: false
       only_docker:
         description: "Only release Docker image, skip GitHub & PyPI"
         type: boolean
@@ -86,43 +91,38 @@ jobs:
           echo Test run:                           ${{ inputs.test_run }}
           echo Only Docker:                        ${{ inputs.only_docker }}
 
-  # The Spark repository uses CircleCI to run integration tests.
-  # Because of this, the process of version bumps will be manual
-  # which means that this stage will be used to audit the version
-  # and changelog in sources.
-  # We are passing `env_setup_script_path` as an empty string
-  # so that the integration tests stage will be skipped.
-  audit-version-and-changelog:
-    name: "Bump package version, Generate changelog"
-    uses: dbt-labs/dbt-spark/.github/workflows/release-prep.yml@main
+  bump-version-generate-changelog:
+    name: Bump package version, Generate changelog
+    uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@removeSparkException
     with:
       sha: ${{ inputs.sha }}
       version_number: ${{ inputs.version_number }}
       target_branch: ${{ inputs.target_branch }}
-      env_setup_script_path: ""
+      env_setup_script_path: ${{ inputs.env_setup_script_path }}
       test_run: ${{ inputs.test_run }}
+      nightly_release: ${{ inputs.nightly_release }}
     secrets: inherit
 
-  log-outputs-audit-version-and-changelog:
+  log-outputs-bump-version-generate-changelog:
     name: "[Log output] Bump package version, Generate changelog"
     if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
-    needs: [audit-version-and-changelog]
+    needs: [bump-version-generate-changelog]
     runs-on: ubuntu-latest
     steps:
       - name: Print variables
         run: |
-          echo Final SHA     : ${{ needs.audit-version-and-changelog.outputs.final_sha }}
-          echo Changelog path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
+          echo Final SHA     : ${{ needs.bump-version-generate-changelog.outputs.final_sha }}
+          echo Changelog path: ${{ needs.bump-version-generate-changelog.outputs.changelog_path }}
 
   build-test-package:
     name: "Build, Test, Package"
     if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
-    needs: [audit-version-and-changelog]
+    needs: [bump-version-generate-changelog]
     uses: dbt-labs/dbt-release/.github/workflows/build.yml@main
     with:
-      sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
+      sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }}
       version_number: ${{ inputs.version_number }}
-      changelog_path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
+      changelog_path: ${{ needs.bump-version-generate-changelog.outputs.changelog_path }}
       build_script_path: ${{ inputs.build_script_path }}
       s3_bucket_name: ${{ inputs.s3_bucket_name }}
       package_test_command: ${{ inputs.package_test_command }}
@@ -134,12 +134,12 @@ jobs:
   github-release:
     name: "GitHub Release"
     if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
-    needs: [audit-version-and-changelog, build-test-package]
+    needs: [bump-version-generate-changelog, build-test-package]
     uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main
     with:
-      sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
+      sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }}
       version_number: ${{ inputs.version_number }}
-      changelog_path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
+      changelog_path: ${{ needs.bump-version-generate-changelog.outputs.changelog_path }}
       test_run: ${{ inputs.test_run }}
 
   pypi-release:

From a38a288d7d3868c88313350f7d369223b0f03a05 Mon Sep 17 00:00:00 2001
From: FishtownBuildBot <77737458+FishtownBuildBot@users.noreply.github.com>
Date: Wed, 11 Dec 2024 14:50:43 -0500
Subject: [PATCH 598/603] Cleanup main after cutting new 1.9.latest branch
 (#1145)

* Clean up changelog on main
* Bumping version to 1.10.0a1

---------

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.9.0-b1.md                          | 31 -----------------
 .../1.9.0/Dependencies-20231219-222211.yaml   |  6 ----
 .../1.9.0/Dependencies-20240419-024814.yaml   |  6 ----
 .../1.9.0/Dependencies-20240419-024818.yaml   |  6 ----
 .../1.9.0/Dependencies-20240419-024820.yaml   |  6 ----
 .../1.9.0/Dependencies-20240503-224157.yaml   |  6 ----
 .../1.9.0/Dependencies-20240718-223145.yaml   |  6 ----
 .../1.9.0/Dependencies-20240718-223238.yaml   |  6 ----
 .changes/1.9.0/Features-20240430-185723.yaml  |  6 ----
 .changes/1.9.0/Features-20240501-151904.yaml  |  6 ----
 .changes/1.9.0/Features-20240903-161003.yaml  |  6 ----
 .changes/1.9.0/Features-20240910-175846.yaml  |  6 ----
 .changes/1.9.0/Features-20240925-125242.yaml  |  6 ----
 .changes/1.9.0/Fixes-20240513-160121.yaml     |  7 ----
 .../1.9.0/Under the Hood-20240612-195629.yaml |  6 ----
 .../Breaking Changes-20241016-184157.yaml     |  6 ----
 .../unreleased/Features-20240927-133927.yaml  |  6 ----
 .../Under the Hood-20240911-192845.yaml       |  6 ----
 CHANGELOG.md                                  | 33 -------------------
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 22 files changed, 3 insertions(+), 170 deletions(-)
 delete mode 100644 .changes/1.9.0-b1.md
 delete mode 100644 .changes/1.9.0/Dependencies-20231219-222211.yaml
 delete mode 100644 .changes/1.9.0/Dependencies-20240419-024814.yaml
 delete mode 100644 .changes/1.9.0/Dependencies-20240419-024818.yaml
 delete mode 100644 .changes/1.9.0/Dependencies-20240419-024820.yaml
 delete mode 100644 .changes/1.9.0/Dependencies-20240503-224157.yaml
 delete mode 100644 .changes/1.9.0/Dependencies-20240718-223145.yaml
 delete mode 100644 .changes/1.9.0/Dependencies-20240718-223238.yaml
 delete mode 100644 .changes/1.9.0/Features-20240430-185723.yaml
 delete mode 100644 .changes/1.9.0/Features-20240501-151904.yaml
 delete mode 100644 .changes/1.9.0/Features-20240903-161003.yaml
 delete mode 100644 .changes/1.9.0/Features-20240910-175846.yaml
 delete mode 100644 .changes/1.9.0/Features-20240925-125242.yaml
 delete mode 100644 .changes/1.9.0/Fixes-20240513-160121.yaml
 delete mode 100644 .changes/1.9.0/Under the Hood-20240612-195629.yaml
 delete mode 100644 .changes/unreleased/Breaking Changes-20241016-184157.yaml
 delete mode 100644 .changes/unreleased/Features-20240927-133927.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20240911-192845.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 40074e83..f5eb7b79 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.9.0b1
+current_version = 1.10.0a1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.9.0-b1.md b/.changes/1.9.0-b1.md
deleted file mode 100644
index b6daee41..00000000
--- a/.changes/1.9.0-b1.md
+++ /dev/null
@@ -1,31 +0,0 @@
-## dbt-spark 1.9.0-b1 - October 01, 2024
-
-### Features
-
-- Add tests for cross-database `cast` macro ([#1028](https://github.com/dbt-labs/dbt-spark/issues/1028))
-- Cross-database `date` macro ([#1031](https://github.com/dbt-labs/dbt-spark/issues/1031))
-- Allow configuring snapshot column names ([#1096](https://github.com/dbt-labs/dbt-spark/issues/1096))
-- Support custom ODBC connection parameters via `connection_string_suffix` config ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
-- Add Microbatch Strategy to dbt-spark ([#1109](https://github.com/dbt-labs/dbt-spark/issues/1109))
-
-### Fixes
-
-- Fix incremental python models error where Databricks could not find the temp table transaction logs ([#1033](https://github.com/dbt-labs/dbt-spark/issues/1033))
-
-### Under the Hood
-
-- Lazy load agate to improve performance ([#1049](https://github.com/dbt-labs/dbt-spark/issues/1049))
-
-### Dependencies
-
-- Update freezegun requirement from ~=1.3 to ~=1.4 ([#966](https://github.com/dbt-labs/dbt-spark/pull/966))
-- Bump actions/download-artifact from 3 to 4 ([#1010](https://github.com/dbt-labs/dbt-spark/pull/1010))
-- Bump actions/upload-artifact from 3 to 4 ([#1011](https://github.com/dbt-labs/dbt-spark/pull/1011))
-- Bump dbt-labs/actions from 1.1.0 to 1.1.1 ([#1012](https://github.com/dbt-labs/dbt-spark/pull/1012))
-- Update wheel requirement from ~=0.42 to ~=0.43 ([#1035](https://github.com/dbt-labs/dbt-spark/pull/1035))
-- Update pytest-xdist requirement from ~=3.5 to ~=3.6 ([#1073](https://github.com/dbt-labs/dbt-spark/pull/1073))
-- Loosen pin on pre-commit from 3.7.0 to 3.7 ([#1074](https://github.com/dbt-labs/dbt-spark/pull/1074))
-
-### Contributors
-- [@jpoley](https://github.com/jpoley) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
-- [@nilan3](https://github.com/nilan3) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
diff --git a/.changes/1.9.0/Dependencies-20231219-222211.yaml b/.changes/1.9.0/Dependencies-20231219-222211.yaml
deleted file mode 100644
index 39f42509..00000000
--- a/.changes/1.9.0/Dependencies-20231219-222211.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update freezegun requirement from ~=1.3 to ~=1.4"
-time: 2023-12-19T22:22:11.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 966
diff --git a/.changes/1.9.0/Dependencies-20240419-024814.yaml b/.changes/1.9.0/Dependencies-20240419-024814.yaml
deleted file mode 100644
index 06529952..00000000
--- a/.changes/1.9.0/Dependencies-20240419-024814.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump actions/download-artifact from 3 to 4"
-time: 2024-04-19T02:48:14.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1010
diff --git a/.changes/1.9.0/Dependencies-20240419-024818.yaml b/.changes/1.9.0/Dependencies-20240419-024818.yaml
deleted file mode 100644
index ed51e119..00000000
--- a/.changes/1.9.0/Dependencies-20240419-024818.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump actions/upload-artifact from 3 to 4"
-time: 2024-04-19T02:48:18.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1011
diff --git a/.changes/1.9.0/Dependencies-20240419-024820.yaml b/.changes/1.9.0/Dependencies-20240419-024820.yaml
deleted file mode 100644
index 65caf7c1..00000000
--- a/.changes/1.9.0/Dependencies-20240419-024820.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump dbt-labs/actions from 1.1.0 to 1.1.1"
-time: 2024-04-19T02:48:20.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1012
diff --git a/.changes/1.9.0/Dependencies-20240503-224157.yaml b/.changes/1.9.0/Dependencies-20240503-224157.yaml
deleted file mode 100644
index 9a7f0b8f..00000000
--- a/.changes/1.9.0/Dependencies-20240503-224157.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update wheel requirement from ~=0.42 to ~=0.43"
-time: 2024-05-03T22:41:57.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1035
diff --git a/.changes/1.9.0/Dependencies-20240718-223145.yaml b/.changes/1.9.0/Dependencies-20240718-223145.yaml
deleted file mode 100644
index 6d8433c5..00000000
--- a/.changes/1.9.0/Dependencies-20240718-223145.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest-xdist requirement from ~=3.5 to ~=3.6"
-time: 2024-07-18T22:31:45.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1073
diff --git a/.changes/1.9.0/Dependencies-20240718-223238.yaml b/.changes/1.9.0/Dependencies-20240718-223238.yaml
deleted file mode 100644
index 0c7fbacc..00000000
--- a/.changes/1.9.0/Dependencies-20240718-223238.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Loosen pin on pre-commit from 3.7.0 to 3.7"
-time: 2024-07-18T22:32:38.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1074
diff --git a/.changes/1.9.0/Features-20240430-185723.yaml b/.changes/1.9.0/Features-20240430-185723.yaml
deleted file mode 100644
index cb86e796..00000000
--- a/.changes/1.9.0/Features-20240430-185723.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Add tests for cross-database `cast` macro
-time: 2024-04-30T18:57:23.881246-06:00
-custom:
-  Author: dbeatty10
-  Issue: "1028"
diff --git a/.changes/1.9.0/Features-20240501-151904.yaml b/.changes/1.9.0/Features-20240501-151904.yaml
deleted file mode 100644
index ed08f125..00000000
--- a/.changes/1.9.0/Features-20240501-151904.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Cross-database `date` macro
-time: 2024-05-01T15:19:04.822157-06:00
-custom:
-  Author: dbeatty10
-  Issue: 1031
diff --git a/.changes/1.9.0/Features-20240903-161003.yaml b/.changes/1.9.0/Features-20240903-161003.yaml
deleted file mode 100644
index 57a0f14c..00000000
--- a/.changes/1.9.0/Features-20240903-161003.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Allow configuring snapshot column names
-time: 2024-09-03T16:10:03.021221-04:00
-custom:
-  Author: gshank
-  Issue: "1096"
diff --git a/.changes/1.9.0/Features-20240910-175846.yaml b/.changes/1.9.0/Features-20240910-175846.yaml
deleted file mode 100644
index 68ef8551..00000000
--- a/.changes/1.9.0/Features-20240910-175846.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Support custom ODBC connection parameters via `connection_string_suffix` config
-time: 2024-09-10T17:58:46.141332-04:00
-custom:
-  Author: colin-rogers-dbt jpoley nilan3
-  Issue: "1092"
diff --git a/.changes/1.9.0/Features-20240925-125242.yaml b/.changes/1.9.0/Features-20240925-125242.yaml
deleted file mode 100644
index 1cb51c00..00000000
--- a/.changes/1.9.0/Features-20240925-125242.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Add Microbatch Strategy to dbt-spark
-time: 2024-09-25T12:52:42.872017+01:00
-custom:
-  Author: michelleark
-  Issue: "1109"
diff --git a/.changes/1.9.0/Fixes-20240513-160121.yaml b/.changes/1.9.0/Fixes-20240513-160121.yaml
deleted file mode 100644
index fd707295..00000000
--- a/.changes/1.9.0/Fixes-20240513-160121.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: Fix incremental python models error where Databricks could not find the temp
-  table transaction logs
-time: 2024-05-13T16:01:21.255833-04:00
-custom:
-  Author: mikealfare
-  Issue: "1033"
diff --git a/.changes/1.9.0/Under the Hood-20240612-195629.yaml b/.changes/1.9.0/Under the Hood-20240612-195629.yaml
deleted file mode 100644
index c90ebcda..00000000
--- a/.changes/1.9.0/Under the Hood-20240612-195629.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Lazy load agate to improve performance
-time: 2024-06-12T19:56:29.943204-07:00
-custom:
-  Author: versusfacit
-  Issue: "1049"
diff --git a/.changes/unreleased/Breaking Changes-20241016-184157.yaml b/.changes/unreleased/Breaking Changes-20241016-184157.yaml
deleted file mode 100644
index 0a2104e6..00000000
--- a/.changes/unreleased/Breaking Changes-20241016-184157.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Breaking Changes
-body: Drop support for Python 3.8
-time: 2024-10-16T18:41:57.721002-04:00
-custom:
-  Author: mikealfare
-  Issue: "1121"
diff --git a/.changes/unreleased/Features-20240927-133927.yaml b/.changes/unreleased/Features-20240927-133927.yaml
deleted file mode 100644
index ce04ac07..00000000
--- a/.changes/unreleased/Features-20240927-133927.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Enable setting current value of dbt_valid_to
-time: 2024-09-27T13:39:27.268886-04:00
-custom:
-  Author: gshank
-  Issue: "1112"
diff --git a/.changes/unreleased/Under the Hood-20240911-192845.yaml b/.changes/unreleased/Under the Hood-20240911-192845.yaml
deleted file mode 100644
index 0c878f7d..00000000
--- a/.changes/unreleased/Under the Hood-20240911-192845.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Isolating distribution testing
-time: 2024-09-11T19:28:45.653064-04:00
-custom:
-    Author: leahwicz
-    Issue: "1069"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7679e37b..902db37f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,39 +5,6 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-## dbt-spark 1.9.0-b1 - October 01, 2024
-
-### Features
-
-- Add tests for cross-database `cast` macro ([#1028](https://github.com/dbt-labs/dbt-spark/issues/1028))
-- Cross-database `date` macro ([#1031](https://github.com/dbt-labs/dbt-spark/issues/1031))
-- Allow configuring snapshot column names ([#1096](https://github.com/dbt-labs/dbt-spark/issues/1096))
-- Support custom ODBC connection parameters via `connection_string_suffix` config ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
-- Add Microbatch Strategy to dbt-spark ([#1109](https://github.com/dbt-labs/dbt-spark/issues/1109))
-
-### Fixes
-
-- Fix incremental python models error where Databricks could not find the temp table transaction logs ([#1033](https://github.com/dbt-labs/dbt-spark/issues/1033))
-
-### Under the Hood
-
-- Lazy load agate to improve performance ([#1049](https://github.com/dbt-labs/dbt-spark/issues/1049))
-
-### Dependencies
-
-- Update freezegun requirement from ~=1.3 to ~=1.4 ([#966](https://github.com/dbt-labs/dbt-spark/pull/966))
-- Bump actions/download-artifact from 3 to 4 ([#1010](https://github.com/dbt-labs/dbt-spark/pull/1010))
-- Bump actions/upload-artifact from 3 to 4 ([#1011](https://github.com/dbt-labs/dbt-spark/pull/1011))
-- Bump dbt-labs/actions from 1.1.0 to 1.1.1 ([#1012](https://github.com/dbt-labs/dbt-spark/pull/1012))
-- Update wheel requirement from ~=0.42 to ~=0.43 ([#1035](https://github.com/dbt-labs/dbt-spark/pull/1035))
-- Update pytest-xdist requirement from ~=3.5 to ~=3.6 ([#1073](https://github.com/dbt-labs/dbt-spark/pull/1073))
-- Loosen pin on pre-commit from 3.7.0 to 3.7 ([#1074](https://github.com/dbt-labs/dbt-spark/pull/1074))
-
-### Contributors
-- [@jpoley](https://github.com/jpoley) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
-- [@nilan3](https://github.com/nilan3) ([#1092](https://github.com/dbt-labs/dbt-spark/issues/1092))
-
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index a4077fff..1af777a6 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.9.0b1"
+version = "1.10.0a1"
diff --git a/setup.py b/setup.py
index 406c181d..aa3be661 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def _get_plugin_version_dict():
 
 
 package_name = "dbt-spark"
-package_version = "1.9.0b1"
+package_version = "1.10.0a1"
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=5.1.0"]

From 0247ad9e4f2de2bf584d11b1c133fda9c9f8def8 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 20 Dec 2024 11:00:09 -0500
Subject: [PATCH 599/603] ADAP-1118: Migrate to pyproject.toml (#1150)

* update to pyproject.toml
* install all optional dependencies in the default environment for testing
* update dagger script for hatch
* update hatch integration tests command
* keep the dagger reqs in their own file; dagger requires py310 and we test on py39
* update spark container setup to be clearer about what is happening and when (see the hatch usage sketch below)
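
As a quick orientation for reviewers, a minimal sketch of the day-to-day commands this
migration enables, assuming a local checkout with hatch installed. The script names come
from the [envs.default.scripts] and [envs.build.scripts] tables in the hatch.toml added
below; everything else here is illustrative rather than prescriptive:

    # one-time setup: install the pre-commit hooks into the default hatch env
    hatch run setup

    # lint and unit test (roughly replaces the removed `make lint` / `make unit`)
    hatch run code-quality
    hatch run unit-tests

    # integration tests via dagger; --profile defaults to apache_spark
    hatch run integration-tests --profile apache_spark

    # build the sdist/wheel and run the packaging checks
    # (replaces scripts/build-dist.sh, twine check, and check-wheel-contents in main.yml)
    hatch build
    hatch run build:check-all
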
---
 .bumpversion.cfg                              | 33 -------
 .../Under the Hood-20241207-181814.yaml       |  6 ++
 .github/scripts/update_dependencies.sh        |  6 +-
 .../scripts/update_dev_dependency_branches.sh |  4 +-
 .github/workflows/integration.yml             | 12 +--
 .github/workflows/main.yml                    | 52 ++---------
 .github/workflows/release-internal.yml        | 26 +-----
 .github/workflows/release-prep.yml            | 63 +++----------
 .github/workflows/version-bump.yml            | 28 ------
 .pre-commit-config.yaml                       |  2 +
 MANIFEST.in                                   |  1 -
 Makefile                                      | 44 ---------
 dagger/run_dbt_spark_tests.py                 | 89 +++++++++---------
 dev-requirements.txt                          | 20 ----
 hatch.toml                                    | 59 ++++++++++++
 pyproject.toml                                | 61 ++++++++++++
 pytest.ini                                    |  9 --
 requirements.txt                              | 11 ---
 setup.py                                      | 92 -------------------
 tox.ini                                       |  3 -
 20 files changed, 206 insertions(+), 415 deletions(-)
 delete mode 100644 .bumpversion.cfg
 create mode 100644 .changes/unreleased/Under the Hood-20241207-181814.yaml
 delete mode 100644 .github/workflows/version-bump.yml
 delete mode 100644 MANIFEST.in
 delete mode 100644 Makefile
 delete mode 100644 dev-requirements.txt
 create mode 100644 hatch.toml
 create mode 100644 pyproject.toml
 delete mode 100644 pytest.ini
 delete mode 100644 requirements.txt
 delete mode 100644 setup.py
 delete mode 100644 tox.ini

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
deleted file mode 100644
index f5eb7b79..00000000
--- a/.bumpversion.cfg
+++ /dev/null
@@ -1,33 +0,0 @@
-[bumpversion]
-current_version = 1.10.0a1
-parse = (?P<major>[\d]+) # major version number
-	\.(?P<minor>[\d]+) # minor version number
-	\.(?P<patch>[\d]+) # patch version number
-	(((?P<prekind>a|b|rc) # optional pre-release type
-	?(?P<num>[\d]+?)) # optional pre-release version number
-	\.?(?P<nightly>[a-z0-9]+\+[a-z]+)? # optional nightly release indicator
-	)? # expected matches: `1.5.0`, `1.5.0a1`, `1.5.0a1.dev123457+nightly`, expected failures: `1`, `1.5`, `1.5.2-a1`, `text1.5.0`
-serialize =
-	{major}.{minor}.{patch}{prekind}{num}.{nightly}
-	{major}.{minor}.{patch}{prekind}{num}
-	{major}.{minor}.{patch}
-commit = False
-tag = False
-
-[bumpversion:part:prekind]
-first_value = a
-optional_value = final
-values =
-	a
-	b
-	rc
-	final
-
-[bumpversion:part:num]
-first_value = 1
-
-[bumpversion:part:nightly]
-
-[bumpversion:file:setup.py]
-
-[bumpversion:file:dbt/adapters/spark/__version__.py]
diff --git a/.changes/unreleased/Under the Hood-20241207-181814.yaml b/.changes/unreleased/Under the Hood-20241207-181814.yaml
new file mode 100644
index 00000000..c76974d4
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20241207-181814.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Move from setup.py to pyproject.toml and to hatch as a dev tool
+time: 2024-12-07T18:18:14.85697-05:00
+custom:
+  Author: mikealfare
+  Issue: "1150"
diff --git a/.github/scripts/update_dependencies.sh b/.github/scripts/update_dependencies.sh
index c3df48e5..fabdadff 100644
--- a/.github/scripts/update_dependencies.sh
+++ b/.github/scripts/update_dependencies.sh
@@ -2,9 +2,9 @@
 set -e
 
 git_branch=$1
-target_req_file="dev-requirements.txt"
-core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g"
-tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g"
+target_req_file="hatch.toml"
+core_req_sed_pattern="s|dbt-core.git.*#subdirectory=core|dbt-core.git@${git_branch}#subdirectory=core|g"
+tests_req_sed_pattern="s|dbt-adapters.git.*#subdirectory=dbt-tests-adapter|dbt-adapters.git@${git_branch}#subdirectory=dbt-tests-adapter|g"
 if [[ "$OSTYPE" == darwin* ]]; then
  # mac ships with a different version of sed that requires a delimiter arg
  sed -i "" "$core_req_sed_pattern" $target_req_file
diff --git a/.github/scripts/update_dev_dependency_branches.sh b/.github/scripts/update_dev_dependency_branches.sh
index 022df6a8..9385cf88 100755
--- a/.github/scripts/update_dev_dependency_branches.sh
+++ b/.github/scripts/update_dev_dependency_branches.sh
@@ -5,8 +5,8 @@ set -e
 dbt_adapters_branch=$1
 dbt_core_branch=$2
 dbt_common_branch=$3
-target_req_file="dev-requirements.txt"
-core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${dbt_core_branch}#egg=dbt-core|g"
+target_req_file="hatch.toml"
+core_req_sed_pattern="s|dbt-core.git.*#subdirectory=core|dbt-core.git@${dbt_core_branch}#subdirectory=core|g"
 adapters_req_sed_pattern="s|dbt-adapters.git|dbt-adapters.git@${dbt_adapters_branch}|g"
 common_req_sed_pattern="s|dbt-common.git|dbt-common.git@${dbt_common_branch}|g"
 if [[ "$OSTYPE" == darwin* ]]; then
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 35bd9cae..870ac13b 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -133,13 +133,13 @@ jobs:
             ${{ inputs.dbt_adapters_branch }} \
             ${{ inputs.dbt_core_branch }} \
             ${{ inputs.dbt_common_branch }}
-          cat dev-requirements.txt
+          cat hatch.toml
+
+      - name: Install hatch
+        uses: pypa/hatch@install
 
       - name: Install python dependencies
-        run: |
-          python -m pip install --user --upgrade pip
-          python -m pip --version
-          python -m pip install -r dagger/requirements.txt
+        run: hatch run pip install -r dagger/requirements.txt
 
       - name: Run tests for ${{ matrix.test }}
-        run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
+        run: hatch run integration-tests --profile ${{ matrix.test }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 287e5acb..458048a7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -51,20 +51,8 @@ jobs:
         with:
           python-version: '3.9'
 
-      - name: Install python dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install libsasl2-dev
-          python -m pip install --user --upgrade pip
-          python -m pip --version
-          python -m pip install pre-commit
-          pre-commit --version
-          python -m pip install -r requirements.txt
-          python -m pip install -r dev-requirements.txt
-          python -c "import dbt.adapters.spark"
-
       - name: Run pre-commit hooks
-        run: pre-commit run --all-files --show-diff-on-failure
+        uses: pre-commit/action@v3.0.1
 
   unit:
     name: unit test / python ${{ matrix.python-version }}
@@ -87,29 +75,9 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Install python dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install libsasl2-dev
-          python -m pip install --user --upgrade pip
-          python -m pip --version
-          python -m pip install -r requirements.txt
-          python -m pip install -r dev-requirements.txt
-          python -m pip install -e .
-
-      - name: Run unit tests
-        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
+        uses: pypa/hatch@install
 
-      - name: Get current date
-        if: always()
-        id: date
-        run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts
-
-      - uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv
-          path: unit_results.csv
-          overwrite: true
+      - run: hatch run unit-tests
 
   build:
     name: build packages
@@ -128,24 +96,16 @@ jobs:
         with:
           python-version: '3.9'
 
-      - name: Install python dependencies
-        run: |
-          python -m pip install --user --upgrade pip
-          python -m pip install --upgrade setuptools wheel twine check-wheel-contents
-          python -m pip --version
+      - uses: pypa/hatch@install
 
       - name: Build distributions
-        run: ./scripts/build-dist.sh
+        run: hatch build
 
       - name: Show distributions
         run: ls -lh dist/
 
       - name: Check distribution descriptions
-        run: |
-          twine check dist/*
-      - name: Check wheel contents
-        run: |
-          check-wheel-contents dist/*.whl --ignore W007,W008
+        run: hatch run build:check-all
 
       - name: Check if this is an alpha version
         id: check-is-alpha
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index 702ef9ae..c467d3fc 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -56,17 +56,9 @@ jobs:
           python-version: "${{ env.PYTHON_TARGET_VERSION }}"
 
       - name: Install python dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install libsasl2-dev
-          python -m pip install --user --upgrade pip
-          python -m pip --version
-          python -m pip install -r requirements.txt
-          python -m pip install -r dev-requirements.txt
-          python -m pip install -e .
+        uses: pypa/hatch@install
 
-      - name: Run unit tests
-        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
+      - run: hatch run unit-tests
 
   run-integration-tests:
     name: "${{ matrix.test }}"
@@ -102,21 +94,11 @@ jobs:
 
     steps:
       - name: Check out the repository
-        if: github.event_name != 'pull_request_target'
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      # explicitly checkout the branch for the PR,
-      # this is necessary for the `pull_request` event
-      - name: Check out the repository (PR)
-        if: github.event_name == 'pull_request_target'
         uses: actions/checkout@v4
         with:
           persist-credentials: false
-          ref: ${{ github.event.pull_request.head.ref }}
 
-      # the python version used here is not what is used in the tests themselves
+      # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
         uses: actions/setup-python@v5
         with:
@@ -140,6 +122,6 @@ jobs:
       package_test_command: "${{ inputs.package_test_command }}"
       dbms_name: "spark"
       ref: "${{ inputs.ref }}"
-      skip_tests: "${{ inputs.skip_tests }}"
+      skip_tests: "true"
 
     secrets: "inherit"
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index d5878ec1..ce07447d 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -168,8 +168,8 @@ jobs:
         run: |
           if [[ ${{ steps.set_existence.outputs.exists }} != true ]]
           then
-            title="Spark version-bump.yml check"
-            message="dbt-spark needs version-bump.yml run before running the release.  The changelog is not up to date."
+            title="Spark version bump check"
+            message="dbt-spark needs a version bump before running the release. The changelog is not up to date."
             echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
             exit 1
           fi
@@ -194,11 +194,14 @@ jobs:
         with:
           ref: ${{ inputs.sha }}
 
+      - uses: pypa/hatch@install
+
       - name: "Check Current Version In Code"
         id: version-check
         run: |
           is_updated=false
-          if grep -Fxq "current_version = ${{ inputs.version_number }}" .bumpversion.cfg
+          current_version=$(hatch version)
+          if test "$current_version" = "${{ inputs.version_number }}"
           then
             is_updated=true
           fi
@@ -383,26 +386,10 @@ jobs:
         with:
           python-version: ${{ env.PYTHON_TARGET_VERSION }}
 
-      - name: "Install Python Dependencies"
-        if: needs.audit-version-in-code.outputs.up_to_date == 'false'
-        run: |
-          python3 -m venv env
-          source env/bin/activate
-          python -m pip install --upgrade pip
+      - uses: pypa/hatch@install
 
       - name: "Bump Version To ${{ inputs.version_number }}"
-        if: needs.audit-version-in-code.outputs.up_to_date == 'false'
-        # note: bumpversion is no longer supported, it actually points to bump2version now
-        run: |
-          source env/bin/activate
-          if [ -f "editable-requirements.txt" ]
-          then
-            python -m pip install -r dev-requirements.txt -r editable-requirements.txt
-          else
-            python -m pip install -r dev-requirements.txt
-          fi
-          env/bin/bumpversion --allow-dirty --new-version ${{ inputs.version_number }} major
-          git status
+        run: hatch version ${{ inputs.version_number }}
 
       - name: "[Notification] Bump Version To ${{ inputs.version_number }}"
         if: needs.audit-version-in-code.outputs.up_to_date == 'false'
@@ -415,14 +402,14 @@ jobs:
       - name: "Remove Trailing Whitespace Via Pre-commit"
         continue-on-error: true
         run: |
-          pre-commit run trailing-whitespace --files .bumpversion.cfg CHANGELOG.md .changes/*
+          pre-commit run trailing-whitespace --files CHANGELOG.md .changes/*
           git status
 
       # this step will fail on newline errors but also correct them
       - name: "Removing Extra Newlines Via Pre-commit"
         continue-on-error: true
         run: |
-          pre-commit run end-of-file-fixer --files .bumpversion.cfg CHANGELOG.md .changes/*
+          pre-commit run end-of-file-fixer --files CHANGELOG.md .changes/*
           git status
 
       - name: "Commit & Push Changes"
@@ -459,18 +446,10 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
 
-      - name: Install python dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install libsasl2-dev
-          python -m pip install --user --upgrade pip
-          python -m pip --version
-          python -m pip install -r requirements.txt
-          python -m pip install -r dev-requirements.txt
-          python -m pip install -e .
+      - uses: pypa/hatch@install
 
       - name: Run unit tests
-        run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit
+        run: hatch run unit-tests
 
   run-integration-tests:
     name: ${{ matrix.test }}
@@ -505,34 +484,20 @@ jobs:
 
     steps:
       - name: Check out the repository
-        if: github.event_name != 'pull_request_target'
         uses: actions/checkout@v4
         with:
           persist-credentials: false
 
-      # explicitly checkout the branch for the PR,
-      # this is necessary for the `pull_request` event
-      - name: Check out the repository (PR)
-        if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-          ref: ${{ github.event.pull_request.head.sha }}
-
       # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
         uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
-      - name: Install python dependencies
-        run: |
-          python -m pip install --user --upgrade pip
-          python -m pip --version
-          python -m pip install -r dagger/requirements.txt
+      - uses: pypa/hatch@install
 
       - name: Run tests for ${{ matrix.test }}
-        run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
+        run: hatch run python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
 
   merge-changes-into-target-branch:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml
deleted file mode 100644
index bde34d68..00000000
--- a/.github/workflows/version-bump.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-# **what?**
-# This workflow will take the new version number to bump to. With that
-# it will run versionbump to update the version number everywhere in the
-# code base and then run changie to create the corresponding changelog.
-# A PR will be created with the changes that can be reviewed before committing.
-
-# **why?**
-# This is to aid in releasing dbt and making sure we have updated
-# the version in all places and generated the changelog.
-
-# **when?**
-# This is triggered manually
-
-name: Version Bump
-
-on:
-  workflow_dispatch:
-    inputs:
-      version_number:
-       description: 'The version number to bump to (ex. 1.2.0, 1.3.0b1)'
-       required: true
-
-jobs:
-  version_bump_and_changie:
-    uses: dbt-labs/actions/.github/workflows/version-bump.yml@main
-    with:
-      version_number: ${{ inputs.version_number }}
-    secrets: inherit  # ok since what we are calling is internally maintained
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6697bbeb..1a34810b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -54,5 +54,7 @@ repos:
         -   --pretty
         files: ^dbt/adapters
         additional_dependencies:
+        -   types-PyYAML
+        -   types-python-dateutil
         -   types-pytz
         -   types-requests
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index cfbc714e..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1 +0,0 @@
-recursive-include dbt/include *.sql *.yml *.md
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 46b9af29..00000000
--- a/Makefile
+++ /dev/null
@@ -1,44 +0,0 @@
-.DEFAULT_GOAL:=help
-
-.PHONY: dev
-dev: ## Installs adapter in develop mode along with development dependencies
-	@\
-	pip install -e . -r requirements.txt -r dev-requirements.txt -r dagger/requirements.txt && pre-commit install
-
-.PHONY: dev-uninstall
-dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment
-               ## Useful when updating versions, or if you accidentally installed into the system interpreter
-	pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y
-	pip uninstall -y dbt-spark
-
-.PHONY: lint
-lint: ## Runs flake8 and mypy code checks against staged changes.
-	@\
-	pre-commit run --all-files
-
-.PHONY: unit
-unit: ## Runs unit tests with py39.
-	@\
-	python -m pytest tests/unit
-
-.PHONY: test
-test: ## Runs unit tests with py39 and code checks against staged changes.
-	@\
-	python -m pytest tests/unit; \
-	python dagger/run_dbt_spark_tests.py --profile spark_session \
-	pre-commit run --all-files
-
-.PHONY: clean
-	@echo "cleaning repo"
-	@git clean -f -X
-
-.PHONY: help
-help: ## Show this help message.
-	@echo 'usage: make [target]'
-	@echo
-	@echo 'targets:'
-	@grep -E '^[7+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
-
-.PHONY: docker-prod
-docker-prod:
-	docker build -f docker/Dockerfile -t dbt-spark .
diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 6c310a6f..f1efb434 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -87,79 +87,76 @@ def get_spark_container(client: dagger.Client) -> (dagger.Service, str):
 
 async def test_spark(test_args):
     async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client:
-        test_profile = test_args.profile
 
         # create cache volumes, these are persisted between runs saving time when developing locally
-        os_reqs_cache = client.cache_volume("os_reqs")
-        pip_cache = client.cache_volume("pip")
-
-        # setup directories as we don't want to copy the whole repo into the container
-        req_files = client.host().directory(
-            "./", include=["*.txt", "*.env", "*.ini", "*.md", "setup.py"]
-        )
-        dbt_spark_dir = client.host().directory("./dbt")
-        test_dir = client.host().directory("./tests")
-        scripts = client.host().directory("./dagger/scripts")
-
-        platform = dagger.Platform("linux/amd64")
         tst_container = (
-            client.container(platform=platform)
+            client.container(platform=dagger.Platform("linux/amd64"))
             .from_("python:3.9-slim")
-            .with_mounted_cache("/var/cache/apt/archives", os_reqs_cache)
-            .with_mounted_cache("/root/.cache/pip", pip_cache)
-            # install OS deps first so any local changes don't invalidate the cache
-            .with_directory("/scripts", scripts)
-            .with_exec(["./scripts/install_os_reqs.sh"])
-            # install dbt-spark + python deps
-            .with_directory("/src", req_files)
-            .with_exec(["pip", "install", "-U", "pip"])
-            .with_workdir("/src")
-            .with_exec(["pip", "install", "-r", "requirements.txt"])
-            .with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+            .with_mounted_cache("/var/cache/apt/archives", client.cache_volume("os_reqs"))
+            .with_mounted_cache("/root/.cache/pip", client.cache_volume("pip"))
         )
 
-        # install local dbt-spark changes
+        # install system dependencies first so any local changes don't invalidate the cache
         tst_container = (
             tst_container.with_workdir("/")
-            .with_directory("src/dbt", dbt_spark_dir)
-            .with_workdir("/src")
-            .with_exec(["pip", "install", "-e", "."])
+            .with_directory("/scripts", client.host().directory("./dagger/scripts"))
+            .with_exec(["./scripts/install_os_reqs.sh"])
+            .with_exec(["pip", "install", "-U", "pip", "hatch"])
+            .with_(env_variables(TESTING_ENV_VARS))
         )
 
-        # install local test changes
+        # copy project files into image
         tst_container = (
             tst_container.with_workdir("/")
-            .with_directory("src/tests", test_dir)
-            .with_workdir("/src")
+            .with_directory("/src/dbt", client.host().directory("./dbt"))
+            .with_directory("/src/tests", client.host().directory("./tests"))
+            .with_directory(
+                "/src",
+                client.host().directory(
+                    "./",
+                    include=[
+                        "pyproject.toml",
+                        "hatch.toml",
+                        "License.md",  # referenced in build metadata
+                        "README.md",  # referenced in build metadata
+                        "test.env",  # may not exist locally, does not exist in ci
+                    ],
+                ),
+            )
         )
 
-        if test_profile == "apache_spark":
+        # install profile-specific system dependencies last since tests usually rotate through profiles
+        if test_args.profile == "apache_spark":
             spark_ctr, spark_host = get_spark_container(client)
             tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
 
-        elif test_profile in ["databricks_cluster", "databricks_sql_endpoint", "spark_http_odbc"]:
-            tst_container = (
-                tst_container.with_workdir("/")
-                .with_exec(["./scripts/configure_odbc.sh"])
-                .with_workdir("/src")
+        elif test_args.profile in [
+            "databricks_cluster",
+            "databricks_sql_endpoint",
+            "spark_http_odbc",
+        ]:
+            tst_container = tst_container.with_workdir("/").with_exec(
+                ["./scripts/configure_odbc.sh"]
             )
 
-        elif test_profile == "spark_session":
-            tst_container = tst_container.with_exec(["pip", "install", "pyspark"])
+        elif test_args.profile == "spark_session":
             tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"])
 
-        tst_container = tst_container.with_(env_variables(TESTING_ENV_VARS))
-        test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter"
-        result = await tst_container.with_exec(
-            ["pytest", "-v", "--profile", test_profile, "-n", "auto", test_path]
-        ).stdout()
+        # run the tests
+        result = (
+            await tst_container.with_workdir("/src")
+            .with_exec(
+                ["hatch", "run", "pytest", "--profile", test_args.profile, test_args.test_path]
+            )
+            .stdout()
+        )
 
         return result
 
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--profile", required=True, type=str)
-parser.add_argument("--test-path", required=False, type=str)
+parser.add_argument("--test-path", required=False, type=str, default="tests/functional/adapter")
 args = parser.parse_args()
 
 anyio.run(test_spark, args)
diff --git a/dev-requirements.txt b/dev-requirements.txt
deleted file mode 100644
index 3947695c..00000000
--- a/dev-requirements.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-# install latest changes in dbt-core
-# TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-common.git
-git+https://github.com/dbt-labs/dbt-adapters.git
-git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
-
-# dev
-ipdb~=0.13.13
-pre-commit~=3.7.0
-pytest~=7.4
-pytest-csv~=3.0
-pytest-dotenv~=0.5.2
-pytest-logbook~=1.2
-pytest-xdist~=3.6
-
-# build
-bumpversion~=0.6.0
-twine~=4.0
-wheel~=0.43
diff --git a/hatch.toml b/hatch.toml
new file mode 100644
index 00000000..55a51c7c
--- /dev/null
+++ b/hatch.toml
@@ -0,0 +1,59 @@
+[version]
+path = "dbt/adapters/spark/__version__.py"
+
+[build.targets.sdist]
+packages = ["dbt"]
+
+[build.targets.wheel]
+packages = ["dbt"]
+
+[envs.default]
+dependencies = [
+    "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git",
+    "dbt-common @ git+https://github.com/dbt-labs/dbt-common.git",
+    "dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter",
+    "dbt-core @ git+https://github.com/dbt-labs/dbt-core.git#subdirectory=core",
+    "ddtrace==2.3.0",
+    "ipdb~=0.13.13",
+    "pre-commit==3.7.0",
+    "freezegun",
+    "pytest>=7.0,<8.0",
+    "pytest-csv~=3.0",
+    "pytest-dotenv",
+    "pytest-logbook~=1.2",
+    "pytest-mock",
+    "pytest-xdist",
+]
+features=["all"]
+
+[envs.default.scripts]
+setup = "pre-commit install"
+code-quality = "pre-commit run --all-files"
+unit-tests = "python -m pytest {args:tests/unit}"
+integration-tests = "python dagger/run_dbt_spark_tests.py {args:--profile apache_spark}"
+docker-prod = "docker build -f docker/Dockerfile -t dbt-spark ."
+
+[envs.build]
+detached = true
+dependencies = [
+    "wheel",
+    "twine",
+    "check-wheel-contents",
+]
+
+[envs.build.scripts]
+check-all = [
+    "- check-wheel",
+    "- check-sdist",
+]
+check-wheel = [
+    "twine check dist/*",
+    "find ./dist/dbt_spark-*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/",
+    "pip freeze | grep dbt-spark",
+]
+check-sdist = [
+    "check-wheel-contents dist/*.whl --ignore W007,W008",
+    "find ./dist/dbt_spark-*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/",
+    "pip freeze | grep dbt-spark",
+]
+docker-prod = "docker build -f docker/Dockerfile -t dbt-spark ."
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..4079e0af
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,61 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+dynamic = ["version"]
+name = "dbt-spark"
+description = "The Apache Spark adapter plugin for dbt"
+readme = "README.md"
+keywords = ["dbt", "adapter", "adapters", "database", "elt", "dbt-core", "dbt Core", "dbt Cloud", "dbt Labs", "spark"]
+requires-python = ">=3.9.0"
+authors = [{ name = "dbt Labs", email = "info@dbtlabs.com" }]
+maintainers = [{ name = "dbt Labs", email = "info@dbtlabs.com" }]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: MacOS :: MacOS X",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX :: Linux",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+    "dbt-common>=1.10,<2.0",
+    "dbt-adapters>=1.7,<2.0",
+    # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
+    "dbt-core>=1.8.0",
+    "sqlparams>=3.0.0",
+]
+[project.optional-dependencies]
+ODBC = ["pyodbc~=5.1.0"]
+PyHive = [
+    "PyHive[hive_pure_sasl]~=0.7.0",
+    "thrift>=0.11.0,<0.17.0",
+]
+session = ["pyspark>=3.0.0,<4.0.0"]
+all = [
+    "pyodbc~=5.1.0",
+    "PyHive[hive_pure_sasl]~=0.7.0",
+    "thrift>=0.11.0,<0.17.0",
+    "pyspark>=3.0.0,<4.0.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/dbt-labs/dbt-spark"
+Documentation = "https://docs.getdbt.com"
+Repository = "https://github.com/dbt-labs/dbt-spark.git"
+Issues = "https://github.com/dbt-labs/dbt-spark/issues"
+Changelog = "https://github.com/dbt-labs/dbt-spark/blob/main/CHANGELOG.md"
+
+[tool.pytest.ini_options]
+testpaths = ["tests/functional", "tests/unit"]
+env_files = ["test.env"]
+addopts = "-v -n auto"
+color = true
+filterwarnings = [
+    "ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning",
+    "ignore:unclosed file .*:ResourceWarning",
+]
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index b3d74bc1..00000000
--- a/pytest.ini
+++ /dev/null
@@ -1,9 +0,0 @@
-[pytest]
-filterwarnings =
-    ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning
-    ignore:unclosed file .*:ResourceWarning
-env_files =
-    test.env
-testpaths =
-    tests/unit
-    tests/functional
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 41263091..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-pyhive[hive_pure_sasl]~=0.7.0
-requests>=2.28.1
-
-pyodbc~=5.1.0 --no-binary pyodbc
-sqlparams>=3.0.0
-thrift>=0.13.0
-pyspark>=3.0.0,<4.0.0
-sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
-
-types-PyYAML
-types-python-dateutil
diff --git a/setup.py b/setup.py
deleted file mode 100644
index aa3be661..00000000
--- a/setup.py
+++ /dev/null
@@ -1,92 +0,0 @@
-#!/usr/bin/env python
-import os
-import sys
-import re
-
-# require python 3.8 or newer
-if sys.version_info < (3, 9):
-    print("Error: dbt does not support this version of Python.")
-    print("Please upgrade to Python 3.9 or higher.")
-    sys.exit(1)
-
-# require version of setuptools that supports find_namespace_packages
-from setuptools import setup
-
-try:
-    from setuptools import find_namespace_packages
-except ImportError:
-    # the user has a downlevel version of setuptools.
-    print("Error: dbt requires setuptools v40.1.0 or higher.")
-    print('Please upgrade setuptools with "pip install --upgrade setuptools" ' "and try again")
-    sys.exit(1)
-
-# pull long description from README
-this_directory = os.path.abspath(os.path.dirname(__file__))
-with open(os.path.join(this_directory, "README.md"), "r", encoding="utf8") as f:
-    long_description = f.read()
-
-
-# get this package's version from dbt/adapters/<name>/__version__.py
-def _get_plugin_version_dict():
-    _version_path = os.path.join(this_directory, "dbt", "adapters", "spark", "__version__.py")
-    _semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
-    _pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
-    _build = r"""(\+build[0-9]+)?"""
-    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}{_build}["']"""
-    with open(_version_path) as f:
-        match = re.search(_version_pattern, f.read().strip())
-        if match is None:
-            raise ValueError(f"invalid version at {_version_path}")
-        return match.groupdict()
-
-
-package_name = "dbt-spark"
-package_version = "1.10.0a1"
-description = """The Apache Spark adapter plugin for dbt"""
-
-odbc_extras = ["pyodbc~=5.1.0"]
-pyhive_extras = [
-    "PyHive[hive_pure_sasl]~=0.7.0",
-    "thrift>=0.11.0,<0.17.0",
-]
-session_extras = ["pyspark>=3.0.0,<4.0.0"]
-all_extras = odbc_extras + pyhive_extras + session_extras
-
-setup(
-    name=package_name,
-    version=package_version,
-    description=description,
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author="dbt Labs",
-    author_email="info@dbtlabs.com",
-    url="https://github.com/dbt-labs/dbt-spark",
-    packages=find_namespace_packages(include=["dbt", "dbt.*"]),
-    include_package_data=True,
-    install_requires=[
-        "sqlparams>=3.0.0",
-        "dbt-common>=1.10,<2.0",
-        "dbt-adapters>=1.7,<2.0",
-        # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
-        "dbt-core>=1.8.0",
-    ],
-    extras_require={
-        "ODBC": odbc_extras,
-        "PyHive": pyhive_extras,
-        "session": session_extras,
-        "all": all_extras,
-    },
-    zip_safe=False,
-    classifiers=[
-        "Development Status :: 5 - Production/Stable",
-        "License :: OSI Approved :: Apache Software License",
-        "Operating System :: Microsoft :: Windows",
-        "Operating System :: MacOS :: MacOS X",
-        "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-    ],
-    python_requires=">=3.9",
-)
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 62bb9c5b..00000000
--- a/tox.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[tox]
-skipsdist = True
-envlist = unit, flake8, integration-spark-thrift

From 489da19675d49572c84faaf7461f6f4ff023bcae Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Mon, 6 Jan 2025 23:52:29 -0500
Subject: [PATCH 600/603] ADAP-1122: Move dbt-spark into a namespace subpackage
 (#1168)

* move all files into a subpackage directory
* fix project metadata
* update workflows for the new subpackage directory
* update unit tests with new src path
* update host directory for dagger to include src directory
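
The import path is unchanged by this move; only the on-disk location shifts from dbt/ to dbt-spark/src/dbt/. A quick local sanity check could look like the following (illustrative only, not part of the diff; assumes a local Python environment that can install the package):

    # install the relocated package in editable mode from the new subpackage directory
    pip install -e ./dbt-spark
    # the namespace packages still resolve at the same import paths
    python -c "import dbt.adapters.spark, dbt.include.spark"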
---
 .github/workflows/changelog-existence.yml                 | 5 ++---
 .github/workflows/integration.yml                         | 5 ++++-
 .github/workflows/main.yml                                | 7 ++++++-
 {.changes => dbt-spark/.changes}/0.0.0.md                 | 0
 {.changes => dbt-spark/.changes}/README.md                | 0
 {.changes => dbt-spark/.changes}/header.tpl.md            | 0
 {.changes => dbt-spark/.changes}/unreleased/.gitkeep      | 0
 .../unreleased/Under the Hood-20241207-181814.yaml        | 0
 .changie.yaml => dbt-spark/.changie.yaml                  | 0
 CHANGELOG.md => dbt-spark/CHANGELOG.md                    | 0
 CONTRIBUTING.md => dbt-spark/CONTRIBUTING.md              | 0
 License.md => dbt-spark/License.md                        | 0
 README.md => dbt-spark/README.md                          | 0
 {dagger => dbt-spark/dagger}/requirements.txt             | 0
 {dagger => dbt-spark/dagger}/run_dbt_spark_tests.py       | 2 +-
 {dagger => dbt-spark/dagger}/scripts/configure_odbc.sh    | 0
 {dagger => dbt-spark/dagger}/scripts/install_os_reqs.sh   | 0
 .../dagger}/spark-container/entrypoint.sh                 | 0
 .../dagger}/spark-container/hive-site.xml                 | 0
 .../dagger}/spark-container/install_spark.sh              | 0
 .../dagger}/spark-container/spark-defaults.conf           | 0
 docker-compose.yml => dbt-spark/docker-compose.yml        | 0
 {docker => dbt-spark/docker}/Dockerfile                   | 0
 {docker => dbt-spark/docker}/README.md                    | 0
 {docker => dbt-spark/docker}/entrypoint.sh                | 0
 {docker => dbt-spark/docker}/hive-site.xml                | 0
 {docker => dbt-spark/docker}/spark-defaults.conf          | 0
 {docker => dbt-spark/docker}/spark.Dockerfile             | 0
 hatch.toml => dbt-spark/hatch.toml                        | 8 +++++---
 pyproject.toml => dbt-spark/pyproject.toml                | 3 +--
 {scripts => dbt-spark/scripts}/build-dist.sh              | 0
 {.github => dbt-spark}/scripts/update_dependencies.sh     | 0
 .../scripts/update_dev_dependency_branches.sh             | 0
 {dbt => dbt-spark/src/dbt}/adapters/spark/__init__.py     | 0
 {dbt => dbt-spark/src/dbt}/adapters/spark/__version__.py  | 0
 {dbt => dbt-spark/src/dbt}/adapters/spark/column.py       | 0
 {dbt => dbt-spark/src/dbt}/adapters/spark/connections.py  | 0
 {dbt => dbt-spark/src/dbt}/adapters/spark/impl.py         | 0
 .../src/dbt}/adapters/spark/python_submissions.py         | 0
 {dbt => dbt-spark/src/dbt}/adapters/spark/relation.py     | 0
 {dbt => dbt-spark/src/dbt}/adapters/spark/session.py      | 0
 {dbt => dbt-spark/src/dbt}/include/spark/__init__.py      | 0
 {dbt => dbt-spark/src/dbt}/include/spark/dbt_project.yml  | 0
 .../src/dbt}/include/spark/macros/adapters.sql            | 0
 .../src/dbt}/include/spark/macros/apply_grants.sql        | 0
 .../dbt}/include/spark/macros/materializations/clone.sql  | 0
 .../materializations/incremental/column_helpers.sql       | 0
 .../macros/materializations/incremental/incremental.sql   | 0
 .../macros/materializations/incremental/strategies.sql    | 0
 .../macros/materializations/incremental/validate.sql      | 0
 .../dbt}/include/spark/macros/materializations/seed.sql   | 0
 .../include/spark/macros/materializations/snapshot.sql    | 0
 .../dbt}/include/spark/macros/materializations/table.sql  | 0
 .../dbt}/include/spark/macros/materializations/view.sql   | 0
 .../src/dbt}/include/spark/macros/utils/any_value.sql     | 0
 .../src/dbt}/include/spark/macros/utils/array_append.sql  | 0
 .../src/dbt}/include/spark/macros/utils/array_concat.sql  | 0
 .../dbt}/include/spark/macros/utils/array_construct.sql   | 0
 .../dbt}/include/spark/macros/utils/assert_not_null.sql   | 0
 .../src/dbt}/include/spark/macros/utils/bool_or.sql       | 0
 .../src/dbt}/include/spark/macros/utils/concat.sql        | 0
 .../src/dbt}/include/spark/macros/utils/date.sql          | 0
 .../src/dbt}/include/spark/macros/utils/dateadd.sql       | 0
 .../src/dbt}/include/spark/macros/utils/datediff.sql      | 0
 .../include/spark/macros/utils/escape_single_quotes.sql   | 0
 .../src/dbt}/include/spark/macros/utils/listagg.sql       | 0
 .../src/dbt}/include/spark/macros/utils/safe_cast.sql     | 0
 .../src/dbt}/include/spark/macros/utils/split_part.sql    | 0
 .../src/dbt}/include/spark/macros/utils/timestamps.sql    | 0
 .../src/dbt}/include/spark/profile_template.yml           | 0
 test.env.example => dbt-spark/test.env.example            | 0
 {tests => dbt-spark/tests}/__init__.py                    | 0
 {tests => dbt-spark/tests}/conftest.py                    | 0
 .../tests}/functional/adapter/dbt_clone/fixtures.py       | 0
 .../tests}/functional/adapter/dbt_clone/test_dbt_clone.py | 0
 .../tests}/functional/adapter/dbt_show/test_dbt_show.py   | 0
 .../tests}/functional/adapter/empty/test_empty.py         | 0
 .../incremental/test_incremental_merge_exclude_columns.py | 0
 .../incremental/test_incremental_on_schema_change.py      | 0
 .../adapter/incremental/test_incremental_predicates.py    | 0
 .../adapter/incremental/test_incremental_unique_id.py     | 0
 .../functional/adapter/incremental_strategies/fixtures.py | 0
 .../functional/adapter/incremental_strategies/seeds.py    | 0
 .../incremental_strategies/test_incremental_strategies.py | 0
 .../adapter/incremental_strategies/test_microbatch.py     | 0
 .../tests}/functional/adapter/persist_docs/fixtures.py    | 0
 .../functional/adapter/persist_docs/test_persist_docs.py  | 0
 .../functional/adapter/seed_column_types/fixtures.py      | 0
 .../adapter/seed_column_types/test_seed_column_types.py   | 0
 .../tests}/functional/adapter/test_basic.py               | 0
 .../tests}/functional/adapter/test_constraints.py         | 0
 .../functional/adapter/test_get_columns_in_relation.py    | 0
 .../tests}/functional/adapter/test_grants.py              | 0
 .../tests}/functional/adapter/test_python_model.py        | 0
 .../tests}/functional/adapter/test_simple_seed.py         | 0
 .../tests}/functional/adapter/test_store_test_failures.py | 0
 .../functional/adapter/unit_testing/test_unit_testing.py  | 0
 .../tests}/functional/adapter/utils/fixture_listagg.py    | 0
 .../tests}/functional/adapter/utils/test_data_types.py    | 0
 .../tests}/functional/adapter/utils/test_timestamps.py    | 0
 .../tests}/functional/adapter/utils/test_utils.py         | 0
 {tests => dbt-spark/tests}/functional/conftest.py         | 0
 {tests => dbt-spark/tests}/unit/__init__.py               | 0
 {tests => dbt-spark/tests}/unit/conftest.py               | 0
 {tests => dbt-spark/tests}/unit/fixtures/__init__.py      | 0
 {tests => dbt-spark/tests}/unit/fixtures/profiles.py      | 0
 {tests => dbt-spark/tests}/unit/test_adapter.py           | 0
 {tests => dbt-spark/tests}/unit/test_column.py            | 0
 {tests => dbt-spark/tests}/unit/test_credentials.py       | 0
 {tests => dbt-spark/tests}/unit/test_macros.py            | 2 +-
 {tests => dbt-spark/tests}/unit/utils.py                  | 0
 111 files changed, 20 insertions(+), 12 deletions(-)
 rename {.changes => dbt-spark/.changes}/0.0.0.md (100%)
 rename {.changes => dbt-spark/.changes}/README.md (100%)
 rename {.changes => dbt-spark/.changes}/header.tpl.md (100%)
 rename {.changes => dbt-spark/.changes}/unreleased/.gitkeep (100%)
 rename {.changes => dbt-spark/.changes}/unreleased/Under the Hood-20241207-181814.yaml (100%)
 rename .changie.yaml => dbt-spark/.changie.yaml (100%)
 rename CHANGELOG.md => dbt-spark/CHANGELOG.md (100%)
 rename CONTRIBUTING.md => dbt-spark/CONTRIBUTING.md (100%)
 rename License.md => dbt-spark/License.md (100%)
 rename README.md => dbt-spark/README.md (100%)
 rename {dagger => dbt-spark/dagger}/requirements.txt (100%)
 rename {dagger => dbt-spark/dagger}/run_dbt_spark_tests.py (98%)
 rename {dagger => dbt-spark/dagger}/scripts/configure_odbc.sh (100%)
 rename {dagger => dbt-spark/dagger}/scripts/install_os_reqs.sh (100%)
 rename {dagger => dbt-spark/dagger}/spark-container/entrypoint.sh (100%)
 rename {dagger => dbt-spark/dagger}/spark-container/hive-site.xml (100%)
 rename {dagger => dbt-spark/dagger}/spark-container/install_spark.sh (100%)
 rename {dagger => dbt-spark/dagger}/spark-container/spark-defaults.conf (100%)
 rename docker-compose.yml => dbt-spark/docker-compose.yml (100%)
 rename {docker => dbt-spark/docker}/Dockerfile (100%)
 rename {docker => dbt-spark/docker}/README.md (100%)
 rename {docker => dbt-spark/docker}/entrypoint.sh (100%)
 rename {docker => dbt-spark/docker}/hive-site.xml (100%)
 rename {docker => dbt-spark/docker}/spark-defaults.conf (100%)
 rename {docker => dbt-spark/docker}/spark.Dockerfile (100%)
 rename hatch.toml => dbt-spark/hatch.toml (92%)
 rename pyproject.toml => dbt-spark/pyproject.toml (98%)
 rename {scripts => dbt-spark/scripts}/build-dist.sh (100%)
 rename {.github => dbt-spark}/scripts/update_dependencies.sh (100%)
 rename {.github => dbt-spark}/scripts/update_dev_dependency_branches.sh (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/__init__.py (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/__version__.py (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/column.py (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/connections.py (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/impl.py (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/python_submissions.py (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/relation.py (100%)
 rename {dbt => dbt-spark/src/dbt}/adapters/spark/session.py (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/__init__.py (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/dbt_project.yml (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/adapters.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/apply_grants.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/clone.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/incremental/column_helpers.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/incremental/incremental.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/incremental/strategies.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/incremental/validate.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/seed.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/snapshot.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/table.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/materializations/view.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/any_value.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/array_append.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/array_concat.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/array_construct.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/assert_not_null.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/bool_or.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/concat.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/date.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/dateadd.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/datediff.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/escape_single_quotes.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/listagg.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/safe_cast.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/split_part.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/macros/utils/timestamps.sql (100%)
 rename {dbt => dbt-spark/src/dbt}/include/spark/profile_template.yml (100%)
 rename test.env.example => dbt-spark/test.env.example (100%)
 rename {tests => dbt-spark/tests}/__init__.py (100%)
 rename {tests => dbt-spark/tests}/conftest.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/dbt_clone/fixtures.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/dbt_clone/test_dbt_clone.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/dbt_show/test_dbt_show.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/empty/test_empty.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental/test_incremental_merge_exclude_columns.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental/test_incremental_on_schema_change.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental/test_incremental_predicates.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental/test_incremental_unique_id.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental_strategies/fixtures.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental_strategies/seeds.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental_strategies/test_incremental_strategies.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/incremental_strategies/test_microbatch.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/persist_docs/fixtures.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/persist_docs/test_persist_docs.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/seed_column_types/fixtures.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/seed_column_types/test_seed_column_types.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/test_basic.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/test_constraints.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/test_get_columns_in_relation.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/test_grants.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/test_python_model.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/test_simple_seed.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/test_store_test_failures.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/unit_testing/test_unit_testing.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/utils/fixture_listagg.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/utils/test_data_types.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/utils/test_timestamps.py (100%)
 rename {tests => dbt-spark/tests}/functional/adapter/utils/test_utils.py (100%)
 rename {tests => dbt-spark/tests}/functional/conftest.py (100%)
 rename {tests => dbt-spark/tests}/unit/__init__.py (100%)
 rename {tests => dbt-spark/tests}/unit/conftest.py (100%)
 rename {tests => dbt-spark/tests}/unit/fixtures/__init__.py (100%)
 rename {tests => dbt-spark/tests}/unit/fixtures/profiles.py (100%)
 rename {tests => dbt-spark/tests}/unit/test_adapter.py (100%)
 rename {tests => dbt-spark/tests}/unit/test_column.py (100%)
 rename {tests => dbt-spark/tests}/unit/test_credentials.py (100%)
 rename {tests => dbt-spark/tests}/unit/test_macros.py (99%)
 rename {tests => dbt-spark/tests}/unit/utils.py (100%)

diff --git a/.github/workflows/changelog-existence.yml b/.github/workflows/changelog-existence.yml
index 6e51e8af..19fa8e0f 100644
--- a/.github/workflows/changelog-existence.yml
+++ b/.github/workflows/changelog-existence.yml
@@ -34,8 +34,7 @@ permissions:
 
 jobs:
   changelog:
-    uses: dbt-labs/actions/.github/workflows/changelog-existence.yml@main
+    uses: dbt-labs/dbt-adapters/.github/workflows/_changelog-entry-check.yml@main
     with:
-      changelog_comment: 'Thank you for your pull request! We could not find a changelog entry for this change. For details on how to document a change, see the [dbt-spark contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.MD).'
-      skip_label: 'Skip Changelog'
+      pull-request: ${{ github.event.pull_request.number }}
     secrets: inherit # this is only acceptable because we own the action we're calling
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 870ac13b..a0a6db33 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -129,17 +129,20 @@ jobs:
       - name: Update Adapters and Core branches (update dev_requirements.txt)
         if: github.event_name == 'workflow_dispatch'
         run: |
-          ./.github/scripts/update_dev_dependency_branches.sh \
+          scripts/update_dev_dependency_branches.sh \
             ${{ inputs.dbt_adapters_branch }} \
             ${{ inputs.dbt_core_branch }} \
             ${{ inputs.dbt_common_branch }}
           cat hatch.toml
+        working-directory: ./dbt-spark
 
       - name: Install hatch
         uses: pypa/hatch@install
 
       - name: Install python dependencies
         run: hatch run pip install -r dagger/requirements.txt
+        working-directory: ./dbt-spark
 
       - name: Run tests for ${{ matrix.test }}
         run: hatch run integration-tests --profile ${{ matrix.test }}
+        working-directory: ./dbt-spark
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 458048a7..ca6957dd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -78,6 +78,7 @@ jobs:
         uses: pypa/hatch@install
 
       - run: hatch run unit-tests
+        working-directory: ./dbt-spark
 
   build:
     name: build packages
@@ -100,12 +101,15 @@ jobs:
 
       - name: Build distributions
         run: hatch build
+        working-directory: ./dbt-spark
 
       - name: Show distributions
         run: ls -lh dist/
+        working-directory: ./dbt-spark
 
       - name: Check distribution descriptions
         run: hatch run build:check-all
+        working-directory: ./dbt-spark
 
       - name: Check if this is an alpha version
         id: check-is-alpha
@@ -113,11 +117,12 @@ jobs:
           export is_alpha=0
           if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi
           echo "is_alpha=$is_alpha" >> $GITHUB_OUTPUT
+        working-directory: ./dbt-spark
 
       - uses: actions/upload-artifact@v4
         with:
           name: dist
-          path: dist/
+          path: dbt-spark/dist/
           overwrite: true
 
   test-build:
diff --git a/.changes/0.0.0.md b/dbt-spark/.changes/0.0.0.md
similarity index 100%
rename from .changes/0.0.0.md
rename to dbt-spark/.changes/0.0.0.md
diff --git a/.changes/README.md b/dbt-spark/.changes/README.md
similarity index 100%
rename from .changes/README.md
rename to dbt-spark/.changes/README.md
diff --git a/.changes/header.tpl.md b/dbt-spark/.changes/header.tpl.md
similarity index 100%
rename from .changes/header.tpl.md
rename to dbt-spark/.changes/header.tpl.md
diff --git a/.changes/unreleased/.gitkeep b/dbt-spark/.changes/unreleased/.gitkeep
similarity index 100%
rename from .changes/unreleased/.gitkeep
rename to dbt-spark/.changes/unreleased/.gitkeep
diff --git a/.changes/unreleased/Under the Hood-20241207-181814.yaml b/dbt-spark/.changes/unreleased/Under the Hood-20241207-181814.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20241207-181814.yaml
rename to dbt-spark/.changes/unreleased/Under the Hood-20241207-181814.yaml
diff --git a/.changie.yaml b/dbt-spark/.changie.yaml
similarity index 100%
rename from .changie.yaml
rename to dbt-spark/.changie.yaml
diff --git a/CHANGELOG.md b/dbt-spark/CHANGELOG.md
similarity index 100%
rename from CHANGELOG.md
rename to dbt-spark/CHANGELOG.md
diff --git a/CONTRIBUTING.md b/dbt-spark/CONTRIBUTING.md
similarity index 100%
rename from CONTRIBUTING.md
rename to dbt-spark/CONTRIBUTING.md
diff --git a/License.md b/dbt-spark/License.md
similarity index 100%
rename from License.md
rename to dbt-spark/License.md
diff --git a/README.md b/dbt-spark/README.md
similarity index 100%
rename from README.md
rename to dbt-spark/README.md
diff --git a/dagger/requirements.txt b/dbt-spark/dagger/requirements.txt
similarity index 100%
rename from dagger/requirements.txt
rename to dbt-spark/dagger/requirements.txt
diff --git a/dagger/run_dbt_spark_tests.py b/dbt-spark/dagger/run_dbt_spark_tests.py
similarity index 98%
rename from dagger/run_dbt_spark_tests.py
rename to dbt-spark/dagger/run_dbt_spark_tests.py
index f1efb434..d6a0f701 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dbt-spark/dagger/run_dbt_spark_tests.py
@@ -108,7 +108,7 @@ async def test_spark(test_args):
         # copy project files into image
         tst_container = (
             tst_container.with_workdir("/")
-            .with_directory("/src/dbt", client.host().directory("./dbt"))
+            .with_directory("/src/src/dbt", client.host().directory("./src/dbt"))
             .with_directory("/src/tests", client.host().directory("./tests"))
             .with_directory(
                 "/src",
diff --git a/dagger/scripts/configure_odbc.sh b/dbt-spark/dagger/scripts/configure_odbc.sh
similarity index 100%
rename from dagger/scripts/configure_odbc.sh
rename to dbt-spark/dagger/scripts/configure_odbc.sh
diff --git a/dagger/scripts/install_os_reqs.sh b/dbt-spark/dagger/scripts/install_os_reqs.sh
similarity index 100%
rename from dagger/scripts/install_os_reqs.sh
rename to dbt-spark/dagger/scripts/install_os_reqs.sh
diff --git a/dagger/spark-container/entrypoint.sh b/dbt-spark/dagger/spark-container/entrypoint.sh
similarity index 100%
rename from dagger/spark-container/entrypoint.sh
rename to dbt-spark/dagger/spark-container/entrypoint.sh
diff --git a/dagger/spark-container/hive-site.xml b/dbt-spark/dagger/spark-container/hive-site.xml
similarity index 100%
rename from dagger/spark-container/hive-site.xml
rename to dbt-spark/dagger/spark-container/hive-site.xml
diff --git a/dagger/spark-container/install_spark.sh b/dbt-spark/dagger/spark-container/install_spark.sh
similarity index 100%
rename from dagger/spark-container/install_spark.sh
rename to dbt-spark/dagger/spark-container/install_spark.sh
diff --git a/dagger/spark-container/spark-defaults.conf b/dbt-spark/dagger/spark-container/spark-defaults.conf
similarity index 100%
rename from dagger/spark-container/spark-defaults.conf
rename to dbt-spark/dagger/spark-container/spark-defaults.conf
diff --git a/docker-compose.yml b/dbt-spark/docker-compose.yml
similarity index 100%
rename from docker-compose.yml
rename to dbt-spark/docker-compose.yml
diff --git a/docker/Dockerfile b/dbt-spark/docker/Dockerfile
similarity index 100%
rename from docker/Dockerfile
rename to dbt-spark/docker/Dockerfile
diff --git a/docker/README.md b/dbt-spark/docker/README.md
similarity index 100%
rename from docker/README.md
rename to dbt-spark/docker/README.md
diff --git a/docker/entrypoint.sh b/dbt-spark/docker/entrypoint.sh
similarity index 100%
rename from docker/entrypoint.sh
rename to dbt-spark/docker/entrypoint.sh
diff --git a/docker/hive-site.xml b/dbt-spark/docker/hive-site.xml
similarity index 100%
rename from docker/hive-site.xml
rename to dbt-spark/docker/hive-site.xml
diff --git a/docker/spark-defaults.conf b/dbt-spark/docker/spark-defaults.conf
similarity index 100%
rename from docker/spark-defaults.conf
rename to dbt-spark/docker/spark-defaults.conf
diff --git a/docker/spark.Dockerfile b/dbt-spark/docker/spark.Dockerfile
similarity index 100%
rename from docker/spark.Dockerfile
rename to dbt-spark/docker/spark.Dockerfile
diff --git a/hatch.toml b/dbt-spark/hatch.toml
similarity index 92%
rename from hatch.toml
rename to dbt-spark/hatch.toml
index 55a51c7c..638bba83 100644
--- a/hatch.toml
+++ b/dbt-spark/hatch.toml
@@ -1,11 +1,13 @@
 [version]
-path = "dbt/adapters/spark/__version__.py"
+path = "src/dbt/adapters/spark/__version__.py"
 
 [build.targets.sdist]
-packages = ["dbt"]
+packages = ["src/dbt"]
+sources = ["src"]
 
 [build.targets.wheel]
-packages = ["dbt"]
+packages = ["src/dbt"]
+sources = ["src"]
 
 [envs.default]
 dependencies = [
diff --git a/pyproject.toml b/dbt-spark/pyproject.toml
similarity index 98%
rename from pyproject.toml
rename to dbt-spark/pyproject.toml
index 4079e0af..de27f9cb 100644
--- a/pyproject.toml
+++ b/dbt-spark/pyproject.toml
@@ -53,8 +53,7 @@ Changelog = "https://github.com/dbt-labs/dbt-spark/blob/main/CHANGELOG.md"
 [tool.pytest.ini_options]
 testpaths = ["tests/functional", "tests/unit"]
 env_files = ["test.env"]
-addopts = "-v -n auto"
-color = true
+addopts = "-v --color=yes -n auto"
 filterwarnings = [
     "ignore:.*'soft_unicode' has been renamed to 'soft_str'*:DeprecationWarning",
     "ignore:unclosed file .*:ResourceWarning",
diff --git a/scripts/build-dist.sh b/dbt-spark/scripts/build-dist.sh
similarity index 100%
rename from scripts/build-dist.sh
rename to dbt-spark/scripts/build-dist.sh
diff --git a/.github/scripts/update_dependencies.sh b/dbt-spark/scripts/update_dependencies.sh
similarity index 100%
rename from .github/scripts/update_dependencies.sh
rename to dbt-spark/scripts/update_dependencies.sh
diff --git a/.github/scripts/update_dev_dependency_branches.sh b/dbt-spark/scripts/update_dev_dependency_branches.sh
similarity index 100%
rename from .github/scripts/update_dev_dependency_branches.sh
rename to dbt-spark/scripts/update_dev_dependency_branches.sh
diff --git a/dbt/adapters/spark/__init__.py b/dbt-spark/src/dbt/adapters/spark/__init__.py
similarity index 100%
rename from dbt/adapters/spark/__init__.py
rename to dbt-spark/src/dbt/adapters/spark/__init__.py
diff --git a/dbt/adapters/spark/__version__.py b/dbt-spark/src/dbt/adapters/spark/__version__.py
similarity index 100%
rename from dbt/adapters/spark/__version__.py
rename to dbt-spark/src/dbt/adapters/spark/__version__.py
diff --git a/dbt/adapters/spark/column.py b/dbt-spark/src/dbt/adapters/spark/column.py
similarity index 100%
rename from dbt/adapters/spark/column.py
rename to dbt-spark/src/dbt/adapters/spark/column.py
diff --git a/dbt/adapters/spark/connections.py b/dbt-spark/src/dbt/adapters/spark/connections.py
similarity index 100%
rename from dbt/adapters/spark/connections.py
rename to dbt-spark/src/dbt/adapters/spark/connections.py
diff --git a/dbt/adapters/spark/impl.py b/dbt-spark/src/dbt/adapters/spark/impl.py
similarity index 100%
rename from dbt/adapters/spark/impl.py
rename to dbt-spark/src/dbt/adapters/spark/impl.py
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt-spark/src/dbt/adapters/spark/python_submissions.py
similarity index 100%
rename from dbt/adapters/spark/python_submissions.py
rename to dbt-spark/src/dbt/adapters/spark/python_submissions.py
diff --git a/dbt/adapters/spark/relation.py b/dbt-spark/src/dbt/adapters/spark/relation.py
similarity index 100%
rename from dbt/adapters/spark/relation.py
rename to dbt-spark/src/dbt/adapters/spark/relation.py
diff --git a/dbt/adapters/spark/session.py b/dbt-spark/src/dbt/adapters/spark/session.py
similarity index 100%
rename from dbt/adapters/spark/session.py
rename to dbt-spark/src/dbt/adapters/spark/session.py
diff --git a/dbt/include/spark/__init__.py b/dbt-spark/src/dbt/include/spark/__init__.py
similarity index 100%
rename from dbt/include/spark/__init__.py
rename to dbt-spark/src/dbt/include/spark/__init__.py
diff --git a/dbt/include/spark/dbt_project.yml b/dbt-spark/src/dbt/include/spark/dbt_project.yml
similarity index 100%
rename from dbt/include/spark/dbt_project.yml
rename to dbt-spark/src/dbt/include/spark/dbt_project.yml
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt-spark/src/dbt/include/spark/macros/adapters.sql
similarity index 100%
rename from dbt/include/spark/macros/adapters.sql
rename to dbt-spark/src/dbt/include/spark/macros/adapters.sql
diff --git a/dbt/include/spark/macros/apply_grants.sql b/dbt-spark/src/dbt/include/spark/macros/apply_grants.sql
similarity index 100%
rename from dbt/include/spark/macros/apply_grants.sql
rename to dbt-spark/src/dbt/include/spark/macros/apply_grants.sql
diff --git a/dbt/include/spark/macros/materializations/clone.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/clone.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/clone.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/clone.sql
diff --git a/dbt/include/spark/macros/materializations/incremental/column_helpers.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/incremental/column_helpers.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/incremental/column_helpers.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/incremental/column_helpers.sql
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/incremental/incremental.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/incremental/incremental.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/incremental/incremental.sql
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/incremental/strategies.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/incremental/strategies.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/incremental/strategies.sql
diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/incremental/validate.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/incremental/validate.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/incremental/validate.sql
diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/seed.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/seed.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/seed.sql
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/snapshot.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/snapshot.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/snapshot.sql
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/table.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/table.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/table.sql
diff --git a/dbt/include/spark/macros/materializations/view.sql b/dbt-spark/src/dbt/include/spark/macros/materializations/view.sql
similarity index 100%
rename from dbt/include/spark/macros/materializations/view.sql
rename to dbt-spark/src/dbt/include/spark/macros/materializations/view.sql
diff --git a/dbt/include/spark/macros/utils/any_value.sql b/dbt-spark/src/dbt/include/spark/macros/utils/any_value.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/any_value.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/any_value.sql
diff --git a/dbt/include/spark/macros/utils/array_append.sql b/dbt-spark/src/dbt/include/spark/macros/utils/array_append.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/array_append.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/array_append.sql
diff --git a/dbt/include/spark/macros/utils/array_concat.sql b/dbt-spark/src/dbt/include/spark/macros/utils/array_concat.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/array_concat.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/array_concat.sql
diff --git a/dbt/include/spark/macros/utils/array_construct.sql b/dbt-spark/src/dbt/include/spark/macros/utils/array_construct.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/array_construct.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/array_construct.sql
diff --git a/dbt/include/spark/macros/utils/assert_not_null.sql b/dbt-spark/src/dbt/include/spark/macros/utils/assert_not_null.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/assert_not_null.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/assert_not_null.sql
diff --git a/dbt/include/spark/macros/utils/bool_or.sql b/dbt-spark/src/dbt/include/spark/macros/utils/bool_or.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/bool_or.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/bool_or.sql
diff --git a/dbt/include/spark/macros/utils/concat.sql b/dbt-spark/src/dbt/include/spark/macros/utils/concat.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/concat.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/concat.sql
diff --git a/dbt/include/spark/macros/utils/date.sql b/dbt-spark/src/dbt/include/spark/macros/utils/date.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/date.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/date.sql
diff --git a/dbt/include/spark/macros/utils/dateadd.sql b/dbt-spark/src/dbt/include/spark/macros/utils/dateadd.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/dateadd.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/dateadd.sql
diff --git a/dbt/include/spark/macros/utils/datediff.sql b/dbt-spark/src/dbt/include/spark/macros/utils/datediff.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/datediff.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/datediff.sql
diff --git a/dbt/include/spark/macros/utils/escape_single_quotes.sql b/dbt-spark/src/dbt/include/spark/macros/utils/escape_single_quotes.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/escape_single_quotes.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/escape_single_quotes.sql
diff --git a/dbt/include/spark/macros/utils/listagg.sql b/dbt-spark/src/dbt/include/spark/macros/utils/listagg.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/listagg.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/listagg.sql
diff --git a/dbt/include/spark/macros/utils/safe_cast.sql b/dbt-spark/src/dbt/include/spark/macros/utils/safe_cast.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/safe_cast.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/safe_cast.sql
diff --git a/dbt/include/spark/macros/utils/split_part.sql b/dbt-spark/src/dbt/include/spark/macros/utils/split_part.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/split_part.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/split_part.sql
diff --git a/dbt/include/spark/macros/utils/timestamps.sql b/dbt-spark/src/dbt/include/spark/macros/utils/timestamps.sql
similarity index 100%
rename from dbt/include/spark/macros/utils/timestamps.sql
rename to dbt-spark/src/dbt/include/spark/macros/utils/timestamps.sql
diff --git a/dbt/include/spark/profile_template.yml b/dbt-spark/src/dbt/include/spark/profile_template.yml
similarity index 100%
rename from dbt/include/spark/profile_template.yml
rename to dbt-spark/src/dbt/include/spark/profile_template.yml
diff --git a/test.env.example b/dbt-spark/test.env.example
similarity index 100%
rename from test.env.example
rename to dbt-spark/test.env.example
diff --git a/tests/__init__.py b/dbt-spark/tests/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to dbt-spark/tests/__init__.py
diff --git a/tests/conftest.py b/dbt-spark/tests/conftest.py
similarity index 100%
rename from tests/conftest.py
rename to dbt-spark/tests/conftest.py
diff --git a/tests/functional/adapter/dbt_clone/fixtures.py b/dbt-spark/tests/functional/adapter/dbt_clone/fixtures.py
similarity index 100%
rename from tests/functional/adapter/dbt_clone/fixtures.py
rename to dbt-spark/tests/functional/adapter/dbt_clone/fixtures.py
diff --git a/tests/functional/adapter/dbt_clone/test_dbt_clone.py b/dbt-spark/tests/functional/adapter/dbt_clone/test_dbt_clone.py
similarity index 100%
rename from tests/functional/adapter/dbt_clone/test_dbt_clone.py
rename to dbt-spark/tests/functional/adapter/dbt_clone/test_dbt_clone.py
diff --git a/tests/functional/adapter/dbt_show/test_dbt_show.py b/dbt-spark/tests/functional/adapter/dbt_show/test_dbt_show.py
similarity index 100%
rename from tests/functional/adapter/dbt_show/test_dbt_show.py
rename to dbt-spark/tests/functional/adapter/dbt_show/test_dbt_show.py
diff --git a/tests/functional/adapter/empty/test_empty.py b/dbt-spark/tests/functional/adapter/empty/test_empty.py
similarity index 100%
rename from tests/functional/adapter/empty/test_empty.py
rename to dbt-spark/tests/functional/adapter/empty/test_empty.py
diff --git a/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py b/dbt-spark/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
similarity index 100%
rename from tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
rename to dbt-spark/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
diff --git a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py b/dbt-spark/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
similarity index 100%
rename from tests/functional/adapter/incremental/test_incremental_on_schema_change.py
rename to dbt-spark/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
diff --git a/tests/functional/adapter/incremental/test_incremental_predicates.py b/dbt-spark/tests/functional/adapter/incremental/test_incremental_predicates.py
similarity index 100%
rename from tests/functional/adapter/incremental/test_incremental_predicates.py
rename to dbt-spark/tests/functional/adapter/incremental/test_incremental_predicates.py
diff --git a/tests/functional/adapter/incremental/test_incremental_unique_id.py b/dbt-spark/tests/functional/adapter/incremental/test_incremental_unique_id.py
similarity index 100%
rename from tests/functional/adapter/incremental/test_incremental_unique_id.py
rename to dbt-spark/tests/functional/adapter/incremental/test_incremental_unique_id.py
diff --git a/tests/functional/adapter/incremental_strategies/fixtures.py b/dbt-spark/tests/functional/adapter/incremental_strategies/fixtures.py
similarity index 100%
rename from tests/functional/adapter/incremental_strategies/fixtures.py
rename to dbt-spark/tests/functional/adapter/incremental_strategies/fixtures.py
diff --git a/tests/functional/adapter/incremental_strategies/seeds.py b/dbt-spark/tests/functional/adapter/incremental_strategies/seeds.py
similarity index 100%
rename from tests/functional/adapter/incremental_strategies/seeds.py
rename to dbt-spark/tests/functional/adapter/incremental_strategies/seeds.py
diff --git a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py b/dbt-spark/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
similarity index 100%
rename from tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
rename to dbt-spark/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
diff --git a/tests/functional/adapter/incremental_strategies/test_microbatch.py b/dbt-spark/tests/functional/adapter/incremental_strategies/test_microbatch.py
similarity index 100%
rename from tests/functional/adapter/incremental_strategies/test_microbatch.py
rename to dbt-spark/tests/functional/adapter/incremental_strategies/test_microbatch.py
diff --git a/tests/functional/adapter/persist_docs/fixtures.py b/dbt-spark/tests/functional/adapter/persist_docs/fixtures.py
similarity index 100%
rename from tests/functional/adapter/persist_docs/fixtures.py
rename to dbt-spark/tests/functional/adapter/persist_docs/fixtures.py
diff --git a/tests/functional/adapter/persist_docs/test_persist_docs.py b/dbt-spark/tests/functional/adapter/persist_docs/test_persist_docs.py
similarity index 100%
rename from tests/functional/adapter/persist_docs/test_persist_docs.py
rename to dbt-spark/tests/functional/adapter/persist_docs/test_persist_docs.py
diff --git a/tests/functional/adapter/seed_column_types/fixtures.py b/dbt-spark/tests/functional/adapter/seed_column_types/fixtures.py
similarity index 100%
rename from tests/functional/adapter/seed_column_types/fixtures.py
rename to dbt-spark/tests/functional/adapter/seed_column_types/fixtures.py
diff --git a/tests/functional/adapter/seed_column_types/test_seed_column_types.py b/dbt-spark/tests/functional/adapter/seed_column_types/test_seed_column_types.py
similarity index 100%
rename from tests/functional/adapter/seed_column_types/test_seed_column_types.py
rename to dbt-spark/tests/functional/adapter/seed_column_types/test_seed_column_types.py
diff --git a/tests/functional/adapter/test_basic.py b/dbt-spark/tests/functional/adapter/test_basic.py
similarity index 100%
rename from tests/functional/adapter/test_basic.py
rename to dbt-spark/tests/functional/adapter/test_basic.py
diff --git a/tests/functional/adapter/test_constraints.py b/dbt-spark/tests/functional/adapter/test_constraints.py
similarity index 100%
rename from tests/functional/adapter/test_constraints.py
rename to dbt-spark/tests/functional/adapter/test_constraints.py
diff --git a/tests/functional/adapter/test_get_columns_in_relation.py b/dbt-spark/tests/functional/adapter/test_get_columns_in_relation.py
similarity index 100%
rename from tests/functional/adapter/test_get_columns_in_relation.py
rename to dbt-spark/tests/functional/adapter/test_get_columns_in_relation.py
diff --git a/tests/functional/adapter/test_grants.py b/dbt-spark/tests/functional/adapter/test_grants.py
similarity index 100%
rename from tests/functional/adapter/test_grants.py
rename to dbt-spark/tests/functional/adapter/test_grants.py
diff --git a/tests/functional/adapter/test_python_model.py b/dbt-spark/tests/functional/adapter/test_python_model.py
similarity index 100%
rename from tests/functional/adapter/test_python_model.py
rename to dbt-spark/tests/functional/adapter/test_python_model.py
diff --git a/tests/functional/adapter/test_simple_seed.py b/dbt-spark/tests/functional/adapter/test_simple_seed.py
similarity index 100%
rename from tests/functional/adapter/test_simple_seed.py
rename to dbt-spark/tests/functional/adapter/test_simple_seed.py
diff --git a/tests/functional/adapter/test_store_test_failures.py b/dbt-spark/tests/functional/adapter/test_store_test_failures.py
similarity index 100%
rename from tests/functional/adapter/test_store_test_failures.py
rename to dbt-spark/tests/functional/adapter/test_store_test_failures.py
diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/dbt-spark/tests/functional/adapter/unit_testing/test_unit_testing.py
similarity index 100%
rename from tests/functional/adapter/unit_testing/test_unit_testing.py
rename to dbt-spark/tests/functional/adapter/unit_testing/test_unit_testing.py
diff --git a/tests/functional/adapter/utils/fixture_listagg.py b/dbt-spark/tests/functional/adapter/utils/fixture_listagg.py
similarity index 100%
rename from tests/functional/adapter/utils/fixture_listagg.py
rename to dbt-spark/tests/functional/adapter/utils/fixture_listagg.py
diff --git a/tests/functional/adapter/utils/test_data_types.py b/dbt-spark/tests/functional/adapter/utils/test_data_types.py
similarity index 100%
rename from tests/functional/adapter/utils/test_data_types.py
rename to dbt-spark/tests/functional/adapter/utils/test_data_types.py
diff --git a/tests/functional/adapter/utils/test_timestamps.py b/dbt-spark/tests/functional/adapter/utils/test_timestamps.py
similarity index 100%
rename from tests/functional/adapter/utils/test_timestamps.py
rename to dbt-spark/tests/functional/adapter/utils/test_timestamps.py
diff --git a/tests/functional/adapter/utils/test_utils.py b/dbt-spark/tests/functional/adapter/utils/test_utils.py
similarity index 100%
rename from tests/functional/adapter/utils/test_utils.py
rename to dbt-spark/tests/functional/adapter/utils/test_utils.py
diff --git a/tests/functional/conftest.py b/dbt-spark/tests/functional/conftest.py
similarity index 100%
rename from tests/functional/conftest.py
rename to dbt-spark/tests/functional/conftest.py
diff --git a/tests/unit/__init__.py b/dbt-spark/tests/unit/__init__.py
similarity index 100%
rename from tests/unit/__init__.py
rename to dbt-spark/tests/unit/__init__.py
diff --git a/tests/unit/conftest.py b/dbt-spark/tests/unit/conftest.py
similarity index 100%
rename from tests/unit/conftest.py
rename to dbt-spark/tests/unit/conftest.py
diff --git a/tests/unit/fixtures/__init__.py b/dbt-spark/tests/unit/fixtures/__init__.py
similarity index 100%
rename from tests/unit/fixtures/__init__.py
rename to dbt-spark/tests/unit/fixtures/__init__.py
diff --git a/tests/unit/fixtures/profiles.py b/dbt-spark/tests/unit/fixtures/profiles.py
similarity index 100%
rename from tests/unit/fixtures/profiles.py
rename to dbt-spark/tests/unit/fixtures/profiles.py
diff --git a/tests/unit/test_adapter.py b/dbt-spark/tests/unit/test_adapter.py
similarity index 100%
rename from tests/unit/test_adapter.py
rename to dbt-spark/tests/unit/test_adapter.py
diff --git a/tests/unit/test_column.py b/dbt-spark/tests/unit/test_column.py
similarity index 100%
rename from tests/unit/test_column.py
rename to dbt-spark/tests/unit/test_column.py
diff --git a/tests/unit/test_credentials.py b/dbt-spark/tests/unit/test_credentials.py
similarity index 100%
rename from tests/unit/test_credentials.py
rename to dbt-spark/tests/unit/test_credentials.py
diff --git a/tests/unit/test_macros.py b/dbt-spark/tests/unit/test_macros.py
similarity index 99%
rename from tests/unit/test_macros.py
rename to dbt-spark/tests/unit/test_macros.py
index 5b648a07..67c45275 100644
--- a/tests/unit/test_macros.py
+++ b/dbt-spark/tests/unit/test_macros.py
@@ -7,7 +7,7 @@
 class TestSparkMacros(unittest.TestCase):
     def setUp(self):
         self.jinja_env = Environment(
-            loader=FileSystemLoader("dbt/include/spark/macros"),
+            loader=FileSystemLoader("src/dbt/include/spark/macros"),
             extensions=[
                 "jinja2.ext.do",
             ],
diff --git a/tests/unit/utils.py b/dbt-spark/tests/unit/utils.py
similarity index 100%
rename from tests/unit/utils.py
rename to dbt-spark/tests/unit/utils.py

From 10e5cebd3c6eb020fa0f8b6c61f2ff5e4e4a5e02 Mon Sep 17 00:00:00 2001
From: Mike Alfare <mike.alfare@dbtlabs.com>
Date: Mon, 13 Jan 2025 12:44:31 -0500
Subject: [PATCH 601/603] update workflows to support dbt-spark in the monorepo
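
With the package-specific workflows removed, integration runs go through the shared _integration-tests.yml and execute from the dbt-spark subdirectory. A rough local equivalent of one matrix entry is shown here (illustrative only; the commands and profile name are taken from the workflow steps added below):

    cd dbt-spark
    # install the dagger test harness into the hatch-managed environment
    hatch run pip install -r dagger/requirements.txt
    # run a single profile from the integration matrix
    hatch run integration-tests --profile apache_spark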

---
 .github/workflows/_generate-changelog.yml |   1 +
 .github/workflows/_integration-tests.yml  |  46 ++
 .github/workflows/_publish-internal.yml   |   1 +
 .github/workflows/_publish-pypi.yml       |   1 +
 .github/workflows/_unit-tests.yml         |   1 +
 .github/workflows/_verify-build.yml       |   1 +
 .github/workflows/backport.yml            |  42 --
 .github/workflows/bot-changelog.yml       |  60 ---
 .github/workflows/changelog-existence.yml |  40 --
 .github/workflows/cut-release-branch.yml  |  42 --
 .github/workflows/docs-issues.yml         |  41 --
 .github/workflows/integration.yml         | 148 ------
 .github/workflows/main.yml                | 170 ------
 .github/workflows/publish.yml             |   1 +
 .github/workflows/pull-request-checks.yml |   3 +
 .github/workflows/release-internal.yml    | 127 -----
 .github/workflows/release-prep.yml        | 616 ----------------------
 .github/workflows/release.yml             | 185 -------
 .github/workflows/stale.yml               |  12 -
 .github/workflows/triage-labels.yml       |  31 --
 20 files changed, 55 insertions(+), 1514 deletions(-)
 delete mode 100644 .github/workflows/backport.yml
 delete mode 100644 .github/workflows/bot-changelog.yml
 delete mode 100644 .github/workflows/changelog-existence.yml
 delete mode 100644 .github/workflows/cut-release-branch.yml
 delete mode 100644 .github/workflows/docs-issues.yml
 delete mode 100644 .github/workflows/integration.yml
 delete mode 100644 .github/workflows/main.yml
 delete mode 100644 .github/workflows/release-internal.yml
 delete mode 100644 .github/workflows/release-prep.yml
 delete mode 100644 .github/workflows/release.yml
 delete mode 100644 .github/workflows/stale.yml
 delete mode 100644 .github/workflows/triage-labels.yml

diff --git a/.github/workflows/_generate-changelog.yml b/.github/workflows/_generate-changelog.yml
index ebedeb4e..be9ffaf6 100644
--- a/.github/workflows/_generate-changelog.yml
+++ b/.github/workflows/_generate-changelog.yml
@@ -35,6 +35,7 @@ on:
                 -   "dbt-adapters"
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
             merge:
                 description: "Choose whether to merge the changelog branch"
                 type: boolean
diff --git a/.github/workflows/_integration-tests.yml b/.github/workflows/_integration-tests.yml
index 80c3aeab..85931da6 100644
--- a/.github/workflows/_integration-tests.yml
+++ b/.github/workflows/_integration-tests.yml
@@ -52,6 +52,17 @@ permissions:
     id-token: write
     contents: read
 
+env:
+    DBT_INVOCATION_ENV: ${{ vars.DBT_INVOCATION_ENV }}
+    DD_CIVISIBILITY_AGENTLESS_ENABLED: ${{ vars.DD_CIVISIBILITY_AGENTLESS_ENABLED }}
+    DD_API_KEY: ${{ secrets.DD_API_KEY }}
+    DD_SITE: ${{ vars.DD_SITE }}
+    DD_ENV: ${{ vars.DD_ENV }}
+    DD_SERVICE: ${{ github.event.repository.name }}  # this can change per run because of forks
+    DBT_TEST_USER_1: ${{ vars.DBT_TEST_USER_1 }}
+    DBT_TEST_USER_2: ${{ vars.DBT_TEST_USER_2 }}
+    DBT_TEST_USER_3: ${{ vars.DBT_TEST_USER_3 }}
+
 jobs:
     integration-tests-athena:
         if: ${{ inputs.package == 'dbt-athena' || inputs.package == 'dbt-athena-community' }}
@@ -83,3 +94,38 @@ jobs:
                 aws-region: ${{ vars.DBT_TEST_ATHENA_REGION_NAME }}
         -   run: hatch run integration-tests
             working-directory: ./${{ inputs.package }}
+
+    integration-tests-spark:
+        if: ${{ inputs.package == 'dbt-spark' }}
+        runs-on: ${{ inputs.os }}
+        environment:
+            name: "dbt-spark"
+        env:
+            DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
+            DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
+            DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
+            DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
+            DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USER }}
+        strategy:
+            fail-fast: false
+            matrix:
+                profile:
+                -   "apache_spark"
+                -   "spark_session"
+                -   "spark_http_odbc"
+                -   "databricks_sql_endpoint"
+                -   "databricks_cluster"
+                -   "databricks_http_cluster"
+        steps:
+        -   uses: actions/checkout@v4
+            with:
+                ref: ${{ inputs.branch }}
+                repository: ${{ inputs.repository }}
+        -   uses: actions/setup-python@v5
+            with:
+                python-version: ${{ inputs.python-version }}
+        -   uses: pypa/hatch@install
+        -   run: hatch run pip install -r dagger/requirements.txt
+            working-directory: ./${{ inputs.package }}
+        -   run: hatch run integration-tests --profile ${{ matrix.profile }}
+            working-directory: ./${{ inputs.package }}
diff --git a/.github/workflows/_publish-internal.yml b/.github/workflows/_publish-internal.yml
index 67f9fd1d..7a6f3473 100644
--- a/.github/workflows/_publish-internal.yml
+++ b/.github/workflows/_publish-internal.yml
@@ -23,6 +23,7 @@ on:
                 options:
                 -   "dbt-adapters"
                 -   "dbt-athena"
+                -   "dbt-spark"
             deploy-to:
                 description: "Choose whether to publish to test or prod"
                 type: environment
diff --git a/.github/workflows/_publish-pypi.yml b/.github/workflows/_publish-pypi.yml
index 85b0c3b2..efe17429 100644
--- a/.github/workflows/_publish-pypi.yml
+++ b/.github/workflows/_publish-pypi.yml
@@ -24,6 +24,7 @@ on:
                 -   "dbt-adapters"
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
             deploy-to:
                 description: "Choose whether to publish to test or prod"
                 type: environment
diff --git a/.github/workflows/_unit-tests.yml b/.github/workflows/_unit-tests.yml
index 9bfae1f5..eddda99c 100644
--- a/.github/workflows/_unit-tests.yml
+++ b/.github/workflows/_unit-tests.yml
@@ -32,6 +32,7 @@ on:
                 -   "dbt-adapters"
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
             branch:
                 description: "Choose the branch to test"
                 type: string
diff --git a/.github/workflows/_verify-build.yml b/.github/workflows/_verify-build.yml
index d60962a6..19730883 100644
--- a/.github/workflows/_verify-build.yml
+++ b/.github/workflows/_verify-build.yml
@@ -33,6 +33,7 @@ on:
                 -   "dbt-tests-adapter"
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
             branch:
                 description: "Choose the branch to build"
                 type: string
diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml
deleted file mode 100644
index ea80cbc2..00000000
--- a/.github/workflows/backport.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-# **what?**
-# When a PR is merged, if it has the backport label, it will create
-# a new PR to backport those changes to the given branch. If it can't
-# cleanly do a backport, it will comment on the merged PR about the failure.
-#
-# Label naming convention: "backport <branch name to backport to>"
-# Example: backport 1.0.latest
-#
-# You MUST "Squash and merge" the original PR or this won't work.
-
-# **why?**
-# Changes sometimes need to be backported to release branches.
-# This automates the backporting process
-
-# **when?**
-# Once a PR is "Squash and merge"'d, by adding a backport label, this is triggered
-
-name: Backport
-on:
-  pull_request:
-    types:
-      - labeled
-
-permissions:
-  contents: write
-  pull-requests: write
-
-jobs:
-  backport:
-    name: Backport
-    runs-on: ubuntu-latest
-    # Only react to merged PRs for security reasons.
-    # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target.
-    if: >
-      github.event.pull_request.merged
-      && contains(github.event.label.name, 'backport')
-    steps:
-      - uses: tibdex/backport@v2
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
deleted file mode 100644
index 89972070..00000000
--- a/.github/workflows/bot-changelog.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-# **what?**
-# When bots create a PR, this action will add a corresponding changie yaml file to that
-# PR when a specific label is added.
-#
-# The file is created off a template:
-#
-# kind: <per action matrix>
-# body: <PR title>
-# time: <current timestamp>
-# custom:
-#   Author: <PR User Login (generally the bot)>
-#   PR: <PR number>
-#
-# **why?**
-# Automate changelog generation for more visibility with automated bot PRs.
-#
-# **when?**
-# Once a PR is created, the label should be added to the PR before or after creation. You can also
-#  manually trigger this by adding the appropriate label at any time.
-#
-# **how to add another bot?**
-# Add the label and changie kind to the include matrix.  That's it!
-#
-
-name: Bot Changelog
-
-on:
-  pull_request:
-    # catch when the PR is opened with the label or when the label is added
-    types: [labeled]
-
-permissions:
-  contents: write
-  pull-requests: read
-
-jobs:
-  generate_changelog:
-    strategy:
-      matrix:
-        include:
-          - label: "dependencies"
-            changie_kind: "Dependencies"
-          - label: "snyk"
-            changie_kind: "Security"
-    runs-on: ubuntu-latest
-
-    steps:
-
-    - name: Create and commit changelog on bot PR
-      if: ${{ contains(github.event.pull_request.labels.*.name, matrix.label) }}
-      id: bot_changelog
-      uses: emmyoop/changie_bot@v1
-      with:
-        GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }}
-        commit_author_name: "Github Build Bot"
-        commit_author_email: "<buildbot@fishtownanalytics.com>"
-        commit_message: "Add automated changelog yaml from template for bot PR"
-        changie_kind: ${{ matrix.changie_kind }}
-        label: ${{ matrix.label }}
-        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  PR: ${{ github.event.pull_request.number }}"
diff --git a/.github/workflows/changelog-existence.yml b/.github/workflows/changelog-existence.yml
deleted file mode 100644
index 19fa8e0f..00000000
--- a/.github/workflows/changelog-existence.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-# **what?**
-# Checks that a file has been committed under the /.changes directory
-# as a new CHANGELOG entry.  Cannot check for a specific filename as
-# it is dynamically generated by change type and timestamp.
-# This workflow should not require any secrets since it runs for PRs
-# from forked repos.
-# By default, secrets are not passed to workflows running from
-# a forked repo.
-
-# **why?**
-# Ensure code change gets reflected in the CHANGELOG.
-
-# **when?**
-# This will run for all PRs going into main and *.latest.  It will
-# run when they are opened, reopened, when any label is added or removed
-# and when new code is pushed to the branch.  The action will then get
-# skipped if the 'Skip Changelog' label is present in any of the labels.
-
-name: Check Changelog Entry
-
-on:
-  pull_request:
-    types: [opened, reopened, labeled, unlabeled, synchronize]
-  workflow_dispatch:
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  contents: read
-  pull-requests: write
-
-
-jobs:
-  changelog:
-    uses: dbt-labs/dbt-adapters/.github/workflows/_changelog-entry-check.yml@main
-    with:
-      pull-request: ${{ github.event.pull_request.number }}
-    secrets: inherit # this is only acceptable because we own the action we're calling
diff --git a/.github/workflows/cut-release-branch.yml b/.github/workflows/cut-release-branch.yml
deleted file mode 100644
index f8dfa217..00000000
--- a/.github/workflows/cut-release-branch.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-# **what?**
-# Calls a centralized workflow that will:
-# 1. Cut a new branch (generally `*.latest`)
-# 2. Also cleans up all files in `.changes/unreleased` and `.changes/<previous version>` on
-# `main` and bumps `main` to the input version.
-
-# **why?**
-# Generally reduces the workload of engineers and reduces errors. Allows automation.
-
-# **when?**
-# This will run when called manually.
-
-name: Cut new release branch
-
-on:
-  workflow_dispatch:
-    inputs:
-      version_to_bump_main:
-        description: 'The alpha version main should bump to (ex. 1.6.0a1)'
-        required: true
-      new_branch_name:
-        description: 'The full name of the new branch (ex. 1.5.latest)'
-        required: true
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  contents: write
-
-jobs:
-  cut_branch:
-    name: "Cut branch and clean up main for dbt-spark"
-    uses: dbt-labs/actions/.github/workflows/cut-release-branch.yml@main
-    with:
-      version_to_bump_main: ${{ inputs.version_to_bump_main }}
-      new_branch_name: ${{ inputs.new_branch_name }}
-      PR_title: "Cleanup main after cutting new ${{ inputs.new_branch_name }} branch"
-      PR_body: "This PR will fail CI until the dbt-core PR has been merged due to release version conflicts."
-    secrets:
-      FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }}
diff --git a/.github/workflows/docs-issues.yml b/.github/workflows/docs-issues.yml
deleted file mode 100644
index f49cf517..00000000
--- a/.github/workflows/docs-issues.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-# **what?**
-# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed
-
-# **why?**
-# To reduce barriers for keeping docs up to date
-
-# **when?**
-# When an issue is labeled `user docs` and is closed as completed. The label can be added before or after the issue is closed.
-
-
-name: Open issues in docs.getdbt.com repo when an issue is labeled
-run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}"
-
-on:
-  issues:
-    types: [labeled, closed]
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-    issues: write # comments on issues
-
-jobs:
-  open_issues:
-    # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned.
-    # If this logic does not exist in this workflow, it runs the
-    # risk of duplicate issues being created, since merge and label events both trigger this workflow and neither run may have
-    # generated the comment before the other runs.  This lives here instead of the shared workflow because this is where we
-    # decide if it should run or not.
-    if: |
-      (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && (
-      (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) ||
-      (github.event.action == 'labeled' && github.event.label.name == 'user docs'))
-    uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
-    with:
-        issue_repository: "dbt-labs/docs.getdbt.com"
-        issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
-        issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
-    secrets: inherit
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
deleted file mode 100644
index a0a6db33..00000000
--- a/.github/workflows/integration.yml
+++ /dev/null
@@ -1,148 +0,0 @@
-# **what?**
-# Runs integration tests.
-
-# **why?**
-# Ensure code runs as expected.
-
-# **when?**
-# This will run for all PRs, when code is pushed to a release
-# branch, and when manually triggered.
-
-name: Adapter Integration Tests
-
-run-name: "${{ (contains(github.event_name, 'workflow_') && inputs.name) || github.event_name }}: ${{ (contains(github.event_name, 'workflow_') &&  inputs.adapter_branch) || github.ref_name }} by @${{ github.actor }}"
-
-on:
-  push:
-    branches:
-      - "main"
-      - "*.latest"
-
-  pull_request_target:
-    paths-ignore:
-      - ".changes/**"
-      - ".gitignore"
-      - "**.md"
-
-  workflow_dispatch:
-    inputs:
-      name:
-        description: "Name to associate with run (example: 'dbt-adapters-242')"
-        required: false
-        type: string
-        default: "Adapter Integration Tests"
-      adapter_branch:
-        description: "The branch of this adapter repository to use"
-        type: string
-        required: false
-        default: "main"
-      dbt_adapters_branch:
-        description: "The branch of dbt-adapters to use"
-        type: string
-        required: false
-        default: "main"
-      dbt_core_branch:
-        description: "The branch of dbt-core to use"
-        type: string
-        required: false
-        default: "main"
-      dbt_common_branch:
-        description: "The branch of dbt-common to use"
-        type: string
-        required: false
-        default: "main"
-
-# explicitly turn off permissions for `GITHUB_TOKEN`
-permissions: read-all
-
-# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request_target') && github.event.pull_request.head.ref || github.sha }}
-  cancel-in-progress: true
-
-defaults:
-  run:
-    shell: bash
-
-jobs:
-
-  test:
-    name: ${{ matrix.test }}
-    runs-on: ubuntu-latest
-
-    strategy:
-      fail-fast: false
-      matrix:
-        test:
-          - "apache_spark"
-          - "spark_session"
-          - "spark_http_odbc"
-          - "databricks_sql_endpoint"
-          - "databricks_cluster"
-          - "databricks_http_cluster"
-
-    env:
-      DBT_INVOCATION_ENV: github-actions
-      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
-      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
-      DD_SITE: datadoghq.com
-      DD_ENV: ci
-      DD_SERVICE: ${{ github.event.repository.name }}
-      DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
-      DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
-      DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
-      DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
-      DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
-      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
-      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
-      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
-
-    steps:
-      - name: Check out the repository (push)
-        if: github.event_name == 'push'
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      - name: Check out the repository (workflow_dispatch)
-        if: github.event_name == 'workflow_dispatch'
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-          ref: ${{ inputs.adapter_branch }}
-
-      # explicitly checkout the branch for the PR,
-      # this is necessary for the `pull_request` event
-      - name: Check out the repository (PR)
-        if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-          ref: ${{ github.event.pull_request.head.sha }}
-
-      # the python version used here is not what is used in the tests themselves
-      - name: Set up Python for dagger
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Update Adapters and Core branches (update dev_requirements.txt)
-        if: github.event_name == 'workflow_dispatch'
-        run: |
-          scripts/update_dev_dependency_branches.sh \
-            ${{ inputs.dbt_adapters_branch }} \
-            ${{ inputs.dbt_core_branch }} \
-            ${{ inputs.dbt_common_branch }}
-          cat hatch.toml
-        working-directory: ./dbt-spark
-
-      - name: Install hatch
-        uses: pypa/hatch@install
-
-      - name: Install python dependencies
-        run: hatch run pip install -r dagger/requirements.txt
-        working-directory: ./dbt-spark
-
-      - name: Run tests for ${{ matrix.test }}
-        run: hatch run integration-tests --profile ${{ matrix.test }}
-        working-directory: ./dbt-spark
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
deleted file mode 100644
index ca6957dd..00000000
--- a/.github/workflows/main.yml
+++ /dev/null
@@ -1,170 +0,0 @@
-# **what?**
-# Runs code quality checks, unit tests, and verifies python build on
-# all code committed to the repository. This workflow should not
-# require any secrets since it runs for PRs from forked repos.
-# By default, secrets are not passed to workflows running from
-# a forked repo.
-
-# **why?**
-# Ensure code for dbt meets a certain quality standard.
-
-# **when?**
-# This will run for all PRs, when code is pushed to a release
-# branch, and when manually triggered.
-
-name: Tests and Code Checks
-
-on:
-  push:
-    branches:
-      - "main"
-      - "*.latest"
-  pull_request:
-  workflow_dispatch:
-
-permissions: read-all
-
-# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }}
-  cancel-in-progress: true
-
-defaults:
-  run:
-    shell: bash
-
-jobs:
-  code-quality:
-    name: code-quality
-
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.9'
-
-      - name: Run pre-commit hooks
-        uses: pre-commit/action@v3.0.1
-
-  unit:
-    name: unit test / python ${{ matrix.python-version }}
-
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install python dependencies
-        uses: pypa/hatch@install
-
-      - run: hatch run unit-tests
-        working-directory: ./dbt-spark
-
-  build:
-    name: build packages
-
-    runs-on: ubuntu-latest
-
-    outputs:
-      is_alpha: ${{ steps.check-is-alpha.outputs.is_alpha }}
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.9'
-
-      - uses: pypa/hatch@install
-
-      - name: Build distributions
-        run: hatch build
-        working-directory: ./dbt-spark
-
-      - name: Show distributions
-        run: ls -lh dist/
-        working-directory: ./dbt-spark
-
-      - name: Check distribution descriptions
-        run: hatch run build:check-all
-        working-directory: ./dbt-spark
-
-      - name: Check if this is an alpha version
-        id: check-is-alpha
-        run: |
-          export is_alpha=0
-          if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi
-          echo "is_alpha=$is_alpha" >> $GITHUB_OUTPUT
-        working-directory: ./dbt-spark
-
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist
-          path: dbt-spark/dist/
-          overwrite: true
-
-  test-build:
-    name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }} / ${{ matrix.dist-type }}
-
-    if: needs.build.outputs.is_alpha == 0
-
-    needs: build
-
-    runs-on: ${{ matrix.os }}
-
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest, macos-14, windows-latest]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-        dist-type: ["whl", "gz"]
-
-    steps:
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install python dependencies
-        run: |
-          python -m pip install --user --upgrade pip
-          python -m pip install --upgrade wheel
-          python -m pip --version
-
-      - uses: actions/download-artifact@v4
-        with:
-          name: dist
-          path: dist/
-
-      - name: Show distributions
-        run: ls -lh dist/
-
-      - name: Install ${{ matrix.dist-type }} distributions
-        run: |
-          find ./dist/*.${{ matrix.dist-type }} -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
-
-      - name: Check ${{ matrix.dist-type }} distributions
-        run: |
-          python -c "import dbt.adapters.spark"
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 741b7fba..fad2c293 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -12,6 +12,7 @@ on:
                 -   "dbt-tests-adapter"
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
             deploy-to:
                 description: "Choose whether to publish to test or prod"
                 type: environment
diff --git a/.github/workflows/pull-request-checks.yml b/.github/workflows/pull-request-checks.yml
index 44d264c4..d6529c6a 100644
--- a/.github/workflows/pull-request-checks.yml
+++ b/.github/workflows/pull-request-checks.yml
@@ -34,6 +34,7 @@ jobs:
                 -   "dbt-tests-adapter"
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
                 os: [ubuntu-22.04]
                 python-version: ["3.9", "3.10", "3.11", "3.12"]
         with:
@@ -52,6 +53,7 @@ jobs:
                 -   "dbt-adapters"
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
                 os: [ ubuntu-22.04 ]
                 python-version: ["3.9", "3.10", "3.11", "3.12"]
         with:
@@ -69,6 +71,7 @@ jobs:
                 package:
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
                 os: [ubuntu-22.04]
                 python-version: ["3.9", "3.10", "3.11", "3.12"]
         with:
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
deleted file mode 100644
index c467d3fc..00000000
--- a/.github/workflows/release-internal.yml
+++ /dev/null
@@ -1,127 +0,0 @@
-# What?
-#
-# Tag and release an arbitrary ref. Uploads to an internal archive for further processing.
-#
-# How?
-#
-# After checking out and testing the provided ref, the image is built and uploaded.
-#
-# When?
-#
-# Manual trigger.
-
-name: "Release to Cloud"
-run-name: "Release to Cloud off of ${{ inputs.ref }}"
-
-on:
-  workflow_dispatch:
-    inputs:
-      ref:
-        description: "The ref (sha or branch name) to use"
-        type: string
-        default: "main"
-        required: true
-      package_test_command:
-        description: "Package test command"
-        type: string
-        default: "python -c \"import dbt.adapters.spark\""
-        required: true
-      skip_tests:
-        description: "Should the tests be skipped? (default to false)"
-        type: boolean
-        required: true
-        default: false
-
-defaults:
-  run:
-    shell: "bash"
-
-env:
-  PYTHON_TARGET_VERSION: 3.9
-
-jobs:
-  run-unit-tests:
-    name: "Unit tests"
-
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-
-    steps:
-      - name: "Check out the repository"
-        uses: actions/checkout@v4
-
-      - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}"
-        uses: actions/setup-python@v5
-        with:
-          python-version: "${{ env.PYTHON_TARGET_VERSION }}"
-
-      - name: Install python dependencies
-        uses: pypa/hatch@install
-
-      - run: hatch run unit-tests
-
-  run-integration-tests:
-    name: "${{ matrix.test }}"
-    needs: [run-unit-tests]
-    runs-on: ubuntu-latest
-
-    strategy:
-      fail-fast: false
-      matrix:
-        test:
-          - "apache_spark"
-          - "spark_session"
-          - "spark_http_odbc"
-          - "databricks_sql_endpoint"
-          - "databricks_cluster"
-          - "databricks_http_cluster"
-
-    env:
-      DBT_INVOCATION_ENV: github-actions
-      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
-      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
-      DD_SITE: datadoghq.com
-      DD_ENV: ci
-      DD_SERVICE: ${{ github.event.repository.name }}
-      DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
-      DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
-      DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
-      DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
-      DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
-      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
-      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
-      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      # the python version used here is not what is used in the tests themselves
-      - name: Set up Python for dagger
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install python dependencies
-        run: |
-          python -m pip install --user --upgrade pip
-          python -m pip --version
-          python -m pip install -r dagger/requirements.txt
-
-      - name: "Run tests for ${{ matrix.test }}"
-        run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
-
-  invoke-reusable-workflow:
-    name: "Create cloud release"
-    needs: [run-integration-tests]
-    uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main"
-
-    with:
-      package_test_command: "${{ inputs.package_test_command }}"
-      dbms_name: "spark"
-      ref: "${{ inputs.ref }}"
-      skip_tests: "true"
-
-    secrets: "inherit"
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
deleted file mode 100644
index ce07447d..00000000
--- a/.github/workflows/release-prep.yml
+++ /dev/null
@@ -1,616 +0,0 @@
-# **what?**
-# Perform the version bump, generate the changelog and run tests.
-#
-# Inputs:
-#  sha:                   The commit to attach to this release
-#  version_number:        The release version number (i.e. 1.0.0b1, 1.2.3rc2, 1.0.0)
-#  target_branch:         The branch that we will release from
-#  env_setup_script_path: Path to the environment setup script
-#  test_run:              Test run (The temp branch will be used for release)
-#  nightly_release:       Indicates that this is a nightly release
-#
-# Outputs:
-#   final_sha:      The sha that will actually be released.  This can differ from the
-#                   input sha if adding a version bump and/or changelog
-#   changelog_path: Path to the changelog file (ex .changes/1.2.3-rc1.md)
-#
-# Branching strategy:
-#  - During workflow execution a temp branch will be generated.
-#  - For normal runs the temp branch will be removed once changes are merged to the target branch;
-#  - For test runs we will keep the temp branch and use it for release;
-#  Naming strategy:
-#  - For normal runs:      prep-release/${{ inputs.version_number }}_$GITHUB_RUN_ID
-#  - For test runs:        prep-release/test-run/${{ inputs.version_number }}_$GITHUB_RUN_ID
-#  - For nightly releases: prep-release/nightly-release/${{ inputs.version_number }}_$GITHUB_RUN_ID
-#
-# **why?**
-# Reusable and consistent GitHub release process.
-#
-# **when?**
-# Call when ready to kick off a build and release
-#
-# Validation Checks
-#
-#  1. Bump the version if it has not been bumped
-#  2. Generate the changelog (via changie) if there is no markdown file for this version
-#
-
-name: Version Bump and Changelog Generation
-
-on:
-  workflow_call:
-    inputs:
-      sha:
-        required: true
-        type: string
-      version_number:
-        required: true
-        type: string
-      target_branch:
-        required: true
-        type: string
-      env_setup_script_path:
-        required: false
-        type: string
-        default: ""
-      test_run:
-        required: false
-        default: true
-        type: boolean
-      nightly_release:
-        type: boolean
-        default: false
-        required: false
-    outputs:
-      final_sha:
-        description: The new commit that includes the changelog and version bump.
-        value: ${{ jobs.determine-release-sha.outputs.final_sha }}
-      changelog_path:
-        description: The path to the changelog for this version
-        value: ${{ jobs.audit-changelog.outputs.changelog_path }}
-    secrets:
-      FISHTOWN_BOT_PAT:
-        description: "Token to commit/merge changes into branches"
-        required: true
-      IT_TEAM_MEMBERSHIP:
-        description: "Token that can view org level teams"
-        required: true
-
-permissions:
-  contents: write
-
-defaults:
-  run:
-    shell: bash
-
-env:
-  PYTHON_TARGET_VERSION: 3.9
-  NOTIFICATION_PREFIX: "[Release Preparation]"
-
-jobs:
-  log-inputs:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: "[DEBUG] Print Variables"
-        run: |
-          # WORKFLOW INPUTS
-          echo The last commit sha in the release:   ${{ inputs.sha }}
-          echo The release version number:           ${{ inputs.version_number }}
-          echo The branch that we will release from: ${{ inputs.target_branch }}
-          echo Path to the environment setup script: ${{ inputs.env_setup_script_path }}
-          echo Test run:                             ${{ inputs.test_run }}
-          echo Nightly release:                      ${{ inputs.nightly_release }}
-          # ENVIRONMENT VARIABLES
-          echo Python target version:                ${{ env.PYTHON_TARGET_VERSION }}
-          echo Notification prefix:                  ${{ env.NOTIFICATION_PREFIX }}
-
-  audit-changelog:
-    runs-on: ubuntu-latest
-
-    outputs:
-      changelog_path: ${{ steps.set_path.outputs.changelog_path }}
-      exists: ${{ steps.set_existence.outputs.exists }}
-      base_version: ${{ steps.semver.outputs.base-version }}
-      prerelease: ${{ steps.semver.outputs.pre-release }}
-      is_prerelease: ${{ steps.semver.outputs.is-pre-release }}
-
-    steps:
-      - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}"
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.sha }}
-
-      - name: "Audit Version And Parse Into Parts"
-        id: semver
-        uses: dbt-labs/actions/parse-semver@v1.1.1
-        with:
-          version: ${{ inputs.version_number }}
-
-      - name: "Set Changelog Path"
-        id: set_path
-        run: |
-          path=".changes/"
-          if [[ ${{ steps.semver.outputs.is-pre-release }} -eq 1 ]]
-          then
-            path+="${{ steps.semver.outputs.base-version }}-${{ steps.semver.outputs.pre-release }}.md"
-          else
-            path+="${{ steps.semver.outputs.base-version }}.md"
-          fi
-          # Send notification
-          echo "changelog_path=$path" >> $GITHUB_OUTPUT
-          title="Changelog path"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$path"
-
-      - name: "Set Changelog Existence For Subsequent Jobs"
-        id: set_existence
-        run: |
-          does_exist=false
-          if test -f ${{ steps.set_path.outputs.changelog_path }}
-          then
-            does_exist=true
-          fi
-          echo "exists=$does_exist">> $GITHUB_OUTPUT
-
-      - name: "[Notification] Set Changelog Existence For Subsequent Jobs"
-        run: |
-          title="Changelog exists"
-          if [[ ${{ steps.set_existence.outputs.exists }} == true ]]
-          then
-            message="Changelog file ${{ steps.set_path.outputs.changelog_path }} already exists"
-          else
-            message="Changelog file ${{ steps.set_path.outputs.changelog_path }} doesn't exist"
-          fi
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-      - name: "Spark safety check"
-        if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }}
-        run: |
-          if [[ ${{ steps.set_existence.outputs.exists }} != true ]]
-          then
-            title="Spark version bump check"
-            message="dbt-spark needs a version bump before running the release. The changelog is not up to date."
-            echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-            exit 1
-          fi
-
-      - name: "[DEBUG] Print Outputs"
-        run: |
-          echo changelog_path: ${{ steps.set_path.outputs.changelog_path }}
-          echo exists:         ${{ steps.set_existence.outputs.exists }}
-          echo base_version:   ${{ steps.semver.outputs.base-version }}
-          echo prerelease:     ${{ steps.semver.outputs.pre-release }}
-          echo is_prerelease:  ${{ steps.semver.outputs.is-pre-release }}
-
-  audit-version-in-code:
-    runs-on: ubuntu-latest
-
-    outputs:
-      up_to_date: ${{ steps.version-check.outputs.up_to_date }}
-
-    steps:
-      - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}"
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.sha }}
-
-      - uses: pypa/hatch@install
-
-      - name: "Check Current Version In Code"
-        id: version-check
-        run: |
-          is_updated=false
-          current_version=$(hatch version)
-          if test "$current_version" = "${{ inputs.version_number }}"
-          then
-            is_updated=true
-          fi
-          echo "up_to_date=$is_updated" >> $GITHUB_OUTPUT
-
-      - name: "[Notification] Check Current Version In Code"
-        run: |
-          title="Version check"
-          if [[ ${{ steps.version-check.outputs.up_to_date }} == true ]]
-          then
-            message="The version in the codebase is equal to the provided version"
-          else
-            message="The version in the codebase differs from the provided version"
-          fi
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-      - name: "Spark safety check"
-        if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }}
-        run: |
-          if [[ ${{ steps.version-check.outputs.up_to_date }} != true ]]
-          then
-            title="Spark version-bump.yml check"
-            message="dbt-spark needs a version-bump.yml run before running the release. The version bump is not up to date."
-            echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-            exit 1
-          fi
-
-      - name: "[DEBUG] Print Outputs"
-        run: |
-          echo up_to_date: ${{ steps.version-check.outputs.up_to_date }}
-
-  skip-generate-changelog:
-    runs-on: ubuntu-latest
-    needs: [audit-changelog]
-    if: needs.audit-changelog.outputs.exists == 'true'
-
-    steps:
-      - name: "Changelog Exists, Skip Generating New Changelog"
-        run: |
-          # Send notification
-          title="Skip changelog generation"
-          message="A changelog file already exists at ${{ needs.audit-changelog.outputs.changelog_path }}, skipping generating changelog"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-  skip-version-bump:
-    runs-on: ubuntu-latest
-    needs: [audit-version-in-code]
-    if: needs.audit-version-in-code.outputs.up_to_date == 'true'
-
-    steps:
-      - name: "Version Already Bumped"
-        run: |
-          # Send notification
-          title="Skip version bump"
-          message="The version has already been bumped to ${{ inputs.version_number }}, skipping version bump"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-  create-temp-branch:
-    runs-on: ubuntu-latest
-    needs: [audit-changelog, audit-version-in-code]
-    if: needs.audit-changelog.outputs.exists == 'false' || needs.audit-version-in-code.outputs.up_to_date == 'false'
-
-    outputs:
-      branch_name: ${{ steps.variables.outputs.branch_name }}
-
-    steps:
-      - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}"
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.sha }}
-
-      - name: "Generate Branch Name"
-        id: variables
-        run: |
-          name="prep-release/"
-          if [[ ${{ inputs.nightly_release }} == true  ]]
-          then
-            name+="nightly-release/"
-          elif [[ ${{ inputs.test_run }} == true ]]
-          then
-            name+="test-run/"
-          fi
-          name+="${{ inputs.version_number }}_$GITHUB_RUN_ID"
-          echo "branch_name=$name" >> $GITHUB_OUTPUT
-
-      - name: "Create Branch - ${{ steps.variables.outputs.branch_name }}"
-        run: |
-          git checkout -b ${{ steps.variables.outputs.branch_name }}
-          git push -u origin ${{ steps.variables.outputs.branch_name }}
-
-      - name: "[Notification] Temp branch created"
-        run: |
-          # Send notification
-          title="Temp branch generated"
-          message="The ${{ steps.variables.outputs.branch_name }} branch was created"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-      - name: "[DEBUG] Print Outputs"
-        run: |
-          echo branch_name ${{ steps.variables.outputs.branch_name }}
-
-  generate-changelog-bump-version:
-    runs-on: ubuntu-latest
-    needs: [audit-changelog, audit-version-in-code, create-temp-branch]
-
-    steps:
-      - name: "Checkout ${{ github.repository }} Branch ${{ needs.create-temp-branch.outputs.branch_name }}"
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ needs.create-temp-branch.outputs.branch_name }}
-
-      - name: "Install Spark Dependencies"
-        if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }}
-        run: |
-          sudo apt-get update
-          sudo apt-get install libsasl2-dev
-
-      - name: "Add Homebrew To PATH"
-        run: |
-          echo "/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin" >> $GITHUB_PATH
-
-      - name: "Install Homebrew Packages"
-        run: |
-          brew install pre-commit
-          brew tap miniscruff/changie https://github.com/miniscruff/changie
-          brew install changie
-
-      - name: "Set json File Name"
-        id: json_file
-        run: |
-          echo "name=output_$GITHUB_RUN_ID.json" >> $GITHUB_OUTPUT
-
-      - name: "Get Core Team Membership"
-        run: |
-          gh api -H "Accept: application/vnd.github+json" orgs/dbt-labs/teams/core-group/members > ${{ steps.json_file.outputs.name }}
-        env:
-          GH_TOKEN: ${{ secrets.IT_TEAM_MEMBERSHIP }}
-
-      - name: "Set Core Team Membership for Changie Contributors exclusion"
-        id: set_team_membership
-        run: |
-          team_list=$(jq -r '.[].login' ${{ steps.json_file.outputs.name }})
-          echo $team_list
-          team_list_single=$(echo $team_list | tr '\n' ' ')
-          echo "CHANGIE_CORE_TEAM=$team_list_single" >> $GITHUB_ENV
-
-      - name: "Delete the json File"
-        run: |
-          rm ${{ steps.json_file.outputs.name }}
-
-      - name: "Generate Release Changelog"
-        if: needs.audit-changelog.outputs.exists == 'false'
-        run: |
-          if [[ ${{ needs.audit-changelog.outputs.is_prerelease }} -eq 1 ]]
-          then
-            changie batch ${{ needs.audit-changelog.outputs.base_version }} --move-dir '${{ needs.audit-changelog.outputs.base_version }}' --prerelease ${{ needs.audit-changelog.outputs.prerelease }}
-          elif [[ -d ".changes/${{ needs.audit-changelog.outputs.base_version }}" ]]
-          then
-            changie batch ${{ needs.audit-changelog.outputs.base_version }} --include '${{ needs.audit-changelog.outputs.base_version }}' --remove-prereleases
-          else # releasing a final patch with no prereleases
-            changie batch ${{ needs.audit-changelog.outputs.base_version }}
-          fi
-          changie merge
-          git status
-
-      - name: "Check Changelog Created Successfully"
-        if: needs.audit-changelog.outputs.exists == 'false'
-        run: |
-          title="Changelog"
-          if [[ -f ${{ needs.audit-changelog.outputs.changelog_path }} ]]
-          then
-            message="Changelog file created successfully"
-            echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-          else
-            message="Changelog failed to generate"
-            echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-            exit 1
-          fi
-
-      - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}"
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_TARGET_VERSION }}
-
-      - uses: pypa/hatch@install
-
-      - name: "Bump Version To ${{ inputs.version_number }}"
-        run: hatch version ${{ inputs.version_number }}
-
-      - name: "[Notification] Bump Version To ${{ inputs.version_number }}"
-        if: needs.audit-version-in-code.outputs.up_to_date == 'false'
-        run: |
-          title="Version bump"
-          message="Version successfully bumped in codebase to ${{ inputs.version_number }}"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-      # this step will fail on whitespace errors but also correct them
-      - name: "Remove Trailing Whitespace Via Pre-commit"
-        continue-on-error: true
-        run: |
-          pre-commit run trailing-whitespace --files CHANGELOG.md .changes/*
-          git status
-
-      # this step will fail on newline errors but also correct them
-      - name: "Removing Extra Newlines Via Pre-commit"
-        continue-on-error: true
-        run: |
-          pre-commit run end-of-file-fixer --files CHANGELOG.md .changes/*
-          git status
-
-      - name: "Commit & Push Changes"
-        run: |
-          #Data for commit
-          user="Github Build Bot"
-          email="buildbot@fishtownanalytics.com"
-          commit_message="Bumping version to ${{ inputs.version_number }} and generate changelog"
-          #Commit changes to branch
-          git config user.name "$user"
-          git config user.email "$email"
-          git pull
-          git add .
-          git commit -m "$commit_message"
-          git push
-
-  run-unit-tests:
-    name: unit test / python ${{ matrix.python-version }}
-
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - uses: pypa/hatch@install
-
-      - name: Run unit tests
-        run: hatch run unit-tests
-
-  run-integration-tests:
-    name: ${{ matrix.test }}
-    runs-on: ubuntu-latest
-
-    strategy:
-      fail-fast: false
-      matrix:
-        test:
-          - "apache_spark"
-          - "spark_session"
-          - "spark_http_odbc"
-          - "databricks_sql_endpoint"
-          - "databricks_cluster"
-          - "databricks_http_cluster"
-
-    env:
-      DBT_INVOCATION_ENV: github-actions
-      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
-      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
-      DD_SITE: datadoghq.com
-      DD_ENV: ci
-      DD_SERVICE: ${{ github.event.repository.name }}
-      DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
-      DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }}
-      DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
-      DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
-      DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }}
-      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
-      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
-      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
-
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-
-      # the python version used here is not what is used in the tests themselves
-      - name: Set up Python for dagger
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - uses: pypa/hatch@install
-
-      - name: Run tests for ${{ matrix.test }}
-        run: hatch run python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
-
-  merge-changes-into-target-branch:
-    runs-on: ubuntu-latest
-    needs: [run-unit-tests, run-integration-tests, create-temp-branch, audit-version-in-code, audit-changelog]
-    if: |
-        !failure() && !cancelled() &&
-        inputs.test_run == false &&
-        (
-          needs.audit-changelog.outputs.exists == 'false' ||
-          needs.audit-version-in-code.outputs.up_to_date == 'false'
-        )
-
-    steps:
-      - name: "[Debug] Print Variables"
-        run: |
-          echo target_branch: ${{ inputs.target_branch }}
-          echo branch_name:   ${{ needs.create-temp-branch.outputs.branch_name }}
-          echo inputs.test_run: ${{ inputs.test_run }}
-          echo needs.audit-changelog.outputs.exists: ${{ needs.audit-changelog.outputs.exists }}
-          echo needs.audit-version-in-code.outputs.up_to_date: ${{ needs.audit-version-in-code.outputs.up_to_date }}
-
-      - name: "Checkout Repo ${{ github.repository }}"
-        uses: actions/checkout@v4
-
-      - name: "Merge Changes Into ${{ inputs.target_branch }}"
-        uses: everlytic/branch-merge@1.1.5
-        with:
-          source_ref: ${{ needs.create-temp-branch.outputs.branch_name }}
-          target_branch: ${{ inputs.target_branch }}
-          github_token: ${{ secrets.FISHTOWN_BOT_PAT }}
-          commit_message_template: "[Automated] Merged {source_ref} into target {target_branch} during release process"
-
-      - name: "[Notification] Changes Merged into ${{ inputs.target_branch }}"
-        run: |
-          title="Changelog and Version Bump Branch Merge"
-          message="The ${{ needs.create-temp-branch.outputs.branch_name }} branch was merged into ${{ inputs.target_branch }}"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-  determine-release-sha:
-    runs-on: ubuntu-latest
-    needs:
-      [
-        create-temp-branch,
-        merge-changes-into-target-branch,
-        audit-changelog,
-        audit-version-in-code,
-      ]
-    # always run this job, regardless of whether the dependent jobs were skipped
-    if: ${{ !failure() && !cancelled() }}
-
-    # Get the sha that will be released.  If the changelog already exists on the input sha and the version has already been bumped,
-    # then it is what we will release. Otherwise we generated a changelog and did the version bump in this workflow and there is a
-    # new sha to use from the merge we just did.  Grab that here instead.
-    outputs:
-      final_sha: ${{ steps.resolve_commit_sha.outputs.release_sha }}
-
-    steps:
-      - name: "[Debug] Print Variables"
-        run: |
-          echo target_branch:    ${{ inputs.target_branch }}
-          echo new_branch:       ${{ needs.create-temp-branch.outputs.branch_name }}
-          echo changelog_exists: ${{ needs.audit-changelog.outputs.exists }}
-          echo up_to_date:       ${{ needs.audit-version-in-code.outputs.up_to_date }}
-
-      - name: "Resolve Branch To Checkout"
-        id: resolve_branch
-        run: |
-          branch=""
-          if [[ ${{ inputs.test_run }} == true ]]
-          then
-            branch=${{ needs.create-temp-branch.outputs.branch_name }}
-          else
-            branch=${{ inputs.target_branch }}
-          fi
-          echo "target_branch=$branch" >> $GITHUB_OUTPUT
-
-      - name: "[Notification] Resolve Branch To Checkout"
-        run: |
-          title="Branch pick"
-          message="The ${{ steps.resolve_branch.outputs.target_branch }} branch will be used for release"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-      - name: "Checkout Resolved Branch - ${{ steps.resolve_branch.outputs.target_branch }}"
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ steps.resolve_branch.outputs.target_branch }}
-
-      - name: "[Debug] Log Branch"
-        run: git status
-
-      - name: "Resolve Commit SHA For Release"
-        id: resolve_commit_sha
-        run: |
-          commit_sha=""
-          if [[ ${{ needs.audit-changelog.outputs.exists }} == false ]] || [[ ${{ needs.audit-version-in-code.outputs.up_to_date }} == false ]]
-          then
-            commit_sha=$(git rev-parse HEAD)
-          else
-            commit_sha=${{ inputs.sha }}
-          fi
-          echo "release_sha=$commit_sha" >> $GITHUB_OUTPUT
-
-      - name: "[Notification] Resolve Commit SHA For Release"
-        run: |
-          title="Release commit pick"
-          message="The ${{ steps.resolve_commit_sha.outputs.release_sha }} commit will be used for release"
-          echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message"
-
-      - name: "Remove Temp Branch - ${{ needs.create-temp-branch.outputs.branch_name }}"
-        if: ${{ inputs.test_run == false && needs.create-temp-branch.outputs.branch_name != '' }}
-        run: |
-          git push origin -d ${{ needs.create-temp-branch.outputs.branch_name }}
-
-      - name: "[Debug] Print Outputs"
-        run: |
-          echo release_sha: ${{ steps.resolve_commit_sha.outputs.release_sha }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
deleted file mode 100644
index aba99768..00000000
--- a/.github/workflows/release.yml
+++ /dev/null
@@ -1,185 +0,0 @@
-# **what?**
-# Release workflow provides the following steps:
-# - checkout the given commit;
-# - validate version in sources and changelog file for given version;
-# - run unit tests against given commit;
-# - build and package that SHA;
-# - release it to GitHub and PyPI with that specific build;
-#
-# **why?**
-# Ensure an automated and tested release process
-#
-# **when?**
-# This will only run manually. Run this workflow only after the
-# version bump workflow is completed and related changes are reviewed and merged.
-#
-name: "Release to GitHub, PyPI, and Docker"
-run-name: "Release ${{ inputs.version_number }} to GitHub, PyPI, and Docker"
-
-on:
-  workflow_dispatch:
-    inputs:
-      sha:
-        description: "The last commit sha in the release"
-        type: string
-        required: true
-      target_branch:
-        description: "The branch to release from"
-        type: string
-        required: true
-      version_number:
-        description: "The release version number (i.e. 1.0.0b1)"
-        type: string
-        required: true
-      build_script_path:
-        description: "Build script path"
-        type: string
-        default: "scripts/build-dist.sh"
-        required: true
-      s3_bucket_name:
-        description: "AWS S3 bucket name"
-        type: string
-        default: "core-team-artifacts"
-        required: true
-      package_test_command:
-        description: "Package test command"
-        type: string
-        default: "python -c \"import dbt.adapters.spark\""
-        required: true
-      env_setup_script_path:
-        description: "Environment setup script path"
-        type: string
-        default: ""
-        required: false
-      test_run:
-        description: "Test run (Publish release as draft)"
-        type: boolean
-        default: true
-        required: false
-      nightly_release:
-        description: "Nightly release to dev environment"
-        type: boolean
-        default: false
-        required: false
-      only_docker:
-        description: "Only release Docker image, skip GitHub & PyPI"
-        type: boolean
-        default: false
-        required: false
-
-permissions:
-  contents: write # this is the permission that allows creating a new release
-
-defaults:
-  run:
-    shell: bash
-
-jobs:
-  log-inputs:
-    name: "Log Inputs"
-    runs-on: ubuntu-latest
-    steps:
-      - name: "[DEBUG] Print Variables"
-        run: |
-          echo The last commit sha in the release: ${{ inputs.sha }}
-          echo The branch to release from:         ${{ inputs.target_branch }}
-          echo The release version number:         ${{ inputs.version_number }}
-          echo Build script path:                  ${{ inputs.build_script_path }}
-          echo Environment setup script path:      ${{ inputs.env_setup_script_path }}
-          echo AWS S3 bucket name:                 ${{ inputs.s3_bucket_name }}
-          echo Package test command:               ${{ inputs.package_test_command }}
-          echo Test run:                           ${{ inputs.test_run }}
-          echo Only Docker:                        ${{ inputs.only_docker }}
-
-  bump-version-generate-changelog:
-    name: Bump package version, Generate changelog
-    uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@removeSparkException
-    with:
-      sha: ${{ inputs.sha }}
-      version_number: ${{ inputs.version_number }}
-      target_branch: ${{ inputs.target_branch }}
-      env_setup_script_path: ${{ inputs.env_setup_script_path }}
-      test_run: ${{ inputs.test_run }}
-      nightly_release: ${{ inputs.nightly_release }}
-    secrets: inherit
-
-  log-outputs-bump-version-generate-changelog:
-    name: "[Log output] Bump package version, Generate changelog"
-    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
-    needs: [bump-version-generate-changelog]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Print variables
-        run: |
-          echo Final SHA     : ${{ needs.bump-version-generate-changelog.outputs.final_sha }}
-          echo Changelog path: ${{ needs.bump-version-generate-changelog.outputs.changelog_path }}
-
-  build-test-package:
-    name: "Build, Test, Package"
-    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
-    needs: [bump-version-generate-changelog]
-    uses: dbt-labs/dbt-release/.github/workflows/build.yml@main
-    with:
-      sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }}
-      version_number: ${{ inputs.version_number }}
-      changelog_path: ${{ needs.bump-version-generate-changelog.outputs.changelog_path }}
-      build_script_path: ${{ inputs.build_script_path }}
-      s3_bucket_name: ${{ inputs.s3_bucket_name }}
-      package_test_command: ${{ inputs.package_test_command }}
-      test_run: ${{ inputs.test_run }}
-    secrets:
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-
-  github-release:
-    name: "GitHub Release"
-    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
-    needs: [bump-version-generate-changelog, build-test-package]
-    uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main
-    with:
-      sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }}
-      version_number: ${{ inputs.version_number }}
-      changelog_path: ${{ needs.bump-version-generate-changelog.outputs.changelog_path }}
-      test_run: ${{ inputs.test_run }}
-
-  pypi-release:
-    name: "PyPI Release"
-    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
-    needs: [github-release]
-    uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main
-    with:
-      version_number: ${{ inputs.version_number }}
-      test_run: ${{ inputs.test_run }}
-    secrets:
-      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
-      TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}
-
-  docker-release:
-    name: "Docker Release"
-    # We cannot release to Docker on a test run because it uses the GitHub tag as
-    # what we need to release, but draft releases don't actually tag the commit, so it
-    # finds nothing to release
-    if: ${{ !failure() && !cancelled() && (!inputs.test_run || inputs.only_docker) }}
-    needs: [github-release]
-    permissions:
-      packages: write
-    uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main
-    with:
-      version_number: ${{ inputs.version_number }}
-      dockerfile: "docker/Dockerfile"
-      test_run: ${{ inputs.test_run }}
-
-  slack-notification:
-    name: Slack Notification
-    if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }}
-    needs:
-      [
-        github-release,
-        pypi-release,
-        docker-release,
-      ]
-    uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main
-    with:
-      status: "failure"
-    secrets:
-      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }}
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
deleted file mode 100644
index d902340a..00000000
--- a/.github/workflows/stale.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: "Close stale issues and PRs"
-on:
-  schedule:
-    - cron: "30 1 * * *"
-
-permissions:
-  issues: write
-  pull-requests: write
-
-jobs:
-  stale:
-    uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main
diff --git a/.github/workflows/triage-labels.yml b/.github/workflows/triage-labels.yml
deleted file mode 100644
index 91f529e3..00000000
--- a/.github/workflows/triage-labels.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-# **what?**
-# When the core team triages, we sometimes need more information from the issue creator.  In
-# those cases we remove the `triage` label and add the `awaiting_response` label.  Once we
-# receive a response in the form of a comment, we want the `awaiting_response` label removed
-# in favor of the `triage` label so we are aware that the issue needs action.
-
-# **why?**
-# To help with our team's triage issue tracking
-
-# **when?**
-# This will run when a comment is added to an issue and that issue has the `awaiting_response` label.
-
-name: Update Triage Label
-
-on: issue_comment
-
-defaults:
-  run:
-    shell: bash
-
-permissions:
-  issues: write
-
-jobs:
-  triage_label:
-    if: contains(github.event.issue.labels.*.name, 'awaiting_response')
-    uses: dbt-labs/actions/.github/workflows/swap-labels.yml@main
-    with:
-      add_label: "triage"
-      remove_label: "awaiting_response"
-    secrets: inherit # this is only acceptable because we own the action we're calling

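Taken together, this patch deletes the standalone dbt-spark workflows above and leans on the monorepo's centralized reusable workflows, adding "dbt-spark" to each package selector. As a rough illustration (not part of the patch series), a caller job for the shared unit-test workflow might look like the sketch below; it assumes _unit-tests.yml accepts package, branch, os, and python-version inputs, mirroring the pull-request-checks.yml matrix shown above, and every name and value here is illustrative.

# Illustrative caller sketch only -- not part of this patch series.
# Assumes _unit-tests.yml exposes package/branch/os/python-version inputs,
# as the pull-request-checks.yml matrix above suggests.
name: unit-tests-caller  # illustrative name
on: workflow_dispatch
jobs:
    unit-tests:
        uses: ./.github/workflows/_unit-tests.yml
        strategy:
            matrix:
                package: ["dbt-spark"]
                os: [ubuntu-22.04]
                python-version: ["3.9", "3.10", "3.11", "3.12"]
        with:
            package: ${{ matrix.package }}
            branch: "main"
            os: ${{ matrix.os }}
            python-version: ${{ matrix.python-version }}
        secrets: inherit
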
From f27857b5e1fcb0605a3ad76f4c6b7510a3617766 Mon Sep 17 00:00:00 2001
From: Mike Alfare <mike.alfare@dbtlabs.com>
Date: Mon, 13 Jan 2025 12:47:22 -0500
Subject: [PATCH 602/603] remove typing additions, leave as separate pull
 request

---
 .pre-commit-config.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0b2f21bc..51154c51 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -49,8 +49,6 @@ repos:
         args:
         -   --explicit-package-bases
         -   --ignore-missing-imports
-        -   --warn-unused-ignores
-        -   --disallow-untyped-defs
         -   --pretty
         -   --show-error-codes
         files: ^dbt/adapters/

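For reference, this change leaves the mypy hook in .pre-commit-config.yaml with roughly the shape sketched below. Only the args and files entries come from the hunk above; the repo, rev, and hook id are placeholders, not taken from the patch.

# Sketch only: repo/rev/id are placeholders; args and files reflect the hunk above.
-   repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.11.2  # placeholder version
    hooks:
    -   id: mypy
        args:
        -   --explicit-package-bases
        -   --ignore-missing-imports
        -   --pretty
        -   --show-error-codes
        files: ^dbt/adapters/
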
From 39cabd93ce8867e28508a8a31ce2a0ea3ce9a51d Mon Sep 17 00:00:00 2001
From: Mike Alfare <mike.alfare@dbtlabs.com>
Date: Mon, 13 Jan 2025 12:54:24 -0500
Subject: [PATCH 603/603] add dbt-spark as option for testing integration tests
 workflow

---
 .github/workflows/_integration-tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/_integration-tests.yml b/.github/workflows/_integration-tests.yml
index 85931da6..30728522 100644
--- a/.github/workflows/_integration-tests.yml
+++ b/.github/workflows/_integration-tests.yml
@@ -31,6 +31,7 @@ on:
                 options:
                 -   "dbt-athena"
                 -   "dbt-athena-community"
+                -   "dbt-spark"
             branch:
                 description: "Choose the branch to test"
                 type: string
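With "dbt-spark" selectable here, integration tests for the adapter can run through the same reusable workflow. The sketch below is a hedged illustration of a caller job; it assumes _integration-tests.yml is also callable via workflow_call with package and branch inputs, since only the workflow_dispatch options are visible in this hunk.

# Illustrative sketch only -- not part of this patch.
# Assumes _integration-tests.yml accepts package/branch inputs via workflow_call,
# matching the workflow_dispatch options shown above.
name: integration-tests-caller  # illustrative name
on: workflow_dispatch
jobs:
    integration-tests:
        uses: ./.github/workflows/_integration-tests.yml
        with:
            package: "dbt-spark"
            branch: "main"
        secrets: inherit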