Add an additional name property to indexes in Postgres adapter

dbt-labs · Nov 4, 2023 · 4469af9 · 4469af9
1 parent 7fddd6e
commit 4469af9
Show file tree

Hide file tree

Showing 3 changed files with 65 additions and 41 deletions.
diff --git a/plugins/postgres/dbt/adapters/postgres/impl.py b/plugins/postgres/dbt/adapters/postgres/impl.py
@@ -1,25 +1,28 @@
-from datetime import datetime
+import time
 from dataclasses import dataclass
-from typing import Any, Optional, Set, List
+from typing import Any, List, Optional, Set
 
-from dbt.adapters.base.meta import available
+import dbt.utils
 from dbt.adapters.base.impl import AdapterConfig, ConstraintSupport
-from dbt.adapters.capability import CapabilitySupport, Support, CapabilityDict, Capability
-from dbt.adapters.sql import SQLAdapter
-from dbt.adapters.postgres import PostgresConnectionManager
+from dbt.adapters.base.meta import available
+from dbt.adapters.capability import (
+    Capability,
+    CapabilityDict,
+    CapabilitySupport,
+    Support,
+)
+from dbt.adapters.postgres import PostgresConnectionManager, PostgresRelation
 from dbt.adapters.postgres.column import PostgresColumn
-from dbt.adapters.postgres import PostgresRelation
-from dbt.dataclass_schema import dbtClassMixin, ValidationError
+from dbt.adapters.sql import SQLAdapter
 from dbt.contracts.graph.nodes import ConstraintType
+from dbt.dataclass_schema import ValidationError, dbtClassMixin
 from dbt.exceptions import (
     CrossDbReferenceProhibitedError,
-    IndexConfigNotDictError,
-    IndexConfigError,
     DbtRuntimeError,
+    IndexConfigError,
+    IndexConfigNotDictError,
     UnexpectedDbReferenceError,
 )
-import dbt.utils
-
 
 GET_RELATIONS_MACRO_NAME = "postgres__get_relations"
 
@@ -29,16 +32,23 @@ class PostgresIndexConfig(dbtClassMixin):
     columns: List[str]
     unique: bool = False
     type: Optional[str] = None
+    name: Optional[str] = None
 
     def render(self, relation):
-        # We append the current timestamp to the index name because otherwise
-        # the index will only be created on every other run. See
-        # https://github.com/dbt-labs/dbt-core/issues/1945#issuecomment-576714925
-        # for an explanation.
-        now = datetime.utcnow().isoformat()
+        """
+        Render the index name as a string. If `name` is not set, return an md5 hash built from the index
+            config, the rendered relation and the current timestamp, so the generated name is unique across runs.
+            See the following issue for an explanation:
+            https://github.com/fishtown-analytics/dbt/issues/1945#issuecomment-576714925
+        If `name` is set, return the user-defined name with the current millisecond timestamp appended, which
+            is useful for users who want semantically meaningful index names.
+        """
+        now = str(round(time.time() * 1000))
         inputs = self.columns + [relation.render(), str(self.unique), str(self.type), now]
         string = "_".join(inputs)
-        return dbt.utils.md5(string)
+        if self.name is None:
+            return dbt.utils.md5(string)
+        return self.name + "_" + now
 
     @classmethod
     def parse(cls, raw_index) -> Optional["PostgresIndexConfig"]:
@@ -105,7 +115,7 @@ def _link_cached_database_relations(self, schemas: Set[str]):
         database = self.config.credentials.database
         table = self.execute_macro(GET_RELATIONS_MACRO_NAME)
 
-        for (dep_schema, dep_name, refed_schema, refed_name) in table:
+        for dep_schema, dep_name, refed_schema, refed_name in table:
             dependent = self.Relation.create(
                 database=database, schema=dep_schema, identifier=dep_name
             )

diff --git a/tests/functional/postgres/fixtures.py b/tests/functional/postgres/fixtures.py
@@ -4,7 +4,7 @@
     materialized = "incremental",
     indexes=[
       {'columns': ['column_a'], 'type': 'hash'},
-      {'columns': ['column_a', 'column_b'], 'unique': True},
+      {'columns': ['column_a', 'column_b'], 'unique': True, 'name': 'column_a_and_column_b_index'},
     ]
   )
 }}
@@ -29,12 +29,13 @@
       {'columns': ['column_b']},
       {'columns': ['column_a', 'column_b']},
       {'columns': ['column_b', 'column_a'], 'type': 'btree', 'unique': True},
-      {'columns': ['column_a'], 'type': 'hash'}
+      {'columns': ['column_a'], 'type': 'hash'},
+      {'columns': ['column_c'], 'name': 'column_c_index'},
     ]
   )
 }}
 
-select 1 as column_a, 2 as column_b
+select 1 as column_a, 2 as column_b, 3 as column_c
 
 """
 

diff --git a/tests/functional/postgres/test_postgres_indexes.py b/tests/functional/postgres/test_postgres_indexes.py
@@ -1,21 +1,20 @@
-import pytest
 import re
-from dbt.tests.util import (
-    run_dbt,
-    run_dbt_and_capture,
-)
+
+import pytest
+from dbt.tests.util import run_dbt, run_dbt_and_capture
+from freezegun import freeze_time
+
 from tests.functional.postgres.fixtures import (
     models__incremental_sql,
     models__table_sql,
-    models_invalid__missing_columns_sql,
     models_invalid__invalid_columns_type_sql,
     models_invalid__invalid_type_sql,
     models_invalid__invalid_unique_config_sql,
+    models_invalid__missing_columns_sql,
     seeds__seed_csv,
     snapshots__colors_sql,
 )
 
-
 INDEX_DEFINITION_PATTERN = re.compile(r"using\s+(\w+)\s+\((.+)\)\Z")
 
 
@@ -51,6 +50,7 @@ def project_config_update(self):
             },
         }
 
+    @freeze_time("2023-11-04 12:00:00")
     def test_table(self, project, unique_schema):
         results = run_dbt(["run", "--models", "table"])
         assert len(results) == 1
@@ -62,9 +62,16 @@ def test_table(self, project, unique_schema):
             {"columns": "column_a, column_b", "unique": False, "type": "btree"},
             {"columns": "column_b, column_a", "unique": True, "type": "btree"},
             {"columns": "column_a", "unique": False, "type": "hash"},
+            {
+                "name": "column_c_index_1699099200000",
+                "columns": "column_c",
+                "unique": False,
+                "type": "btree",
+            },
         ]
-        assert len(indexes) == len(expected)
+        assert indexes == expected
 
+    @freeze_time("2023-11-04 12:00:00")
     def test_incremental(self, project, unique_schema):
         for additional_argument in [[], [], ["--full-refresh"]]:
             results = run_dbt(["run", "--models", "incremental"] + additional_argument)
@@ -73,9 +80,14 @@ def test_incremental(self, project, unique_schema):
             indexes = self.get_indexes("incremental", project, unique_schema)
             expected = [
                 {"columns": "column_a", "unique": False, "type": "hash"},
-                {"columns": "column_a, column_b", "unique": True, "type": "btree"},
+                {
+                    "name": "column_a_and_column_b_index_1699099200000",
+                    "columns": "column_a, column_b",
+                    "unique": True,
+                    "type": "btree",
+                },
             ]
-            assert len(indexes) == len(expected)
+            assert indexes == expected
 
     def test_seed(self, project, unique_schema):
         for additional_argument in [[], [], ["--full-refresh"]]:
@@ -104,17 +116,18 @@ def test_snapshot(self, project, unique_schema):
     def get_indexes(self, table_name, project, unique_schema):
         sql = f"""
             SELECT
-              pg_get_indexdef(idx.indexrelid) as index_definition
-            FROM pg_index idx
-            JOIN pg_class tab ON tab.oid = idx.indrelid
-            WHERE
-              tab.relname = '{table_name}'
-              AND tab.relnamespace = (
-                SELECT oid FROM pg_namespace WHERE nspname = '{unique_schema}'
-              );
+              indexname AS index_name, indexdef as index_definition
+            FROM pg_indexes
+            WHERE tablename = '{table_name}'
+              AND schemaname = '{unique_schema}'
         """
         results = project.run_sql(sql, fetch="all")
-        return [self.parse_index_definition(row[0]) for row in results]
+        return [
+            dict(name=row[0], **self.parse_index_definition(row[1]))
+            if row[0].startswith("column")
+            else self.parse_index_definition(row[1])
+            for row in results
+        ]
 
     def parse_index_definition(self, index_definition):
         index_definition = index_definition.lower()