Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an optional name field for the Postgres adapter indexes configurations #8999

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231104-213414.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Ability to add a meaningful name to PostgreSQL indexes
time: 2023-11-04T21:34:14.9393+01:00
custom:
Author: nazliander
Issue: "8580"
48 changes: 29 additions & 19 deletions plugins/postgres/dbt/adapters/postgres/impl.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
from datetime import datetime
import time
from dataclasses import dataclass
from typing import Any, Optional, Set, List
from typing import Any, List, Optional, Set

from dbt.adapters.base.meta import available
import dbt.utils
from dbt.adapters.base.impl import AdapterConfig, ConstraintSupport
from dbt.adapters.capability import CapabilitySupport, Support, CapabilityDict, Capability
from dbt.adapters.sql import SQLAdapter
from dbt.adapters.postgres import PostgresConnectionManager
from dbt.adapters.base.meta import available
from dbt.adapters.capability import (
Capability,
CapabilityDict,
CapabilitySupport,
Support,
)
from dbt.adapters.postgres import PostgresConnectionManager, PostgresRelation
from dbt.adapters.postgres.column import PostgresColumn
from dbt.adapters.postgres import PostgresRelation
from dbt.dataclass_schema import dbtClassMixin, ValidationError
from dbt.adapters.sql import SQLAdapter
from dbt.contracts.graph.nodes import ConstraintType
from dbt.dataclass_schema import ValidationError, dbtClassMixin
from dbt.exceptions import (
CrossDbReferenceProhibitedError,
IndexConfigNotDictError,
IndexConfigError,
DbtRuntimeError,
IndexConfigError,
IndexConfigNotDictError,
UnexpectedDbReferenceError,
)
import dbt.utils


GET_RELATIONS_MACRO_NAME = "postgres__get_relations"

Expand All @@ -29,16 +32,23 @@ class PostgresIndexConfig(dbtClassMixin):
columns: List[str]
unique: bool = False
type: Optional[str] = None
name: Optional[str] = None

def render(self, relation):
# We append the current timestamp to the index name because otherwise
# the index will only be created on every other run. See
# https://github.com/dbt-labs/dbt-core/issues/1945#issuecomment-576714925
# for an explanation.
now = datetime.utcnow().isoformat()
"""
Renders the index name as a string. If the name is not set, generates an md5 hash from the relation
and the index config, including the current timestamp. This ensures that the index name is unique.
See the following issue for explanation:
https://github.com/fishtown-analytics/dbt/issues/1945#issuecomment-576714925
However, the `name` field allows a user-defined name to be included in the index config, which is
useful for users who care about meaningful index names.
"""
now = str(round(time.time() * 1000))
inputs = self.columns + [relation.render(), str(self.unique), str(self.type), now]
string = "_".join(inputs)
return dbt.utils.md5(string)
if self.name is None:
return dbt.utils.md5(string)
return self.name + "_" + now

@classmethod
def parse(cls, raw_index) -> Optional["PostgresIndexConfig"]:
Expand Down Expand Up @@ -105,7 +115,7 @@ def _link_cached_database_relations(self, schemas: Set[str]):
database = self.config.credentials.database
table = self.execute_macro(GET_RELATIONS_MACRO_NAME)

for (dep_schema, dep_name, refed_schema, refed_name) in table:
for dep_schema, dep_name, refed_schema, refed_name in table:
dependent = self.Relation.create(
database=database, schema=dep_schema, identifier=dep_name
)
Expand Down
7 changes: 4 additions & 3 deletions tests/functional/postgres/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
materialized = "incremental",
indexes=[
{'columns': ['column_a'], 'type': 'hash'},
{'columns': ['column_a', 'column_b'], 'unique': True},
{'columns': ['column_a', 'column_b'], 'unique': True, 'name': 'column_a_and_column_b_index'},
]
)
}}
Expand All @@ -29,12 +29,13 @@
{'columns': ['column_b']},
{'columns': ['column_a', 'column_b']},
{'columns': ['column_b', 'column_a'], 'type': 'btree', 'unique': True},
{'columns': ['column_a'], 'type': 'hash'}
{'columns': ['column_a'], 'type': 'hash'},
{'columns': ['column_c'], 'name': 'column_c_index'},
]
)
}}

select 1 as column_a, 2 as column_b
select 1 as column_a, 2 as column_b, 3 as column_c

"""

Expand Down
51 changes: 32 additions & 19 deletions tests/functional/postgres/test_postgres_indexes.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
import pytest
import re
from dbt.tests.util import (
run_dbt,
run_dbt_and_capture,
)

import pytest
from dbt.tests.util import run_dbt, run_dbt_and_capture
from freezegun import freeze_time

from tests.functional.postgres.fixtures import (
models__incremental_sql,
models__table_sql,
models_invalid__missing_columns_sql,
models_invalid__invalid_columns_type_sql,
models_invalid__invalid_type_sql,
models_invalid__invalid_unique_config_sql,
models_invalid__missing_columns_sql,
seeds__seed_csv,
snapshots__colors_sql,
)


INDEX_DEFINITION_PATTERN = re.compile(r"using\s+(\w+)\s+\((.+)\)\Z")


Expand Down Expand Up @@ -51,6 +50,7 @@ def project_config_update(self):
},
}

@freeze_time("2023-11-04 12:00:00")
def test_table(self, project, unique_schema):
results = run_dbt(["run", "--models", "table"])
assert len(results) == 1
Expand All @@ -62,9 +62,16 @@ def test_table(self, project, unique_schema):
{"columns": "column_a, column_b", "unique": False, "type": "btree"},
{"columns": "column_b, column_a", "unique": True, "type": "btree"},
{"columns": "column_a", "unique": False, "type": "hash"},
{
"name": "column_c_index_1699099200000",
"columns": "column_c",
"unique": False,
"type": "btree",
},
]
assert len(indexes) == len(expected)
assert indexes == expected

@freeze_time("2023-11-04 12:00:00")
def test_incremental(self, project, unique_schema):
for additional_argument in [[], [], ["--full-refresh"]]:
results = run_dbt(["run", "--models", "incremental"] + additional_argument)
Expand All @@ -73,9 +80,14 @@ def test_incremental(self, project, unique_schema):
indexes = self.get_indexes("incremental", project, unique_schema)
expected = [
{"columns": "column_a", "unique": False, "type": "hash"},
{"columns": "column_a, column_b", "unique": True, "type": "btree"},
{
"name": "column_a_and_column_b_index_1699099200000",
"columns": "column_a, column_b",
"unique": True,
"type": "btree",
},
]
assert len(indexes) == len(expected)
assert indexes == expected

def test_seed(self, project, unique_schema):
for additional_argument in [[], [], ["--full-refresh"]]:
Expand Down Expand Up @@ -104,17 +116,18 @@ def test_snapshot(self, project, unique_schema):
def get_indexes(self, table_name, project, unique_schema):
sql = f"""
SELECT
pg_get_indexdef(idx.indexrelid) as index_definition
FROM pg_index idx
JOIN pg_class tab ON tab.oid = idx.indrelid
WHERE
tab.relname = '{table_name}'
AND tab.relnamespace = (
SELECT oid FROM pg_namespace WHERE nspname = '{unique_schema}'
);
indexname AS index_name, indexdef as index_definition
FROM pg_indexes
WHERE tablename = '{table_name}'
AND schemaname = '{unique_schema}'
"""
results = project.run_sql(sql, fetch="all")
return [self.parse_index_definition(row[0]) for row in results]
return [
dict(name=row[0], **self.parse_index_definition(row[1]))
if row[0].startswith("column")
else self.parse_index_definition(row[1])
for row in results
]

def parse_index_definition(self, index_definition):
index_definition = index_definition.lower()
Expand Down