From a93a12b15164c0c9778dec3fe640d53b8717c06a Mon Sep 17 00:00:00 2001 From: Ruben Vereecken Date: Wed, 17 Jul 2024 22:11:16 +0100 Subject: [PATCH] feat: Support string and int arrays (#330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the title says, this adds support for - `ARRAY(BIGINT())` - `ARRAY(TEXT())` Technically this is a breaking change. Where previously all array types were `ARRAY(JSONB())`, now there's more nuance. It is unlikely that many people rely on the previous behaviour, but we had to write a migration. Should we increment the major version (or at least the minor) to signal this change? Or introduce a feature switch? --------- Co-authored-by: Ruben Vereecken Co-authored-by: Edgar Ramírez Mondragón <16805946+edgarrmondragon@users.noreply.github.com> --- target_postgres/connector.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/target_postgres/connector.py b/target_postgres/connector.py index 4a1e3a6a..cfa8ea25 100644 --- a/target_postgres/connector.py +++ b/target_postgres/connector.py @@ -143,6 +143,7 @@ def prepare_table( # type: ignore[override] column_object = None if property_name in columns: column_object = columns[property_name] + self.prepare_column( full_table_name=table.fullname, column_name=property_name, @@ -246,6 +247,14 @@ def to_sql_type(self, jsonschema_type: dict) -> sa.types.TypeEngine: # type: ig json_type_dict["format"] = jsonschema_type["format"] if encoding := jsonschema_type.get("contentEncoding", False): json_type_dict["contentEncoding"] = encoding + # Figure out array type, but only if there's a single type + # (no array union types) + if ( + "items" in jsonschema_type + and "type" in jsonschema_type["items"] + and isinstance(jsonschema_type["items"]["type"], str) + ): + json_type_dict["items"] = jsonschema_type["items"]["type"] json_type_array.append(json_type_dict) else: msg = "Invalid format for jsonschema type: not str or list."
@@ -282,7 +291,13 @@ def pick_individual_type(self, jsonschema_type: dict): if "object" in jsonschema_type["type"]: return JSONB() if "array" in jsonschema_type["type"]: - return ARRAY(JSONB()) + items_type = jsonschema_type.get("items") + if "string" == items_type: + return ARRAY(TEXT()) + if "integer" == items_type: + return ARRAY(BIGINT()) + else: + return ARRAY(JSONB()) # string formats if jsonschema_type.get("format") == "date-time":