diff --git a/target_postgres/connector.py b/target_postgres/connector.py index cfa8ea25..358019f7 100644 --- a/target_postgres/connector.py +++ b/target_postgres/connector.py @@ -291,14 +291,34 @@ def pick_individual_type(self, jsonschema_type: dict): if "object" in jsonschema_type["type"]: return JSONB() if "array" in jsonschema_type["type"]: - items_type = jsonschema_type.get("items") - if "string" == items_type: - return ARRAY(TEXT()) - if "integer" == items_type: - return ARRAY(BIGINT()) - else: + items = jsonschema_type.get("items") + # Case 1: items is a string + if isinstance(items, str): + return ARRAY(self.to_sql_type({"type": items})) + + # Case 2: items are more complex + if isinstance(items, dict): + # Case 2.1: items are variants + if "type" not in items: + return ARRAY(JSONB()) + + items_type = items["type"] + + # Case 2.2: items are a single type + if isinstance(items_type, str): + return ARRAY(self.to_sql_type({"type": items_type})) + + # Case 2.3: items are a list of types + if isinstance(items_type, list): + return ARRAY(self.to_sql_type({"type": items_type})) + + # Case 3: tuples + if isinstance(items, list): return ARRAY(JSONB()) + # All other cases, return JSONB + return JSONB() + # string formats if jsonschema_type.get("format") == "date-time": return TIMESTAMP() diff --git a/target_postgres/tests/data_files/jsonb_data.singer b/target_postgres/tests/data_files/jsonb_data.singer new file mode 100644 index 00000000..70e33f74 --- /dev/null +++ b/target_postgres/tests/data_files/jsonb_data.singer @@ -0,0 +1,3 @@ +{"type":"SCHEMA", "stream":"test_jsonb_data", "key_properties":["id"], "schema":{"required":["id"], "type":"object", "properties":{"id":{"type":["integer"]},"event_data":{"type":["string","number","integer","array","object","boolean","null"]}}}} +{"type":"RECORD","stream":"test_jsonb_data","record":{"id":1,"event_data":null,"time_extracted":"2024-07-27T12:24:43.774995+00:00"}} +{"type":"RECORD","stream":"test_jsonb_data","record":{"id":2,"event_data":{"test":{"test_name":"test_value"}},"time_extracted":"2024-07-27T12:24:43.774995+00:00"}} diff --git a/target_postgres/tests/test_target_postgres.py b/target_postgres/tests/test_target_postgres.py index 9bc0acc1..6a4878ce 100644 --- a/target_postgres/tests/test_target_postgres.py +++ b/target_postgres/tests/test_target_postgres.py @@ -423,6 +423,16 @@ def test_array_data(postgres_target): verify_data(postgres_target, "test_carts", 4, "id", row) +def test_jsonb_data(postgres_target): + file_name = "jsonb_data.singer" + singer_file_to_target(file_name, postgres_target) + row = [ + {"id": 1, "event_data": None}, + {"id": 2, "event_data": {"test": {"test_name": "test_value"}}}, + ] + verify_data(postgres_target, "test_jsonb_data", 2, "id", row) + + def test_encoded_string_data(postgres_target): """ We removed NUL characters from the original encoded_strings.singer as postgres doesn't allow them.