Skip to content

Commit

Permalink
fix: Ensure variant JSONB output from tap-postgres is mapped to `JSONB` in this target (#395)
Browse files Browse the repository at this point in the history

fixes: #394

---------

Co-authored-by: Edgar Ramírez-Mondragón <[email protected]>
Co-authored-by: Edgar Ramírez Mondragón <[email protected]>
  • Loading branch information
3 people authored Jul 29, 2024
1 parent c7b5f75 commit 455a4bf
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
32 changes: 26 additions & 6 deletions target_postgres/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,14 +291,34 @@ def pick_individual_type(self, jsonschema_type: dict):
if "object" in jsonschema_type["type"]:
return JSONB()
if "array" in jsonschema_type["type"]:
items_type = jsonschema_type.get("items")
if "string" == items_type:
return ARRAY(TEXT())
if "integer" == items_type:
return ARRAY(BIGINT())
else:
items = jsonschema_type.get("items")
# Case 1: items is a string
if isinstance(items, str):
return ARRAY(self.to_sql_type({"type": items}))

# Case 2: items are more complex
if isinstance(items, dict):
# Case 2.1: items are variants
if "type" not in items:
return ARRAY(JSONB())

items_type = items["type"]

# Case 2.2: items are a single type
if isinstance(items_type, str):
return ARRAY(self.to_sql_type({"type": items_type}))

# Case 2.3: items are a list of types
if isinstance(items_type, list):
return ARRAY(self.to_sql_type({"type": items_type}))

# Case 3: tuples
if isinstance(items, list):
return ARRAY(JSONB())

# All other cases, return JSONB
return JSONB()

# string formats
if jsonschema_type.get("format") == "date-time":
return TIMESTAMP()
Expand Down
3 changes: 3 additions & 0 deletions target_postgres/tests/data_files/jsonb_data.singer
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"type":"SCHEMA", "stream":"test_jsonb_data", "key_properties":["id"], "schema":{"required":["id"], "type":"object", "properties":{"id":{"type":["integer"]},"event_data":{"type":["string","number","integer","array","object","boolean","null"]}}}}
{"type":"RECORD","stream":"test_jsonb_data","record":{"id":1,"event_data":null,"time_extracted":"2024-07-27T12:24:43.774995+00:00"}}
{"type":"RECORD","stream":"test_jsonb_data","record":{"id":2,"event_data":{"test":{"test_name":"test_value"}},"time_extracted":"2024-07-27T12:24:43.774995+00:00"}}
10 changes: 10 additions & 0 deletions target_postgres/tests/test_target_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,16 @@ def test_array_data(postgres_target):
verify_data(postgres_target, "test_carts", 4, "id", row)


def test_jsonb_data(postgres_target):
    """Load ``jsonb_data.singer`` into the target and check the stored rows.

    The expected rows carry one null ``event_data`` value and one nested
    object, keyed by ``id``.
    """
    singer_file_to_target("jsonb_data.singer", postgres_target)
    # NOTE(review): the positional ``2`` and ``"id"`` are presumably the
    # expected row count and the primary-key column -- confirm against
    # ``verify_data``.
    expected_rows = [
        {"id": 1, "event_data": None},
        {"id": 2, "event_data": {"test": {"test_name": "test_value"}}},
    ]
    verify_data(postgres_target, "test_jsonb_data", 2, "id", expected_rows)


def test_encoded_string_data(postgres_target):
"""
We removed NUL characters from the original encoded_strings.singer as postgres doesn't allow them.
Expand Down

0 comments on commit 455a4bf

Please sign in to comment.