Skip to content

Commit

Permalink
Merge pull request #1258 from mabel-dev/#1257
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer authored Nov 12, 2023
2 parents 80a94f7 + eb4924e commit 5fea69d
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 12 deletions.
16 changes: 10 additions & 6 deletions opteryx/components/binder/binder_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,15 +575,19 @@ def visit_join(self, node: Node, context: BindingContext) -> Tuple[Node, Binding
for column_name in (n.value for n in node.using):
# Pop the column from the left relation
for left_relation_name in node.left_relation_names:
context.schemas[left_relation_name].pop_column(column_name)
left_column = context.schemas[left_relation_name].pop_column(column_name)

# Pop the column from the right relation, keep this one to add to a new relation
# Pop the column from the right relation
for right_relation_name in node.right_relation_names:
right_column = context.schemas[right_relation_name].pop_column(column_name)
if right_column is not None:
right_column.origin = [left_relation_name, right_relation_name]
columns.append(right_column)
break

# Decide which side's column to keep: the left column for right anti/semi joins, otherwise the right column
if node.type in ("right anti", "right semi"):
left_column.origin = [left_relation_name, right_relation_name]
columns.append(left_column)
else:
right_column.origin = [left_relation_name, right_relation_name]
columns.append(right_column)

# shared columns exist in both schemas in some uses and in neither in others
context.schemas[f"$shared-{random_string()}"] = RelationSchema(
Expand Down
14 changes: 14 additions & 0 deletions opteryx/operators/join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,18 @@ def execute(self) -> Iterable:
join_type=self._join_type,
coalesce_keys=self._using is not None,
)

# For right anti/semi joins, rename any left-side join column in the result back to its right-side counterpart
if (
self._join_type in ("right anti", "right semi")
and new_morsel.column_names != right_table.column_names
):
columns = [
col
if col not in self._left_columns
else self._right_columns[self._left_columns.index(col)]
for col in new_morsel.column_names
]
new_morsel = new_morsel.rename_columns(columns)

yield new_morsel
2 changes: 1 addition & 1 deletion opteryx/operators/scanner_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def execute(self) -> Iterable:
morsel = None
schema = self.parameters["schema"]
start_clock = time.monotonic_ns()
reader = self.parameters.get("connector").read_dataset()
reader = self.parameters["connector"].read_dataset()
for morsel in reader:
self.statistics.blobs_read += 1
self.statistics.rows_read += morsel.num_rows
Expand Down
26 changes: 21 additions & 5 deletions tests/sql_battery/tests/v2_planner.run_tests
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,32 @@
# SELECT * FROM $planets UNION SELECT * FROM $planets;

# NEW JOINS
# SELECT * FROM $planets LEFT ANTI JOIN $satellites ON id = id;
# SELECT * FROM $planets LEFT SEMI JOIN $satellites ON id = id;
# SELECT * FROM $planets RIGHT ANTI JOIN $satellites ON id = id;
# SELECT * FROM $planets RIGHT SEMI JOIN $satellites ON id = id;
SELECT * FROM $planets LEFT ANTI JOIN $satellites USING(id);
SELECT * FROM $planets LEFT SEMI JOIN $satellites USING(id);
SELECT * FROM $planets RIGHT ANTI JOIN $satellites USING(id);
SELECT * FROM $planets RIGHT SEMI JOIN $satellites USING(id);
SELECT * FROM $planets LEFT ANTI JOIN $satellites ON $planets.id = $satellites.id;
SELECT * FROM $planets LEFT SEMI JOIN $satellites ON $planets.id = $satellites.id;
SELECT * FROM $planets RIGHT ANTI JOIN $satellites ON $planets.id = $satellites.id;
SELECT * FROM $planets RIGHT SEMI JOIN $satellites ON $planets.id = $satellites.id;
SELECT * FROM $planets LEFT ANTI JOIN $satellites ON $satellites.id = $planets.id;
SELECT * FROM $planets LEFT SEMI JOIN $satellites ON $satellites.id = $planets.id;
SELECT * FROM $planets RIGHT ANTI JOIN $satellites ON $satellites.id = $planets.id;
SELECT * FROM $planets RIGHT SEMI JOIN $satellites ON $satellites.id = $planets.id;
SELECT * FROM $planets AS P LEFT ANTI JOIN $satellites AS S ON P.id = S.id;
SELECT * FROM $planets AS P LEFT SEMI JOIN $satellites AS S ON P.id = S.id;
SELECT * FROM $planets AS P RIGHT ANTI JOIN $satellites AS S ON P.id = S.id;
SELECT * FROM $planets AS P RIGHT SEMI JOIN $satellites AS S ON P.id = S.id;
SELECT * FROM $planets AS P LEFT ANTI JOIN $satellites AS S USING(id);
SELECT * FROM $planets AS P LEFT SEMI JOIN $satellites AS S USING(id);
SELECT * FROM $planets AS P RIGHT ANTI JOIN $satellites AS S USING(id);
SELECT * FROM $planets AS P RIGHT SEMI JOIN $satellites AS S USING(id);

# EXPLAIN FORMAT
# EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id);

# DISTINCT ON
SELECT DISTINCT ON (planetId) planetId, name FROM $satellites;
SELECT DISTINCT ON (planetId) planetId, name FROM $satellites;

# CONDITIONS IN AGGREGATES
# SELECT SUM(DISTINCT id ORDER BY id) FROM $planets

0 comments on commit 5fea69d

Please sign in to comment.