Commit

joocer committed Jan 9, 2025
1 parent b6b2707 commit b97a85a
Showing 6 changed files with 24 additions and 10 deletions.
6 changes: 3 additions & 3 deletions opteryx/__main__.py
@@ -127,7 +127,7 @@ def main():
         )
         if args.stats:
             print(
-                f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration/1e9} seconds )"
+                f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration / 1e9} seconds )"
             )
     except MissingSqlStatement:
         print(
@@ -171,7 +171,7 @@ def main():
             )
             if args.stats:
                 print(
-                    f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration/1e9} seconds )"
+                    f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration / 1e9} seconds )"
                 )
         else:
            table = result.arrow()
@@ -197,7 +197,7 @@ def main():
             else:
                 raise ValueError(f"Unknown output format '{ext}'")
             print(
-                f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration/1e9} seconds )"
+                f"[ {result.rowcount} rows x {result.columncount} columns ] ( {duration / 1e9} seconds )"
             )
             print(f"Written result to '{args.output}'")

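The three __main__.py edits are formatting only: spacing around the division in the stats summary line. The / 1e9 implies the duration is tracked in nanoseconds. A minimal sketch of the same summary line, assuming a monotonic nanosecond timer (row and column figures are placeholders):

    import time

    start = time.monotonic_ns()
    # ... run the statement ...
    duration = time.monotonic_ns() - start  # nanoseconds, hence the / 1e9
    print(f"[ 9 rows x 20 columns ] ( {duration / 1e9} seconds )")
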
14 changes: 12 additions & 2 deletions opteryx/connectors/iceberg_connector.py
@@ -15,14 +15,16 @@
 
 from opteryx.connectors import DiskConnector
 from opteryx.connectors.base.base_connector import BaseConnector
+from opteryx.connectors.capabilities import LimitPushable
 
 
-class IcebergConnector(BaseConnector):
+class IcebergConnector(BaseConnector, LimitPushable):
     __mode__ = "Blob"
-    __type__ = "ARROW"
+    __type__ = "ICEBERG"
 
     def __init__(self, *args, catalog=None, io=DiskConnector, **kwargs):
         BaseConnector.__init__(self, **kwargs)
+        LimitPushable.__init__(self, **kwargs)
 
         self.dataset = self.dataset.lower()
         self.table = catalog.load_table(self.dataset)
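
Alongside the LIMIT work, the connector now identifies as ICEBERG rather than generic ARROW and mixes in LimitPushable so the optimizer can detect the capability (see the limit_pushdown.py change below); the constructor lower-cases the dataset name and loads the table from the supplied catalog. A rough usage sketch, assuming a configured PyIceberg catalog and that register_store forwards the catalog keyword to the connector; the catalog and table names are placeholders:

    import opteryx
    from pyiceberg.catalog import load_catalog
    from opteryx.connectors.iceberg_connector import IcebergConnector

    catalog = load_catalog("default")  # placeholder PyIceberg catalog
    opteryx.register_store("iceberg", IcebergConnector, catalog=catalog)

    # With the connector registered, a trailing LIMIT can now be pushed into the scan.
    rows = opteryx.query("SELECT * FROM iceberg.sales.orders LIMIT 10")
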
@@ -39,6 +41,9 @@ def get_dataset_schema(self) -> RelationSchema:
         return self.schema
 
     def read_dataset(self, columns: list = None, **kwargs) -> pyarrow.Table:
+        rows_read = 0
+        limit = kwargs.get("limit")
+
         if columns is None:
             column_names = self.schema.column_names
         else:
@@ -49,4 +54,9 @@ def read_dataset(self, columns: list = None, **kwargs) -> pyarrow.Table:
         ).to_arrow_batch_reader()
 
         for batch in reader:
+            if limit and rows_read + batch.num_rows > limit:
+                batch = batch.slice(0, limit - rows_read)
             yield pyarrow.Table.from_batches([batch])
+            rows_read += batch.num_rows
+            if limit and rows_read >= limit:
+                break
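
The new read path caps what comes out of the Arrow batch reader once a pushed-down limit is met: the final batch is sliced so the total never exceeds the limit, and iteration stops early. A minimal, standalone sketch of the same pattern; the reader construction and names below are illustrative, not Opteryx internals:

    import pyarrow

    def read_with_limit(reader, limit=None):
        rows_read = 0
        for batch in reader:
            if limit and rows_read + batch.num_rows > limit:
                batch = batch.slice(0, limit - rows_read)  # trim the final batch
            yield pyarrow.Table.from_batches([batch])
            rows_read += batch.num_rows
            if limit and rows_read >= limit:
                break

    # Capping a 10-row table, read in batches of 4, to 3 rows in total.
    table = pyarrow.table({"n": list(range(10))})
    reader = table.to_reader(max_chunksize=4)
    assert sum(t.num_rows for t in read_with_limit(reader, limit=3)) == 3
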
1 change: 1 addition & 0 deletions opteryx/managers/execution/serial_engine.py
@@ -84,6 +84,7 @@ def _inner_explain(node, depth):
     record["time_ms"] = operator.execution_time / 1e6
     record["records_in"] = operator.records_in
     record["records_out"] = operator.records_out
+    record["calls"] = operator.calls
     yield record
     yield from _inner_explain(operator_name[0], depth + 1)
@@ -100,9 +100,9 @@ def test_predicate_compaction():
 
     # After compaction, the upper limit should be '< 7'
     expected_upper_limit = Limit(7, False)  # Assuming exclusive bounds for '<'
-    assert (
-        vr.upper == expected_upper_limit
-    ), f"Expected upper limit to be {expected_upper_limit}, got {vr.upper}"
+    assert vr.upper == expected_upper_limit, (
+        f"Expected upper limit to be {expected_upper_limit}, got {vr.upper}"
+    )
 
 
 test_initialization()
5 changes: 4 additions & 1 deletion opteryx/planner/optimizer/strategies/limit_pushdown.py
@@ -31,7 +31,10 @@ def visit(self, node: LogicalPlanNode, context: OptimizerContext) -> OptimizerContext:
             context.collected_limits.append(node)
             return context
 
-        if node.node_type.Scan and LimitPushable in node.connector.__class__.mro():
+        if (
+            node.node_type == LogicalPlanStepType.Scan
+            and LimitPushable in node.connector.__class__.mro()
+        ):
             for limit_node in context.collected_limits:
                 if node.relation in limit_node.all_relations:
                     self.statistics.optimization_limit_pushdown += 1
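
The old condition, node.node_type.Scan, was an attribute lookup rather than a comparison, so it did not actually test whether the node is a Scan; the fix compares against LogicalPlanStepType.Scan explicitly. The second clause keeps the capability test: a connector opts in to limit pushdown by having LimitPushable in its class hierarchy, which is exactly what the IcebergConnector change above provides. A toy sketch of that mro()-based capability check (class names are stand-ins, not Opteryx's own):

    class LimitPushable:  # marker mixin for the capability
        pass

    class BaseConnector:
        pass

    class IcebergLikeConnector(BaseConnector, LimitPushable):
        pass

    class PlainConnector(BaseConnector):
        pass

    def supports_limit_pushdown(connector) -> bool:
        # Same test the optimizer uses: is the mixin anywhere in the MRO?
        return LimitPushable in connector.__class__.mro()

    assert supports_limit_pushdown(IcebergLikeConnector())
    assert not supports_limit_pushdown(PlainConnector())
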
2 changes: 1 addition & 1 deletion tests/sql_battery/test_shapes_and_errors_battery.py
@@ -1571,7 +1571,7 @@
     ("SELECT * FROM $planets AS P LEFT SEMI JOIN (SELECT id FROM $satellites WHERE name != 'Moon') AS S ON S.id = P.id;", 8, 20, None),
     ("SELECT * FROM $planets AS P LEFT SEMI JOIN $satellites AS S ON S.id = P.id WHERE P.name != 'Earth';", 8, 20, None),
     ("SELECT * FROM GENERATE_SERIES(1, 10) AS G LEFT SEMI JOIN $satellites AS S ON S.id = G;", 10, 1, None),
-    ("EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id);", 3, 6, None),
+    ("EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id);", 3, 7, None),
     ("SELECT DISTINCT ON (planetId) planetId, name FROM $satellites ", 7, 2, None),
     ("SELECT 8 DIV 4", 1, 1, None),
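The battery entries appear to be (statement, expected rows, expected columns, expected error) tuples; the EXPLAIN ANALYZE expectation moves from 6 to 7 columns, consistent with the calls field added to the explain records in serial_engine.py above. A hypothetical shape check in that format, assuming opteryx.query() exposes the same rowcount/columncount attributes used in __main__.py:

    import opteryx

    statement, expected_rows, expected_columns = (
        "EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a "
        "INNER JOIN (SELECT id FROM $planets) AS b USING (id);",
        3,
        7,
    )
    result = opteryx.query(statement)
    assert (result.rowcount, result.columncount) == (expected_rows, expected_columns)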
