From 8273b26fbeda434b312197e416226a74b0475b05 Mon Sep 17 00:00:00 2001 From: joocer Date: Sat, 11 Jan 2025 12:37:15 +0000 Subject: [PATCH 1/4] #2233 --- opteryx/__init__.py | 2 +- opteryx/connectors/disk_connector.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/opteryx/__init__.py b/opteryx/__init__.py index 6b45823aa..9bb0c0d78 100644 --- a/opteryx/__init__.py +++ b/opteryx/__init__.py @@ -32,7 +32,7 @@ getcontext().prec = 28 # end-of-stream marker -EOS = object() +EOS:int = 0 def is_mac() -> bool: # pragma: no cover diff --git a/opteryx/connectors/disk_connector.py b/opteryx/connectors/disk_connector.py index 7ad23287f..d50214ea6 100644 --- a/opteryx/connectors/disk_connector.py +++ b/opteryx/connectors/disk_connector.py @@ -31,6 +31,8 @@ # Define os.O_BINARY for non-Windows platforms if it's not already defined if not hasattr(os, "O_BINARY"): os.O_BINARY = 0 # Value has no effect on non-Windows platforms +if not hasattr(os, "O_DIRECT"): + os.O_DIRECT = 0 # Flag becomes a no-op on platforms that do not define O_DIRECT def read_blob( @@ -69,7 +71,7 @@ def read_blob( import mmap try: - file_descriptor = os.open(blob_name, os.O_RDONLY | os.O_BINARY) + file_descriptor = os.open(blob_name, os.O_RDONLY | os.O_BINARY | os.O_DIRECT) if hasattr(os, "posix_fadvise"): os.posix_fadvise(file_descriptor, 0, 0, os.POSIX_FADV_WILLNEED) size = os.fstat(file_descriptor).st_size From 3a5990122eb8a6ce799ec944cf7a020fd0ad587b Mon Sep 17 00:00:00 2001 From: XB500 Date: Sat, 11 Jan 2025 12:37:40 +0000 Subject: [PATCH 2/4] Opteryx Version 0.19.1-alpha.978 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 9a6c27c0a..c71ca070e 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 976 +__build__ = 978 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 8e066f31387b162f209b5fe5ee7bc1b779f3a7fd Mon Sep 17 00:00:00 2001 From: joocer Date: Sun, 12 Jan 2025 13:33:16 +0000 Subject: [PATCH 3/4] 0.19.1 --- .gitignore | 1 + opteryx/__init__.py | 2 +- opteryx/__version__.py | 2 +- opteryx/operators/show_columns_node.py | 18 ++++-------------- .../test_shapes_and_errors_battery.py | 12 ++++++------ 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 4f67726a2..2c7d3ca05 100644 --- a/.gitignore +++ b/.gitignore @@ -180,3 +180,4 @@ space_missions.parquet **.psv planets.parquet tmp/iceberg/** +hits_split/*.parquet diff --git a/opteryx/__init__.py b/opteryx/__init__.py index 9bb0c0d78..b61ea69fa 100644 --- a/opteryx/__init__.py +++ b/opteryx/__init__.py @@ -32,7 +32,7 @@ getcontext().prec = 28 # end-of-stream marker -EOS:int = 0 +EOS: int = 0 def is_mac() -> bool: # pragma: no cover diff --git a/opteryx/__version__.py b/opteryx/__version__.py index c71ca070e..a1165daf5 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -22,7 +22,7 @@ class VersionStatus(Enum): _major = 0 _minor = 19 _revision = 1 -_status = VersionStatus.ALPHA +_status = VersionStatus.RELEASE __author__ = "@joocer" __version__ = f"{_major}.{_minor}.{_revision}" + ( diff --git a/opteryx/operators/show_columns_node.py b/opteryx/operators/show_columns_node.py index eecf9d616..a51a89393 100644 --- a/opteryx/operators/show_columns_node.py +++ b/opteryx/operators/show_columns_node.py @@ -77,18 +77,8 @@ def execute(self, morsel: pyarrow.Table, **kwargs) -> pyarrow.Table: if self._full or self._extended: # we're going to read the full table, so we can count stuff - if morsel == EOS: - dicts = self.collector.to_dicts() - dicts = [self.rename_column(d, self._column_map) for d in dicts] - self.seen = True - yield pyarrow.Table.from_pylist(dicts) - return + self.statistics.add_message("SHOW FULL/SHOW EXTENDED not implemented") - df = DataFrame.from_arrow(morsel) - - if self.collector is None: - self.collector 
= df.profile - else: - self.collector += df.profile - - yield None + self.seen = True + yield _simple_collector(self._schema) + return diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py index 26a1e9f99..fb6ef11e8 100644 --- a/tests/sql_battery/test_shapes_and_errors_battery.py +++ b/tests/sql_battery/test_shapes_and_errors_battery.py @@ -1033,10 +1033,10 @@ ("SELECT name, id FROM $planets ORDER BY id LIMIT 100", 9, 2, None), ("SHOW COLUMNS FROM $satellites", 8, 4, None), - ("SHOW FULL COLUMNS FROM $satellites", 8, 12, None), - ("SHOW EXTENDED COLUMNS FROM $satellites", 8, 12, None), - ("SHOW EXTENDED COLUMNS FROM $planets", 20, 12, None), - ("SHOW EXTENDED COLUMNS FROM $astronauts", 19, 12, None), + ("SHOW FULL COLUMNS FROM $satellites", 8, 4, None), + ("SHOW EXTENDED COLUMNS FROM $satellites", 8, 4, None), + ("SHOW EXTENDED COLUMNS FROM $planets", 20, 4, None), + ("SHOW EXTENDED COLUMNS FROM $astronauts", 19, 4, None), ("SHOW COLUMNS FROM $satellites LIKE '%d'", 2, 4, UnsupportedSyntaxError), ("SHOW COLUMNS FROM testdata.partitioned.dated FOR '2024-02-03'", 8, 4, None), @@ -2407,8 +2407,8 @@ def test_sql_battery(statement:str, rows:int, columns:int, exception: Optional[E print(">", err) failures.append((statement, err)) -# print(opteryx.query(statement)) -# raise err + print(opteryx.query(statement)) + raise err print("--- ✅ \033[0;32mdone\033[0m") From 6c56d516dde0cbf9a97d70a9d369fb4125fe1ef5 Mon Sep 17 00:00:00 2001 From: XB500 Date: Sun, 12 Jan 2025 13:33:41 +0000 Subject: [PATCH 4/4] Opteryx Version 0.19.1 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index a1165daf5..064a41bad 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 978 +__build__ = 979 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in 
compliance with the License.