From 05ada4cd5831bc69240cba3f998bd6ed5f65e26b Mon Sep 17 00:00:00 2001 From: joocer Date: Thu, 23 May 2024 09:24:53 +0100 Subject: [PATCH 1/6] 0.15.0 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 724ab4375..db750ce58 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -29,7 +29,7 @@ class VersionStatus(Enum): _major = 0 _minor = 15 _revision = 0 -_status = VersionStatus.BETA +_status = VersionStatus.RELEASE __author__ = "@joocer" __version__ = f"{_major}.{_minor}.{_revision}" + ( From 7d62b10df80197884e8aca9b1fd987591aa69b55 Mon Sep 17 00:00:00 2001 From: XB500 Date: Thu, 23 May 2024 08:25:33 +0000 Subject: [PATCH 2/6] Opteryx Version 0.15.0 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index db750ce58..cf2665b46 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 514 +__build__ = 515 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 0a28bd409d02d9ed72d23be0ee2600460b3240f5 Mon Sep 17 00:00:00 2001 From: joocer Date: Thu, 23 May 2024 21:29:42 +0100 Subject: [PATCH 3/6] #1676 --- opteryx/planner/sql_rewriter.py | 54 ++++++++-- tests/query_planner/test_b_strings.py | 100 ++++++++++++++++++ .../test_shapes_and_errors_battery.py | 4 + 3 files changed, 147 insertions(+), 11 deletions(-) create mode 100644 tests/query_planner/test_b_strings.py diff --git a/opteryx/planner/sql_rewriter.py b/opteryx/planner/sql_rewriter.py index 67844a1ff..817c1c919 100644 --- a/opteryx/planner/sql_rewriter.py +++ b/opteryx/planner/sql_rewriter.py @@ -139,12 +139,19 @@ def sql_parts(string): + r")", re.IGNORECASE, ) - quoted_strings = re.compile(r"(\"(?:\\.|[^\"])*\"|\'(?:\\.|[^\'])*\'|`(?:\\.|[^`])*`)") + # Match ", ', b", b', ` + # We match b prefixes separately after the non-prefix versions + quoted_strings = re.compile( + r"(\"(?:\\.|[^\"])*\"|\'(?:\\.|[^\'])*\'|\b[bB]\"(?:\\.|[^\"])*\"|\b[bB]\'(?:\\.|[^\'])*\'|`(?:\\.|[^`])*`)" + ) parts = [] for part in quoted_strings.split(string): if part and part[-1] in ("'", '"', "`"): - parts.append(part) + if part[0] in ("b", "B"): + parts.append(f"blob({part[1:]})") + else: + parts.append(part) else: for subpart in keywords.split(part): subpart = subpart.strip() @@ -230,15 +237,13 @@ def _temporal_extration_state_machine(parts: List[str]) -> Tuple[List[Tuple[str, Returns: Tuple containing two lists, first with the temporal filters, second with the remaining SQL parts. """ - """ - we use a four state machine to extract the temporal information from the query - and maintain the relation to filter information. - - We separate out the two key parts of the algorithm, first we determine the state, - then we work out if the state transition means we should do something. - - We're essentially using a bit mask to record state and transitions. - """ + # We use a four state machine to extract the temporal information from the query + # and maintain the relation to filter information. + # + # We separate out the two key parts of the algorithm, first we determine the state, + # then we work out if the state transition means we should do something. + # + # We're essentially using a bit mask to record state and transitions. state = WAITING relation = "" @@ -378,5 +383,32 @@ def extract_temporal_filters(sql): # pragma: no cover return sql, final_collector +def replace_b_strings(text: str) -> str: + """ + Replaces occurrences of b'...' or B"..." with inline_blob('...') or inline_blob("..."). + + Parameters: + text: str + The input text containing the strings to be replaced. + + Returns: + The modified text with the replacements. + """ + # Define the regex pattern to match b'...' or B"..." (case insensitive) + pattern = re.compile(r"\b([bB]?)(['\"])((?:\\.|[^\2])*?)\2") + + def replacer(match): + prefix = match.group(1) + quote = match.group(2) + content = match.group(3) + # Only replace if there's a b or B prefix + if prefix.lower() == "b": + return f"blob({quote}{content}{quote})" + return match.group(0) + + # Use re.sub with the replacer function + return pattern.sub(replacer, text) + + def do_sql_rewrite(statement): return extract_temporal_filters(statement) diff --git a/tests/query_planner/test_b_strings.py b/tests/query_planner/test_b_strings.py new file mode 100644 index 000000000..acc90ab59 --- /dev/null +++ b/tests/query_planner/test_b_strings.py @@ -0,0 +1,100 @@ +import os +import pytest +import sys + +sys.path.insert(1, os.path.join(sys.path[0], "../..")) + +from opteryx.planner.sql_rewriter import sql_parts + +# Define the test cases as a list of (input, expected_output) tuples +# fmt:off +test_cases = [ + # Contrived cases + ("This is a test string with b'abc' and B\"def\".", "This is a test string with blob('abc') and blob(\"def\")."), + ("b'123' should become blob('123')", "blob('123') should become blob('123')"), + ('B"xyz" should become blob("xyz")', 'blob("xyz") should become blob("xyz")'), + ("Mix of b'one' and B\"two\"", "Mix of blob('one') and blob(\"two\")"), + ("No prefixed strings here.", "No prefixed strings here."), + ("B'' and b\"\" should be handled.", "blob('') and blob(\"\") should be handled."), + + # Basic replacements + ("SELECT * FROM table WHERE column = b'abc';", "SELECT * FROM table WHERE column = blob('abc');"), + ("INSERT INTO table (column) VALUES (B\"def\");", "INSERT INTO table (column) VALUES (blob(\"def\"));"), + ("UPDATE table SET column = b'123' WHERE id = 1;", "UPDATE table SET column = blob('123') WHERE id = 1;"), + + # Mixed cases + ("SELECT * FROM table WHERE column = B'xyz' OR column = b\"uvw\";", "SELECT * FROM table WHERE column = blob('xyz') OR column = blob(\"uvw\");"), + ("INSERT INTO table (col1, col2) VALUES (b'val1', B\"val2\");", "INSERT INTO table (col1, col2) VALUES (blob('val1'), blob(\"val2\"));"), + + # Edge cases + ("SELECT * FROM table WHERE column = b'';", "SELECT * FROM table WHERE column = blob('');"), + ("SELECT * FROM table WHERE column = B\"\";", "SELECT * FROM table WHERE column = blob(\"\");"), + ("SELECT b'abc' AS col1, B'def' AS col2 FROM table;", "SELECT blob('abc') AS col1, blob('def') AS col2 FROM table;"), + + # No replacements + ("SELECT * FROM table WHERE column = 'abc';", "SELECT * FROM table WHERE column = 'abc';"), + ("SELECT * FROM table WHERE column = \"def\";", "SELECT * FROM table WHERE column = \"def\";"), + ("SELECT * FROM table WHERE column = '';", "SELECT * FROM table WHERE column = '';"), + ("SELECT * FROM table WHERE column = \"\";", "SELECT * FROM table WHERE column = \"\";"), + + # Complex statements + ("SELECT * FROM table1 JOIN table2 ON table1.col = table2.col WHERE table1.col = b'join' AND table2.col = B\"join\";", "SELECT * FROM table1 JOIN table2 ON table1.col = table2.col WHERE table1.col = blob('join') AND table2.col = blob(\"join\");"), + ("WITH cte AS (SELECT b'cte' AS col FROM table) SELECT * FROM cte WHERE col = B'cte';", "WITH cte AS (SELECT blob('cte') AS col FROM table) SELECT * FROM cte WHERE col = blob('cte');"), + + # Specific cases + ("SELECT * FROM table WHERE column = blob'a';", "SELECT * FROM table WHERE column = blob'a';"), + ("SELECT * FROM table WHERE column = blob(\"a\");", "SELECT * FROM table WHERE column = blob(\"a\");"), + ("SELECT * FROM table WHERE column = blob('a');", "SELECT * FROM table WHERE column = blob('a');"), + ("SELECT * FROM table WHERE column = b'abc' AND function_call(b'xyz');", "SELECT * FROM table WHERE column = blob('abc') AND function_call(blob('xyz'));"), + + # failed case + ("SELECT * FROM $satellites WHERE (((id = 5 OR (10<11)) AND ('a'='b')) OR (name = 'Europa' AND (TRUE AND (11=11))));", "SELECT * FROM $satellites WHERE (((id = 5 OR (10<11)) AND ('a'='b')) OR (name = 'Europa' AND (TRUE AND (11=11)))) ;"), + + # complex quotes + ("SELECT * FROM table WHERE column = 'This is a ''test'' string';", "SELECT * FROM table WHERE column = 'This is a ''test'' string';"), + ("SELECT * FROM table WHERE column = \"He said, \\\"Hello, World!\\\"\";", "SELECT * FROM table WHERE column = \"He said, \\\"Hello, World!\\\"\";"), + ("SELECT * FROM table WHERE column = 'Single quote within '' single quotes';", "SELECT * FROM table WHERE column = 'Single quote within '' single quotes';"), + ("SELECT * FROM table WHERE column = \"Double quote within \\\" double quotes\";", "SELECT * FROM table WHERE column = \"Double quote within \\\" double quotes\";"), + ("SELECT * FROM table WHERE column = `Backticks are used for column names`;", "SELECT * FROM table WHERE column = `Backticks are used for column names`;"), + ("SELECT * FROM table WHERE column = 'Multiple ''single quotes'' in one string';", "SELECT * FROM table WHERE column = 'Multiple ''single quotes'' in one string';"), + ("SELECT * FROM table WHERE column = \"Multiple \\\"double quotes\\\" in one string\";", "SELECT * FROM table WHERE column = \"Multiple \\\"double quotes\\\" in one string\";"), + ("SELECT * FROM table WHERE column = 'Combination of ''single'' and \"double\" quotes';", "SELECT * FROM table WHERE column = 'Combination of ''single'' and \"double\" quotes';"), + ("SELECT * FROM table WHERE column = 'String with newline\ncharacter';", "SELECT * FROM table WHERE column = 'String with newline\ncharacter';") +] +# fmt:on + + +@pytest.mark.parametrize("input_text, expected_output", test_cases) +def test_replace_b_strings(input_text, expected_output): + assert "".join(sql_parts(input_text)) == expected_output + + +if __name__ == "__main__": # pragma: no cover + + """ + Running in the IDE we do some formatting - it's not functional but helps + when reading the outputs. + """ + + import shutil + import time + + width = shutil.get_terminal_size((80, 20))[0] - 15 + + nl = "\n" + + print(f"RUNNING BATTERY OF {len(test_cases)} B STRINGS") + for index, (input, expected) in enumerate(test_cases): + start = time.monotonic_ns() + print( + f"\033[0;36m{(index + 1):04}\033[0m {input[0:width - 1].ljust(width)}", + end="", + ) + if " ".join(sql_parts(input)).replace(" ", "") == expected.replace(" ", ""): + print(f"\033[0;32m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms\033[0m ✅") + else: + print(f"\033[0;31m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms\033[0m ❌") + print("Expected:", expected) + print("Recieved:", " ".join(sql_parts(input))) + + print("--- ✅ \033[0;32mdone\033[0m") diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py index fc1a2c2a3..db4842887 100644 --- a/tests/sql_battery/test_shapes_and_errors_battery.py +++ b/tests/sql_battery/test_shapes_and_errors_battery.py @@ -453,6 +453,7 @@ ("SELECT CAST('2022-01-0' || planetId::VARCHAR AS TIMESTAMP) FROM $satellites", 177, 1, None), ("SELECT planetId::INTEGER FROM $satellites", 177, 1, None), ("SELECT planetId::DOUBLE FROM $satellites", 177, 1, None), + ("SELECT 1::double", 1, 1, None), ("SELECT TRY_CAST(planetId AS BOOLEAN) FROM $satellites", 177, 1, None), ("SELECT TRY_CAST(planetId AS VARCHAR) FROM $satellites", 177, 1, None), ("SELECT TRY_CAST(planetId AS TIMESTAMP) FROM $satellites", 177, 1, None), @@ -471,6 +472,9 @@ ("SELECT TRY_CAST(planetId AS DECIMAL) AS VALUE FROM $satellites", 177, 1, None), ("SELECT * FROM $planets WHERE id = GET(STRUCT('{\"a\":1,\"b\":\"c\"}'), 'a')", 1, 20, None), # ("SELECT * FROM $planets WHERE id = STRUCT('{\"a\":1,\"b\":\"c\"}')->'a'", 1, 20, None), + ("SELECT b'binary'", 1, 1, None), + ("SELECT B'binary'", 1, 1, None), + ("SELECT * FROM $planets WHERE name = b'Earth';", 1, 20, None), ("SELECT PI()", 1, 1, None), ("SELECT E()", 1, 1, None), From 04ec305d0024fcd7b39c14a5673f7090e0b57398 Mon Sep 17 00:00:00 2001 From: XB500 Date: Thu, 23 May 2024 20:31:05 +0000 Subject: [PATCH 4/6] Opteryx Version 0.15.0 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index cf2665b46..0579ee14d 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 515 +__build__ = 516 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 2f8e1bdc5cb2a4df06aa0ab4006ae873620863e3 Mon Sep 17 00:00:00 2001 From: joocer Date: Thu, 23 May 2024 21:48:30 +0100 Subject: [PATCH 5/6] #1676 --- .../workflows/regression_suite_mac_ARM.yaml | 52 +++++++++++++++++++ .../workflows/regression_suite_mac_x86.yaml | 4 +- .github/workflows/release.yaml | 2 +- opteryx/__version__.py | 2 +- opteryx/planner/sql_rewriter.py | 27 ---------- tests/query_planner/test_b_strings.py | 12 ++--- 6 files changed, 61 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/regression_suite_mac_ARM.yaml diff --git a/.github/workflows/regression_suite_mac_ARM.yaml b/.github/workflows/regression_suite_mac_ARM.yaml new file mode 100644 index 000000000..80cc479ab --- /dev/null +++ b/.github/workflows/regression_suite_mac_ARM.yaml @@ -0,0 +1,52 @@ +name: Regression Suite (Mac ARM) + +on: + push + +jobs: + regression_matrix: + strategy: + max-parallel: 4 + matrix: + python-version: ['3.10', '3.11'] + os: ['macos-14'] + runs-on: ${{ matrix.os }} + steps: + + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} x64 + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + + - name: Install Requirements + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade wheel numpy cython setuptools_rust pytest + python -m pip install --upgrade -r requirements.txt + python -m pip install --upgrade -r tests/requirements_arm.txt + python setup.py build_ext --inplace + + - name: "Authenticate to Google Cloud" + uses: google-github-actions/auth@v1 + with: + credentials_json: '${{ secrets.GCP_KEY }}' + + - name: Run Regression Tests + run: python -m pytest --color=yes + env: + GCP_PROJECT_ID: mabeldev + MYSQL_USER: '${{ secrets.MYSQL_USER }}' + MYSQL_PASSWORD: '${{ secrets.MYSQL_PASSWORD }}' + POSTGRES_USER: '${{ secrets.POSTGRES_USER }}' + POSTGRES_PASSWORD: '${{ secrets.POSTGRES_PASSWORD }}' + MEMCACHED_SERVER: 'localhost:11211' \ No newline at end of file diff --git a/.github/workflows/regression_suite_mac_x86.yaml b/.github/workflows/regression_suite_mac_x86.yaml index d50d52d62..c23986292 100644 --- a/.github/workflows/regression_suite_mac_x86.yaml +++ b/.github/workflows/regression_suite_mac_x86.yaml @@ -1,4 +1,4 @@ -name: Regression Suite (Mac) +name: Regression Suite (Mac x86) on: push @@ -9,7 +9,7 @@ jobs: max-parallel: 4 matrix: python-version: ['3.10', '3.11'] - os: ['macos-latest'] + os: ['macos-13'] runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index d0b4c9211..122b09604 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -133,7 +133,7 @@ jobs: path: dist build-macos: - runs-on: macos-latest + runs-on: macos-13 strategy: max-parallel: 4 matrix: diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 0579ee14d..c1d85896f 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -29,7 +29,7 @@ class VersionStatus(Enum): _major = 0 _minor = 15 _revision = 0 -_status = VersionStatus.RELEASE +_status = VersionStatus.BETA __author__ = "@joocer" __version__ = f"{_major}.{_minor}.{_revision}" + ( diff --git a/opteryx/planner/sql_rewriter.py b/opteryx/planner/sql_rewriter.py index 817c1c919..917d8ada8 100644 --- a/opteryx/planner/sql_rewriter.py +++ b/opteryx/planner/sql_rewriter.py @@ -383,32 +383,5 @@ def extract_temporal_filters(sql): # pragma: no cover return sql, final_collector -def replace_b_strings(text: str) -> str: - """ - Replaces occurrences of b'...' or B"..." with inline_blob('...') or inline_blob("..."). - - Parameters: - text: str - The input text containing the strings to be replaced. - - Returns: - The modified text with the replacements. - """ - # Define the regex pattern to match b'...' or B"..." (case insensitive) - pattern = re.compile(r"\b([bB]?)(['\"])((?:\\.|[^\2])*?)\2") - - def replacer(match): - prefix = match.group(1) - quote = match.group(2) - content = match.group(3) - # Only replace if there's a b or B prefix - if prefix.lower() == "b": - return f"blob({quote}{content}{quote})" - return match.group(0) - - # Use re.sub with the replacer function - return pattern.sub(replacer, text) - - def do_sql_rewrite(statement): return extract_temporal_filters(statement) diff --git a/tests/query_planner/test_b_strings.py b/tests/query_planner/test_b_strings.py index acc90ab59..8246907d9 100644 --- a/tests/query_planner/test_b_strings.py +++ b/tests/query_planner/test_b_strings.py @@ -66,7 +66,7 @@ @pytest.mark.parametrize("input_text, expected_output", test_cases) def test_replace_b_strings(input_text, expected_output): - assert "".join(sql_parts(input_text)) == expected_output + assert " ".join(sql_parts(input_text)).replace(" ", "") == expected_output.replace(" ", "") if __name__ == "__main__": # pragma: no cover @@ -81,20 +81,18 @@ def test_replace_b_strings(input_text, expected_output): width = shutil.get_terminal_size((80, 20))[0] - 15 - nl = "\n" - print(f"RUNNING BATTERY OF {len(test_cases)} B STRINGS") - for index, (input, expected) in enumerate(test_cases): + for index, (case, expected) in enumerate(test_cases): start = time.monotonic_ns() print( - f"\033[0;36m{(index + 1):04}\033[0m {input[0:width - 1].ljust(width)}", + f"\033[0;36m{(index + 1):04}\033[0m {case[0:width - 1].ljust(width)}", end="", ) - if " ".join(sql_parts(input)).replace(" ", "") == expected.replace(" ", ""): + if " ".join(sql_parts(case)).replace(" ", "") == expected.replace(" ", ""): print(f"\033[0;32m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms\033[0m ✅") else: print(f"\033[0;31m{str(int((time.monotonic_ns() - start)/1e6)).rjust(4)}ms\033[0m ❌") print("Expected:", expected) - print("Recieved:", " ".join(sql_parts(input))) + print("Recieved:", " ".join(sql_parts(case))) print("--- ✅ \033[0;32mdone\033[0m") From cae876c190333d2f2412203e428070c2570c96a0 Mon Sep 17 00:00:00 2001 From: XB500 Date: Thu, 23 May 2024 20:48:58 +0000 Subject: [PATCH 6/6] Opteryx Version 0.15.0-beta.517 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index c1d85896f..7372b7355 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 516 +__build__ = 517 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.