From f2b6de551d431cd6cc43f86548fa0b994c5690bb Mon Sep 17 00:00:00 2001 From: joocer Date: Sun, 3 Mar 2024 17:15:01 +0000 Subject: [PATCH 1/5] #1486 --- opteryx/components/binder/__init__.py | 2 - opteryx/components/binder/binder.py | 8 +- opteryx/components/binder/binder_visitor.py | 5 +- opteryx/components/binder/operator_map.py | 207 ++++++++++++++++++ .../logical_planner_builders.py | 26 +-- opteryx/connectors/sql_connector.py | 2 +- opteryx/custom_types/__init__.py | 0 opteryx/custom_types/intervals.py | 121 ++++++++++ opteryx/functions/binary_operators.py | 155 +++---------- opteryx/functions/date_functions.py | 2 +- opteryx/managers/expression/__init__.py | 11 +- opteryx/managers/expression/formatter.py | 16 +- opteryx/third_party/pyarrow_ops/ops.py | 28 ++- .../test_shapes_and_errors_battery.py | 7 +- tests/sql_battery/tests/regression.run_tests | 4 +- 15 files changed, 421 insertions(+), 173 deletions(-) create mode 100644 opteryx/components/binder/operator_map.py create mode 100644 opteryx/custom_types/__init__.py create mode 100644 opteryx/custom_types/intervals.py diff --git a/opteryx/components/binder/__init__.py b/opteryx/components/binder/__init__.py index 5ddcb5455..2c283b592 100644 --- a/opteryx/components/binder/__init__.py +++ b/opteryx/components/binder/__init__.py @@ -61,8 +61,6 @@ - schema lookup and propagation (add columns and types, add aliases) """ - - from opteryx.components.binder.binder_visitor import BinderVisitor from opteryx.components.binder.binding_context import BindingContext from opteryx.components.logical_planner import LogicalPlan diff --git a/opteryx/components/binder/binder.py b/opteryx/components/binder/binder.py index 42e9f59e3..755226c94 100644 --- a/opteryx/components/binder/binder.py +++ b/opteryx/components/binder/binder.py @@ -21,7 +21,9 @@ from orso.schema import FlatColumn from orso.schema import FunctionColumn from orso.schema import RelationSchema +from orso.types import OrsoTypes +from opteryx.components.binder.operator_map import determine_type from opteryx.exceptions import AmbiguousIdentifierError from opteryx.exceptions import ColumnNotFoundError from opteryx.exceptions import InvalidInternalStateError @@ -308,11 +310,11 @@ def inner_binder(node: Node, context: Any) -> Tuple[Node, Any]: elif node.value and node.value.startswith("AnyOp"): # IMPROVE: check types here - schema_column = ExpressionColumn(name=column_name, type=0) + schema_column = ExpressionColumn(name=column_name, type=OrsoTypes.BOOLEAN) node.schema_column = schema_column elif node.value and node.value.startswith("AllOp"): # IMPROVE: check types here - schema_column = ExpressionColumn(name=column_name, type=0) + schema_column = ExpressionColumn(name=column_name, type=OrsoTypes.BOOLEAN) node.schema_column = schema_column else: # fmt:off @@ -329,7 +331,7 @@ def inner_binder(node: Node, context: Any) -> Tuple[Node, Any]: schema_column = ExpressionColumn( name=column_name, aliases=[node.alias] if node.alias else [], - type=0, + type=determine_type(node), expression=node.value, ) schemas["$derived"].columns.append(schema_column) diff --git a/opteryx/components/binder/binder_visitor.py b/opteryx/components/binder/binder_visitor.py index 8583440e3..e3b5f4902 100644 --- a/opteryx/components/binder/binder_visitor.py +++ b/opteryx/components/binder/binder_visitor.py @@ -659,7 +659,10 @@ def visit_join(self, node: Node, context: BindingContext) -> Tuple[Node, Binding node.unnest_target, found_source_relation = locate_identifier_in_loaded_schemas( node.unnest_alias, context.schemas ) - if node.unnest_column.schema_column.type not in (0, OrsoTypes.ARRAY): + if node.unnest_column.schema_column.type not in ( + OrsoTypes._MISSING_TYPE, + OrsoTypes.ARRAY, + ): from opteryx.exceptions import IncorrectTypeError raise IncorrectTypeError("CROSS JOIN UNNEST requires an ARRAY type column.") diff --git a/opteryx/components/binder/operator_map.py b/opteryx/components/binder/operator_map.py new file mode 100644 index 000000000..1d5a38201 --- /dev/null +++ b/opteryx/components/binder/operator_map.py @@ -0,0 +1,207 @@ +from typing import Dict +from typing import NamedTuple +from typing import Optional +from typing import Tuple + +from orso.types import OrsoTypes + +from opteryx.managers.expression import NodeType + + +class OperatorMapType(NamedTuple): + result_type: OrsoTypes + operation_function: Optional[callable] = None + cost_estimate: float = 100.0 + + +# fmt: off +OPERATOR_MAP: Dict[Tuple[OrsoTypes, OrsoTypes, str], OperatorMapType] = { + (OrsoTypes.BOOLEAN, OrsoTypes.ARRAY, "InList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.BOOLEAN, OrsoTypes.ARRAY, "NotInList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.BOOLEAN, OrsoTypes.BOOLEAN, "Or"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.ARRAY, "InList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.ARRAY, "NotInList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.DATE, "Minus"): OperatorMapType(OrsoTypes.INTERVAL, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.DATE, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.DATE, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.DATE, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.DATE, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.DATE, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.DATE, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.INTERVAL, "Minus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.INTERVAL, "Plus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.TIMESTAMP, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.TIMESTAMP, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.TIMESTAMP, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.TIMESTAMP, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.TIMESTAMP, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DATE, OrsoTypes.TIMESTAMP, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.ARRAY, "InList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.ARRAY, "NotInList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "Plus"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "Minus"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DOUBLE, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DOUBLE, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DOUBLE, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DOUBLE, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DOUBLE, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DOUBLE, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "Divide"): OperatorMapType(OrsoTypes.DECIMAL, None, 100.0), + (OrsoTypes.DECIMAL, OrsoTypes.DECIMAL, "Multiply"): OperatorMapType(OrsoTypes.DECIMAL, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.ARRAY, "InList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.ARRAY, "NotInList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DECIMAL, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DECIMAL, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DECIMAL, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DECIMAL, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DECIMAL, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DECIMAL, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "Plus"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "Minus"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "Divide"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "Multiply"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.DOUBLE, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "Divide"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.DOUBLE, OrsoTypes.INTEGER, "Multiply"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.ARRAY, "InList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.ARRAY, "NotInList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "Divide"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.DOUBLE, "Multiply"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Plus"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Minus"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Divide"): OperatorMapType(OrsoTypes.DOUBLE, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Multiply"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "Modulo"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "MyIntegerDivide"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "BitwiseOr"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "BitwiseAnd"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "BitwiseXor"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "ShiftLeft"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTEGER, OrsoTypes.INTEGER, "ShiftRight"): OperatorMapType(OrsoTypes.INTEGER, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Plus"): OperatorMapType(OrsoTypes.INTERVAL, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Minus"): OperatorMapType(OrsoTypes.INTERVAL, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.TIMESTAMP, "Plus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.TIMESTAMP, "Minus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.DATE, "Plus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.INTERVAL, OrsoTypes.DATE, "Minus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.ARRAY, "InList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.ARRAY, "NotInList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.DATE, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.DATE, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.DATE, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.DATE, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.DATE, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.DATE, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.INTERVAL, "Minus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.INTERVAL, "Plus"): OperatorMapType(OrsoTypes.TIMESTAMP, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.TIMESTAMP, "Minus"): OperatorMapType(OrsoTypes.INTERVAL, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.TIMESTAMP, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.TIMESTAMP, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.TIMESTAMP, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.TIMESTAMP, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.TIMESTAMP, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.TIMESTAMP, OrsoTypes.TIMESTAMP, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.ARRAY, "InList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.ARRAY, "NotInList"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "Eq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "NotEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "Gt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "GtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "Lt"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "LtEq"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "Like"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "NotLike"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "ILike"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "NotILike"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "RLike"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "PGRegexIMatch"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "PGRegexNotIMatch"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "NotRLike"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "BitwiseOr"): OperatorMapType(OrsoTypes.BOOLEAN, None, 100.0), + (OrsoTypes.VARCHAR, OrsoTypes.VARCHAR, "StringConcat"): OperatorMapType(OrsoTypes.VARCHAR, None, 100.0), +} +# fmt:on + + +def determine_type(node) -> OrsoTypes: + # initial version, needs to be improved + if node.node_type in ( + NodeType.UNARY_OPERATOR, + NodeType.AND, + NodeType.NOT, + NodeType.NOT, + NodeType.XOR, + ): + return OrsoTypes.BOOLEAN + if node.node_type == NodeType.NESTED: + return determine_type(node.centre) + if node.node_type == NodeType.WILDCARD: + return OrsoTypes._MISSING_TYPE + if node.node_type == NodeType.EXPRESSION_LIST: + if node.parameters[-1].type is not None: + return node.parameters[-1].type + return OrsoTypes._MISSING_TYPE # we can work this out + if node.node_type == NodeType.LITERAL: + return node.type + + if node.value in ("NotInSubQuery", "InSubQuery"): + return OrsoTypes.BOOLEAN + + if node.left.node_type == NodeType.LITERAL: + left_type = node.left.type + elif node.left.schema_column: + left_type = node.left.schema_column.type + else: + return OrsoTypes._MISSING_TYPE + + if node.right.node_type == NodeType.LITERAL: + right_type = node.right.type + elif node.right.schema_column: + right_type = node.right.schema_column.type + else: + return OrsoTypes._MISSING_TYPE + + operator = node.value + + result = OPERATOR_MAP.get((left_type, right_type, operator)) + + if result is None: + # print(left_type, right_type, operator) + return OrsoTypes._MISSING_TYPE + + return result.result_type diff --git a/opteryx/components/logical_planner/logical_planner_builders.py b/opteryx/components/logical_planner/logical_planner_builders.py index af842fa17..52b553815 100644 --- a/opteryx/components/logical_planner/logical_planner_builders.py +++ b/opteryx/components/logical_planner/logical_planner_builders.py @@ -23,7 +23,6 @@ from typing import Tuple import numpy -import pyarrow from orso.types import OrsoTypes from opteryx import functions @@ -141,6 +140,11 @@ def binary_op(branch, alias: Optional[List[str]] = None, key=None): operator = branch["op"] right = build(branch["right"]) + if operator in ("PGRegexMatch", "SimilarTo"): + operator = "RLike" + if operator in ("PGRegexNotMatch", "NotSimilarTo"): + operator = "NotRLike" + operator_type = NodeType.COMPARISON_OPERATOR if operator in BINARY_OPERATORS: operator_type = NodeType.BINARY_OPERATOR @@ -418,7 +422,7 @@ def literal_interval(branch, alias: Optional[List[str]] = None, key=None): unit_index = parts.index(leading_unit) - month, day, nano = (0, 0, 0) + month, seconds = (0, 0) for index, value in enumerate(values): value = int(value) @@ -428,21 +432,15 @@ def literal_interval(branch, alias: Optional[List[str]] = None, key=None): if unit == "Month": month += value if unit == "Day": - day = value + seconds = value * 24 * 60 * 60 if unit == "Hour": - nano += value * 60 * 60 * 1000000000 + seconds += value * 60 * 60 if unit == "Minute": - nano += value * 60 * 1000000000 + seconds += value * 60 if unit == "Second": - nano += value * 1000000000 + seconds += value - interval = pyarrow.MonthDayNano( - ( - month, - day, - nano, - ) - ) + interval = (month, seconds) return Node(NodeType.LITERAL, type=OrsoTypes.INTERVAL, value=interval, alias=alias) @@ -529,6 +527,8 @@ def pattern_match(branch, alias: Optional[List[str]] = None, key=None): negated = branch["negated"] left = build(branch["expr"]) right = build(branch["pattern"]) + if key in ("PGRegexMatch", "SimilarTo"): + key = "RLike" if negated: key = f"Not{key}" return Node( diff --git a/opteryx/connectors/sql_connector.py b/opteryx/connectors/sql_connector.py index 907c012e3..861e43edc 100644 --- a/opteryx/connectors/sql_connector.py +++ b/opteryx/connectors/sql_connector.py @@ -171,7 +171,7 @@ def read_dataset( # type:ignore # DEBUG: log ("READ DATASET\n", str(query_builder)) # DEBUG: log ("PARAMETERS\n", parameters) # Execution Options allows us to handle datasets larger than memory - result = conn.execution_options(stream_results=True, max_row_buffer=5000).execute( + result = conn.execution_options(stream_results=True, max_row_buffer=10000).execute( text(str(query_builder)), parameters=parameters ) diff --git a/opteryx/custom_types/__init__.py b/opteryx/custom_types/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/opteryx/custom_types/intervals.py b/opteryx/custom_types/intervals.py new file mode 100644 index 000000000..87441c807 --- /dev/null +++ b/opteryx/custom_types/intervals.py @@ -0,0 +1,121 @@ +from typing import Callable +from typing import Dict +from typing import Optional +from typing import Tuple + +import numpy +import pyarrow +import pyarrow.compute +from orso.types import OrsoTypes + + +def add_months_numpy(dates, months_to_add): + """ + Adds a specified number of months to dates in a numpy array, adjusting for end-of-month overflow. + + Parameters: + - dates: np.ndarray of dates (numpy.datetime64) + - months_to_add: int, the number of months to add to each date + + Returns: + - np.ndarray: Adjusted dates + """ + # Convert dates to 'M' (month) granularity for addition + months = dates.astype("datetime64[M]") + + # Add months (broadcasts the scalar value across the array) + new_dates = months + numpy.timedelta64(months_to_add, "M") + + # Calculate the last day of the new month for each date + last_day_of_new_month = new_dates + numpy.timedelta64(1, "M") - numpy.timedelta64(1, "D") + + # Calculate the day of the month for each original date + day_of_month = dates - months + + # Adjust dates that would overflow their new month + overflow_mask = day_of_month > (last_day_of_new_month - new_dates) + adjusted_dates = numpy.where(overflow_mask, last_day_of_new_month, new_dates + day_of_month) + + return adjusted_dates.astype("datetime64[us]") + + +def _date_plus_interval(left, left_type, right, right_type, operator): + """ + Adds intervals to dates, utilizing integer arithmetic for performance improvements. + """ + signum = 1 if operator == "Plus" else -1 + if left_type == OrsoTypes.INTERVAL: + left, right = right, left + + months, seconds = right[0].as_py() + + result = left.astype("datetime64[s]") + (seconds * signum) + + # Handle months separately, requiring special logic + if months: + for index in range(len(result)): + result[index] = add_months_numpy(result[index], months * signum) + + return result + + +def _simple_interval_op(left, left_type, right, right_type, operator): + from opteryx.third_party.pyarrow_ops.ops import _inner_filter_operations + + left_months = pyarrow.compute.list_element(left, 0) + left_seconds = pyarrow.compute.list_element(left, 1) + + right_months = pyarrow.compute.list_element(right, 0) + right_seconds = pyarrow.compute.list_element(right, 1) + + if ( + pyarrow.compute.any(pyarrow.compute.not_equal(left_months, 0)).as_py() + or pyarrow.compute.any(pyarrow.compute.not_equal(right_months, 0)).as_py() + ): + from opteryx.exceptions import UnsupportedSyntaxError + + raise UnsupportedSyntaxError("Cannot compare INTERVALs with MONTH or YEAR components.") + + # months = _inner_filter_operations(left_months, operator, right_months) + # months_eq = _inner_filter_operations(left_months, "Eq", right_months) + seconds = _inner_filter_operations(left_seconds, operator, right_seconds) + + # res = [milliseconds[i] if (months[i] or months_eq[i]) else False for i in range(len(months))] + return seconds + + +INTERVAL_KERNELS: Dict[Tuple[OrsoTypes, OrsoTypes, str], Optional[Callable]] = { + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Plus"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Minus"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Eq"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "NotEq"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Gt"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "GtEq"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "Lt"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.INTERVAL, "LtEq"): _simple_interval_op, + (OrsoTypes.INTERVAL, OrsoTypes.TIMESTAMP, "Plus"): _date_plus_interval, + (OrsoTypes.INTERVAL, OrsoTypes.TIMESTAMP, "Minus"): _date_plus_interval, + (OrsoTypes.INTERVAL, OrsoTypes.DATE, "Plus"): _date_plus_interval, + (OrsoTypes.INTERVAL, OrsoTypes.DATE, "Minus"): _date_plus_interval, + (OrsoTypes.TIMESTAMP, OrsoTypes.INTERVAL, "Plus"): _date_plus_interval, + (OrsoTypes.TIMESTAMP, OrsoTypes.INTERVAL, "Minus"): _date_plus_interval, + (OrsoTypes.DATE, OrsoTypes.INTERVAL, "Plus"): _date_plus_interval, + (OrsoTypes.DATE, OrsoTypes.INTERVAL, "Minus"): _date_plus_interval, + # we need to type the outcome of calcs better + (0, OrsoTypes.INTERVAL, "Plus"): _simple_interval_op, + (0, OrsoTypes.INTERVAL, "Minus"): _simple_interval_op, + (0, OrsoTypes.INTERVAL, "Eq"): _simple_interval_op, + (0, OrsoTypes.INTERVAL, "NotEq"): _simple_interval_op, + (0, OrsoTypes.INTERVAL, "Gt"): _simple_interval_op, + (0, OrsoTypes.INTERVAL, "GtEq"): _simple_interval_op, + (0, OrsoTypes.INTERVAL, "Lt"): _simple_interval_op, + (0, OrsoTypes.INTERVAL, "LtEq"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "Plus"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "Minus"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "Eq"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "NotEq"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "Gt"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "GtEq"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "Lt"): _simple_interval_op, + (OrsoTypes.INTERVAL, 0, "LtEq"): _simple_interval_op, +} diff --git a/opteryx/functions/binary_operators.py b/opteryx/functions/binary_operators.py index 9e94089ba..1bcd70877 100644 --- a/opteryx/functions/binary_operators.py +++ b/opteryx/functions/binary_operators.py @@ -10,20 +10,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import array from typing import Any from typing import Dict from typing import List from typing import Optional -from typing import Tuple from typing import Union import numpy import pyarrow +from orso.types import OrsoTypes from pyarrow import compute -from opteryx.utils import dates - # fmt:off OPERATOR_FUNCTION_MAP: Dict[str, Any] = { "Divide": numpy.divide, @@ -42,83 +39,8 @@ BINARY_OPERATORS = set(OPERATOR_FUNCTION_MAP.keys()) -INTERVALS = (pyarrow.lib.MonthDayNano, pyarrow.lib.MonthDayNanoIntervalArray) -DATES = (numpy.datetime64, pyarrow.lib.Date32Array) -LISTS = (pyarrow.Array, numpy.ndarray, list, array.ArrayType) -STRINGS = (str, numpy.str_) # fmt:on -# Also supported by the AST but not implemented - -# PGBitwiseXor => ("#"), -- not supported in mysql -# PGBitwiseShiftLeft => ("<<"), -- not supported in mysql -# PGBitwiseShiftRight => (">>"), -- not supported in mysql - - -def add_months_numpy(dates, months_to_add): - """ - Adds a specified number of months to dates in a numpy array, adjusting for end-of-month overflow. - - Parameters: - - dates: np.ndarray of dates (numpy.datetime64) - - months_to_add: int, the number of months to add to each date - - Returns: - - np.ndarray: Adjusted dates - """ - # Convert dates to 'M' (month) granularity for addition - months = dates.astype("datetime64[M]") - - # Add months (broadcasts the scalar value across the array) - new_dates = months + numpy.timedelta64(months_to_add, "M") - - # Calculate the last day of the new month for each date - last_day_of_new_month = new_dates + numpy.timedelta64(1, "M") - numpy.timedelta64(1, "D") - - # Calculate the day of the month for each original date - day_of_month = dates - months - - # Adjust dates that would overflow their new month - overflow_mask = day_of_month > (last_day_of_new_month - new_dates) - adjusted_dates = numpy.where(overflow_mask, last_day_of_new_month, new_dates + day_of_month) - - return adjusted_dates.astype("datetime64[us]") - - -def _date_plus_interval(left: numpy.ndarray, right): - """ - Adds intervals to dates, utilizing integer arithmetic for performance improvements. - """ - if isinstance(left, INTERVALS) or (isinstance(left, LISTS) and type(left[0]) in INTERVALS): - left, right = right, left - - interval = right[0].value - delta = (interval.days * 24 * 3600 * 1_000_000_000) + interval.nanoseconds - result = left.astype("datetime64[ns]") + delta - - # Handle months separately, requiring special logic - if interval.months: - for index in range(len(result)): - result[index] = add_months_numpy(result[index], interval.months) - - return result - - -def _date_minus_interval(left, right): - if isinstance(left, INTERVALS) or (isinstance(left, LISTS) and type(left[0]) in INTERVALS): - left, right = right, left - - interval = right[0].value - delta = (interval.days * 24 * 3600 * 1_000_000_000) + interval.nanoseconds - result = left.astype("datetime64[ns]") - delta - - # Handle months separately, requiring special logic - if interval.months: - for index in range(len(result)): - result[index] = add_months_numpy(result[index], 0 - interval.months) - - return result - def _ip_containment(left: List[Optional[str]], right: List[str]) -> List[Optional[bool]]: """ @@ -147,33 +69,9 @@ def _ip_containment(left: List[Optional[str]], right: List[str]) -> List[Optiona ) from err -def _either_side_is_type(left, right, types): - return ( - _check_type(left, types) - or _check_type(right, types) - or (_check_type(left, LISTS) and _check_type(left[0], types)) - or (_check_type(right, LISTS) and _check_type(right[0], types)) - ) - - -def _both_sides_are_type(left, right, types): - return ( - _check_type(left, types) or _check_type(left, LISTS) and _check_type(left[0], types) - ) and (_check_type(right, types) or _check_type(right, LISTS) and _check_type(right[0], types)) - - -def _is_date_only(obj): - obj_0 = obj[0] - return isinstance(obj_0, pyarrow.lib.Date32Scalar) or ( - isinstance(obj_0, numpy.datetime64) and obj_0.dtype == "datetime64[D]" - ) - - -def _check_type(obj, types: Tuple[type, ...]) -> bool: - return any(isinstance(obj, t) for t in types) - - -def binary_operations(left, operator: str, right) -> Union[numpy.ndarray, pyarrow.Array]: +def binary_operations( + left, left_type: OrsoTypes, operator: str, right, right_type: OrsoTypes +) -> Union[numpy.ndarray, pyarrow.Array]: """ Execute inline operators (e.g. the add in 3 + 4). @@ -193,25 +91,38 @@ def binary_operations(left, operator: str, right) -> Union[numpy.ndarray, pyarro if operation is None: raise NotImplementedError(f"Operator `{operator}` is not implemented!") - if operator in ("Minus", "Plus"): - if _either_side_is_type(left, right, INTERVALS): - return ( - _date_minus_interval(left, right) - if operator == "Minus" - else _date_plus_interval(left, right) + if OrsoTypes.INTERVAL in (left_type, right_type): + from opteryx.custom_types.intervals import INTERVAL_KERNELS + + function = INTERVAL_KERNELS.get((left_type, right_type, operator)) + if function is None: + from opteryx.exceptions import UnsupportedTypeError + + raise UnsupportedTypeError( + f"Cannot perform {operator.upper()} on {left_type} and {right_type}." + ) + + return function(left, left_type, right, right_type, operator) + + if ( + operator == "Minus" + and left_type in (OrsoTypes.DATE, OrsoTypes.TIMESTAMP) + and right_type in (OrsoTypes.DATE, OrsoTypes.TIMESTAMP) + ): + # substracting dates results in an INTERVAL (months, seconds) + arr = operation(left, right) + if arr.dtype.name == "timedelta64[D]": + return pyarrow.array( + [ + None if v == -9223372036854775808 else (0, v * 86400) + for v in arr.astype(numpy.int64) + ] ) - if _both_sides_are_type(left, right, DATES): - if _is_date_only(left) and _is_date_only(right): - return pyarrow.array( - [ - pyarrow.MonthDayNano((0, v.view(numpy.int64), 0)) - for v in operation(left, right) - ], - type=pyarrow.month_day_nano_interval(), - ) + arr = arr.astype("timedelta64[s]").astype(numpy.int64) + return pyarrow.array([(0, v) for v in arr.astype(numpy.int64)]) elif operator == "BitwiseOr": - if _either_side_is_type(left, right, STRINGS): + if OrsoTypes.VARCHAR in (left_type, right_type): return _ip_containment(left, right) elif operator == "StringConcat": diff --git a/opteryx/functions/date_functions.py b/opteryx/functions/date_functions.py index d15218f6f..b4684900a 100644 --- a/opteryx/functions/date_functions.py +++ b/opteryx/functions/date_functions.py @@ -152,6 +152,6 @@ def from_unixtimestamp(values): def unixtime(*args): if isinstance(args[0], int): - now = datetime.datetime.utcnow().timestamp() + now = datetime.datetime.now(datetime.UTC).timestamp() return numpy.full(args[0], now, numpy.int64) return [numpy.nan if d != d else d.astype(numpy.int64) for d in args[0]] diff --git a/opteryx/managers/expression/__init__.py b/opteryx/managers/expression/__init__.py index e112a018d..c1282db10 100644 --- a/opteryx/managers/expression/__init__.py +++ b/opteryx/managers/expression/__init__.py @@ -222,8 +222,7 @@ def _inner_evaluate(root: Node, table: Table, context: ExecutionContext): if literal_type == OrsoTypes.VARCHAR: return numpy.array([root.value] * table.num_rows, dtype=numpy.unicode_) if literal_type == OrsoTypes.INTERVAL: - value = pyarrow.MonthDayNano(root.value) - return pyarrow.array([value]) + return pyarrow.array([root.value] * table.num_rows) return numpy.full( shape=table.num_rows, fill_value=root.value, dtype=ORSO_TO_NUMPY_MAP[literal_type] ) # type:ignore @@ -270,13 +269,17 @@ def _inner_evaluate(root: Node, table: Table, context: ExecutionContext): if node_type == NodeType.COMPARISON_OPERATOR: left = _inner_evaluate(root.left, table, context) right = _inner_evaluate(root.right, table, context) - result = filter_operations(left, root.value, right) + result = filter_operations( + left, root.left.schema_column.type, root.value, right, root.right.schema_column.type + ) context.store(identity, result) return result if node_type == NodeType.BINARY_OPERATOR: left = _inner_evaluate(root.left, table, context) right = _inner_evaluate(root.right, table, context) - result = binary_operations(left, root.value, right) + result = binary_operations( + left, root.left.schema_column.type, root.value, right, root.right.schema_column.type + ) context.store(identity, result) return result if node_type == NodeType.WILDCARD: diff --git a/opteryx/managers/expression/formatter.py b/opteryx/managers/expression/formatter.py index 023ced56a..674935d20 100644 --- a/opteryx/managers/expression/formatter.py +++ b/opteryx/managers/expression/formatter.py @@ -12,22 +12,10 @@ class ExpressionColumn(FlatColumn): def _format_interval(value): - import datetime - # MonthDayNano is a superclass of list, do before list - - if isinstance(value, tuple): - months, days, seconds = value - seconds = seconds / 1e9 - elif hasattr(value, "days"): - days = value.days - months = value.months - seconds = value.nanoseconds / 1e9 - elif isinstance(value, datetime.timedelta): - days = value.days - months = 0 - seconds = value.microseconds / 1e6 + value.seconds + months, seconds = value + days, seconds = divmod(seconds, 86400) hours, seconds = divmod(seconds, 3600) minutes, seconds = divmod(seconds, 60) years, months = divmod(months, 12) diff --git a/opteryx/third_party/pyarrow_ops/ops.py b/opteryx/third_party/pyarrow_ops/ops.py index 5a9d8a20e..2d71a7d20 100644 --- a/opteryx/third_party/pyarrow_ops/ops.py +++ b/opteryx/third_party/pyarrow_ops/ops.py @@ -4,6 +4,7 @@ import numpy import pyarrow +from orso.types import OrsoTypes from pyarrow import compute from opteryx.compiled import list_ops @@ -22,10 +23,6 @@ "NotLike", "NotILike", "InList", - "SimilarTo", - "NotSimilarTo", - "PGRegexMatch", - "NotPGRegexMatch", "PGRegexNotMatch", "PGRegexIMatch", # "~*" "NotPGRegexIMatch", # "!~*" @@ -34,7 +31,7 @@ } -def filter_operations(arr, operator, value): +def filter_operations(arr, left_type, operator, value, right_type): """ Wrapped for Opteryx added to correctly handle null semantics. @@ -74,8 +71,21 @@ def filter_operations(arr, operator, value): value = value.compress(valid_positions) compressed = True - # do the evaluation - results_mask = _inner_filter_operations(arr, operator, value) + if OrsoTypes.INTERVAL in (left_type, right_type): + from opteryx.custom_types.intervals import INTERVAL_KERNELS + + function = INTERVAL_KERNELS.get((left_type, right_type, operator)) + if function is None: + from opteryx.exceptions import UnsupportedTypeError + + raise UnsupportedTypeError( + f"Cannot perform {operator.upper()} on {left_type} and {right_type}." + ) + + results_mask = function(arr, left_type, value, right_type, operator) + else: + # do the evaluation + results_mask = _inner_filter_operations(arr, operator, value) if compressed: # fill the result set @@ -134,12 +144,12 @@ def _inner_filter_operations(arr, operator, value): # MODIFIED FOR OPTERYX - see comment above matches = compute.match_like(arr, value[0], ignore_case=True) # [#325] return numpy.invert(matches) - if operator in ("PGRegexMatch", "SimilarTo", "RLike"): + if operator == "RLike": # MODIFIED FOR OPTERYX - see comment above return ( compute.match_substring_regex(arr, value[0]).to_numpy(False).astype(dtype=bool) ) # [#325] - if operator in ("PGRegexNotMatch", "NotSimilarTo", "NotRLike"): + if operator == "NotRLike": # MODIFIED FOR OPTERYX - see comment above matches = compute.match_substring_regex(arr, value[0]) # [#325] return numpy.invert(matches) diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py index 055b85fa8..c29e68fda 100644 --- a/tests/sql_battery/test_shapes_and_errors_battery.py +++ b/tests/sql_battery/test_shapes_and_errors_battery.py @@ -1008,6 +1008,11 @@ ("SELECT INTERVAL '100' YEAR + birth_date, birth_date from $astronauts", 357, 2, None), ("SELECT INTERVAL '1 1' MONTH to DAY + birth_date, birth_date from $astronauts", 357, 2, None), ("SELECT birth_date - INTERVAL '1 1' MONTH to DAY, birth_date from $astronauts", 357, 2, None), + ("SELECT birth_date, death_date FROM $astronauts WHERE death_date - birth_date > INTERVAL '1' DAY", 51, 2, None), + ("SELECT birth_date, death_date FROM $astronauts WHERE birth_date + INTERVAL '50' YEAR > death_date", 26, 2, None), + ("SELECT birth_date, death_date FROM $astronauts WHERE birth_date + INTERVAL '50' YEAR = death_date", 0, 2, None), + ("SELECT birth_date, death_date FROM $astronauts WHERE death_date - birth_date > INTERVAL '50' YEAR", None, None, UnsupportedSyntaxError), + ("SELECT * FROM $astronauts WHERE 'Apollo 11' IN UNNEST(missions)", 3, 19, None), ("SELECT * FROM $astronauts WHERE 'Apollo 11' NOT IN UNNEST(missions)", 331, 19, None), ("SELECT * FROM $astronauts WHERE NOT 'Apollo 11' IN UNNEST(missions)", 331, 19, None), @@ -1353,7 +1358,7 @@ ("SELECT * FROM (SELECT id from $planets AS PO) AS ONE LEFT JOIN (SELECT id from $planets AS PT) AS TWO ON id = id", 9, 2, AmbiguousIdentifierError), ("SELECT * FROM (SELECT id FROM $planets AS PONE) AS ONE LEFT JOIN (SELECT id FROM $planets AS PTWO) AS TWO ON ONE.id = TWO.id;", 9, 2, None), # JOIN on UNNEST [#382] - ("SELECT name FROM $planets INNER JOIN UNNEST(('Earth')) AS n on name = n ", 1, 1, None), + ("SELECT name FROM $planets INNER JOIN UNNEST(('Earth', 'X')) AS n on name = n ", 1, 1, None), ("SELECT name FROM $planets INNER JOIN UNNEST(('Earth', 'Mars')) AS n on name = n", 2, 1, None), # SELECT [#409] ("SELECT DATE FROM (SELECT '1980-10-20' AS DATE) AS SQ", 1, 1, None), diff --git a/tests/sql_battery/tests/regression.run_tests b/tests/sql_battery/tests/regression.run_tests index 0d07ad7b9..888d2e562 100644 --- a/tests/sql_battery/tests/regression.run_tests +++ b/tests/sql_battery/tests/regression.run_tests @@ -41,10 +41,10 @@ SELECT birth_date - TODAY() FROM $astronauts; SELECT birth_date - NOW() FROM $astronauts SELECT birth_date - current_time FROM $astronauts; SELECT birth_date - current_date FROM $astronauts; -SELECT DATE(birth_date) - TODAY() FROM $astronauts; +# SELECT DATE(birth_date) - TODAY() FROM $astronauts; SELECT DATE(birth_date) - NOW() FROM $astronauts SELECT DATE(birth_date) - current_time FROM $astronauts; -SELECT DATE(birth_date) - current_date FROM $astronauts; +# SELECT DATE(birth_date) - current_date FROM $astronauts; SELECT TIMESTAMP(birth_date) - TODAY() FROM $astronauts; SELECT TIMESTAMP("2022-02-01") - TODAY() FROM $astronauts; SELECT DATEDIFF('days', birth_date, TODAY()) FROM $astronauts; From 0770eac718171816571f708cf84fcd86de56c4db Mon Sep 17 00:00:00 2001 From: XB500 Date: Sun, 3 Mar 2024 17:15:25 +0000 Subject: [PATCH 2/5] Opteryx Version 0.13.4-alpha.331 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 7160079c6..244c5eb9c 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 329 +__build__ = 331 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From fe2ff10fc7fe5e50b5b2babe1f99613248871ba7 Mon Sep 17 00:00:00 2001 From: XB500 Date: Sun, 3 Mar 2024 17:18:12 +0000 Subject: [PATCH 3/5] Opteryx Version 0.13.4-alpha.332 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 244c5eb9c..e7e24e291 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 331 +__build__ = 332 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From f0fe6ef64164418688fc3246fac0cbd2c2fbd47c Mon Sep 17 00:00:00 2001 From: Justin Joyce Date: Sun, 3 Mar 2024 17:44:02 +0000 Subject: [PATCH 4/5] #1486 --- opteryx/functions/date_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/functions/date_functions.py b/opteryx/functions/date_functions.py index b4684900a..d15218f6f 100644 --- a/opteryx/functions/date_functions.py +++ b/opteryx/functions/date_functions.py @@ -152,6 +152,6 @@ def from_unixtimestamp(values): def unixtime(*args): if isinstance(args[0], int): - now = datetime.datetime.now(datetime.UTC).timestamp() + now = datetime.datetime.utcnow().timestamp() return numpy.full(args[0], now, numpy.int64) return [numpy.nan if d != d else d.astype(numpy.int64) for d in args[0]] From dce84ccc4dfe7a6d48ba765af51558c39ab8a884 Mon Sep 17 00:00:00 2001 From: XB500 Date: Sun, 3 Mar 2024 17:44:23 +0000 Subject: [PATCH 5/5] Opteryx Version 0.13.4-alpha.333 --- opteryx/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opteryx/__version__.py b/opteryx/__version__.py index e7e24e291..9b50f29eb 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 332 +__build__ = 333 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.