diff --git a/opteryx/__version__.py b/opteryx/__version__.py index 8507a9129..c545ffcc5 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -17,4 +17,4 @@ """ # __version__ = "0.4.0-alpha.6" -__version__ = "0.12.0-beta.2" +__version__ = "0.12.0-beta.3" diff --git a/opteryx/functions/__init__.py b/opteryx/functions/__init__.py index 5f545abe8..689e7e704 100644 --- a/opteryx/functions/__init__.py +++ b/opteryx/functions/__init__.py @@ -65,6 +65,7 @@ def _get(value, item): "DECIMAL": pyarrow.decimal128(14), "VARCHAR": "string", "TIMESTAMP": pyarrow.timestamp("us"), + "DATE": pyarrow.date32(), } @@ -90,10 +91,10 @@ def fixed_value_function(function, context): if function == "PI": return OrsoTypes.DOUBLE, 3.14159265358979323846264338327950288419716939937510 if function == "PHI": - """the golden ratio""" + # the golden ratio return OrsoTypes.DOUBLE, 1.61803398874989484820458683436563811772030917980576 if function == "E": - """eulers number""" + # eulers number return OrsoTypes.DOUBLE, 2.71828182845904523536028747135266249775724709369995 return None, None @@ -126,6 +127,7 @@ def try_cast(_type): "VARCHAR": str, "TIMESTAMP": numpy.datetime64, "STRUCT": json.loads, + "DATE": lambda x: dates.parse_iso(x).date(), } if _type in casters: @@ -137,15 +139,6 @@ def _inner(arr): raise FunctionNotFoundError(message=f"Internal function to cast values to `{_type}` not found.") -def _repeat_no_parameters(func): - # call once and repeat - # these should all be eliminated by the optimizer - def _inner(items): - return numpy.array([func()] * items) - - return _inner - - def _iterate_single_parameter(func): def _inner(array): return numpy.array([func(item) for item in array]) @@ -242,6 +235,7 @@ def _coalesce(*arrays): "STRING": cast("VARCHAR"), # alias for VARCHAR "STR": cast("VARCHAR"), "STRUCT": _iterate_single_parameter(json.loads), + "DATE": cast("DATE"), "TRY_TIMESTAMP": try_cast("TIMESTAMP"), "TRY_BOOLEAN": try_cast("BOOLEAN"), "TRY_NUMERIC": try_cast("DOUBLE"), @@ -251,6 +245,7 @@ def _coalesce(*arrays): "TRY_INTEGER": try_cast("INTEGER"), "TRY_DECIMAL": try_cast("DECIMAL"), "TRY_DOUBLE": try_cast("DOUBLE"), + "TRY_DATE": try_cast("DATE"), # STRINGS "LEN": _iterate_single_parameter(get_len), # LENGTH(str) -> int @@ -305,7 +300,7 @@ def _coalesce(*arrays): "GREATEST": _iterate_single_parameter(numpy.nanmax), "LEAST": _iterate_single_parameter(numpy.nanmin), "IIF": other_functions.iif, - "GENERATE_SERIES": series.generate_series, +# "GENERATE_SERIES": series.generate_series, "NULLIF": other_functions.null_if, "CASE": other_functions.case_when, @@ -345,7 +340,7 @@ def _coalesce(*arrays): "TODAY": lambda x: None, # * # "TIME": _repeat_no_parameters(date_functions.get_time), "YESTERDAY": lambda x: None, # * - "DATE": lambda x: compute.cast(x, "date32"), #_iterate_single_parameter(date_functions.get_date), +# "DATE": lambda x: compute.cast(x, "date32"), #_iterate_single_parameter(date_functions.get_date), "YEAR": compute.year, "MONTH": compute.month, "DAY": compute.day, diff --git a/testdata/sql_scripts/mysql,sql b/testdata/sql_scripts/mysql.sql similarity index 100% rename from testdata/sql_scripts/mysql,sql rename to testdata/sql_scripts/mysql.sql diff --git a/testdata/sql_scripts/postgres.sql b/testdata/sql_scripts/postgres.sql new file mode 100644 index 000000000..4031f181c --- /dev/null +++ b/testdata/sql_scripts/postgres.sql @@ -0,0 +1,34 @@ +CREATE TABLE planets ( + id SERIAL PRIMARY KEY, + name VARCHAR(20), + mass NUMERIC(6,1), + diameter INT, + density NUMERIC(6,1), + gravity NUMERIC(6,1), + escape_velocity NUMERIC(6,1), + rotation_period NUMERIC(6,1), + length_of_day NUMERIC(6,1), + distance_from_sun NUMERIC(6,1), + perihelion NUMERIC(6,1), + aphelion NUMERIC(6,1), + orbital_period NUMERIC(6,1), + orbital_velocity NUMERIC(6,1), + orbital_inclination NUMERIC(6,1), + orbital_eccentricity NUMERIC(5,3), + obliquity_to_orbit NUMERIC(6,1), + mean_temperature INT, + surface_pressure NUMERIC(7,5), + number_of_moons INT +); + +INSERT INTO planets (name, mass, diameter, density, gravity, escape_velocity, rotation_period, length_of_day, distance_from_sun, perihelion, aphelion, orbital_period, orbital_velocity, orbital_inclination, orbital_eccentricity, obliquity_to_orbit, mean_temperature, surface_pressure, number_of_moons) +VALUES + ('Mercury', 0.33, 4879, 5427, 3.7, 4.3, 1407.6, 4222.6, 57.9, 46, 69.8, 88, 47.4, 7, 0.205, 0.034, 167, 0, 0), + ('Venus', 4.87, 12104, 5243, 8.9, 10.4, -5832.5, 2802, 108.2, 107.5, 108.9, 224.7, 35, 3.4, 0.007, 177.4, 464, 92, 0), + ('Earth', 5.97, 12756, 5514, 9.8, 11.2, 23.9, 24, 149.6, 147.1, 152.1, 365.2, 29.8, 0, 0.017, 23.4, 15, 1, 1), + ('Mars', 0.642, 6792, 3933, 3.7, 5, 24.6, 24.7, 227.9, 206.6, 249.2, 687, 24.1, 1.9, 0.094, 25.2, -65, 0.01, 2), + ('Jupiter', 1898, 142984, 1326, 23.1, 59.5, 9.9, 9.9, 778.6, 740.5, 816.6, 4331, 13.1, 1.3, 0.049, 3.1, -110, NULL, 79), + ('Saturn', 568, 120536, 687, 9, 35.5, 10.7, 10.7, 1433.5, 1352.6, 1514.5, 10747, 9.7, 2.5, 0.057, 26.7, -140, NULL, 62), + ('Uranus', 86.8, 51118, 1271, 8.7, 21.3, -17.2, 17.2, 2872.5, 2741.3, 3003.6, 30589, 6.8, 0.8, 0.046, 97.8, -195, NULL, 27), + ('Neptune', 102, 49528, 1638, 11, 23.5, 16.1, 16.1, 4495.1, 4444.5, 4545.7, 59800, 5.4, 1.8, 0.011, 28.3, -200, NULL, 14), + ('Pluto', 0.0146, 2370, 2095, 0.7, 1.3, -153.3, 153.3, 5906.4, 4436.8, 7375.9, 90560, 4.7, 17.2, 0.244, 122.5, -225, 0.00001, 5) \ No newline at end of file diff --git a/tests/sql_battery/test_battery_formats.py b/tests/sql_battery/test_battery_formats.py index 0a9cac10f..b5512310e 100644 --- a/tests/sql_battery/test_battery_formats.py +++ b/tests/sql_battery/test_battery_formats.py @@ -67,17 +67,15 @@ def test_sql_battery(statement, rows, columns, skip): # opteryx.register_store("tests", DiskConnector) - conn = opteryx.connect() - cursor = conn.cursor() - cursor.execute(statement) - actual_rows, actual_columns = cursor.shape + result = opteryx.query_to_arrow(statement) + actual_rows, actual_columns = result.shape assert ( rows == actual_rows - ), f"Query returned {actual_rows} rows but {rows} were expected, {statement}\n{cursor.head(10)}" + ), f"Query returned {actual_rows} rows but {rows} were expected, {statement}" assert ( columns == actual_columns - ), f"Query returned {actual_columns} cols but {columns} were expected, {statement}\n{cursor.head(10)}" + ), f"Query returned {actual_columns} cols but {columns} were expected, {statement}" if __name__ == "__main__": # pragma: no cover diff --git a/tests/sql_battery/test_battery_sql92.py b/tests/sql_battery/test_battery_sql92.py index 23e55f175..6b94753b1 100644 --- a/tests/sql_battery/test_battery_sql92.py +++ b/tests/sql_battery/test_battery_sql92.py @@ -223,7 +223,7 @@ def test_sql92(statement, feature): """ Test an battery of statements """ - opteryx.query(statement).fetchall() + opteryx.query_to_arrow(statement) if __name__ == "__main__": # pragma: no cover diff --git a/tests/sql_battery/test_exclude_arm.py b/tests/sql_battery/test_exclude_arm.py index 4207214cc..408d1b99d 100644 --- a/tests/sql_battery/test_exclude_arm.py +++ b/tests/sql_battery/test_exclude_arm.py @@ -41,20 +41,18 @@ def test_sql_battery(statement, rows, columns, exception): opteryx.register_store("tests", DiskConnector) - conn = opteryx.connect() - cursor = conn.cursor() try: - cursor.execute(statement) - actual_rows, actual_columns = cursor.shape + result = opteryx.query_to_arrow(statement) + actual_rows, actual_columns = result.shape assert ( rows == actual_rows ), f"Query returned {actual_rows} rows but {rows} were expected" - f" ({actual_columns} vs {columns})\n{statement}\n{cursor.head(10)}" + f" ({actual_columns} vs {columns})\n{statement}" assert ( columns == actual_columns ), f"Query returned {actual_columns} cols but {columns} were" - f" expected\n{statement}\n{cursor.head(10)}" + f" expected\n{statement}" except Exception as err: # pragma: no cover assert type(err) == exception, f"Query failed with error {type(err)}" f" but error {exception} was expected" diff --git a/tests/sql_battery/test_results_battery.py b/tests/sql_battery/test_results_battery.py index 4e54ab037..2bf00e2c8 100644 --- a/tests/sql_battery/test_results_battery.py +++ b/tests/sql_battery/test_results_battery.py @@ -43,13 +43,8 @@ def get_tests(test_type): @pytest.mark.parametrize("test", RESULTS_TESTS) def test_results_tests(test): """ """ - conn = opteryx.connect() - cursor = conn.cursor() - sql = test["statement"] - - cursor.execute(sql) - result = cursor.arrow().to_pydict() + result = opteryx.query_to_arrow(sql).to_pydict() printable_result = orjson.dumps(result, default=str, option=orjson.OPT_SORT_KEYS).decode() printable_expected = orjson.dumps(test["result"], option=orjson.OPT_SORT_KEYS).decode() diff --git a/tests/sql_battery/test_run_only_battery.py b/tests/sql_battery/test_run_only_battery.py index 7f740d904..6997a92ea 100644 --- a/tests/sql_battery/test_run_only_battery.py +++ b/tests/sql_battery/test_run_only_battery.py @@ -50,7 +50,7 @@ def test_run_only_tests(statement): These tests are only run, the result is not checked. This is useful for parsing checks """ - opteryx.query(statement).fetchall() + opteryx.query_to_arrow(statement) if __name__ == "__main__": # pragma: no cover diff --git a/tests/sql_battery/tests/feature_tests.run_tests b/tests/sql_battery/tests/feature_tests.run_tests index 2da8e2267..ad4ee0291 100644 --- a/tests/sql_battery/tests/feature_tests.run_tests +++ b/tests/sql_battery/tests/feature_tests.run_tests @@ -11,16 +11,28 @@ SELECT INTEGER('22'); SELECT DOUBLE('22.0'); SELECT DECIMAL('22.0'); SELECT BOOLEAN('true'); +SELECT DATE('2022-01-01'); + SELECT TIMESTAMP '2022-01-01'; SELECT INTEGER '22'; SELECT DOUBLE '22.0'; SELECT DECIMAL '22.0'; SELECT BOOLEAN 'true'; +SELECT DATE '2022-01-01'; + SELECT CAST('2022-01-01' AS TIMESTAMP); SELECT CAST('22' AS INTEGER); SELECT CAST('22.0' AS DOUBLE); SELECT CAST('22.0' AS DECIMAL); SELECT CAST('true' AS BOOLEAN); +SELECT CAST('2022-01-01' AS DATE); + +SELECT TRY_CAST('2022-01-01' AS TIMESTAMP); +SELECT TRY_CAST('22' AS INTEGER); +SELECT TRY_CAST('22.0' AS DOUBLE); +SELECT TRY_CAST('22.0' AS DECIMAL); +SELECT TRY_CAST('true' AS BOOLEAN); +SELECT TRY_CAST('2022-01-01' AS DATE); SELECT CEIL(1.5); SELECT FLOOR(2.5); diff --git a/tests/sql_battery/tests/results/complex_003.results_tests b/tests/sql_battery/tests/results/complex_003.results_tests index c3b14871c..841bb1a59 100644 --- a/tests/sql_battery/tests/results/complex_003.results_tests +++ b/tests/sql_battery/tests/results/complex_003.results_tests @@ -3,8 +3,8 @@ "statement": "SELECT pl.name AS planet_name, pl.orbital_period, pl.diameter, dense_moons_stats.total_dense_moons, dense_moons_stats.avg_density, bright_moons_stats.avg_magnitude, bright_moons_stats.total_bright_moons FROM $planets pl LEFT JOIN (SELECT planetId, COUNT(*) AS total_dense_moons, AVG(density) AS avg_density FROM $satellites WHERE density > 2 GROUP BY planetId) dense_moons_stats ON pl.id = dense_moons_stats.planetId LEFT JOIN (SELECT planetId, AVG(magnitude) AS avg_magnitude, COUNT(*) AS total_bright_moons FROM $satellites WHERE magnitude < 5 GROUP BY planetId) bright_moons_stats ON pl.id = bright_moons_stats.planetId WHERE pl.distance_from_sun BETWEEN 100 AND 200 AND pl.orbital_eccentricity < 0.1 ORDER BY dense_moons_stats.total_dense_moons DESC, bright_moons_stats.avg_magnitude ASC LIMIT 10;", "result": { "bright_moons_stats.avg_magnitude": [-12.74, null], - "bright_moons_stats.total_bright_moons": [1.0, null], - "dense_moons_stats.total_dense_moons": [1.0, null], + "bright_moons_stats.total_bright_moons": [1, null], + "dense_moons_stats.total_dense_moons": [1, null], "dense_moons_stats.avg_density": [3.344, null], "planet_name": ["Earth", "Venus"], "pl.diameter": [12756, 12104],