Skip to content

Commit

Permalink
Browse files (browse the repository at this point in the history)
  • Loading branch information
joocer committed Nov 7, 2023
1 parent cb60d56 commit c22de63
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 29 deletions.
13 changes: 10 additions & 3 deletions opteryx/components/logical_planner_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
helps to ensure new AST-based functionality can be added by adding
a function and a reference to it in the dictionary.
"""
import decimal

import numpy
import pyarrow
Expand Down Expand Up @@ -251,9 +252,11 @@ def cast(branch, alias=None, key=None):
elif "Varchar" in data_type:
data_type = "VARCHAR"
elif "Decimal" in data_type:
data_type = "NUMERIC"
elif "Numeric" in data_type:
data_type = "NUMERIC"
data_type = "DECIMAL"
elif "Integer" in data_type:
data_type = "INTEGER"
elif "Double" in data_type:
data_type = "DOUBLE"
elif "Boolean" in data_type:
data_type = "BOOLEAN"
elif "STRUCT" in data_type:
Expand Down Expand Up @@ -519,6 +522,10 @@ def typed_string(branch, alias=None, key=None):
Datatype_Map = {
"TIMESTAMP": ("TIMESTAMP", lambda x: numpy.datetime64(x, "us")),
"DATE": ("DATE", lambda x: numpy.datetime64(x, "D")),
"INTEGER": ("INTEGER", numpy.int64),
"DOUBLE": ("DOUBLE", numpy.float64),
"DECIMAL": ("DECIMAL", decimal.Decimal),
"BOOLEAN": ("BOOLEAN", bool),
}

mapper = Datatype_Map.get(data_type)
Expand Down
9 changes: 7 additions & 2 deletions opteryx/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ def _get(value, item):

VECTORIZED_CASTERS = {
"BOOLEAN": "bool",
"NUMERIC": "float64",
"DOUBLE": "float64",
"INTEGER": "int64",
"DECIMAL": pyarrow.decimal128(14),
"VARCHAR": "string",
"TIMESTAMP": pyarrow.timestamp("us"),
}
Expand Down Expand Up @@ -200,7 +202,10 @@ def _coalesce(*arrays):
# TYPE CONVERSION
"TIMESTAMP": cast("TIMESTAMP"),
"BOOLEAN": cast("BOOLEAN"),
"NUMERIC": cast("NUMERIC"),
"NUMERIC": cast("DOUBLE"),
"INTEGER": cast("INTEGER"),
"DOUBLE": cast("DOUBLE"),
"DECIMAL": cast("DECIMAL"),
"VARCHAR": cast("VARCHAR"),
"STRING": cast("VARCHAR"), # alias for VARCHAR
"STR": cast("VARCHAR"),
Expand Down
3 changes: 3 additions & 0 deletions tests/sql_battery/test_shapes_and_errors_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,9 @@
("SELECT * FROM $planets WHERE diameter > 10000 AND gravity BETWEEN 0.5 AND 2.0;", 0, 20, None),
("SELECT * FROM $planets WHERE diameter > 100 AND gravity BETWEEN 0.5 AND 2.0;", 1, 20, None),

# 10-way join
("SELECT p1.name AS planet1_name, p2.name AS planet2_name, p3.name AS planet3_name, p4.name AS planet4_name, p5.name AS planet5_name, p6.name AS planet6_name, p7.name AS planet7_name, p8.name AS planet8_name, p9.name AS planet9_name, p10.name AS planet10_name, p1.diameter AS planet1_diameter, p2.gravity AS planet2_gravity, p3.orbitalPeriod AS planet3_orbitalPeriod, p4.numberOfMoons AS planet4_numberOfMoons, p5.meanTemperature AS planet5_meanTemperature FROM $planets p1 JOIN $planets p2 ON p1.id = p2.id JOIN $planets p3 ON p1.id = p3.id JOIN $planets p4 ON p1.id = p4.id JOIN $planets p5 ON p1.id = p5.id JOIN $planets p6 ON p1.id = p6.id JOIN $planets p7 ON p1.id = p7.id JOIN $planets p8 ON p1.id = p8.id JOIN $planets p9 ON p1.id = p9.id JOIN $planets p10 ON p1.id = p10.id WHERE p1.diameter > 10000 ORDER BY p1.name, p2.name, p3.name, p4.name, p5.name;", 6, 15, None),

# virtual dataset doesn't exist
("SELECT * FROM $RomanGods", None, None, DatasetNotFoundError),
# disk dataset doesn't exist
Expand Down
47 changes: 26 additions & 21 deletions tests/sql_battery/tests/feature_tests.run_tests
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@

SHOW FUNCTIONS;
SHOW FUNCTIONS LIKE '%date';
SHOW FUNCTIONS LIKE '%date%';
SHOW FUNCTIONS LIKE '%zz';
# SHOW FUNCTIONS;
# SHOW FUNCTIONS LIKE '%date';
# SHOW FUNCTIONS LIKE '%date%';
# SHOW FUNCTIONS LIKE '%zz';

SELECT planetId, LEAST(LIST(magnitude)) FROM $satellites group by planetId;

SELECT TIMESTAMP('2022-01-01');
SELECT NUMERIC('22');
SELECT INTEGER('22');
SELECT DOUBLE('22.0');
SELECT DECIMAL('22.0');
SELECT BOOLEAN('true');
SELECT TIMESTAMP '2022-01-01';
SELECT NUMERIC '22';
SELECT INTEGER '22';
SELECT DOUBLE '22.0';
SELECT DECIMAL '22.0';
SELECT BOOLEAN 'true';
SELECT CAST('2022-01-01' AS TIMESTAMP);
SELECT CAST('22' AS NUMERIC);
SELECT CAST('22' AS INTEGER);
SELECT CAST('22.0' AS DOUBLE);
SELECT CAST('22.0' AS DECIMAL);
SELECT CAST('true' AS BOOLEAN);

SELECT CEIL(1.5);
SELECT FLOOR(2.5);

SHOW DATABASES;
# SHOW DATABASES;

SELECT POSITION('e' IN 'barge');
SELECT POSITION('x' IN 'barge');
Expand Down Expand Up @@ -48,10 +54,9 @@ SELECT TRIM(name) FROM $planets;
SELECT TRIM(LEADING ' ' FROM name) FROM $planets;

SELECT HASH(name || str(id)) FROM $planets GROUP BY name, id;
SELECT * FROM (SELECT HASH(name || str(id)) AS PID, name, id FROM $planets) AS pset INNER JOIN (SELECT HASH(name || str(id)) as PID, name, id FROM $planets GROUP BY name, id) ON PID = PID ORDER BY pset.name, pset.id;

ANALYZE TABLE $planets;
ANALYZE TABLE $astronauts;
# ANALYZE TABLE $planets;
# ANALYZE TABLE $astronauts;

SELECT LEVENSHTEIN(name, 'zeus') FROM $planets;
SELECT FROM_UNIXTIME(0);
Expand All @@ -64,17 +69,17 @@ SELECT name FROM $planets ORDER BY mass DESC, gravity ASC;
SELECT * FROM $planets ORDER BY mass;
SELECT name FROM $planets ORDER BY mass LIMIT 2;

WITH nom AS (SELECT planetId FROM $satellites GROUP BY planetId) SELECT planetId FROM nom;
WITH nom AS (SELECT planetId as id FROM $satellites GROUP BY planetId) SELECT * FROM $planets INNER JOIN nom ON id = id;
WITH nom AS (SELECT id FROM $planets FOR '1688-01-01') SELECT * FROM $planets INNER JOIN nom ON id = id;
WITH nom AS (SELECT id FROM $planets FOR '1688-01-01') SELECT * FROM $planets FOR TODAY INNER JOIN nom ON id = id;
WITH nom AS (SELECT id FROM $planets FOR DATES BETWEEN '2022-01-01' AND TODAY) SELECT * FROM $planets INNER JOIN nom ON id = id;
# WITH nom AS (SELECT planetId FROM $satellites GROUP BY planetId) SELECT planetId FROM nom;
# WITH nom AS (SELECT planetId as id FROM $satellites GROUP BY planetId) SELECT * FROM $planets INNER JOIN nom ON id = id;
# WITH nom AS (SELECT id FROM $planets FOR '1688-01-01') SELECT * FROM $planets INNER JOIN nom ON id = id;
# WITH nom AS (SELECT id FROM $planets FOR '1688-01-01') SELECT * FROM $planets FOR TODAY INNER JOIN nom ON id = id;
# WITH nom AS (SELECT id FROM $planets FOR DATES BETWEEN '2022-01-01' AND TODAY) SELECT * FROM $planets INNER JOIN nom ON id = id;

ANALYZE TABLE $astronauts;
ANALYZE TABLE $planets;
ANALYZE TABLE 'testdata/flat/formats/parquet/tweets.parquet';
# ANALYZE TABLE $astronauts;
# ANALYZE TABLE $planets;
# ANALYZE TABLE 'testdata/flat/formats/parquet/tweets.parquet';

USE opteryx;
# USE opteryx;

SELECT SPLIT('a,bc,def');
SELECT SPLIT('a,bc,def', ',');
Expand All @@ -87,4 +92,4 @@ SELECT CAST('{"test":true, "live":false}' AS STRUCT);
SELECT TRY_CAST('{"test":true, "prod": 73}' AS STRUCT);
SELECT TRY_CAST(name AS STRUCT) FROM $planets;
SELECT STRUCT('{"test":true}');
SELECT ST['prod'] FROM (SELECT STRUCT('{"prod": 73}') AS ST FROM $planets);
SELECT ST['prod'] FROM (SELECT STRUCT('{"prod": 73}') AS ST FROM $planets) AS SB;
2 changes: 1 addition & 1 deletion tests/sql_battery/tests/planner.run_tests_disabled
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT * FROM $planets UNION SELECT * FROM $planets;
SELECT * FROM $planets LEFT ANTI JOIN $satellites ON id = id;
EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id);
# EXPLAIN ANALYZE FORMAT JSON SELECT * FROM $planets AS a INNER JOIN (SELECT id FROM $planets) AS b USING (id);
4 changes: 2 additions & 2 deletions tests/sql_battery/tests/regression.run_tests
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ SELECT * FROM $planets WHERE TRUE OR FALSE;
SELECT * FROM $planets WHERE FALSE OR TRUE;

# [#561] HASH JOIN with an empty table
SELECT * FROM $planets LEFT JOIN (SELECT planetId as id FROM $satellites WHERE id < 0) USING (id);
SELECT * FROM $planets LEFT JOIN (SELECT planetId as id FROM $satellites WHERE id < 0) AS SQ USING (id);

# Zero results queries
SELECT name, COUNT(*) FROM $astronauts WHERE name = 'Jim' GROUP BY name;
Expand Down Expand Up @@ -56,7 +56,7 @@ SELECT DATEDIFF('days', TIMESTAMP("2022-01-02"), CAST("2010-10-01" AS TIMESTAMP)

# [TEMPORAL FILTER EXTRACTION PROBLEMS]
SET @planet = 'Saturn'; SELECT name AS nom, bigsats.occurances, smallsats.occurances FROM (SELECT DISTINCT id as planetId, name FROM $planets WHERE name = @planet) as planets LEFT JOIN (SELECT planetId, COUNT(*) AS occurances FROM $satellites FOR DATES BETWEEN '2022-01-01' AND TODAY WHERE gm > 10 GROUP BY planetId) AS bigsats ON bigsats.planetId = planets.planetId LEFT JOIN (SELECT planetId, COUNT(*) AS occurances FROM $satellites FOR DATES IN LAST_MONTH WHERE gm < 10 GROUP BY planetId) as smallsats ON smallsats.planetId = planets.planetId;
SELECT 'SELECT * FROM $planets FOR TODAY' FROM (SELECT 'FOR TODAY') INNER JOIN $planets FOR YESTERDAY;
SELECT 'SELECT * FROM $planets FOR TODAY' FROM (SELECT 'FOR TODAY') AS SQ CROSS JOIN $planets FOR YESTERDAY;

#
SELECT (true IS NOT null);
Expand Down

0 comments on commit c22de63

Please sign in to comment.