Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#2167 #2212

Merged
merged 4 commits into from
Jan 7, 2025
Merged

#2167 #2212

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ We appreciate your contribution to Opteryx. Your time and effort make a differen

---

### **Fixes: `<issue_number_goes_here>`**
### **Fixes: #`<issue_number_goes_here>`**
Please replace `<issue_number_goes_here>` with the corresponding issue number.

---
Expand Down
2 changes: 1 addition & 1 deletion opteryx/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__build__ = 964
__build__ = 966

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
104 changes: 104 additions & 0 deletions tests/misc/test_flat_hash_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import os
import sys

sys.path.insert(1, os.path.join(sys.path[0], "../.."))

import pyarrow

from opteryx.compiled.joins.inner_join import abs_hash_join_map

def test_hash_join_map_with_null_values():
    """Rows whose join-key value is null must not appear in the hash map."""
    source = pyarrow.table(
        {
            'a': [1, 2, None, 4],
            'b': [None, 'x', 'y', 'z'],
        }
    )

    # Build the join map over the single key column 'a'.
    mapping = abs_hash_join_map(source, ['a'])

    # Each non-null key maps to exactly its row position.
    for key, expected_rows in ((1, [0]), (2, [1]), (4, [3])):
        assert mapping.get(hash(key)) == expected_rows, mapping.get(hash(key))
    # The null key produced no entries at all.
    assert mapping.get(hash(None)) == []

def test_hash_join_map_empty_input():
    """An empty table must produce an empty hash map."""
    empty_table = pyarrow.table({'a': [], 'b': []})

    mapping = abs_hash_join_map(empty_table, ['a', 'b'])

    # No rows in, no entries out.
    assert mapping.size() == 0, mapping.size()

def test_hash_join_map_multicolumn():
    """Composite keys hash as hash(a) * 31 + hash(b) and map to row indices."""
    frame = pyarrow.table(
        {
            'a': [1, 2, 3, 4],
            'b': ['x', 'y', 'z', 'w'],
        }
    )

    mapping = abs_hash_join_map(frame, ['a', 'b'])

    # Every (a, b) pair resolves to its own row.
    pairs = {(1, 'x'): [0], (2, 'y'): [1], (3, 'z'): [2], (4, 'w'): [3]}
    for (left, right), rows in pairs.items():
        assert mapping.get(hash(left) * 31 + hash(right)) == rows

def test_hash_join_map_large_dataset():
    """The map builds without crashing on 100k rows and stays queryable."""
    row_count = 100000
    bulk = pyarrow.table(
        {
            'a': list(range(row_count)),
            'b': ['x'] * row_count,
        }
    )

    mapping = abs_hash_join_map(bulk, ['a', 'b'])

    # Spot-check the final row survived the build.
    assert mapping.get(hash(99999) * 31 + hash('x')) == [99999]

def test_hash_join_map_duplicate_keys():
    """Duplicate composite keys accumulate every matching row index."""
    dupes = pyarrow.table(
        {
            'a': [1, 2, 2, 4],
            'b': ['x', 'y', 'y', 'z'],
        }
    )

    mapping = abs_hash_join_map(dupes, ['a', 'b'])

    assert mapping.get(hash(1) * 31 + hash('x')) == [0]
    # Rows 1 and 2 share the key (2, 'y') and must both be recorded.
    assert mapping.get(hash(2) * 31 + hash('y')) == [1, 2]
    assert mapping.get(hash(4) * 31 + hash('z')) == [3]


def test_hash_join_map_large_null_values():
    """Nulls are skipped at scale and row numbering remains absolute."""
    half = 50000
    keys = [None] * half + list(range(half))
    mixed = pyarrow.table(
        {
            'a': keys,
            'b': ['x'] * (2 * half),
        }
    )

    mapping = abs_hash_join_map(mixed, ['a', 'b'])

    # Row 99999 carries a == 49999 (offset past the 50k leading nulls).
    assert mapping.get(hash(49999) * 31 + hash('x')) == [99999]
    # The null keys contributed nothing.
    assert mapping.get(hash(None)) == []

if __name__ == "__main__": # pragma: no cover
    # Allow running this test module directly via the project's test runner.
    from tests.tools import run_tests

    run_tests()
17 changes: 10 additions & 7 deletions tests/plan_optimization/test_temporal_extraction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
""" """

import os
import sys
Expand All @@ -8,15 +7,18 @@
import datetime

import pytest
from freezegun import freeze_time

from opteryx.planner.sql_rewriter import extract_temporal_filters
from opteryx.utils.sql import clean_statement, remove_comments

APOLLO_17_LAUNCH_DATE = datetime.datetime(1972, 12, 7, 5, 33, 0) # UTC

# fmt:off
THIS_MORNING = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
TONIGHT = datetime.datetime.utcnow().replace(hour=23, minute=59, second=0, microsecond=0)
NOWISH = datetime.datetime.utcnow().replace(minute=0, second=0, microsecond=0)
YESTERDAY = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) - datetime.timedelta(days=1)
THIS_MORNING = APOLLO_17_LAUNCH_DATE.replace(hour=0, minute=0, second=0, microsecond=0)
TONIGHT = APOLLO_17_LAUNCH_DATE.replace(hour=23, minute=59, second=0, microsecond=0)
NOWISH = APOLLO_17_LAUNCH_DATE.replace(minute=0, second=0, microsecond=0)
YESTERDAY = APOLLO_17_LAUNCH_DATE.replace(hour=0, minute=0, second=0, microsecond=0) - datetime.timedelta(days=1)
# fmt:on

# fmt:off
Expand Down Expand Up @@ -130,8 +132,9 @@ def test_temporal_extraction(statement, filters):

"""

clean = clean_statement(remove_comments(statement))
_, extracted_filters = extract_temporal_filters(clean)
with freeze_time(APOLLO_17_LAUNCH_DATE):
clean = clean_statement(remove_comments(statement))
_, extracted_filters = extract_temporal_filters(clean)

assert filters == extracted_filters, f"{filters} != {extracted_filters}"

Expand Down
1 change: 1 addition & 0 deletions tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ setuptools_rust

aiohttp
psutil
freezegun
3 changes: 2 additions & 1 deletion tests/requirements_arm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ psycopg2-binary
duckdb==1.1.3 # 1040
duckdb-engine==0.14.0 # 1040

setuptools_rust
setuptools_rust
freezegun
Loading