diff --git a/tests/_expected.py b/tests/_expected.py index a27b2f5..817b4a6 100644 --- a/tests/_expected.py +++ b/tests/_expected.py @@ -2,6 +2,7 @@ from pathlib import Path import pandas as pd +import polars as pl TEST_DIR = Path(__file__).parent DATA_DIR = TEST_DIR / "data" @@ -28,15 +29,18 @@ "+--------+-----------+---------+--------+-----------+---------+", """ -DF_OVERLAP = (mdpd.from_md(EXPECTED_OVERLAP) - .astype({'pos_start_1': 'int64'}) - .astype({'pos_end_1': 'int64'}) - .astype({'pos_start_2': 'int64'}) - .astype({'pos_end_2': 'int64'})) +# Pandas +PD_DF_OVERLAP = (mdpd.from_md(EXPECTED_OVERLAP) + .astype({'pos_start_1': 'int64'}) + .astype({'pos_end_1': 'int64'}) + .astype({'pos_start_2': 'int64'}) + .astype({'pos_end_2': 'int64'})) -DF_OVERLAP = DF_OVERLAP.sort_values(by=list(DF_OVERLAP.columns)).reset_index(drop=True) +PD_DF_OVERLAP = PD_DF_OVERLAP.sort_values(by=list(PD_DF_OVERLAP.columns)).reset_index(drop=True) +PD_DF1 = pd.read_csv(f"{DATA_DIR}/reads.csv") +PD_DF2 = pd.read_csv(f"{DATA_DIR}/targets.csv") - - -DF1 = pd.read_csv(f"{DATA_DIR}/reads.csv") -DF2 = pd.read_csv(f"{DATA_DIR}/targets.csv") \ No newline at end of file +# Polars +PL_DF_OVERLAP = pl.DataFrame(PD_DF_OVERLAP) +PL_DF1 = pl.DataFrame(PD_DF1) +PL_DF2 = pl.DataFrame(PD_DF2) \ No newline at end of file diff --git a/tests/test_pandas_overlap.py b/tests/test_pandas_overlap.py index 36dd0cd..5b792e0 100644 --- a/tests/test_pandas_overlap.py +++ b/tests/test_pandas_overlap.py @@ -1,21 +1,17 @@ import pandas as pd import polars_bio as pb -from _expected import DF_OVERLAP, DF1, DF2 - - - - +from _expected import PD_DF_OVERLAP, PD_DF1, PD_DF2 class TestOverlapPandas: + result = pb.overlap(PD_DF1, PD_DF2, output_type="pandas.DataFrame") def test_overlap_count(self): - assert len(pb.overlap(DF1, DF2, output_type="pandas.DataFrame")) == 16 + assert len(self.result) == 16 def test_overlap_schema_rows(self): - result = pb.overlap(DF1, DF2, output_type="pandas.DataFrame") - result = result.sort_values(by=list(result.columns)).reset_index(drop=True) - expected = DF_OVERLAP + result = self.result.sort_values(by=list(self.result.columns)).reset_index(drop=True) + expected = PD_DF_OVERLAP pd.testing.assert_frame_equal(result, expected) diff --git a/tests/test_polars_overlap.py b/tests/test_polars_overlap.py index 8eb92c3..6e3701f 100644 --- a/tests/test_polars_overlap.py +++ b/tests/test_polars_overlap.py @@ -1,4 +1,22 @@ import pandas as pd -import polars_bio.overlap as overlap +import polars_bio as pb +from _expected import PL_DF_OVERLAP, PL_DF1, PL_DF2 +class TestOverlapPolars: + + result_frame = pb.overlap(PL_DF1, PL_DF2, output_type="polars.DataFrame") + result_lazy = pb.overlap(PL_DF1, PL_DF2, output_type="polars.LazyFrame").collect() + expected = PL_DF_OVERLAP + + def test_overlap_count(self): + assert len(self.result_frame) == 16 + assert len(self.result_lazy) == 16 + + def test_overlap_schema_rows(self): + result = self.result_frame.sort(by=self.result_frame.columns) + assert self.expected.equals(result) + + def test_overlap_schema_rows_lazy(self): + result = self.result_lazy.sort(by=self.result_lazy.columns) + assert self.expected.equals(result)