Skip to content

Commit

Permalink
Bioframe tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mwiewior committed Dec 12, 2024
1 parent 85fd84e commit 99aba85
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 0 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,19 @@
# polars_bio


## Features

| Features | Bioframe | polars-bio | PyRanges | Pybedtools |
|------------|----------|------------|----------|------------|
| overlap |:white_check_mark:|:white_check_mark:| | |
| closest |:white_check_mark:| | | |
| cluster |:white_check_mark:| | | |
| complement |:white_check_mark:| | | |

## Input/Output
| I/O | Bioframe | polars-bio | PyRanges | Pybedtools |
|------------------|-------------|-------------------|----------|------------|
| Pandas DataFrame |:white_check_mark:|:white_check_mark: | | |
| Polars DataFrame | |:white_check_mark: | | |
| Polars LazyFrame | |:white_check_mark: | | |
| Native readers | |:white_check_mark: | | |
4 changes: 4 additions & 0 deletions tests/_expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@
PD_DF1 = pd.read_csv(DF_PATH1)
PD_DF2 = pd.read_csv(DF_PATH2)

# Inputs for the bioframe comparison test (imported by tests/test_bioframe.py),
# read as parquet from paths under DATA_DIR.
BIO_PD_DF1 = pd.read_parquet(f"{DATA_DIR}/exons/")
BIO_PD_DF2 = pd.read_parquet(f"{DATA_DIR}/fBrain-DS14718/")


# Polars
# Polars DataFrames constructed from the corresponding pandas frames.
PL_DF_OVERLAP = pl.DataFrame(PD_DF_OVERLAP)
PL_DF1 = pl.DataFrame(PD_DF1)
Expand Down
22 changes: 22 additions & 0 deletions tests/test_bioframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import bioframe as bf
import pandas as pd
import polars_bio as pb

from _expected import BIO_PD_DF1, BIO_PD_DF2
from polars_bio import OverlapFilter

class TestOverlapBioframe:
    """Cross-check ``polars_bio.overlap`` against ``bioframe.overlap``.

    Both results are computed once, when the class body is executed, and
    are shared by every test method through the class attributes
    ``result`` (polars_bio) and ``result_bio`` (bioframe).
    """

    # polars_bio result, requested as a pandas DataFrame.
    result = pb.overlap(
        BIO_PD_DF1,
        BIO_PD_DF2,
        output_type="pandas.DataFrame",
        overlap_filter=OverlapFilter.Strict,
    )

    # bioframe reference result computed on the same two inputs.
    result_bio = bf.overlap(
        BIO_PD_DF1,
        BIO_PD_DF2,
        cols1=("contig", "pos_start", "pos_end"),
        cols2=("contig", "pos_start", "pos_end"),
        suffixes=("_1", "_2"),
        how="inner",
    )

    def test_overlap_count(self):
        """Row count matches the pinned value and the bioframe row count."""
        row_count = len(self.result)
        assert row_count == 54246
        assert row_count == len(self.result_bio)

    def test_overlap_schema_rows(self):
        """Both frames are equal once sorted by the polars_bio columns."""
        # Sorting the bioframe output by polars_bio's column names also
        # requires those columns to exist in the bioframe result.
        sort_keys = list(self.result.columns)
        expected = self.result_bio.sort_values(by=sort_keys)
        expected = expected.reset_index(drop=True)
        actual = self.result.sort_values(by=sort_keys)
        actual = actual.reset_index(drop=True)
        pd.testing.assert_frame_equal(actual, expected)

0 comments on commit 99aba85

Please sign in to comment.