V0.4.3 #70

Merged · 3 commits · Dec 16, 2024

496 changes: 248 additions & 248 deletions examples/mAP_demo.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
]

[project.optional-dependencies]
dev = ["ruff"]
plot = ["plotly"]
test = ["scikit-learn", "pytest"]
demo = ["notebook", "matplotlib"]
@@ -31,4 +32,4 @@ requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
where = ["src"]
where = ["src"]
25 changes: 25 additions & 0 deletions tests/README.md
@@ -0,0 +1,25 @@
# Unit tests

We use the `pytest` package to implement and run unit tests for copairs.

## Getting started

### Installation

To install copairs with test dependencies, check out the code locally and install it as:
```bash
pip install -e .[test]
```

### Running tests
To execute all tests, run:
```bash
pytest
```

Each `test_filename.py` file implements tests for the features in the corresponding `copairs/filename.py` module.

To run tests for a particular source file, specify its test file:
```bash
pytest tests/test_map.py
```
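A single test function can also be selected by its pytest node id; for example, using `test_compute_ap` from `tests/test_map.py` (one of the tests covered by this PR):
```bash
pytest tests/test_map.py::test_compute_ap
```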
1 change: 1 addition & 0 deletions tests/__init__.py
@@ -0,0 +1 @@
"""Unit tests for the copairs package."""
10 changes: 6 additions & 4 deletions tests/helpers.py
@@ -1,3 +1,5 @@
"""Helper functions for testing."""

from itertools import product
from typing import Dict

@@ -10,7 +12,7 @@


def simulate_plates(n_compounds, n_replicates, plate_size):
"""Round robin creation of platemaps"""
"""Round robin creation of platemaps."""
total = n_compounds * n_replicates

compounds = []
@@ -35,6 +37,7 @@ def simulate_random_plates(
sameby=ColumnList,
diffby=ColumnList,
):
"""Simulate random platemaps."""
rng = np.random.default_rng(SEED)
dframe = simulate_plates(n_compounds, n_replicates, plate_size)
# Shuffle values
@@ -52,6 +55,7 @@ def simulate_random_dframe(
diffby: ColumnList,
rng: np.random.Generator,
):
"""Simulate random dataframe."""
dframe = pd.DataFrame(columns=list(vocab_size.keys()), index=range(length))
for col, size in vocab_size.items():
dframe[col] = rng.integers(1, size + 1, size=length)
@@ -64,9 +68,7 @@ def simulate_random_dframe(


def create_dframe(n_options, n_rows):
"""
Random permutation of a fix number of elements per column
"""
"""Create a dataframe with predefined number of plates, wells, and compounds."""
if isinstance(n_options, int):
n_options = [n_options] * 3
colc = list(f"c{i}" for i in range(n_options[0]))
4 changes: 4 additions & 0 deletions tests/test_build_rank_multilabel.py
@@ -1,16 +1,20 @@
"""Test the concatenation of ranges."""

import numpy as np

from copairs.compute import concat_ranges


def naive_concat_ranges(start: np.ndarray, end: np.ndarray):
"""Concatenate ranges into a mask."""
mask = []
for s, e in zip(start, end):
mask.extend(range(s, e))
return np.asarray(mask, dtype=np.int32)


def test_concat_ranges():
"""Test the concatenation of ranges."""
rng = np.random.default_rng()
num_range = 5, 10
start_range = 2, 10
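For intuition, a small worked example of the range concatenation that `naive_concat_ranges` above defines; the concrete values are illustrative only:

```python
import numpy as np

# Two half-open ranges, [0, 3) and [5, 7), as parallel start/end arrays.
start = np.array([0, 5])
end = np.array([3, 7])

# Expanding each [s, e) range and concatenating, as naive_concat_ranges does:
mask = np.asarray(
    [i for s, e in zip(start, end) for i in range(s, e)], dtype=np.int32
)
print(mask)  # [0 1 2 5 6]
```

`test_concat_ranges` checks that `copairs.compute.concat_ranges` matches this reference on randomly generated ranges.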
10 changes: 10 additions & 0 deletions tests/test_compute.py
@@ -1,3 +1,5 @@
"""Test pairwise distance calculation functions."""

import numpy as np

from copairs import compute
@@ -7,13 +9,15 @@


def corrcoef_naive(feats, pairs):
"""Compute correlation coefficient between pairs of features."""
corr = np.empty((len(pairs),))
for pos, (i, j) in enumerate(pairs):
corr[pos] = np.corrcoef(feats[i], feats[j])[0, 1]
return corr


def cosine_naive(feats, pairs):
"""Compute cosine similarity between pairs of features."""
cosine = np.empty((len(pairs),))
for pos, (i, j) in enumerate(pairs):
a, b = feats[i], feats[j]
@@ -24,6 +28,7 @@ def cosine_naive(feats, pairs):


def euclidean_naive(feats, pairs):
"""Compute euclidean similarity between pairs of features."""
euclidean_sim = np.empty((len(pairs),))
for pos, (i, j) in enumerate(pairs):
dist = np.linalg.norm(feats[i] - feats[j])
@@ -32,10 +37,12 @@ def euclidean_naive(feats, pairs):


def abs_cosine_naive(feats, pairs):
"""Compute absolute cosine similarity between pairs of features."""
return np.abs(cosine_naive(feats, pairs))


def test_corrcoef():
"""Test correlation coefficient computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
@@ -50,6 +57,7 @@ def test_corrcoef():


def test_cosine():
"""Test cosine similarity computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
@@ -64,6 +72,7 @@ def test_cosine():


def test_euclidean():
"""Test euclidean similarity computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
@@ -78,6 +87,7 @@ def test_euclidean():


def test_abs_cosine():
"""Test absolute cosine similarity computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
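These tests all follow one pattern: a small looped reference is compared against the library's vectorized routine on random features and index pairs. A self-contained sketch of that pattern using only NumPy (the vectorized correlation below is an illustrative stand-in, not the copairs API):

```python
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_feats, n_pairs = 10, 5, 20
feats = rng.normal(size=(n_samples, n_feats))
pairs = rng.integers(0, n_samples, size=(n_pairs, 2))

# Looped reference, mirroring corrcoef_naive above.
ref = np.array([np.corrcoef(feats[i], feats[j])[0, 1] for i, j in pairs])

# Vectorized equivalent: center and normalize each row, then take row-wise dot products.
centered = feats - feats.mean(axis=1, keepdims=True)
normed = centered / np.linalg.norm(centered, axis=1, keepdims=True)
vec = np.sum(normed[pairs[:, 0]] * normed[pairs[:, 1]], axis=1)

assert np.allclose(ref, vec)
```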
10 changes: 9 additions & 1 deletion tests/test_map.py
@@ -1,3 +1,5 @@
"""Tests for (mean) Average Precision calculation."""

import numpy as np
import pandas as pd
import pytest
@@ -13,6 +15,7 @@


def test_random_binary_matrix():
"""Test the random binary matrix generation."""
rng = np.random.default_rng(SEED)
# Test with n=3, m=4, k=2
A = compute.random_binary_matrix(3, 4, 2, rng)
@@ -28,6 +31,7 @@ def test_random_binary_matrix():


def test_compute_ap():
"""Test the average precision computation."""
num_pos, num_neg, num_perm = 5, 6, 100
total = num_pos + num_neg

@@ -56,6 +60,7 @@ def test_compute_ap():


def test_compute_ap_contiguous():
"""Test the contiguous average precision computation."""
num_pos_range = [2, 9]
num_neg_range = [10, 20]
num_samples_range = [5, 30]
@@ -88,6 +93,7 @@ def test_compute_ap_contiguous():


def test_pipeline():
"""Check the implementation with for mAP calculation."""
length = 10
vocab_size = {"p": 5, "w": 3, "l": 4}
n_feats = 5
@@ -103,7 +109,7 @@


def test_pipeline_multilabel():
"""Check the multilabel implementation with for mAP calculation"""
"""Check the multilabel implementation with for mAP calculation."""
length = 10
vocab_size = {"p": 3, "w": 5, "l": 4}
n_feats = 8
@@ -124,6 +130,7 @@ def test_pipeline_multilabel():


def test_raise_no_pairs():
"""Test the exception raised when no pairs are found."""
length = 10
vocab_size = {"p": 3, "w": 3, "l": 10}
n_feats = 5
@@ -143,6 +150,7 @@ def test_raise_no_pairs():


def test_raise_nan_error():
"""Test the exception raised when there are null values."""
length = 10
vocab_size = {"p": 5, "w": 3, "l": 4}
n_feats = 8
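For intuition about the quantity under test, a self-contained sketch of average precision under its standard definition (the mean of precision@k taken at the rank of each positive); this illustrates what the tests check, not necessarily copairs' exact implementation:

```python
import numpy as np


def average_precision(rel: np.ndarray) -> float:
    """AP of a ranked 0/1 relevance vector: mean precision at each positive's rank."""
    ranks = np.flatnonzero(rel) + 1  # 1-based ranks of the positives
    precision_at_hits = np.arange(1, len(ranks) + 1) / ranks
    return float(precision_at_hits.mean())


# Three positives ranked 1st, 3rd and 6th out of six results.
rel = np.array([1, 0, 1, 0, 0, 1])
print(average_precision(rel))  # (1/1 + 2/3 + 3/6) / 3 ≈ 0.722
```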
7 changes: 7 additions & 0 deletions tests/test_map_filter.py
@@ -1,3 +1,5 @@
"""Tests data filtering by query."""

import numpy as np
import pytest

@@ -9,6 +11,7 @@

@pytest.fixture
def mock_dataframe():
"""Create a mock dataframe."""
length = 10
vocab_size = {"p": 3, "w": 3, "l": 10}
pos_sameby = ["l"]
@@ -20,26 +23,30 @@ def mock_dataframe():


def test_correct(mock_dataframe):
"""Test correct query."""
df, parsed_cols = evaluate_and_filter(mock_dataframe, ["p == 'p1'", "w > 'w2'"])
assert not df.empty
assert "p" in parsed_cols and "w" in parsed_cols
assert all(df["w"].str.extract(r"(\d+)")[0].astype(int) > 2)


def test_invalid_query(mock_dataframe):
"""Test invalid query."""
with pytest.raises(ValueError) as excinfo:
evaluate_and_filter(mock_dataframe, ['l == "lHello"'])
assert "Invalid combined query expression" in str(excinfo.value)
assert "No data matched the query" in str(excinfo.value)


def test_empty_result(mock_dataframe):
"""Test empty result."""
with pytest.raises(ValueError) as excinfo:
evaluate_and_filter(mock_dataframe, ['p == "p1"', 'p == "p2"'])
assert "Duplicate queries for column" in str(excinfo.value)


def test_empty_result_from_valid_query(mock_dataframe):
"""Test empty result from valid query."""
with pytest.raises(ValueError) as excinfo:
evaluate_and_filter(mock_dataframe, ['p == "p4"'])
assert "No data matched the query" in str(excinfo.value)
28 changes: 14 additions & 14 deletions tests/test_matching.py
@@ -1,4 +1,4 @@
"""Test functions for Matcher"""
"""Test functions for Matcher."""

from string import ascii_letters

@@ -13,7 +13,7 @@


def run_stress_sample_null(dframe, num_pairs):
"""Assert every generated null pair does not match any column"""
"""Assert every generated null pair does not match any column."""
matcher = Matcher(dframe, dframe.columns, seed=SEED)
for _ in range(num_pairs):
id1, id2 = matcher.sample_null_pair(dframe.columns)
@@ -23,19 +23,19 @@ def run_stress_sample_null(dframe, num_pairs):


def test_null_sample_large():
"""Test Matcher guarantees elements with different values"""
"""Test Matcher guarantees elements with different values."""
dframe = create_dframe(32, 10000)
run_stress_sample_null(dframe, 5000)


def test_null_sample_small():
"""Test Sample with small set"""
"""Test Sample with small set."""
dframe = create_dframe(3, 10)
run_stress_sample_null(dframe, 100)


def test_null_sample_nan_vals():
"""Test NaN values are ignored"""
"""Test NaN values are ignored."""
dframe = create_dframe(4, 15)
rng = np.random.default_rng(SEED)
nan_mask = rng.random(dframe.shape) < 0.5
@@ -44,7 +44,7 @@ def test_null_sample_nan_vals():


def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):
"""Compute valid pairs using cross product from pandas"""
"""Compute valid pairs using cross product from pandas."""
cross = dframe.reset_index().merge(
dframe.reset_index(), how="cross", suffixes=("_x", "_y")
)
@@ -62,7 +62,7 @@ def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):


def check_naive(dframe, matcher: Matcher, sameby, diffby):
"""Check Matcher and naive generate same pairs"""
"""Check Matcher and naive generate same pairs."""
gt_pairs = get_naive_pairs(dframe, sameby, diffby)
vals = matcher.get_all_pairs(sameby, diffby)
vals = sum(vals.values(), [])
@@ -74,14 +74,14 @@ def check_naive(dframe, matcher: Matcher, sameby, diffby):


def check_simulated_data(length, vocab_size, sameby, diffby, rng):
"""Test sample of valid pairs from a simulated dataset"""
"""Test sample of valid pairs from a simulated dataset."""
dframe = simulate_random_dframe(length, vocab_size, sameby, diffby, rng)
matcher = Matcher(dframe, dframe.columns, seed=SEED)
check_naive(dframe, matcher, sameby, diffby)


def test_stress_simulated_data():
"""Run multiple tests using simulated data"""
"""Run multiple tests using simulated data."""
rng = np.random.default_rng(SEED)
num_cols_range = [2, 6]
vocab_size_range = [5, 10]
@@ -99,15 +99,15 @@ def test_stress_simulated_data():


def test_empty_sameby():
"""Test query without sameby"""
"""Test query without sameby."""
dframe = create_dframe(3, 10)
matcher = Matcher(dframe, dframe.columns, seed=SEED)
check_naive(dframe, matcher, sameby=[], diffby=["w", "c"])
check_naive(dframe, matcher, sameby=[], diffby=["w"])


def test_empty_diffby():
"""Test query without diffby"""
"""Test query without diffby."""
dframe = create_dframe(3, 10)
matcher = Matcher(dframe, dframe.columns, seed=SEED)
matcher.get_all_pairs(["c"], [])
@@ -116,23 +116,23 @@ def test_raise_distjoint():


def test_raise_distjoint():
"""Test check for disjoint sameby and diffby"""
"""Test check for disjoint sameby and diffby."""
dframe = create_dframe(3, 10)
matcher = Matcher(dframe, dframe.columns, seed=SEED)
with pytest.raises(ValueError, match="must be disjoint lists"):
matcher.get_all_pairs("c", ["w", "c"])


def test_raise_no_params():
"""Test check for at least one of sameby and diffby"""
"""Test check for at least one of sameby and diffby."""
dframe = create_dframe(3, 10)
matcher = Matcher(dframe, dframe.columns, seed=SEED)
with pytest.raises(ValueError, match="at least one should be provided"):
matcher.get_all_pairs([], [])


def assert_sameby_diffby(dframe: pd.DataFrame, pairs_dict: dict, sameby, diffby):
"""Assert the pairs are valid"""
"""Assert the pairs are valid."""
for _, pairs in pairs_dict.items():
for id1, id2 in pairs:
for col in sameby:
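For intuition about the reference that `check_naive` compares against: `get_naive_pairs` builds every candidate pair with a pandas cross join and keeps those satisfying the sameby/diffby conditions. A self-contained sketch of that reference on a tiny frame (illustrative data; the real helper above works the same way on the simulated frames):

```python
import pandas as pd

dframe = pd.DataFrame({"c": ["c0", "c0", "c1", "c1"], "w": ["w0", "w1", "w0", "w1"]})
sameby, diffby = ["c"], ["w"]

cross = dframe.reset_index().merge(
    dframe.reset_index(), how="cross", suffixes=("_x", "_y")
)

# Keep ordered, distinct index pairs that agree on every sameby column
# and differ on every diffby column.
valid = cross["index_x"] < cross["index_y"]
for col in sameby:
    valid &= cross[f"{col}_x"] == cross[f"{col}_y"]
for col in diffby:
    valid &= cross[f"{col}_x"] != cross[f"{col}_y"]

pairs = cross.loc[valid, ["index_x", "index_y"]].to_numpy()
print(pairs)  # [[0 1], [2 3]]: same compound, different well
```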