Skip to content

Commit

Permalink
Merge pull request #70 from alxndrkalinin/v0.4.3
Browse files Browse the repository at this point in the history
V0.4.3
  • Loading branch information
johnarevalo authored Dec 16, 2024
2 parents 44378ed + 67ad16c commit da8a933
Show file tree
Hide file tree
Showing 13 changed files with 349 additions and 281 deletions.
496 changes: 248 additions & 248 deletions examples/mAP_demo.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies = [
]

[project.optional-dependencies]
dev = ["ruff"]
plot = ["plotly"]
test = ["scikit-learn", "pytest"]
demo = ["notebook", "matplotlib"]
Expand All @@ -31,4 +32,4 @@ requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
where = ["src"]
where = ["src"]
25 changes: 25 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Unit tests

We use the `pytest` package to implement and run unit tests for copairs.

## Getting started

### Installation

To install copairs with test dependencies, check out the code locally and install it with:
```bash
pip install -e .[test]
```

### Running tests
To execute all tests, run:
```bash
pytest
```

Each individual `test_filename.py` file implements tests for particular features in the corresponding `copairs/filename.py`.

To run tests for a particular source file, specify its test file:
```bash
pytest tests/test_map.py
```
1 change: 1 addition & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Unit tests for the copairs package."""
10 changes: 6 additions & 4 deletions tests/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Helper functions for testing."""

from itertools import product
from typing import Dict

Expand All @@ -10,7 +12,7 @@


def simulate_plates(n_compounds, n_replicates, plate_size):
"""Round robin creation of platemaps"""
"""Round robin creation of platemaps."""
total = n_compounds * n_replicates

compounds = []
Expand All @@ -35,6 +37,7 @@ def simulate_random_plates(
sameby=ColumnList,
diffby=ColumnList,
):
"""Simulate random platemaps."""
rng = np.random.default_rng(SEED)
dframe = simulate_plates(n_compounds, n_replicates, plate_size)
# Shuffle values
Expand All @@ -52,6 +55,7 @@ def simulate_random_dframe(
diffby: ColumnList,
rng: np.random.Generator,
):
"""Simulate random dataframe."""
dframe = pd.DataFrame(columns=list(vocab_size.keys()), index=range(length))
for col, size in vocab_size.items():
dframe[col] = rng.integers(1, size + 1, size=length)
Expand All @@ -64,9 +68,7 @@ def simulate_random_dframe(


def create_dframe(n_options, n_rows):
"""
Random permutation of a fix number of elements per column
"""
"""Create a dataframe with predefined number of plates, wells, and compounds."""
if isinstance(n_options, int):
n_options = [n_options] * 3
colc = list(f"c{i}" for i in range(n_options[0]))
Expand Down
4 changes: 4 additions & 0 deletions tests/test_build_rank_multilabel.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
"""Test the concatenation of ranges."""

import numpy as np

from copairs.compute import concat_ranges


def naive_concat_ranges(start: np.ndarray, end: np.ndarray):
    """Expand paired ``[start, end)`` bounds into one flat index array.

    Reference implementation: every ``(s, e)`` pair taken in lockstep from
    ``start`` and ``end`` contributes the integers ``s, s + 1, ..., e - 1``,
    in order. The result is an ``int32`` array.
    """
    indices = [idx for lo, hi in zip(start, end) for idx in range(lo, hi)]
    return np.asarray(indices, dtype=np.int32)


def test_concat_ranges():
"""Test the concatenation of ranges."""
rng = np.random.default_rng()
num_range = 5, 10
start_range = 2, 10
Expand Down
10 changes: 10 additions & 0 deletions tests/test_compute.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Test pairwise distance calculation functions."""

import numpy as np

from copairs import compute
Expand All @@ -7,13 +9,15 @@


def corrcoef_naive(feats, pairs):
    """Return the correlation coefficient for each index pair in ``pairs``.

    For every ``(i, j)`` in ``pairs`` the off-diagonal entry of
    ``np.corrcoef(feats[i], feats[j])`` is taken, giving one float per pair
    in the same order as ``pairs``.
    """
    coefficients = [np.corrcoef(feats[i], feats[j])[0, 1] for i, j in pairs]
    return np.array(coefficients, dtype=np.float64)


def cosine_naive(feats, pairs):
"""Compute cosine similarity between pairs of features."""
cosine = np.empty((len(pairs),))
for pos, (i, j) in enumerate(pairs):
a, b = feats[i], feats[j]
Expand All @@ -24,6 +28,7 @@ def cosine_naive(feats, pairs):


def euclidean_naive(feats, pairs):
"""Compute euclidean similarity between pairs of features."""
euclidean_sim = np.empty((len(pairs),))
for pos, (i, j) in enumerate(pairs):
dist = np.linalg.norm(feats[i] - feats[j])
Expand All @@ -32,10 +37,12 @@ def euclidean_naive(feats, pairs):


def abs_cosine_naive(feats, pairs):
    """Return the element-wise absolute value of ``cosine_naive(feats, pairs)``."""
    similarity = cosine_naive(feats, pairs)
    return np.abs(similarity)


def test_corrcoef():
"""Test correlation coefficient computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
Expand All @@ -50,6 +57,7 @@ def test_corrcoef():


def test_cosine():
"""Test cosine similarity computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
Expand All @@ -64,6 +72,7 @@ def test_cosine():


def test_euclidean():
"""Test euclidean similarity computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
Expand All @@ -78,6 +87,7 @@ def test_euclidean():


def test_abs_cosine():
"""Test absolute cosine similarity computation."""
n_samples = 10
n_pairs = 20
n_feats = 5
Expand Down
10 changes: 9 additions & 1 deletion tests/test_map.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Tests for (mean) Average Precision calculation."""

import numpy as np
import pandas as pd
import pytest
Expand All @@ -13,6 +15,7 @@


def test_random_binary_matrix():
"""Test the random binary matrix generation."""
rng = np.random.default_rng(SEED)
# Test with n=3, m=4, k=2
A = compute.random_binary_matrix(3, 4, 2, rng)
Expand All @@ -28,6 +31,7 @@ def test_random_binary_matrix():


def test_compute_ap():
"""Test the average precision computation."""
num_pos, num_neg, num_perm = 5, 6, 100
total = num_pos + num_neg

Expand Down Expand Up @@ -56,6 +60,7 @@ def test_compute_ap():


def test_compute_ap_contiguous():
"""Test the contiguous average precision computation."""
num_pos_range = [2, 9]
num_neg_range = [10, 20]
num_samples_range = [5, 30]
Expand Down Expand Up @@ -88,6 +93,7 @@ def test_compute_ap_contiguous():


def test_pipeline():
"""Check the implementation with for mAP calculation."""
length = 10
vocab_size = {"p": 5, "w": 3, "l": 4}
n_feats = 5
Expand All @@ -103,7 +109,7 @@ def test_pipeline():


def test_pipeline_multilabel():
"""Check the multilabel implementation with for mAP calculation"""
"""Check the multilabel implementation with for mAP calculation."""
length = 10
vocab_size = {"p": 3, "w": 5, "l": 4}
n_feats = 8
Expand All @@ -124,6 +130,7 @@ def test_pipeline_multilabel():


def test_raise_no_pairs():
"""Test the exception raised when no pairs are found."""
length = 10
vocab_size = {"p": 3, "w": 3, "l": 10}
n_feats = 5
Expand All @@ -143,6 +150,7 @@ def test_raise_no_pairs():


def test_raise_nan_error():
"""Test the exception raised when there are null values."""
length = 10
vocab_size = {"p": 5, "w": 3, "l": 4}
n_feats = 8
Expand Down
7 changes: 7 additions & 0 deletions tests/test_map_filter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Tests data filtering by query."""

import numpy as np
import pytest

Expand All @@ -9,6 +11,7 @@

@pytest.fixture
def mock_dataframe():
"""Create a mock dataframe."""
length = 10
vocab_size = {"p": 3, "w": 3, "l": 10}
pos_sameby = ["l"]
Expand All @@ -20,26 +23,30 @@ def mock_dataframe():


def test_correct(mock_dataframe):
    """Valid queries on ``p`` and ``w`` filter the frame and report both columns."""
    queries = ["p == 'p1'", "w > 'w2'"]
    filtered, parsed_cols = evaluate_and_filter(mock_dataframe, queries)

    assert not filtered.empty
    assert "p" in parsed_cols
    assert "w" in parsed_cols

    # Every surviving "w" value must carry a numeric part greater than 2.
    w_numbers = filtered["w"].str.extract(r"(\d+)")[0].astype(int)
    assert all(w_numbers > 2)


def test_invalid_query(mock_dataframe):
    """A query on ``l`` for the value ``"lHello"`` raises ``ValueError``.

    The error message is expected to contain both the combined-query
    failure text and the no-match text.
    """
    with pytest.raises(ValueError) as excinfo:
        evaluate_and_filter(mock_dataframe, ['l == "lHello"'])

    message = str(excinfo.value)
    assert "Invalid combined query expression" in message
    assert "No data matched the query" in message


def test_empty_result(mock_dataframe):
    """Two queries on the same column ``p`` raise a duplicate-query ``ValueError``."""
    duplicate_queries = ['p == "p1"', 'p == "p2"']
    with pytest.raises(ValueError) as excinfo:
        evaluate_and_filter(mock_dataframe, duplicate_queries)

    message = str(excinfo.value)
    assert "Duplicate queries for column" in message


def test_empty_result_from_valid_query(mock_dataframe):
    """The query ``p == "p4"`` raises ``ValueError`` with the no-match message."""
    unmatched_query = ['p == "p4"']
    with pytest.raises(ValueError) as excinfo:
        evaluate_and_filter(mock_dataframe, unmatched_query)

    message = str(excinfo.value)
    assert "No data matched the query" in message
28 changes: 14 additions & 14 deletions tests/test_matching.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Test functions for Matcher"""
"""Test functions for Matcher."""

from string import ascii_letters

Expand All @@ -13,7 +13,7 @@


def run_stress_sample_null(dframe, num_pairs):
"""Assert every generated null pair does not match any column"""
"""Assert every generated null pair does not match any column."""
matcher = Matcher(dframe, dframe.columns, seed=SEED)
for _ in range(num_pairs):
id1, id2 = matcher.sample_null_pair(dframe.columns)
Expand All @@ -23,19 +23,19 @@ def run_stress_sample_null(dframe, num_pairs):


def test_null_sample_large():
    """Stress null-pair sampling: 5000 pairs from ``create_dframe(32, 10000)``."""
    run_stress_sample_null(create_dframe(32, 10000), 5000)


def test_null_sample_small():
    """Stress null-pair sampling: 100 pairs from ``create_dframe(3, 10)``."""
    run_stress_sample_null(create_dframe(3, 10), 100)


def test_null_sample_nan_vals():
"""Test NaN values are ignored"""
"""Test NaN values are ignored."""
dframe = create_dframe(4, 15)
rng = np.random.default_rng(SEED)
nan_mask = rng.random(dframe.shape) < 0.5
Expand All @@ -44,7 +44,7 @@ def test_null_sample_nan_vals():


def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):
"""Compute valid pairs using cross product from pandas"""
"""Compute valid pairs using cross product from pandas."""
cross = dframe.reset_index().merge(
dframe.reset_index(), how="cross", suffixes=("_x", "_y")
)
Expand All @@ -62,7 +62,7 @@ def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):


def check_naive(dframe, matcher: Matcher, sameby, diffby):
"""Check Matcher and naive generate same pairs"""
"""Check Matcher and naive generate same pairs."""
gt_pairs = get_naive_pairs(dframe, sameby, diffby)
vals = matcher.get_all_pairs(sameby, diffby)
vals = sum(vals.values(), [])
Expand All @@ -74,14 +74,14 @@ def check_naive(dframe, matcher: Matcher, sameby, diffby):


def check_simulated_data(length, vocab_size, sameby, diffby, rng):
    """Compare Matcher against the naive pairing on one simulated dataframe.

    The dataframe comes from ``simulate_random_dframe``; the Matcher is built
    over all of its columns with the module ``SEED``.
    """
    simulated = simulate_random_dframe(length, vocab_size, sameby, diffby, rng)
    check_naive(
        simulated,
        Matcher(simulated, simulated.columns, seed=SEED),
        sameby,
        diffby,
    )


def test_stress_simulated_data():
"""Run multiple tests using simulated data"""
"""Run multiple tests using simulated data."""
rng = np.random.default_rng(SEED)
num_cols_range = [2, 6]
vocab_size_range = [5, 10]
Expand All @@ -99,15 +99,15 @@ def test_stress_simulated_data():


def test_empty_sameby():
    """Matcher agrees with the naive pairing when ``sameby`` is empty."""
    frame = create_dframe(3, 10)
    matcher = Matcher(frame, frame.columns, seed=SEED)
    # Same two scenarios as before, in the same order.
    for diffby in (["w", "c"], ["w"]):
        check_naive(frame, matcher, sameby=[], diffby=diffby)


def test_empty_diffby():
"""Test query without diffby"""
"""Test query without diffby."""
dframe = create_dframe(3, 10)
matcher = Matcher(dframe, dframe.columns, seed=SEED)
matcher.get_all_pairs(["c"], [])
Expand All @@ -116,23 +116,23 @@ def test_empty_diffby():


def test_raise_distjoint():
    """Passing column ``c`` in both ``sameby`` and ``diffby`` raises ``ValueError``."""
    frame = create_dframe(3, 10)
    matcher = Matcher(frame, frame.columns, seed=SEED)
    expected_message = "must be disjoint lists"
    with pytest.raises(ValueError, match=expected_message):
        matcher.get_all_pairs("c", ["w", "c"])


def test_raise_no_params():
    """Calling ``get_all_pairs`` with empty ``sameby`` and ``diffby`` raises ``ValueError``."""
    frame = create_dframe(3, 10)
    matcher = Matcher(frame, frame.columns, seed=SEED)
    expected_message = "at least one should be provided"
    with pytest.raises(ValueError, match=expected_message):
        matcher.get_all_pairs([], [])


def assert_sameby_diffby(dframe: pd.DataFrame, pairs_dict: dict, sameby, diffby):
"""Assert the pairs are valid"""
"""Assert the pairs are valid."""
for _, pairs in pairs_dict.items():
for id1, id2 in pairs:
for col in sameby:
Expand Down
Loading

0 comments on commit da8a933

Please sign in to comment.