diff --git a/pyproject.toml b/pyproject.toml
index 2133055..1d0645c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+dev = ["ruff"]
 plot = ["plotly"]
 test = ["scikit-learn", "pytest"]
 demo = ["notebook", "matplotlib"]
@@ -31,4 +32,4 @@ requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools.packages.find]
-where = ["src"] 
+where = ["src"]
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..42ebdad
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,25 @@
+# Unit tests
+
+We use the `pytest` package to implement and run unit tests for copairs.
+
+## Getting started
+
+### Installation
+
+To install copairs with its test dependencies, check out the code locally and install it with:
+```bash
+pip install -e .[test]
+```
+
+### Running tests
+To execute all tests, run:
+```bash
+pytest
+```
+
+Each individual `test_filename.py` file implements tests for particular features in the corresponding `copairs/filename.py`.
+
+To run tests for a particular source file, specify its test file:
+```bash
+pytest tests/test_map.py
+```
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29..6bd8fb4 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for the copairs package."""
\ No newline at end of file
diff --git a/tests/helpers.py b/tests/helpers.py
index a7a4c25..e7aaaf4 100644
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -1,3 +1,4 @@
+"""Helper functions for testing."""
 from itertools import product
 from typing import Dict
 
@@ -10,7 +11,7 @@
 
 
 def simulate_plates(n_compounds, n_replicates, plate_size):
-    """Round robin creation of platemaps"""
+    """Round robin creation of platemaps."""
     total = n_compounds * n_replicates
 
     compounds = []
@@ -35,6 +36,7 @@ def simulate_random_plates(
     sameby=ColumnList,
     diffby=ColumnList,
 ):
+    """Simulate random platemaps."""
     rng = np.random.default_rng(SEED)
     dframe = simulate_plates(n_compounds, n_replicates, plate_size)
     # Shuffle values
@@ -52,6 +54,7 @@ def simulate_random_dframe(
     diffby: ColumnList,
     rng: np.random.Generator,
 ):
+    """Simulate random dataframe."""
     dframe = pd.DataFrame(columns=list(vocab_size.keys()), index=range(length))
     for col, size in vocab_size.items():
         dframe[col] = rng.integers(1, size + 1, size=length)
@@ -64,9 +67,7 @@ def simulate_random_dframe(
 
 
 def create_dframe(n_options, n_rows):
-    """
-    Random permutation of a fix number of elements per column
-    """
+    """Create a dataframe with a predefined number of plates, wells, and compounds."""
     if isinstance(n_options, int):
         n_options = [n_options] * 3
     colc = list(f"c{i}" for i in range(n_options[0]))
diff --git a/tests/test_build_rank_multilabel.py b/tests/test_build_rank_multilabel.py
index 49b6f08..c2afdf4 100644
--- a/tests/test_build_rank_multilabel.py
+++ b/tests/test_build_rank_multilabel.py
@@ -1,9 +1,11 @@
+"""Test the concatenation of ranges."""
 import numpy as np
 
 from copairs.compute import concat_ranges
 
 
 def naive_concat_ranges(start: np.ndarray, end: np.ndarray):
+    """Concatenate ranges into a mask."""
     mask = []
     for s, e in zip(start, end):
         mask.extend(range(s, e))
@@ -11,6 +13,7 @@ def naive_concat_ranges(start: np.ndarray, end: np.ndarray):
 
 
 def test_concat_ranges():
+    """Test the concatenation of ranges."""
     rng = np.random.default_rng()
     num_range = 5, 10
     start_range = 2, 10
diff --git a/tests/test_compute.py b/tests/test_compute.py
index 63444c7..03428c7 100644
--- a/tests/test_compute.py
+++ b/tests/test_compute.py
@@ -1,3 +1,4 @@
+"""Test pairwise distance calculation functions."""
 import numpy as np
 
 from copairs import compute
@@ -7,6 +8,7 @@
 
 
 def corrcoef_naive(feats, pairs):
+    """Compute correlation coefficient between pairs of features."""
     corr = np.empty((len(pairs),))
     for pos, (i, j) in enumerate(pairs):
         corr[pos] = np.corrcoef(feats[i], feats[j])[0, 1]
@@ -14,6 +16,7 @@ def corrcoef_naive(feats, pairs):
 
 
 def cosine_naive(feats, pairs):
+    """Compute cosine similarity between pairs of features."""
     cosine = np.empty((len(pairs),))
     for pos, (i, j) in enumerate(pairs):
         a, b = feats[i], feats[j]
@@ -24,6 +27,7 @@ def cosine_naive(feats, pairs):
 
 
 def euclidean_naive(feats, pairs):
+    """Compute euclidean similarity between pairs of features."""
     euclidean_sim = np.empty((len(pairs),))
     for pos, (i, j) in enumerate(pairs):
         dist = np.linalg.norm(feats[i] - feats[j])
@@ -32,10 +36,12 @@ def euclidean_naive(feats, pairs):
 
 
 def abs_cosine_naive(feats, pairs):
+    """Compute absolute cosine similarity between pairs of features."""
     return np.abs(cosine_naive(feats, pairs))
 
 
 def test_corrcoef():
+    """Test correlation coefficient computation."""
     n_samples = 10
     n_pairs = 20
     n_feats = 5
@@ -50,6 +56,7 @@ def test_corrcoef():
 
 
 def test_cosine():
+    """Test cosine similarity computation."""
     n_samples = 10
     n_pairs = 20
     n_feats = 5
@@ -64,6 +71,7 @@ def test_cosine():
 
 
 def test_euclidean():
+    """Test euclidean similarity computation."""
     n_samples = 10
     n_pairs = 20
     n_feats = 5
@@ -78,6 +86,7 @@ def test_euclidean():
 
 
 def test_abs_cosine():
+    """Test absolute cosine similarity computation."""
     n_samples = 10
     n_pairs = 20
     n_feats = 5
diff --git a/tests/test_map.py b/tests/test_map.py
index 816d7d8..f2e2379 100644
--- a/tests/test_map.py
+++ b/tests/test_map.py
@@ -1,3 +1,4 @@
+"""Tests for (mean) Average Precision calculation."""
 import numpy as np
 import pandas as pd
 import pytest
@@ -13,6 +14,7 @@
 
 
 def test_random_binary_matrix():
+    """Test the random binary matrix generation."""
     rng = np.random.default_rng(SEED)
     # Test with n=3, m=4, k=2
     A = compute.random_binary_matrix(3, 4, 2, rng)
@@ -28,6 +30,7 @@ def test_random_binary_matrix():
 
 
 def test_compute_ap():
+    """Test the average precision computation."""
     num_pos, num_neg, num_perm = 5, 6, 100
     total = num_pos + num_neg
 
@@ -56,6 +59,7 @@ def test_compute_ap():
 
 
 def test_compute_ap_contiguous():
+    """Test the contiguous average precision computation."""
     num_pos_range = [2, 9]
     num_neg_range = [10, 20]
     num_samples_range = [5, 30]
@@ -88,6 +92,7 @@ def test_compute_ap_contiguous():
 
 
 def test_pipeline():
+    """Check the implementation for mAP calculation."""
     length = 10
     vocab_size = {"p": 5, "w": 3, "l": 4}
     n_feats = 5
@@ -103,7 +108,7 @@ def test_pipeline():
 
 
 def test_pipeline_multilabel():
-    """Check the multilabel implementation with for mAP calculation"""
+    """Check the multilabel implementation for mAP calculation."""
     length = 10
     vocab_size = {"p": 3, "w": 5, "l": 4}
     n_feats = 8
@@ -124,6 +129,7 @@ def test_pipeline_multilabel():
 
 
 def test_raise_no_pairs():
+    """Test the exception raised when no pairs are found."""
     length = 10
     vocab_size = {"p": 3, "w": 3, "l": 10}
     n_feats = 5
@@ -143,6 +149,7 @@ def test_raise_no_pairs():
 
 
 def test_raise_nan_error():
+    """Test the exception raised when there are null values."""
     length = 10
     vocab_size = {"p": 5, "w": 3, "l": 4}
     n_feats = 8
diff --git a/tests/test_map_filter.py b/tests/test_map_filter.py
index 9b1b311..4fdfe1f 100644
--- a/tests/test_map_filter.py
+++ b/tests/test_map_filter.py
@@ -1,3 +1,4 @@
+"""Tests for data filtering by query."""
 import numpy as np
 import pytest
 
@@ -9,6 +10,7 @@
 
 @pytest.fixture
 def mock_dataframe():
+    """Create a mock dataframe."""
     length = 10
     vocab_size = {"p": 3, "w": 3, "l": 10}
     pos_sameby = ["l"]
@@ -20,6 +22,7 @@ def mock_dataframe():
 
 
 def test_correct(mock_dataframe):
+    """Test correct query."""
     df, parsed_cols = evaluate_and_filter(mock_dataframe, ["p == 'p1'", "w > 'w2'"])
     assert not df.empty
     assert "p" in parsed_cols and "w" in parsed_cols
@@ -27,6 +30,7 @@ def test_correct(mock_dataframe):
 
 
 def test_invalid_query(mock_dataframe):
+    """Test invalid query."""
     with pytest.raises(ValueError) as excinfo:
         evaluate_and_filter(mock_dataframe, ['l == "lHello"'])
     assert "Invalid combined query expression" in str(excinfo.value)
@@ -34,12 +38,14 @@ def test_invalid_query(mock_dataframe):
 
 
 def test_empty_result(mock_dataframe):
+    """Test that duplicate queries for the same column raise an error."""
     with pytest.raises(ValueError) as excinfo:
         evaluate_and_filter(mock_dataframe, ['p == "p1"', 'p == "p2"'])
     assert "Duplicate queries for column" in str(excinfo.value)
 
 
 def test_empty_result_from_valid_query(mock_dataframe):
+    """Test empty result from valid query."""
     with pytest.raises(ValueError) as excinfo:
         evaluate_and_filter(mock_dataframe, ['p == "p4"'])
     assert "No data matched the query" in str(excinfo.value)
diff --git a/tests/test_matching.py b/tests/test_matching.py
index 5bc1132..91c494f 100644
--- a/tests/test_matching.py
+++ b/tests/test_matching.py
@@ -1,4 +1,4 @@
-"""Test functions for Matcher"""
+"""Test functions for Matcher."""
 
 from string import ascii_letters
 
@@ -13,7 +13,7 @@
 
 
 def run_stress_sample_null(dframe, num_pairs):
-    """Assert every generated null pair does not match any column"""
+    """Assert every generated null pair does not match any column."""
     matcher = Matcher(dframe, dframe.columns, seed=SEED)
     for _ in range(num_pairs):
         id1, id2 = matcher.sample_null_pair(dframe.columns)
@@ -23,19 +23,19 @@ def run_stress_sample_null(dframe, num_pairs):
 
 
 def test_null_sample_large():
-    """Test Matcher guarantees elements with different values"""
+    """Test Matcher guarantees elements with different values."""
     dframe = create_dframe(32, 10000)
     run_stress_sample_null(dframe, 5000)
 
 
 def test_null_sample_small():
-    """Test Sample with small set"""
+    """Test Sample with small set."""
     dframe = create_dframe(3, 10)
     run_stress_sample_null(dframe, 100)
 
 
 def test_null_sample_nan_vals():
-    """Test NaN values are ignored"""
+    """Test NaN values are ignored."""
     dframe = create_dframe(4, 15)
     rng = np.random.default_rng(SEED)
     nan_mask = rng.random(dframe.shape) < 0.5
@@ -44,7 +44,7 @@ def test_null_sample_nan_vals():
 
 
 def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):
-    """Compute valid pairs using cross product from pandas"""
+    """Compute valid pairs using cross product from pandas."""
     cross = dframe.reset_index().merge(
         dframe.reset_index(), how="cross", suffixes=("_x", "_y")
     )
@@ -62,7 +62,7 @@ def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):
 
 
 def check_naive(dframe, matcher: Matcher, sameby, diffby):
-    """Check Matcher and naive generate same pairs"""
+    """Check Matcher and naive generate same pairs."""
     gt_pairs = get_naive_pairs(dframe, sameby, diffby)
     vals = matcher.get_all_pairs(sameby, diffby)
     vals = sum(vals.values(), [])
@@ -74,14 +74,14 @@ def check_naive(dframe, matcher: Matcher, sameby, diffby):
 
 
 def check_simulated_data(length, vocab_size, sameby, diffby, rng):
-    """Test sample of valid pairs from a simulated dataset"""
+    """Test sample of valid pairs from a simulated dataset."""
     dframe = simulate_random_dframe(length, vocab_size, sameby, diffby, rng)
     matcher = Matcher(dframe, dframe.columns, seed=SEED)
     check_naive(dframe, matcher, sameby, diffby)
 
 
 def test_stress_simulated_data():
-    """Run multiple tests using simulated data"""
+    """Run multiple tests using simulated data."""
     rng = np.random.default_rng(SEED)
     num_cols_range = [2, 6]
     vocab_size_range = [5, 10]
@@ -99,7 +99,7 @@ def test_stress_simulated_data():
 
 
 def test_empty_sameby():
-    """Test query without sameby"""
+    """Test query without sameby."""
     dframe = create_dframe(3, 10)
     matcher = Matcher(dframe, dframe.columns, seed=SEED)
     check_naive(dframe, matcher, sameby=[], diffby=["w", "c"])
@@ -107,7 +107,7 @@ def test_empty_sameby():
 
 
 def test_empty_diffby():
-    """Test query without diffby"""
+    """Test query without diffby."""
     dframe = create_dframe(3, 10)
     matcher = Matcher(dframe, dframe.columns, seed=SEED)
     matcher.get_all_pairs(["c"], [])
@@ -116,7 +116,7 @@ def test_empty_diffby():
 
 
 def test_raise_distjoint():
-    """Test check for disjoint sameby and diffby"""
+    """Test check for disjoint sameby and diffby."""
     dframe = create_dframe(3, 10)
     matcher = Matcher(dframe, dframe.columns, seed=SEED)
     with pytest.raises(ValueError, match="must be disjoint lists"):
@@ -124,7 +124,7 @@ def test_raise_distjoint():
 
 
 def test_raise_no_params():
-    """Test check for at least one of sameby and diffby"""
+    """Test check for at least one of sameby and diffby."""
     dframe = create_dframe(3, 10)
     matcher = Matcher(dframe, dframe.columns, seed=SEED)
     with pytest.raises(ValueError, match="at least one should be provided"):
@@ -132,7 +132,7 @@ def test_raise_no_params():
 
 
 def assert_sameby_diffby(dframe: pd.DataFrame, pairs_dict: dict, sameby, diffby):
-    """Assert the pairs are valid"""
+    """Assert the pairs are valid."""
     for _, pairs in pairs_dict.items():
         for id1, id2 in pairs:
             for col in sameby:
diff --git a/tests/test_matching_any.py b/tests/test_matching_any.py
index 25ccc02..3f18c4f 100644
--- a/tests/test_matching_any.py
+++ b/tests/test_matching_any.py
@@ -1,3 +1,4 @@
+"""Test matching with `any` conditions using simulated data."""
 from string import ascii_letters
 
 import numpy as np
@@ -10,7 +11,7 @@
 
 
 def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):
-    """Compute valid pairs using cross product from pandas"""
+    """Compute valid pairs using cross product from pandas."""
     cross = dframe.reset_index().merge(
         dframe.reset_index(), how="cross", suffixes=("_x", "_y")
     )
@@ -39,7 +40,7 @@ def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby):
 
 
 def check_naive(dframe, matcher: Matcher, sameby, diffby):
-    """Check Matcher and naive generate same pairs"""
+    """Check Matcher and naive generate same pairs."""
     gt_pairs = get_naive_pairs(dframe, sameby, diffby)
     vals = matcher.get_all_pairs(sameby, diffby)
     vals = sum(vals.values(), [])
@@ -51,7 +52,7 @@ def check_naive(dframe, matcher: Matcher, sameby, diffby):
 
 
 def check_simulated_data(length, vocab_size, sameby, diffby, rng):
-    """Test sample of valid pairs from a simulated dataset"""
+    """Test sample of valid pairs from a simulated dataset."""
     sameby_cols = sameby["all"] + sameby["any"]
     diffby_cols = diffby["all"] + diffby["any"]
     dframe = simulate_random_dframe(length, vocab_size, sameby_cols, diffby_cols, rng)
@@ -60,7 +61,7 @@ def check_simulated_data(length, vocab_size, sameby, diffby, rng):
 
 
 def test_stress_simulated_data_any_all():
-    """Run multiple tests using simulated data"""
+    """Run multiple tests using simulated data."""
     rng = np.random.default_rng(SEED)
     num_cols_range = [2, 6]
     vocab_size_range = [5, 10]
@@ -78,7 +79,7 @@ def test_stress_simulated_data_any_all():
 
 
 def test_stress_simulated_data_all_all():
-    """Run multiple tests using simulated data"""
+    """Run multiple tests using simulated data."""
     rng = np.random.default_rng(SEED)
     num_cols_range = [2, 6]
     vocab_size_range = [5, 10]
@@ -96,7 +97,7 @@ def test_stress_simulated_data_all_all():
 
 
 def test_stress_simulated_data_all_any():
-    """Run multiple tests using simulated data"""
+    """Run multiple tests using simulated data."""
     rng = np.random.default_rng(SEED)
     num_cols_range = [2, 6]
     vocab_size_range = [5, 10]
@@ -114,7 +115,7 @@ def test_stress_simulated_data_all_any():
 
 
 def test_stress_simulated_data_any_any():
-    """Run multiple tests using simulated data"""
+    """Run multiple tests using simulated data."""
     rng = np.random.default_rng(SEED)
     num_cols_range = [4, 6]
     vocab_size_range = [5, 10]
diff --git a/tests/test_matching_multilabel.py b/tests/test_matching_multilabel.py
index 50f978e..dd6e308 100644
--- a/tests/test_matching_multilabel.py
+++ b/tests/test_matching_multilabel.py
@@ -1,3 +1,4 @@
+"""Tests for the multilabel matching implementation."""
 import pandas as pd
 
 from copairs.matching import MatcherMultilabel
@@ -7,6 +8,7 @@
 
 
 def get_naive_pairs(dframe: pd.DataFrame, sameby, diffby, multilabel_col: str):
+    """Get pairs using a naive implementation."""
     dframe = dframe.copy()
 
     dframe[multilabel_col] = dframe[multilabel_col].apply(set)
@@ -45,7 +47,7 @@ def any_equal(row):
 
 
 def check_naive(dframe, matcher: MatcherMultilabel, sameby, diffby, multilabel_col):
-    """Check Matcher and naive generate same pairs"""
+    """Check Matcher and naive generate same pairs."""
     gt_pairs = get_naive_pairs(dframe, sameby, diffby, multilabel_col)
     vals = matcher.get_all_pairs(sameby, diffby)
     vals = sum(vals.values(), [])
@@ -57,7 +59,7 @@ def check_naive(dframe, matcher: MatcherMultilabel, sameby, diffby, multilabel_c
 
 
 def test_sameby():
-    """Check the multilabel implementation with sameby"""
+    """Check the multilabel implementation with sameby."""
     multilabel_col = "c"
     sameby = ["c"]
     diffby = ["p", "w"]
@@ -70,7 +72,7 @@ def test_sameby():
 
 
 def test_diffby():
-    """Check the multilabel implementation with sameby"""
+    """Check the multilabel implementation with diffby."""
     multilabel_col = "c"
     sameby = ["p"]
     diffby = ["c", "w"]
@@ -84,7 +86,7 @@ def test_diffby():
 
 
 def test_only_diffby():
-    """Check the multilabel implementation with only diffby being equal to c"""
+    """Check the multilabel implementation with only diffby being equal to c."""
     multilabel_col = "c"
     sameby = []
     diffby = ["c"]
@@ -97,7 +99,7 @@ def test_only_diffby():
 
 
 def test_only_diffby_many_cols():
-    """Check the multilabel implementation with only diffby being equal to c"""
+    """Check the multilabel implementation with only diffby over several columns."""
     multilabel_col = "c"
     sameby = []
     diffby = ["c", "w"]
@@ -110,7 +112,7 @@ def test_only_diffby_many_cols():
 
 
 def test_only_sameby_many_cols():
-    """Check the multilabel implementation with only diffby being equal to c"""
+    """Check the multilabel implementation with only sameby over several columns."""
     multilabel_col = "c"
     sameby = ["c", "w"]
     diffby = []
diff --git a/tests/test_replicating.py b/tests/test_replicating.py
index a273bbe..79d5661 100644
--- a/tests/test_replicating.py
+++ b/tests/test_replicating.py
@@ -1,3 +1,4 @@
+"""Tests for the replicating module."""
 from numpy.random import default_rng
 
 from copairs import Matcher
@@ -12,6 +13,7 @@
 
 
 def test_corr_between_replicates():
+    """Test calculating correlation between replicates."""
     rng = default_rng(SEED)
     num_samples = 10
     X = rng.normal(size=[num_samples, 6])
@@ -20,6 +22,7 @@ def test_corr_between_replicates():
 
 
 def test_correlation_test():
+    """Test correlation test."""
     rng = default_rng(SEED)
     num_samples = 10
     X = rng.normal(size=[num_samples, 6])
@@ -31,6 +34,7 @@ def test_correlation_test():
 
 
 def test_corr_from_pairs():
+    """Test calculating correlation from a list of named pairs."""
     num_samples = 10
     sameby = ["c"]
     diffby = ["p", "w"]