From aa8f961cfb9dc7e901aa20be616b0cae949d72d4 Mon Sep 17 00:00:00 2001 From: Felix Soubelet <19598248+fsoubelet@users.noreply.github.com> Date: Thu, 14 Nov 2024 17:10:16 +0100 Subject: [PATCH] Unit Testing Helper (#136) --- .github/workflows/coverage.yml | 3 +- CHANGELOG.md | 5 +++ doc/modules/index.rst | 5 +++ tests/test_testing.py | 69 ++++++++++++++++++++++++++++++++++ tfs/__init__.py | 2 +- tfs/testing.py | 63 +++++++++++++++++++++++++++++++ 6 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 tests/test_testing.py create mode 100644 tfs/testing.py diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 772f72ea..f05fdded 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -11,5 +11,6 @@ jobs: coverage: uses: pylhc/.github/.github/workflows/coverage.yml@master with: - src-dir: tfs + src-dir: tfs + pytest-options: -m "not cern_network" --cov-report term-missing secrets: inherit diff --git a/CHANGELOG.md b/CHANGELOG.md index d3074f3a..33053f97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # TFS-Pandas Changelog +## Version 3.9.0 + +- Added: + - A module, `tfs.testing`, has been added and made publicly available. It provides an assertion function for comparing `TfsDataFrame` objects, similar to the one provided by `pandas`, intended for use in unit tests. + ## Version 3.8.2 - Changed: diff --git a/doc/modules/index.rst b/doc/modules/index.rst index 760e4ec7..2d941eb9 100644 --- a/doc/modules/index.rst +++ b/doc/modules/index.rst @@ -31,6 +31,11 @@ API Reference :noindex: +.. automodule:: tfs.testing + :members: + :noindex: + + .. 
automodule:: tfs.tools :members: :noindex: diff --git a/tests/test_testing.py b/tests/test_testing.py new file mode 100644 index 00000000..2a749e30 --- /dev/null +++ b/tests/test_testing.py @@ -0,0 +1,69 @@ +import pytest + +from tfs.frame import TfsDataFrame +from tfs.testing import assert_tfs_frame_equal + + +class TestAssertTfsDataFrameEqual: + + def test_no_headers_equal(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + assert_tfs_frame_equal(df1, df1) # identical frames: must not raise + + def test_no_headers_different_data(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = TfsDataFrame({"a": [1, 2, 2], "b": [4, 5, 6]}) + with pytest.raises(AssertionError): + assert_tfs_frame_equal(df1, df2) + + def test_no_headers_different_order(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = TfsDataFrame({"b": [4, 5, 6], "a": [1, 2, 3]}) + with pytest.raises(AssertionError): + assert_tfs_frame_equal(df1, df2) + assert_tfs_frame_equal(df1, df2, check_like=True) + + def test_with_headers_equal(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"}) + df2 = TfsDataFrame({"b": [4, 5, 6], "a": [1, 2, 3]}, headers={"a": "a", "b": "b"}) + assert_tfs_frame_equal(df1, df1) + with pytest.raises(AssertionError): + assert_tfs_frame_equal(df1, df2) + assert_tfs_frame_equal(df1, df2, check_like=True) + + def test_with_headers_different_data(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"}) + df2 = TfsDataFrame({"a": [1, 2, 2], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"}) + with pytest.raises(AssertionError): + assert_tfs_frame_equal(df1, df2) + + def test_with_headers_different_datatypes(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"}) + df2 = TfsDataFrame({"a": [1, 2, 3], "b": ["4", "5", "6"]}, headers={"a": "a", "b": "b"}) + with pytest.raises(AssertionError): + assert_tfs_frame_equal(df1, df2) + + df3 = TfsDataFrame({"a": [1.0, 
2.0, 3.0], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"}) + with pytest.raises(AssertionError) as e: + assert_tfs_frame_equal(df1, df3) + assert "dtype" in str(e) + + def test_with_headers_different_headers_values(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"}) + df2 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "c"}) + with pytest.raises(AssertionError) as e: + assert_tfs_frame_equal(df1, df2) + assert "b != c" in str(e) + + with pytest.raises(AssertionError) as e: + assert_tfs_frame_equal(df1, df2, compare_keys=False) + assert "b != c" in str(e) + + def test_with_headers_different_headers_keys(self): + df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b"}) + df2 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, headers={"a": "a", "b": "b", "c": "c"}) + with pytest.raises(AssertionError): + assert_tfs_frame_equal(df1, df2) # `compare_keys=True` is default + + # compare only common keys --- + assert_tfs_frame_equal(df1, df2, compare_keys=False) diff --git a/tfs/__init__.py b/tfs/__init__.py index 61ae5d07..cf88d223 100644 --- a/tfs/__init__.py +++ b/tfs/__init__.py @@ -11,7 +11,7 @@ __title__ = "tfs-pandas" __description__ = "Read and write tfs files." __url__ = "https://github.com/pylhc/tfs" -__version__ = "3.8.2" +__version__ = "3.9.0" __author__ = "pylhc" __author_email__ = "pylhc@github.com" __license__ = "MIT" diff --git a/tfs/testing.py b/tfs/testing.py new file mode 100644 index 00000000..bdea2ba9 --- /dev/null +++ b/tfs/testing.py @@ -0,0 +1,63 @@ +""" +Testing +------- + +Testing functionality for TfsDataFrames. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pandas._testing import assert_dict_equal +from pandas.testing import assert_frame_equal + +if TYPE_CHECKING: + from tfs.frame import TfsDataFrame + + +# ----- Helpers ----- # + + +def assert_tfs_frame_equal( + df1: TfsDataFrame, df2: TfsDataFrame, compare_keys: bool = True, **kwargs +): + """ + Compare two `TfsDataFrame` objects, with `df1` being the reference + that `df2` is compared to. This is mostly intended for unit tests. + Comparison is done on both the contents of the headers dictionaries + (with `pandas`'s `assert_dict_equal`) as well as the data itself + (with `pandas`'s `assert_frame_equal`). + + .. note:: + The `compare_keys` argument is inherited from `pandas`'s + `assert_dict_equal` function and is quite unintuitive. It + means to check that both dictionaries have *the exact same + set of keys*. + + Whether this is given as `True` or `False`, the values are + compared anyway for all keys in the first (reference) dict. + In the case of this helper function, all keys present in + `df1`'s headers will be checked for in `df2`'s headers and + their corresponding values compared. If given as `True`, + then both headers should be the exact same dictionary. + + Args: + df1 (TfsDataFrame): The first `TfsDataFrame` to compare. + df2 (TfsDataFrame): The second `TfsDataFrame` to compare. + compare_keys (bool): If `True`, checks that both headers + have the exact same set of keys. See the above note + for exact meaning and caveat. Defaults to `True`. + **kwargs: Additional keyword arguments are transmitted to + `pandas.testing.assert_frame_equal` for the comparison of + the dataframe parts themselves. + + Example: + .. 
code-block:: python + + reference_df = tfs.read("path/to/file.tfs") + new_df = some_function(*args, **kwargs) + assert_tfs_frame_equal(reference_df, new_df) + """ + assert_frame_equal(df1, df2, **kwargs) + assert_dict_equal(df1.headers, df2.headers, compare_keys=compare_keys)