diff --git a/tests/regression/test_regression.py b/tests/intg/test_accuracy_metrics.py similarity index 78% rename from tests/regression/test_regression.py rename to tests/intg/test_accuracy_metrics.py index 1d962fd8..e75f92bd 100644 --- a/tests/regression/test_regression.py +++ b/tests/intg/test_accuracy_metrics.py @@ -5,6 +5,7 @@ from typing import Dict, List, Tuple import cv2 +import numpy as np import openvino as ov import pytest @@ -53,18 +54,10 @@ def load_gt_bboxes(json_coco_path: str) -> List[Dict[str, List[Tuple[int, int, i return list(result.values()) -class TestDummyRegression: +class TestAccuracyMetrics: image = cv2.imread(IMAGE_PATH) gt_bboxes = load_gt_bboxes(COCO_ANN_PATH) - preprocess_fn = get_preprocess_fn( - change_channel_order=True, - input_size=(224, 224), - hwc_to_chw=True, - ) - - postprocess_fn = get_postprocess_fn(activation=ActivationType.SIGMOID) - @pytest.fixture(autouse=True) def setup(self, fxt_data_root): data_dir = fxt_data_root @@ -72,16 +65,23 @@ def setup(self, fxt_data_root): model_path = data_dir / "otx_models" / (MODEL_NAME + ".xml") model = ov.Core().read_model(model_path) + preprocess_fn = get_preprocess_fn( + change_channel_order=True, + input_size=(224, 224), + hwc_to_chw=True, + ) + postprocess_fn = get_postprocess_fn(activation=ActivationType.SIGMOID) + self.explainer = Explainer( model=model, task=Task.CLASSIFICATION, - preprocess_fn=self.preprocess_fn, + preprocess_fn=preprocess_fn, explain_mode=ExplainMode.WHITEBOX, ) self.pointing_game = PointingGame() - self.auc = InsertionDeletionAUC(model, self.preprocess_fn, self.postprocess_fn) - self.adcc = ADCC(model, self.preprocess_fn, self.postprocess_fn, self.explainer) + self.auc = InsertionDeletionAUC(model, preprocess_fn, postprocess_fn) + self.adcc = ADCC(model, preprocess_fn, postprocess_fn, self.explainer) def test_explainer_image(self): explanation = self.explainer(self.image, targets=["person"], label_names=VOC_NAMES, colormap=False) @@ -92,12 +92,12 @@ def test_explainer_image(self): auc_score = self.auc.evaluate([explanation], [self.image], steps=10).values() insertion_auc_score, deletion_auc_score, delta_auc_score = auc_score - assert insertion_auc_score >= 0.9 - assert deletion_auc_score >= 0.2 - assert delta_auc_score >= 0.7 + assert np.abs(insertion_auc_score - 0.94) <= 0.01 + assert np.abs(deletion_auc_score - 0.23) <= 0.01 + assert np.abs(delta_auc_score - 0.71) <= 0.01 adcc_score = self.adcc.evaluate([explanation], [self.image])["adcc"] - assert adcc_score > 0.9 + assert np.abs(adcc_score - 0.96) <= 0.01 def test_explainer_image_2_classes(self): explanation = self.explainer(self.image, targets=["person", "cat"], label_names=VOC_NAMES, colormap=False) @@ -108,12 +108,12 @@ def test_explainer_image_2_classes(self): auc_score = self.auc.evaluate([explanation], [self.image], steps=10).values() insertion_auc_score, deletion_auc_score, delta_auc_score = auc_score - assert insertion_auc_score >= 0.5 - assert deletion_auc_score >= 0.1 - assert delta_auc_score >= 0.35 + assert np.abs(insertion_auc_score - 0.53) <= 0.01 + assert np.abs(deletion_auc_score - 0.13) <= 0.01 + assert np.abs(delta_auc_score - 0.39) <= 0.01 adcc_score = self.adcc.evaluate([explanation], [self.image])["adcc"] - assert adcc_score > 0.5 + assert np.abs(adcc_score - 0.55) <= 0.01 def test_explainer_images(self): images = [self.image, self.image] @@ -128,9 +128,9 @@ def test_explainer_images(self): auc_score = self.auc.evaluate(explanations, images, steps=10).values() insertion_auc_score, deletion_auc_score, delta_auc_score = auc_score - assert insertion_auc_score >= 0.9 - assert deletion_auc_score >= 0.2 - assert delta_auc_score >= 0.7 + assert np.abs(insertion_auc_score - 0.94) <= 0.01 + assert np.abs(deletion_auc_score - 0.23) <= 0.01 + assert np.abs(delta_auc_score - 0.71) <= 0.01 adcc_score = self.adcc.evaluate(explanations, images)["adcc"] - assert adcc_score > 0.9 + assert np.abs(adcc_score - 0.96) <= 0.01 diff --git a/tests/regression/__init__.py b/tests/regression/__init__.py deleted file mode 100644 index e69de29b..00000000