diff --git a/docs/source/_static/ovxai-architecture.svg b/docs/source/_static/ovxai-architecture.svg
index c0d2e7e3..73317cab 100644
--- a/docs/source/_static/ovxai-architecture.svg
+++ b/docs/source/_static/ovxai-architecture.svg
@@ -1 +1 @@
-OpenVINO RuntimeBlack-Box XAI MethodsIR InsertionWhite-Box XAI MethodsWhite-Box XAI Method BaseBlack-Box XAI Method BaseXAI Method InterfaceExplainer (Stateful Class)API (Easy-to-use Stateless Function)CLI (TBD)Black-Box XAI MethodsWhite-Box XAI MethodsBlack-Box XAI MethodsWhite-Box XAI MethodsBlack-Box XAI MethodsWhite-Box XAI MethodsBlack-Box XAI MethodsWhite-Box XAI MethodsMetrics
\ No newline at end of file
+OpenVINO RuntimeBlack-Box XAI MethodsXAI Branch InsertionWhite-Box XAI MethodsWhite-Box XAI Method BaseBlack-Box XAI Method BaseXAI Method Interface & FactoryExplainer (Stateful Class)API (Easy-to-use Stateless Function)CLI (TBD)Black-Box XAI MethodsWhite-Box XAI MethodsBlack-Box XAI MethodsWhite-Box XAI MethodsBlack-Box XAI MethodsWhite-Box XAI MethodsBlack-Box XAI MethodsWhite-Box XAI MethodsMetricsPytorchONNX
\ No newline at end of file
diff --git a/examples/run_torch_onnx.py b/examples/run_torch_onnx.py
index 25769147..29c226ea 100644
--- a/examples/run_torch_onnx.py
+++ b/examples/run_torch_onnx.py
@@ -63,7 +63,7 @@ def run_insert_xai_torch(args: list[str]):
     logger.info(f"Torch model prediction: classes ({probs.shape[-1]}) -> label ({label}) -> prob ({probs[0, label]})")
 
     # Insert XAI head
-    model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION)
+    model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION, input_size=input_size)  # Optional input size arg to help insertion
 
     # Torch XAI model inference
     model_xai.eval()
@@ -121,7 +121,7 @@ def run_insert_xai_torch_to_onnx(args: list[str]):
     image_norm = image_norm[None, :]  # CxHxW -> 1xCxHxW
 
     # Insert XAI head
-    model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION)
+    model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION, input_size=input_size)
 
     # ONNX model conversion
     model_path = Path(args.output_dir) / "model.onnx"
@@ -184,7 +184,7 @@ def run_insert_xai_torch_to_openvino(args: list[str]):
     image_norm = image_norm[None, :]  # CxHxW -> 1xCxHxW
 
     # Insert XAI head
-    model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION)
+    model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION, input_size=input_size)
 
     # OpenVINO model conversion
     ov_model = ov.convert_model(
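The `input_size` argument threaded through the examples above is worth a standalone illustration. A minimal sketch of the updated call, assuming the package-level `insert_xai` export and `Task` from `openvino_xai.common.parameters`; the timm model id is only an example of a fixed-input ViT:

```python
import timm
import torch

from openvino_xai import insert_xai  # assumed package-level export
from openvino_xai.common.parameters import Task

# ViT variants trace correctly only at their fixed training resolution,
# so the examples now forward the preprocessing size to insert_xai.
model = timm.create_model("vit_base_patch16_224", pretrained=True)
input_size = (224, 224)  # must match the example's preprocessing

model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION, input_size=input_size)
model_xai.eval()
```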
diff --git a/openvino_xai/methods/white_box/torch.py b/openvino_xai/methods/white_box/torch.py
index a3139ec1..b2f763ae 100644
--- a/openvino_xai/methods/white_box/torch.py
+++ b/openvino_xai/methods/white_box/torch.py
@@ -47,11 +47,13 @@ def __init__(
         embed_scaling: bool = True,
         device_name: str = "CPU",
         prepare_model: bool = True,
+        input_size: tuple[int, int] = (224, 224),  # For fixed input size models like ViT
         **kwargs,
     ):
         super().__init__(model=model, preprocess_fn=preprocess_fn, device_name=device_name)
         self._target_layer = target_layer
         self._embed_scaling = embed_scaling
+        self._input_size = input_size
 
         if prepare_model:
             self.prepare_model()
@@ -66,6 +68,7 @@ def prepare_model(self, load_model: bool = True) -> torch.nn.Module:
             return self._model_compiled
 
         model = copy.deepcopy(self._model)
+        model.eval()
 
         # Feature
         if self._target_layer:
@@ -78,7 +81,6 @@ def prepare_model(self, load_model: bool = True) -> torch.nn.Module:
         model.register_forward_hook(self._output_hook)
 
         setattr(model, "has_xai", True)
-        model.eval()
 
         if load_model:
             self._model_compiled = model
@@ -119,17 +121,26 @@ def _find_feature_module_auto(self, module: torch.nn.Module) -> torch.nn.Module:
         self._feature_module = None
         self._num_modules = 0
 
+        def _has_spatial_dim(shape: torch.Size) -> bool:
+            if len(shape) != 4:  # expect BxCxHxW
+                return False
+            if shape[2] <= 1 or shape[3] <= 1:  # require H > 1 and W > 1
+                return False
+            if shape[1] <= shape[2] or shape[1] <= shape[3]:  # require C > H and C > W: feature maps generally have more channels than spatial dims
+                return False
+            return True
+
         def _detect_hook(module: torch.nn.Module, inputs: Any, output: Any) -> None:
             if isinstance(output, torch.Tensor):
                 module.index = self._num_modules
                 self._num_modules += 1
                 shape = output.shape
-                if len(shape) == 4 and shape[2] > 1 and shape[3] > 1:
+                if _has_spatial_dim(shape):
                     self._feature_module = module
 
         global_hook_handle = torch.nn.modules.module.register_module_forward_hook(_detect_hook)
         try:
-            module.forward(torch.zeros((1, 3, 128, 128)))
+            module.forward(torch.zeros((1, 3, *self._input_size)))
         finally:
             global_hook_handle.remove()
         if self._feature_module is None:
@@ -274,10 +285,13 @@ def __init__(
 
     def _find_feature_module_auto(self, module: torch.nn.Module) -> torch.nn.Module:
         """Detect feature module in the model by finding the 3rd last LayerNorm module."""
         self._feature_module = None
-        norm_modules = [m for _, m in module.named_modules() if isinstance(m, torch.nn.LayerNorm)]
+        norm_modules = []
+        for name, sub_module in module.named_modules():
+            if "LayerNorm" in type(sub_module).__name__ or "BatchNorm" in type(sub_module).__name__ or "norm1" in name:
+                norm_modules.append(sub_module)
         if len(norm_modules) < 3:
-            raise RuntimeError("Feature modules with LayerNorm are less than 3 in the torch model")
+            raise RuntimeError("Fewer than 3 LayerNorm or BatchNorm feature modules found in the torch model")
         self._feature_module = norm_modules[-3]
         return self._feature_module
 
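Restating the new detection heuristic outside the diff may help review. This is a standalone sketch, not the library API: it reproduces `_has_spatial_dim` and shows which activation shapes are treated as CNN feature maps.

```python
import torch

def has_spatial_dim(shape: torch.Size) -> bool:
    if len(shape) != 4:  # expect BxCxHxW
        return False
    if shape[2] <= 1 or shape[3] <= 1:  # require H > 1 and W > 1
        return False
    if shape[1] <= shape[2] or shape[1] <= shape[3]:  # require C > H and C > W
        return False
    return True

print(has_spatial_dim(torch.Size([1, 512, 7, 7])))     # True: late CNN feature map
print(has_spatial_dim(torch.Size([1, 64, 112, 112])))  # False: early layer, C < H
print(has_spatial_dim(torch.Size([1, 197, 768])))      # False: ViT token sequence
```

Because the hook records the last matching module, the deepest activation that still looks like a feature map wins; running the dummy forward at `self._input_size` instead of a hard-coded 128x128 keeps fixed-input models such as ViT from failing during this probe.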
diff --git a/pyproject.toml b/pyproject.toml
index b4e0f6fe..14eda272 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "openvino_xai"
-version = "1.1.0rc0"
+version = "1.1.0"
 dependencies = [
     "openvino-dev==2024.4",
     "opencv-python",
diff --git a/tests/func/test_torch_onnx_timm_full.py b/tests/func/test_torch_onnx_timm_full.py
index 6266327a..692666cb 100644
--- a/tests/func/test_torch_onnx_timm_full.py
+++ b/tests/func/test_torch_onnx_timm_full.py
@@ -21,61 +21,12 @@
 
 TEST_MODELS = timm.list_models(pretrained=True)
 
-NOT_SUPPORTED_BY_BB_MODELS = {
+SKIPPED_MODELS = {
     "repvit": "urllib.error.HTTPError: HTTP Error 404: Not Found",
     "tf_efficientnet_cc": "torch.onnx.errors.SymbolicValueError: Unsupported: ONNX export of convolution for kernel of unknown shape.",
     "vit_base_r50_s16_224.orig_in21k": "RuntimeError: Error(s) in loading state_dict for VisionTransformer",
-    "vit_gigantic_patch16_224_ijepa.in22k": "RuntimeError: shape '[1, 13, 13, -1]' is invalid for input of size 274560",
     "vit_huge_patch14_224.orig_in21k": "RuntimeError: Error(s) in loading state_dict for VisionTransformer",
     "vit_large_patch32_224.orig_in21k": "RuntimeError: Error(s) in loading state_dict for VisionTransformer",
-    "volo_": "RuntimeError: Exception from src/core/src/dimension.cpp:227: Cannot get length of dynamic dimension",
-}
-
-SUPPORTED_BUT_FAILED_BY_WB_MODELS = {
-    "swin": "Only two outputs of the between block Add node supported, but got 1. Try to use black-box.",
-    "vit_base_patch16_rpn_224": "Number of normalization outputs > 1",
-    "vit_relpos_medium_patch16_rpn_224": "ValueError in openvino_xai/methods/white_box/recipro_cam.py:215",
-}
-
-NOT_SUPPORTED_BY_WB_MODELS = {
-    **NOT_SUPPORTED_BY_BB_MODELS,
-    # Killed on WB
-    "beit_large_patch16_512": "Failed to allocate 94652825600 bytes of memory",
-    "convmixer_1536_20": "OOM Killed",
-    "eva_large_patch14_336": "OOM Killed",
-    "eva02_base_patch14_448": "OOM Killed",
-    "eva02_large_patch14_448": "OOM Killed",
-    "mobilevit_": "Segmentation fault",
-    "mobilevit_xxs": "Segmentation fault",
-    "mvitv2_base.fb_in1k": "Segmentation fault",
-    "mvitv2_large": "OOM Killed",
-    "mvitv2_small": "Segmentation fault",
-    "mvitv2_tiny": "Segmentation fault",
-    "pit_": "Segmentation fault",
-    "pvt_": "Segmentation fault",
-    "tf_efficientnet_l2.ns_jft_in1k": "OOM Killed",
-    "xcit_large": "Failed to allocate 81581875200 bytes of memory",
-    "xcit_medium_24_p8_384": "OOM Killed",
-    "xcit_small_12_p8_384": "OOM Killed",
-    "xcit_small_24_p8_384": "OOM Killed",
-    # Not expected to work for now
-    "cait_": "Cannot create an empty Constant. Please provide valid data.",
-    "coat_": "Only two outputs of the between block Add node supported, but got 1.",
-    "crossvit": "One (and only one) of the nodes has to be Add type. But got StridedSlice and StridedSlice.",
-    # work in CNN mode -> "davit": "Only two outputs of the between block Add node supported, but got 1.",
-    # work in CNN mode -> "efficientformer": "Cannot find output backbone_node in auto mode.",
-    # work in CNN mode -> "focalnet": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    # work in CNN mode -> "gcvit": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    "levit_": "Check 'TRShape::merge_into(output_shape, in_copy)' failed",
-    # work in CNN mode -> "maxvit": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    # work in CNN mode -> "maxxvit": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    # work in CNN mode -> "mobilevitv2": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    # work in CNN mode -> "nest_": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    # work in CNN mode -> "poolformer": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    "sequencer2d": "Cannot find output backbone_node in auto mode, please provide target_layer.",
-    "tnt_s_patch16_224": "Only two outputs of the between block Add node supported, but got 1.",
-    "twins": "One (and only one) of the nodes has to be Add type. But got ShapeOf and Transpose.",
-    # work in CNN mode -> "visformer": "Cannot find output backbone_node in auto mode, please provide target_layer",
 }
 
 
@@ -89,15 +40,10 @@ def setup(self, fxt_clear_cache):
         self.clear_cache_converted_models = fxt_clear_cache
 
     @pytest.mark.parametrize("model_id", TEST_MODELS)
-    # @pytest.mark.parametrize("model_id", ["resnet18.a1_in1k"])
     def test_insert_xai(self, model_id, fxt_output_root: Path):
-        # for skipped_model in NOT_SUPPORTED_BY_WB_MODELS.keys():
-        #     if skipped_model in model_id:
-        #         pytest.skip(reason=NOT_SUPPORTED_BY_WB_MODELS[skipped_model])
-
-        # for failed_model in SUPPORTED_BUT_FAILED_BY_WB_MODELS.keys():
-        #     if failed_model in model_id:
-        #         pytest.xfail(reason=SUPPORTED_BUT_FAILED_BY_WB_MODELS[failed_model])
+        for skipped_model in SKIPPED_MODELS.keys():
+            if skipped_model in model_id:
+                pytest.skip(reason=SKIPPED_MODELS[skipped_model])
 
         # Load Torch model from timm
         model = timm.create_model(model_id, in_chans=3, pretrained=True)
@@ -114,7 +60,7 @@ def test_insert_xai(self, model_id, fxt_output_root: Path):
         image_norm = image_norm[None, :]  # CxHxW -> 1xCxHxW
 
         # Insert XAI head
-        model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION)
+        model_xai: torch.nn.Module = insert_xai(model, Task.CLASSIFICATION, input_size=input_size)
 
         # Torch XAI model inference
         model_xai.eval()
@@ -164,6 +110,7 @@ def test_insert_xai(self, model_id, fxt_output_root: Path):
         assert saliency_map.dtype == np.uint8
 
         # Clean up
+        model_path.unlink()
         self.clear_cache()
 
     def clear_cache(self):
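Note that the keys of `SKIPPED_MODELS` are matched as substrings of the timm model id, so one entry covers a whole family of variants. An illustration of that matching (the model id below is hypothetical):

```python
SKIPPED_MODELS = {
    "tf_efficientnet_cc": "ONNX export of convolution for kernel of unknown shape.",
}

model_id = "tf_efficientnet_cc_b0_4e.in1k"  # hypothetical variant name
reasons = [reason for key, reason in SKIPPED_MODELS.items() if key in model_id]
if reasons:
    print(f"skip: {reasons[0]}")  # mirrors pytest.skip(reason=...) in the test
```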
diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py
index 2e387bc7..c852255f 100644
--- a/tests/perf/conftest.py
+++ b/tests/perf/conftest.py
@@ -27,10 +27,11 @@ def pytest_addoption(parser: pytest.Parser):
         "Defaults to 10.",
     )
     parser.addoption(
-        "--num-masks",
+        "--preset",
         action="store",
-        default=5000,
-        help="Number of masks for black box methods." "Defaults to 5000.",
+        default="speed",
+        choices=("speed", "balance", "quality"),
+        help="Efficiency preset for black box methods. Defaults to 'speed'.",
     )
     parser.addoption(
         "--dataset-root",
@@ -57,13 +58,13 @@ def fxt_num_repeat(request: pytest.FixtureRequest) -> int:
 
 
 @pytest.fixture(scope="session")
-def fxt_num_masks(request: pytest.FixtureRequest) -> int:
-    """Number of masks for black box methods."""
-    num_masks = int(request.config.getoption("--num-masks"))
-    msg = f"{num_masks = }"
+def fxt_preset(request: pytest.FixtureRequest) -> str:
+    """Efficiency preset for black box methods."""
+    preset = request.config.getoption("--preset")
+    msg = f"{preset = }"
     log.info(msg)
     print(msg)
-    return num_masks
+    return preset
 
 
 @pytest.fixture(scope="session")
@@ -148,6 +149,7 @@ def fxt_perf_summary(
             "Method.RECIPROCAM": "RECIPROCAM",
             "Method.VITRECIPROCAM": "RECIPROCAM",
             "Method.RISE": "RISE",
+            "Method.AISE": "AISE",
         }
     )
     raw_data.to_csv(fxt_output_root / "perf-raw-all.csv", index=False)
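With this option in place, the suite is invoked as, for example, `pytest tests/perf/test_efficiency.py --preset balance`. The fixture passes the raw string through, and the tests convert it with `Preset(fxt_preset)`; the sketch below assumes `Preset` is a value-based enum over the three option choices:

```python
from openvino_xai.methods.black_box.base import Preset

# Assumption: Preset accepts the same strings the --preset option allows.
for name in ("speed", "balance", "quality"):
    print(Preset(name))
```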
diff --git a/tests/perf/test_performance.py b/tests/perf/test_efficiency.py
similarity index 91%
rename from tests/perf/test_performance.py
rename to tests/perf/test_efficiency.py
index c1b83ede..9d9da02c 100644
--- a/tests/perf/test_performance.py
+++ b/tests/perf/test_efficiency.py
@@ -14,6 +14,7 @@
 from openvino_xai.common.parameters import Method, Task
 from openvino_xai.explainer.explainer import Explainer, ExplainMode
 from openvino_xai.explainer.utils import get_postprocess_fn, get_preprocess_fn
+from openvino_xai.methods.black_box.base import Preset
 from openvino_xai.utils.model_export import export_to_onnx
 from tests.perf.perf_tests_utils import (
     clear_cache,
@@ -38,7 +39,7 @@
 )
 
 
-class TestPerfClassificationTimm:
+class TestEfficiency:
     clear_cache_converted_models = False
     clear_cache_hf_models = False
     supported_num_classes = {
@@ -122,12 +123,15 @@ def test_classification_white_box(self, model_id: str, fxt_num_repeat: int, fxt_
             records.append(record)
 
         df = pd.DataFrame(records)
-        df.to_csv(self.output_dir / f"perf-raw-wb-{model_id}.csv")
+        df.to_csv(self.output_dir / f"perf-raw-wb-{model_id}-{explain_method}.csv")
 
         clear_cache(self.data_dir, self.cache_dir, self.clear_cache_converted_models, self.clear_cache_hf_models)
 
     @pytest.mark.parametrize("model_id", TEST_MODELS)
-    def test_classification_black_box(self, model_id, fxt_num_repeat: int, fxt_num_masks: int, fxt_tags: dict):
+    @pytest.mark.parametrize("method", [Method.AISE, Method.RISE])
+    def test_classification_black_box(
+        self, model_id: str, method: Method, fxt_num_repeat: int, fxt_preset: str, fxt_tags: dict
+    ):
         timm_model, model_cfg = get_timm_model(model_id, self.supported_num_classes)
 
         onnx_path = self.data_dir / "timm_models" / "converted_models" / model_id / "model_fp32.onnx"
@@ -163,25 +167,26 @@ def test_classification_black_box(
 
             record = fxt_tags.copy()
             record["model"] = model_id
-            record["method"] = Method.RISE
+            record["method"] = method
             record["seed"] = seed
-            record["num_masks"] = fxt_num_masks
+            record["preset"] = fxt_preset
 
             start_time = time()
 
             explainer = Explainer(
                 model=model,
                 preprocess_fn=preprocess_fn,
                 postprocess_fn=postprocess_fn,
                 explain_mode=ExplainMode.BLACKBOX,  # defaults to AUTO
+                explain_method=method,  # defaults to AISE
            )
             explanation = explainer(
                 image,
                 targets=label,
                 resize=True,
                 colormap=True,
                 overlay=True,
-                num_masks=fxt_num_masks,  # kwargs of the RISE algo
+                preset=Preset(fxt_preset),  # kwargs of the black box algo
             )
 
             explain_time = time() - start_time
@@ -194,6 +199,6 @@ def test_classification_black_box(
             records.append(record)
 
         df = pd.DataFrame(records)
-        df.to_csv(self.output_dir / f"perf-raw-bb-{model_id}.csv", index=False)
+        df.to_csv(self.output_dir / f"perf-raw-bb-{model_id}-{method}.csv", index=False)
 
         clear_cache(self.data_dir, self.cache_dir, self.clear_cache_converted_models, self.clear_cache_hf_models)
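For reviewers unfamiliar with the new call shape, here is the black-box path in isolation — an illustrative sketch rather than a runnable test: `model`, `image`, `label`, and the pre/postprocess helpers stand in for the objects prepared earlier in the test.

```python
from openvino_xai.common.parameters import Method
from openvino_xai.explainer.explainer import Explainer, ExplainMode
from openvino_xai.methods.black_box.base import Preset

explainer = Explainer(
    model=model,                      # ov.Model prepared as in the test
    preprocess_fn=preprocess_fn,
    postprocess_fn=postprocess_fn,
    explain_mode=ExplainMode.BLACKBOX,
    explain_method=Method.AISE,       # or Method.RISE
)
explanation = explainer(
    image,
    targets=label,
    overlay=True,
    preset=Preset("speed"),           # replaces the per-method num_masks knob
)
```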
diff --git a/tests/unit/methods/white_box/test_torch.py b/tests/unit/methods/white_box/test_torch.py
index 8bea7d4d..ee487302 100644
--- a/tests/unit/methods/white_box/test_torch.py
+++ b/tests/unit/methods/white_box/test_torch.py
@@ -40,6 +40,7 @@ def __init__(self, num_classes: int = 2):
             torch.nn.Identity(),
             torch.nn.Identity(),
             torch.nn.Identity(),
+            torch.nn.LazyConv2d(256, (1, 1)),
         )
         self.neck = torch.nn.AdaptiveAvgPool2d((1, 1))
         self.output = torch.nn.LazyLinear(out_features=num_classes)
@@ -123,7 +124,6 @@ def _output_hook(
         assert type(output) == dict
         prediction = output["prediction"]
         saliency_maps = output[SALIENCY_MAP_OUTPUT_NAME]
-        assert np.all(saliency_maps == prediction)
 
 
 def test_prepare_model():