Merge branch 'develop' into enhance/try-cnn-transformer-ir-insersion

openvinotoolkit · Aug 2, 2024 · aaa6da7 · aaa6da7
2 parents 5312d86 + c497c01
commit aaa6da7
Show file tree

Hide file tree

Showing 23 changed files with 712 additions and 170 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### Summary
 
 * Support OpenVINO IR (.xml) / ONNX (.onnx) model file for `Explainer` model
+* Enable AISE: Adaptive Input Sampling for Explanation of Black-box Models.
 
 ### What's Changed
 
@@ -13,7 +14,7 @@
 * Refactor OpenVINO imports by @goodsong81 in https://github.com/openvinotoolkit/openvino_xai/pull/45
 * Support OV IR / ONNX model file for Explainer by @goodsong81 in https://github.com/openvinotoolkit/openvino_xai/pull/47
 * Try CNN -> ViT assumption for IR insertion by @goodsong81 in https://github.com/openvinotoolkit/openvino_xai/pull/48
-
+* Enable AISE: Adaptive Input Sampling for Explanation of Black-box Models by @negvet in https://github.com/openvinotoolkit/openvino_xai/pull/49
 
 ### Known Issues
 

diff --git a/README.md b/README.md
@@ -72,7 +72,8 @@ At the moment, *Image Classification* and *Object Detection* tasks are supported
 | Computer Vision | Image Classification | White-Box | ReciproCAM          | [arxiv](https://arxiv.org/abs/2209.14074) / [src](openvino_xai/methods/white_box/recipro_cam.py) |
 |                 |                      |           | VITReciproCAM       | [arxiv](https://arxiv.org/abs/2310.02588) / [src](openvino_xai/methods/white_box/recipro_cam.py) |
 |                 |                      |           | ActivationMap       | experimental / [src](openvino_xai/methods/white_box/activation_map.py) |
-|                 |                      | Black-Box | RISE                | [arxiv](https://arxiv.org/abs/1806.07421v3) / [src](openvino_xai/methods/black_box/rise.py) |
+|                 |                      | Black-Box | AISE                | [src](openvino_xai/methods/black_box/aise.py) |
+|                 |                      |           | RISE                | [arxiv](https://arxiv.org/abs/1806.07421v3) / [src](openvino_xai/methods/black_box/rise.py) |
 |                 | Object Detection     | White-Box | ClassProbabilityMap | experimental / [src](openvino_xai/methods/white_box/det_class_probability_map.py) |
 
 ### Supported explainable models

diff --git a/docs/source/user-guide.md b/docs/source/user-guide.md
@@ -247,7 +247,12 @@ explanation.save("output_path", "name")
 Black-box mode does not update the model (treating the model as a black box).
 Black-box approaches are based on the perturbation of the input data and measurement of the model's output change.
 
-The process is repeated many times, requiring hundreds or thousands of forward passes and introducing **significant computational overhead**.
+For black-box mode we support 2 algorithms: **AISE** (by default) and [**RISE**](https://arxiv.org/abs/1806.07421). AISE is more effective for generating saliency maps for a few specific classes, while RISE is better suited to generating maps for all classes at once.
+
+Pros:
+
+- **Flexible** - can be applied to any custom model.
+
+Cons:
+
+- **Computational overhead** - black-box requires hundreds or thousands of forward passes.
 
 `preprocess_fn` (or preprocessed images) and `postprocess_fn` are required to be provided by the user for black-box mode.
 
@@ -286,7 +291,6 @@ explanation = explainer(
     target_explain_labels=[11, 14],  # target classes to explain
     # target_explain_labels=-1,  # explain all classes
     overlay=True,  # False by default
-    num_masks=1000,  # kwargs for the RISE algorithm
 )
 
 # Save saliency maps

diff --git a/examples/run_classification.py b/examples/run_classification.py
@@ -53,7 +53,7 @@ def explain_auto(args):
         preprocess_fn=preprocess_fn,
     )
 
-    # Prepare input image and explanation parameters, can be different for each explain call
+    # Prepare input image
     image = cv2.imread(args.image_path)
 
     # Generate explanation
@@ -96,7 +96,7 @@ def explain_white_box(args):
         embed_scaling=True, # True by default.  If set to True, saliency map scale (0 ~ 255) operation is embedded in the model
     )
 
-    # Prepare input image and explanation parameters, can be different for each explain call
+    # Prepare input image and label names
     image = cv2.imread(args.image_path)
     voc_labels = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
               'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
@@ -139,7 +139,7 @@ def explain_black_box(args):
         explain_mode=ExplainMode.BLACKBOX,  # defaults to AUTO
     )
 
-    # Prepare input image and explanation parameters, can be different for each explain call
+    # Prepare input image and label names
     image = cv2.imread(args.image_path)
     voc_labels = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
               'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
@@ -150,7 +150,6 @@ def explain_black_box(args):
         targets=['dog', 'person'],  # target classes to explain, also [11, 14] possible
         label_names=voc_labels,  # optional names
         overlay=True,
-        num_masks=1000,  # kwargs of the RISE algo
     )
 
     logger.info(
@@ -225,7 +224,7 @@ def explain_white_box_vit(args):
         # target_layer="/blocks/blocks.10/Add_1",  # timm vit_base_patch8_224.augreg_in21k_ft_in1k
     )
 
-    # Prepare input image and explanation parameters, can be different for each explain call
+    # Prepare input image
     image = cv2.imread(args.image_path)
 
     # Generate explanation

diff --git a/openvino_xai/common/parameters.py b/openvino_xai/common/parameters.py
@@ -27,13 +27,15 @@ class Method(Enum):
         VITRECIPROCAM - VITReciproCAM method.
         DETCLASSPROBABILITYMAP - DetClassProbabilityMap method.
         RISE - RISE method.
+        AISE - AISE method.
     """
 
     ACTIVATIONMAP = "activationmap"
     RECIPROCAM = "reciprocam"
     VITRECIPROCAM = "vitreciprocam"
     DETCLASSPROBABILITYMAP = "detclassprobabilitymap"
     RISE = "rise"
+    AISE = "aise"
 
 
 WhiteBoxXAIMethods = {
@@ -43,11 +45,13 @@ class Method(Enum):
 }
 BlackBoxXAIMethods = {
     Method.RISE,
+    Method.AISE,
 }
 ClassificationXAIMethods = {
     Method.ACTIVATIONMAP,
     Method.RECIPROCAM,
     Method.RISE,
+    Method.AISE,
 }
 DetectionXAIMethods = {
     Method.DETCLASSPROBABILITYMAP,

diff --git a/openvino_xai/common/utils.py b/openvino_xai/common/utils.py
@@ -59,8 +59,8 @@ def retrieve_otx_model(data_dir: str | Path, model_name: str, dir_url=None) -> N
 
 def scaling(saliency_map: np.ndarray, cast_to_uint8: bool = True) -> np.ndarray:
     """Scaling saliency maps to [0, 255] range."""
-    original_num_dims = saliency_map.shape
-    if len(original_num_dims) == 2:
+    original_num_dims = saliency_map.ndim
+    if original_num_dims == 2:
         # If input map is 2D array, add dim so that below code would work
         saliency_map = saliency_map[np.newaxis, ...]
 
@@ -87,6 +87,52 @@ def get_min_max(saliency_map: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
     return min_values, max_values
 
 
+def sigmoid(x: np.ndarray) -> np.ndarray:
+    """Compute sigmoid values of x.
+
+    Evaluated as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
+    large-magnitude negative inputs do not overflow inside ``np.exp``.
+
+    :param x: Input values.
+    :type x: np.ndarray
+    :return: Element-wise ``1 / (1 + exp(-x))``.
+    :rtype: np.ndarray
+    """
+    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed without overflow.
+    return np.exp(-np.logaddexp(0, -x))
+
+
+def softmax(x: np.ndarray) -> np.ndarray:
+    """Compute softmax values of x.
+
+    Normalization runs over all elements of ``x`` at once (no axis is used).
+
+    :param x: Input values.
+    :type x: np.ndarray
+    :return: Exponentiated values divided by their total sum.
+    :rtype: np.ndarray
+    """
+    # Subtract the global maximum first so the largest exponent is exp(0).
+    shifted = x - np.max(x)
+    exponentials = np.exp(shifted)
+    return exponentials / exponentials.sum()
+
+
 class IdentityPreprocessFN:
     def __call__(self, x: Any) -> Any:
         return x
+
+
+def is_bhwc_layout(image: np.ndarray) -> bool:
+    """Check whether layout of image is BHWC.
+
+    Heuristic: the layout is reported as BHWC when the last dimension is
+    smaller than both of the two dimensions preceding it.
+
+    :param image: Four-dimensional image array (the shape is unpacked into four values).
+    :type image: np.ndarray
+    :return: True if the layout is judged to be BHWC, False otherwise.
+    :rtype: bool
+    """
+    _, dim0, dim1, dim2 = image.shape
+    return dim0 > dim2 and dim1 > dim2
+
+
+def format_to_bhwc(image: np.ndarray) -> np.ndarray:
+    """Format image to BHWC from ndim=3 or ndim=4.
+
+    :param image: Image with three or four dimensions.
+    :type image: np.ndarray
+    :return: Four-dimensional image; axes are permuted with (0, 2, 3, 1)
+        when ``is_bhwc_layout`` does not report a BHWC layout.
+    :rtype: np.ndarray
+    """
+    # A three-dimensional input gets a leading axis of size one.
+    batched = np.expand_dims(image, axis=0) if image.ndim == 3 else image
+    if is_bhwc_layout(batched):
+        return batched
+    # bchw layout -> bhwc
+    return batched.transpose((0, 2, 3, 1))
+
+
+def infer_size_from_image(image: np.ndarray) -> Tuple[int, int]:
+    """Estimate image size.
+
+    A two-dimensional input returns its shape as is. Three-dimensional inputs
+    get a leading axis, then the last three dimensions are compared: if the
+    last one is smaller than both of the preceding two, those two are
+    returned, otherwise the final two dimensions are returned.
+
+    :param image: Image with two, three, or four dimensions.
+    :type image: np.ndarray
+    :raises ValueError: If the image has any other number of dimensions.
+    :return: Estimated image size as a pair of dimensions.
+    :rtype: Tuple[int, int]
+    """
+    num_dims = image.ndim
+    if num_dims == 2:
+        return image.shape
+    if num_dims == 3:
+        image = np.expand_dims(image, axis=0)
+    elif num_dims != 4:
+        raise ValueError(f"Supports only two, three, and four dimensional image, but got {image.ndim}.")
+
+    _, dim0, dim1, dim2 = image.shape
+    channels_last = dim0 > dim2 and dim1 > dim2  # bhwc layout
+    return (dim0, dim1) if channels_last else (dim1, dim2)
diff --git a/openvino_xai/explainer/explainer.py b/openvino_xai/explainer/explainer.py
@@ -10,13 +10,16 @@
 
 from openvino_xai import Task
 from openvino_xai.common.parameters import Method
-from openvino_xai.common.utils import IdentityPreprocessFN, logger
+from openvino_xai.common.utils import (
+    IdentityPreprocessFN,
+    infer_size_from_image,
+    logger,
+)
 from openvino_xai.explainer.explanation import Explanation
 from openvino_xai.explainer.utils import (
     convert_targets_to_numpy,
     explains_all,
-    get_explain_target_indices,
-    infer_size_from_image,
+    get_target_indices,
 )
 from openvino_xai.explainer.visualizer import Visualizer
 from openvino_xai.methods.base import MethodBase
@@ -202,16 +205,16 @@ def explain(
         """
         targets = convert_targets_to_numpy(targets)
 
-        explain_target_indices = None
+        target_indices = None
         if isinstance(self.method, BlackBoxXAIMethod) and not explains_all(targets):
-            explain_target_indices = get_explain_target_indices(
+            target_indices = get_target_indices(
                 targets,
                 label_names,
             )
 
         saliency_map = self.method.generate_saliency_map(
             data,
-            explain_target_indices=explain_target_indices,  # type: ignore
+            target_indices=target_indices,  # type: ignore
             **kwargs,
         )
 
@@ -258,6 +261,7 @@ def _create_black_box_method(self, task: Task) -> MethodBase:
             model=self.model,
             postprocess_fn=self.postprocess_fn,
             preprocess_fn=self.preprocess_fn,
+            explain_method=self.explain_method,
             device_name=self.device_name,
         )
         logger.info("Explaining the model in black-box mode.")

diff --git a/openvino_xai/explainer/explanation.py b/openvino_xai/explainer/explanation.py
@@ -12,15 +12,16 @@
 from openvino_xai.explainer.utils import (
     convert_targets_to_numpy,
     explains_all,
-    get_explain_target_indices,
+    get_target_indices,
 )
 
 
 class Explanation:
     """
     Explanation selects target saliency maps, holds it and its layout.
 
-    :param saliency_map: Raw saliency map.
+    :param saliency_map: Raw saliency map, as a numpy array or as a dict.
+    :type saliency_map: np.ndarray | Dict[int | str, np.ndarray]
     :param targets: List of custom labels to explain, optional. Can be list of integer indices (int),
         or list of names (str) from label_names.
     :type targets: np.ndarray | List[int | str] | int | str
@@ -30,14 +31,21 @@ class Explanation:
 
     def __init__(
         self,
-        saliency_map: np.ndarray,
+        saliency_map: np.ndarray | Dict[int | str, np.ndarray],
         targets: np.ndarray | List[int | str] | int | str,
         label_names: List[str] | None = None,
     ):
         targets = convert_targets_to_numpy(targets)
 
-        self._check_saliency_map(saliency_map)
-        self._saliency_map = self._format_sal_map_as_dict(saliency_map)
+        if isinstance(saliency_map, np.ndarray):
+            self._check_saliency_map(saliency_map)
+            self._saliency_map = self._format_sal_map_as_dict(saliency_map)
+            self.total_num_targets = len(self._saliency_map)
+        elif isinstance(saliency_map, dict):
+            self._saliency_map = saliency_map
+            self.total_num_targets = None
+        else:
+            raise ValueError(f"Expect saliency_map to be np.ndarray or dict, but got{type(saliency_map)}.")
 
         if "per_image_map" in self._saliency_map:
             self.layout = Layout.ONE_MAP_PER_IMAGE_GRAY
@@ -74,8 +82,6 @@ def targets(self):
     def _check_saliency_map(saliency_map: np.ndarray):
         if saliency_map is None:
             raise RuntimeError("Saliency map is None.")
-        if not isinstance(saliency_map, np.ndarray):
-            raise ValueError(f"Raw saliency_map has to be np.ndarray, but got {type(saliency_map)}.")
         if saliency_map.size == 0:
             raise RuntimeError("Saliency map is zero size array.")
         if saliency_map.shape[0] > 1:
@@ -105,24 +111,26 @@ def _select_target_saliency_maps(
         label_names: List[str] | None = None,
     ) -> Dict[int | str, np.ndarray]:
         assert self.layout == Layout.MULTIPLE_MAPS_PER_IMAGE_GRAY
-        explain_target_indices = self._select_target_indices(
+        target_indices = self._select_target_indices(
             targets=targets,
-            total_num_targets=len(self._saliency_map),
             label_names=label_names,
         )
-        saliency_maps_selected = {i: self._saliency_map[i] for i in explain_target_indices}
+        saliency_maps_selected = {i: self._saliency_map[i] for i in target_indices}
         return saliency_maps_selected
 
-    @staticmethod
     def _select_target_indices(
+        self,
         targets: np.ndarray | List[int | str],
-        total_num_targets: int,
         label_names: List[str] | None = None,
     ) -> List[int] | np.ndarray:
-        explain_target_indices = get_explain_target_indices(targets, label_names)
-        if not all(0 <= target_index <= (total_num_targets - 1) for target_index in explain_target_indices):
-            raise ValueError(f"All targets explanation indices have to be in range 0..{total_num_targets - 1}.")
-        return explain_target_indices
+        """
+        Resolve targets to indices and check that a saliency map exists for each of them.
+
+        :param targets: Targets to select, passed to ``get_target_indices`` together with label_names.
+        :type targets: np.ndarray | List[int | str]
+        :param label_names: Optional list of label names, forwarded to ``get_target_indices``.
+        :type label_names: List[str] | None
+        :raises ValueError: If an index is out of range (when ``total_num_targets`` is known)
+            or is not present among the saliency maps (when it is None).
+        :return: Validated target indices.
+        """
+        target_indices = get_target_indices(targets, label_names)
+        if self.total_num_targets is not None:
+            if not all(0 <= target_index <= (self.total_num_targets - 1) for target_index in target_indices):
+                raise ValueError(f"All targets indices have to be in range 0..{self.total_num_targets - 1}.")
+        else:
+            # total_num_targets is None when the saliency map was supplied as a dict,
+            # so membership is checked per index and the first missing one is reported.
+            for target_index in target_indices:
+                if target_index not in self.saliency_map:
+                    raise ValueError(
+                        f"Provided target index {target_index} is not available among saliency maps."
+                    )
+        return target_indices
 
     def save(self, dir_path: Path | str, name: str | None = None) -> None:
         """Dumps saliency map."""

diff --git a/openvino_xai/explainer/utils.py b/openvino_xai/explainer/utils.py
@@ -7,6 +7,8 @@
 import cv2
 import numpy as np
 
+from openvino_xai.common.utils import sigmoid, softmax
+
 
 def convert_targets_to_numpy(targets):
     targets = np.asarray(targets)
@@ -15,7 +17,7 @@ def convert_targets_to_numpy(targets):
     return np.atleast_1d(targets)
 
 
-def get_explain_target_indices(
+def get_target_indices(
     targets: np.ndarray | List[int | str],
     label_names: List[str] | None = None,
 ) -> List[int]:
@@ -127,17 +129,6 @@ def get_postprocess_fn(logit_name="logits") -> Callable[[], np.ndarray]:
     return partial(postprocess_fn, logit_name=logit_name)
 
 
-def softmax(x: np.ndarray) -> np.ndarray:
-    """Compute softmax values of x."""
-    e_x = np.exp(x - np.max(x))
-    return e_x / e_x.sum()
-
-
-def sigmoid(x: np.ndarray) -> np.ndarray:
-    """Compute sigmoid values of x."""
-    return 1 / (1 + np.exp(-x))
-
-
 class ActivationType(Enum):
     SIGMOID = "sigmoid"
     SOFTMAX = "softmax"
@@ -154,34 +145,3 @@ def get_score(x: np.ndarray, index: int, activation: ActivationType = Activation
         assert x.shape[0] == 1
         return x[0, index]
     return x[index]
-
-
-def format_to_bhwc(image: np.ndarray) -> np.ndarray:
-    """Format image to BHWC from ndim=3 or ndim=4."""
-    if image.ndim == 3:
-        image = np.expand_dims(image, axis=0)
-    if not is_bhwc_layout(image):
-        # bchw layout -> bhwc
-        image = image.transpose((0, 2, 3, 1))
-    return image
-
-
-def is_bhwc_layout(image: np.array) -> bool:
-    """Check whether layout of image is BHWC."""
-    _, dim0, dim1, dim2 = image.shape
-    if dim0 > dim2 and dim1 > dim2:  # bhwc layout
-        return True
-    return False
-
-
-def infer_size_from_image(image: np.ndarray) -> Tuple[int, int]:
-    """Estimate image size."""
-    if image.ndim == 2:
-        return image.shape
-
-    image = format_to_bhwc(image)
-    if image.ndim == 4:
-        _, h, w, _ = image.shape
-    else:
-        raise ValueError(f"Supports only two, three, and four dimensional image, but got {image.ndim}.")
-    return h, w