Skip to content

Commit

Permalink
Merge pull request #2321 from f4str/pytorch-yolo-rebase
Browse files Browse the repository at this point in the history
Subclass `PyTorchYolo` and `PyTorchDetectionTransformer` off `PyTorchObjectDetector`
  • Loading branch information
beat-buesser authored Feb 27, 2024
2 parents 80fd393 + 9e347e6 commit 3597228
Show file tree
Hide file tree
Showing 11 changed files with 994 additions and 1,882 deletions.
162 changes: 37 additions & 125 deletions art/estimators/certification/object_seeker/object_seeker.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from sklearn.cluster import DBSCAN
from tqdm.auto import tqdm

from art.utils import intersection_over_area, non_maximum_suppression
from art.utils import intersection_over_area

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -94,68 +94,16 @@ def __init__(
self.epsilon = epsilon
self.verbose = verbose

@property
@abc.abstractmethod
def channels_first(self) -> bool:
def _image_dimensions(self) -> Tuple[int, int]:
"""
:return: Boolean to indicate index of the color channels in the sample `x`.
"""
pass

@property
@abc.abstractmethod
def input_shape(self) -> Tuple[int, ...]:
"""
:return: Shape of one input sample.
"""
pass
Get the height and width of a sample input image.
@abc.abstractmethod
def _predict_classifier(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[str, np.ndarray]]:
"""
Perform prediction for a batch of inputs.
:param x: Samples of shape NCHW or NHWC.
:param batch_size: Batch size.
:return: Predictions of format `List[Dict[str, np.ndarray]]`, one for each input image. The fields of the Dict
are as follows:
- boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
- labels [N]: the labels for each image
- scores [N]: the scores of each prediction.
:return: Tuple containing the height and width of a sample input image.
"""
raise NotImplementedError

def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[str, np.ndarray]]:
    """
    Run the ObjectSeeker pipeline on every image in a batch.

    Each image is handled on its own: the base and masked predictions are computed, the masked boxes are
    pruned against the base boxes, the surviving boxes are unionized per cluster, and the unionized boxes are
    appended after the base boxes.

    :param x: Samples of shape NCHW or NHWC.
    :param batch_size: Batch size.
    :return: Predictions of format `List[Dict[str, np.ndarray]]`, one for each input image. The fields of the Dict
             are as follows:

             - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
             - labels [N]: the labels for each image
             - scores [N]: the scores of each prediction.
    """
    # Fields merged per image, in the order they appear in the returned dictionaries.
    fields = ("boxes", "labels", "scores")
    results = []

    for sample in tqdm(x, desc="ObjectSeeker", disable=not self.verbose):
        base, masked = self._masked_predictions(sample, batch_size=batch_size, **kwargs)
        unionized = self._unionize_clusters(self._prune_boxes(masked, base))

        # Base predictions come first, followed by the boxes recovered from the masked images.
        results.append({field: np.concatenate([base[field], unionized[field]]) for field in fields})

    return results

@abc.abstractmethod
def _masked_predictions(
self, x_i: np.ndarray, batch_size: int = 128, **kwargs
) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray]]:
Expand All @@ -167,70 +115,7 @@ def _masked_predictions(
:param batch_size: Batch size.
:return: Predictions for the base unmasked image and merged predictions for the masked image.
"""
x_mask = np.repeat(x_i[np.newaxis], self.num_lines * 4 + 1, axis=0)

if self.channels_first:
height = self.input_shape[1]
width = self.input_shape[2]
else:
height = self.input_shape[0]
width = self.input_shape[1]
x_mask = np.transpose(x_mask, (0, 3, 1, 2))

idx = 1

# Left masks
for k in range(1, self.num_lines + 1):
boundary = int(width / (self.num_lines + 1) * k)
x_mask[idx, :, :, :boundary] = 0
idx += 1

# Right masks
for k in range(1, self.num_lines + 1):
boundary = width - int(width / (self.num_lines + 1) * k)
x_mask[idx, :, :, boundary:] = 0
idx += 1

# Top masks
for k in range(1, self.num_lines + 1):
boundary = int(height / (self.num_lines + 1) * k)
x_mask[idx, :, :boundary, :] = 0
idx += 1

# Bottom masks
for k in range(1, self.num_lines + 1):
boundary = height - int(height / (self.num_lines + 1) * k)
x_mask[idx, :, boundary:, :] = 0
idx += 1

if not self.channels_first:
x_mask = np.transpose(x_mask, (0, 2, 3, 1))

predictions = self._predict_classifier(x=x_mask, batch_size=batch_size, **kwargs)
filtered_predictions = [
non_maximum_suppression(
pred, iou_threshold=self.iou_threshold, confidence_threshold=self.confidence_threshold
)
for pred in predictions
]

# Extract base predictions
base_predictions = filtered_predictions[0]

# Extract and merge masked predictions
boxes = np.concatenate([pred["boxes"] for pred in filtered_predictions[1:]])
labels = np.concatenate([pred["labels"] for pred in filtered_predictions[1:]])
scores = np.concatenate([pred["scores"] for pred in filtered_predictions[1:]])
merged_predictions = {
"boxes": boxes,
"labels": labels,
"scores": scores,
}
masked_predictions = non_maximum_suppression(
merged_predictions, iou_threshold=self.iou_threshold, confidence_threshold=self.confidence_threshold
)

return base_predictions, masked_predictions
raise NotImplementedError

def _prune_boxes(
self, masked_preds: Dict[str, np.ndarray], base_preds: Dict[str, np.ndarray]
Expand Down Expand Up @@ -332,6 +217,36 @@ def _unionize_clusters(self, masked_preds: Dict[str, np.ndarray]) -> Dict[str, n
}
return unionized_predictions

def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[str, np.ndarray]]:
    """
    Run the ObjectSeeker pipeline on every image in a batch.

    Each image is handled on its own: the base and masked predictions are computed, the masked boxes are
    pruned against the base boxes, the surviving boxes are unionized per cluster, and the unionized boxes are
    appended after the base boxes.

    :param x: Samples of shape NCHW or NHWC.
    :param batch_size: Batch size.
    :return: Predictions of format `List[Dict[str, np.ndarray]]`, one for each input image. The fields of the Dict
             are as follows:

             - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
             - labels [N]: the labels for each image
             - scores [N]: the scores of each prediction.
    """
    # Fields merged per image, in the order they appear in the returned dictionaries.
    fields = ("boxes", "labels", "scores")
    results = []

    for sample in tqdm(x, desc="ObjectSeeker", disable=not self.verbose):
        base, masked = self._masked_predictions(sample, batch_size=batch_size, **kwargs)
        unionized = self._unionize_clusters(self._prune_boxes(masked, base))

        # Base predictions come first, followed by the boxes recovered from the masked images.
        results.append({field: np.concatenate([base[field], unionized[field]]) for field in fields})

    return results

def certify(
self,
x: np.ndarray,
Expand All @@ -348,10 +263,7 @@ def certify(
:return: A list containing an array of bools for each bounding box per image indicating if the bounding
box is certified against the given patch.
"""
if self.channels_first:
_, height, width = self.input_shape
else:
height, width, _ = self.input_shape
height, width = self._image_dimensions()

patch_size = np.sqrt(height * width * patch_size)
height_offset = offset * height
Expand Down
Loading

0 comments on commit 3597228

Please sign in to comment.