diff --git a/art/attacks/inference/membership_inference/black_box.py b/art/attacks/inference/membership_inference/black_box.py
index 21758bbe05..3715e55329 100644
--- a/art/attacks/inference/membership_inference/black_box.py
+++ b/art/attacks/inference/membership_inference/black_box.py
@@ -98,6 +98,7 @@ def __init__(
         self.epochs = nn_model_epochs
         self.batch_size = nn_model_batch_size
         self.learning_rate = nn_model_learning_rate
+        self.use_label = True

         self._regressor_model = RegressorMixin in type(self.estimator).__mro__

@@ -108,67 +109,8 @@ def __init__(
             self.attack_model_type = "None"
         else:
             self.default_model = True
-            if self.attack_model_type == "nn":
-                import torch
-                from torch import nn
-                class MembershipInferenceAttackModel(nn.Module):
-                    """
-                    Implementation of a pytorch model for learning a membership inference attack.
-
-                    The features used are probabilities/logits or losses for the attack training data along with
-                    its true labels.
-                    """
-
-                    def __init__(self, num_classes, num_features=None):
-
-                        self.num_classes = num_classes
-                        if num_features:
-                            self.num_features = num_features
-                        else:
-                            self.num_features = num_classes
-
-                        super().__init__()
-
-                        self.features = nn.Sequential(
-                            nn.Linear(self.num_features, 512),
-                            nn.ReLU(),
-                            nn.Linear(512, 100),
-                            nn.ReLU(),
-                            nn.Linear(100, 64),
-                            nn.ReLU(),
-                        )
-
-                        self.labels = nn.Sequential(
-                            nn.Linear(self.num_classes, 256),
-                            nn.ReLU(),
-                            nn.Linear(256, 64),
-                            nn.ReLU(),
-                        )
-
-                        self.combine = nn.Sequential(
-                            nn.Linear(64 * 2, 1),
-                        )
-
-                        self.output = nn.Sigmoid()
-
-                    def forward(self, x_1, label):
-                        """Forward the model."""
-                        out_x1 = self.features(x_1)
-                        out_l = self.labels(label)
-                        is_member = self.combine(torch.cat((out_x1, out_l), 1))
-                        return self.output(is_member)
-
-                if self.input_type == "prediction":
-                    num_classes = estimator.nb_classes  # type: ignore
-                    self.attack_model = MembershipInferenceAttackModel(num_classes)
-                else:
-                    if self._regressor_model:
-                        self.attack_model = MembershipInferenceAttackModel(1, num_features=1)
-                    else:
-                        num_classes = estimator.nb_classes  # type: ignore
-                        self.attack_model = MembershipInferenceAttackModel(num_classes, num_features=1)
-            elif self.attack_model_type == "rf":
+            if self.attack_model_type == "rf":
                 self.attack_model = RandomForestClassifier()
             elif self.attack_model_type == "gb":
                 self.attack_model = GradientBoostingClassifier()
@@ -180,13 +122,15 @@ def forward(self, x_1, label):
                 self.attack_model = KNeighborsClassifier()
             elif self.attack_model_type == "svm":
                 self.attack_model = SVC(probability=True)
+            elif attack_model_type != "nn":
+                raise ValueError("Illegal value for parameter `attack_model_type`.")

     def fit(  # pylint: disable=W0613
         self,
-        x: np.ndarray,
-        y: np.ndarray,
-        test_x: np.ndarray,
-        test_y: np.ndarray,
+        x: Optional[np.ndarray] = None,
+        y: Optional[np.ndarray] = None,
+        test_x: Optional[np.ndarray] = None,
+        test_y: Optional[np.ndarray] = None,
         pred: Optional[np.ndarray] = None,
         test_pred: Optional[np.ndarray] = None,
         **kwargs
@@ -195,10 +139,10 @@ def fit(  # pylint: disable=W0613
         Train the attack model.

         :param x: Records that were used in training the target estimator. Can be None if supplying `pred`.
-        :param y: True labels for `x`.
+        :param y: True labels for `x`. If not supplied, attack will be based solely on model predictions.
         :param test_x: Records that were not used in training the target estimator. Can be None if supplying
                        `test_pred`.
-        :param test_y: True labels for `test_x`.
+        :param test_y: True labels for `test_x`. If not supplied, attack will be based solely on model predictions.
         :param pred: Estimator predictions for the records, if not supplied will be generated by calling the estimators'
                      `predict` function. Only relevant for input_type='prediction'.
         :param test_pred: Estimator predictions for the test records, if not supplied will be generated by calling the
@@ -216,28 +160,30 @@ def fit(  # pylint: disable=W0613
         if test_x is not None and self.estimator.input_shape[0] != test_x.shape[1]:  # pragma: no cover
             raise ValueError("Shape of test_x does not match input_shape of estimator")

-        if not self._regressor_model:
+        if y is not None and test_y is not None and not self._regressor_model:
             y = check_and_transform_label_format(y, nb_classes=self.estimator.nb_classes, return_one_hot=True)
             test_y = check_and_transform_label_format(test_y, nb_classes=self.estimator.nb_classes, return_one_hot=True)

-        if x is not None and y.shape[0] != x.shape[0]:  # pragma: no cover
+        if x is not None and y is not None and y.shape[0] != x.shape[0]:  # pragma: no cover
             raise ValueError("Number of rows in x and y do not match")
-        if pred is not None and y.shape[0] != pred.shape[0]:  # pragma: no cover
+        if pred is not None and y is not None and y.shape[0] != pred.shape[0]:  # pragma: no cover
             raise ValueError("Number of rows in pred and y do not match")
-        if test_x is not None and test_y.shape[0] != test_x.shape[0]:  # pragma: no cover
+        if test_x is not None and test_y is not None and test_y.shape[0] != test_x.shape[0]:  # pragma: no cover
             raise ValueError("Number of rows in test_x and test_y do not match")
-        if test_pred is not None and test_y.shape[0] != test_pred.shape[0]:  # pragma: no cover
+        if test_pred is not None and test_y is not None and test_y.shape[0] != test_pred.shape[0]:  # pragma: no cover
             raise ValueError("Number of rows in test_pred and test_y do not match")

         # Create attack dataset
         # uses final probabilities/logits
-        if pred is None:
+        x_len = 0
+        test_len = 0
+        if pred is None and x is not None:
             x_len = x.shape[0]
-        else:
+        elif pred is not None:
             x_len = pred.shape[0]
-        if test_pred is None:
+        if test_pred is None and test_x is not None:
             test_len = test_x.shape[0]
-        else:
+        elif test_pred is not None:
             test_len = test_pred.shape[0]

         if self.input_type == "prediction":
@@ -253,6 +199,8 @@ def fit(  # pylint: disable=W0613
                 test_features = test_pred.astype(np.float32)
         # only for models with loss
         elif self.input_type == "loss":
+            if y is None:
+                raise ValueError("Cannot compute loss values without y.")
             if x is not None:
                 # members
                 features = self.estimator.compute_loss(x, y).astype(np.float32).reshape(-1, 1)
@@ -288,11 +236,14 @@ def fit(  # pylint: disable=W0613
         test_labels = np.zeros(test_len)

         x_1 = np.concatenate((features, test_features))
-        x_2 = np.concatenate((y, test_y))
+        x_2: Optional[np.ndarray] = None
+        if y is not None and test_y is not None:
+            x_2 = np.concatenate((y, test_y))
+            if self._regressor_model and x_2 is not None:
+                x_2 = x_2.astype(np.float32).reshape(-1, 1)
         y_new = np.concatenate((labels, test_labels))
-
-        if self._regressor_model:
-            x_2 = x_2.astype(np.float32).reshape(-1, 1)
+        if x_2 is None:
+            self.use_label = False

         if self.default_model and self.attack_model_type == "nn":
             import torch
@@ -301,37 +252,157 @@ def fit(  # pylint: disable=W0613
             from torch import nn
             from torch import optim
             from torch.utils.data import DataLoader
             from art.utils import to_cuda

-            loss_fn = nn.BCELoss()
-            optimizer = optim.Adam(self.attack_model.parameters(), lr=self.learning_rate)  # type: ignore
+            if x_2 is not None:

-            attack_train_set = self._get_attack_dataset(f_1=x_1, f_2=x_2, label=y_new)
-            train_loader = DataLoader(attack_train_set, batch_size=self.batch_size, shuffle=True, num_workers=0)
+                class MembershipInferenceAttackModel(nn.Module):
+                    """
+                    Implementation of a pytorch model for learning a membership inference attack.

-            self.attack_model = to_cuda(self.attack_model)  # type: ignore
-            self.attack_model.train()  # type: ignore
+                    The features used are probabilities/logits or losses for the attack training data along with
+                    its true labels.
+                    """

-            for _ in range(self.epochs):
-                for (input1, input2, targets) in train_loader:
-                    input1, input2, targets = to_cuda(input1), to_cuda(input2), to_cuda(targets)
-                    _, input2 = torch.autograd.Variable(input1), torch.autograd.Variable(input2)
-                    targets = torch.autograd.Variable(targets)
+                    def __init__(self, num_classes, num_features=None):

-                    optimizer.zero_grad()
-                    outputs = self.attack_model(input1, input2)  # type: ignore
-                    loss = loss_fn(outputs, targets.unsqueeze(1))
+                        self.num_classes = num_classes
+                        if num_features:
+                            self.num_features = num_features
+                        else:
+                            self.num_features = num_classes

-                    loss.backward()
-                    optimizer.step()
-        else:
+                        super().__init__()
+
+                        self.features = nn.Sequential(
+                            nn.Linear(self.num_features, 512),
+                            nn.ReLU(),
+                            nn.Linear(512, 100),
+                            nn.ReLU(),
+                            nn.Linear(100, 64),
+                            nn.ReLU(),
+                        )
+
+                        self.labels = nn.Sequential(
+                            nn.Linear(self.num_classes, 256),
+                            nn.ReLU(),
+                            nn.Linear(256, 64),
+                            nn.ReLU(),
+                        )
+
+                        self.combine = nn.Sequential(
+                            nn.Linear(64 * 2, 1),
+                        )
+
+                        self.output = nn.Sigmoid()
+
+                    def forward(self, x_1, label):
+                        """Forward the model."""
+                        out_x1 = self.features(x_1)
+                        out_l = self.labels(label)
+                        is_member = self.combine(torch.cat((out_x1, out_l), 1))
+                        return self.output(is_member)
+
+                if self.input_type == "prediction":
+                    num_classes = self.estimator.nb_classes  # type: ignore
+                    self.attack_model = MembershipInferenceAttackModel(num_classes)
+                else:  # loss
+                    if self._regressor_model:
+                        self.attack_model = MembershipInferenceAttackModel(1, num_features=1)
+                    else:
+                        num_classes = self.estimator.nb_classes  # type: ignore
+                        self.attack_model = MembershipInferenceAttackModel(num_classes, num_features=1)
+
+                loss_fn = nn.BCELoss()
+                optimizer = optim.Adam(self.attack_model.parameters(), lr=self.learning_rate)  # type: ignore
+
+                attack_train_set = self._get_attack_dataset(f_1=x_1, f_2=x_2, label=y_new)
+                train_loader = DataLoader(attack_train_set, batch_size=self.batch_size, shuffle=True, num_workers=0)
+
+                self.attack_model = to_cuda(self.attack_model)  # type: ignore
+                self.attack_model.train()  # type: ignore
+
+                for _ in range(self.epochs):
+                    for (input1, input2, targets) in train_loader:
+                        input1, input2, targets = to_cuda(input1), to_cuda(input2), to_cuda(targets)
+                        _, input2 = torch.autograd.Variable(input1), torch.autograd.Variable(input2)
+                        targets = torch.autograd.Variable(targets)
+
+                        optimizer.zero_grad()
+                        outputs = self.attack_model(input1, input2)  # type: ignore
+                        loss = loss_fn(outputs, targets.unsqueeze(1))
+
+                        loss.backward()
+                        optimizer.step()
+            else:  # no label
+
+                class MembershipInferenceAttackModelNoLabel(nn.Module):
+                    """
+                    Implementation of a pytorch model for learning a membership inference attack.
+
+                    The features used are probabilities/logits or losses for the attack training data along with
+                    its true labels.
+                    """
+
+                    def __init__(self, num_features):
+
+                        self.num_features = num_features
+
+                        super().__init__()
+
+                        self.features = nn.Sequential(
+                            nn.Linear(self.num_features, 512),
+                            nn.ReLU(),
+                            nn.Linear(512, 100),
+                            nn.ReLU(),
+                            nn.Linear(100, 64),
+                            nn.ReLU(),
+                            nn.Linear(64, 1),
+                        )
+
+                        self.output = nn.Sigmoid()
+
+                    def forward(self, x_1):
+                        """Forward the model."""
+                        out_x1 = self.features(x_1)
+                        return self.output(out_x1)
+
+                num_classes = self.estimator.nb_classes  # type: ignore
+                self.attack_model = MembershipInferenceAttackModelNoLabel(num_classes)
+
+                loss_fn = nn.BCELoss()
+                optimizer = optim.Adam(self.attack_model.parameters(), lr=self.learning_rate)  # type: ignore
+
+                attack_train_set = self._get_attack_dataset_no_label(f_1=x_1, label=y_new)
+                train_loader = DataLoader(attack_train_set, batch_size=self.batch_size, shuffle=True, num_workers=0)
+
+                self.attack_model = to_cuda(self.attack_model)  # type: ignore
+                self.attack_model.train()  # type: ignore
+
+                for _ in range(self.epochs):
+                    for (input1, targets) in train_loader:
+                        input1, targets = to_cuda(input1), to_cuda(targets)
+                        input1 = torch.autograd.Variable(input1)
+                        targets = torch.autograd.Variable(targets)
+
+                        optimizer.zero_grad()
+                        outputs = self.attack_model(input1)  # type: ignore
+                        loss = loss_fn(outputs, targets.unsqueeze(1))
+
+                        loss.backward()
+                        optimizer.step()
+
+        else:  # not nn
             y_ready = check_and_transform_label_format(y_new, nb_classes=2, return_one_hot=False)
-            self.attack_model.fit(np.c_[x_1, x_2], y_ready.ravel())  # type: ignore
+            if x_2 is not None:
+                self.attack_model.fit(np.c_[x_1, x_2], y_ready.ravel())  # type: ignore
+            else:
+                self.attack_model.fit(x_1, y_ready.ravel())  # type: ignore

     def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
         """
         Infer membership in the training set of the target estimator.

         :param x: Input records to attack. Can be None if supplying `pred`.
-        :param y: True labels for `x`.
+        :param y: True labels for `x`. If not supplied, attack will be based solely on model predictions.
         :param pred: Estimator predictions for the records, if not supplied will be generated by calling the estimators'
                      `predict` function. Only relevant for input_type='prediction'.
         :param probabilities: a boolean indicating whether to return the predicted probabilities per class, or just
@@ -349,24 +420,22 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
         else:
             probabilities = False

-        if y is None:  # pragma: no cover
-            raise ValueError("MembershipInferenceBlackBox requires true labels `y`.")
         if x is None and pred is None:
             raise ValueError("Must supply either x or pred")

+        if y is None and self.use_label:
+            raise ValueError("y must be provided")
+
         if self.estimator.input_shape is not None and x is not None:  # pragma: no cover
             if self.estimator.input_shape[0] != x.shape[1]:
                 raise ValueError("Shape of x does not match input_shape of estimator")

-        if not self._regressor_model:
+        if y is not None and not self._regressor_model:
             y = check_and_transform_label_format(y, nb_classes=self.estimator.nb_classes, return_one_hot=True)

-        if y is None:
-            raise ValueError("None value detected.")
-
-        if x is not None and y.shape[0] != x.shape[0]:  # pragma: no cover
+        if x is not None and y is not None and y.shape[0] != x.shape[0]:  # pragma: no cover
             raise ValueError("Number of rows in x and y do not match")
-        if pred is not None and y.shape[0] != pred.shape[0]:  # pragma: no cover
+        if pred is not None and y is not None and y.shape[0] != pred.shape[0]:  # pragma: no cover
             raise ValueError("Number of rows in pred and y do not match")

         if self.input_type == "prediction":
@@ -375,6 +444,8 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
             else:
                 features = pred.astype(np.float32)
         elif self.input_type == "loss":
+            if y is None:
+                raise ValueError("Cannot compute loss values without y.")
             if x is not None:
                 features = self.estimator.compute_loss(x, y).astype(np.float32).reshape(-1, 1)
             else:
@@ -388,7 +459,7 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
         else:
             raise ValueError("Value of `input_type` not recognized.")

-        if self._regressor_model:
+        if y is not None and self._regressor_model:
             y = y.astype(np.float32).reshape(-1, 1)

         if self.default_model and self.attack_model_type == "nn":
@@ -398,22 +469,39 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
             self.attack_model.eval()  # type: ignore
             predictions: Optional[np.ndarray] = None
-            test_set = self._get_attack_dataset(f_1=features, f_2=y)
-            test_loader = DataLoader(test_set, batch_size=self.batch_size, shuffle=False, num_workers=0)
-            for input1, input2, _ in test_loader:
-                input1, input2 = to_cuda(input1), to_cuda(input2)
-                outputs = self.attack_model(input1, input2)  # type: ignore
-                if not probabilities:
-                    predicted = torch.round(outputs)
-                else:
-                    predicted = outputs
-                predicted = from_cuda(predicted)
-                if predictions is None:
-                    predictions = predicted.detach().numpy()
-                else:
-                    predictions = np.vstack((predictions, predicted.detach().numpy()))
+            if y is not None and self.use_label:
+                test_set = self._get_attack_dataset(f_1=features, f_2=y)
+                test_loader = DataLoader(test_set, batch_size=self.batch_size, shuffle=False, num_workers=0)
+                for input1, input2, _ in test_loader:
+                    input1, input2 = to_cuda(input1), to_cuda(input2)
+                    outputs = self.attack_model(input1, input2)  # type: ignore
+                    if not probabilities:
+                        predicted = torch.round(outputs)
+                    else:
+                        predicted = outputs
+                    predicted = from_cuda(predicted)
+
+                    if predictions is None:
+                        predictions = predicted.detach().numpy()
+                    else:
+                        predictions = np.vstack((predictions, predicted.detach().numpy()))
+            else:
+                test_set = self._get_attack_dataset_no_label(f_1=features)
+                test_loader = DataLoader(test_set, batch_size=self.batch_size, shuffle=False, num_workers=0)
+                for input1, _ in test_loader:
+                    input1 = to_cuda(input1)
+                    outputs = self.attack_model(input1)  # type: ignore
+                    if not probabilities:
+                        predicted = torch.round(outputs)
+                    else:
+                        predicted = outputs
+                    predicted = from_cuda(predicted)
+                    if predictions is None:
+                        predictions = predicted.detach().numpy()
+                    else:
+                        predictions = np.vstack((predictions, predicted.detach().numpy()))
             if predictions is not None:
                 if not probabilities:
                     inferred_return = np.round(predictions)
@@ -423,13 +511,19 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
                 raise ValueError("No data available.")
         elif not self.default_model:
             # assumes the predict method of the supplied model returns probabilities
-            inferred = self.attack_model.predict(np.c_[features, y])  # type: ignore
+            if y is not None and self.use_label:
+                inferred = self.attack_model.predict(np.c_[features, y])  # type: ignore
+            else:
+                inferred = self.attack_model.predict(features)  # type: ignore
             if probabilities:
                 inferred_return = inferred
             else:
                 inferred_return = np.round(inferred)
         else:
-            inferred = self.attack_model.predict_proba(np.c_[features, y])  # type: ignore
+            if y is not None and self.use_label:
+                inferred = self.attack_model.predict_proba(np.c_[features, y])  # type: ignore
+            else:
+                inferred = self.attack_model.predict_proba(features)  # type: ignore
             if probabilities:
                 inferred_return = inferred[:, [1]]
             else:
@@ -470,6 +564,38 @@ def __getitem__(self, idx):

         return AttackDataset(x_1=f_1, x_2=f_2, y=label)

+    def _get_attack_dataset_no_label(self, f_1, label=None):
+        from torch.utils.data.dataset import Dataset
+
+        class AttackDataset(Dataset):
+            """
+            Implementation of a pytorch dataset for membership inference attack.
+
+            The features are probabilities/logits or losses for the attack training data (`x_1`) along with
+            its true labels (`x_2`). The labels (`y`) are a boolean representing whether this is a member.
+            """
+
+            def __init__(self, x_1, y=None):
+                import torch
+
+                self.x_1 = torch.from_numpy(x_1.astype(np.float64)).type(torch.FloatTensor)
+
+                if y is not None:
+                    self.y = torch.from_numpy(y.astype(np.int8)).type(torch.FloatTensor)
+                else:
+                    self.y = torch.zeros(x_1.shape[0])
+
+            def __len__(self):
+                return len(self.x_1)
+
+            def __getitem__(self, idx):
+                if idx >= len(self.x_1):  # pragma: no cover
+                    raise IndexError("Invalid Index")
+
+                return self.x_1[idx], self.y[idx]
+
+        return AttackDataset(x_1=f_1, y=label)
+
     def _check_params(self) -> None:
         if self.input_type not in ["prediction", "loss"]:
             raise ValueError("Illegal value for parameter `input_type`.")
diff --git a/tests/attacks/inference/attribute_inference/test_true_label_baseline.py b/tests/attacks/inference/attribute_inference/test_true_label_baseline.py
index 05de0df14a..9209ff25b7 100644
--- a/tests/attacks/inference/attribute_inference/test_true_label_baseline.py
+++ b/tests/attacks/inference/attribute_inference/test_true_label_baseline.py
@@ -605,8 +605,8 @@ def transform_other_feature(x):
         baseline_inferred_test
     )

-    expected_train_acc = {"nn": 0.81, "rf": 0.95, "gb": 0.95, "lr": 0.81, "dt": 0.94, "knn": 0.87, "svm": 0.81}
-    expected_test_acc = {"nn": 0.88, "rf": 0.79, "gb": 0.8, "lr": 0.88, "dt": 0.74, "knn": 0.86, "svm": 0.88}
+    expected_train_acc = {"nn": 0.81, "rf": 0.93, "gb": 0.95, "lr": 0.81, "dt": 0.94, "knn": 0.87, "svm": 0.81}
+    expected_test_acc = {"nn": 0.88, "rf": 0.78, "gb": 0.8, "lr": 0.88, "dt": 0.74, "knn": 0.86, "svm": 0.88}

     assert expected_train_acc[model_type] <= baseline_train_acc
     assert expected_test_acc[model_type] <= baseline_test_acc
diff --git a/tests/attacks/inference/membership_inference/test_black_box.py b/tests/attacks/inference/membership_inference/test_black_box.py
index 0896620404..7f32d3697c 100644
--- a/tests/attacks/inference/membership_inference/test_black_box.py
+++ b/tests/attacks/inference/membership_inference/test_black_box.py
@@ -48,7 +48,38 @@ def test_black_box_image(art_warning, get_default_mnist_subset, image_dl_estimat


 @pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
-def test_black_box_tabular(art_warning, model_type, tabular_dl_estimator_for_attack, get_iris_dataset):
+def test_black_box_tabular(art_warning, model_type, decision_tree_estimator, get_iris_dataset):
+    try:
+        classifier = decision_tree_estimator()
+        attack = MembershipInferenceBlackBox(classifier, attack_model_type=model_type)
+        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25)
+    except ARTTestException as e:
+        art_warning(e)
+
+
+@pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
+def test_black_box_tabular_no_label(art_warning, model_type, decision_tree_estimator, get_iris_dataset):
+    try:
+        classifier = decision_tree_estimator()
+        attack = MembershipInferenceBlackBox(classifier, attack_model_type=model_type)
+        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25, False)
+    except ARTTestException as e:
+        art_warning(e)
+
+
+@pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
+def test_black_box_loss_tabular(art_warning, model_type, decision_tree_estimator, get_iris_dataset):
+    try:
+        classifier = decision_tree_estimator()
+        if type(classifier).__name__ == "PyTorchClassifier" or type(classifier).__name__ == "TensorFlowV2Classifier":
+            attack = MembershipInferenceBlackBox(classifier, input_type="loss", attack_model_type=model_type)
+            backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25)
+    except ARTTestException as e:
+        art_warning(e)
+
+
+@pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
+def test_black_box_tabular_dl(art_warning, model_type, tabular_dl_estimator_for_attack, get_iris_dataset):
     try:
         classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
         attack = MembershipInferenceBlackBox(classifier, attack_model_type=model_type)
@@ -58,7 +89,17 @@ def test_black_box_tabular(art_warning, model_type, tabular_dl_estimator_for_att


 @pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
-def test_black_box_loss_tabular(art_warning, model_type, tabular_dl_estimator_for_attack, get_iris_dataset):
+def test_black_box_tabular_no_label_dl(art_warning, model_type, tabular_dl_estimator_for_attack, get_iris_dataset):
+    try:
+        classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
+        attack = MembershipInferenceBlackBox(classifier, attack_model_type=model_type)
+        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25, False)
+    except ARTTestException as e:
+        art_warning(e)
+
+
+@pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
+def test_black_box_loss_tabular_dl(art_warning, model_type, tabular_dl_estimator_for_attack, get_iris_dataset):
     try:
         classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
         if type(classifier).__name__ == "PyTorchClassifier" or type(classifier).__name__ == "TensorFlowV2Classifier":
@@ -115,55 +156,62 @@ def test_black_box_keras_loss(art_warning, get_iris_dataset):
         art_warning(e)


-def test_black_box_tabular_rf(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
+@pytest.mark.skip_framework("tensorflow", "keras", "scikitlearn", "mxnet", "kerastf")
+def test_black_box_with_model(art_warning, tabular_dl_estimator_for_attack, estimator_for_attack, get_iris_dataset):
     try:
         classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
-        attack = MembershipInferenceBlackBox(classifier, attack_model_type="rf")
-        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.2)
+        attack_model = estimator_for_attack(num_features=2 * num_classes_iris)
+        attack = MembershipInferenceBlackBox(classifier, attack_model=attack_model)
+        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25)
     except ARTTestException as e:
         art_warning(e)


-def test_black_box_tabular_gb(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
+@pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
+def test_black_box_tabular_prob(art_warning, decision_tree_estimator, get_iris_dataset, model_type):
     try:
-        classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
-        attack = MembershipInferenceBlackBox(classifier, attack_model_type="gb")
-        # train attack model using only attack_train_ratio of data
-        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25)
+        classifier = decision_tree_estimator()
+        attack = MembershipInferenceBlackBox(classifier, attack_model_type=model_type)
+        backend_check_membership_probabilities(attack, get_iris_dataset, attack_train_ratio)
     except ARTTestException as e:
         art_warning(e)


-@pytest.mark.skip_framework("tensorflow", "keras", "scikitlearn", "mxnet", "kerastf")
-def test_black_box_with_model(art_warning, tabular_dl_estimator_for_attack, estimator_for_attack, get_iris_dataset):
+def test_black_box_with_model_prob(art_warning, decision_tree_estimator, estimator_for_attack, get_iris_dataset):
     try:
-        classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
+        classifier = decision_tree_estimator()
         attack_model = estimator_for_attack(num_features=2 * num_classes_iris)
         attack = MembershipInferenceBlackBox(classifier, attack_model=attack_model)
-        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25)
+        backend_check_membership_probabilities(attack, get_iris_dataset, attack_train_ratio)
     except ARTTestException as e:
         art_warning(e)


-def test_black_box_tabular_prob_rf(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
+@pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
+def test_black_box_pred(art_warning, model_type, decision_tree_estimator, get_iris_dataset):
     try:
-        classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
-        attack = MembershipInferenceBlackBox(classifier, attack_model_type="rf")
-        backend_check_membership_probabilities(attack, get_iris_dataset, attack_train_ratio)
+        (x_train, _), (x_test, _) = get_iris_dataset
+        classifier = decision_tree_estimator()
+        attack = MembershipInferenceBlackBox(classifier, attack_model_type=model_type)
+        pred_x = classifier.predict(x_train)
+        test_pred_x = classifier.predict(x_test)
+        pred = (pred_x, test_pred_x)
+        backend_check_membership_accuracy_pred(attack, get_iris_dataset, pred, attack_train_ratio, 0.25)
     except ARTTestException as e:
         art_warning(e)


-def test_black_box_tabular_prob_nn(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
+@pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
+def test_black_box_tabular_prob_dl(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset, model_type):
     try:
         classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
-        attack = MembershipInferenceBlackBox(classifier, attack_model_type="nn")
+        attack = MembershipInferenceBlackBox(classifier, attack_model_type=model_type)
         backend_check_membership_probabilities(attack, get_iris_dataset, attack_train_ratio)
     except ARTTestException as e:
         art_warning(e)


-def test_black_box_with_model_prob(
+def test_black_box_with_model_prob_dl(
     art_warning, tabular_dl_estimator_for_attack, estimator_for_attack, get_iris_dataset
 ):
     try:
@@ -176,7 +224,7 @@ def test_black_box_with_model_prob(


 @pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
-def test_black_box_pred(art_warning, model_type, tabular_dl_estimator_for_attack, get_iris_dataset):
+def test_black_box_pred_dl(art_warning, model_type, tabular_dl_estimator_for_attack, get_iris_dataset):
     try:
         (x_train, _), (x_test, _) = get_iris_dataset
         classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
@@ -207,7 +255,29 @@ def test_black_box_loss_regression_pred(art_warning, model_type, get_diabetes_da
         art_warning(e)


-def test_errors(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
+def test_errors(art_warning, decision_tree_estimator, get_iris_dataset):
+    try:
+        classifier = decision_tree_estimator()
+        (x_train, y_train), (x_test, y_test) = get_iris_dataset
+        with pytest.raises(ValueError):
+            MembershipInferenceBlackBox(classifier, attack_model_type="a")
+        with pytest.raises(ValueError):
+            MembershipInferenceBlackBox(classifier, input_type="a")
+        attack = MembershipInferenceBlackBox(classifier)
+        with pytest.raises(ValueError):
+            attack.fit(x_train, y_test, x_test, y_test)
+        with pytest.raises(ValueError):
+            attack.fit(x_train, y_train, x_test, y_train)
+        with pytest.raises(ValueError):
+            attack.infer(x_train, y_test)
+        attack.fit(x_train, y_train, x_test, y_test)
+        with pytest.raises(ValueError):
+            attack.infer(x_test, y_test=None)
+    except ARTTestException as e:
+        art_warning(e)
+
+
+def test_errors_dl(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
     try:
         classifier = tabular_dl_estimator_for_attack(MembershipInferenceBlackBox)
         (x_train, y_train), (x_test, y_test) = get_iris_dataset
@@ -223,10 +293,17 @@ def test_errors(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
             attack.fit(x_train, y_train, x_test, y_train)
         with pytest.raises(ValueError):
             attack.infer(x_train, y_test)
+        attack.fit(x_train, y_train, x_test, y_test)
+        with pytest.raises(ValueError):
+            attack.infer(x_test, y_test=None)
         attack = MembershipInferenceBlackBox(classifier, input_type="loss")
+        with pytest.raises(ValueError):
+            attack.fit(x_train, test_x=x_test)
         attack.fit(x_train, y_train, x_test, y_test)
         with pytest.raises(ValueError):
             attack.infer(None, y_test, pred=pred_test)
+        with pytest.raises(ValueError):
+            attack.infer(x_test, y_test=None)
     except ARTTestException as e:
         art_warning(e)

@@ -240,19 +317,27 @@ def test_classifier_type_check_fail(art_warning):
         art_warning(e)


-def backend_check_membership_accuracy(attack, dataset, attack_train_ratio, approx):
+def backend_check_membership_accuracy(attack, dataset, attack_train_ratio, approx, use_label=True):
     (x_train, y_train), (x_test, y_test) = dataset
     attack_train_size = int(len(x_train) * attack_train_ratio)
     attack_test_size = int(len(x_test) * attack_train_ratio)

     # train attack model using only attack_train_ratio of data
-    attack.fit(
-        x_train[:attack_train_size], y_train[:attack_train_size], x_test[:attack_test_size], y_test[:attack_test_size]
-    )
-
-    # infer attacked feature on remainder of data
-    inferred_train = attack.infer(x_train[attack_train_size:], y_train[attack_train_size:])
-    inferred_test = attack.infer(x_test[attack_test_size:], y_test[attack_test_size:])
+    if use_label:
+        attack.fit(
+            x_train[:attack_train_size],
+            y_train[:attack_train_size],
+            x_test[:attack_test_size],
+            y_test[:attack_test_size],
+        )
+        # infer attacked feature on remainder of data
+        inferred_train = attack.infer(x_train[attack_train_size:], y_train[attack_train_size:])
+        inferred_test = attack.infer(x_test[attack_test_size:], y_test[attack_test_size:])
+    else:
+        attack.fit(x_train[:attack_train_size], test_x=x_test[:attack_test_size])
+        # infer attacked feature on remainder of data
+        inferred_train = attack.infer(x_train[attack_train_size:])
+        inferred_test = attack.infer(x_test[attack_test_size:])

     # check accuracy
     backend_check_accuracy(inferred_train, inferred_test, approx)
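Below is a minimal usage sketch of the label-free mode this patch introduces, mirroring the new attack.fit(x_train, test_x=x_test) / attack.infer(x) calls exercised in the tests above. The target model, dataset split, and variable names are illustrative assumptions, not part of the patch:

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    from art.estimators.classification import SklearnClassifier
    from art.attacks.inference.membership_inference import MembershipInferenceBlackBox

    # Hypothetical target model; any ART-wrapped classifier follows the same pattern.
    x, y = load_iris(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42)
    target = SklearnClassifier(model=RandomForestClassifier().fit(x_train, y_train))

    attack = MembershipInferenceBlackBox(target, attack_model_type="rf")

    # New in this patch: no true labels are supplied, so the attack model is trained
    # on the target's predictions alone and use_label is switched off internally.
    attack.fit(x_train, test_x=x_test)
    member_scores = attack.infer(x_train)      # records that were in the training set
    non_member_scores = attack.infer(x_test)   # records that were not
    print(member_scores.mean(), non_member_scores.mean())

The gap between the two means gives a rough sense of how much membership signal the predictions alone leak for this (assumed) target model.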