Adds support for sklearn MLPs (#276)
* added support for sklearn MLPs

* remove spurious print statements

* removes superseded test function

* linting

---------

Co-authored-by: Richard Preen <[email protected]>
jim-smith and rpreen authored May 30, 2024
1 parent feb7e4d commit a0d7876
Showing 2 changed files with 67 additions and 5 deletions.
18 changes: 13 additions & 5 deletions aisdc/attacks/structural_attack.py
@@ -20,6 +20,7 @@
# tree-based model types currently supported
from sklearn.base import BaseEstimator
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost.sklearn import XGBClassifier

@@ -184,6 +185,12 @@ def get_model_param_count(model: BaseEstimator) -> int:
        n_leaves = len(df[df.Feature == "Leaf"])
        # 2 per internal node, one per class in leaves, one weight per tree
        n_params = 2 * (total - n_leaves) + (model.n_classes_ - 1) * n_leaves + n_trees

    elif isinstance(model, MLPClassifier):
        weights = model.coefs_  # list of numpy.ndarrays
        biases = model.intercepts_  # list of numpy.ndarrays
        n_params = sum(a.size for a in weights) + sum(a.size for a in biases)

    else:
        pass
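
For reference, a minimal standalone sketch (illustrative, not part of the commit) of what this new branch computes: summing the element counts of coefs_ and intercepts_ gives the total number of trainable weights and biases. The dataset and layer width below are only examples.

from sklearn.datasets import load_breast_cancer
from sklearn.neural_network import MLPClassifier

X, y = load_breast_cancer(return_X_y=True)  # 30 features, binary labels
model = MLPClassifier(hidden_layer_sizes=(10,), max_iter=5, random_state=1).fit(X, y)

n_weights = sum(a.size for a in model.coefs_)      # (30 x 10) + (10 x 1) = 310
n_biases = sum(a.size for a in model.intercepts_)  # 10 + 1 = 11
print(n_weights + n_biases)                        # 321 trainable parameters

This count covers any architecture MLPClassifier can build, because coefs_ and intercepts_ always hold one array per layer-to-layer connection.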

@@ -291,11 +298,11 @@ def attack(self, target: Target) -> None:
        errstr = "len mismatch between equiv classes and "
        assert len(equiv_classes) == len(equiv_counts), errstr + "counts"
        assert len(equiv_classes) == len(equiv_members), errstr + "membership"
-        print(
-            f"equiv_classes is {equiv_classes}\n"
-            f"equiv_counts is {equiv_counts}\n"
-            # #f'equiv_members is {equiv_members}\n'
-        )
+        # print(
+        #     f"equiv_classes is {equiv_classes}\n"
+        #     f"equiv_counts is {equiv_counts}\n"
+        #     # #f'equiv_members is {equiv_members}\n'
+        # )

        # now assess the risk
        # Degrees of Freedom
@@ -354,6 +361,7 @@ def get_equivalence_classes(self) -> tuple:
        for prob_vals in equiv_classes:
            ingroup = np.unique(np.asarray(self.yprobs == prob_vals).nonzero()[0])
            members.append(ingroup)
        # print(equiv_counts)
        return [equiv_classes, equiv_counts, members]

    def _get_global_metrics(self, attack_metrics: list) -> dict:
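
The k-anonymity and class-disclosure checks rest on the equivalence classes built in get_equivalence_classes above: groups of records whose predicted probability vectors are identical. A minimal sketch of that grouping idea (illustrative only; the toy array is not from the package):

import numpy as np

yprobs = np.array([[0.9, 0.1], [0.9, 0.1], [0.2, 0.8]])  # toy predictions for 3 records
equiv_classes, equiv_counts = np.unique(yprobs, axis=0, return_counts=True)
members = [
    np.asarray(np.all(yprobs == row, axis=1)).nonzero()[0] for row in equiv_classes
]
print(equiv_counts)  # [1 2] -- one record sits alone in its class, a k-anonymity concern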
54 changes: 54 additions & 0 deletions tests/attacks/test_structural_attack.py
@@ -12,6 +12,7 @@
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost.sklearn import XGBClassifier
@@ -37,6 +38,8 @@ def get_target(modeltype: str, **kwparams: dict) -> Target:
        target_model = XGBClassifier(**kwparams)
    elif modeltype == "adaboost":
        target_model = AdaBoostClassifier(**kwparams)
    elif modeltype == "mlpclassifier":
        target_model = MLPClassifier(**kwparams)
    # should get polite error but not DoF yet
    elif modeltype == "svc":
        target_model = SVC(**kwparams)
@@ -313,6 +316,57 @@ def test_xgb():
    assert myattack2.unnecessary_risk == 1, " unnecessary risk with these xgb params"


def test_sklearnmlp():
    """Test for sklearn MLPClassifier."""
    # non-disclosive
    safeparams = {
        "hidden_layer_sizes": (10,),
        "random_state": 12345,
        "activation": "identity",
        "max_iter": 1,
    }
    target = get_target("mlpclassifier", **safeparams)
    myattack = sa.StructuralAttack()
    myattack.attack(target)
    paramstr = ""
    for key, val in safeparams.items():
        paramstr += f"{key}:{val}\n"
    assert (
        myattack.DoF_risk == 0
    ), f"should be no DoF risk with small MLP with params {paramstr}"
    assert (
        myattack.k_anonymity_risk == 0
    ), f"should be no k-anonymity risk with params {paramstr}"
    assert (
        myattack.class_disclosure_risk == 1
    ), f"should be class disclosure risk with params {paramstr}"
    assert myattack.unnecessary_risk == 0, "no unnecessary risk for MLPs at present"

    # highly disclosive
    unsafeparams = {
        "hidden_layer_sizes": (50, 50),
        "random_state": 12345,
        "activation": "relu",
        "max_iter": 100,
    }
    uparamstr = ""
    for key, val in unsafeparams.items():
        uparamstr += f"{key}:{val}\n"
    target2 = get_target("mlpclassifier", **unsafeparams)
    myattack2 = sa.StructuralAttack()
    myattack2.attack(target2)
    assert myattack2.DoF_risk == 1, f"should be DoF risk with this MLP:\n{uparamstr}"
    assert (
        myattack2.k_anonymity_risk == 1
    ), f"559/560 records should have k-anonymity 1 with this MLP:\n{uparamstr}"
    assert (
        myattack2.class_disclosure_risk == 1
    ), f"should be class disclosure risk with this MLP:\n{uparamstr}"
    assert (
        myattack2.unnecessary_risk == 0
    ), "no unnecessary risk yet for MLPClassifiers"
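
As a rough sanity check on why these two configurations land on opposite sides of the degrees-of-freedom test (back-of-the-envelope only: it assumes the 30-feature breast-cancer data and a single logistic output unit, and the mlp_param_count helper is illustrative, not part of the test suite):

def mlp_param_count(layer_widths):
    """Weights plus biases for a fully connected net with the given layer widths."""
    return sum(n_in * n_out + n_out for n_in, n_out in zip(layer_widths[:-1], layer_widths[1:]))

print(mlp_param_count([30, 10, 1]))      # 321 -- the "safe" (10,) configuration
print(mlp_param_count([30, 50, 50, 1]))  # 4151 -- the "unsafe" (50, 50) configuration

With only 569 records in the dataset, the second network has many times more parameters than training examples, which is the intuition behind flagging a degrees-of-freedom risk for it.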


def test_reporting():
    """Test reporting functionality."""
    param_dict = {"max_depth": 1, "min_samples_leaf": 150}
