-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #100 from ealcobaca/0.4.x
0.4.x
- Loading branch information
Showing
35 changed files
with
2,733 additions
and
1,887 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
examples/01_introductory_examples/plot_extract_from_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
""" | ||
Meta-features from a model | ||
========================== | ||
In this example, we will show you how to extract meta-features from a | ||
pre-fitted model. | ||
""" | ||
|
||
# Load a dataset | ||
import sklearn.tree | ||
from sklearn.datasets import load_iris | ||
from pymfe.mfe import MFE | ||
|
||
iris = load_iris() | ||
|
||
############################################################################### | ||
# If you want to extract metafeatures from a pre-fitted machine learning model | ||
# (from sklearn package), you can use the `extract_from_model` method without | ||
# needing to use the training data: | ||
|
||
# Extract from model | ||
|
||
model = sklearn.tree.DecisionTreeClassifier().fit(iris.data, iris.target) | ||
extractor = MFE() | ||
ft = extractor.extract_from_model(model) | ||
print("\n".join("{:50} {:30}".format(x, y) for x, y in zip(ft[0], ft[1]))) | ||
|
||
# Extract specific metafeatures from model | ||
extractor = MFE(features=["tree_shape", "nodes_repeated"], summary="histogram") | ||
|
||
ft = extractor.extract_from_model( | ||
model, | ||
arguments_fit={"verbose": 1}, | ||
arguments_extract={"verbose": 1, "histogram": {"bins": 5}}) | ||
|
||
print("\n".join("{:50} {:30}".format(x, y) for x, y in zip(ft[0], ft[1]))) |
33 changes: 33 additions & 0 deletions
33
examples/01_introductory_examples/plot_unsupervised_meta_features.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
""" | ||
Extracting meta-features from unsupervised learning | ||
=================================================== | ||
In this example we will show you how to extract meta-features from unsupervised | ||
machine learning tasks. | ||
""" | ||
|
||
# Load a dataset | ||
from sklearn.datasets import load_iris | ||
from pymfe.mfe import MFE | ||
|
||
data = load_iris() | ||
y = data.target | ||
X = data.data | ||
|
||
############################################################################### | ||
# | ||
# You can simply omit the target attribute for unsupervised tasks while | ||
# fitting the data into the MFE model. The `pymfe` package automatically finds | ||
# and extracts only the metafeatures suitable for this type of task. | ||
|
||
# Extract default unsupervised measures | ||
mfe = MFE() | ||
mfe.fit(X) | ||
ft = mfe.extract() | ||
print("\n".join("{:50} {:30}".format(x, y) for x, y in zip(ft[0], ft[1]))) | ||
|
||
# Extract all available unsupervised measures | ||
mfe = MFE(groups="all") | ||
mfe.fit(X) | ||
ft = mfe.extract() | ||
print("\n".join("{:50} {:30}".format(x, y) for x, y in zip(ft[0], ft[1]))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
""" | ||
Meta-feature confidence interval | ||
================================ | ||
In this example, we will show you how to extract meta-features with confidence | ||
intervals. | ||
""" | ||
|
||
# Load a dataset | ||
import sklearn.tree | ||
from sklearn.datasets import load_iris | ||
from pymfe.mfe import MFE | ||
|
||
data = load_iris() | ||
y = data.target | ||
X = data.data | ||
|
||
# You can also extract your meta-features with confidence intervals using | ||
# bootstrap. Keep in mind that this method extracts each meta-feature several | ||
# times, and may be very expensive depending mainly on your data and the | ||
# number of meta-feature extraction methods called. | ||
|
||
# Extract meta-features with confidence interval | ||
mfe = MFE(features=["mean", "nr_cor_attr", "sd", "max"]) | ||
mfe.fit(X, y) | ||
|
||
ft = mfe.extract_with_confidence( | ||
sample_num=256, | ||
confidence=0.99, | ||
verbose=1, | ||
) | ||
|
||
print("\n".join("{:50} {:30} {:30}".format(x, y[0], y[1]) | ||
for x, y in zip(ft[0], ft[2]))) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
examples/03_miscellaneous_examples/plot_default_value_for_attr_conc.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
""" | ||
Default value of max_attr_num for attr_conc | ||
=========================================== | ||
In this example, we will show you how the default value of the `max_attr_num` | ||
parameter of the `attr_conc` meta-feature was chosen. | ||
""" | ||
|
||
# Load a dataset | ||
from sklearn.datasets import load_iris | ||
import numpy as np | ||
import pymfe.mfe | ||
import matplotlib.pyplot as plt | ||
|
||
iris = load_iris() | ||
|
||
# A default value was added for the `max_attr_num` parameter of the | ||
# `attr_conc` meta-feature extraction method, which is by far the most | ||
# expensive meta-feature extraction method. | ||
|
||
# The default value was determined by simply inspecting how the feature | ||
# extraction time grows with the number of attributes in the fitted data. | ||
# The accepted threshold for the extraction time is less than 2 seconds. | ||
|
||
# The test dataset was the iris dataset. The test code used is reproduced | ||
# below. | ||
np.random.seed(0) | ||
|
||
arrsize = np.zeros(10) | ||
time = np.zeros(10) | ||
|
||
X = np.empty((iris.target.size, 0)) | ||
|
||
for i in np.arange(10): | ||
X = np.hstack((X, iris.data)) | ||
print(f"{i}. Number of attributes: {X.shape[1]} ...") | ||
model = pymfe.mfe.MFE(features="attr_conc", | ||
summary="mean", | ||
measure_time="total").fit(X) | ||
res = model.extract(suppress_warnings=True) | ||
|
||
arrsize[i] = model._custom_args_ft["C"].shape[1] | ||
time[i] = res[2][0] | ||
|
||
plt.plot(arrsize, time, label="time elapsed") | ||
plt.hlines(y=np.arange(1, 1 + int(np.ceil(np.max(time)))), | ||
xmin=0, | ||
xmax=arrsize[-1], | ||
linestyle="dotted", | ||
color="red") | ||
plt.legend() | ||
plt.show() | ||
|
||
# The time cost of extraction for the attr_conc meta-feature does not grow | ||
# significantly with the number of instances and, hence, it is not necessary to | ||
# sample along the instance axis. |
Oops, something went wrong.