Merge pull request #93 from lbluque/main
Updates and fixes
lbluque authored Jul 11, 2023
2 parents 9bf3af9 + 7e27874 commit f57d6ce
Showing 28 changed files with 991 additions and 103 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -43,7 +43,7 @@ jobs:
run: |
pytest tests --cov=sparselm --cov-report=xml
- if: ${{ matrix.python_version == 3.9 && github.event_name == 'push' }}
- if: ${{ matrix.python_version == 3.10 && github.event_name == 'push' }}
name: codacy-coverage-reporter
uses: codacy/codacy-coverage-reporter-action@v1
with:
2 changes: 2 additions & 0 deletions README.md
@@ -39,3 +39,5 @@ cvsearch = GridSearchCV(alasso, param_grid)
cvsearch.fit(X, y)
print(cvsearch.best_params_)
```

For more details on use and functionality, see the [documentation](https://cedergrouphub.github.io/sparse-lm/).
Empty file removed docs/changelog.rst
Empty file.
11 changes: 8 additions & 3 deletions docs/conf.py
@@ -37,9 +37,7 @@
"sphinx.ext.autosummary",
"sphinx.ext.mathjax",
"m2r2",
# "nbsphinx",
# "nbsphinx_link",
# "sphinxcontrib.autodoc_pydantic",
"sphinx_gallery.gen_gallery",
]

# Add any paths that contain templates here, relative to this directory.
@@ -126,3 +124,10 @@
"numpy": ("https://numpy.org/doc/stable/", None),
"cvxpy": ("https://www.cvxpy.org/en/latest/", None),
}

# -- Options for sphinx gallery extension ---------------------------------------

sphinx_gallery_conf = {
"examples_dirs": "../examples", # path to your example scripts
"gallery_dirs": "auto_examples", # path to where to save gallery generated output
}
5 changes: 0 additions & 5 deletions docs/examples.rst

This file was deleted.

3 changes: 1 addition & 2 deletions docs/index.rst
@@ -4,15 +4,14 @@
:hidden:

install
examples
auto_examples/index

.. toctree::
:caption: Information
:hidden:

contributing
license
changelog
GitHub <https://github.com/CederGroupHub/sparse-lm>


44 changes: 44 additions & 0 deletions docs/install.rst
@@ -21,6 +21,50 @@ To install **sparse-lm** from source, (fork and) clone the repository from `github
cd sparselm
pip install .

Installing MIQP solvers
-----------------------

Since **cvxpy** is used to specify and solve the regression optimization problems, any of its
`supported solvers <https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options>`_
can be used with **sparse-lm** estimators. **cvxpy** ships with open-source solvers
(OSQP, SCS, and ECOS), which are usually sufficient for most convex regression problems.

However, for the mixed integer quadratic programming (MIQP) formulations used in
:class:`BestSubsetSelection` and :class:`RegularizedL0` based classes, we highly
recommend installing an MIQP-capable solver. ECOS_BB can be used to solve MIQP problems,
but it can be very slow and, more importantly, has recurring correctness issues. See the
`mixed-integer program section <https://www.cvxpy.org/version/1.2/tutorial/advanced/index.html#mixed-integer-programs>`_
of the cvxpy documentation for more details.
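You can check which solvers **cvxpy** can find in your environment; a quick sanity
check using the public :func:`cvxpy.installed_solvers` function::

    import cvxpy as cp

    # list the solvers cvxpy currently detects in this environment
    print(cp.installed_solvers())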

Gurobi
^^^^^^

To use **sparse-lm** with MIQP formulations, we highly recommend installing **Gurobi**.
It can be installed directly from PyPI::

pip install gurobipy

Without a license, the free trial version of **Gurobi** can be used to solve small
problems. For larger problems a license is required. **Gurobi** grants
`free academic licenses <https://www.gurobi.com/academia/academic-program-and-licenses/>`_
to students and academic researchers.
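Once installed, **Gurobi** can be selected when constructing an estimator. A minimal
sketch, assuming the estimators accept a ``solver`` keyword to forward the choice to
**cvxpy** and that ``sparse_bound`` caps the number of nonzero coefficients (check the
API documentation for the exact parameter names)::

    from sklearn.datasets import make_regression
    from sparselm.model import BestSubsetSelection

    X, y = make_regression(n_samples=50, n_features=10, n_informative=5, random_state=0)

    # "GUROBI" is the cvxpy name for the Gurobi solver
    bss = BestSubsetSelection(sparse_bound=5, solver="GUROBI")
    bss.fit(X, y)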

SCIP
^^^^

If installing a licensed solver is not an option, **SCIP** can be used as a free
alternative. To use **SCIP**, the Python interface **PySCIPOpt** must also be installed.
**PySCIPOpt** can be installed from PyPI; however, this requires building SCIP from
source. See the installation details `here <https://github.com/scipopt/PySCIPOpt>`_.

If you use conda, we recommend installing **SCIP** and **PySCIPOpt** from the
conda-forge channel::

conda install -c conda-forge scipopt pyscipopt

The above command installs **PySCIPOpt** with a pre-built version of **SCIP**, so you
will not need to build it from source.
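To confirm that **cvxpy** picked up the newly installed solver, check that ``"SCIP"``
appears in the list of installed solvers::

    import cvxpy as cp

    assert "SCIP" in cp.installed_solvers()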

Testing
-------

6 changes: 6 additions & 0 deletions examples/README.rst
@@ -0,0 +1,6 @@
Examples
========

This is a set of simple examples using the sparse linear regression models implemented
in **sparse-lm**. For the vast majority of cases, the **sparse-lm** models can be used
in the same way as the linear regression models in **scikit-learn**.
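As a minimal illustration on synthetic data (a sketch; ``Lasso`` stands in for any of
the implemented models, all of which follow the familiar scikit-learn fit/predict
pattern)::

    from sklearn.datasets import make_regression
    from sparselm.model import Lasso

    X, y = make_regression(n_samples=100, n_features=20, random_state=0)
    lasso = Lasso(alpha=0.5, fit_intercept=True)
    lasso.fit(X, y)
    print(lasso.coef_)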
Binary file added examples/corr.npy
Binary file not shown.
Binary file added examples/energy.npy
Binary file not shown.
99 changes: 99 additions & 0 deletions examples/plot_adaptive.py
@@ -0,0 +1,99 @@
"""
==============================
Using adaptive regularization
==============================
Adaptive or iteratively re-weighted regularization is a technique that can improve the
feature selection properties of the standard Lasso and its Group Lasso extensions. In
this example we compare the performance of the standard Lasso with the adaptive Lasso.
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, KFold, train_test_split

from sparselm.model import AdaptiveLasso

X, y, coef = make_regression(
n_samples=200,
n_features=100,
n_informative=10,
noise=40.0,
bias=-15.0,
coef=True,
random_state=0,
)

X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.25, random_state=0
)

# create estimators
lasso = Lasso(fit_intercept=True)
alasso = AdaptiveLasso(max_iter=5, fit_intercept=True)

# create cv search objects for each estimator
cv5 = KFold(n_splits=5, shuffle=True, random_state=0)
params = {"alpha": np.logspace(-1, 1, 10)}

lasso_cv = GridSearchCV(lasso, params, cv=cv5, n_jobs=-1)
alasso_cv = GridSearchCV(alasso, params, cv=cv5, n_jobs=-1)

# fit models on training data
lasso_cv.fit(X_train, y_train)
alasso_cv.fit(X_train, y_train)

# calculate model performance on test and train data
lasso_train = {
"r2": r2_score(y_train, lasso_cv.predict(X_train)),
"rmse": np.sqrt(mean_squared_error(y_train, lasso_cv.predict(X_train))),
}

lasso_test = {
"r2": r2_score(y_test, lasso_cv.predict(X_test)),
"rmse": np.sqrt(mean_squared_error(y_test, lasso_cv.predict(X_test))),
}

alasso_train = {
"r2": r2_score(y_train, alasso_cv.predict(X_train)),
"rmse": np.sqrt(mean_squared_error(y_train, alasso_cv.predict(X_train))),
}

alasso_test = {
"r2": r2_score(y_test, alasso_cv.predict(X_test)),
"rmse": np.sqrt(mean_squared_error(y_test, alasso_cv.predict(X_test))),
}

print("Lasso performance metrics:")
print(f" train r2: {lasso_train['r2']:.3f}")
print(f" test r2: {lasso_test['r2']:.3f}")
print(f" train rmse: {lasso_train['rmse']:.3f}")
print(f" test rmse: {lasso_test['rmse']:.3f}")

print("Adaptive Lasso performance metrics:")
print(f" train r2: {alasso_train['r2']:.3f}")
print(f" test r2: {alasso_test['r2']:.3f}")
print(f" train rmse: {alasso_train['rmse']:.3f}")
print(f" test rmse: {alasso_test['rmse']:.3f}")

# plot predicted values
fig, ax = plt.subplots()
ax.plot(y_test, lasso_cv.predict(X_test), "o", label="lasso", alpha=0.5)
ax.plot(y_test, alasso_cv.predict(X_test), "o", label="adaptive lasso", alpha=0.5)
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--")
ax.set_xlabel("true values")
ax.set_ylabel("predicted values")
ax.legend()
fig.show()

# plot model coefficients
fig, ax = plt.subplots()
ax.plot(coef, "o", label="True coefficients")
ax.plot(lasso_cv.best_estimator_.coef_, "o", label="Lasso", alpha=0.5)
ax.plot(alasso_cv.best_estimator_.coef_, "o", label="Adaptive Lasso", alpha=0.5)
ax.set_xlabel("covariate index")
ax.set_ylabel("coefficient value")
fig.show()