Merge pull request #12 from MaxGhenis/default

Add defaults to years and columns, rename cols to columns, and improve tests and formatting
PSLmodels · Nov 27, 2020 · 75b6e70
2 parents 386c10f + b040e0f
commit 75b6e70
Show file tree

Hide file tree

Showing 7 changed files with 66 additions and 32 deletions.
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
@@ -1,13 +1,13 @@
-name: Build and test [Python 3.6, 3.7, 3.8]
+name: Build and test [Python 3.7, 3.8]
 
-on: push
+on: [push, pull_request]
 
 jobs:
   build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.7, 3.8]
 
     steps:
       - name: Checkout
@@ -16,7 +16,7 @@ jobs:
           persist-credentials: false
 
       - name: Setup Miniconda using Python ${{ matrix.python-version }}
-        uses: goanpeca/setup-miniconda@v1
+        uses: conda-incubator/setup-miniconda@v2
         with:
           activate-environment: scf
           environment-file: environment.yml

diff --git a/.github/workflows/check_jupyterbook.yml b/.github/workflows/check_jupyterbook.yml
@@ -2,7 +2,7 @@ name: Test that Jupyter-Book builds
 on: [push, pull_request]
 jobs:
   build:
-    if: github.repository == 'MaxGhenis/scf'
+    if: github.repository == 'PSLmodels/scf'
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -11,7 +11,7 @@ jobs:
           persist-credentials: false
 
       - name: Setup Miniconda
-        uses: goanpeca/setup-miniconda@v1
+        uses: conda-incubator/setup-miniconda@v2
         with:
           activate-environment: scf
           environment-file: environment.yml

diff --git a/.github/workflows/deploy_jupyterbook.yml b/.github/workflows/deploy_jupyterbook.yml
@@ -5,7 +5,7 @@ on:
       - main
 jobs:
   build-and-deploy:
-    if: github.repository == 'MaxGhenis/scf'
+    if: github.repository == 'PSLmodels/scf'
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -14,7 +14,7 @@ jobs:
           persist-credentials: false
 
       - name: Setup Miniconda
-        uses: goanpeca/setup-miniconda@v1
+        uses: conda-incubator/setup-miniconda@v2
         with:
           activate-environment: scf
           environment-file: environment.yml

diff --git a/docs/_config.yml b/docs/_config.yml
@@ -6,8 +6,8 @@ launch_buttons:
   colab_url: "https://colab.research.google.com"
 
 repository:
-  url: https://github.com/MaxGhenis/scf
-  branch: master
+  url: https://github.com/PSLmodels/scf
+  branch: main
   path_to_book: docs
 
 html:

diff --git a/docs/home.md b/docs/home.md
@@ -5,14 +5,14 @@
 Install via:
 
 ```
-pip install git+https://github.com/MaxGhenis/scf.git
+pip install git+https://github.com/PSLmodels/scf.git
 ```
 
 Try it with:
 ```
 import scf
 
-scf.load(years=[2016, 2019], cols=['income', 'networth'])
+scf.load(years=[2016, 2019], columns=['income', 'networth'])
 ```
 
 This will return a `pandas` `DataFrame` with columns for

diff --git a/scf/load.py b/scf/load.py
@@ -2,8 +2,19 @@
 import pandas as pd
 
 
-VALID_YEARS = [1986, 1989, 1992, 1995, 1998, 2001, 2004, 2007, 2010,
-               2013, 2016, 2019]
+VALID_YEARS = [
+    1989,
+    1992,
+    1995,
+    1998,
+    2001,
+    2004,
+    2007,
+    2010,
+    2013,
+    2016,
+    2019,
+]
 
 
 def scf_url(year: int):
@@ -15,44 +26,52 @@ def scf_url(year: int):
     :rtype: str
     """
     assert year in VALID_YEARS, "The SCF is not available for " + str(year)
-    return ('https://www.federalreserve.gov/econres/files/scfp' +
-            str(year) + 's.zip')
+    return (
+        "https://www.federalreserve.gov/econres/files/scfp"
+        + str(year)
+        + "s.zip"
+    )
 
 
-def load_single_scf(year: int, cols: list):
+def load_single_scf(year: int, columns: list):
     """ Loads SCF summary microdata for a given year and set of columns.
 
     :param year: Year of SCF summary microdata to retrieve.
     :type year: int
-    :param cols: List of columns. The weight column `wgt` is always returned.
-    :type cols: list
+    :param columns: List of columns. The weight column `wgt` is always
+        returned. Defaults to all columns in the summary dataset.
+    :type columns: list
     :return: SCF summary microdata for the given year.
     :rtype: pd.DataFrame
     """
     # Add wgt to all returns.
-    cols = list(set(cols) | set(['wgt']))
-    return mdf.read_stata_zip(scf_url(year), columns=cols)
+    if columns is not None:
+        columns = list(set(columns) | set(["wgt"]))
+    return mdf.read_stata_zip(scf_url(year), columns=columns)
 
 
-def load(years: list, cols: list):
+def load(years: list = VALID_YEARS, columns: list = None):
     """ Loads SCF summary microdata for a set of years and columns.
 
     :param years: Year(s) to load SCF data for. Can be a list or single number.
+        Defaults to all available years, starting with 1989.
     :type years: list
-    :param cols: List of columns. The weight column `wgt` is always returned.
-    :type cols: list
+    :param columns: List of columns. The weight column `wgt` is always
+        returned. Defaults to all columns in the summary dataset.
+    :type columns: list
     :return: SCF summary microdata for the set of years.
     :rtype: pd.DataFrame
     """
     # Make columns a list if a single column is passed.
-    cols = mdf.listify(cols)
+    if columns is not None:
+        columns = mdf.listify(columns)
     # If years is a single year rather than a list, return without a loop.
     if isinstance(years, int):
-        return load_single_scf(years, cols)
+        return load_single_scf(years, columns)
     # Otherwise append to a list within a loop, and return concatenation.
     scfs = []
     for year in years:
-        tmp = load_single_scf(year, cols)
-        tmp['year'] = year
+        tmp = load_single_scf(year, columns)
+        tmp["year"] = year
         scfs.append(tmp)
     return pd.concat(scfs)
diff --git a/scf/tests/test_load.py b/scf/tests/test_load.py
@@ -7,14 +7,29 @@ def equal_elements(l1, l2):
 
 def test_load_multiple_years():
     YEARS = [2016, 2019]
-    res = scf.load(YEARS, ['income', 'networth'])
+    res = scf.load(YEARS, ["income", "networth"])
     # Should return the specified columns, plus year and wgt.
-    assert equal_elements(res.columns, ['income', 'networth', 'wgt', 'year'])
+    assert equal_elements(res.columns, ["income", "networth", "wgt", "year"])
     assert equal_elements(res.year.unique().tolist(), YEARS)
 
 
 def test_load_single_year():
     # Test with a single year and single column.
-    res = scf.load(2016, 'networth')
+    res = scf.load(2016, "networth")
     # Should return the specified column, plus wgt (not year).
-    assert equal_elements(res.columns, ['networth', 'wgt'])
+    assert equal_elements(res.columns, ["networth", "wgt"])
+
+
+def test_load_all_columns():
+    # Test with a single year and all columns.
+    res = scf.load(2019)
+    # Should return data with many columns (generally 300-400).
+    assert res.columns.size > 100
+
+
+def test_load_all_years():
+    # Test with a single column and all years.
+    res = scf.load(columns="wgt")
+    # Should return data with many rows and two columns.
+    assert res.size > 0
+    assert equal_elements(res.columns, ["year", "wgt"])