Skip to content

Commit

Permalink
Merge pull request #12 from MaxGhenis/default
Browse files Browse the repository at this point in the history
Add defaults to years and columns, rename cols to columns, and improve tests and formatting
  • Loading branch information
MaxGhenis authored Nov 27, 2020
2 parents 386c10f + b040e0f commit 75b6e70
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 32 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
name: Build and test [Python 3.6, 3.7, 3.8]
name: Build and test [Python 3.7, 3.8]

on: push
on: [push, pull_request]

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.7, 3.8]

steps:
- name: Checkout
Expand All @@ -16,7 +16,7 @@ jobs:
persist-credentials: false

- name: Setup Miniconda using Python ${{ matrix.python-version }}
uses: goanpeca/setup-miniconda@v1
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: scf
environment-file: environment.yml
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/check_jupyterbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Test that Jupyter-Book builds
on: [push, pull_request]
jobs:
build:
if: github.repository == 'MaxGhenis/scf'
if: github.repository == 'PSLmodels/scf'
runs-on: ubuntu-latest
steps:
- name: Checkout
Expand All @@ -11,7 +11,7 @@ jobs:
persist-credentials: false

- name: Setup Miniconda
uses: goanpeca/setup-miniconda@v1
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: scf
environment-file: environment.yml
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/deploy_jupyterbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
- main
jobs:
build-and-deploy:
if: github.repository == 'MaxGhenis/scf'
if: github.repository == 'PSLmodels/scf'
runs-on: ubuntu-latest
steps:
- name: Checkout
Expand All @@ -14,7 +14,7 @@ jobs:
persist-credentials: false

- name: Setup Miniconda
uses: goanpeca/setup-miniconda@v1
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: scf
environment-file: environment.yml
Expand Down
4 changes: 2 additions & 2 deletions docs/_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ launch_buttons:
colab_url: "https://colab.research.google.com"

repository:
url: https://github.com/MaxGhenis/scf
branch: master
url: https://github.com/PSLmodels/scf
branch: main
path_to_book: docs

html:
Expand Down
4 changes: 2 additions & 2 deletions docs/home.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
Install via:

```
pip install git+https://github.com/MaxGhenis/scf.git
pip install git+https://github.com/PSLmodels/scf.git
```

Try it with:
```
import scf
scf.load(years=[2016, 2019], cols=['income', 'networth'])
scf.load(years=[2016, 2019], columns=['income', 'networth'])
```

This will return a `pandas` `DataFrame` with columns for
Expand Down
51 changes: 35 additions & 16 deletions scf/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,19 @@
import pandas as pd


VALID_YEARS = [1986, 1989, 1992, 1995, 1998, 2001, 2004, 2007, 2010,
2013, 2016, 2019]
VALID_YEARS = [
1989,
1992,
1995,
1998,
2001,
2004,
2007,
2010,
2013,
2016,
2019,
]


def scf_url(year: int):
Expand All @@ -15,44 +26,52 @@ def scf_url(year: int):
:rtype: str
"""
assert year in VALID_YEARS, "The SCF is not available for " + str(year)
return ('https://www.federalreserve.gov/econres/files/scfp' +
str(year) + 's.zip')
return (
"https://www.federalreserve.gov/econres/files/scfp"
+ str(year)
+ "s.zip"
)


def load_single_scf(year: int, cols: list):
def load_single_scf(year: int, columns: list):
""" Loads SCF summary microdata for a given year and set of columns.
:param year: Year of SCF summary microdata to retrieve.
:type year: int
:param cols: List of columns. The weight column `wgt` is always returned.
:type cols: list
:param columns: List of columns. The weight column `wgt` is always
returned. If None, all columns in the summary dataset are returned.
:type columns: list
:return: SCF summary microdata for the given year.
:rtype: pd.DataFrame
"""
# Add wgt to all returns.
cols = list(set(cols) | set(['wgt']))
return mdf.read_stata_zip(scf_url(year), columns=cols)
if columns is not None:
columns = list(set(columns) | set(["wgt"]))
return mdf.read_stata_zip(scf_url(year), columns=columns)


def load(years: list, cols: list):
def load(years: list = VALID_YEARS, columns: list = None):
""" Loads SCF summary microdata for a set of years and columns.
:param years: Year(s) to load SCF data for. Can be a list or single number.
Defaults to all available years, starting with 1989.
:type years: list
:param cols: List of columns. The weight column `wgt` is always returned.
:type cols: list
:param columns: List of columns. The weight column `wgt` is always
returned. Defaults to all columns in the summary dataset.
:type columns: list
:return: SCF summary microdata for the set of years.
:rtype: pd.DataFrame
"""
# Make columns a list if a single column is passed.
cols = mdf.listify(cols)
if columns is not None:
columns = mdf.listify(columns)
# If years is a single year rather than a list, return without a loop.
if isinstance(years, int):
return load_single_scf(years, cols)
return load_single_scf(years, columns)
# Otherwise append to a list within a loop, and return concatenation.
scfs = []
for year in years:
tmp = load_single_scf(year, cols)
tmp['year'] = year
tmp = load_single_scf(year, columns)
tmp["year"] = year
scfs.append(tmp)
return pd.concat(scfs)
23 changes: 19 additions & 4 deletions scf/tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,29 @@ def equal_elements(l1, l2):

def test_load_multiple_years():
YEARS = [2016, 2019]
res = scf.load(YEARS, ['income', 'networth'])
res = scf.load(YEARS, ["income", "networth"])
# Should return the specified columns, plus year and wgt.
assert equal_elements(res.columns, ['income', 'networth', 'wgt', 'year'])
assert equal_elements(res.columns, ["income", "networth", "wgt", "year"])
assert equal_elements(res.year.unique().tolist(), YEARS)


def test_load_single_year():
# Test with a single year and single column.
res = scf.load(2016, 'networth')
res = scf.load(2016, "networth")
# Should return the specified column, plus wgt (not year).
assert equal_elements(res.columns, ['networth', 'wgt'])
assert equal_elements(res.columns, ["networth", "wgt"])


def test_load_all_columns():
# Test with a single year and all columns.
res = scf.load(2019)
# Should return data with many columns (generally 300-400).
assert res.columns.size > 100


def test_load_all_years():
# Test with a single column and all years.
res = scf.load(columns="wgt")
# Should return data with many rows and two columns.
assert res.size > 0
assert equal_elements(res.columns, ["year", "wgt"])

0 comments on commit 75b6e70

Please sign in to comment.