Skip to content

Commit

Permalink
Merge pull request #16 from MaxGhenis/mdf
Browse files (browse the repository at this point in the history)
Adds option to return as MicroDataFrame
  • Loading branch information
MaxGhenis authored Feb 27, 2021
2 parents 75b6e70 + 00911d8 commit e4681a9
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 20 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Build and test [Python 3.7, 3.8]
name: Build and test [Python 3.7, 3.8, 3.9]

on: [push, pull_request]

Expand All @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8]
python-version: [3.7, 3.8, 3.9]

steps:
- name: Checkout
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check_jupyterbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
with:
activate-environment: scf
environment-file: environment.yml
python-version: 3.8
python-version: 3.9
auto-activate-base: false

- name: Build # Build Jupyter Book
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy_jupyterbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
with:
activate-environment: scf
environment-file: environment.yml
python-version: 3.8
python-version: 3.9
auto-activate-base: false

- name: Build # Build Jupyter Book
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8]
python-version: [3.9]

steps:
- name: Checkout
Expand Down
42 changes: 27 additions & 15 deletions scf/load.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import microdf as mdf
import pandas as pd
from typing import Union


VALID_YEARS = [
Expand All @@ -17,7 +18,7 @@
]


def scf_url(year: int):
def scf_url(year: int) -> str:
""" Returns the URL of the SCF summary microdata zip file for a year.
:param year: Year of SCF summary microdata to retrieve.
Expand All @@ -33,7 +34,7 @@ def scf_url(year: int):
)


def load_single_scf(year: int, columns: list):
def load_single_scf(year: int, columns: list) -> pd.DataFrame:
""" Loads SCF summary microdata for a given year and set of columns.
:param year: Year of SCF summary microdata to retrieve.
Expand All @@ -50,28 +51,39 @@ def load_single_scf(year: int, columns: list):
return mdf.read_stata_zip(scf_url(year), columns=columns)


def load(years: list = VALID_YEARS, columns: list = None):
def load(
years: list = VALID_YEARS,
columns: list = None,
as_microdataframe: bool = False,
) -> Union[pd.DataFrame, mdf.MicroDataFrame]:
""" Loads SCF summary microdata for a set of years and columns.
:param years: Year(s) to load SCF data for. Can be a list or single number.
Defaults to all available years, starting with 1989.
:type years: list
:param columns: List of columns. The weight column `wgt` is always
returned. Defaults to all columns in the summary dataset.
:param columns: List of columns. The weight column `wgt` is always returned.
:type columns: list
:param as_microdataframe: Whether to return as a MicroDataFrame with
weight set, defaults to False.
:type as_microdataframe: bool
:return: SCF summary microdata for the set of years.
:rtype: pd.DataFrame
:rtype: Union[pd.DataFrame, mdf.MicroDataFrame]
"""
# Make cols a list if a single column is passed.
if columns is not None:
columns = mdf.listify(columns)
# If years is a single year rather than a list, return without a loop.
# If years is a single year rather than a list, don't use a loop.
if isinstance(years, int):
return load_single_scf(years, columns)
# Otherwise append to a list within a loop, and return concatenation.
scfs = []
for year in years:
tmp = load_single_scf(year, columns)
tmp["year"] = year
scfs.append(tmp)
return pd.concat(scfs)
res = load_single_scf(years, columns)
# Otherwise append to a list within a loop, and concatenate.
else:
scfs = []
for year in years:
tmp = load_single_scf(year, columns)
tmp["year"] = year
scfs.append(tmp)
res = pd.concat(scfs)
# Return as a MicroDataFrame or DataFrame.
if as_microdataframe:
return mdf.MicroDataFrame(res, weights="wgt")
return res
6 changes: 6 additions & 0 deletions scf/tests/test_load.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import scf
import microdf as mdf


def equal_elements(l1, l2):
Expand Down Expand Up @@ -33,3 +34,8 @@ def test_load_all_years():
# Should return data with many rows and two columns.
assert res.size > 0
assert equal_elements(res.columns, ["year", "wgt"])


def test_load_microdf():
    """Check that ``scf.load`` returns a MicroDataFrame when asked to."""
    # Single year and single column, requesting the MicroDataFrame form.
    loaded = scf.load(2019, "networth", as_microdataframe=True)
    assert isinstance(loaded, mdf.MicroDataFrame)

0 comments on commit e4681a9

Please sign in to comment.