Skip to content

Commit

Permalink
First package pieces
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxGhenis committed Oct 4, 2020
1 parent 4fb2262 commit 1c7eaa2
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 0 deletions.
51 changes: 51 additions & 0 deletions PSL_catalog.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
  "project_one_line": {
    "start_header": null,
    "end_header": null,
    "source": null,
    "type": "html",
    "data": "<p>scf is a Python package for working with Survey of Consumer Finances microdata.</p>"
  },
  "project_overview": {
    "start_header": null,
    "end_header": null,
    "source": null,
    "type": "html",
    "data": "<a href=\"https://github.com/MaxGhenis/scf\">What is scf?</a>"
  },
  "core_maintainers": {
    "start_header": null,
    "end_header": null,
    "source": null,
    "type": "html",
    "data": "<ul><li>Max Ghenis<ul><li>email: [email protected]</li></ul></li></ul>"
  },
  "user_documentation": {
    "start_header": null,
    "end_header": null,
    "source": null,
    "type": "html",
    "data": "<a href=\"http://maxghenis.github.io/scf/\"></a>"
  },
  "contributor_overview": {
    "start_header": null,
    "end_header": null,
    "source": null,
    "type": "html",
    "data": "<a href=\"http://github.com/MaxGhenis/scf/\"></a>"
  },
  "user_changelog_recent": {
    "start_header": null,
    "end_header": null,
    "source": null,
    "type": "html",
    "data": "<a href=\"https://github.com/MaxGhenis/scf/releases\"></a>"
  },
  "link_to_webapp": {
    "start_header": null,
    "end_header": null,
    "source": null,
    "type": null,
    "data": null
  }
}
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# `scf`

`scf` is a Python package for loading and working with Survey of Consumer Finances summary microdata.

For documentation on the summary microdata, see [Berkeley SDA's codebook](https://sda.berkeley.edu/sdaweb/docs/scfcomb2019/DOC/hcbk.htm).
9 changes: 9 additions & 0 deletions ROADMAP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# `scf` roadmap

`scf` currently provides capabilities for loading the Survey of Consumer Finances summary microdata.
In the future, it will provide more functionality, including:
* Functions to summarize the microdata, e.g., wealth distributions over time.
* Charts to visualize these summaries.
* Standard error calculations for full SCF microdata files.

See the [issues page](https://github.com/MaxGhenis/scf/issues) to view and suggest other items.
12 changes: 12 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Environment definition for working on the scf package.
name: scf
dependencies:
# Test, lint and coverage tooling.
- codecov
- flake8
# Libraries the package itself relies on.
- numpy
- pandas
- pip
- pytest
- setuptools
# microdf is pulled in editable mode straight from its GitHub repository.
# NOTE(review): presumably both entries below belong to the pip list — confirm
# against the file's indentation.
- pip:
- "--editable=git+https://github.com/PSLmodels/microdf.git"
- jupyter-book
10 changes: 10 additions & 0 deletions scf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .load import load


# Package metadata.
__version__ = "0.1.0"
name = "scf"

# Names exported by `from scf import *`.
__all__ = ["load"]  # load.py
51 changes: 51 additions & 0 deletions scf/load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import microdf as mdf
import pandas as pd


def scf_url(year: int):
    """ Builds the download URL of the SCF summary microdata zip for a year.

    :param year: Year of SCF summary microdata to retrieve.
    :type year: int
    :return: URL of summary microdata zip file for the given year.
    :rtype: str
    """
    # The file name is "scfp", then the year, then "s.zip".
    base = 'https://www.federalreserve.gov/econres/files/scfp'
    return f'{base}{year!s}s.zip'


def load_single_scf(year: int, cols: list):
    """ Loads SCF summary microdata for a given year and set of columns.

    :param year: Year of SCF summary microdata to retrieve.
    :type year: int
    :param cols: List of columns, or a single column name. The weight column
        `wgt` is always returned.
    :type cols: list
    :return: SCF summary microdata for the given year.
    :rtype: pd.DataFrame
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(cols, str):
        cols = [cols]
    # Add wgt to all returns. dict.fromkeys drops duplicates while keeping
    # the caller's order, so the column list handed to the reader is the same
    # on every run (a set's order varies with string hash randomization).
    cols = list(dict.fromkeys(list(cols) + ['wgt']))
    return mdf.read_stata_zip(scf_url(year), columns=cols)


def load(years: list, cols: list):
    """ Loads SCF summary microdata for a set of years and columns.

    :param years: Year(s) to load SCF data for. Can be a list or single number.
    :type years: list
    :param cols: List of columns. The weight column `wgt` is always returned.
    :type cols: list
    :return: SCF summary microdata for the set of years. When `years` is a
        list, a `year` column identifies the survey year of each row; when
        `years` is a single int, the data is returned as loaded, without a
        `year` column.
    :rtype: pd.DataFrame
    """
    # If years is a single year rather than a list, return without a loop.
    # (The argument is `years`; no loop variable exists in this branch.)
    if isinstance(years, int):
        return load_single_scf(years, cols)
    # Otherwise append to a list within a loop, and return concatenation.
    scfs = []
    for year in years:
        tmp = load_single_scf(year, cols)
        # Tag each row with its survey year so they stay distinguishable
        # after the frames are stacked.
        tmp['year'] = year
        scfs.append(tmp)
    return pd.concat(scfs)
12 changes: 12 additions & 0 deletions scf/tests/test_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import scf


def equal_elements(l1, l2):
    """Return whether two iterables hold the same distinct elements.

    Order and repetition are ignored.
    """
    unique_left, unique_right = set(l1), set(l2)
    return unique_left == unique_right

def test_load():
    """Load two survey years and check the returned columns and years."""
    requested_years = [2016, 2019]
    loaded = scf.load(requested_years, ['income', 'networth'])
    # Should return the specified columns, plus year and wgt.
    expected_columns = ['income', 'networth', 'wgt', 'year']
    assert equal_elements(loaded.columns, expected_columns)
    # Every requested year, and no other, should appear in the result.
    observed_years = loaded.year.unique().tolist()
    assert equal_elements(observed_years, requested_years)
18 changes: 18 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from setuptools import setup

setup(
    # Distribution identity.
    name="scf",
    version="0.1.0",
    license="MIT",
    # Project description and home page.
    description="Python package for working with the Survey of Consumer Finances microdata.",
    url="http://github.com/maxghenis/scf",
    # Maintainer contact.
    author="Max Ghenis",
    author_email="[email protected]",
    # Package contents and runtime requirements.
    packages=["scf"],
    install_requires=["numpy", "pandas", "microdf"],
    zip_safe=False,
)

0 comments on commit 1c7eaa2

Please sign in to comment.