First package pieces

PSLmodels · Oct 4, 2020 · 1c7eaa2 · 1c7eaa2
1 parent 4fb2262
commit 1c7eaa2
Show file tree

Hide file tree

Showing 8 changed files with 168 additions and 0 deletions.
diff --git a/PSL_catalog.json b/PSL_catalog.json
@@ -0,0 +1,51 @@
+{
+    "project_one_line": {
+        "start_header": null,
+        "end_header": null,
+        "source": null,
+        "type": "html",
+        "data": "<p>scf is a Python package for working with Survey of Consumer Finances microdata.</p>"
+    },
+    "project_overview": {
+        "start_header": null,
+        "end_header": null,
+        "source": null,
+        "type": "html",
+        "data": "<a href=\"https://github.com/MaxGhenis/scf\">What is scf?</a>"
+    },
+    "core_maintainers": {
+        "start_header": null,
+        "end_header": null,
+        "source": null,
+        "type": "html",
+        "data": "<ul><li>Max Ghenis<ul><li>email: [email protected]</li></ul></li></ul>"
+    },
+    "user_documentation": {
+        "start_header": null,
+        "end_header": null,
+        "source": null,
+        "type": "html",
+        "data": "<a href=\"http://maxghenis.github.io/scf/\"></a>"
+    },
+    "contributor_overview": {
+        "start_header": null,
+        "end_header": null,
+        "source": null,
+        "type": "html",
+        "data": "<a href=\"http://github.com/MaxGhenis/scf/\"></a>"
+    },
+    "user_changelog_recent": {
+        "start_header": null,
+        "end_header": null,
+        "source": null,
+        "type": "html",
+        "data": "<a href=\"https://github.com/MaxGhenis/scf/releases\"></a>"
+    },
+    "link_to_webapp": {
+        "start_header": null,
+        "end_header": null,
+        "source": null,
+        "type": null,
+        "data": null
+    }
+}
diff --git a/README.md b/README.md
@@ -0,0 +1,5 @@
+# `scf`
+
+`scf` is a Python package for loading and working with Survey of Consumer Finances summary microdata.
+
+For documentation on the summary microdata, see [Berkeley SDA's codebook](https://sda.berkeley.edu/sdaweb/docs/scfcomb2019/DOC/hcbk.htm).
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -0,0 +1,9 @@
+# `scf` roadmap
+
+`scf` currently provides capabilities for loading the Survey of Consumer Finances summary microdata.
+In the future, it will provide more functionality, including:
+* Functions to summarize the microdata, e.g., wealth distributions over time.
+* Charts to visualize these summaries.
+* Standard error calculations for full SCF microdata files.
+
+See the [issues page](https://github.com/MaxGhenis/scf/issues) to view and suggest other items.
diff --git a/environment.yml b/environment.yml
@@ -0,0 +1,12 @@
+name: scf
+dependencies:
+- codecov
+- flake8
+- numpy
+- pandas
+- pip
+- pytest
+- setuptools
+- pip:
+  # An editable VCS requirement must name its project through #egg=.
+  - "--editable=git+https://github.com/PSLmodels/microdf.git#egg=microdf"
+  - jupyter-book
diff --git a/scf/__init__.py b/scf/__init__.py
@@ -0,0 +1,10 @@
+from .load import load
+
+
+name = "scf"
+__version__ = "0.1.0"
+
+__all__ = [
+    # load.py
+    "load",
+]
diff --git a/scf/load.py b/scf/load.py
@@ -0,0 +1,51 @@
+import microdf as mdf
+import pandas as pd
+
+
+def scf_url(year: int):
+    """ Returns the URL of the SCF summary microdata zip file for a year.
+
+    :param year: Year of SCF summary microdata to retrieve.
+    :type year: int
+    :return: URL of summary microdata zip file for the given year.
+    :rtype: str
+    """
+    return ('https://www.federalreserve.gov/econres/files/scfp' + 
+            str(year) + 's.zip')
+
+
+def load_single_scf(year: int, cols: list):
+    """ Loads SCF summary microdata for a given year and set of columns.
+
+    :param year: Year of SCF summary microdata to retrieve.
+    :type year: int
+    :param cols: List of columns. The weight column `wgt` is always returned.
+    :type cols: list
+    :return: SCF summary microdata for the given year.
+    :rtype: pd.DataFrame
+    """
+    # Add wgt to all returns.
+    cols = list(set(cols) | set(['wgt']))
+    return mdf.read_stata_zip(scf_url(year), columns=cols)
+
+
+def load(years: list, cols: list):
+    """ Loads SCF summary microdata for a set of years and columns.
+
+    :param years: Year(s) to load SCF data for. Can be a list or single number.
+    :type years: list
+    :param cols: List of columns. The weight column `wgt` is always returned.
+    :type cols: list
+    :return: SCF summary microdata for the set of years.
+    :rtype: pd.DataFrame
+    """
+    # If years is a single year rather than a list, return without a loop.
+    if isinstance(years, int):
+        return load_single_scf(year, cols)
+    # Otherwise append to a list within a loop, and return concatenation.
+    scfs = []
+    for year in years:
+        tmp = load_single_scf(year, cols)
+        tmp['year'] = year
+        scfs.append(tmp)
+    return pd.concat(scfs)
diff --git a/scf/tests/test_load.py b/scf/tests/test_load.py
@@ -0,0 +1,12 @@
+import scf
+
+
+def equal_elements(l1, l2):
+    return set(l1) == set(l2)
+
+def test_load():
+    YEARS = [2016, 2019]
+    res = scf.load(YEARS, ['income', 'networth'])
+    # Should return the specified columns, plus year and wgt.
+    assert equal_elements(res.columns, ['income', 'networth', 'wgt', 'year'])
+    assert equal_elements(res.year.unique().tolist(), YEARS)
diff --git a/setup.py b/setup.py
@@ -0,0 +1,18 @@
+from setuptools import setup
+
+setup(
+    name="scf",
+    version="0.1.0",
+    description="Python package for working with the Survey of Consumer Finances microdata.",
+    url="http://github.com/maxghenis/scf",
+    author="Max Ghenis",
+    author_email="[email protected]",
+    license="MIT",
+    packages=["scf"],
+    install_requires=[
+        "numpy",
+        "pandas",
+        "microdf",
+    ],
+    zip_safe=False,
+)