diff --git a/PSL_catalog.json b/PSL_catalog.json
new file mode 100644
index 0000000..35f5a39
--- /dev/null
+++ b/PSL_catalog.json
@@ -0,0 +1,51 @@
+{
+  "project_one_line": {
+    "start_header": null,
+    "end_header": null,
+    "source": null,
+    "type": "html",
+    "data": "scf is a Python package for working with Survey of Consumer Finances microdata."
+  },
+  "project_overview": {
+    "start_header": null,
+    "end_header": null,
+    "source": null,
+    "type": "html",
+    "data": "What is scf?"
+  },
+  "core_maintainers": {
+    "start_header": null,
+    "end_header": null,
+    "source": null,
+    "type": "html",
+    "data": "- Max Ghenis"
+  },
+  "user_documentation": {
+    "start_header": null,
+    "end_header": null,
+    "source": null,
+    "type": "html",
+    "data": ""
+  },
+  "contributor_overview": {
+    "start_header": null,
+    "end_header": null,
+    "source": null,
+    "type": "html",
+    "data": ""
+  },
+  "user_changelog_recent": {
+    "start_header": null,
+    "end_header": null,
+    "source": null,
+    "type": "html",
+    "data": ""
+  },
+  "link_to_webapp": {
+    "start_header": null,
+    "end_header": null,
+    "source": null,
+    "type": null,
+    "data": null
+  }
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8d27c94
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# `scf`
+
+`scf` is a Python package for loading and working with Survey of Consumer Finances summary microdata.
+
+For documentation on the summary microdata, see [Berkeley SDA's codebook](https://sda.berkeley.edu/sdaweb/docs/scfcomb2019/DOC/hcbk.htm).
diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000..298cd17
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,9 @@
+# `scf` roadmap
+
+`scf` currently provides capabilities for loading the Survey of Consumer Finances summary microdata.
+In the future, it will provide more functionality, including:
+* Functions to summarize the microdata, e.g., wealth distributions over time.
+* Charts to visualize these summaries.
+* Standard error calculations for full SCF microdata files.
+
+See the [issues page](https://github.com/MaxGhenis/scf/issues) to view and suggest other items.
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..105d3a3
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,12 @@
+name: scf
+dependencies:
+- codecov
+- flake8
+- numpy
+- pandas
+- pip
+- pytest
+- setuptools
+- pip:  # editable VCS requirements must name their package via #egg=
+  - "--editable=git+https://github.com/PSLmodels/microdf.git#egg=microdf"
+  - jupyter-book
diff --git a/scf/__init__.py b/scf/__init__.py
new file mode 100644
index 0000000..ba7a6f2
--- /dev/null
+++ b/scf/__init__.py
@@ -0,0 +1,10 @@
+from .load import load
+
+
+name = "scf"
+__version__ = "0.1.0"  # Same value as version= in setup.py.
+
+__all__ = [
+    # Public API re-exported from load.py.
+    "load",
+]
diff --git a/scf/load.py b/scf/load.py
new file mode 100644
index 0000000..c617f73
--- /dev/null
+++ b/scf/load.py
@@ -0,0 +1,51 @@
+import microdf as mdf
+import pandas as pd
+
+
+def scf_url(year: int):
+    """ Builds the download URL of the SCF summary microdata zip for a year.
+
+    :param year: Survey year whose summary microdata zip file is wanted.
+    :type year: int
+    :return: URL of the summary microdata zip file for that year.
+    :rtype: str
+    """
+    prefix = 'https://www.federalreserve.gov/econres/files/scfp'
+    return prefix + str(year) + 's.zip'
+
+
+def load_single_scf(year: int, cols: list):
+    """ Loads SCF summary microdata for a given year and set of columns.
+
+    :param year: Year of SCF summary microdata to retrieve.
+    :type year: int
+    :param cols: Columns to load, in order; `wgt` is appended if not listed.
+    :type cols: list
+    :return: SCF summary microdata for the given year.
+    :rtype: pd.DataFrame
+    """
+    # Add wgt, de-duplicating while keeping the caller's column order.
+    cols = list(dict.fromkeys([*cols, 'wgt']))
+    return mdf.read_stata_zip(scf_url(year), columns=cols)
+
+
+def load(years: list, cols: list):
+    """ Loads SCF summary microdata for a set of years and columns.
+
+    :param years: Year(s) to load SCF data for. Can be a list or single number.
+    :type years: list
+    :param cols: List of columns. The weight column `wgt` is always returned.
+    :type cols: list
+    :return: SCF summary microdata for the set of years. When `years` is a
+        list, a `year` column records the survey year of each row.
+    :rtype: pd.DataFrame
+    """
+    if isinstance(years, int):  # Single year: load it directly, no loop.
+        return load_single_scf(years, cols)
+    # Otherwise load each year, tag its rows with the year, and concatenate.
+    scfs = []
+    for year in years:
+        tmp = load_single_scf(year, cols)
+        tmp['year'] = year
+        scfs.append(tmp)
+    return pd.concat(scfs)
diff --git a/scf/tests/test_load.py b/scf/tests/test_load.py
new file mode 100644
index 0000000..0818d72
--- /dev/null
+++ b/scf/tests/test_load.py
@@ -0,0 +1,12 @@
+import scf
+
+
+def equal_elements(l1, l2):
+    return not set(l1).symmetric_difference(l2)
+
+def test_load():
+    # Multi-year load: requested columns plus wgt and year, for those years.
+    years, wanted = [2016, 2019], ['income', 'networth']
+    loaded = scf.load(years, wanted)
+    assert equal_elements(loaded.columns, wanted + ['wgt', 'year'])
+    assert equal_elements(loaded['year'].unique().tolist(), years)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..7fedc0b
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,18 @@
+from setuptools import setup
+
+# Package metadata and runtime dependencies for the scf distribution.
+setup(
+    name="scf",
+    version="0.1.0",
+    license="MIT",
+    author="Max Ghenis",
+    author_email="mghenis@gmail.com",
+    url="http://github.com/maxghenis/scf",
+    description=(
+        "Python package for working with the Survey of Consumer Finances "
+        "microdata."
+    ),
+    packages=["scf"],
+    install_requires=["numpy", "pandas", "microdf"],
+    zip_safe=False,
+)