diff --git a/MANIFEST.in b/MANIFEST.in index e74b337ce..0ed740b96 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,5 +7,3 @@ include taxcalc/policy_current_law.json include taxcalc/puf_weights.csv.gz include taxcalc/puf_ratios.csv include taxcalc/records_variables.json -include taxcalc/tmd_weights.csv.gz -include taxcalc/tmd_growfactors.csv diff --git a/Makefile b/Makefile index dcf83aeb9..778d70226 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ help: @echo "clean : remove .pyc files and local taxcalc package" @echo "package : build and install local package" @echo "pytest-cps : generate report for and cleanup after" - @echo " pytest -m 'not requires_pufcsv and not requires_tmdcsv and not pre_release'" + @echo " pytest -m 'not requires_pufcsv and not pre_release'" @echo "pytest : generate report for and cleanup after" @echo " pytest -m 'not pre_release'" @echo "pytest-all : generate report for and cleanup after" @@ -51,7 +51,7 @@ endef .PHONY=pytest-cps pytest-cps: @$(pytest-setup) - @cd taxcalc ; pytest -n4 --disable-warnings --durations=0 --durations-min=2 -m "not requires_pufcsv and not requires_tmdcsv and not pre_release" + @cd taxcalc ; pytest -n4 --disable-warnings --durations=0 --durations-min=2 -m "not requires_pufcsv and not pre_release" @$(pytest-cleanup) .PHONY=pytest @@ -103,7 +103,7 @@ define coverage-cleanup rm -f .coverage htmlcov/* endef -COVMARK = "not requires_pufcsv and not requires_tmdcsv and not pre_release" +COVMARK = "not requires_pufcsv and not pre_release" OS := $(shell uname -s) diff --git a/docs/usage/data.md b/docs/usage/data.md index b55e19239..13d387f33 100644 --- a/docs/usage/data.md +++ b/docs/usage/data.md @@ -61,13 +61,16 @@ file. 
The [tax-microdata repository](https://github.com/PSLmodels/tax-microdata-benchmarking) -produces an input variables file (`tmd.csv`) and a -`tmd_weights.csv.gz` file that is included in the Tax-Calculator +produces an input variables file (`tmd.csv`), a national weights file +(`tmd_weights.csv.gz`), and a variable growth factors file +(`tmd_growfactors.csv`) that can be used with the Tax-Calculator package beginning with the 3.6.0 release. The `tmd.csv` file is available only to Tax-Calculator users who have purchased their own -version of the 2015 IRS-SOI PUF. For those users, the -`Records.tmd_constructor()` method creates a `Records` class object -containing the `tmd` variables and weights. +version of the 2015 IRS-SOI PUF. For those users, those three files +are available from the tax-microdata repository. These three tmd files +can be used with the Tax-Calculator Python API (using the +`Records.tmd_constructor()` static method) or with the Tax-Calculator +CLI tool, `tc`. ## Using other data with Tax-Calculator diff --git a/pytest.ini b/pytest.ini index f40bfdba7..c67875214 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,7 +3,6 @@ testpaths = taxcalc markers = requires_pufcsv - requires_tmdcsv pre_release compatible_data local diff --git a/taxcalc.egg-info/SOURCES.txt b/taxcalc.egg-info/SOURCES.txt index 1d6021c0a..c9bf6fd20 100644 --- a/taxcalc.egg-info/SOURCES.txt +++ b/taxcalc.egg-info/SOURCES.txt @@ -121,8 +121,6 @@ taxcalc/puf_weights.csv.gz taxcalc/records.py taxcalc/records_variables.json taxcalc/taxcalcio.py -taxcalc/tmd_growfactors.csv -taxcalc/tmd_weights.csv.gz taxcalc/utils.py taxcalc/utilsprvt.py taxcalc.egg-info/PKG-INFO @@ -214,7 +212,6 @@ taxcalc/tests/test_records.py taxcalc/tests/test_reforms.py taxcalc/tests/test_responses.py taxcalc/tests/test_taxcalcio.py -taxcalc/tests/test_tmdcsv.py taxcalc/tests/test_utils.py taxcalc/validation/CSV_INPUT_VARS.md taxcalc/validation/CSV_OUTPUT_VARS.md diff --git a/taxcalc/records.py b/taxcalc/records.py 
index a5e942561..7b5a9936a 100644 --- a/taxcalc/records.py +++ b/taxcalc/records.py @@ -6,6 +6,7 @@ # pylint --disable=locally-disabled records.py import os +from pathlib import Path import numpy as np import pandas as pd from taxcalc.data import Data @@ -116,9 +117,6 @@ class instance: Records PUF_RATIOS_FILENAME = 'puf_ratios.csv' CPS_WEIGHTS_FILENAME = 'cps_weights.csv.gz' CPS_RATIOS_FILENAME = None - TMD_WEIGHTS_FILENAME = 'tmd_weights.csv.gz' - TMD_GROWFACTORS_FILENAME = 'tmd_growfactors.csv' - TMD_RATIOS_FILENAME = None CODE_PATH = os.path.abspath(os.path.dirname(__file__)) VARINFO_FILE_NAME = 'records_variables.json' VARINFO_FILE_PATH = CODE_PATH @@ -226,9 +224,12 @@ def cps_constructor(data=None, exact_calculations=exact_calculations) @staticmethod - def tmd_constructor(data, # path to tmd.csv file or dataframe - gfactors=GrowFactors(TMD_GROWFACTORS_FILENAME), - exact_calculations=False): # pragma: no cover + def tmd_constructor( + data_path: Path, + weights_path: Path, + growfactors_path: Path, + exact_calculations=False + ): # pragma: no cover """ Static method returns a Records object instantiated with TMD input data. This works in a analogous way to Records(), which @@ -239,14 +240,18 @@ def tmd_constructor(data, # path to tmd.csv file or dataframe eliminate the need to specify all the details of the PUF input data. 
""" - weights = os.path.join(Records.CODE_PATH, Records.TMD_WEIGHTS_FILENAME) - return Records(data=data, - start_year=Records.TMDCSV_YEAR, - gfactors=gfactors, - weights=weights, - adjust_ratios=Records.TMD_RATIOS_FILENAME, - exact_calculations=exact_calculations) - + assert isinstance(data_path, Path) + assert isinstance(weights_path, Path) + assert isinstance(growfactors_path, Path) + return Records( + data=pd.read_csv(data_path), + start_year=Records.TMDCSV_YEAR, + weights=str(weights_path), + gfactors=GrowFactors(growfactors_filename=str(growfactors_path)), + adjust_ratios=None, + exact_calculations=exact_calculations, + ) + def increment_year(self): """ Add one to current year, and also does @@ -277,7 +282,7 @@ def _extrapolate(self, year): """ # pylint: disable=too-many-statements,no-member # put values in local dictionary - gfv = dict() + gfv = {} for name in GrowFactors.VALID_NAMES: gfv[name] = self.gfactors.factor_value(name, year) # apply values to Records variables diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py index 5296de83c..9bd7f7d51 100644 --- a/taxcalc/taxcalcio.py +++ b/taxcalc/taxcalcio.py @@ -74,6 +74,8 @@ def __init__(self, input_data, tax_year, baseline, reform, assump, self.puf_input_data = False self.cps_input_data = False self.tmd_input_data = False + self.tmd_weights = None + self.tmd_gfactor = None if isinstance(input_data, str): # remove any leading directory path from INPUT filename fname = os.path.basename(input_data) @@ -90,6 +92,23 @@ def __init__(self, input_data, tax_year, baseline, reform, assump, if not self.cps_input_data and not os.path.isfile(input_data): msg = 'INPUT file could not be found' self.errmsg += 'ERROR: {}\n'.format(msg) + # if tmd_input_data is True, construct weights and gfactor paths + if self.tmd_input_data: # pragma: no cover + tmd_dir = os.path.dirname(input_data) + if 'TMD_AREA' in os.environ: + area = os.environ['TMD_AREA'] + wfile = f'{area}_tmd_weights.csv.gz' + inp = 
f'{fname[:-4]}_{area}-{str(tax_year)[2:]}' + else: # using national weights + wfile = 'tmd_weights.csv.gz' + self.tmd_weights = os.path.join(tmd_dir, wfile) + self.tmd_gfactor = os.path.join(tmd_dir, 'tmd_growfactors.csv') + if not os.path.isfile(self.tmd_weights): + msg = f'weights file {self.tmd_weights} could not be found' + self.errmsg += 'ERROR: {}\n'.format(msg) + if not os.path.isfile(self.tmd_gfactor): + msg = f'gfactor file {self.tmd_gfactor} could not be found' + self.errmsg += 'ERROR: {}\n'.format(msg) elif isinstance(input_data, pd.DataFrame): inp = 'df-{}'.format(str(tax_year)[2:]) else: @@ -123,7 +142,7 @@ def __init__(self, input_data, tax_year, baseline, reform, assump, elif isinstance(reform, str): self.specified_reform = True # split any compound reform into list of simple reforms - refnames = list() + refnames = [] reforms = reform.split('+') for rfm in reforms: # remove any leading directory path from rfm filename @@ -206,7 +225,7 @@ def __init__(self, input_data, tax_year, baseline, reform, assump, self.calc = None self.calc_base = None self.param_dict = None - self.policy_dicts = list() + self.policy_dicts = [] def init(self, input_data, tax_year, baseline, reform, assump, aging_input_data, exact_calculations): @@ -234,7 +253,7 @@ def init(self, input_data, tax_year, baseline, reform, assump, # get assumption sub-dictionaries paramdict = Calculator.read_json_param_objects(None, assump) # get policy parameter dictionaries from --reform file(s) - policydicts = list() + policydicts = [] if self.specified_reform: reforms = reform.split('+') for ref in reforms: @@ -252,9 +271,7 @@ def init(self, input_data, tax_year, baseline, reform, assump, self.errmsg += valerr_msg.__str__() # create GrowFactors base object that incorporates gdiff_baseline if self.tmd_input_data: - gfactors_base = GrowFactors( # pragma: no cover - Records.TMD_GROWFACTORS_FILENAME - ) + gfactors_base = GrowFactors(self.tmd_gfactor) # pragma: no cover else: gfactors_base = 
GrowFactors() gdiff_baseline.apply_to(gfactors_base) @@ -266,9 +283,7 @@ def init(self, input_data, tax_year, baseline, reform, assump, self.errmsg += valerr_msg.__str__() # create GrowFactors ref object that has all gdiff objects applied if self.tmd_input_data: - gfactors_ref = GrowFactors( # pragma: no cover - Records.TMD_GROWFACTORS_FILENAME - ) + gfactors_ref = GrowFactors(self.tmd_gfactor) # pragma: no cover else: gfactors_ref = GrowFactors() gdiff_baseline.apply_to(gfactors_ref) @@ -333,14 +348,20 @@ def init(self, input_data, tax_year, baseline, reform, assump, exact_calculations=exact_calculations ) elif self.tmd_input_data: - recs = Records.tmd_constructor( - data=input_data, + recs = Records( + data=pd.read_csv(input_data), + start_year=Records.TMDCSV_YEAR, + weights=self.tmd_weights, gfactors=gfactors_ref, + adjust_ratios=None, exact_calculations=exact_calculations ) # pragma: no cover - recs_base = Records.tmd_constructor( - data=input_data, + recs_base = Records( + data=pd.read_csv(input_data), + start_year=Records.TMDCSV_YEAR, + weights=self.tmd_weights, gfactors=gfactors_base, + adjust_ratios=None, exact_calculations=exact_calculations ) # pragma: no cover else: # if not {cps|tmd}_input_data but aging_input_data @@ -541,7 +562,7 @@ def write_doc_file(self): doc = Calculator.reform_documentation(self.param_dict, self.policy_dicts[1:]) doc_fname = self._output_filename.replace('.csv', '-doc.text') - with open(doc_fname, 'w') as dfile: + with open(doc_fname, 'w', encoding='utf-8') as dfile: dfile.write(doc) def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax, @@ -575,7 +596,7 @@ def write_tables_file(self): tab_fname = self._output_filename.replace('.csv', '-tab.text') # skip tables if there are not some positive weights if self.calc_base.total_weight() <= 0.: - with open(tab_fname, 'w') as tfile: + with open(tab_fname, 'w', encoding='utf-8') as tfile: msg = 'No tables because sum of weights is not positive\n' tfile.write(msg) return @@ 
-597,7 +618,7 @@ def write_tables_file(self): diff = nontax + change # using expanded_income under baseline policy diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars) # write each kind of distributional table - with open(tab_fname, 'w') as tfile: + with open(tab_fname, 'w', encoding='utf-8') as tfile: TaxCalcIO.write_decile_table(distdf, tfile, tkind='Reform Totals') tfile.write('\n') TaxCalcIO.write_decile_table(diffdf, tfile, tkind='Differences') @@ -730,7 +751,7 @@ def write_empty_graph_file(fname, title, reason): '