Skip to content

Commit

Permalink
Merge pull request #1976 from martinholmer/fuzz-reform-affected
Browse files: browse the repository at this point in the history
Restrict fuzzing to reform-affected filing units
  • Loading branch information
martinholmer authored Apr 19, 2018
2 parents ce37b17 + edd259b commit f978987
Show file tree
Hide file tree
Showing 4 changed files with 757 additions and 801 deletions.
25 changes: 11 additions & 14 deletions taxcalc/tbi/tbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# pylint --disable=locally-disabled tbi.py

from __future__ import print_function
import gc
import time
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -101,12 +100,12 @@ def run_nth_year_tax_calc_model(year_n, start_year,

start_time = time.time()

# create calc1 and calc2 calculated for year_n and mask
# create calc1 and calc2 calculated for year_n
check_years_return_first_year(year_n, start_year, use_puf_not_cps)
(calc1, calc2, mask) = calculate(year_n, start_year,
use_puf_not_cps, use_full_sample,
user_mods,
behavior_allowed=True)
(calc1, calc2) = calculate(year_n, start_year,
use_puf_not_cps, use_full_sample,
user_mods,
behavior_allowed=True)

# extract raw results from calc1 and calc2
rawres1 = calc1.distribution_table_dataframe()
Expand All @@ -115,18 +114,16 @@ def run_nth_year_tax_calc_model(year_n, start_year,
# delete calc1 and calc2 now that raw results have been extracted
del calc1
del calc2
gc.collect()

# seed random number generator with a seed value based on user_mods
seed = random_seed(user_mods)
print('seed={}'.format(seed))
np.random.seed(seed) # pylint: disable=no-member

# construct TaxBrain summary results from raw results
summ = summary(rawres1, rawres2, mask)
summ = summary(rawres1, rawres2, use_puf_not_cps)
del rawres1
del rawres2
gc.collect()

def append_year(pdf):
"""
Expand Down Expand Up @@ -202,11 +199,11 @@ def run_nth_year_gdp_elast_model(year_n, start_year,
fyear = check_years_return_first_year(year_n, start_year, use_puf_not_cps)
if year_n > 0 and (start_year + year_n) > fyear:
# create calc1 and calc2 calculated for year_n - 1
(calc1, calc2, _) = calculate((year_n - 1), start_year,
use_puf_not_cps,
use_full_sample,
user_mods,
behavior_allowed=False)
(calc1, calc2) = calculate((year_n - 1), start_year,
use_puf_not_cps,
use_full_sample,
user_mods,
behavior_allowed=False)
# compute GDP effect given specified gdp_elasticity
gdp_effect = proportional_change_in_gdp((start_year + year_n),
calc1, calc2, gdp_elasticity)
Expand Down
115 changes: 37 additions & 78 deletions taxcalc/tbi/tbi_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,9 @@ def calculate(year_n, start_year,
"""
The calculate function assumes the specified user_mods is a dictionary
returned by the Calculator.read_json_param_objects() function.
The function returns (calc1, calc2, mask) where
calc1 is pre-reform Calculator object calculated for year_n,
calc2 is post-reform Calculator object calculated for year_n, and
mask is boolean array marking records with reform-induced iitax diffs
The function returns (calc1, calc2) where
calc1 is pre-reform Calculator object calculated for year_n, and
calc2 is post-reform Calculator object calculated for year_n.
Set behavior_allowed to False when generating static results or
set behavior_allowed to True when generating dynamic results.
"""
Expand Down Expand Up @@ -149,41 +148,6 @@ def calculate(year_n, start_year,
calc1.calc_all()
assert calc1.current_year == start_year

# compute mask array
res1 = calc1.dataframe(DIST_VARIABLES)
if use_puf_not_cps:
# create pre-reform Calculator instance with extra income
recs1p = Records(data=sample, gfactors=growfactors_pre)
# add one dollar to the income of each filing unit to determine
# which filing units undergo a resulting change in tax liability
recs1p.e00200 += 1.0 # pylint: disable=no-member
recs1p.e00200p += 1.0 # pylint: disable=no-member
policy1p = Policy(gfactors=growfactors_pre)
# create Calculator with recs1p and calculate for start_year
calc1p = Calculator(policy=policy1p, records=recs1p,
consumption=consump)
while calc1p.current_year < start_year:
calc1p.increment_year()
calc1p.calc_all()
assert calc1p.current_year == start_year
# compute mask showing which of the calc1 and calc1p results differ;
# mask is true if a filing unit's income tax liability changed after
# a dollar was added to the filing unit's wage and salary income
res1p = calc1p.dataframe(DIST_VARIABLES)
mask = np.logical_not( # pylint: disable=no-member
np.isclose(res1.iitax, res1p.iitax, atol=0.001, rtol=0.0)
)
assert np.any(mask)
# delete intermediate objects
del recs1p
del policy1p
del calc1p
del res1p
else: # if use_puf_not_cps is False
# indicate that fuzzing of reform results is not required
mask = np.full(res1.shape, False)
del res1

# specify Behavior instance
behv = Behavior()
behavior_assumps = user_mods['behavior']
Expand Down Expand Up @@ -239,8 +203,8 @@ def calculate(year_n, start_year,
else:
calc2.calc_all()

# return calculated Calculator objects and mask
return (calc1, calc2, mask)
# return calculated Calculator objects
return (calc1, calc2)


def random_seed(user_mods):
Expand Down Expand Up @@ -372,13 +336,13 @@ def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
if do_fuzzing:
df2['mask'] = mask
df2['expanded_income_baseline'] = df1['expanded_income']
create(df1, df2, 'dec', 'expanded_income_baseline', '_xdec',
create(df1, df2, 'dec', 'expanded_income_baseline', '', # '_xdec',
columns_to_create, do_fuzzing)
df2_xdec = copy.deepcopy(df2)
create(df1, df2, 'bin', 'expanded_income_baseline', '_xbin',
create(df1, df2, 'bin', 'expanded_income_baseline', '', # '_xbin',
columns_to_create, do_fuzzing)
df2_xbin = copy.deepcopy(df2)
create(df1, df2, 'agg', 'expanded_income_baseline', '_agg',
create(df1, df2, 'agg', 'expanded_income_baseline', '', # '_agg',
columns_to_create, do_fuzzing)
df2_aggr = copy.deepcopy(df2)
return (df2_xdec, df2_xbin, df2_aggr)
Expand All @@ -387,31 +351,42 @@ def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
AGGR_ROW_NAMES = ['ind_tax', 'payroll_tax', 'combined_tax']


def summary(df1, df2, mask):
def summary(df1, df2, fuzzing):
"""
df1 contains raw results for baseline plan
df2 contains raw results for reform plan
mask is the boolean array specifying records with reform-induced tax diffs
returns dictionary of summary results DataFrames
df1 contains distribution-table variables for baseline.
df2 contains distribution-table variables for reform.
fuzzing indicates whether or not there is a need to fuzz df2 variables.
returns dictionary of summary-results DataFrames.
"""
# pylint: disable=too-many-statements,too-many-locals

df2_xdec, df2_xbin, df2_aggr = create_results_columns(df1, df2, mask)
df1_xdec = add_quantile_table_row_variable(df1, 'expanded_income',
10, decile_details=True)
del df1_xdec['table_row']
df1_xbin = add_income_table_row_variable(df1, 'expanded_income',
bins=STANDARD_INCOME_BINS)
del df1_xbin['table_row']
if fuzzing:
reform_affected = np.logical_not( # pylint: disable=no-member
np.isclose(df1['combined'], df2['combined'],
atol=0.001, rtol=0.0))
df2_xdec, df2_xbin, df2_aggr = create_results_columns(df1, df2,
reform_affected)
df1_xdec = add_quantile_table_row_variable(df1, 'expanded_income',
10, decile_details=True)
del df1_xdec['table_row']
df1_xbin = add_income_table_row_variable(df1, 'expanded_income',
bins=STANDARD_INCOME_BINS)
del df1_xbin['table_row']
else:
df2_aggr = copy.deepcopy(df2)
df2_xdec = copy.deepcopy(df2)
df2_xbin = copy.deepcopy(df2)
df1_xdec = copy.deepcopy(df1)
df1_xbin = copy.deepcopy(df1)

summ = dict()

# tax difference totals between reform and baseline
tdiff = df2_aggr['iitax_agg'] - df1['iitax']
tdiff = df2_aggr['iitax'] - df1['iitax']
aggr_itax_d = (tdiff * df2['s006']).sum()
tdiff = df2_aggr['payrolltax_agg'] - df1['payrolltax']
tdiff = df2_aggr['payrolltax'] - df1['payrolltax']
aggr_ptax_d = (tdiff * df2['s006']).sum()
tdiff = df2_aggr['combined_agg'] - df1['combined']
tdiff = df2_aggr['combined'] - df1['combined']
aggr_comb_d = (tdiff * df2['s006']).sum()
aggrd = [aggr_itax_d, aggr_ptax_d, aggr_comb_d]
summ['aggr_d'] = pd.DataFrame(data=aggrd, index=AGGR_ROW_NAMES)
Expand All @@ -424,55 +399,49 @@ def summary(df1, df2, mask):
summ['aggr_1'] = pd.DataFrame(data=aggr1, index=AGGR_ROW_NAMES)

# totals for reform
aggr_itax_2 = (df2_aggr['iitax_agg'] * df2['s006']).sum()
aggr_ptax_2 = (df2_aggr['payrolltax_agg'] * df2['s006']).sum()
aggr_comb_2 = (df2_aggr['combined_agg'] * df2['s006']).sum()
aggr_itax_2 = (df2_aggr['iitax'] * df2['s006']).sum()
aggr_ptax_2 = (df2_aggr['payrolltax'] * df2['s006']).sum()
aggr_comb_2 = (df2_aggr['combined'] * df2['s006']).sum()
aggr2 = [aggr_itax_2, aggr_ptax_2, aggr_comb_2]
summ['aggr_2'] = pd.DataFrame(data=aggr2, index=AGGR_ROW_NAMES)

del df1
del df2

# create difference tables grouped by xdec
df2_xdec['iitax'] = df2_xdec['iitax_xdec']
summ['diff_itax_xdec'] = \
create_difference_table(df1_xdec, df2_xdec,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='iitax')

df2_xdec['payrolltax'] = df2_xdec['payrolltax_xdec']
summ['diff_ptax_xdec'] = \
create_difference_table(df1_xdec, df2_xdec,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='payrolltax')

df2_xdec['combined'] = df2_xdec['combined_xdec']
summ['diff_comb_xdec'] = \
create_difference_table(df1_xdec, df2_xdec,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='combined')

# create difference tables grouped by xbin
df2_xbin['iitax'] = df2_xbin['iitax_xbin']
diff_itax_xbin = \
create_difference_table(df1_xdec, df2_xbin,
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='iitax')
summ['diff_itax_xbin'] = diff_itax_xbin

df2_xbin['payrolltax'] = df2_xbin['payrolltax_xbin']
diff_ptax_xbin = \
create_difference_table(df1_xbin, df2_xbin,
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='payrolltax')
summ['diff_ptax_xbin'] = diff_ptax_xbin

df2_xbin['combined'] = df2_xbin['combined_xbin']
diff_comb_xbin = \
create_difference_table(df1_xbin, df2_xbin,
groupby='standard_income_bins',
Expand All @@ -486,11 +455,6 @@ def summary(df1, df2, mask):
income_measure='expanded_income',
result_type='weighted_sum')

suffix = '_xdec'
df2_cols_with_suffix = [c for c in list(df2_xdec) if c.endswith(suffix)]
for col in df2_cols_with_suffix:
root_col_name = col.replace(suffix, '')
df2_xdec[root_col_name] = df2_xdec[col]
df2_xdec['expanded_income_baseline'] = df1_xdec['expanded_income']
summ['dist2_xdec'] = \
create_distribution_table(df2_xdec, groupby='weighted_deciles',
Expand All @@ -504,11 +468,6 @@ def summary(df1, df2, mask):
result_type='weighted_sum')
summ['dist1_xbin'] = dist1_xbin

suffix = '_xbin'
df2_cols_with_suffix = [c for c in list(df2_xbin) if c.endswith(suffix)]
for col in df2_cols_with_suffix:
root_col_name = col.replace(suffix, '')
df2_xbin[root_col_name] = df2_xbin[col]
df2_xbin['expanded_income_baseline'] = df1_xbin['expanded_income']
dist2_xbin = \
create_distribution_table(df2_xbin, groupby='standard_income_bins',
Expand Down
4 changes: 2 additions & 2 deletions taxcalc/tests/tbi_cps_expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2324,7 +2324,7 @@ TABLE dist2_xbin RESULTS:
"3869917.61",
"86741286308.30",
"0.00",
"826699773746.32",
"826699773746.31",
"117350446739.81",
"1136777879078.92",
"99669.26",
Expand Down Expand Up @@ -2444,7 +2444,7 @@ TABLE dist2_xbin RESULTS:
"3028591969682.75",
"3028591969682.75",
"15110730620949.12",
"12341701408829.37"
"12341701408829.38"
]
}
TABLE dist2_xdec RESULTS:
Expand Down
Loading

0 comments on commit f978987

Please sign in to comment.