Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for calmar on number of positive value #324

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Changelog
### 3.0.2 [#324](https://github.com/openfisca/openfisca-survey-manager/pull/324)

# 3.0.1 [#322](https://github.com/openfisca/openfisca-survey-manager/pull/322)
* New feature
- Introduce the possibility to use calmar on variables changed with expressions starting with a space, with an associated target; for instance, 'wage > 0' to target the number of positive wages in the population.

### 3.0.1 [#322](https://github.com/openfisca/openfisca-survey-manager/pull/322)

* Technical changes
- Fix build.
Expand Down
35 changes: 25 additions & 10 deletions openfisca_survey_manager/calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy
from numpy import logical_not
import pandas as pd
import re

from openfisca_core.model_api import Enum
from openfisca_survey_manager.calmar import calmar
Expand Down Expand Up @@ -52,8 +53,11 @@ def __init__(self, simulation, target_margins, period, target_entity_count = Non
variable_instance_by_variable_name = simulation.tax_benefit_system.variables
entities = set(
variable_instance_by_variable_name[variable].entity.key
for variable in margin_variables
for var in margin_variables for variable in re.findall('[A-Za-z_]+', var)
)
for var in margin_variables:
assert len(set([variable_instance_by_variable_name[variable].entity.key
for variable in re.findall('[A-Za-z_]+', var)])) == 1, "An expression use variables that are not based on the same entity"
if entity is not None:
entities.add(entity)
self.entities = list(entities)
Expand Down Expand Up @@ -100,6 +104,8 @@ def __init__(self, simulation, target_margins, period, target_entity_count = Non
self.filter_by = numpy.array(1.0)

assert weight_name is not None, "A calibration needs a weight variable name to act on"
weight_variable = simulation.tax_benefit_system.variables[weight_name]
weight_variable.unit = ""
self._initial_weight_name = weight_name + "_ini"
self.initial_weight = initial_weight = simulation.calculate(weight_name, period = period)

Expand Down Expand Up @@ -131,8 +137,13 @@ def _build_calmar_data(self) -> dict:
data[self.target_entity][self._initial_weight_name] = self.initial_weight * self.filter_by
period = self.period
for variable in self.margins_by_variable:
assert variable in self.simulation.tax_benefit_system.variables
data[self.simulation.tax_benefit_system.variables[variable].entity.key][variable] = self.simulation.adaptative_calculate_variable(variable, period = period)
list_var = re.findall('[A-Za-z_]+', variable)
assert all([var in self.simulation.tax_benefit_system.variables for var in list_var])
dic_eval = {}
for var in list_var:
dic_eval[var] = self.simulation.adaptative_calculate_variable(var, period = period)
value = eval(variable, {}, dic_eval)
data[self.simulation.tax_benefit_system.variables[list_var[0]].entity.key][variable] = value

if len(self.entities) == 2:
for entity in self.entities:
Expand Down Expand Up @@ -209,8 +220,9 @@ def set_target_margin(self, variable, target):
"""
simulation = self.simulation
period = self.period
assert variable in simulation.tax_benefit_system.variables
variable_instance = simulation.tax_benefit_system.variables[variable]
list_var = re.findall('[A-Za-z_]+', variable)
assert all([var in simulation.tax_benefit_system.variables for var in list_var])
variable_instance = simulation.tax_benefit_system.variables[list_var[0]]

filter_by = self.filter_by
target_by_category = None
Expand Down Expand Up @@ -247,8 +259,6 @@ def set_calibrated_weights(self):
weight_variable = simulation.tax_benefit_system.variables[weight_name]
if weight_name == self.weight_name:
weight_variable.unit = "base_weight" # The weight variable is flagged as the one that have changed
if weight_variable.formulas:
weight_variable.formulas = [] # The weight variable becomes an input variable after it changes with calibration
# Delete the weights already computed for other entities
# to ensure that these weights are recomputed if they derive from
# the calibrated weight variable
Expand All @@ -275,10 +285,15 @@ def _update_margins(self):
filter_by = self.filter_by
initial_weight = self.initial_weight

value = simulation.adaptative_calculate_variable(variable, period = period)
list_var = re.findall('[A-Za-z_]+', variable)
dic_eval = {}
for var in list_var:
dic_eval[var] = simulation.adaptative_calculate_variable(var, period = period)
value = eval(variable, {}, dic_eval)

weight_variable = simulation.weight_variable_by_entity[target_entity]

if len(self.entities) == 2 and simulation.tax_benefit_system.variables[variable].entity.key != self.target_entity:
if len(self.entities) == 2 and simulation.tax_benefit_system.variables[list_var[0]].entity.key != self.target_entity:
value_df = pd.DataFrame(value)
id_variable = self.parameters["id_variable_link"]
value_df[id_variable] = simulation.adaptative_calculate_variable(id_variable, period = period)
Expand All @@ -297,7 +312,7 @@ def _update_margins(self):
('initial', initial_weight),
]

variable_instance = simulation.tax_benefit_system.get_variable(variable)
variable_instance = simulation.tax_benefit_system.get_variable(list_var[0])
assert variable_instance is not None
if variable_instance.value_type in [bool, Enum]:
margin_items.append(('category', value))
Expand Down
4 changes: 3 additions & 1 deletion openfisca_survey_manager/calmar.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,16 +253,18 @@ def F_prime(x):
if isinstance(val, dict):
dummies_dict = build_dummies_dict(data[entity][var])
k, pop = 0, 0
list_col_to_add = [data[entity]]
for cat, nb in val.items():
cat_varname = var + '_' + str(cat)
data[entity][cat_varname] = dummies_dict[cat]
list_col_to_add.append(pd.Series(dummies_dict[cat], name = cat_varname))
margins_new[cat_varname] = nb
if var not in margins_new_dict:
margins_new_dict[var] = {}
margins_new_dict[var][cat] = nb
pop += nb
k += 1
nj += 1
data[entity] = pd.concat(list_col_to_add, axis = 1)
# Check total population
population = (entity == target_entity) * total_population + (entity != target_entity) * total_population_smaller_entity
if pop != population:
Expand Down
4 changes: 3 additions & 1 deletion openfisca_survey_manager/tests/test_calmar.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def create_margins(entities = 1):
1: 30,
2: 50,
},
'Z': 140.0
'Z': 140.0,
'Z > 0': 80,
'(Z > 0) * (X > 0)': 80
}
if entities == 2:
margins_by_variable['C'] = 85
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "OpenFisca-Survey-Manager"
version = "3.0.1"
version = "3.0.2"
description = "A tool for managing survey/administrative data and import them in OpenFisca"
readme = "README.md"
keywords = ["microsimulation", "tax", "benefit", "rac", "rules-as-code", "survey", "data"]
Expand Down