PropensityFeatureStandardization deepcopy fix (#35)
* Fix deepcopy fail in PropensityFeatureStandardization

Removed the construction of the feature functions on init;
building them there made the object un-deepcopyable.
The reason is that the stored feature functions close over self,
and that reference does not get updated on deepcopy.
Since the lookup is only performed once, and the function is cheap,
I opted to remove the attribute entirely and build the dict only
when it is needed, so self can never be misreferenced (see the
sketch after this message).

Add deepcopy tests

* Bump version: 0.8.2
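
To illustrate the stale-reference half of this failure, here is a minimal,
hypothetical sketch (toy names, not causallib code) of closures captured at
init time still pointing at the original instance after deepcopy:

import copy

class Broken:
    def __init__(self):
        self.value = 1
        # Building the closure here bakes a reference to this exact
        # instance into the stored function.
        self._funcs = {"get_value": lambda: self.value}

original = Broken()
clone = copy.deepcopy(original)
clone.value = 99
# deepcopy treats function objects as atomic, so the closure inside
# clone._funcs still reads from `original`, not from `clone`:
print(clone._funcs["get_value"]())  # prints 1, not 99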

Co-authored-by: Michael M Danziger <[email protected]>
ehudkr and mmdanziger authored May 24, 2022
1 parent 6974fd4 commit 40a3a47
Showing 3 changed files with 13 additions and 17 deletions.
2 changes: 1 addition & 1 deletion causallib/__init__.py
@@ -1 +1 @@
-__version__ = "0.8.1"
+__version__ = "0.8.2"
12 changes: 3 additions & 9 deletions causallib/estimation/doubly_robust.py
@@ -344,7 +344,6 @@ def __init__(self, outcome_model, weight_model,
                          outcome_covariates, weight_covariates)
         self.feature_type = feature_type
 
-        self._feature_functions = self._define_feature_functions()
 
     def estimate_individual_outcome(self, X, a, treatment_values=None, predict_proba=None):
         X_augmented = self._augment_outcome_model_data(X, a)
@@ -365,12 +364,7 @@ def _augment_outcome_model_data(self, X, a):
             matrix (W | X).
         """
         X_outcome, X_weight = self._prepare_data(X, a)
-        feature_func = self._feature_functions.get(self.feature_type)
-        if feature_func is None:
-            raise ValueError(
-                f"feature type {self.feature_type} is not recognized."
-                f"Supported options are: {set(self._feature_functions.keys())}"
-            )
+        feature_func = self._get_feature_function(self.feature_type)
         weights_feature = feature_func(X_weight, a)
         # Let standardization deal with incorporating treatment assignment (a) into the data:
         X_augmented = pd.concat([weights_feature, X_outcome], join="outer", axis="columns")
@@ -390,7 +384,7 @@ def fit(self, X, a, y, refit_weight_model=True, **kwargs):
         self.outcome_model.fit(X=X_augmented, y=y, a=a)
         return self
 
-    def _define_feature_functions(self):
+    def _get_feature_function(self, function_name):
 
         def weight_vector(X, a):
             w = self.weight_model.compute_weights(X, a)
@@ -448,7 +442,7 @@ def propensity_matrix(X, a):
             "logit_propensity_vector": logit_propensity_vector,
             "propensity_matrix": propensity_matrix,
         }
-        return feature_functions
+        return feature_functions[function_name]
 
 
 class WeightedStandardization(BaseDoublyRobust):
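The net effect of the refactor above: no closures over self are stored on the
instance anymore, so a deep-copied estimator builds fresh closures bound to
itself. A minimal, hypothetical sketch of the resulting pattern (toy names,
not the causallib code):

import copy

class Fixed:
    def __init__(self):
        self.value = 1  # no closures are created or stored here

    def _get_feature_function(self, name):
        # Closures are built per call, so they capture whichever
        # instance the method is invoked on, copies included.
        feature_functions = {"get_value": lambda: self.value}
        return feature_functions[name]

clone = copy.deepcopy(Fixed())
clone.value = 99
print(clone._get_feature_function("get_value")())  # prints 99

Note the behavioral change hiding in the last diff line: an unrecognized
feature_type now raises KeyError from the dict lookup rather than the
ValueError the old code constructed.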
16 changes: 9 additions & 7 deletions causallib/tests/test_doublyrobust.py
@@ -444,19 +444,14 @@ def test_many_models(self):
         self.ensure_many_models(clip_min=0.001, clip_max=1-0.001)
 
     def test_many_feature_types(self):
-        with self.subTest("Ensure all feature types are tested"):
+        with self.subTest("Ensure all expected feature types are supported"):
             feature_types = [
                 "weight_vector", "signed_weight_vector",
                 "weight_matrix", "masked_weight_matrix",
                 "propensity_vector", "propensity_matrix",
                 "logit_propensity_vector",
             ]
-            model_feature_types = set(self.estimator._feature_functions.keys())
-            if set(feature_types) != model_feature_types:
-                raise AssertionError(
-                    "Hey there, there's a mismatch between `PropensityFeatureStandardization._feature_types"
-                    "and its corresponding tests. Did you add a new type without testing?"
-                )
+            assert all(self.estimator._get_feature_function(name) for name in feature_types)
 
         # These two options are from Bang and Robins, and should be theoretically sound,
         # however, they do seem to be less efficient (greater variance) than the other methods.
@@ -481,3 +476,10 @@ def test_many_feature_types(self):
     #     self.estimator.feature_type = "signed_weight_vector"
     #     with self.assertRaises(AssertionError):
     #         self.estimator.fit(data['X'], a, data['y'])
+
+    def test_can_fit_after_deepcopy(self):
+        # added following https://github.ibm.com/CausalDev/CausalInference/issues/101
+        from copy import deepcopy
+        estimator_copy = deepcopy(self.estimator)
+        data = self.create_uninformative_ox_dataset()
+        estimator_copy.fit(data['X'], data['a'], data['y'])
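
For orientation, a hypothetical end-to-end snippet of the scenario the new
test guards against; the learners and toy data below are illustrative
assumptions, not taken from the diff:

from copy import deepcopy

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression

from causallib.estimation import IPW, PropensityFeatureStandardization, Standardization

# Illustrative toy data: 100 units, 3 covariates, binary treatment.
rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(100, 3)), columns=["x0", "x1", "x2"])
a = pd.Series(rng.integers(0, 2, size=100), name="a")
y = pd.Series(X["x0"] + a + rng.normal(size=100), name="y")

estimator = PropensityFeatureStandardization(
    outcome_model=Standardization(LinearRegression()),
    weight_model=IPW(LogisticRegression()),
)
# Before this commit, the copy's feature functions still referenced the
# original estimator; after it, the copy fits cleanly on its own.
estimator_copy = deepcopy(estimator)
estimator_copy.fit(X, a, y)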
