Skip to content

Commit

Permalink
raise error instead of warning when a user has missing data and add check for train data in addition to test (#2143)
Browse files Browse the repository at this point in the history

* raise error instead of warning when a user has missing data and add check for train data in addition to test

* address comments
  • Loading branch information
hawestra authored Jul 6, 2023
1 parent 9bfca4b commit 0d5e41a
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 13 deletions.
22 changes: 14 additions & 8 deletions responsibleai/responsibleai/rai_insights/rai_insights.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,14 +597,9 @@ def _validate_rai_insights_input_parameters(
"identified as categorical features: "
f"{non_categorical_or_time_string_columns}")

list_of_feature_having_missing_values = []
for feature in test.columns.tolist():
if np.any(test[feature].isnull()):
list_of_feature_having_missing_values.append(feature)
if len(list_of_feature_having_missing_values) > 0:
warnings.warn(
f"Features {list_of_feature_having_missing_values} "
"have missing values in test data")
# Check if any of the data is missing in test and train data
self._validate_data_is_not_missing(test, "test")
self._validate_data_is_not_missing(train, "train")

self._validate_feature_metadata(
feature_metadata, train, task_type, model, target_column)
Expand Down Expand Up @@ -717,6 +712,17 @@ def _validate_classes(
if_predictions=True
)

def _validate_data_is_not_missing(self, data, data_name):
"""Validates that data is not missing (ie null)"""
list_of_feature_having_missing_values = []
for feature in data.columns.tolist():
if np.any(data[feature].isnull()):
list_of_feature_having_missing_values.append(feature)
if len(list_of_feature_having_missing_values) > 0:
raise UserConfigValidationException(
f"Features {list_of_feature_having_missing_values} "
f"have missing values in {data_name} data.")

def _validate_feature_metadata(
self, feature_metadata, train, task_type, model, target_column):
"""Validates the feature metadata."""
Expand Down
44 changes: 39 additions & 5 deletions responsibleai/tests/rai_insights/test_rai_insights_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_validate_unsupported_task_type(self, forecasting_enabled):
task_type='regre',
forecasting_enabled=forecasting_enabled)

def test_missing_data_warnings(self):
def test_missing_test_data(self):
train_data = {
'Column1': [10, 20, 90, 40, 50],
'Column2': [10, 20, 90, 40, 50],
Expand All @@ -57,7 +57,39 @@ def test_missing_data_warnings(self):
train = pd.DataFrame(train_data)

test_data = {
'Column1': [10, 20, np.nan, 40, 50],
'Column1': [10, 20, 90, 40, 50],
'Column2': [10, 20, 90, 40, 50],
'Target': [10, 20, np.nan, 40, 50]
}
test = pd.DataFrame(test_data)

X_train = train.drop(columns=['Target'])
y_train = train['Target'].values
model = create_complex_classification_pipeline(
X_train, y_train, ['Column1', 'Column2'], [])

with pytest.raises(
UserConfigValidationException,
match="['Column1']") as ucve:
RAIInsights(
model=model,
train=train,
test=test,
target_column='Target',
task_type='classification')
assert "Features ['Target'] have missing values in " + \
"test data" in str(ucve.value)

def test_missing_train_data(self):
train_data = {
'Column1': [10, 20, 90, 40, 50],
'Column2': [10, 20, np.nan, 40, 50],
'Target': [10, 20, 90, 40, 50]
}
train = pd.DataFrame(train_data)

test_data = {
'Column1': [10, 20, 90, 40, 50],
'Column2': [10, 20, 90, 40, 50],
'Target': [10, 20, 90, 40, 50]
}
Expand All @@ -68,15 +100,17 @@ def test_missing_data_warnings(self):
model = create_complex_classification_pipeline(
X_train, y_train, ['Column1', 'Column2'], [])

with pytest.warns(
UserWarning,
match="['Column1']"):
with pytest.raises(
UserConfigValidationException,
match="['Column2']") as ucve:
RAIInsights(
model=model,
train=train,
test=test,
target_column='Target',
task_type='classification')
assert "Features ['Column2'] have missing values in " + \
"train data" in str(ucve.value)

def test_validate_test_data_size(self):
X_train, X_test, y_train, y_test, _, _ = \
Expand Down

0 comments on commit 0d5e41a

Please sign in to comment.