Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add validation of the target column type in the classification scenario #2127

Merged
merged 4 commits into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions responsibleai/responsibleai/rai_insights/rai_insights.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,13 @@ def _validate_rai_insights_input_parameters(
f"Error finding unique values in column {column}. "
"Please check your test data.")

# Validate that the target column isn't continuous if the
# user is running classification scenario
if (task_type == ModelTask.CLASSIFICATION and
train[target_column].dtype == "float64"):
hawestra marked this conversation as resolved.
Show resolved Hide resolved
raise UserConfigValidationException(
imatiach-msft marked this conversation as resolved.
Show resolved Hide resolved
"Target column type must not be continuous "
"for classification scenario.")
# Check if any features exist that are not numeric, datetime, or
# categorical.
train_features = train.drop(columns=[target_column]).columns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,31 @@ def test_validate_categorical_features_not_having_train_features(self):
task_type='classification',
categorical_features=['not_a_feature'])

def test_validate_multi_classification_continuous_target_column(self):
    """Check that a float-valued target is rejected for classification.

    Builds a small frame whose target column holds fractional values,
    trains a classifier on separate integer labels so that model creation
    itself succeeds, and then expects ``RAIInsights`` construction to
    raise ``UserConfigValidationException`` with the continuous-target
    message.
    """
    # Two integer feature columns plus a target column of fractions.
    dataset = pd.DataFrame({
        'Column1': [10, 20, 90, 40, 50],
        'Column2': [10, 20, 90, 40, 50],
    })
    dataset[TARGET] = np.array([.1, .2, .9, .4, .5])

    # The model is fitted against valid integer class labels; only the
    # target column handed to RAIInsights is continuous.
    valid_labels = np.array([1, 1, 2, 0, 1])
    classifier = create_lightgbm_classifier(dataset, valid_labels)

    expected_message = ("Target column type must not be continuous "
                        "for classification scenario.")
    insights_kwargs = {
        'model': classifier,
        'train': dataset,
        'test': dataset,
        'target_column': TARGET,
        'task_type': 'classification',
    }
    with pytest.raises(UserConfigValidationException,
                       match=expected_message):
        RAIInsights(**insights_kwargs)

def test_validate_serializer(self):
X_train, X_test, y_train, y_test, _, _ = \
create_cancer_data(return_dataframe=True)
Expand Down