Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added hack-support for n_features_to_select #93

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion boruta/boruta_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,15 @@ class BorutaPy(BaseEstimator, TransformerMixin):
"""

def __init__(self, estimator, n_estimators=1000, perc=100, alpha=0.05,
two_step=True, max_iter=100, random_state=None, verbose=0):
two_step=True, max_iter=100, n_features_to_select=None,
random_state=None, verbose=0):
self.estimator = estimator
self.n_estimators = n_estimators
self.perc = perc
self.alpha = alpha
self.two_step = two_step
self.max_iter = max_iter
self.n_features_to_select = n_features_to_select
self.random_state = random_state
self.verbose = verbose
self.__version__ = '0.3'
Expand Down Expand Up @@ -384,6 +386,14 @@ def _fit(self, X, y):

self.importance_history_ = imp_history

if self.n_features_to_select is not None:
assert type(self.n_features_to_select) == int
self.support_strong_ = self.support_.copy()
self.support_ = self._select_n_features(
self.n_features_to_select,
self.importance_history_
)

# notify user
if self.verbose > 0:
self._print_results(dec_reg, _iter, 1)
Expand Down Expand Up @@ -590,3 +600,17 @@ def _print_results(self, dec_reg, _iter, flag):
result = '\n'.join([x[0] + '\t' + x[1] for x in zip(cols, content)])
output = "\n\nBorutaPy finished running.\n\n" + result
print(output)

def _select_n_features(self, n_features_to_select, importance_history_):
# fetch faeture importances of last trained ensemble
# -1 -encoded features were already rejected
feature_importance = np.nan_to_num(importance_history_[-1], nan=-1.)
selected_features = feature_importance.argsort()[-n_features_to_select:]

support = np.zeros_like(feature_importance, dtype=bool)
support[selected_features] = True

# ensure rejected features are not selected
support = ((feature_importance != -1) & support)

return support