From f9580c770ad0ffedbe32915b30ad102452f6eab1 Mon Sep 17 00:00:00 2001 From: Yonatan CARRANZA Date: Thu, 18 Apr 2024 23:12:15 +0200 Subject: [PATCH 1/2] Fix bug https://github.com/scikit-learn-contrib/MAPIE/issues/406 --- .../residual_conformity_scores.py | 3 +++ mapie/wrap_arrays.py | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 mapie/wrap_arrays.py diff --git a/mapie/conformity_scores/residual_conformity_scores.py b/mapie/conformity_scores/residual_conformity_scores.py index d9b174e49..e3017d522 100644 --- a/mapie/conformity_scores/residual_conformity_scores.py +++ b/mapie/conformity_scores/residual_conformity_scores.py @@ -12,6 +12,7 @@ from mapie._machine_precision import EPSILON from mapie._typing import ArrayLike, NDArray from mapie.conformity_scores import ConformityScore +from mapie.wrap_arrays import wrap_ndarray_and_dataframe class AbsoluteConformityScore(ConformityScore): @@ -377,6 +378,8 @@ def get_signed_conformity_scores( (X, y, y_pred, self.residual_estimator_, random_state) = self._check_parameters(X, y, y_pred) + # Wrap numpy or pandas array transparently to handle indexing + X = wrap_ndarray_and_dataframe(X) full_indexes = np.argwhere( np.logical_not(np.isnan(y_pred)) diff --git a/mapie/wrap_arrays.py b/mapie/wrap_arrays.py new file mode 100644 index 000000000..4644a351d --- /dev/null +++ b/mapie/wrap_arrays.py @@ -0,0 +1,26 @@ +from typing import Union +import numpy as np +import pandas as pd +from mapie._typing import NDArray + + +class wrap_ndarray_and_dataframe: + + def __init__(self, X_array: Union[NDArray | pd.DataFrame]): + """ + This class is a wrapper for numpy arrays and pandas DataFrames. + It is used to handle the indexing access to the data in a consistent way. + :param X_array: + """ + self.X_array = X_array + if isinstance(X_array, pd.DataFrame): + self.X_array = pd.DataFrame(X_array, columns=X_array.columns) + self.X_array = self.X_array.astype(self.X_array.dtypes.to_dict()) + + def __getitem__(self, item): + if isinstance(self.X_array, pd.DataFrame): + return self.X_array.iloc[item].values + elif isinstance(self.X_array, np.ndarray): + return self.X_array[item] + else: + raise ValueError("Input must be a numpy array or pandas DataFrame") From 350d8d86b8fdf8b757cfcd60fee0e69f209a98dc Mon Sep 17 00:00:00 2001 From: Yonatan CARRANZA Date: Fri, 19 Apr 2024 11:16:09 +0200 Subject: [PATCH 2/2] #434 Add coments additional and solve the automatic tests --- mapie/wrap_arrays.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/mapie/wrap_arrays.py b/mapie/wrap_arrays.py index 4644a351d..fa6ee3637 100644 --- a/mapie/wrap_arrays.py +++ b/mapie/wrap_arrays.py @@ -9,18 +9,36 @@ class wrap_ndarray_and_dataframe: def __init__(self, X_array: Union[NDArray | pd.DataFrame]): """ This class is a wrapper for numpy arrays and pandas DataFrames. - It is used to handle the indexing access to the data in a consistent way. - :param X_array: + It is used to handle the indexing access to the data + in a consistent way. + + Parameters + ---------- + X_array: Union[NDArray | pd.DataFrame] + The data to wrap, either a numpy array or a pandas DataFrame. """ self.X_array = X_array if isinstance(X_array, pd.DataFrame): self.X_array = pd.DataFrame(X_array, columns=X_array.columns) self.X_array = self.X_array.astype(self.X_array.dtypes.to_dict()) - def __getitem__(self, item): + def __getitem__(self, i: int): + """ + This method is used to handle the indexing access to X_array. + + Parameters + ---------- + i: int + Index to access. + + Returns + ------- + NDArray + The data at index i. + """ if isinstance(self.X_array, pd.DataFrame): - return self.X_array.iloc[item].values + return self.X_array.iloc[i].values elif isinstance(self.X_array, np.ndarray): - return self.X_array[item] + return self.X_array[i] else: raise ValueError("Input must be a numpy array or pandas DataFrame")