-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpers.py
34 lines (26 loc) · 1.09 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
def convert_to_lime_format(X, categorical_names, col_names=None, invert: bool = False) -> pd.DataFrame:
    """Convert categorical columns between string labels and LIME integer codes.

    LIME expects categorical features to be encoded as integer labels, where
    the integer is the position of the label in ``categorical_names[col]``.
    This helper applies that encoding (or reverses it) using the very same
    ``categorical_names`` dictionary that is handed to LIME, so both sides
    stay consistent.

    Args:
        X: The data to convert. Either a ``pandas.DataFrame`` (which is
            copied, never modified) or anything accepted by the
            ``pandas.DataFrame`` constructor, such as a 2-D numpy array.
        categorical_names: Mapping from the *positional* index of a
            categorical column to the ordered sequence of its labels.
        col_names: Column names used when ``X`` is not already a DataFrame,
            e.g. to rebuild the original frame from a numpy array in LIME
            format before passing it to a Pipeline or OneHotEncoder.
        invert: If ``False`` (default) map string labels to integer codes.
            If ``True`` map integer codes back to their string labels.

    Returns:
        A new DataFrame with every column listed in ``categorical_names``
        converted. Values that have no entry in the mapping become NaN
        (standard ``Series.map`` behaviour).
    """
    # If the data isn't a dataframe, we need to be able to build it.
    if isinstance(X, pd.DataFrame):
        X_lime = X.copy()
    else:
        X_lime = pd.DataFrame(X, columns=col_names)

    for col_idx, labels in categorical_names.items():
        if invert:
            # integer code -> string label
            label_map = dict(enumerate(labels))
        else:
            # string label -> integer code
            label_map = {label: code for code, label in enumerate(labels)}

        converted = X_lime.iloc[:, col_idx].map(label_map)

        # Replace the whole column instead of assigning through
        # ``.iloc[:, k] = ...``: recent pandas versions perform that
        # assignment in place, which keeps the old dtype (strings -> ints
        # stay ``object``) and is deprecated when the new values do not fit
        # the existing dtype (ints -> strings). ``isetitem`` swaps the column
        # by position, so it also works with duplicated column names.
        if hasattr(X_lime, "isetitem"):
            X_lime.isetitem(col_idx, converted)
        else:
            # pandas < 1.5: ``iloc`` assignment already replaces the column.
            X_lime.iloc[:, col_idx] = converted

    return X_lime