diff --git a/feature_importance/01_run_ablation_classification.py b/feature_importance/01_run_ablation_classification.py
index adf1199..989f5cc 100644
--- a/feature_importance/01_run_ablation_classification.py
+++ b/feature_importance/01_run_ablation_classification.py
@@ -22,7 +22,7 @@
 from sklearn.linear_model import LogisticRegressionCV
 from sklearn.svm import SVC
 import xgboost as xgb
-from imodels.importance import RandomForestPlusRegressor, RandomForestPlusClassifier
+from imodels.tree.rf_plus.rf_plus.rf_plus_models import RandomForestPlusRegressor, RandomForestPlusClassifier
 sys.path.append(".")
 sys.path.append("..")
 sys.path.append("../..")
@@ -62,11 +62,11 @@
 def ablation_to_mean(train, data, feature_importance, mode, num_features):
     """
-    Replace the top num_features max feature importance data with random shuffle for each sample
+    Replace each sample's num_features highest- (or lowest-) importance feature values with the training mean
     """
     train_mean = np.mean(train, axis=0)
     assert mode in ["max", "min"]
-    fi = feature_importance.to_numpy()
+    fi = feature_importance
     if mode == "max":
         indices = np.argsort(-fi)
     else:
@@ -77,17 +77,18 @@ def ablation_to_mean(train, data, feature_importance, mode, num_features):
             data_copy[i, indices[i,j]] = train_mean[indices[i,j]]
     return data_copy
 
-def ablation_by_addition(data, feature_importance, mode, num_features):
+def ablation_by_addition(train, data, feature_importance, mode, num_features):
     """
-    Initialize the data with zeros and add the top num_features max feature importance data for each sample
+    Initialize each row with the training means, then add back the sample's num_features highest- (or lowest-) importance feature values
     """
     assert mode in ["max", "min"]
-    fi = feature_importance.to_numpy()
+    fi = feature_importance
     if mode == "max":
         indices = np.argsort(-fi)
     else:
         indices = np.argsort(fi)
-    data_copy = np.zeros(data.shape)
+    row_values = np.mean(train, axis=0).tolist()
+    data_copy = np.array([row_values] * data.shape[0])
     for i in range(data.shape[0]):
         for j in range(num_features):
             data_copy[i, indices[i,j]] = data[i, indices[i,j]]
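Both helpers now ablate toward the training mean instead of shuffling or zeroing. As a reading aid, here is a minimal standalone sketch of what the two functions compute after this change (`feature_importance` is an `(n_samples, n_features)` array of local scores; `data_copy` starting as a copy of `data` in the first helper is an assumption about the lines elided between the two hunks):

```python
import numpy as np

def ablation_to_mean(train, data, feature_importance, mode, num_features):
    # Per sample, overwrite its top-`num_features` features (ranked by its
    # own local-importance row) with the per-feature means of the train set.
    assert mode in ["max", "min"]
    train_mean = np.mean(train, axis=0)
    fi = np.asarray(feature_importance)
    order = np.argsort(-fi, axis=1) if mode == "max" else np.argsort(fi, axis=1)
    data_copy = data.copy()
    for i in range(data.shape[0]):
        cols = order[i, :num_features]
        data_copy[i, cols] = train_mean[cols]
    return data_copy

def ablation_by_addition(train, data, feature_importance, mode, num_features):
    # Start every row at the train means, then add back only the
    # top-`num_features` observed values for each sample.
    assert mode in ["max", "min"]
    fi = np.asarray(feature_importance)
    order = np.argsort(-fi, axis=1) if mode == "max" else np.argsort(fi, axis=1)
    data_copy = np.tile(np.mean(train, axis=0), (data.shape[0], 1))
    for i in range(data.shape[0]):
        cols = order[i, :num_features]
        data_copy[i, cols] = data[i, cols]
    return data_copy
```

Imputing at the mean keeps ablated rows near the training distribution, so the measured performance drop is attributable to the removed features rather than to handing the model off-manifold inputs such as all-zero rows.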
@@ -137,21 +138,20 @@ def compare_estimators(estimators: List[ModelConfig],
         y_tune = y
         y_test = y
 
-    normalizer = preprocessing.Normalizer()
-    if splitting_strategy == "train-test":
-        X_train = normalizer.fit_transform(X_train)
-        X_test = normalizer.transform(X_test)
-    else:
-        X = normalizer.fit_transform(X)
-        X_train = normalizer.transform(X_train)
-        X_test = normalizer.transform(X_test)
-
-
-    # fit model
+    # fit RF model
     est.fit(X_train, y_train)
-    test_all_auc = roc_auc_score(y_test, est.predict_proba(X_test)[:, 1])
-    test_all_auprc = average_precision_score(y_test, est.predict_proba(X_test)[:, 1])
-    test_all_f1 = f1_score(y_test, est.predict_proba(X_test)[:, 1] > 0.5)
+    test_all_auc_rf = roc_auc_score(y_test, est.predict_proba(X_test)[:, 1])
+    test_all_auprc_rf = average_precision_score(y_test, est.predict_proba(X_test)[:, 1])
+    test_all_f1_rf = f1_score(y_test, est.predict_proba(X_test)[:, 1] > 0.5)
+
+    # fit RF_plus model
+    start = time.time()
+    rf_plus_base = RandomForestPlusClassifier(rf_model=est)
+    rf_plus_base.fit(X_train, y_train)
+    end = time.time()
+    test_all_auc_rf_plus = roc_auc_score(y_test, rf_plus_base.predict_proba(X_test)[:, 1])
+    test_all_auprc_rf_plus = average_precision_score(y_test, rf_plus_base.predict_proba(X_test)[:, 1])
+    test_all_f1_rf_plus = f1_score(y_test, rf_plus_base.predict_proba(X_test)[:, 1] > 0.5)
 
     np.random.seed(42)
     indices_train = np.random.choice(X_train.shape[0], 100, replace=False)
@@ -161,46 +161,57 @@ def compare_estimators(estimators: List[ModelConfig],
     X_test_subset = X_test[indices_test]
     y_test_subset = y_test[indices_test]
 
-    # loop over fi estimators
-    rng = np.random.RandomState()
-    number_of_ablations = 1
-    seeds = rng.randint(0, 10000, number_of_ablations)
     for fi_est in tqdm(fi_ests):
         metric_results = {
             'model': model.name,
             'fi': fi_est.name,
             'train_size': X_train.shape[0],
+            'train_subset_size': X_train_subset.shape[0],
             'test_size': X_test.shape[0],
+            'test_subset_size': X_test_subset.shape[0],
             'num_features': X_train.shape[1],
             'data_split_seed': args.split_seed,
-            'test_all_auc': test_all_auc,
-            'test_all_auprc': test_all_auprc,
-            'test_all_f1': test_all_f1
+            'test_all_auc_rf': test_all_auc_rf,
+            'test_all_auprc_rf': test_all_auprc_rf,
+            'test_all_f1_rf': test_all_f1_rf,
+            'test_all_auc_rf_plus': test_all_auc_rf_plus,
+            'test_all_auprc_rf_plus': test_all_auprc_rf_plus,
+            'test_all_f1_rf_plus': test_all_f1_rf_plus,
+            'rf_plus_fit_time': end - start,
         }
         for i in range(100):
            metric_results[f'sample_train_{i}'] = indices_train[i]
            metric_results[f'sample_test_{i}'] = indices_test[i]
-        for i in range(len(seeds)):
-            metric_results[f'ablation_seed_{i}'] = seeds[i]
+
+        print("Compute feature importance")
         start = time.time()
-        local_fi_score_train_subset = fi_est.cls(X_train=X_train, y_train=y_train,
-                                                 X_train_subset = X_train_subset, y_train_subset=y_train_subset,
-                                                 X_test=X_test, y_test=y_test,
-                                                 fit=copy.deepcopy(est), data_fit_on="train_subset", **fi_est.kwargs)
-        if fi_est.name not in ["LIME_RF_plus", "Kernel_SHAP_RF_plus"]:
-            local_fi_score_test = fi_est.cls(X_train=X_train, y_train=y_train,
-                                             X_train_subset = X_train_subset, y_train_subset=y_train_subset,
-                                             X_test=X_test, y_test=y_test,
-                                             fit=copy.deepcopy(est), data_fit_on="test", **fi_est.kwargs)
-        else:
-            local_fi_score_test = None
-        local_fi_score_test_subset = fi_est.cls(X_train=X_train, y_train=y_train,
-                                                X_train_subset = X_train_subset, y_train_subset=y_train_subset,
-                                                X_test=X_test_subset, y_test=y_test_subset,
-                                                fit=copy.deepcopy(est), data_fit_on="test", **fi_est.kwargs)
+        if fi_est.name == "LFI_evaluate_on_all_RF_plus" or fi_est.name == "LFI_evaluate_on_oob_RF_plus":
+            local_fi_score_train, local_partial_pred_train, local_fi_score_test, local_partial_pred_test, local_fi_score_test_subset, local_partial_pred_test_subset = fi_est.cls(X_train=X_train, y_train=y_train,
+                X_train_subset = X_train_subset, y_train_subset=y_train_subset,
+                X_test_subset=X_test_subset, X_test=X_test,
+                fit=rf_plus_base, **fi_est.kwargs)
+            local_fi_score_train_subset = local_fi_score_train[indices_train]
+            local_partial_pred_train_subset = local_partial_pred_train[indices_train]
+        elif fi_est.name == "LFI_fit_on_inbag_RF" or fi_est.name == "LFI_fit_on_OOB_RF":
+            local_fi_score_train, local_partial_pred_train, local_fi_score_test, local_partial_pred_test, local_fi_score_test_subset, local_partial_pred_test_subset = fi_est.cls(X_train=X_train, y_train=y_train,
+                X_train_subset = X_train_subset, y_train_subset=y_train_subset,
+                X_test_subset=X_test_subset, X_test=X_test,
+                fit=copy.deepcopy(est), **fi_est.kwargs)
+            local_fi_score_train_subset = local_fi_score_train[indices_train]
+            local_partial_pred_train_subset = local_partial_pred_train[indices_train]
+        elif fi_est.name == "TreeSHAP_RF":
+            local_fi_score_train_subset, local_fi_score_test, local_fi_score_test_subset = fi_est.cls(X_train=X_train, y_train=y_train,
+                X_train_subset = X_train_subset, y_train_subset=y_train_subset,
+                X_test_subset=X_test_subset, X_test=X_test,
+                fit=copy.deepcopy(est), **fi_est.kwargs)
+        elif fi_est.name == "Kernel_SHAP_RF_plus" or fi_est.name == "LIME_RF_plus":
+            local_fi_score_train_subset, local_fi_score_test, local_fi_score_test_subset = fi_est.cls(X_train=X_train, y_train=y_train,
+                X_train_subset = X_train_subset, y_train_subset=y_train_subset,
+                X_test_subset=X_test_subset, X_test=X_test,
+                fit=rf_plus_base, **fi_est.kwargs)
         end = time.time()
         metric_results['fi_time'] = end - start
-        # feature_importance_list.append(local_fi_score_train_subset)
+        feature_importance_list.append(local_fi_score_train_subset)
         feature_importance_list.append(local_fi_score_test)
         feature_importance_list.append(local_fi_score_test_subset)
 
@@ -208,13 +219,21 @@ def compare_estimators(estimators: List[ModelConfig],
                            "LogisticCV": LogisticRegressionCV(random_state=42),
                            "SVM": SVC(random_state=42, probability=True),
                            "XGBoost_Classifier": xgb.XGBClassifier(random_state=42),
-                           "RF_Plus_Classifier": RandomForestPlusClassifier(rf_model=RandomForestClassifier(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42))}
+                           "RF_Plus_Classifier": rf_plus_base}
+        start = time.time()
+        for a_model in ablation_models:
+            if a_model != "RF_Plus_Classifier":
+                ablation_models[a_model].fit(X_train, y_train)
+        end = time.time()
+        metric_results['ablation_model_fit_time'] = end - start
+        print("start ablation")
 
         # Subset Train data ablation for all FI methods
         start = time.time()
         for a_model in ablation_models:
             ablation_est = ablation_models[a_model]
-            ablation_est.fit(X_train, y_train)
+            if a_model != "RF_Plus_Classifier":
+                ablation_est.fit(X_train, y_train)
             y_pred = ablation_est.predict_proba(X_train_subset)[:, 1]
             metric_results[a_model+'_train_subset_AUROC_before_ablation'] = roc_auc_score(y_train_subset, y_pred)
             metric_results[a_model+'_train_subset_AUPRC_before_ablation'] = average_precision_score(y_train_subset, y_pred)
@@ -225,31 +244,30 @@ def compare_estimators(estimators: List[ModelConfig],
             ablation_results_auroc_list = [0] * X_train_subset.shape[1]
             ablation_results_auprc_list = [0] * X_train_subset.shape[1]
             ablation_results_f1_list = [0] * X_train_subset.shape[1]
-            for seed in seeds:
-                for i in range(X_train_subset.shape[1]):
-                    if fi_est.ascending:
-                        ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "max", i+1)
-                    else:
-                        ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "min", i+1)
-                    ablation_results_auroc_list[i] += roc_auc_score(y_train_subset, ablation_est.predict_proba(ablation_X_train_subset)[:, 1])
-                    ablation_results_auprc_list[i] += average_precision_score(y_train_subset, ablation_est.predict_proba(ablation_X_train_subset)[:, 1])
-                    ablation_results_f1_list[i] += f1_score(y_train_subset, ablation_est.predict_proba(ablation_X_train_subset)[:, 1] > 0.5)
-            ablation_results_f1_list = [x / number_of_ablations for x in ablation_results_f1_list]
-            ablation_results_auroc_list = [x / number_of_ablations for x in ablation_results_auroc_list]
-            ablation_results_auprc_list = [x / number_of_ablations for x in ablation_results_auprc_list]
+            for i in range(X_train_subset.shape[1]):
+                if fi_est.ascending:
+                    ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "max", i+1)
+                else:
+                    ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "min", i+1)
+                ablation_results_auroc_list[i] += roc_auc_score(y_train_subset, ablation_est.predict_proba(ablation_X_train_subset)[:, 1])
+                ablation_results_auprc_list[i] += average_precision_score(y_train_subset, ablation_est.predict_proba(ablation_X_train_subset)[:, 1])
+                ablation_results_f1_list[i] += f1_score(y_train_subset, ablation_est.predict_proba(ablation_X_train_subset)[:, 1] > 0.5)
             for i in range(X_train_subset.shape[1]):
                 metric_results[f'{a_model}_train_subset_AUROC_after_ablation_{i+1}'] = ablation_results_auroc_list[i]
                 metric_results[f'{a_model}_train_subset_AUPRC_after_ablation_{i+1}'] = ablation_results_auprc_list[i]
                 metric_results[f'{a_model}_train_subset_F1_after_ablation_{i+1}'] = ablation_results_f1_list[i]
         end = time.time()
-        metric_results['train_subset_data_ablation_time'] = end - start
+        print(f"done with ablation train subset {end - start}")
+        metric_results['train_subset_ablation_time'] = end - start
+
+        # Test data ablation
         # Subset test data ablation for all FI methods - removal
         start = time.time()
         for a_model in ablation_models:
             ablation_est = ablation_models[a_model]
-            ablation_est.fit(X_train, y_train)
+            if a_model != "RF_Plus_Classifier":
+                ablation_est.fit(X_train, y_train)
             y_pred_subset = ablation_est.predict_proba(X_test_subset)[:, 1]
             metric_results[a_model+'_test_subset_AUROC_before_ablation'] = roc_auc_score(y_test_subset, y_pred_subset)
             metric_results[a_model+'_test_subset_AUPRC_before_ablation'] = average_precision_score(y_test_subset, y_pred_subset)
@@ -260,31 +278,29 @@ def compare_estimators(estimators: List[ModelConfig],
             ablation_results_auroc_list = [0] * X_test_subset.shape[1]
             ablation_results_auprc_list = [0] * X_test_subset.shape[1]
             ablation_results_f1_list = [0] * X_test_subset.shape[1]
-            for seed in seeds:
-                for i in range(X_test_subset.shape[1]):
-                    if fi_est.ascending:
-                        ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "max", i+1)
-                    else:
-                        ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "min", i+1)
-                    ablation_results_auroc_list[i] += roc_auc_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset)[:, 1])
-                    ablation_results_auprc_list[i] += average_precision_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset)[:, 1])
-                    ablation_results_f1_list[i] += f1_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset)[:, 1] > 0.5)
-            ablation_results_f1_list = [x / number_of_ablations for x in ablation_results_f1_list]
-            ablation_results_auroc_list = [x / number_of_ablations for x in ablation_results_auroc_list]
-            ablation_results_auprc_list = [x / number_of_ablations for x in ablation_results_auprc_list]
+            for i in range(X_test_subset.shape[1]):
+                if fi_est.ascending:
+                    ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "max", i+1)
+                else:
+                    ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "min", i+1)
+                ablation_results_auroc_list[i] += roc_auc_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset)[:, 1])
+                ablation_results_auprc_list[i] += average_precision_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset)[:, 1])
+                ablation_results_f1_list[i] += f1_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset)[:, 1] > 0.5)
             for i in range(X_test_subset.shape[1]):
                 metric_results[f'{a_model}_test_subset_AUROC_after_ablation_{i+1}'] = ablation_results_auroc_list[i]
                 metric_results[f'{a_model}_test_subset_AUPRC_after_ablation_{i+1}'] = ablation_results_auprc_list[i]
                 metric_results[f'{a_model}_test_subset_F1_after_ablation_{i+1}'] = ablation_results_f1_list[i]
         end = time.time()
-        metric_results['test_subset_ablation_time'] = end - start
+        print(f"done with ablation 1 test subset {end - start}")
+        metric_results['test_subset_ablation_1_time'] = end - start
 
         # Subset test data ablation for all FI methods - addition
         start = time.time()
         for a_model in ablation_models:
             ablation_est = ablation_models[a_model]
-            ablation_est.fit(X_train, y_train)
+            if a_model != "RF_Plus_Classifier":
+                ablation_est.fit(X_train, y_train)
             metric_results[a_model+'_test_subset_AUROC_before_ablation_blank'] = roc_auc_score(y_test_subset, ablation_est.predict(np.zeros(X_test_subset.shape)))
             metric_results[a_model+'_test_subset_AUPRC_before_ablation_blank'] = average_precision_score(y_test_subset, ablation_est.predict(np.zeros(X_test_subset.shape)))
             metric_results[a_model+'_test_subset_F1_before_ablation_blank'] = f1_score(y_test_subset, ablation_est.predict(np.zeros(X_test_subset.shape)) > 0.5)
@@ -294,31 +310,29 @@ def compare_estimators(estimators: List[ModelConfig],
             ablation_results_auroc_list = [0] * X_test_subset.shape[1]
             ablation_results_auprc_list = [0] * X_test_subset.shape[1]
             ablation_results_f1_list = [0] * X_test_subset.shape[1]
-            for seed in seeds:
-                for i in range(X_test_subset.shape[1]):
-                    if fi_est.ascending:
-                        ablation_X_test_subset_blank = ablation_by_addition(X_test_subset, imp_vals, "max", i+1)
-                    else:
-                        ablation_X_test_subset_blank = ablation_by_addition(X_test_subset, imp_vals, "min", i+1)
-                    ablation_results_auroc_list[i] += roc_auc_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset_blank)[:, 1])
-                    ablation_results_auprc_list[i] += average_precision_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset_blank)[:, 1])
-                    ablation_results_f1_list[i] += f1_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset_blank)[:, 1] > 0.5)
-            ablation_results_f1_list = [x / number_of_ablations for x in ablation_results_f1_list]
-            ablation_results_auroc_list = [x / number_of_ablations for x in ablation_results_auroc_list]
-            ablation_results_auprc_list = [x / number_of_ablations for x in ablation_results_auprc_list]
+            for i in range(X_test_subset.shape[1]):
+                if fi_est.ascending:
+                    ablation_X_test_subset_blank = ablation_by_addition(X_train, X_test_subset, imp_vals, "max", i+1)
+                else:
+                    ablation_X_test_subset_blank = ablation_by_addition(X_train, X_test_subset, imp_vals, "min", i+1)
+                ablation_results_auroc_list[i] += roc_auc_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset_blank)[:, 1])
+                ablation_results_auprc_list[i] += average_precision_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset_blank)[:, 1])
+                ablation_results_f1_list[i] += f1_score(y_test_subset, ablation_est.predict_proba(ablation_X_test_subset_blank)[:, 1] > 0.5)
             for i in range(X_test_subset.shape[1]):
                 metric_results[f'{a_model}_test_subset_AUROC_after_ablation_{i+1}_blank'] = ablation_results_auroc_list[i]
                 metric_results[f'{a_model}_test_subset_AUPRC_after_ablation_{i+1}_blank'] = ablation_results_auprc_list[i]
                 metric_results[f'{a_model}_test_subset_F1_after_ablation_{i+1}_blank'] = ablation_results_f1_list[i]
         end = time.time()
-        metric_results['test_subset_blank_ablation_time'] = end - start
+        print(f"done with ablation 2 test subset {end - start}")
+        metric_results['test_subset_ablation_2_time'] = end - start
 
         # Whole test data ablation for all FI methods except for KernelSHAP and LIME
         if fi_est.name not in ["LIME_RF_plus", "Kernel_SHAP_RF_plus"]:
             start = time.time()
             for a_model in ablation_models:
                 ablation_est = ablation_models[a_model]
-                ablation_est.fit(X_train, y_train)
+                if a_model != "RF_Plus_Classifier":
+                    ablation_est.fit(X_train, y_train)
                 y_pred = ablation_est.predict_proba(X_test)[:, 1]
                 metric_results[a_model+'_test_AUROC_before_ablation'] = roc_auc_score(y_test, y_pred)
                 metric_results[a_model+'_test_AUPRC_before_ablation'] = average_precision_score(y_test, y_pred)
@@ -329,24 +343,21 @@ def compare_estimators(estimators: List[ModelConfig],
                 ablation_results_auroc_list = [0] * X_test.shape[1]
                 ablation_results_auprc_list = [0] * X_test.shape[1]
                 ablation_results_f1_list = [0] * X_test.shape[1]
-                for seed in seeds:
-                    for i in range(X_test.shape[1]):
-                        if fi_est.ascending:
-                            ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "max", i+1)
-                        else:
-                            ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "min", i+1)
-                        ablation_results_auroc_list[i] += roc_auc_score(y_test, ablation_est.predict_proba(ablation_X_test)[:, 1])
-                        ablation_results_auprc_list[i] += average_precision_score(y_test, ablation_est.predict_proba(ablation_X_test)[:, 1])
-                        ablation_results_f1_list[i] += f1_score(y_test, ablation_est.predict_proba(ablation_X_test)[:, 1] > 0.5)
-                ablation_results_f1_list = [x / number_of_ablations for x in ablation_results_f1_list]
-                ablation_results_auroc_list = [x / number_of_ablations for x in ablation_results_auroc_list]
-                ablation_results_auprc_list = [x / number_of_ablations for x in ablation_results_auprc_list]
+                for i in range(X_test.shape[1]):
+                    if fi_est.ascending:
+                        ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "max", i+1)
+                    else:
+                        ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "min", i+1)
+                    ablation_results_auroc_list[i] += roc_auc_score(y_test, ablation_est.predict_proba(ablation_X_test)[:, 1])
+                    ablation_results_auprc_list[i] += average_precision_score(y_test, ablation_est.predict_proba(ablation_X_test)[:, 1])
+                    ablation_results_f1_list[i] += f1_score(y_test, ablation_est.predict_proba(ablation_X_test)[:, 1] > 0.5)
                 for i in range(X_test.shape[1]):
                     metric_results[f'{a_model}_test_AUROC_after_ablation_{i+1}'] = ablation_results_auroc_list[i]
                     metric_results[f'{a_model}_test_AUPRC_after_ablation_{i+1}'] = ablation_results_auprc_list[i]
                     metric_results[f'{a_model}_test_F1_after_ablation_{i+1}'] = ablation_results_f1_list[i]
             end = time.time()
             metric_results['test_data_ablation_time'] = end - start
+            print(f"done with ablation test {end - start}")
         else:
             for a_model in ablation_models:
                 metric_results[a_model+'_test_AUROC_before_ablation'] = None
@@ -357,7 +368,8 @@ def compare_estimators(estimators: List[ModelConfig],
                 metric_results[f'{a_model}_test_AUPRC_after_ablation_{i+1}'] = None
                 metric_results[f'{a_model}_test_F1_after_ablation_{i+1}'] = None
             metric_results["test_data_ablation_time"] = None
-        print(f"fi: {fi_est.name} ablation done with time: {end - start}")
+
+        print(f"fi: {fi_est.name} all ablation done")
 
         # initialize results with metadata and metric results
         kwargs: dict = model.kwargs  # dict
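Both runner scripts now share this fit-once pattern: the base RF is fit, wrapped into an RF+ model, and the single `rf_plus_base` is reused both as the `fit` argument for the RF+-based FI methods and as the `RF_Plus_*` entry in `ablation_models`, which is why the later fitting loops skip it. A minimal sketch with stand-in synthetic data, using only the constructor and methods already exercised in the diff:

```python
import time
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from imodels.tree.rf_plus.rf_plus.rf_plus_models import RandomForestPlusClassifier

# Stand-in data; the runners pull their datasets from fi_config instead.
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

est = RandomForestClassifier(n_estimators=100, min_samples_leaf=1,
                             max_features='sqrt', random_state=42)
est.fit(X_train, y_train)

start = time.time()
rf_plus_base = RandomForestPlusClassifier(rf_model=est)  # wrap the fitted RF
rf_plus_base.fit(X_train, y_train)
rf_plus_fit_time = time.time() - start  # logged as 'rf_plus_fit_time'

print("RF  AUROC:", roc_auc_score(y_test, est.predict_proba(X_test)[:, 1]))
print("RF+ AUROC:", roc_auc_score(y_test, rf_plus_base.predict_proba(X_test)[:, 1]))
print("RF+ fit time:", rf_plus_fit_time)
```

Fitting the RF+ model once per data split, rather than once per FI method and ablation pass, is what makes the added `ablation_model_fit_time` and `rf_plus_fit_time` bookkeeping meaningful.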
-print("sys.path", sys.path) -from imodels.importance import RandomForestPlusRegressor, RandomForestPlusClassifier import fi_config from util import ModelConfig, FIModelConfig, tp, fp, neg, pos, specificity_score, auroc_score, auprc_score, compute_nsg_feat_corr_w_sig_subspace, apply_splitting_strategy @@ -63,11 +61,11 @@ def ablation_to_mean(train, data, feature_importance, mode, num_features): """ - Replace the top num_features max feature importance data with random shuffle for each sample + Replace the top num_features max feature importance data with mean value for each sample """ train_mean = np.mean(train, axis=0) assert mode in ["max", "min"] - fi = feature_importance.to_numpy() + fi = feature_importance if mode == "max": indices = np.argsort(-fi) else: @@ -78,17 +76,18 @@ def ablation_to_mean(train, data, feature_importance, mode, num_features): data_copy[i, indices[i,j]] = train_mean[indices[i,j]] return data_copy -def ablation_by_addition(data, feature_importance, mode, num_features): +def ablation_by_addition(train, data, feature_importance, mode, num_features): """ - Initialize the data with zeros and add the top num_features max feature importance data for each sample + Initialize the data with mean values and add the top num_features max feature importance data for each sample """ assert mode in ["max", "min"] - fi = feature_importance.to_numpy() + fi = feature_importance if mode == "max": indices = np.argsort(-fi) else: indices = np.argsort(fi) - data_copy = np.zeros(data.shape) + row_values = np.mean(train, axis=0).tolist() + data_copy = np.array([row_values] * data.shape[0]) for i in range(data.shape[0]): for j in range(num_features): data_copy[i, indices[i,j]] = data[i, indices[i,j]] @@ -137,19 +136,18 @@ def compare_estimators(estimators: List[ModelConfig], y_train = y y_test = y - normalizer = preprocessing.Normalizer() - if splitting_strategy == "train-test": - X_train = normalizer.fit_transform(X_train) - X_test = normalizer.transform(X_test) - else: - X = normalizer.fit_transform(X) - X_train = normalizer.transform(X_train) - X_test = normalizer.transform(X_test) - - # fit model + # fit RF model est.fit(X_train, y_train) - test_all_mse = mean_squared_error(y_test, est.predict(X_test)) - test_all_r2 = r2_score(y_test, est.predict(X_test)) + test_all_mse_rf = mean_squared_error(y_test, est.predict(X_test)) + test_all_r2_rf = r2_score(y_test, est.predict(X_test)) + + # fit RF_plus model + start = time.time() + rf_plus_base = RandomForestPlusRegressor(rf_model=est) + rf_plus_base.fit(X_train, y_train) + end = time.time() + test_all_mse_rf_plus = mean_squared_error(y_test, rf_plus_base.predict(X_test)) + test_all_r2_rf_plus = r2_score(y_test, rf_plus_base.predict(X_test)) np.random.seed(42) indices_train = np.random.choice(X_train.shape[0], 100, replace=False) @@ -160,57 +158,74 @@ def compare_estimators(estimators: List[ModelConfig], y_test_subset = y_test[indices_test] # loop over fi estimators - rng = np.random.RandomState() - number_of_ablations = 1 - seeds = rng.randint(0, 10000, number_of_ablations) for fi_est in tqdm(fi_ests): metric_results = { 'model': model.name, 'fi': fi_est.name, 'train_size': X_train.shape[0], + 'train_subset_size': X_train_subset.shape[0], 'test_size': X_test.shape[0], + 'test_subset_size': X_test_subset.shape[0], 'num_features': X_train.shape[1], 'data_split_seed': args.split_seed, - 'test_all_mse': test_all_mse, - 'test_all_r2': test_all_r2 + 'test_all_mse_rf': test_all_mse_rf, + 'test_all_r2_rf': test_all_r2_rf, + 'test_all_mse_rf_plus': 
test_all_mse_rf_plus, + 'test_all_r2_rf_plus': test_all_r2_rf_plus, + 'rf_plus_fit_time': end - start, } for i in range(100): metric_results[f'sample_train_{i}'] = indices_train[i] metric_results[f'sample_test_{i}'] = indices_test[i] - for i in range(len(seeds)): - metric_results[f'ablation_seed_{i}'] = seeds[i] + + print("Compute feature importance") start = time.time() - local_fi_score_train_subset = fi_est.cls(X_train=X_train, y_train=y_train, - X_train_subset = X_train_subset, y_train_subset=y_train_subset, - X_test=X_test, y_test=y_test, - fit=copy.deepcopy(est), data_fit_on="train_subset", **fi_est.kwargs) - if fi_est.name not in ["LIME_RF_plus", "Kernel_SHAP_RF_plus"]: - local_fi_score_test = fi_est.cls(X_train=X_train, y_train=y_train, - X_train_subset = X_train_subset, y_train_subset=y_train_subset, - X_test=X_test, y_test=y_test, - fit=copy.deepcopy(est), data_fit_on="test", **fi_est.kwargs) - else: - local_fi_score_test = None - local_fi_score_test_subset = fi_est.cls(X_train=X_train, y_train=y_train, - X_train_subset = X_train_subset, y_train_subset=y_train_subset, - X_test=X_test_subset, y_test=y_test_subset, - fit=copy.deepcopy(est), data_fit_on="test", **fi_est.kwargs) + if fi_est.name == "LFI_evaluate_on_all_RF_plus" or fi_est.name == "LFI_evaluate_on_oob_RF_plus": + local_fi_score_train, local_parital_pred_train, local_fi_score_test, local_partial_pred_test, local_fi_score_test_subset, local_partial_pred_test_subset = fi_est.cls(X_train=X_train, y_train=y_train, + X_train_subset = X_train_subset, y_train_subset=y_train_subset, + X_test_subset=X_test_subset, X_test=X_test, + fit=rf_plus_base, **fi_est.kwargs) + local_fi_score_train_subset = local_fi_score_train[indices_train] + local_partial_pred_train_subset = local_parital_pred_train[indices_train] + elif fi_est.name == "LFI_fit_on_inbag_RF" or fi_est.name == "LFI_fit_on_inbag_RF": + local_fi_score_train, local_parital_pred_train, local_fi_score_test, local_partial_pred_test, local_fi_score_test_subset, local_partial_pred_test_subset = fi_est.cls(X_train=X_train, y_train=y_train, + X_train_subset = X_train_subset, y_train_subset=y_train_subset, + X_test_subset=X_test_subset, X_test=X_test, + fit=copy.deepcopy(est), **fi_est.kwargs) + local_fi_score_train_subset = local_fi_score_train[indices_train] + local_partial_pred_train_subset = local_parital_pred_train[indices_train] + elif fi_est.name == "TreeSHAP_RF": + local_fi_score_train_subset, local_fi_score_test, local_fi_score_test_subset = fi_est.cls(X_train=X_train, y_train=y_train, + X_train_subset = X_train_subset, y_train_subset=y_train_subset, + X_test_subset=X_test_subset, X_test=X_test, + fit=copy.deepcopy(est), **fi_est.kwargs) + elif fi_est.name == "Kernel_SHAP_RF_plus" or fi_est.name == "LIME_RF_plus": + local_fi_score_train_subset, local_fi_score_test, local_fi_score_test_subset = fi_est.cls(X_train=X_train, y_train=y_train, + X_train_subset = X_train_subset, y_train_subset=y_train_subset, + X_test_subset=X_test_subset, X_test=X_test, + fit=rf_plus_base, **fi_est.kwargs) end = time.time() metric_results['fi_time'] = end - start - # feature_importance_list.append(local_fi_score_train_subset) + feature_importance_list.append(local_fi_score_train_subset) feature_importance_list.append(local_fi_score_test) feature_importance_list.append(local_fi_score_test_subset) ablation_models = {"RF_Regressor": RandomForestRegressor(n_estimators=100,min_samples_leaf=5,max_features=0.33,random_state=42), "Linear": LinearRegression(), "XGB_Regressor": 
xgb.XGBRegressor(random_state=42), - "RF_Plus_Regressor":RandomForestPlusRegressor(rf_model=RandomForestRegressor(n_estimators=100,min_samples_leaf=5,max_features=0.33,random_state=42))} + "RF_Plus_Regressor": rf_plus_base} + start = time.time() + for a_model in ablation_models: + if a_model != "RF_Plus_Regressor": + ablation_models[a_model].fit(X_train, y_train) + end = time.time() + metric_results['ablation_model_fit_time'] = end - start + print("start ablation") # Subset Train data ablation for all FI methods start = time.time() for a_model in ablation_models: ablation_est = ablation_models[a_model] - ablation_est.fit(X_train, y_train) y_pred_subset = ablation_est.predict(X_train_subset) metric_results[a_model + '_train_subset_MSE_before_ablation'] = mean_squared_error(y_train_subset, y_pred_subset) metric_results[a_model + '_train_subset_R_2_before_ablation'] = r2_score(y_train_subset, y_pred_subset) @@ -219,20 +234,18 @@ def compare_estimators(estimators: List[ModelConfig], imp_vals[imp_vals == float("inf")] = sys.maxsize - 1 ablation_results_list = [0] * X_train_subset.shape[1] ablation_results_list_r2 = [0] * X_train_subset.shape[1] - for seed in seeds: - for i in range(X_train_subset.shape[1]): - if fi_est.ascending: - ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "max", i+1) - else: - ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "min", i+1) - ablation_results_list[i] += mean_squared_error(y_train_subset, ablation_est.predict(ablation_X_train_subset)) - ablation_results_list_r2[i] += r2_score(y_train_subset, ablation_est.predict(ablation_X_train_subset)) - ablation_results_list = [x / len(seeds) for x in ablation_results_list] - ablation_results_list_r2 = [x / len(seeds) for x in ablation_results_list_r2] + for i in range(X_train_subset.shape[1]): + if fi_est.ascending: + ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "max", i+1) + else: + ablation_X_train_subset = ablation_to_mean(X_train, X_train_subset, imp_vals, "min", i+1) + ablation_results_list[i] += mean_squared_error(y_train_subset, ablation_est.predict(ablation_X_train_subset)) + ablation_results_list_r2[i] += r2_score(y_train_subset, ablation_est.predict(ablation_X_train_subset)) for i in range(X_train.shape[1]): metric_results[f'{a_model}_train_subset_MSE_after_ablation_{i+1}'] = ablation_results_list[i] metric_results[f'{a_model}_train_subset_R_2_after_ablation_{i+1}'] = ablation_results_list_r2[i] end = time.time() + print(f"done with ablation train subset {end - start}") metric_results['train_subset_ablation_time'] = end - start # Test data ablation @@ -240,7 +253,6 @@ def compare_estimators(estimators: List[ModelConfig], start = time.time() for a_model in ablation_models: ablation_est = ablation_models[a_model] - ablation_est.fit(X_train, y_train) y_pred_subset = ablation_est.predict(X_test_subset) metric_results[a_model + '_test_subset_MSE_before_ablation'] = mean_squared_error(y_test_subset, y_pred_subset) metric_results[a_model + '_test_subset_R_2_before_ablation'] = r2_score(y_test_subset, y_pred_subset) @@ -249,28 +261,25 @@ def compare_estimators(estimators: List[ModelConfig], imp_vals[imp_vals == float("inf")] = sys.maxsize - 1 ablation_results_list = [0] * X_test_subset.shape[1] ablation_results_list_r2 = [0] * X_test_subset.shape[1] - for seed in seeds: - for i in range(X_test_subset.shape[1]): - if fi_est.ascending: - ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "max", i+1) - else: 
- ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "min", i+1) - ablation_results_list[i] += mean_squared_error(y_test_subset, ablation_est.predict(ablation_X_test_subset)) - ablation_results_list_r2[i] += r2_score(y_test_subset, ablation_est.predict(ablation_X_test_subset)) - ablation_results_list = [x / len(seeds) for x in ablation_results_list] - ablation_results_list_r2 = [x / len(seeds) for x in ablation_results_list_r2] + for i in range(X_test_subset.shape[1]): + if fi_est.ascending: + ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "max", i+1) + else: + ablation_X_test_subset = ablation_to_mean(X_train, X_test_subset, imp_vals, "min", i+1) + ablation_results_list[i] += mean_squared_error(y_test_subset, ablation_est.predict(ablation_X_test_subset)) + ablation_results_list_r2[i] += r2_score(y_test_subset, ablation_est.predict(ablation_X_test_subset)) for i in range(X_test_subset.shape[1]): metric_results[f'{a_model}_test_subset_MSE_after_ablation_{i+1}'] = ablation_results_list[i] metric_results[f'{a_model}_test_subset_R_2_after_ablation_{i+1}'] = ablation_results_list_r2[i] end = time.time() - metric_results['test_subset_ablation_time'] = end - start + print(f"done with ablation 1 test subset {end - start}") + metric_results['test_subset_ablation_1_time'] = end - start # Subset test data ablation for all FI methods - addition start = time.time() for a_model in ablation_models: ablation_est = ablation_models[a_model] - ablation_est.fit(X_train, y_train) metric_results[a_model + '_test_subset_MSE_before_ablation_blank'] = mean_squared_error(y_test_subset, ablation_est.predict(np.zeros(X_test_subset.shape))) metric_results[a_model + '_test_subset_R_2_before_ablation_blank'] = r2_score(y_test_subset, ablation_est.predict(np.zeros(X_test_subset.shape))) imp_vals = copy.deepcopy(local_fi_score_test_subset) @@ -278,28 +287,25 @@ def compare_estimators(estimators: List[ModelConfig], imp_vals[imp_vals == float("inf")] = sys.maxsize - 1 ablation_results_list = [0] * X_test_subset.shape[1] ablation_results_list_r2 = [0] * X_test_subset.shape[1] - for seed in seeds: - for i in range(X_test_subset.shape[1]): - if fi_est.ascending: - ablation_X_test_subset_blank = ablation_by_addition(X_test_subset, imp_vals, "max", i+1) - else: - ablation_X_test_subset_blank = ablation_by_addition(X_test_subset, imp_vals, "min", i+1) - ablation_results_list[i] += mean_squared_error(y_test_subset, ablation_est.predict(ablation_X_test_subset_blank)) - ablation_results_list_r2[i] += r2_score(y_test_subset, ablation_est.predict(ablation_X_test_subset_blank)) - ablation_results_list = [x / len(seeds) for x in ablation_results_list] - ablation_results_list_r2 = [x / len(seeds) for x in ablation_results_list_r2] + for i in range(X_test_subset.shape[1]): + if fi_est.ascending: + ablation_X_test_subset_blank = ablation_by_addition(X_train, X_test_subset, imp_vals, "max", i+1) + else: + ablation_X_test_subset_blank = ablation_by_addition(X_train, X_test_subset, imp_vals, "min", i+1) + ablation_results_list[i] += mean_squared_error(y_test_subset, ablation_est.predict(ablation_X_test_subset_blank)) + ablation_results_list_r2[i] += r2_score(y_test_subset, ablation_est.predict(ablation_X_test_subset_blank)) for i in range(X_test_subset.shape[1]): metric_results[f'{a_model}_test_subset_MSE_after_ablation_{i+1}_blank'] = ablation_results_list[i] metric_results[f'{a_model}_test_subset_R_2_after_ablation_{i+1}_blank'] = ablation_results_list_r2[i] end = time.time() - 
metric_results['test_subset_blank_ablation_time'] = end - start + print(f"done with ablation 2 test subset {end - start}") + metric_results['test_subset_ablation_2_time'] = end - start # Whole test data ablation for all FI methods except for KernelSHAP and LIME if fi_est.name not in ["LIME_RF_plus", "Kernel_SHAP_RF_plus"]: start = time.time() for a_model in ablation_models: ablation_est = ablation_models[a_model] - ablation_est.fit(X_train, y_train) y_pred = ablation_est.predict(X_test) metric_results[a_model + '_test_MSE_before_ablation'] = mean_squared_error(y_test, y_pred) metric_results[a_model + '_test_R_2_before_ablation'] = r2_score(y_test, y_pred) @@ -308,21 +314,19 @@ def compare_estimators(estimators: List[ModelConfig], imp_vals[imp_vals == float("inf")] = sys.maxsize - 1 ablation_results_list = [0] * X_test.shape[1] ablation_results_list_r2 = [0] * X_test.shape[1] - for seed in seeds: - for i in range(X_test.shape[1]): - if fi_est.ascending: - ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "max", i+1) - else: - ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "min", i+1) - ablation_results_list[i] += mean_squared_error(y_test, ablation_est.predict(ablation_X_test)) - ablation_results_list_r2[i] += r2_score(y_test, ablation_est.predict(ablation_X_test)) - ablation_results_list = [x / len(seeds) for x in ablation_results_list] - ablation_results_list_r2 = [x / len(seeds) for x in ablation_results_list_r2] + for i in range(X_test.shape[1]): + if fi_est.ascending: + ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "max", i+1) + else: + ablation_X_test = ablation_to_mean(X_train, X_test, imp_vals, "min", i+1) + ablation_results_list[i] += mean_squared_error(y_test, ablation_est.predict(ablation_X_test)) + ablation_results_list_r2[i] += r2_score(y_test, ablation_est.predict(ablation_X_test)) for i in range(X_test.shape[1]): metric_results[f'{a_model}_test_MSE_after_ablation_{i+1}'] = ablation_results_list[i] metric_results[f'{a_model}_test_R_2_after_ablation_{i+1}'] = ablation_results_list_r2[i] end = time.time() metric_results['test_data_ablation_time'] = end - start + print(f"done with ablation test {end - start}") else: for a_model in ablation_models: metric_results[a_model + '_test_MSE_before_ablation'] = None @@ -332,7 +336,7 @@ def compare_estimators(estimators: List[ModelConfig], metric_results[f'{a_model}_test_R_2_after_ablation_{i+1}'] = None metric_results["test_data_ablation_time"] = None - print(f"fi: {fi_est.name} ablation done with time: {end - start}") + print(f"fi: {fi_est.name} all ablation done") # initialize results with metadata and metric results kwargs: dict = model.kwargs # dict diff --git a/feature_importance/feature_ranking.sh b/feature_importance/feature_ranking.sh index 659942e..0a7ae14 100644 --- a/feature_importance/feature_ranking.sh +++ b/feature_importance/feature_ranking.sh @@ -3,7 +3,7 @@ #SBATCH --mail-type=ALL source activate mdi -command="ranking_importance_local_sims.py --nreps 1 --config mdi_local.real_x_sim_y.diabetes-classification.lss-model --split_seed ${1} --ignore_cache --create_rmd --result_name diabetes-class-lss" +command="ranking_importance_local_sims.py --nreps 1 --config mdi_local.real_x_sim_y.diabetes-regression.lss-model --split_seed 6 --ignore_cache --create_rmd --result_name diabetes-reg-lss" # Execute the command python $command \ No newline at end of file diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/dgp.py 
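The dgp.py configs that follow replace the CSV-based `sample_real_X` with `sample_real_data_X` driven by `{"source": "imodels", "data_name": "diabetes_regr"}`. The real loader lives in `feature_importance/scripts/simulations_util.py`; purely as an illustration, a hypothetical equivalent built on imodels' documented `get_clean_dataset` helper might look like the sketch below (the subsampling logic, seed handling, and whether `"diabetes_regr"` resolves directly are all assumptions):

```python
import numpy as np
from imodels import get_clean_dataset

def sample_real_data_X(source="imodels", data_name="diabetes_regr",
                       sample_row_n=None, seed=0):
    # Hypothetical reimplementation -- the actual helper is defined in
    # feature_importance/scripts/simulations_util.py.
    X, _, _ = get_clean_dataset(data_name, data_source=source)
    if sample_row_n is not None:  # optional row subsampling
        rng = np.random.default_rng(seed)
        X = X[rng.choice(X.shape[0], sample_row_n, replace=False)]
    return X

X = sample_real_data_X()  # with "sample_row_n": None, all rows are kept
print(X.shape)
```

Loading through imodels removes the dependence on a locally checked-in `X_diabetes_regression.csv`, which is presumably why every config also switches `"sample_row_n"` from the hardcoded 442 to `None`.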
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/dgp.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/dgp.py
index 98cc33d..fd592dc 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/dgp.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/dgp.py
@@ -3,11 +3,13 @@
 from feature_importance.scripts.simulations_util import *
 
-X_DGP = sample_real_X
+X_DGP = sample_real_data_X
 X_PARAMS_DICT = {
-    "fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
-    "sample_row_n": 442
+    "source": "imodels",
+    "data_name": "diabetes_regr",
+    "sample_row_n": None
 }
+
 Y_DGP = hierarchical_poly
 Y_PARAMS_DICT = {
     "beta": 1,
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/models.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/models.py
index 0d225d0..5d578cb 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/models.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/hierarchical-polynomial/models.py
@@ -2,24 +2,21 @@
 import numpy as np
 from feature_importance.util import ModelConfig, FIModelConfig
 from sklearn.ensemble import RandomForestRegressor
-from imodels.importance.rf_plus import RandomForestPlusRegressor
 from feature_importance.scripts.competing_methods_local import *
-
+from sklearn.linear_model import Ridge
 
 ESTIMATORS = [
     [ModelConfig('RF', RandomForestRegressor, model_type='tree',
-                 other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
-    [ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
-                 other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
+                 other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
 ]
 
 FI_ESTIMATORS = [
-    [FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
-    [FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
     [FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
+    [FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
+    [FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
+    [FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
+    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
 ]
\ No newline at end of file
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/dgp.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/dgp.py
index 98cc33d..fd592dc 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/dgp.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/dgp.py
@@ -3,11 +3,13 @@
 from feature_importance.scripts.simulations_util import *
 
-X_DGP = sample_real_X
+X_DGP = sample_real_data_X
 X_PARAMS_DICT = {
-    "fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
-    "sample_row_n": 442
+    "source": "imodels",
+    "data_name": "diabetes_regr",
+    "sample_row_n": None
 }
+
 Y_DGP = hierarchical_poly
 Y_PARAMS_DICT = {
     "beta": 1,
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/models.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/models.py
index 0d225d0..5d578cb 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/models.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-lss/models.py
@@ -2,24 +2,21 @@
 import numpy as np
 from feature_importance.util import ModelConfig, FIModelConfig
 from sklearn.ensemble import RandomForestRegressor
-from imodels.importance.rf_plus import RandomForestPlusRegressor
 from feature_importance.scripts.competing_methods_local import *
-
+from sklearn.linear_model import Ridge
 
 ESTIMATORS = [
     [ModelConfig('RF', RandomForestRegressor, model_type='tree',
-                 other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
-    [ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
-                 other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
+                 other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
 ]
 
 FI_ESTIMATORS = [
-    [FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
-    [FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
     [FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
+    [FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
+    [FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
+    [FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
+    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
 ]
\ No newline at end of file
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/dgp.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/dgp.py
index 2b6256b..9210596 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/dgp.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/dgp.py
@@ -3,21 +3,13 @@
 from feature_importance.scripts.simulations_util import *
 
-X_DGP = sample_real_X
+X_DGP = sample_real_data_X
 X_PARAMS_DICT = {
-    "fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
-    "sample_row_n": 442
+    "source": "imodels",
+    "data_name": "diabetes_regr",
+    "sample_row_n": None
 }
-# X_PARAMS_DICT = {
-#     "X_fpath": "../data/classification_data/Fico/X_fico.csv",
-#     "sample_row_n": None,
-#     "return_data": "X"
-# }
-# X_PARAMS_DICT = {
-#     "X_fpath": "../data/classification_data/Juvenile/X_juvenile.csv",
-#     "sample_row_n": None,
-#     "return_data": "X"
-# }
+
 Y_DGP = linear_model
 Y_PARAMS_DICT = {
     "beta": 1,
@@ -25,16 +17,7 @@
     "heritability": 0.4,
     "s": 5
 }
-# Y_PARAMS_DICT = {
-#     "y_fpath": "../data/classification_data/Fico/y_fico.csv",
-#     "return_data": "y"
-# }
-# Y_PARAMS_DICT = {
-#     "y_fpath": "../data/classification_data/Juvenile/y_juvenile.csv",
-#     "return_data": "y"
-# }
 
-# vary one parameter
 VARY_PARAM_NAME = ["heritability", "sample_row_n"]
 VARY_PARAM_VALS = {"heritability": {"0.1": 0.1, "0.2": 0.2, "0.4": 0.4, "0.8": 0.8},
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/models.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/models.py
index 0d225d0..5d578cb 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/models.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/linear-model/models.py
@@ -2,24 +2,21 @@
 import numpy as np
 from feature_importance.util import ModelConfig, FIModelConfig
 from sklearn.ensemble import RandomForestRegressor
-from imodels.importance.rf_plus import RandomForestPlusRegressor
 from feature_importance.scripts.competing_methods_local import *
-
+from sklearn.linear_model import Ridge
 
 ESTIMATORS = [
     [ModelConfig('RF', RandomForestRegressor, model_type='tree',
-                 other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
-    [ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
-                 other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
+                 other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
 ]
 
 FI_ESTIMATORS = [
-    [FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
-    [FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
     [FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
+    [FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
+    [FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
+    [FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
+    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
 ]
\ No newline at end of file
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/dgp.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/dgp.py
index 78a86f7..243a098 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/dgp.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/dgp.py
@@ -2,11 +2,11 @@
 sys.path.append("../..")
 from feature_importance.scripts.simulations_util import *
 
-X_DGP = sample_real_X
+X_DGP = sample_real_data_X
 X_PARAMS_DICT = {
-    "fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
-    "sample_row_n": None,
-    "sample_col_n": None
+    "source": "imodels",
+    "data_name": "diabetes_regr",
+    "sample_row_n": None
 }
 
 Y_DGP = lss_model
diff --git a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/models.py b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/models.py
index 0d225d0..5d578cb 100644
--- a/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/models.py
+++ b/feature_importance/fi_config/mdi_local/real_x_sim_y/diabetes-regression/lss-model/models.py
@@ -2,24 +2,21 @@
 import numpy as np
 from feature_importance.util import ModelConfig, FIModelConfig
 from sklearn.ensemble import RandomForestRegressor
-from imodels.importance.rf_plus import RandomForestPlusRegressor
 from feature_importance.scripts.competing_methods_local import *
-
+from sklearn.linear_model import Ridge
 
 ESTIMATORS = [
     [ModelConfig('RF', RandomForestRegressor, model_type='tree',
-                 other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
-    [ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
-                 other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
+                 other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
 ]
 
 FI_ESTIMATORS = [
-    [FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
-    [FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
     [FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
-    [FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
-    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
+    [FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
+    [FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
+    [FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
+    [FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
+    [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
 ]
\ No newline at end of file
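Every LFI config above now sets `ascending = False`, which the runners translate into an ablation direction: `ascending=True` means larger score = more important, so top features are ablated with mode `"max"`; `ascending=False` flips to `"min"` (see the `if fi_est.ascending:` branches in both `01_run_ablation_*` scripts). The self-contained toy below traces the per-feature-count AUROC curve that the runners record as `..._after_ablation_{i+1}`, using a compact vectorized version of the `ablation_to_mean` helper sketched earlier and global MDI scores tiled per row as stand-in local importances:

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

def ablation_to_mean(train, data, fi, mode, k):
    # Compact version of the helper sketched after the classification diff.
    order = np.argsort(-fi if mode == "max" else fi, axis=1)[:, :k]
    out = data.copy()
    out[np.arange(data.shape[0])[:, None], order] = np.mean(train, axis=0)[order]
    return out

X, y = make_classification(n_samples=400, n_features=8, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Stand-in "local" importances: the global MDI ranking tiled per test row.
imp_vals = np.tile(rf.feature_importances_, (X_test.shape[0], 1))
ascending = True                      # the new LFI configs pass False instead
mode = "max" if ascending else "min"  # how the runners map the config flag

curve = [roc_auc_score(y_test,
                       rf.predict_proba(ablation_to_mean(X_train, X_test,
                                                         imp_vals, mode, k + 1))[:, 1])
         for k in range(X_test.shape[1])]
print(np.round(curve, 3))  # AUROC decays as more top features are ablated
```

A faithful ranking should produce a curve that drops quickly for the first few ablated features and then flattens; that shape is what the per-`i` metrics in `metric_results` are meant to capture.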
os.path.isdir(os.path.join(directory, folder))]\n", "experiments_seeds = []\n", "for folder_name in folder_names:\n", @@ -32,7 +32,2063 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | rep | \n", + "sample_row_n | \n", + "sample_row_n_name | \n", + "heritability | \n", + "heritability_name | \n", + "n_estimators | \n", + "min_samples_leaf | \n", + "max_features | \n", + "random_state | \n", + "include_raw | \n", + "cv_ridge | \n", + "calc_loo_coef | \n", + "sample_split | \n", + "fit_on | \n", + "model | \n", + "fi | \n", + "train_size | \n", + "test_size | \n", + "num_features | \n", + "data_split_seed | \n", + "sample_train_0 | \n", + "sample_train_1 | \n", + "sample_train_2 | \n", + "sample_train_3 | \n", + "sample_train_4 | \n", + "sample_train_5 | \n", + "sample_train_6 | \n", + "sample_train_7 | \n", + "sample_train_8 | \n", + "sample_train_9 | \n", + "sample_train_10 | \n", + "sample_train_11 | \n", + "sample_train_12 | \n", + "sample_train_13 | \n", + "sample_train_14 | \n", + "sample_train_15 | \n", + "sample_test_0 | \n", + "sample_test_1 | \n", + "sample_test_2 | \n", + "sample_test_3 | \n", + "sample_test_4 | \n", + "sample_test_5 | \n", + "sample_test_6 | \n", + "sample_test_7 | \n", + "ablation_seed_0 | \n", + "fi_time | \n", + "train_AUROC | \n", + "train_AUPRC | \n", + "train_F1 | \n", + "test_AUROC | \n", + "test_AUPRC | \n", + "test_F1 | \n", + "split_seed | \n", + "rf_model | \n", + "sample_train_16 | \n", + "sample_train_17 | \n", + "sample_train_18 | \n", + "sample_train_19 | \n", + "sample_train_20 | \n", + "sample_train_21 | \n", + "sample_train_22 | \n", + "sample_train_23 | \n", + "sample_train_24 | \n", + "sample_train_25 | \n", + "sample_train_26 | \n", + "sample_train_27 | \n", + "sample_train_28 | \n", + "sample_train_29 | \n", + "sample_train_30 | \n", + "sample_train_31 | \n", + "sample_train_32 | \n", + "sample_test_8 | \n", + "sample_test_9 | \n", + "sample_test_10 | \n", + "sample_test_11 | \n", + "sample_test_12 | \n", + "sample_test_13 | \n", + "sample_test_14 | \n", + "sample_test_15 | \n", + "sample_train_33 | \n", + "sample_train_34 | \n", + "sample_train_35 | \n", + "sample_train_36 | \n", + "sample_train_37 | \n", + "sample_train_38 | \n", + "sample_train_39 | \n", + "sample_train_40 | \n", + "sample_train_41 | \n", + "sample_train_42 | \n", + "sample_train_43 | \n", + "sample_train_44 | \n", + "sample_train_45 | \n", + "sample_train_46 | \n", + "sample_train_47 | \n", + "sample_train_48 | \n", + "sample_train_49 | \n", + "sample_test_16 | \n", + "sample_test_17 | \n", + "sample_test_18 | \n", + "sample_test_19 | \n", + "sample_test_20 | \n", + "sample_test_21 | \n", + "sample_test_22 | \n", + "sample_test_23 | \n", + "sample_train_50 | \n", + "sample_train_51 | \n", + "sample_train_52 | \n", + "sample_train_53 | \n", + "sample_train_54 | \n", + "sample_train_55 | \n", + "sample_train_56 | \n", + "sample_train_57 | \n", + "sample_train_58 | \n", + "sample_train_59 | \n", + "sample_train_60 | \n", + "sample_train_61 | \n", + "sample_train_62 | \n", + "sample_train_63 | \n", + "sample_train_64 | \n", + "sample_train_65 | \n", + "sample_train_66 | \n", + "sample_test_24 | \n", + "sample_test_25 | \n", + "sample_test_26 | \n", + "sample_test_27 | \n", + "sample_test_28 | \n", + "sample_test_29 | \n", + "sample_test_30 | \n", + "sample_test_31 | \n", + "sample_test_32 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "100 | \n", + "100 | \n", + "0.1 | \n", + "0.1 | \n", + "100.0 | \n", + "1.0 | \n", + "sqrt | \n", + "42.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "oob | \n", + "test | \n", + "RF | \n", + "LFI_with_raw_OOB_RF | \n", + "67 | \n", + "33 | \n", + "10 | \n", + "2 | \n", + "36 | \n", + "16 | \n", + "4 | \n", + "9 | \n", + "45 | \n", + "40 | \n", + "61 | \n", + "5 | \n", + "64 | \n", + "12 | \n", + "25 | \n", + "59 | \n", + "28 | \n", + "0 | \n", + "62 | \n", + "34 | \n", + "29 | \n", + "5 | \n", + "2 | \n", + "19 | \n", + "27 | \n", + "16 | \n", + "22 | \n", + "6 | \n", + "5371 | \n", + "16.394167 | \n", + "0.752604 | \n", + "0.847491 | \n", + "0.522592 | \n", + "0.770833 | \n", + "0.869907 | \n", + "0.541667 | \n", + "2 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1 | \n", + "0 | \n", + "100 | \n", + "100 | \n", + "0.1 | \n", + "0.1 | \n", + "100.0 | \n", + "1.0 | \n", + "sqrt | \n", + "42.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "RF | \n", + "LFI_with_raw_RF | \n", + "67 | \n", + "33 | \n", + "10 | \n", + "2 | \n", + "36 | \n", + "16 | \n", + "4 | \n", + "9 | \n", + "45 | \n", + "40 | \n", + "61 | \n", + "5 | \n", + "64 | \n", + "12 | \n", + "25 | \n", + "59 | \n", + "28 | \n", + "0 | \n", + "62 | \n", + "34 | \n", + "29 | \n", + "5 | \n", + "2 | \n", + "19 | \n", + "27 | \n", + "16 | \n", + "22 | \n", + "6 | \n", + "5371 | \n", + "17.529187 | \n", + "0.661458 | \n", + "0.798983 | \n", + "0.664165 | \n", + "0.682292 | \n", + "0.810425 | \n", + "0.720484 | \n", + "2 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
2 | \n", + "0 | \n", + "100 | \n", + "100 | \n", + "0.1 | \n", + "0.1 | \n", + "100.0 | \n", + "1.0 | \n", + "sqrt | \n", + "42.0 | \n", + "False | \n", + "0.0 | \n", + "False | \n", + "inbag | \n", + "NaN | \n", + "RF | \n", + "MDI_RF | \n", + "67 | \n", + "33 | \n", + "10 | \n", + "2 | \n", + "36 | \n", + "16 | \n", + "4 | \n", + "9 | \n", + "45 | \n", + "40 | \n", + "61 | \n", + "5 | \n", + "64 | \n", + "12 | \n", + "25 | \n", + "59 | \n", + "28 | \n", + "0 | \n", + "62 | \n", + "34 | \n", + "29 | \n", + "5 | \n", + "2 | \n", + "19 | \n", + "27 | \n", + "16 | \n", + "22 | \n", + "6 | \n", + "5371 | \n", + "8.725140 | \n", + "0.567708 | \n", + "0.713682 | \n", + "0.681767 | \n", + "0.609375 | \n", + "0.758722 | \n", + "0.663172 | \n", + "2 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
3 | \n", + "0 | \n", + "100 | \n", + "100 | \n", + "0.1 | \n", + "0.1 | \n", + "100.0 | \n", + "1.0 | \n", + "sqrt | \n", + "42.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "RF | \n", + "TreeSHAP_RF | \n", + "67 | \n", + "33 | \n", + "10 | \n", + "2 | \n", + "36 | \n", + "16 | \n", + "4 | \n", + "9 | \n", + "45 | \n", + "40 | \n", + "61 | \n", + "5 | \n", + "64 | \n", + "12 | \n", + "25 | \n", + "59 | \n", + "28 | \n", + "0 | \n", + "62 | \n", + "34 | \n", + "29 | \n", + "5 | \n", + "2 | \n", + "19 | \n", + "27 | \n", + "16 | \n", + "22 | \n", + "6 | \n", + "5371 | \n", + "0.250836 | \n", + "0.502604 | \n", + "0.705510 | \n", + "0.602102 | \n", + "0.552083 | \n", + "0.728001 | \n", + "0.616987 | \n", + "2 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
4 | \n", + "0 | \n", + "100 | \n", + "100 | \n", + "0.1 | \n", + "0.1 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "RF_plus | \n", + "Kernel_SHAP_RF_plus | \n", + "67 | \n", + "33 | \n", + "10 | \n", + "2 | \n", + "36 | \n", + "16 | \n", + "4 | \n", + "9 | \n", + "45 | \n", + "40 | \n", + "61 | \n", + "5 | \n", + "64 | \n", + "12 | \n", + "25 | \n", + "59 | \n", + "28 | \n", + "0 | \n", + "62 | \n", + "34 | \n", + "29 | \n", + "5 | \n", + "2 | \n", + "19 | \n", + "27 | \n", + "16 | \n", + "22 | \n", + "6 | \n", + "4861 | \n", + "44.517748 | \n", + "0.675781 | \n", + "0.814439 | \n", + "0.606255 | \n", + "0.656250 | \n", + "0.811053 | \n", + "0.626389 | \n", + "2 | \n", + "RandomForestRegressor(max_features='sqrt', ran... | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1115 | \n", + "0 | \n", + "400 | \n", + "400 | \n", + "0.8 | \n", + "0.8 | \n", + "100.0 | \n", + "1.0 | \n", + "sqrt | \n", + "42.0 | \n", + "False | \n", + "0.0 | \n", + "False | \n", + "inbag | \n", + "NaN | \n", + "RF | \n", + "MDI_RF | \n", + "268 | \n", + "132 | \n", + "10 | \n", + "7 | \n", + "115 | \n", + "213 | \n", + "22 | \n", + "132 | \n", + "117 | \n", + "159 | \n", + "234 | \n", + "137 | \n", + "30 | \n", + "67 | \n", + "126 | \n", + "82 | \n", + "79 | \n", + "184 | \n", + "24 | \n", + "68 | \n", + "78 | \n", + "4 | \n", + "44 | \n", + "55 | \n", + "71 | \n", + "61 | \n", + "42 | \n", + "91 | \n", + "4731 | \n", + "26.897712 | \n", + "0.787313 | \n", + "0.887580 | \n", + "0.640807 | \n", + "0.744949 | \n", + "0.855740 | \n", + "0.506839 | \n", + "7 | \n", + "NaN | \n", + "179.0 | \n", + "180.0 | \n", + "66.0 | \n", + "112.0 | \n", + "90.0 | \n", + "9.0 | \n", + "93.0 | \n", + "196.0 | \n", + "108.0 | \n", + "42.0 | \n", + "216.0 | \n", + "46.0 | \n", + "201.0 | \n", + "124.0 | \n", + "45.0 | \n", + "144.0 | \n", + "6.0 | \n", + "10.0 | \n", + "116.0 | \n", + "9.0 | \n", + "117.0 | \n", + "62.0 | \n", + "69.0 | \n", + "35.0 | \n", + "38.0 | \n", + "240.0 | \n", + "155.0 | \n", + "86.0 | \n", + "118.0 | \n", + "25.0 | \n", + "208.0 | \n", + "127.0 | \n", + "19.0 | \n", + "97.0 | \n", + "164.0 | \n", + "238.0 | \n", + "183.0 | \n", + "92.0 | \n", + "150.0 | \n", + "15.0 | \n", + "255.0 | \n", + "60.0 | \n", + "124.0 | \n", + "14.0 | \n", + "11.0 | \n", + "49.0 | \n", + "15.0 | \n", + "106.0 | \n", + "123.0 | \n", + "0.0 | \n", + "241.0 | \n", + "250.0 | \n", + "33.0 | \n", + "244.0 | \n", + "140.0 | \n", + "165.0 | \n", + "249.0 | \n", + "16.0 | \n", + "266.0 | \n", + "75.0 | \n", + "10.0 | \n", + "223.0 | \n", + "227.0 | \n", + "224.0 | \n", + "119.0 | \n", + "84.0 | \n", + "104.0 | \n", + "128.0 | \n", + "113.0 | \n", + "77.0 | \n", + "33.0 | \n", + "101.0 | \n", + "76.0 | \n", + "127.0 | \n", + "12.0 | \n", + "130.0 | \n", + "
1116 | \n", + "0 | \n", + "400 | \n", + "400 | \n", + "0.8 | \n", + "0.8 | \n", + "100.0 | \n", + "1.0 | \n", + "sqrt | \n", + "42.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "RF | \n", + "TreeSHAP_RF | \n", + "268 | \n", + "132 | \n", + "10 | \n", + "7 | \n", + "115 | \n", + "213 | \n", + "22 | \n", + "132 | \n", + "117 | \n", + "159 | \n", + "234 | \n", + "137 | \n", + "30 | \n", + "67 | \n", + "126 | \n", + "82 | \n", + "79 | \n", + "184 | \n", + "24 | \n", + "68 | \n", + "78 | \n", + "4 | \n", + "44 | \n", + "55 | \n", + "71 | \n", + "61 | \n", + "42 | \n", + "91 | \n", + "4731 | \n", + "3.519123 | \n", + "0.799751 | \n", + "0.900437 | \n", + "0.604907 | \n", + "0.835859 | \n", + "0.912963 | \n", + "0.545081 | \n", + "7 | \n", + "NaN | \n", + "179.0 | \n", + "180.0 | \n", + "66.0 | \n", + "112.0 | \n", + "90.0 | \n", + "9.0 | \n", + "93.0 | \n", + "196.0 | \n", + "108.0 | \n", + "42.0 | \n", + "216.0 | \n", + "46.0 | \n", + "201.0 | \n", + "124.0 | \n", + "45.0 | \n", + "144.0 | \n", + "6.0 | \n", + "10.0 | \n", + "116.0 | \n", + "9.0 | \n", + "117.0 | \n", + "62.0 | \n", + "69.0 | \n", + "35.0 | \n", + "38.0 | \n", + "240.0 | \n", + "155.0 | \n", + "86.0 | \n", + "118.0 | \n", + "25.0 | \n", + "208.0 | \n", + "127.0 | \n", + "19.0 | \n", + "97.0 | \n", + "164.0 | \n", + "238.0 | \n", + "183.0 | \n", + "92.0 | \n", + "150.0 | \n", + "15.0 | \n", + "255.0 | \n", + "60.0 | \n", + "124.0 | \n", + "14.0 | \n", + "11.0 | \n", + "49.0 | \n", + "15.0 | \n", + "106.0 | \n", + "123.0 | \n", + "0.0 | \n", + "241.0 | \n", + "250.0 | \n", + "33.0 | \n", + "244.0 | \n", + "140.0 | \n", + "165.0 | \n", + "249.0 | \n", + "16.0 | \n", + "266.0 | \n", + "75.0 | \n", + "10.0 | \n", + "223.0 | \n", + "227.0 | \n", + "224.0 | \n", + "119.0 | \n", + "84.0 | \n", + "104.0 | \n", + "128.0 | \n", + "113.0 | \n", + "77.0 | \n", + "33.0 | \n", + "101.0 | \n", + "76.0 | \n", + "127.0 | \n", + "12.0 | \n", + "130.0 | \n", + "
1117 | \n", + "0 | \n", + "400 | \n", + "400 | \n", + "0.8 | \n", + "0.8 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "RF_plus | \n", + "Kernel_SHAP_RF_plus | \n", + "268 | \n", + "132 | \n", + "10 | \n", + "7 | \n", + "115 | \n", + "213 | \n", + "22 | \n", + "132 | \n", + "117 | \n", + "159 | \n", + "234 | \n", + "137 | \n", + "30 | \n", + "67 | \n", + "126 | \n", + "82 | \n", + "79 | \n", + "184 | \n", + "24 | \n", + "68 | \n", + "78 | \n", + "4 | \n", + "44 | \n", + "55 | \n", + "71 | \n", + "61 | \n", + "42 | \n", + "91 | \n", + "9672 | \n", + "188.567548 | \n", + "0.799129 | \n", + "0.890131 | \n", + "0.615626 | \n", + "0.766414 | \n", + "0.878808 | \n", + "0.590590 | \n", + "7 | \n", + "RandomForestRegressor(max_features='sqrt', ran... | \n", + "179.0 | \n", + "180.0 | \n", + "66.0 | \n", + "112.0 | \n", + "90.0 | \n", + "9.0 | \n", + "93.0 | \n", + "196.0 | \n", + "108.0 | \n", + "42.0 | \n", + "216.0 | \n", + "46.0 | \n", + "201.0 | \n", + "124.0 | \n", + "45.0 | \n", + "144.0 | \n", + "6.0 | \n", + "10.0 | \n", + "116.0 | \n", + "9.0 | \n", + "117.0 | \n", + "62.0 | \n", + "69.0 | \n", + "35.0 | \n", + "38.0 | \n", + "240.0 | \n", + "155.0 | \n", + "86.0 | \n", + "118.0 | \n", + "25.0 | \n", + "208.0 | \n", + "127.0 | \n", + "19.0 | \n", + "97.0 | \n", + "164.0 | \n", + "238.0 | \n", + "183.0 | \n", + "92.0 | \n", + "150.0 | \n", + "15.0 | \n", + "255.0 | \n", + "60.0 | \n", + "124.0 | \n", + "14.0 | \n", + "11.0 | \n", + "49.0 | \n", + "15.0 | \n", + "106.0 | \n", + "123.0 | \n", + "0.0 | \n", + "241.0 | \n", + "250.0 | \n", + "33.0 | \n", + "244.0 | \n", + "140.0 | \n", + "165.0 | \n", + "249.0 | \n", + "16.0 | \n", + "266.0 | \n", + "75.0 | \n", + "10.0 | \n", + "223.0 | \n", + "227.0 | \n", + "224.0 | \n", + "119.0 | \n", + "84.0 | \n", + "104.0 | \n", + "128.0 | \n", + "113.0 | \n", + "77.0 | \n", + "33.0 | \n", + "101.0 | \n", + "76.0 | \n", + "127.0 | \n", + "12.0 | \n", + "130.0 | \n", + "
1118 | \n", + "0 | \n", + "400 | \n", + "400 | \n", + "0.8 | \n", + "0.8 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "RF_plus | \n", + "LFI_with_raw_RF_plus | \n", + "268 | \n", + "132 | \n", + "10 | \n", + "7 | \n", + "115 | \n", + "213 | \n", + "22 | \n", + "132 | \n", + "117 | \n", + "159 | \n", + "234 | \n", + "137 | \n", + "30 | \n", + "67 | \n", + "126 | \n", + "82 | \n", + "79 | \n", + "184 | \n", + "24 | \n", + "68 | \n", + "78 | \n", + "4 | \n", + "44 | \n", + "55 | \n", + "71 | \n", + "61 | \n", + "42 | \n", + "91 | \n", + "9672 | \n", + "5.624723 | \n", + "0.801617 | \n", + "0.895591 | \n", + "0.688723 | \n", + "0.821970 | \n", + "0.911975 | \n", + "0.685238 | \n", + "7 | \n", + "RandomForestRegressor(max_features='sqrt', ran... | \n", + "179.0 | \n", + "180.0 | \n", + "66.0 | \n", + "112.0 | \n", + "90.0 | \n", + "9.0 | \n", + "93.0 | \n", + "196.0 | \n", + "108.0 | \n", + "42.0 | \n", + "216.0 | \n", + "46.0 | \n", + "201.0 | \n", + "124.0 | \n", + "45.0 | \n", + "144.0 | \n", + "6.0 | \n", + "10.0 | \n", + "116.0 | \n", + "9.0 | \n", + "117.0 | \n", + "62.0 | \n", + "69.0 | \n", + "35.0 | \n", + "38.0 | \n", + "240.0 | \n", + "155.0 | \n", + "86.0 | \n", + "118.0 | \n", + "25.0 | \n", + "208.0 | \n", + "127.0 | \n", + "19.0 | \n", + "97.0 | \n", + "164.0 | \n", + "238.0 | \n", + "183.0 | \n", + "92.0 | \n", + "150.0 | \n", + "15.0 | \n", + "255.0 | \n", + "60.0 | \n", + "124.0 | \n", + "14.0 | \n", + "11.0 | \n", + "49.0 | \n", + "15.0 | \n", + "106.0 | \n", + "123.0 | \n", + "0.0 | \n", + "241.0 | \n", + "250.0 | \n", + "33.0 | \n", + "244.0 | \n", + "140.0 | \n", + "165.0 | \n", + "249.0 | \n", + "16.0 | \n", + "266.0 | \n", + "75.0 | \n", + "10.0 | \n", + "223.0 | \n", + "227.0 | \n", + "224.0 | \n", + "119.0 | \n", + "84.0 | \n", + "104.0 | \n", + "128.0 | \n", + "113.0 | \n", + "77.0 | \n", + "33.0 | \n", + "101.0 | \n", + "76.0 | \n", + "127.0 | \n", + "12.0 | \n", + "130.0 | \n", + "
1119 | \n", + "0 | \n", + "400 | \n", + "400 | \n", + "0.8 | \n", + "0.8 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "RF_plus | \n", + "LIME_RF_plus | \n", + "268 | \n", + "132 | \n", + "10 | \n", + "7 | \n", + "115 | \n", + "213 | \n", + "22 | \n", + "132 | \n", + "117 | \n", + "159 | \n", + "234 | \n", + "137 | \n", + "30 | \n", + "67 | \n", + "126 | \n", + "82 | \n", + "79 | \n", + "184 | \n", + "24 | \n", + "68 | \n", + "78 | \n", + "4 | \n", + "44 | \n", + "55 | \n", + "71 | \n", + "61 | \n", + "42 | \n", + "91 | \n", + "9672 | \n", + "364.408627 | \n", + "0.815299 | \n", + "0.900771 | \n", + "0.725484 | \n", + "0.828283 | \n", + "0.908782 | \n", + "0.729354 | \n", + "7 | \n", + "RandomForestRegressor(max_features='sqrt', ran... | \n", + "179.0 | \n", + "180.0 | \n", + "66.0 | \n", + "112.0 | \n", + "90.0 | \n", + "9.0 | \n", + "93.0 | \n", + "196.0 | \n", + "108.0 | \n", + "42.0 | \n", + "216.0 | \n", + "46.0 | \n", + "201.0 | \n", + "124.0 | \n", + "45.0 | \n", + "144.0 | \n", + "6.0 | \n", + "10.0 | \n", + "116.0 | \n", + "9.0 | \n", + "117.0 | \n", + "62.0 | \n", + "69.0 | \n", + "35.0 | \n", + "38.0 | \n", + "240.0 | \n", + "155.0 | \n", + "86.0 | \n", + "118.0 | \n", + "25.0 | \n", + "208.0 | \n", + "127.0 | \n", + "19.0 | \n", + "97.0 | \n", + "164.0 | \n", + "238.0 | \n", + "183.0 | \n", + "92.0 | \n", + "150.0 | \n", + "15.0 | \n", + "255.0 | \n", + "60.0 | \n", + "124.0 | \n", + "14.0 | \n", + "11.0 | \n", + "49.0 | \n", + "15.0 | \n", + "106.0 | \n", + "123.0 | \n", + "0.0 | \n", + "241.0 | \n", + "250.0 | \n", + "33.0 | \n", + "244.0 | \n", + "140.0 | \n", + "165.0 | \n", + "249.0 | \n", + "16.0 | \n", + "266.0 | \n", + "75.0 | \n", + "10.0 | \n", + "223.0 | \n", + "227.0 | \n", + "224.0 | \n", + "119.0 | \n", + "84.0 | \n", + "104.0 | \n", + "128.0 | \n", + "113.0 | \n", + "77.0 | \n", + "33.0 | \n", + "101.0 | \n", + "76.0 | \n", + "127.0 | \n", + "12.0 | \n", + "130.0 | \n", + "
1120 rows × 130 columns
\n", + "