diff --git a/feature_importance/test.ipynb b/feature_importance/test.ipynb new file mode 100644 index 0000000..c5e67e5 --- /dev/null +++ b/feature_importance/test.ipynb @@ -0,0 +1,123 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fetching diabetes from sklearn\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 3.5s\n", + "[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 9.3s finished\n" + ] + } + ], + "source": [ + "from scripts.simulations_util import *\n", + "from scripts.competing_methods_local import *\n", + "from util import apply_splitting_strategy\n", + "from sklearn.metrics import roc_auc_score, f1_score, recall_score, precision_score, mean_squared_error, r2_score, average_precision_score\n", + "X = sample_real_data_X(source = \"imodels\", data_name = \"diabetes_regr\", sample_row_n = 400)\n", + "y = linear_model(X, beta = 1, sigma = None, heritability = 0.8, s = 5)\n", + "X_train, X_tune, X_test, y_train, y_tune, y_test = apply_splitting_strategy(X, y, \"train-test\", 1)\n", + "np.random.seed(42)\n", + "indices_train = np.random.choice(X_train.shape[0], int(X_train.shape[0]*.25), replace=False)\n", + "indices_test = np.random.choice(X_test.shape[0], int(X_test.shape[0]*.25), replace=False)\n", + "X_train_subset = X_train[indices_train]\n", + "y_train_subset = y_train[indices_train]\n", + "X_test_subset = X_test[indices_test]\n", + "y_test_subset = y_test[indices_test]\n", + "# fit RF model\n", + "est = RandomForestRegressor(n_estimators = 100, min_samples_leaf = 5, max_features = 0.33, random_state = 42)\n", + "\n", + "est.fit(X_train, y_train)\n", + "\n", + "# fit RF_plus model\n", + "rf_plus_base = RandomForestPlusRegressor(rf_model=est)\n", + "rf_plus_base.fit(X_train, y_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "local_fi_score_train, local_parital_pred_train, local_fi_score_test, local_partial_pred_test, local_fi_score_test_subset, local_partial_pred_test_subset = LFI_evaluation_RF_plus(X_train=X_train, y_train=y_train,\n", + " X_train_subset = X_train_subset, y_train_subset=y_train_subset,\n", + " X_test_subset=X_test_subset, X_test=X_test,\n", + " fit=rf_plus_base)\n", + "local_fi_score_train_subset = local_fi_score_train[indices_train]\n", + "local_partial_pred_train_subset = local_parital_pred_train[indices_train]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.6399999999999999, 1.0, 0.44000000000000006, 0.76, 0.8400000000000001, 0.4, 0.7600000000000001, 0.6399999999999999, 0.88, 0.92, 1.0, 0.28, 0.24000000000000002, 0.6400000000000001, 0.72, 0.8400000000000001, 1.0, 0.8, 0.6799999999999999, 0.4, 0.6, 0.6799999999999999, 0.7600000000000001, 1.0, 0.88, 0.52, 0.7200000000000002, 0.6799999999999999, 0.7600000000000001, 0.92, 0.28, 0.8, 0.48]\n", + "[0.8, 1.0, 0.6746031746031746, 0.8211111111111111, 0.911111111111111, 0.5088888888888888, 0.8599999999999999, 0.8, 0.925, 0.9428571428571428, 1.0, 0.45460317460317456, 0.5305555555555554, 0.7642857142857142, 0.8333333333333333, 0.911111111111111, 1.0, 0.8999999999999999, 0.8111111111111111, 0.6638888888888889, 0.7888888888888888, 0.7833333333333332, 0.8599999999999999, 1.0, 0.925, 0.7088888888888889, 0.81, 0.8111111111111111, 0.8599999999999999, 0.9428571428571428, 0.5412698412698412, 0.8999999999999999, 0.6888888888888889]\n", + "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n", + "0.6957575757575759\n", + "0.8100817700817701\n", + "0.0\n" + ] + } + ], + "source": [ + "local_fi_score_test_subset = pd.DataFrame(local_fi_score_test_subset)\n", + "auroc = []\n", + "auprc = []\n", + "f1 = []\n", + "support = [1,1,1,1,1,0,0,0,0,0]\n", + "for rownum in range(local_fi_score_test_subset.shape[0]):\n", + " auroc.append(roc_auc_score(support, local_fi_score_test_subset.iloc[rownum,:]))\n", + " auprc.append(average_precision_score(support, local_fi_score_test_subset.iloc[rownum,:]))\n", + " f1.append(f1_score(support, local_fi_score_test_subset.iloc[rownum,:] > 0.5))\n", + "print(auroc)\n", + "print(auprc)\n", + "print(f1)\n", + "print(np.array(auroc).mean())\n", + "print(np.array(auprc).mean())\n", + "print(np.array(f1).mean())\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}