Skip to content

Commit

Permalink
notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
zachrewolinski committed May 15, 2024
1 parent fceb2c7 commit 7e1c74a
Showing 1 changed file with 123 additions and 0 deletions.
123 changes: 123 additions & 0 deletions feature_importance/test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fetching diabetes from sklearn\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
"[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 3.5s\n",
"[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 9.3s finished\n"
]
}
],
"source": [
"# Explicit imports for names this notebook uses directly. They are placed before the\n",
"# star imports so that anything the project modules export under the same name still\n",
"# takes precedence.\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from scripts.simulations_util import *\n",
"from scripts.competing_methods_local import *\n",
"from util import apply_splitting_strategy\n",
"from sklearn.metrics import roc_auc_score, f1_score, recall_score, precision_score, mean_squared_error, r2_score, average_precision_score\n",
"\n",
"# Seed NumPy's global RNG before anything is sampled or simulated, so a fresh-kernel\n",
"# run is reproducible end to end.\n",
"# NOTE(review): sample_real_data_X / linear_model presumably draw from this RNG -- confirm.\n",
"np.random.seed(42)\n",
"\n",
"# Build the data set and split it.\n",
"# NOTE(review): s = 5 must agree with the hard-coded `support` vector used in the\n",
"# evaluation cell -- confirm against linear_model.\n",
"X = sample_real_data_X(source = \"imodels\", data_name = \"diabetes_regr\", sample_row_n = 400)\n",
"y = linear_model(X, beta = 1, sigma = None, heritability = 0.8, s = 5)\n",
"X_train, X_tune, X_test, y_train, y_tune, y_test = apply_splitting_strategy(X, y, \"train-test\", 1)\n",
"\n",
"# Draw 25% of the row positions of each split, without replacement.\n",
"indices_train = np.random.choice(X_train.shape[0], int(X_train.shape[0]*.25), replace=False)\n",
"indices_test = np.random.choice(X_test.shape[0], int(X_test.shape[0]*.25), replace=False)\n",
"X_train_subset = X_train[indices_train]\n",
"y_train_subset = y_train[indices_train]\n",
"X_test_subset = X_test[indices_test]\n",
"y_test_subset = y_test[indices_test]\n",
"\n",
"# fit RF model\n",
"est = RandomForestRegressor(n_estimators = 100, min_samples_leaf = 5, max_features = 0.33, random_state = 42)\n",
"est.fit(X_train, y_train)\n",
"\n",
"# fit RF_plus model on top of the already-fitted forest\n",
"rf_plus_base = RandomForestPlusRegressor(rf_model=est)\n",
"rf_plus_base.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Unpack the six results of LFI_evaluation_RF_plus for the fitted RF+ model: a score\n",
"# array and a partial-prediction array for each of train, test and test-subset.\n",
"(\n",
"    local_fi_score_train,\n",
"    local_partial_pred_train,\n",
"    local_fi_score_test,\n",
"    local_partial_pred_test,\n",
"    local_fi_score_test_subset,\n",
"    local_partial_pred_test_subset,\n",
") = LFI_evaluation_RF_plus(\n",
"    X_train=X_train, y_train=y_train,\n",
"    X_train_subset=X_train_subset, y_train_subset=y_train_subset,\n",
"    X_test_subset=X_test_subset, X_test=X_test,\n",
"    fit=rf_plus_base,\n",
")\n",
"\n",
"# The training-subset results are the rows of the full-training results at the same\n",
"# positions that were used to build X_train_subset.\n",
"local_fi_score_train_subset = local_fi_score_train[indices_train]\n",
"local_partial_pred_train_subset = local_partial_pred_train[indices_train]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.6399999999999999, 1.0, 0.44000000000000006, 0.76, 0.8400000000000001, 0.4, 0.7600000000000001, 0.6399999999999999, 0.88, 0.92, 1.0, 0.28, 0.24000000000000002, 0.6400000000000001, 0.72, 0.8400000000000001, 1.0, 0.8, 0.6799999999999999, 0.4, 0.6, 0.6799999999999999, 0.7600000000000001, 1.0, 0.88, 0.52, 0.7200000000000002, 0.6799999999999999, 0.7600000000000001, 0.92, 0.28, 0.8, 0.48]\n",
"[0.8, 1.0, 0.6746031746031746, 0.8211111111111111, 0.911111111111111, 0.5088888888888888, 0.8599999999999999, 0.8, 0.925, 0.9428571428571428, 1.0, 0.45460317460317456, 0.5305555555555554, 0.7642857142857142, 0.8333333333333333, 0.911111111111111, 1.0, 0.8999999999999999, 0.8111111111111111, 0.6638888888888889, 0.7888888888888888, 0.7833333333333332, 0.8599999999999999, 1.0, 0.925, 0.7088888888888889, 0.81, 0.8111111111111111, 0.8599999999999999, 0.9428571428571428, 0.5412698412698412, 0.8999999999999999, 0.6888888888888889]\n",
"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
"0.6957575757575759\n",
"0.8100817700817701\n",
"0.0\n"
]
}
],
"source": [
"# For every row of the test subset, score how well its local feature-importance\n",
"# values rank/select the features marked in `support`, then report the per-row\n",
"# metrics followed by their means.\n",
"local_fi_score_test_subset = pd.DataFrame(local_fi_score_test_subset)\n",
"auroc = []\n",
"auprc = []\n",
"f1 = []\n",
"# Ground-truth indicator, one entry per feature column.\n",
"# NOTE(review): assumes the simulated response depends on the first 5 of the 10\n",
"# columns (linear_model(..., s = 5)) -- confirm.\n",
"support = [1,1,1,1,1,0,0,0,0,0]\n",
"assert len(support) == local_fi_score_test_subset.shape[1], \"support needs one entry per feature column\"\n",
"n_support = sum(support)\n",
"for rownum in range(local_fi_score_test_subset.shape[0]):\n",
"    row_scores = local_fi_score_test_subset.iloc[rownum,:].to_numpy()\n",
"    auroc.append(roc_auc_score(support, row_scores))\n",
"    auprc.append(average_precision_score(support, row_scores))\n",
"    # F1 needs hard selections. A fixed `> 0.5` cut-off depends on the scale of the\n",
"    # scores and gave F1 = 0.0 on every row of the recorded run, even where AUROC\n",
"    # was 1.0, so flag the n_support highest-scoring features instead.\n",
"    selected = np.zeros(len(support), dtype=int)\n",
"    selected[np.argsort(-row_scores, kind=\"stable\")[:n_support]] = 1\n",
"    f1.append(f1_score(support, selected))\n",
"print(auroc)\n",
"print(auprc)\n",
"print(f1)\n",
"print(np.array(auroc).mean())\n",
"print(np.array(auprc).mean())\n",
"print(np.array(f1).mean())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 7e1c74a

Please sign in to comment.