Skip to content

Commit

Permalink
feature ranking stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
zachrewolinski committed Apr 21, 2024
1 parent 9ab24a6 commit 0a7508a
Show file tree
Hide file tree
Showing 8 changed files with 1,314 additions and 22 deletions.
10 changes: 10 additions & 0 deletions feature_importance/feature_ranking.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#SBATCH [email protected]
#SBATCH --mail-type=ALL
#SBATCH --partition=yugroup

# Slurm batch job that runs the local feature-importance simulation driver
# with the mdi_local.real_x_sim_y configuration and stores the output under
# the result name "feature_ranking".
#
# NOTE(review): the first #SBATCH line above looks mangled by e-mail
# obfuscation; presumably it was `--mail-user=<address>`. Restore the real
# directive before submitting.

# Activate the environment that provides the project's dependencies.
source activate mdi

# Keep the arguments in an array so each one reaches python as exactly one
# word, without relying on unquoted word splitting / glob expansion.
command=(
    run_importance_local_sims.py
    --nreps 1
    --config mdi_local.real_x_sim_y
    --split_seed 1
    --ignore_cache
    --create_rmd
    --result_name feature_ranking
)

# Execute the command
python "${command[@]}"
43 changes: 43 additions & 0 deletions feature_importance/fi_config/mdi_local/real_x_sim_y/dgp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import sys
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *


# ---------------------------------------------------------------------------
# Covariates (X): produced by `sample_real_data` called with X_PARAMS_DICT.
# ---------------------------------------------------------------------------
X_DGP = sample_real_data
X_PARAMS_DICT = dict(
    X_fpath="../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
    sample_row_n=None,  # overridden by the "sample_row_n" sweep below
    return_data="X",
)
# Alternative covariate files, kept for quick switching:
# X_PARAMS_DICT = dict(
#     X_fpath="../data/classification_data/Fico/X_fico.csv",
#     sample_row_n=None,
#     return_data="X",
# )
# X_PARAMS_DICT = dict(
#     X_fpath="../data/classification_data/Juvenile/X_juvenile.csv",
#     sample_row_n=None,
#     return_data="X",
# )

# ---------------------------------------------------------------------------
# Response (y): produced by `linear_model` called with Y_PARAMS_DICT.
# ---------------------------------------------------------------------------
Y_DGP = linear_model
Y_PARAMS_DICT = dict(
    beta=1,
    sigma=None,
    heritability=0.4,  # overridden by the "heritability" sweep below
    s=4,
)
# Alternative response settings that read y from file.
# NOTE(review): these use `y_fpath`/`return_data`, so they presumably belong
# to a different Y_DGP than `linear_model` -- confirm before re-enabling.
# Y_PARAMS_DICT = dict(
#     y_fpath="../data/classification_data/Fico/y_fico.csv",
#     return_data="y",
# )
# Y_PARAMS_DICT = dict(
#     y_fpath="../data/classification_data/Juvenile/y_juvenile.csv",
#     return_data="y",
# )

# Parameters swept by the experiment (two of them: heritability and the
# number of sampled rows). Each sweep maps a string label to the value used.
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {
    "heritability": {str(h): h for h in (0.1, 0.2, 0.4, 0.8)},
    "sample_row_n": {str(n): n for n in (100, 200, 300, 442)},
}
25 changes: 25 additions & 0 deletions feature_importance/fi_config/mdi_local/real_x_sim_y/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import copy
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *



# Prediction models to fit. Each inner list holds the ModelConfig(s) for one
# model; `model_type` is the tag the FI_ESTIMATORS entries in this module use
# ('tree' for the plain random forest, 't_plus' for RF+).
ESTIMATORS = [
    [ModelConfig('RF', RandomForestRegressor, model_type='tree',
                 other_params={'n_estimators': 100, 'min_samples_leaf': 1,
                               'max_features': 'sqrt', 'random_state': 42})],
    # RF+ must be the *regressor* variant: it wraps a RandomForestRegressor,
    # and RandomForestPlusRegressor is the only RF+ class this module imports
    # explicitly (the classifier variant was referenced here by mistake).
    [ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
                 other_params={'rf_model': RandomForestRegressor(
                     n_estimators=100, min_samples_leaf=1,
                     max_features='sqrt', random_state=42)})],
]

# Local feature-importance methods to evaluate. Each inner list holds one
# FIModelConfig: a display name, the scoring function, the `model_type` tag
# ('tree' or 't_plus'), and optional keyword overrides in `other_params`.
# Every entry uses the "train-test" splitting strategy.
FI_ESTIMATORS = [
    # --- methods tagged 'tree' ---------------------------------------------
    [FIModelConfig(
        'LFI_with_raw_RF',
        LFI_evaluation_RF,
        model_type='tree',
        splitting_strategy="train-test",
    )],
    [FIModelConfig(
        'MDI_RF',
        LFI_evaluation_RF,
        model_type='tree',
        splitting_strategy="train-test",
        other_params={
            "include_raw": False,
            "cv_ridge": 0,
            "calc_loo_coef": False,
            "sample_split": "inbag",
        },
    )],
    [FIModelConfig(
        'LFI_with_raw_OOB_RF',
        LFI_evaluation_RF,
        model_type='tree',
        splitting_strategy="train-test",
        other_params={
            "sample_split": "oob",
            "fit_on": "test",
            "calc_loo_coef": False,
        },
    )],
    [FIModelConfig(
        'TreeSHAP_RF',
        tree_shap_evaluation_RF,
        model_type='tree',
        splitting_strategy="train-test",
    )],
    # --- methods tagged 't_plus' -------------------------------------------
    [FIModelConfig(
        'LFI_with_raw_RF_plus',
        LFI_evaluation_RF_plus,
        model_type='t_plus',
        splitting_strategy="train-test",
    )],
    [FIModelConfig(
        'Kernel_SHAP_RF_plus',
        kernel_shap_evaluation_RF_plus,
        model_type='t_plus',
        splitting_strategy="train-test",
    )],
    # LIME on RF+ is currently disabled for this configuration:
    # [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
]
31 changes: 11 additions & 20 deletions feature_importance/fi_config/mdi_local/synthetic_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,17 @@
# N_ESTIMATORS=[50, 100, 500, 1000]
ESTIMATORS = [
[ModelConfig('RF', RandomForestRegressor, model_type='tree',
other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33})],
# [ModelConfig('RF', RandomForestRegressor, model_type='tree', vary_param="n_estimators", vary_param_val=m,
# other_params={'min_samples_leaf': 5, 'max_features': 0.33}) for m in N_ESTIMATORS]
other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
[ModelConfig('RF_plus', RandomForestPlusClassifier, model_type='t_plus',
other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
]

FI_ESTIMATORS = [
[FIModelConfig('MDI_local_all_stumps_evaluate', MDI_local_all_stumps_evaluate, ascending = False, splitting_strategy = "train-test", model_type='tree')],
# [FIModelConfig('MDI_sub_stumps', MDI_local_sub_stumps, ascending = False, model_type='tree')],
[FIModelConfig('MDI_local_all_stumps_evaluate_without_raw', MDI_local_all_stumps_evaluate, ascending = False, splitting_strategy = "train-test", model_type='tree', other_params={"include_raw": False})],
# [FIModelConfig('MDI_sub_stumps_without_raw', MDI_local_sub_stumps, ascending = False, model_type='tree', other_params={"include_raw": False})],
# [FIModelConfig('LFI_sum_absolute', LFI_sum_absolute, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_absolute_sum_evaluate', LFI_absolute_sum_evaluate, model_type='tree', splitting_strategy = "train-test")],
# [FIModelConfig('LFI_sum_absolute_sub_stumps', LFI_sum_absolute_sub_stumps, model_type='tree')],
# [FIModelConfig('LFI_absolute_sum_sub_stumps', LFI_absolute_sum_sub_stumps, model_type='tree')],
# [FIModelConfig('LFI_sum_absolute_without_raw', LFI_sum_absolute, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False})],
[FIModelConfig('LFI_absolute_sum_evaluate_without_raw', LFI_absolute_sum_evaluate, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False})],
# [FIModelConfig('LFI_sum_absolute_sub_stumps_without_raw', LFI_sum_absolute_sub_stumps, model_type='tree', other_params={"include_raw": False})],
# [FIModelConfig('LFI_absolute_sum_sub_stumps_without_raw', LFI_absolute_sum_sub_stumps, model_type='tree', other_params={"include_raw": False})],
[FIModelConfig('TreeSHAP', tree_shap_local, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LIME', lime_local, model_type='tree', splitting_strategy = "train-test")],
]

# [FIModelConfig('Permutation', permutation_local, model_type='tree')],
[FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
[FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
[FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
]
703 changes: 703 additions & 0 deletions feature_importance/run_importance_local_sims.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 0a7508a

Please sign in to comment.