Skip to content

Commit

Permalink
pushing so abhi can replicate error
Browse files (browse the repository at this point in the history)
  • Loading branch information
zachrewolinski committed May 14, 2024
1 parent 59a45b3 commit 51f608a
Show file tree
Hide file tree
Showing 17 changed files with 2,610 additions and 505 deletions.
218 changes: 115 additions & 103 deletions feature_importance/01_run_ablation_classification.py

Large diffs are not rendered by default.

188 changes: 96 additions & 92 deletions feature_importance/01_run_ablation_regression.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion feature_importance/feature_ranking.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
#SBATCH --mail-type=ALL

source activate mdi
command="ranking_importance_local_sims.py --nreps 1 --config mdi_local.real_x_sim_y.diabetes-classification.lss-model --split_seed ${1} --ignore_cache --create_rmd --result_name diabetes-class-lss"
command="ranking_importance_local_sims.py --nreps 1 --config mdi_local.real_x_sim_y.diabetes-regression.lss-model --split_seed 6 --ignore_cache --create_rmd --result_name diabetes-reg-lss"

# Execute the command
python $command
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,11 +3,13 @@
from feature_importance.scripts.simulations_util import *


X_DGP = sample_real_X
X_DGP = sample_real_data_X
X_PARAMS_DICT = {
"fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
"sample_row_n": 442
"source": "imodels",
"data_name": "diabetes_regr",
"sample_row_n": None
}

Y_DGP = hierarchical_poly
Y_PARAMS_DICT = {
"beta": 1,
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,24 +2,21 @@
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *
from sklearn.linear_model import Ridge


ESTIMATORS = [
[ModelConfig('RF', RandomForestRegressor, model_type='tree',
other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
[ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
]

FI_ESTIMATORS = [
[FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
[FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
[FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
[FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
[FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
]
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,11 +3,13 @@
from feature_importance.scripts.simulations_util import *


X_DGP = sample_real_X
X_DGP = sample_real_data_X
X_PARAMS_DICT = {
"fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
"sample_row_n": 442
"source": "imodels",
"data_name": "diabetes_regr",
"sample_row_n": None
}

Y_DGP = hierarchical_poly
Y_PARAMS_DICT = {
"beta": 1,
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,24 +2,21 @@
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *
from sklearn.linear_model import Ridge


ESTIMATORS = [
[ModelConfig('RF', RandomForestRegressor, model_type='tree',
other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
[ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
]

FI_ESTIMATORS = [
[FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
[FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
[FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
[FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
[FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
]
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,38 +3,21 @@
from feature_importance.scripts.simulations_util import *


X_DGP = sample_real_X
X_DGP = sample_real_data_X
X_PARAMS_DICT = {
"fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
"sample_row_n": 442
"source": "imodels",
"data_name": "diabetes_regr",
"sample_row_n": None
}
# X_PARAMS_DICT = {
# "X_fpath": "../data/classification_data/Fico/X_fico.csv",
# "sample_row_n": None,
# "return_data": "X"
# }
# X_PARAMS_DICT = {
# "X_fpath": "../data/classification_data/Juvenile/X_juvenile.csv",
# "sample_row_n": None,
# "return_data": "X"
# }

Y_DGP = linear_model
Y_PARAMS_DICT = {
"beta": 1,
"sigma": None,
"heritability": 0.4,
"s": 5
}
# Y_PARAMS_DICT = {
# "y_fpath": "../data/classification_data/Fico/y_fico.csv",
# "return_data": "y"
# }
# Y_PARAMS_DICT = {
# "y_fpath": "../data/classification_data/Juvenile/y_juvenile.csv",
# "return_data": "y"
# }

# vary one parameter
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {"heritability": {"0.1": 0.1, "0.2": 0.2,
"0.4": 0.4, "0.8": 0.8},
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,24 +2,21 @@
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *
from sklearn.linear_model import Ridge


ESTIMATORS = [
[ModelConfig('RF', RandomForestRegressor, model_type='tree',
other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
[ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
]

FI_ESTIMATORS = [
[FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
[FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
[FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
[FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
[FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
]
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,11 +2,11 @@
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *

X_DGP = sample_real_X
X_DGP = sample_real_data_X
X_PARAMS_DICT = {
"fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
"sample_row_n": None,
"sample_col_n": None
"source": "imodels",
"data_name": "diabetes_regr",
"sample_row_n": None
}

Y_DGP = lss_model
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,24 +2,21 @@
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *
from sklearn.linear_model import Ridge


ESTIMATORS = [
[ModelConfig('RF', RandomForestRegressor, model_type='tree',
other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
[ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
other_params={'n_estimators': 100, 'min_samples_leaf': 5, 'max_features': 0.33, 'random_state': 42})]
]

FI_ESTIMATORS = [
[FIModelConfig('LFI_with_raw_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('MDI_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
[FIModelConfig('LFI_with_raw_OOB_RF', LFI_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
[FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LFI_fit_on_inbag_RF', LFI_evaluation_RF_MDI, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"include_raw":False, "fit_on":"inbag", "prediction_model": Ridge(alpha=1e-6)})],
[FIModelConfig('LFI_fit_on_OOB_RF', LFI_evaluation_RF_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False, other_params={"fit_on":"oob"})],
[FIModelConfig('LFI_evaluate_on_all_RF_plus', LFI_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('LFI_evaluate_on_oob_RF_plus', LFI_evaluation_RF_plus_OOB, model_type='tree', splitting_strategy = "train-test", ascending = False)],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='tree', splitting_strategy = "train-test")],
]
Loading

0 comments on commit 51f608a

Please sign in to comment.