Skip to content

Commit

Permalink
Update normalization
Browse files (browse the repository at this point in the history)
  • Loading branch information
zyliang2001 committed Mar 21, 2024
1 parent 9994589 commit 794f47f
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 4 deletions.
2 changes: 1 addition & 1 deletion feature_importance/01_ablation_classification_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#SBATCH --partition=yugroup

source activate mdi
command="01_run_ablation_classification.py --nreps 1 --config mdi_local.real_data_classification --split_seed ${1} --ignore_cache --create_rmd --result_name Diabetes_classification_parallel"
command="01_run_ablation_classification.py --nreps 1 --config mdi_local.real_data_classification --split_seed ${1} --normalization train_test --ignore_cache --create_rmd --result_name Diabetes_classification_parallel"

# Execute the command
python $command
2 changes: 1 addition & 1 deletion feature_importance/01_ablation_regression_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#SBATCH --partition=yugroup

source activate mdi
command="01_run_ablation_regression.py --nreps 1 --config mdi_local.real_data_regression --split_seed ${1} --ignore_cache --create_rmd --result_name diabetes_regression_parallel"
command="01_run_ablation_regression.py --nreps 1 --config mdi_local.real_data_regression --split_seed ${1} --normalization train_test --ignore_cache --create_rmd --result_name diabetes_regression_parallel"

# Execute the command
python $command
11 changes: 11 additions & 0 deletions feature_importance/01_run_ablation_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Callable, List, Tuple
import itertools
from sklearn.metrics import roc_auc_score, f1_score, recall_score, precision_score, mean_squared_error
from sklearn import preprocessing

sys.path.append(".")
sys.path.append("..")
Expand Down Expand Up @@ -100,6 +101,15 @@ def compare_estimators(estimators: List[ModelConfig],
y_tune = y
y_test = y

normalizer = preprocessing.Normalizer()
if args.normalization == "train_test":
X_train = normalizer.fit_transform(X_train)
X_test = normalizer.transform(X_test)
elif args.normalization == "all":
X = normalizer.fit_transform(X)
X_train = normalizer.transform(X_train)
X_test = normalizer.transform(X_test)

# fit model
est.fit(X_train, y_train)
test_all_auc = roc_auc_score(y_test, est.predict_proba(X_test)[:, 1])
Expand Down Expand Up @@ -329,6 +339,7 @@ def run_simulation(i, path, val_name, X_params_dict, X_dgp, y_params_dict, y_dgp
parser.add_argument('--n_cores', type=int, default=None)
parser.add_argument('--split_seed', type=int, default=0)
parser.add_argument('--results_path', type=str, default=default_dir)
parser.add_argument('--normalization', type=str, default="none")

# arguments for rmd output of results
parser.add_argument('--create_rmd', action='store_true', default=False)
Expand Down
11 changes: 11 additions & 0 deletions feature_importance/01_run_ablation_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Callable, List, Tuple
import itertools
from sklearn.metrics import roc_auc_score, f1_score, recall_score, precision_score, mean_squared_error, r2_score
from sklearn import preprocessing

sys.path.append(".")
sys.path.append("..")
Expand Down Expand Up @@ -100,6 +101,15 @@ def compare_estimators(estimators: List[ModelConfig],
y_tune = y
y_test = y

normalizer = preprocessing.Normalizer()
if args.normalization == "train_test":
X_train = normalizer.fit_transform(X_train)
X_test = normalizer.transform(X_test)
elif args.normalization == "all":
X = normalizer.fit_transform(X)
X_train = normalizer.transform(X_train)
X_test = normalizer.transform(X_test)

# fit model
est.fit(X_train, y_train)
test_all_mse = mean_squared_error(y_test, est.predict(X_test))
Expand Down Expand Up @@ -325,6 +335,7 @@ def run_simulation(i, path, val_name, X_params_dict, X_dgp, y_params_dict, y_dgp
parser.add_argument('--n_cores', type=int, default=None)
parser.add_argument('--split_seed', type=int, default=0)
parser.add_argument('--results_path', type=str, default=default_dir)
parser.add_argument('--normalization', type=str, default="none")

# arguments for rmd output of results
parser.add_argument('--create_rmd', action='store_true', default=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

FI_ESTIMATORS = [
[FIModelConfig('LFI_with_raw_RF', LFI_test_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_with_raw_CV_RF', LFI_test_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"cv_ridge": 5, "calc_loo_coef":False})],
[FIModelConfig('MDI_RF', LFI_test_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"include_raw": False, "cv_ridge": 0, "calc_loo_coef":False, "sample_split":"inbag"})],
[FIModelConfig('LFI_with_raw_OOB_RF', LFI_test_evaluation_RF, model_type='tree', splitting_strategy = "train-test", other_params={"sample_split":"oob", "fit_on":"test", "calc_loo_coef":False})],
[FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
Expand Down
2 changes: 1 addition & 1 deletion feature_importance/scripts/simulations_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import math


def sample_real_data(X_fpath=None, y_fpath=None, seed=4307, normalize=True,
def sample_real_data(X_fpath=None, y_fpath=None, seed=4307, normalize=False,
sample_row_n=None, sample_col_n=None, return_data=None,
return_support=True):

Expand Down

0 comments on commit 794f47f

Please sign in to comment.