problem.py
# -*- coding: utf-8 -*-
##########################################################################
# Copyright (C) CEA, 2021
# Distributed under the terms of the CeCILL-B license, as published by
# the CEA-CNRS-INRIA. Refer to the LICENSE file or to
# http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html
# for details.
##########################################################################
"""
This parametrizes the setup and uses building blocks from RAMP workflow.
"""
import os
import pandas as pd
import numpy as np
import rampwf as rw
from sklearn.model_selection import train_test_split
class DeepEstimator(rw.workflows.SKLearnPipeline):
""" Wrapper to convert a scikit-learn estimator into a Deep Learning
RAMP workflow.
Notes
-----
The training is not performed on the server side. Weights are required at
the submission time and fold indices are tracked at training time.
"""
def __init__(self, filename="estimator.py", additional_filenames=None):
super().__init__(filename, additional_filenames)
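

# The DeepEstimator workflow follows the rampwf SKLearnPipeline contract: each
# submission ships an 'estimator.py' exposing a get_estimator() function, plus
# the pre-trained 'weights.pth' declared below.  For illustration only, a
# minimal (hypothetical) submission could look like the commented sketch
# below; the Ridge model is an assumption, and a real submission for this deep
# learning challenge would instead build a model that loads 'weights.pth':
#
#     # estimator.py (hypothetical submission sketch)
#     from sklearn.pipeline import make_pipeline
#     from sklearn.preprocessing import StandardScaler
#     from sklearn.linear_model import Ridge
#
#     def get_estimator():
#         return make_pipeline(StandardScaler(), Ridge(alpha=1.0))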


N_FOLDS = 1  # nominal setting; get_cv below actually returns two splits
problem_title = "Predict age from brain grey matter (regression with DL)"
_target_column_name = "age"
Predictions = rw.prediction_types.make_regression()
workflow = DeepEstimator(
    filename="estimator.py", additional_filenames=["weights.pth"])
score_types = [
    rw.score_types.RMSE()
]


def get_cv(X, y):
    """ Get the cross-validation fold indices.

    Notes
    -----
    1-fold is not supported yet by ramp-board, so two shuffled 75/25 splits
    are returned instead.
    """
    return [train_test_split(range(len(X)), shuffle=True, test_size=0.25,
                             random_state=0),
            train_test_split(range(len(X)), shuffle=True, test_size=0.25,
                             random_state=20)]
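
# Each entry returned by get_cv is a (train indices, test indices) pair, as
# produced by train_test_split.  A hypothetical local check (not executed
# here) could unpack the folds as follows:
#
#     # folds = get_cv(x_arr, y_arr)
#     # for train_is, test_is in folds:
#     #     x_fold_train, y_fold_train = x_arr[train_is], y_arr[train_is]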


def _read_data(path, dataset, datatype=["rois", "vbm"]):
    """ Read the data.

    Parameters
    ----------
    path: str
        the data location.
    dataset: str
        'train' or 'test'.
    datatype: list of str, default ['rois', 'vbm']
        the data blocks to load, among 'rois' and 'vbm'. By default the ROI
        and VBM data are concatenated.

    Returns
    -------
    x_arr: array (n_samples, n_features)
        input data.
    y_arr: array (n_samples, )
        target data.
    """
    # Target: the participants' age.
    participants = pd.read_csv(os.path.join(
        path, "data", "{0}_participants.csv".format(dataset)))
    y_arr = participants[_target_column_name].values
    x_arr_l = []
    if "rois" in datatype:
        # Grey-matter ROI features: keep the 284 columns starting at
        # 'l3thVen_GM_Vol'.
        rois = pd.read_csv(os.path.join(
            path, "data", "{0}_rois.csv".format(dataset)))
        x_rois_arr = rois.loc[:, "l3thVen_GM_Vol":]
        assert x_rois_arr.shape[1] == 284
        x_arr_l.append(x_rois_arr)
    if "vbm" in datatype:
        # Voxel-based morphometry maps: apply the binary brain mask to keep
        # only in-mask voxels, yielding an array (n_samples, n_voxels).
        imgs_arr_zip = np.load(os.path.join(
            path, "data", "{0}_vbm.npz".format(dataset)))
        x_img_arr = imgs_arr_zip["imgs_arr"].squeeze()
        mask_arr = imgs_arr_zip["mask_arr"]
        x_img_arr = x_img_arr[:, mask_arr]
        x_arr_l.append(x_img_arr)
    x_arr = np.concatenate(x_arr_l, axis=1)
    return x_arr, y_arr


def get_train_data(path=".", datatype=["rois", "vbm"]):
    return _read_data(path, "train", datatype)


def get_test_data(path=".", datatype=["rois", "vbm"]):
    return _read_data(path, "test", datatype)