-
Notifications
You must be signed in to change notification settings - Fork 2
/
gslda.m
162 lines (147 loc) · 5.48 KB
/
gslda.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
function [B, theta, delta] = gslda(X, Y, Delta, D1, D2, stop, plot, Q, maxSteps, convergenceCriterion, verbose)
% DESCRIPTION:
% gslda performs generalized sparse disciminant analysis [1], the
% penalized version of sparse discriminant analysis [2].
% It is implemented by using the orignal slda and larsen functions
% described in [3]. See 'spasm' folder to learn more about sdla
% implementation or read [3].
%
% INPUT:
% X matrix of n observations (rows) and p variables (columns).
% The columns are assumed centered and normalized to
% Euclidean length 1.
% Y matrix initializing the dummy variables representing the
% classes, e.g Y = [1 0; 1 0; 1 0; 0 1; 0 1] for two classes
% with the three first observations belonging to class 1 and
% the last two belonging to class 2. Values must be 0 or 1.
% Delta the weight on the l2-norm for elastic net regression. If
% Delta is a vector, parameter selection is made as described
% in Algorithm 1 in [1]. Default is Delta = 1e-6.
% D1 and D2 diagonal and positive definite anisotropy matrices. D1 has
% to be invertible. If D1=[] and/or D2=[], they are set as the
% identity matrix I. For SDA implementation set D1=D2=I
% (see test_SDA_GSDA_equivalence).
% stop if stop is negative, its absolute value corresponds to the
% desired number of variables, and while if stop is
% non-negative, it corresponds to an upper bound for the
% l1-norm. Defult is stop = -ceil(p/2), corresponding to
% ceil(p/2) non-zero elements in the discriminative direction.
% plot 'true' or 'false' for L-hypersurface plots. Deafault is
% 'false'.
% Q Number of desired discriminative directions. Default value
% for Q is one less than the number of classes. In binary
% classification Q is always equal to one.
% MAXSTEPS Maximum number of iterations. Default is MAXSTEPS = 100.
% TOL Tolerance for the stopping criterion (change in ridge cost
% function). Default is TOL = 1e-6.
% VERBOSE With VERBOSE set to true, the ridge cost and the L1 norm of
% the beta coefficients will be printed for each iteration. By
% default, VERBOSE is set to false.
%
% OUTPUT:
% B The regression parameters. X*B is the data projected onto the
% discriminative directions.
% theta The optimal scores.
% delta a ceil parameter containing the selected parameter in each
% gslda step.
%
% Usage
% -------
% b = gslda(Xtr, Y, Delta, D1, D2, stop, false)
% Xtr_lda=Xtr*b;
% Xte_lda=Xte*b;
% class = classify(Xtr_lda,Xtr_lda,label_tr,'linear');
%
% References
% -------
% [1] V. Peterson, H.L. Rufiner and R.D. Spies. Generalized sparse
% discriminant analysis for event-related potential classification.
% Biomedical Signal Processing and Control, 35, 70-78, 2017.
% [2] L. Clemmensen, T. Hastie, D. Witten and B. Ersbøll. Sparse
% discriminant analysis. Technometrics, 53(4), 406–413, 2011.
% [3] K. Sjöstrand, L.H. Clemmensen, R. Larsen, B. Ersbøll, SpaSM:
% a matlab toolbox for sparse statistical modeling, J. Stat. Softw.(2012)
% (in press) http://www.imm.dtu.dk/projects/spasm/references/spasm.pdf.
%
% See also BestBeta.
%
% V. Peterson
%% Input checking
if nargin < 2
error('SpaSM:slda', 'Input arguments X and Y must be specified.');
end
[n, p] = size(X); % n: #observations, p: #variables
K = size(Y,2); % K is the number of classes
if nargin < 11
verbose = false;
end
if nargin < 10
convergenceCriterion = 1e-6;
end
if nargin < 9
maxSteps = 100;
end
if nargin < 8
Q = K - 1; % Q is the number of components
elseif Q > K - 1
Q = K - 1; warning('SpaSM:slda', 'At most K-1 components allowed. Forcing Q = K - 1.')
end
if nargin < 7
plot=false;
end
if nargin < 6 || isempty(stop)
stop = -ceil(p/2);
end
if isempty(D1)
D1=eye(p,p);
end
if isempty(D2)
D2=eye(p,p);
end
if isempty(Delta)
Delta=1e-6;
end
% check stopping criterion
if length(stop) ~= K
stop = stop(1)*ones(1,K);
end
%% Setup
dpi = sum(Y)/n; % diagonal "matrix" of class priors
Ydpi = Y./(ones(n,1)*dpi); % class belongings scaled according to priors
B = zeros(p,Q); % coefficients of discriminative directions
theta = eye(K,Q); % optimal scores
%% Main loop
for q = 1:Q
step = 0; % iteration counter
converged = false;
if verbose
disp(['Estimating direction ' num2str(q)]);
end
%tunning parameter stimation
while ~converged && step < maxSteps
step = step + 1;
Bq_old = B(:,q);
% 1. Estimate B with automatic parameter selection
[beta, d, k]=BestBeta(X, Y*theta(:,q), Delta, D1, D2, stop(q), plot);
delta(step,1)=d;
delta(step,2)=k;
B(:,q) =beta;
yhatq = X*B(:,q);
% 2. Estimate theta
t = Ydpi'*yhatq;
s = t - theta(:,1:q-1)*(theta(:,1:q-1)'*(diag(dpi)*t));
theta(:,q) = s/sqrt(sum(dpi'.*s.^2));
% converged?
criterion = sum((Bq_old - B(:,q)).^2)/(Bq_old'*Bq_old);
if verbose && ~mod(step, 10)
disp([' Iteration: ' num2str(step) ', convergence criterion: ' num2str(criterion)]);
end
converged = criterion < convergenceCriterion;
if step == maxSteps
warning('SpaSM:slda', 'Forced exit. Maximum number of steps reached.');
end
end
if verbose
disp([' Iteration: ' num2str(step) ', convergence criterion: ' num2str(criterion)]);
end
end