-
Notifications
You must be signed in to change notification settings - Fork 1
/
pao_optimizer.F
346 lines (277 loc) · 15.2 KB
/
pao_optimizer.F
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
!--------------------------------------------------------------------------------------------------!
! CP2K: A general program to perform molecular dynamics simulations !
! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
! !
! SPDX-License-Identifier: GPL-2.0-or-later !
!--------------------------------------------------------------------------------------------------!
! **************************************************************************************************
!> \brief Optimizers used by pao_main.F
!> \author Ole Schuett
! **************************************************************************************************
MODULE pao_optimizer
USE arnoldi_api, ONLY: arnoldi_extremal
USE cp_dbcsr_api, ONLY: &
dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_dot, dbcsr_frobenius_norm, &
dbcsr_get_info, dbcsr_multiply, dbcsr_release, dbcsr_reserve_diag_blocks, dbcsr_scale, &
dbcsr_set, dbcsr_type
USE kinds, ONLY: dp
USE pao_input, ONLY: pao_opt_bfgs,&
pao_opt_cg
USE pao_types, ONLY: pao_env_type
#include "./base/base_uses.f90"
IMPLICIT NONE
PRIVATE
PUBLIC :: pao_opt_init, pao_opt_finalize, pao_opt_new_dir
CONTAINS
! **************************************************************************************************
!> \brief Prepares the optimizer state before the first cycle.
!>        matrix_D is made a copy of the gradient matrix_G and then set to 0.0; matrix_G_prev and,
!>        with preconditioning, matrix_D_preconed are copies of that zeroed matrix.
!> \param pao PAO environment holding the optimizer matrices and settings
! **************************************************************************************************
SUBROUTINE pao_opt_init(pao)
   TYPE(pao_env_type), POINTER                        :: pao

   ! start from a copy of the gradient matrix and overwrite its values with zeros
   CALL dbcsr_copy(pao%matrix_D, pao%matrix_G)
   CALL dbcsr_set(pao%matrix_D, 0.0_dp)

   ! the other work matrices are copies of the zeroed direction
   CALL dbcsr_copy(pao%matrix_G_prev, pao%matrix_D)
   IF (pao%precondition) CALL dbcsr_copy(pao%matrix_D_preconed, pao%matrix_D)

   ! BFGS keeps an additional matrix that needs its own setup
   IF (pao%optimizer == pao_opt_bfgs) THEN
      CALL pao_opt_init_bfgs(pao)
   END IF
END SUBROUTINE pao_opt_init
! **************************************************************************************************
!> \brief Creates pao%matrix_BFGS and initializes it to the identity.
!>        The matrix is built from the template matrix_X, using the row block sizes of matrix_X
!>        for both its rows and its columns.
!> \param pao PAO environment; matrix_X is read, matrix_BFGS is created
! **************************************************************************************************
SUBROUTINE pao_opt_init_bfgs(pao)
   TYPE(pao_env_type), POINTER                        :: pao

   INTEGER, DIMENSION(:), POINTER                     :: blk_sizes

   ! row block sizes of matrix_X define a square blocking for matrix_BFGS
   CALL dbcsr_get_info(pao%matrix_X, row_blk_size=blk_sizes)

   CALL dbcsr_create(pao%matrix_BFGS, &
                     template=pao%matrix_X, &
                     row_blk_size=blk_sizes, &
                     col_blk_size=blk_sizes, &
                     name="PAO matrix_BFGS")

   ! zero the reserved diagonal blocks, then put 1.0 on the diagonal
   CALL dbcsr_reserve_diag_blocks(pao%matrix_BFGS)
   CALL dbcsr_set(pao%matrix_BFGS, 0.0_dp)
   CALL dbcsr_add_on_diag(pao%matrix_BFGS, 1.0_dp)
END SUBROUTINE pao_opt_init_bfgs
! **************************************************************************************************
!> \brief Releases the matrices that were set up by pao_opt_init.
!> \param pao PAO environment; matrix_D and matrix_G_prev are always released, matrix_D_preconed
!>            only with preconditioning and matrix_BFGS only for the BFGS optimizer
! **************************************************************************************************
SUBROUTINE pao_opt_finalize(pao)
   TYPE(pao_env_type), POINTER                        :: pao

   CALL dbcsr_release(pao%matrix_D)
   CALL dbcsr_release(pao%matrix_G_prev)

   ! optional matrices exist only if the matching feature was switched on
   IF (pao%precondition) THEN
      CALL dbcsr_release(pao%matrix_D_preconed)
   END IF

   IF (pao%optimizer == pao_opt_bfgs) THEN
      CALL dbcsr_release(pao%matrix_BFGS)
   END IF
END SUBROUTINE pao_opt_finalize
! **************************************************************************************************
!> \brief Computes the next search direction pao%matrix_D and updates pao%norm_G.
!>        With preconditioning the optimizer runs on matrix_precon*matrix_G and
!>        matrix_D_preconed; matrix_D is then obtained as matrix_precon*matrix_D_preconed.
!> \param pao PAO environment; matrix_D, matrix_G_prev and norm_G are updated
!> \param icycle current optimization cycle, passed on to the selected optimizer
! **************************************************************************************************
SUBROUTINE pao_opt_new_dir(pao, icycle)
   TYPE(pao_env_type), POINTER                        :: pao
   INTEGER, INTENT(IN)                                :: icycle

   CHARACTER(len=*), PARAMETER                        :: routineN = 'pao_opt_new_dir'

   INTEGER                                            :: timer
   TYPE(dbcsr_type)                                   :: precon_G

   CALL timeset(routineN, timer)

   IF (.NOT. pao%precondition) THEN
      ! plain case: the optimizer works directly on the gradient
      CALL pao_opt_new_dir_low(pao, icycle, pao%matrix_G, pao%matrix_G_prev, pao%matrix_D)

      pao%norm_G = dbcsr_frobenius_norm(pao%matrix_G)
      ! store gradient for next iteration
      CALL dbcsr_copy(pao%matrix_G_prev, pao%matrix_G)
      IF (pao%iw > 0) WRITE (pao%iw, *) "PAO| norm of gradient:", pao%norm_G
   ELSE
      ! We can't convert matrix_D back and forth every time, the numeric noise would disturb
      ! the CG, hence we keep matrix_D_preconed around.

      ! preconditioned gradient: precon_G = matrix_precon * matrix_G
      CALL dbcsr_copy(precon_G, pao%matrix_G)
      CALL dbcsr_multiply("N", "N", 1.0_dp, pao%matrix_precon, pao%matrix_G, &
                          0.0_dp, precon_G, retain_sparsity=.TRUE.)

      ! the optimizer updates the preconditioned direction
      CALL pao_opt_new_dir_low(pao, icycle, precon_G, pao%matrix_G_prev, pao%matrix_D_preconed)

      ! matrix_D = matrix_precon * matrix_D_preconed
      CALL dbcsr_multiply("N", "N", 1.0_dp, pao%matrix_precon, pao%matrix_D_preconed, &
                          0.0_dp, pao%matrix_D, retain_sparsity=.TRUE.)

      pao%norm_G = dbcsr_frobenius_norm(precon_G)
      ! store preconditioned gradient for next iteration
      CALL dbcsr_copy(pao%matrix_G_prev, precon_G)
      IF (pao%iw > 0) WRITE (pao%iw, *) "PAO| norm of preconditioned gradient:", pao%norm_G

      CALL dbcsr_release(precon_G)
   END IF

   CALL timestop(timer)
END SUBROUTINE pao_opt_new_dir
! **************************************************************************************************
!> \brief Dispatches the search-direction update to the optimizer selected in pao%optimizer.
!> \param pao PAO environment; aborts if pao%optimizer is neither pao_opt_cg nor pao_opt_bfgs
!> \param icycle current optimization cycle
!> \param matrix_G gradient handed to the optimizer
!> \param matrix_G_prev gradient of the previous cycle
!> \param matrix_D search direction, updated by the optimizer
! **************************************************************************************************
SUBROUTINE pao_opt_new_dir_low(pao, icycle, matrix_G, matrix_G_prev, matrix_D)
   TYPE(pao_env_type), POINTER                        :: pao
   INTEGER, INTENT(IN)                                :: icycle
   TYPE(dbcsr_type)                                   :: matrix_G, matrix_G_prev, matrix_D

   IF (pao%optimizer == pao_opt_cg) THEN
      CALL pao_opt_newdir_cg(pao, icycle, matrix_G, matrix_G_prev, matrix_D)
   ELSE IF (pao%optimizer == pao_opt_bfgs) THEN
      CALL pao_opt_newdir_bfgs(pao, icycle, matrix_G, matrix_G_prev, matrix_D)
   ELSE
      CPABORT("PAO: unknown optimizer")
   END IF
END SUBROUTINE pao_opt_new_dir_low
! **************************************************************************************************
!> \brief Conjugate Gradient algorithm
!>        Updates the direction as D = beta*D - G, with beta from the Polak-Ribiere formula.
!>        beta falls back to 0.0 (steepest descent) during the first pao%cg_init_steps cycles,
!>        when the previous gradient is exactly zero, when the formula yields a negative value,
!>        or when "change" exceeds pao%cg_reset_limit.
!> \param pao PAO environment; cg_init_steps, cg_reset_limit and iw_opt are read
!> \param icycle current optimization cycle
!> \param matrix_G current gradient
!> \param matrix_G_prev gradient of the previous cycle
!> \param matrix_D on entry the previous search direction, on exit the new one
! **************************************************************************************************
SUBROUTINE pao_opt_newdir_cg(pao, icycle, matrix_G, matrix_G_prev, matrix_D)
   TYPE(pao_env_type), POINTER                        :: pao
   INTEGER, INTENT(IN)                                :: icycle
   TYPE(dbcsr_type)                                   :: matrix_G, matrix_G_prev, matrix_D

   REAL(KIND=dp)                                      :: beta, change, trace_D, trace_D_Gnew, &
                                                         trace_G_mix, trace_G_new, trace_G_prev

   ! Steepest descent is the default. Without this, beta would be read while undefined
   ! whenever trace_G_prev is zero and the Polak-Ribiere assignment below is skipped.
   beta = 0.0_dp

   ! determine CG mixing factor
   IF (icycle <= pao%cg_init_steps) THEN
      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| warming up with steepest descent"
   ELSE
      CALL dbcsr_dot(matrix_G, matrix_G, trace_G_new)
      CALL dbcsr_dot(matrix_G_prev, matrix_G_prev, trace_G_prev)
      CALL dbcsr_dot(matrix_G, matrix_G_prev, trace_G_mix)
      CALL dbcsr_dot(matrix_D, matrix_G, trace_D_Gnew)
      CALL dbcsr_dot(matrix_D, matrix_D, trace_D)

      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| trace_G_new ", trace_G_new
      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| trace_G_prev ", trace_G_prev
      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| trace_G_mix ", trace_G_mix
      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| trace_D ", trace_D
      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| trace_D_Gnew", trace_D_Gnew

      IF (trace_G_prev /= 0.0_dp) THEN
         beta = (trace_G_new - trace_G_mix)/trace_G_prev !Polak-Ribiere
      END IF

      IF (beta < 0.0_dp) THEN
         IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| resetting because beta < 0"
         beta = 0.0_dp
      END IF

      ! Guard against 0/0 when the previous direction is exactly zero.
      ! NOTE(review): the expression evaluates as (trace_D_Gnew**2/trace_D)*trace_G_new. If the
      ! intent was trace_D_Gnew**2/(trace_D*trace_G_new), parentheses are missing - confirm
      ! before changing, since this alters when CG resets are triggered.
      change = 0.0_dp
      IF (trace_D /= 0.0_dp) change = trace_D_Gnew**2/trace_D*trace_G_new
      IF (change > pao%cg_reset_limit) THEN
         IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| resetting because change > CG_RESET_LIMIT"
         beta = 0.0_dp
      END IF
   END IF

   IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|CG| beta: ", beta

   ! calculate new CG direction matrix_D = beta*matrix_D - matrix_G
   CALL dbcsr_add(matrix_D, matrix_G, beta, -1.0_dp)
END SUBROUTINE pao_opt_newdir_cg
! **************************************************************************************************
!> \brief Broyden-Fletcher-Goldfarb-Shanno algorithm
!>        From the second cycle on, pao%matrix_BFGS (the approximate inverse Hessian H) is
!>        refreshed with a damped BFGS update. The new direction is then D = -H*G.
!> \param pao PAO environment; matrix_BFGS is updated, linesearch%step_size and iw_opt are read
!> \param icycle current optimization cycle; the update of H is skipped for icycle <= 1
!> \param matrix_G current gradient
!> \param matrix_G_prev gradient of the previous cycle
!> \param matrix_D on entry the previous search direction, on exit the new one
! **************************************************************************************************
SUBROUTINE pao_opt_newdir_bfgs(pao, icycle, matrix_G, matrix_G_prev, matrix_D)
   TYPE(pao_env_type), POINTER                        :: pao
   INTEGER, INTENT(IN)                                :: icycle
   TYPE(dbcsr_type)                                   :: matrix_G, matrix_G_prev, matrix_D

   CHARACTER(len=*), PARAMETER :: routineN = 'pao_opt_newdir_bfgs'

   INTEGER                                            :: handle
   LOGICAL                                            :: arnoldi_converged
   REAL(dp)                                           :: eval_max, eval_min, theta, trace_ry, &
                                                         trace_sy, trace_yHy, trace_yy
   TYPE(dbcsr_type)                                   :: matrix_Hy, matrix_Hyr, matrix_r, &
                                                         matrix_rr, matrix_ryH, matrix_ryHyr, &
                                                         matrix_s, matrix_y, matrix_yr

   CALL timeset(routineN, handle)

   !TODO add filtering?

   ! Notation according to the book from Nocedal and Wright, see chapter 6.
   IF (icycle > 1) THEN
      ! y = G - G_prev
      CALL dbcsr_copy(matrix_y, matrix_G)
      CALL dbcsr_add(matrix_y, matrix_G_prev, 1.0_dp, -1.0_dp) ! dG

      ! s = X - X_prev, i.e. the previous direction scaled by the line-search step size
      CALL dbcsr_copy(matrix_s, matrix_D)
      CALL dbcsr_scale(matrix_s, pao%linesearch%step_size) ! dX

      ! sy = MATMUL(TRANSPOSE(s), y)
      CALL dbcsr_dot(matrix_s, matrix_y, trace_sy)

      ! heuristic initialization: scale the initial H by sy/yy
      IF (icycle == 2) THEN
         CALL dbcsr_dot(matrix_Y, matrix_Y, trace_yy)
         CALL dbcsr_scale(pao%matrix_BFGS, trace_sy/trace_yy)
         IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|BFGS| Initializing with:", trace_sy/trace_yy
      END IF

      ! Hy = MATMUL(H, y)
      CALL dbcsr_create(matrix_Hy, template=matrix_G, matrix_type="N")
      CALL dbcsr_multiply("N", "N", 1.0_dp, pao%matrix_BFGS, matrix_y, 0.0_dp, matrix_Hy)

      ! yHy = MATMUL(TRANSPOSE(y), Hy)
      CALL dbcsr_dot(matrix_y, matrix_Hy, trace_yHy)

      ! Use damped BFGS algorithm to ensure H remains positive definite.
      ! See chapter 18 in Nocedal and Wright's book for details.
      ! The formulas were adapted to the inverse Hessian algorithm.
      IF (trace_sy < 0.2_dp*trace_yHy) THEN
         theta = 0.8_dp*trace_yHy/(trace_yHy - trace_sy)
         IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|BFGS| Dampening theta:", theta
      ELSE
         theta = 1.0_dp
      END IF

      ! r = theta*s + (1-theta)*Hy
      CALL dbcsr_copy(matrix_r, matrix_s)
      CALL dbcsr_add(matrix_r, matrix_Hy, theta, (1.0_dp - theta))

      ! r takes the role of s in the update of H below
      CALL dbcsr_dot(matrix_r, matrix_y, trace_ry)
      CPASSERT(trace_RY > 0.0_dp)

      ! yr = MATMUL(y, TRANSPOSE(r))
      CALL dbcsr_create(matrix_yr, template=pao%matrix_BFGS, matrix_type="N")
      CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_y, matrix_r, 0.0_dp, matrix_yr)

      ! Hyr = MATMUL(H, yr)
      CALL dbcsr_create(matrix_Hyr, template=pao%matrix_BFGS, matrix_type="N")
      CALL dbcsr_multiply("N", "N", 1.0_dp, pao%matrix_BFGS, matrix_yr, 0.0_dp, matrix_Hyr)

      ! ryH = MATMUL(TRANSPOSE(yr), H)
      CALL dbcsr_create(matrix_ryH, template=pao%matrix_BFGS, matrix_type="N")
      CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_yr, pao%matrix_BFGS, 0.0_dp, matrix_ryH)

      ! ryHyr = MATMUL(ryH, yr)
      CALL dbcsr_create(matrix_ryHyr, template=pao%matrix_BFGS, matrix_type="N")
      CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_ryH, matrix_yr, 0.0_dp, matrix_ryHyr)

      ! rr = MATMUL(r, TRANSPOSE(r))
      CALL dbcsr_create(matrix_rr, template=pao%matrix_BFGS, matrix_type="N")
      CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_r, matrix_r, 0.0_dp, matrix_rr)

      ! H = H - Hyr/ry - ryH/ry + ryHyr/(ry**2) + rr/ry
      CALL dbcsr_add(pao%matrix_BFGS, matrix_HYR, 1.0_dp, -1.0_dp/trace_ry)
      CALL dbcsr_add(pao%matrix_BFGS, matrix_ryH, 1.0_dp, -1.0_dp/trace_ry)
      CALL dbcsr_add(pao%matrix_BFGS, matrix_ryHyr, 1.0_dp, +1.0_dp/(trace_ry**2))
      CALL dbcsr_add(pao%matrix_BFGS, matrix_rr, 1.0_dp, +1.0_dp/trace_ry)

      ! clean up
      CALL dbcsr_release(matrix_y)
      CALL dbcsr_release(matrix_s)
      CALL dbcsr_release(matrix_r)
      CALL dbcsr_release(matrix_Hy)
      CALL dbcsr_release(matrix_yr)
      CALL dbcsr_release(matrix_Hyr)
      CALL dbcsr_release(matrix_ryH)
      CALL dbcsr_release(matrix_ryHyr)
      CALL dbcsr_release(matrix_rr)
   END IF

   ! approximate condition of Hessian (diagnostic output only, does not affect the direction)
   !TODO: good setting for arnoldi?
   CALL arnoldi_extremal(pao%matrix_BFGS, eval_max, eval_min, max_iter=100, &
                         threshold=1e-2_dp, converged=arnoldi_converged)
   IF (arnoldi_converged) THEN
      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|BFGS| evals of inv. Hessian: min, max, max/min", &
         eval_min, eval_max, eval_max/eval_min
   ELSE
      IF (pao%iw_opt > 0) WRITE (pao%iw_opt, *) "PAO|BFGS| arnoldi of inv. Hessian did not converged."
   END IF

   ! calculate new direction
   ! d = MATMUL(H, -g)
   CALL dbcsr_multiply("N", "N", -1.0_dp, pao%matrix_BFGS, matrix_G, &
                       0.0_dp, matrix_D, retain_sparsity=.TRUE.)

   CALL timestop(handle)
END SUBROUTINE pao_opt_newdir_bfgs
END MODULE pao_optimizer