-
Notifications
You must be signed in to change notification settings - Fork 1
/
hfx_ri_kp.F
5223 lines (4424 loc) · 248 KB
/
hfx_ri_kp.F
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
!--------------------------------------------------------------------------------------------------!
! CP2K: A general program to perform molecular dynamics simulations !
! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
! !
! SPDX-License-Identifier: GPL-2.0-or-later !
!--------------------------------------------------------------------------------------------------!
! **************************************************************************************************
!> \brief RI-methods for HFX and K-points.
!> \auhtor Augustin Bussy (01.2023)
! **************************************************************************************************
MODULE hfx_ri_kp
USE admm_types, ONLY: get_admm_env
USE atomic_kind_types, ONLY: atomic_kind_type,&
get_atomic_kind_set
USE basis_set_types, ONLY: get_gto_basis_set,&
gto_basis_set_p_type
USE bibliography, ONLY: Bussy2024,&
cite_reference
USE cell_types, ONLY: cell_type,&
pbc,&
real_to_scaled,&
scaled_to_real
USE cp_array_utils, ONLY: cp_1d_logical_p_type,&
cp_2d_r_p_type,&
cp_3d_r_p_type
USE cp_blacs_env, ONLY: cp_blacs_env_create,&
cp_blacs_env_release,&
cp_blacs_env_type
USE cp_control_types, ONLY: dft_control_type
USE cp_dbcsr_api, ONLY: &
dbcsr_add, dbcsr_clear, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, &
dbcsr_distribution_new, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_dot, &
dbcsr_filter, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_p_type, dbcsr_put_block, dbcsr_release, &
dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
USE cp_dbcsr_cholesky, ONLY: cp_dbcsr_cholesky_decompose,&
cp_dbcsr_cholesky_invert
USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_alloc_block_from_nbl
USE cp_dbcsr_diag, ONLY: cp_dbcsr_power
USE cp_dbcsr_operations, ONLY: cp_dbcsr_dist2d_to_dist
USE dbt_api, ONLY: &
dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, dbt_destroy, &
dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, dbt_filter, &
dbt_finalize, dbt_get_block, dbt_get_info, dbt_get_stored_coordinates, &
dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
dbt_iterator_type, dbt_mp_environ_pgrid, dbt_pgrid_create, dbt_pgrid_destroy, &
dbt_pgrid_type, dbt_put_block, dbt_scale, dbt_type
USE distribution_2d_types, ONLY: distribution_2d_release,&
distribution_2d_type
USE hfx_ri, ONLY: get_idx_to_atom,&
hfx_ri_pre_scf_calc_tensors
USE hfx_types, ONLY: hfx_ri_type
USE input_constants, ONLY: do_potential_short,&
hfx_ri_do_2c_cholesky,&
hfx_ri_do_2c_diag,&
hfx_ri_do_2c_iter
USE input_cp2k_hfx, ONLY: ri_pmat
USE input_section_types, ONLY: section_vals_get_subs_vals,&
section_vals_type,&
section_vals_val_get,&
section_vals_val_set
USE iterate_matrix, ONLY: invert_hotelling
USE kinds, ONLY: dp,&
int_8
USE kpoint_types, ONLY: get_kpoint_info,&
kpoint_type
USE libint_2c_3c, ONLY: cutoff_screen_factor
USE machine, ONLY: m_flush,&
m_walltime
USE mathlib, ONLY: erfc_cutoff
USE message_passing, ONLY: mp_cart_type,&
mp_para_env_type,&
mp_request_type,&
mp_waitall
USE particle_methods, ONLY: get_particle_set
USE particle_types, ONLY: particle_type
USE physcon, ONLY: angstrom
USE qs_environment_types, ONLY: get_qs_env,&
qs_environment_type
USE qs_force_types, ONLY: qs_force_type
USE qs_integral_utils, ONLY: basis_set_list_setup
USE qs_interactions, ONLY: init_interaction_radii_orb_basis
USE qs_kind_types, ONLY: qs_kind_type
USE qs_neighbor_list_types, ONLY: get_iterator_info,&
neighbor_list_iterate,&
neighbor_list_iterator_create,&
neighbor_list_iterator_p_type,&
neighbor_list_iterator_release,&
neighbor_list_set_p_type,&
release_neighbor_list_sets
USE qs_scf_types, ONLY: qs_scf_env_type
USE qs_tensors, ONLY: &
build_2c_derivatives, build_2c_neighbor_lists, build_3c_derivatives, &
build_3c_neighbor_lists, get_3c_iterator_info, get_tensor_occupancy, &
neighbor_list_3c_destroy, neighbor_list_3c_iterate, neighbor_list_3c_iterator_create, &
neighbor_list_3c_iterator_destroy
USE qs_tensors_types, ONLY: create_2c_tensor,&
create_3c_tensor,&
create_tensor_batches,&
distribution_2d_create,&
distribution_3d_create,&
distribution_3d_type,&
neighbor_list_3c_iterator_type,&
neighbor_list_3c_type
USE util, ONLY: get_limit
USE virial_types, ONLY: virial_type
#include "./base/base_uses.f90"
!$ USE OMP_LIB, ONLY: omp_get_num_threads
IMPLICIT NONE
PRIVATE
PUBLIC :: hfx_ri_update_ks_kp, hfx_ri_update_forces_kp
CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri_kp'
CONTAINS
! NOTES: for a start, we do not seek performance, but accuracy. So in this first implementation,
! we give little consideration to batching, load balance and such.
! We also put everything here, even if there is some code replication with the original RI_HFX
! We will only work in the RHO flavor
! For now, we will also always assume that there is a single para_env, and that there is no
! K-point subgroup. This might change in the future
! **************************************************************************************************
!> \brief I_1nitialize the ri_data for K-point. For now, we take the normal, usual existing ri_data
!> and we adapt it to our needs
!> \param dbcsr_template ...
!> \param ri_data ...
!> \param qs_env ...
! **************************************************************************************************
SUBROUTINE adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
TYPE(qs_environment_type), POINTER :: qs_env
INTEGER :: i_img, i_RI, i_spin, iatom, natom, &
nblks_RI, nimg, nkind, nspins
INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, dist1, dist2, dist3
TYPE(dft_control_type), POINTER :: dft_control
TYPE(mp_para_env_type), POINTER :: para_env
NULLIFY (dft_control, para_env)
!The main thing that we need to do is to allocate more space for the integrals, such that there
!is room for each periodic image. Note that we only go in 1D, i.e. we store (mu^0 sigma^a|P^0),
!and (P^0|Q^a) => the RI basis is always in the main cell.
!Get kpoint info
CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, para_env=para_env, nkind=nkind)
nimg = ri_data%nimg
!Along the RI direction we have basis elements spread accross ncell_RI images.
nblks_RI = SIZE(ri_data%bsizes_RI_split)
ALLOCATE (bsizes_RI_ext(nblks_RI*ri_data%ncell_RI))
DO i_RI = 1, ri_data%ncell_RI
bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
END DO
ALLOCATE (ri_data%t_3c_int_ctr_1(1, nimg))
CALL create_3c_tensor(ri_data%t_3c_int_ctr_1(1, 1), dist1, dist2, dist3, &
ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_RI_ext, &
ri_data%bsizes_AO_split, [1, 2], [3], name="(AO RI | AO)")
DO i_img = 2, nimg
CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_1(1, i_img))
END DO
DEALLOCATE (dist1, dist2, dist3)
ALLOCATE (ri_data%t_3c_int_ctr_2(1, 1))
CALL create_3c_tensor(ri_data%t_3c_int_ctr_2(1, 1), dist1, dist2, dist3, &
ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_RI_ext, &
ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
DEALLOCATE (dist1, dist2, dist3)
!We use full block sizes for the 2c quantities
DEALLOCATE (bsizes_RI_ext)
nblks_RI = SIZE(ri_data%bsizes_RI)
ALLOCATE (bsizes_RI_ext(nblks_RI*ri_data%ncell_RI))
DO i_RI = 1, ri_data%ncell_RI
bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI(:)
END DO
ALLOCATE (ri_data%t_2c_inv(1, natom), ri_data%t_2c_int(1, natom), ri_data%t_2c_pot(1, natom))
CALL create_2c_tensor(ri_data%t_2c_inv(1, 1), dist1, dist2, ri_data%pgrid_2d, &
bsizes_RI_ext, bsizes_RI_ext, &
name="(RI | RI)")
DEALLOCATE (dist1, dist2)
CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, 1))
CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1))
DO iatom = 2, natom
CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_inv(1, iatom))
CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, iatom))
CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, iatom))
END DO
ALLOCATE (ri_data%kp_cost(natom, natom, nimg))
ri_data%kp_cost = 0.0_dp
!We store the density and KS matrix in tensor format
nspins = dft_control%nspins
ALLOCATE (ri_data%rho_ao_t(nspins, nimg), ri_data%ks_t(nspins, nimg))
CALL create_2c_tensor(ri_data%rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
name="(AO | AO)")
DEALLOCATE (dist1, dist2)
CALL dbt_create(dbcsr_template, ri_data%ks_t(1, 1))
IF (nspins == 2) THEN
CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(2, 1))
CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(2, 1))
END IF
DO i_img = 2, nimg
DO i_spin = 1, nspins
CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(i_spin, i_img))
CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(i_spin, i_img))
END DO
END DO
END SUBROUTINE adapt_ri_data_to_kp
! **************************************************************************************************
!> \brief The pre-scf steps for RI-HFX k-points calculation. Namely the calculation of the integrals
!> \param dbcsr_template ...
!> \param ri_data ...
!> \param qs_env ...
! **************************************************************************************************
SUBROUTINE hfx_ri_pre_scf_kp(dbcsr_template, ri_data, qs_env)
TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
TYPE(qs_environment_type), POINTER :: qs_env
CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_kp'
INTEGER :: handle, i_img, iatom, natom, nimg, nkind
TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: t_2c_op_pot, t_2c_op_RI
TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
TYPE(dft_control_type), POINTER :: dft_control
NULLIFY (dft_control)
CALL timeset(routineN, handle)
CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, nkind=nkind)
CALL cleanup_kp(ri_data)
!We do all the checks on what we allow in this initial implementation
IF (ri_data%flavor .NE. ri_pmat) CPABORT("K-points RI-HFX only with RHO flavor")
IF (ri_data%same_op) ri_data%same_op = .FALSE. !force the full calculation with RI metric
IF (ABS(ri_data%eps_pgf_orb - dft_control%qs_control%eps_pgf_orb) > 1.0E-16_dp) &
CPABORT("RI%EPS_PGF_ORB and QS%EPS_PGF_ORB must be identical for RI-HFX k-points")
CALL get_kp_and_ri_images(ri_data, qs_env)
nimg = ri_data%nimg
!Calculate the integrals
ALLOCATE (t_2c_op_pot(nimg), t_2c_op_RI(nimg))
ALLOCATE (t_3c_int(1, nimg))
CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int, do_kpoints=.TRUE.)
!Make sure the internals have the k-point format
CALL adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
!For each atom i, we calculate the inverse RI metric (P^0 | Q^0)^-1 without external bumping yet
!Also store the off-diagonal integrals of the RI metric in case of forces, bumped from the left
DO iatom = 1, natom
CALL get_ext_2c_int(ri_data%t_2c_inv(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, qs_env, &
do_inverse=.TRUE.)
!for the forces:
!off-diagonl RI metric bumped from the left
CALL get_ext_2c_int(ri_data%t_2c_int(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, &
qs_env, off_diagonal=.TRUE.)
CALL apply_bump(ri_data%t_2c_int(1, iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.FALSE.)
!RI metric with bumped off-diagonal blocks (but not inverted), depumed from left and right
CALL get_ext_2c_int(ri_data%t_2c_pot(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, qs_env, &
do_inverse=.TRUE., skip_inverse=.TRUE.)
CALL apply_bump(ri_data%t_2c_pot(1, iatom), iatom, ri_data, qs_env, from_left=.TRUE., &
from_right=.TRUE., debump=.TRUE.)
END DO
DO i_img = 1, nimg
CALL dbcsr_release(t_2c_op_RI(i_img))
END DO
ALLOCATE (ri_data%kp_mat_2c_pot(1, nimg))
DO i_img = 1, nimg
CALL dbcsr_create(ri_data%kp_mat_2c_pot(1, i_img), template=t_2c_op_pot(i_img))
CALL dbcsr_copy(ri_data%kp_mat_2c_pot(1, i_img), t_2c_op_pot(i_img))
CALL dbcsr_release(t_2c_op_pot(i_img))
END DO
!Pre-contract all 3c integrals with the bumped inverse RI metric (P^0|Q^0)^-1,
!and store in ri_data%t_3c_int_ctr_1
CALL precontract_3c_ints(t_3c_int, ri_data, qs_env)
!reorder the 3c integrals such that empty images are bunched up together
CALL reorder_3c_ints(ri_data%t_3c_int_ctr_1(1, :), ri_data)
CALL timestop(handle)
END SUBROUTINE hfx_ri_pre_scf_kp
! **************************************************************************************************
!> \brief clean-up the KP specific data from ri_data
!> \param ri_data ...
! **************************************************************************************************
SUBROUTINE cleanup_kp(ri_data)
TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
INTEGER :: i, j
IF (ALLOCATED(ri_data%kp_cost)) DEALLOCATE (ri_data%kp_cost)
IF (ALLOCATED(ri_data%idx_to_img)) DEALLOCATE (ri_data%idx_to_img)
IF (ALLOCATED(ri_data%img_to_idx)) DEALLOCATE (ri_data%img_to_idx)
IF (ALLOCATED(ri_data%present_images)) DEALLOCATE (ri_data%present_images)
IF (ALLOCATED(ri_data%img_to_RI_cell)) DEALLOCATE (ri_data%img_to_RI_cell)
IF (ALLOCATED(ri_data%RI_cell_to_img)) DEALLOCATE (ri_data%RI_cell_to_img)
IF (ALLOCATED(ri_data%kp_mat_2c_pot)) THEN
DO j = 1, SIZE(ri_data%kp_mat_2c_pot, 2)
DO i = 1, SIZE(ri_data%kp_mat_2c_pot, 1)
CALL dbcsr_release(ri_data%kp_mat_2c_pot(i, j))
END DO
END DO
DEALLOCATE (ri_data%kp_mat_2c_pot)
END IF
IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
DO i = 1, SIZE(ri_data%kp_t_3c_int)
CALL dbt_destroy(ri_data%kp_t_3c_int(i))
END DO
DEALLOCATE (ri_data%kp_t_3c_int)
END IF
IF (ALLOCATED(ri_data%t_2c_inv)) THEN
DO j = 1, SIZE(ri_data%t_2c_inv, 2)
DO i = 1, SIZE(ri_data%t_2c_inv, 1)
CALL dbt_destroy(ri_data%t_2c_inv(i, j))
END DO
END DO
DEALLOCATE (ri_data%t_2c_inv)
END IF
IF (ALLOCATED(ri_data%t_2c_int)) THEN
DO j = 1, SIZE(ri_data%t_2c_int, 2)
DO i = 1, SIZE(ri_data%t_2c_int, 1)
CALL dbt_destroy(ri_data%t_2c_int(i, j))
END DO
END DO
DEALLOCATE (ri_data%t_2c_int)
END IF
IF (ALLOCATED(ri_data%t_2c_pot)) THEN
DO j = 1, SIZE(ri_data%t_2c_pot, 2)
DO i = 1, SIZE(ri_data%t_2c_pot, 1)
CALL dbt_destroy(ri_data%t_2c_pot(i, j))
END DO
END DO
DEALLOCATE (ri_data%t_2c_pot)
END IF
IF (ALLOCATED(ri_data%t_3c_int_ctr_1)) THEN
DO j = 1, SIZE(ri_data%t_3c_int_ctr_1, 2)
DO i = 1, SIZE(ri_data%t_3c_int_ctr_1, 1)
CALL dbt_destroy(ri_data%t_3c_int_ctr_1(i, j))
END DO
END DO
DEALLOCATE (ri_data%t_3c_int_ctr_1)
END IF
IF (ALLOCATED(ri_data%t_3c_int_ctr_2)) THEN
DO j = 1, SIZE(ri_data%t_3c_int_ctr_2, 2)
DO i = 1, SIZE(ri_data%t_3c_int_ctr_2, 1)
CALL dbt_destroy(ri_data%t_3c_int_ctr_2(i, j))
END DO
END DO
DEALLOCATE (ri_data%t_3c_int_ctr_2)
END IF
IF (ALLOCATED(ri_data%rho_ao_t)) THEN
DO j = 1, SIZE(ri_data%rho_ao_t, 2)
DO i = 1, SIZE(ri_data%rho_ao_t, 1)
CALL dbt_destroy(ri_data%rho_ao_t(i, j))
END DO
END DO
DEALLOCATE (ri_data%rho_ao_t)
END IF
IF (ALLOCATED(ri_data%ks_t)) THEN
DO j = 1, SIZE(ri_data%ks_t, 2)
DO i = 1, SIZE(ri_data%ks_t, 1)
CALL dbt_destroy(ri_data%ks_t(i, j))
END DO
END DO
DEALLOCATE (ri_data%ks_t)
END IF
END SUBROUTINE cleanup_kp
! **************************************************************************************************
!> \brief Update the KS matrices for each real-space image
!> \param qs_env ...
!> \param ri_data ...
!> \param ks_matrix ...
!> \param ehfx ...
!> \param rho_ao ...
!> \param geometry_did_change ...
!> \param nspins ...
!> \param hf_fraction ...
! **************************************************************************************************
SUBROUTINE hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, &
geometry_did_change, nspins, hf_fraction)
TYPE(qs_environment_type), POINTER :: qs_env
TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
REAL(KIND=dp), INTENT(OUT) :: ehfx
TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
LOGICAL, INTENT(IN) :: geometry_did_change
INTEGER, INTENT(IN) :: nspins
REAL(KIND=dp), INTENT(IN) :: hf_fraction
CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_kp'
INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_spin, iatom, &
iblk, igroup, jatom, mb_img, n_batch_nze, natom, ngroups, nimg, nimg_nze
INTEGER(int_8) :: nflop, nze
INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_at, batch_ranges_nze, &
idx_to_at_AO
INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: sparsity_pattern
LOGICAL :: use_delta_p
REAL(dp) :: etmp, fac, occ, pfac, pref, t1, t2, t3, &
t4
TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
TYPE(dbcsr_type) :: ks_desymm, rho_desymm, tmp
TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
TYPE(dbcsr_type), POINTER :: dbcsr_template
TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ks_t_split, t_2c_ao_tmp, t_2c_work, &
t_3c_int, t_3c_work_2, t_3c_work_3
TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: ks_t, ks_t_sub, t_3c_apc, t_3c_apc_sub
TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
TYPE(section_vals_type), POINTER :: hfx_section
NULLIFY (para_env, para_env_sub, blacs_env_sub, hfx_section, dbcsr_template)
CALL cite_reference(Bussy2024)
CALL timeset(routineN, handle)
CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
IF (nspins == 1) THEN
fac = 0.5_dp*hf_fraction
ELSE
fac = 1.0_dp*hf_fraction
END IF
IF (geometry_did_change) THEN
CALL hfx_ri_pre_scf_kp(ks_matrix(1, 1)%matrix, ri_data, qs_env)
END IF
nimg = ri_data%nimg
nimg_nze = ri_data%nimg_nze
!We need to calculate the KS matrix for each periodic cell with index b: F_mu^0,nu^b
!F_mu^0,nu^b = -0.5 sum_a,c P_sigma^0,lambda^c (mu^0, sigma^a| P^0) V_P^0,Q^b (Q^b| nu^b lambda^a+c)
!with V_P^0,Q^b = (P^0|R^0)^-1 * (R^0|S^b) * (S^b|Q^b)^-1
!We use a local RI basis set for each atom in the system, which inlcudes RI basis elements for
!each neighboring atom standing within the KIND radius (decay of Gaussian with smallest exponent)
!We also limit the number of periodic images we consider accorrding to the HFX potentail in the
!RI basis, because if V_P^0,Q^b is zero everywhere, then image b can be ignored (RI basis less diffuse)
!We manage to calculate each KS matrix doing a double loop on iamges, and a double loop on atoms
!First, we pre-contract and store P_sigma^0,lambda^c (mu^0, sigma^a| P^0) (P^0|R^0)^-1 into T_mu^0,lambda^a+c,P^0
!Then, we loop over b_img, iatom, jatom to get (R^0|S^b)
!Finally, we do an additional loop over a+c images where we do (R^0|S^b) (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c)
!and the final contraction with T_mu^0,lambda^a+c,P^0
!Note that the 3-center integrals are pre-contracted with the RI metric, and that the same tensor can be used
!(mu^0, sigma^a| P^0) (P^0|R^0) <===> (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c) by relabelling the images
hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
CALL section_vals_val_get(hfx_section, "KP_USE_DELTA_P", l_val=use_delta_p)
!By default, build the density tensor based on the difference of this SCF P and that of the prev. SCF
pfac = -1.0_dp
IF (.NOT. use_delta_p) pfac = 0.0_dp
CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, pfac, ri_data, qs_env)
ALLOCATE (ks_t(nspins, nimg))
DO i_img = 1, nimg
DO i_spin = 1, nspins
CALL dbt_create(ri_data%ks_t(1, 1), ks_t(i_spin, i_img))
END DO
END DO
ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
!First we calculate and store T^1_mu^0,lambda^a+c,P = P_mu^0,lambda^c * (mu_0 sigma^a | P^0) (P^0|R^0)^-1
!To avoid doing nimg**2 tiny contractions that do not scale well with a large number of CPUs,
!we instead do a single loop over the a+c image index. For each a+c, we get a list of allowed
!combination of a,c indices. Then we build TAS tensors P_mu^0,lambda^c with all concerned c's
!and (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 with all a's. Then we perform a single contraction with larger tensors,
!were the sum over a,c is automatically taken care of
ALLOCATE (t_3c_apc(nspins, nimg))
DO i_img = 1, nimg
DO i_spin = 1, nspins
CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
END DO
END DO
CALL contract_pmat_3c(t_3c_apc, ri_data%rho_ao_t, ri_data, qs_env)
hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
CALL section_vals_val_get(hfx_section, "KP_STACK_SIZE", i_val=batch_size)
ri_data%kp_stack_size = batch_size
IF (MOD(para_env%num_pe, ngroups) .NE. 0) THEN
CPWARN("KP_NGROUPS must be an integer divisor of the total number of MPI ranks. It was set to 1.")
ngroups = 1
CALL section_vals_val_set(hfx_section, "KP_NGROUPS", i_val=ngroups)
END IF
IF ((MOD(ngroups, natom) .NE. 0) .AND. (MOD(natom, ngroups) .NE. 0) .AND. geometry_did_change) THEN
IF (ngroups > 1) THEN
CPWARN("Better load balancing is reached if NGROUPS is a multiple/divisor of the number of atoms")
END IF
END IF
group_size = para_env%num_pe/ngroups
igroup = para_env%mepos/group_size
ALLOCATE (para_env_sub)
CALL para_env_sub%from_split(para_env, igroup)
CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
! The sparsity pattern of each iatom, jatom pair, on each b_img, and on which subgroup
ALLOCATE (sparsity_pattern(natom, natom, nimg))
CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
!Get all the required tensors in the subgroups
ALLOCATE (mat_2c_pot(nimg), ks_t_sub(nspins, nimg), t_2c_ao_tmp(1), ks_t_split(2), t_2c_work(3))
CALL get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
group_size, ngroups, para_env, para_env_sub, ri_data)
ALLOCATE (t_3c_int(nimg), t_3c_apc_sub(nspins, nimg), t_3c_work_2(3), t_3c_work_3(3))
CALL get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
group_size, ngroups, para_env, para_env_sub, ri_data)
!We go atom by atom, therefore there is an automatic batching along that direction
!Also, because we stack the 3c tensors nimg times, we naturally do some batching there too
ALLOCATE (batch_ranges_at(natom + 1))
batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
iatom = 0
DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
IF (idx_to_at_AO(iblk) == iatom + 1) THEN
iatom = iatom + 1
batch_ranges_at(iatom) = iblk
END IF
END DO
n_batch_nze = nimg_nze/batch_size
IF (MODULO(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
DO i_batch = 1, n_batch_nze
batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
END DO
batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
t1 = m_walltime()
ri_data%kp_cost(:, :, :) = 0.0_dp
ALLOCATE (iapc_pairs(nimg, 2))
DO b_img = 1, nimg
CALL dbt_batched_contract_init(ks_t_split(1))
CALL dbt_batched_contract_init(ks_t_split(2))
DO jatom = 1, natom
DO iatom = 1, natom
IF (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup) CYCLE
pref = 1.0_dp
IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp
!measure the cost of the given i, j, b configuration
t3 = m_walltime()
!Get the proper HFX potential 2c integrals (R_i^0|S_j^b)
CALL timeset(routineN//"_2c", handle2)
CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
dbcsr_template=dbcsr_template)
CALL dbt_copy(t_2c_work(1), t_2c_work(2), move_data=.TRUE.) !move to split blocks
CALL dbt_filter(t_2c_work(2), ri_data%filter_eps)
CALL timestop(handle2)
CALL dbt_batched_contract_init(t_2c_work(2))
CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
CALL timeset(routineN//"_3c", handle2)
!Stack the (S^b|Q^b)^-1 * (Q^b| nu^b lambda^a+c) integrals over a+c and multiply by (R_i^0|S_j^b)
DO i_batch = 1, n_batch_nze
CALL fill_3c_stack(t_3c_work_3(3), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_AO, &
img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1), move_data=.TRUE.)
CALL dbt_contract(1.0_dp, t_2c_work(2), t_3c_work_3(1), &
0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
contract_1=[2], notcontract_1=[1], &
contract_2=[1], notcontract_2=[2, 3], &
filter_eps=ri_data%filter_eps, flop=nflop)
ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
CALL dbt_copy(t_3c_work_3(2), t_3c_work_2(2), order=[2, 1, 3], move_data=.TRUE.)
CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1))
!Stack the P_sigma^a,lambda^a+c * (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 integrals over a+c and contract
!to get the final block of the KS matrix
DO i_spin = 1, nspins
CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_AO, &
img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
IF (nze == 0) CYCLE
CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.TRUE.)
CALL dbt_contract(-pref*fac, t_3c_work_2(1), t_3c_work_2(2), &
1.0_dp, ks_t_split(i_spin), map_1=[1], map_2=[2], &
contract_1=[2, 3], notcontract_1=[1], &
contract_2=[2, 3], notcontract_2=[1], &
filter_eps=ri_data%filter_eps, &
move_data=i_spin == nspins, flop=nflop)
ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
END DO
END DO !i_batch
CALL timestop(handle2)
CALL dbt_batched_contract_finalize(t_2c_work(2))
t4 = m_walltime()
ri_data%kp_cost(iatom, jatom, b_img) = t4 - t3
END DO !iatom
END DO !jatom
CALL dbt_batched_contract_finalize(ks_t_split(1))
CALL dbt_batched_contract_finalize(ks_t_split(2))
DO i_spin = 1, nspins
CALL dbt_copy(ks_t_split(i_spin), t_2c_ao_tmp(1), move_data=.TRUE.)
CALL dbt_copy(t_2c_ao_tmp(1), ks_t_sub(i_spin, b_img), summation=.TRUE.)
END DO
END DO !b_img
CALL dbt_batched_contract_finalize(t_3c_work_3(1))
CALL dbt_batched_contract_finalize(t_3c_work_3(2))
CALL dbt_batched_contract_finalize(t_3c_work_2(1))
CALL dbt_batched_contract_finalize(t_3c_work_2(2))
CALL para_env%sync()
CALL para_env%sum(ri_data%dbcsr_nflop)
CALL para_env%sum(ri_data%kp_cost)
t2 = m_walltime()
ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
!transfer KS tensor from subgroup to main group
CALL gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
!Keep the 3c integrals on the subgroups to avoid communication at next SCF step
DO i_img = 1, nimg
CALL dbt_copy(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img), move_data=.TRUE.)
END DO
!clean-up subgroup tensors
CALL dbt_destroy(t_2c_ao_tmp(1))
CALL dbt_destroy(ks_t_split(1))
CALL dbt_destroy(ks_t_split(2))
CALL dbt_destroy(t_2c_work(1))
CALL dbt_destroy(t_2c_work(2))
CALL dbt_destroy(t_3c_work_2(1))
CALL dbt_destroy(t_3c_work_2(2))
CALL dbt_destroy(t_3c_work_2(3))
CALL dbt_destroy(t_3c_work_3(1))
CALL dbt_destroy(t_3c_work_3(2))
CALL dbt_destroy(t_3c_work_3(3))
DO i_img = 1, nimg
CALL dbt_destroy(t_3c_int(i_img))
CALL dbcsr_release(mat_2c_pot(i_img))
DO i_spin = 1, nspins
CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
CALL dbt_destroy(ks_t_sub(i_spin, i_img))
END DO
END DO
IF (ASSOCIATED(dbcsr_template)) THEN
CALL dbcsr_release(dbcsr_template)
DEALLOCATE (dbcsr_template)
END IF
!End of subgroup parallelization
CALL cp_blacs_env_release(blacs_env_sub)
CALL para_env_sub%free()
DEALLOCATE (para_env_sub)
!Currently, rho_ao_t holds the density difference (wrt to pref SCF step).
!ks_t also hold that diff, while only having half the blocks => need to add to prev ks_t and symmetrize
!We need the full thing for the energy, on the next SCF step
CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
DO i_spin = 1, nspins
DO b_img = 1, nimg
CALL dbt_copy(ks_t(i_spin, b_img), ri_data%ks_t(i_spin, b_img), summation=.TRUE.)
!desymmetrize
mb_img = get_opp_index(b_img, qs_env)
IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
CALL dbt_copy(ks_t(i_spin, mb_img), ri_data%ks_t(i_spin, b_img), order=[2, 1], summation=.TRUE.)
END IF
END DO
END DO
DO b_img = 1, nimg
DO i_spin = 1, nspins
CALL dbt_destroy(ks_t(i_spin, b_img))
END DO
END DO
!calculate the energy
CALL dbt_create(ri_data%ks_t(1, 1), t_2c_ao_tmp(1))
CALL dbcsr_create(tmp, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_symmetric)
CALL dbcsr_create(ks_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
CALL dbcsr_create(rho_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
ehfx = 0.0_dp
DO i_img = 1, nimg
DO i_spin = 1, nspins
CALL dbt_filter(ri_data%ks_t(i_spin, i_img), ri_data%filter_eps)
CALL dbt_copy(ri_data%ks_t(i_spin, i_img), t_2c_ao_tmp(1))
CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), ks_desymm)
CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), tmp)
CALL dbcsr_add(ks_matrix(i_spin, i_img)%matrix, tmp, 1.0_dp, 1.0_dp)
CALL dbt_copy(ri_data%rho_ao_t(i_spin, i_img), t_2c_ao_tmp(1))
CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), rho_desymm)
CALL dbcsr_dot(ks_desymm, rho_desymm, etmp)
ehfx = ehfx + 0.5_dp*etmp
IF (.NOT. use_delta_p) CALL dbt_clear(ri_data%ks_t(i_spin, i_img))
END DO
END DO
CALL dbcsr_release(rho_desymm)
CALL dbcsr_release(ks_desymm)
CALL dbcsr_release(tmp)
CALL dbt_destroy(t_2c_ao_tmp(1))
CALL timestop(handle)
END SUBROUTINE hfx_ri_update_ks_kp
! **************************************************************************************************
!> \brief Update the K-points RI-HFX forces
!> \param qs_env ...
!> \param ri_data ...
!> \param nspins ...
!> \param hf_fraction ...
!> \param rho_ao ...
!> \param use_virial ...
!> \note Because this routine uses stored quantities calculated in the energy calculation, they should
!> always be called by pairs, and with the same input densities
! **************************************************************************************************
SUBROUTINE hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
TYPE(qs_environment_type), POINTER :: qs_env
TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
INTEGER, INTENT(IN) :: nspins
REAL(KIND=dp), INTENT(IN) :: hf_fraction
TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
LOGICAL, INTENT(IN), OPTIONAL :: use_virial
CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces_kp'
INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_loop, i_spin, &
i_xyz, iatom, iblk, igroup, j_xyz, jatom, k_xyz, n_batch, natom, ngroups, nimg, nimg_nze
INTEGER(int_8) :: nflop, nze
INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_ranges_at, &
batch_ranges_nze, dist1, dist2, &
i_images, idx_to_at_AO, idx_to_at_RI, &
kind_of
INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: force_pattern, sparsity_pattern
INTEGER, DIMENSION(2, 1) :: bounds_iat, bounds_jat
LOGICAL :: use_virial_prv
REAL(dp) :: fac, occ, pref, t1, t2
REAL(dp), DIMENSION(3, 3) :: work_virial
TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
TYPE(cell_type), POINTER :: cell
TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_pot, mat_der_pot_sub
TYPE(dbcsr_type), POINTER :: dbcsr_template
TYPE(dbt_type) :: t_2c_R, t_2c_R_split
TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_bint, t_2c_binv, t_2c_der_pot, &
t_2c_inv, t_2c_metric, t_2c_work, &
t_3c_der_stack, t_3c_work_2, &
t_3c_work_3
TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
t_2c_der_metric_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_AO, t_3c_der_AO_sub, t_3c_der_RI, &
t_3c_der_RI_sub
TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
TYPE(qs_force_type), DIMENSION(:), POINTER :: force
TYPE(section_vals_type), POINTER :: hfx_section
TYPE(virial_type), POINTER :: virial
NULLIFY (para_env, para_env_sub, hfx_section, blacs_env_sub, dbcsr_template, force, atomic_kind_set, &
virial, particle_set, cell)
CALL timeset(routineN, handle)
use_virial_prv = .FALSE.
IF (PRESENT(use_virial)) use_virial_prv = use_virial
IF (nspins == 1) THEN
fac = 0.5_dp*hf_fraction
ELSE
fac = 1.0_dp*hf_fraction
END IF
CALL get_qs_env(qs_env, natom=natom, para_env=para_env, force=force, cell=cell, virial=virial, &
atomic_kind_set=atomic_kind_set, particle_set=particle_set)
CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
nimg = ri_data%nimg
ALLOCATE (t_3c_der_RI(nimg, 3), t_3c_der_AO(nimg, 3), mat_der_pot(nimg, 3), t_2c_der_metric(natom, 3))
!We assume that the integrals are available from the SCF
!pre-calculate the derivs. 3c tensors as (P^0| sigma^a mu^0), with t_3c_der_AO holding deriv wrt mu^0
CALL precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
!Calculate the density matrix at each image
ALLOCATE (rho_ao_t(nspins, nimg))
CALL create_2c_tensor(rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
name="(AO | AO)")
DEALLOCATE (dist1, dist2)
IF (nspins == 2) CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(2, 1))
DO i_img = 2, nimg
DO i_spin = 1, nspins
CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(i_spin, i_img))
END DO
END DO
CALL get_pmat_images(rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
!Contract integrals with the density matrix
ALLOCATE (t_3c_apc(nspins, nimg))
DO i_img = 1, nimg
DO i_spin = 1, nspins
CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
END DO
END DO
CALL contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
!Setup the subgroups
hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
group_size = para_env%num_pe/ngroups
igroup = para_env%mepos/group_size
ALLOCATE (para_env_sub)
CALL para_env_sub%from_split(para_env, igroup)
CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
!Get the ususal sparsity pattern
ALLOCATE (sparsity_pattern(natom, natom, nimg))
CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
!Get the 2-center quantities in the subgroups (note: main group derivs are deleted wihtin)
ALLOCATE (t_2c_inv(natom), mat_2c_pot(nimg), rho_ao_t_sub(nspins, nimg), t_2c_work(5), &
t_2c_der_metric_sub(natom, 3), mat_der_pot_sub(nimg, 3), t_2c_bint(natom), &
t_2c_metric(natom), t_2c_binv(natom))
CALL get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
CALL dbt_create(t_2c_work(1), t_2c_R) !nRI x nRI
CALL dbt_create(t_2c_work(5), t_2c_R_split) !nRI x nRI with split blocks
ALLOCATE (t_2c_der_pot(3))
DO i_xyz = 1, 3
CALL dbt_create(t_2c_R, t_2c_der_pot(i_xyz))
END DO
!Get the 3-center quantities in the subgroups. The integrals and t_3c_apc already there
ALLOCATE (t_3c_work_2(3), t_3c_work_3(4), t_3c_der_stack(6), t_3c_der_AO_sub(nimg, 3), &
t_3c_der_RI_sub(nimg, 3), t_3c_apc_sub(nspins, nimg))
CALL get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_stack, &
group_size, ngroups, para_env, para_env_sub, ri_data)
!Set up batched contraction (go atom by atom)
ALLOCATE (batch_ranges_at(natom + 1))
batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
iatom = 0
DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
IF (idx_to_at_AO(iblk) == iatom + 1) THEN
iatom = iatom + 1
batch_ranges_at(iatom) = iblk
END IF
END DO
CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
CALL dbt_batched_contract_init(t_3c_work_3(3), batch_range_2=batch_ranges_at)
CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
!Preparing for the stacking of 3c tensors
nimg_nze = ri_data%nimg_nze
batch_size = ri_data%kp_stack_size
n_batch = nimg_nze/batch_size
IF (MODULO(nimg_nze, batch_size) .NE. 0) n_batch = n_batch + 1
ALLOCATE (batch_ranges_nze(n_batch + 1))
DO i_batch = 1, n_batch
batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
END DO
batch_ranges_nze(n_batch + 1) = nimg_nze + 1
!Applying the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 from left and right
!And keep the bump on LHS only version as well, with B*M^-1 = (M^-1*B)^T
DO iatom = 1, natom
CALL dbt_create(t_2c_inv(iatom), t_2c_binv(iatom))
CALL dbt_copy(t_2c_inv(iatom), t_2c_binv(iatom))
CALL apply_bump(t_2c_binv(iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.FALSE.)
CALL apply_bump(t_2c_inv(iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
END DO
t1 = m_walltime()
work_virial = 0.0_dp
ALLOCATE (iapc_pairs(nimg, 2), i_images(nimg))
ALLOCATE (force_pattern(natom, natom, nimg))
force_pattern(:, :, :) = -1
!We proceed with 2 loops: one over the sparsity pattern from the SCF, one over the rest
!We use the SCF cost model for the first loop, while we calculate the cost of the upcoming loop
DO i_loop = 1, 2
DO b_img = 1, nimg
DO jatom = 1, natom
DO iatom = 1, natom
pref = -0.5_dp*fac
IF (i_loop == 1 .AND. (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup)) CYCLE
IF (i_loop == 2 .AND. (.NOT. force_pattern(iatom, jatom, b_img) == igroup)) CYCLE
!Get the proper HFX potential 2c integrals (R_i^0|S_j^b), times (S_j^b|Q_j^b)^-1
CALL timeset(routineN//"_2c_1", handle2)
CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
dbcsr_template=dbcsr_template)
CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_inv(jatom), &
0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
contract_1=[2], notcontract_1=[1], &
contract_2=[1], notcontract_2=[2], &
filter_eps=ri_data%filter_eps, flop=nflop)
CALL dbt_copy(t_2c_work(2), t_2c_work(5), move_data=.TRUE.) !move to split blocks
CALL dbt_filter(t_2c_work(5), ri_data%filter_eps)
CALL timestop(handle2)
CALL timeset(routineN//"_3c", handle2)
bounds_iat(:, 1) = [SUM(ri_data%bsizes_AO(1:iatom - 1)) + 1, SUM(ri_data%bsizes_AO(1:iatom))]
bounds_jat(:, 1) = [SUM(ri_data%bsizes_AO(1:jatom - 1)) + 1, SUM(ri_data%bsizes_AO(1:jatom))]
CALL dbt_clear(t_2c_R_split)
DO i_spin = 1, nspins
CALL dbt_batched_contract_init(rho_ao_t_sub(i_spin, b_img))
END DO
CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env, i_images) !i = a+c-b
DO i_batch = 1, n_batch
!Stack the 3c derivatives to take the trace later on
DO i_xyz = 1, 3
CALL dbt_clear(t_3c_der_stack(i_xyz))
CALL fill_3c_stack(t_3c_der_stack(i_xyz), t_3c_der_RI_sub(:, i_xyz), &
iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
filter_dim=2, idx_to_at=idx_to_at_AO, &
img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
CALL dbt_clear(t_3c_der_stack(3 + i_xyz))
CALL fill_3c_stack(t_3c_der_stack(3 + i_xyz), t_3c_der_AO_sub(:, i_xyz), &
iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
filter_dim=2, idx_to_at=idx_to_at_AO, &