-
Notifications
You must be signed in to change notification settings - Fork 1
/
hfx_energy_potential.F
2976 lines (2746 loc) · 140 KB
/
hfx_energy_potential.F
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
!--------------------------------------------------------------------------------------------------!
! CP2K: A general program to perform molecular dynamics simulations !
! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
! !
! SPDX-License-Identifier: GPL-2.0-or-later !
!--------------------------------------------------------------------------------------------------!
! **************************************************************************************************
!> \brief Routines to calculate HFX energy and potential
!> \par History
!> 11.2006 created [Manuel Guidon]
!> \author Manuel Guidon
! **************************************************************************************************
MODULE hfx_energy_potential
USE admm_types, ONLY: get_admm_env
USE atomic_kind_types, ONLY: atomic_kind_type, &
get_atomic_kind_set
USE bibliography, ONLY: cite_reference, &
guidon2008, &
guidon2009
USE cell_types, ONLY: cell_type, &
pbc
USE cp_control_types, ONLY: dft_control_type
USE cp_files, ONLY: close_file, &
open_file
USE cp_log_handling, ONLY: cp_get_default_logger, &
cp_logger_type
USE cp_output_handling, ONLY: cp_p_file, &
cp_print_key_finished_output, &
cp_print_key_should_output, &
cp_print_key_unit_nr
USE message_passing, ONLY: mp_para_env_type
USE cp_dbcsr_api, ONLY: dbcsr_copy, &
dbcsr_dot, &
dbcsr_get_matrix_type, &
dbcsr_p_type, &
dbcsr_type_antisymmetric
USE gamma, ONLY: init_md_ftable
USE hfx_communication, ONLY: distribute_ks_matrix, &
get_atomic_block_maps, &
get_full_density
USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
hfx_add_single_cache_element, &
hfx_decompress_first_cache, &
hfx_flush_last_cache, &
hfx_get_mult_cache_elements, &
hfx_get_single_cache_element, &
hfx_reset_cache_and_container
USE hfx_contract_block, ONLY: contract_block
USE hfx_libint_interface, ONLY: evaluate_eri
USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
hfx_load_balance, &
hfx_update_load_balance
USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
build_pair_list, &
build_pair_list_pgf, &
build_pgf_product_list, &
pgf_product_list_size
USE hfx_screening_methods, ONLY: calc_pair_dist_radii, &
calc_screening_functions, &
update_pmax_mat
USE hfx_types, ONLY: &
alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
hfx_cell_type, hfx_container_type, hfx_create_neighbor_cells, hfx_distribution, &
hfx_general_type, hfx_init_container, hfx_load_balance_type, hfx_memory_type, hfx_p_kind, &
hfx_pgf_list, hfx_pgf_product_list, hfx_potential_type, hfx_reset_memory_usage_counter, &
hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, hfx_type, init_t_c_g0_lmax, &
log_zero, pair_list_type, pair_set_list_type
USE input_constants, ONLY: do_potential_mix_cl_trunc, &
do_potential_truncated, &
hfx_do_eval_energy
USE input_section_types, ONLY: section_vals_type
USE kinds, ONLY: default_string_length, &
dp, &
int_8
USE kpoint_types, ONLY: get_kpoint_info, &
kpoint_type
USE libint_wrapper, ONLY: cp_libint_t
USE machine, ONLY: m_flush, &
m_memory, &
m_walltime
USE mathconstants, ONLY: fac
USE orbital_pointers, ONLY: nco, &
ncoset, &
nso
USE particle_types, ONLY: particle_type
USE qs_environment_types, ONLY: get_qs_env, &
qs_environment_type
USE qs_ks_types, ONLY: get_ks_env, &
qs_ks_env_type
USE t_c_g0, ONLY: init
USE util, ONLY: sort
!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
#include "./base/base_uses.f90"
IMPLICIT NONE
PRIVATE
PUBLIC :: integrate_four_center, coulomb4
CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_energy_potential'
!***
CONTAINS
! **************************************************************************************************
!> \brief computes four center integrals for a full basis set and updates the
!> Kohn-Sham-Matrix and energy. Uses all 8 eri symmetries
!> \param qs_env ...
!> \param x_data ...
!> \param ks_matrix ...
!> \param ehfx energy calculated with the updated HFX matrix
!> \param rho_ao density matrix in ao basis
!> \param hfx_section input_section HFX
!> \param para_env ...
!> \param geometry_did_change flag that indicates we have to recalc integrals
!> \param irep Index for the HFX replica
!> \param distribute_fock_matrix Flag that indicates whether to communicate the
!> new fock matrix or not
!> \param ispin ...
!> \par History
!> 06.2007 created [Manuel Guidon]
!> 08.2007 optimized load balance [Manuel Guidon]
!> 09.2007 new parallelization [Manuel Guidon]
!> 02.2009 completely rewritten screening part [Manuel Guidon]
!> 12.2017 major bug fix. removed wrong cycle that was caussing segfault.
!> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
!> [Tobias Binninger + Valery Weber]
!> \author Manuel Guidon
! **************************************************************************************************
SUBROUTINE integrate_four_center(qs_env, x_data, ks_matrix, ehfx, rho_ao, hfx_section, para_env, &
geometry_did_change, irep, distribute_fock_matrix, &
ispin)
TYPE(qs_environment_type), POINTER :: qs_env
TYPE(hfx_type), DIMENSION(:, :), POINTER :: x_data
TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
REAL(KIND=dp), INTENT(OUT) :: ehfx
TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
TYPE(section_vals_type), POINTER :: hfx_section
TYPE(mp_para_env_type), POINTER :: para_env
LOGICAL :: geometry_did_change
INTEGER :: irep
LOGICAL, INTENT(IN) :: distribute_fock_matrix
INTEGER, INTENT(IN) :: ispin
CHARACTER(LEN=*), PARAMETER :: routineN = 'integrate_four_center'
CHARACTER(len=default_string_length) :: eps_scaling_str, eps_schwarz_min_str
INTEGER :: act_atomic_block_offset, act_set_offset, atomic_offset_ac, atomic_offset_ad, &
atomic_offset_bc, atomic_offset_bd, bin, bits_max_val, buffer_left, buffer_size, &
buffer_start, cache_size, current_counter, handle, handle_bin, handle_dist_ks, &
handle_getP, handle_load, handle_main, i, i_list_ij, i_list_kl, i_set_list_ij, &
i_set_list_ij_start, i_set_list_ij_stop, i_set_list_kl, i_set_list_kl_start, &
i_set_list_kl_stop, i_thread, iatom, iatom_block, iatom_end, iatom_start, ikind, img, &
iset, iw, j, jatom, jatom_block, jatom_end, jatom_start, jkind, jset, katom, katom_block, &
katom_end
INTEGER :: katom_start, kind_kind_idx, kkind, kset, l_max, latom, latom_block, latom_end, &
latom_start, lkind, lset, ma, max_am, max_pgf, max_set, mb, my_bin_id, my_bin_size, &
my_thread_id, n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, &
nneighbors, nseta, nsetb, nsgf_max, nspins, pa, sgfb, shm_task_counter, shm_total_bins, &
sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, &
sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
mb_size_buffers, mb_size_f, mb_size_p, mem_compression_counter, &
mem_compression_counter_disk, mem_eris, mem_eris_disk, mem_max_val, memsize_after, &
memsize_before, my_current_counter, my_istart, n_processes, nblocks, ncpu, neris_disk, &
neris_incore, neris_onthefly, neris_tmp, neris_total, nprim_ints, &
shm_mem_compression_counter, shm_neris_disk, shm_neris_incore, shm_neris_onthefly, &
shm_neris_total, shm_nprim_ints, shm_stor_count_int_disk, shm_stor_count_max_val, &
shm_storage_counter_integrals, stor_count_int_disk
INTEGER(int_8) :: stor_count_max_val, storage_counter_integrals, subtr_size_mb, tmp_block, &
tmp_i8(8)
INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, last_sgf_global, nimages, &
tmp_index
INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
INTEGER, SAVE :: shm_number_of_p_entries
LOGICAL :: bins_left, buffer_overflow, do_disk_storage, do_dynamic_load_balancing, do_it, &
do_kpoints, do_p_screening, do_periodic, do_print_load_balance_info, is_anti_symmetric, &
ks_fully_occ, my_geo_change, treat_lsd_in_core, use_disk_storage
LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
REAL(dp) :: afac, bintime_start, bintime_stop, cartesian_estimate, compression_factor, &
compression_factor_disk, ene_x_aa, ene_x_aa_diag, ene_x_bb, ene_x_bb_diag, eps_schwarz, &
eps_storage, etmp, fac, hf_fraction, ln_10, log10_eps_schwarz, log10_pmax, &
max_contraction_val, max_val1, max_val2, max_val2_set, pmax_atom, pmax_blocks, &
pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, screen_kind_kl, &
spherical_estimate, symm_fac
REAL(dp), ALLOCATABLE, DIMENSION(:) :: ee_buffer1, ee_buffer2, ee_primitives_tmp, ee_work, &
ee_work2, kac_buf, kad_buf, kbc_buf, kbd_buf, pac_buf, pad_buf, pbc_buf, pbd_buf, &
primitive_integrals
REAL(dp), DIMENSION(:), POINTER :: p_work
REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, full_ks_alpha, &
full_ks_beta, max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, &
shm_pmax_block, sphi_b, zeta, zetb, zetc, zetd
REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
sphi_c_ext_set, sphi_d_ext_set
REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
sphi_d_ext
REAL(KIND=dp) :: coeffs_kind_max0
TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
TYPE(cell_type), POINTER :: cell
TYPE(cp_libint_t) :: private_lib
TYPE(cp_logger_type), POINTER :: logger
TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_ks_aux_fit_hfx
TYPE(dft_control_type), POINTER :: dft_control
TYPE(hfx_basis_info_type), POINTER :: basis_info
TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches, integral_caches_disk
TYPE(hfx_cache_type), POINTER :: maxval_cache, maxval_cache_disk
TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers, &
integral_containers_disk
TYPE(hfx_container_type), POINTER :: maxval_container, maxval_container_disk
TYPE(hfx_distribution), POINTER :: distribution_energy
TYPE(hfx_general_type) :: general_parameter
TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
TYPE(hfx_memory_type), POINTER :: memory_parameter
TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
TYPE(hfx_pgf_product_list), ALLOCATABLE, &
DIMENSION(:) :: pgf_product_list
TYPE(hfx_potential_type) :: potential_parameter
TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
tmp_screen_pgf1, tmp_screen_pgf2
TYPE(hfx_screen_coeff_type), &
DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
TYPE(hfx_screen_coeff_type), &
DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
TYPE(hfx_screening_type) :: screening_parameter
TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
TYPE(hfx_type), POINTER :: actual_x_data, shm_master_x_data
TYPE(kpoint_type), POINTER :: kpoints
TYPE(pair_list_type) :: list_ij, list_kl
TYPE(pair_set_list_type), ALLOCATABLE, &
DIMENSION(:) :: set_list_ij, set_list_kl
TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
TYPE(qs_ks_env_type), POINTER :: ks_env
NULLIFY (dft_control, matrix_ks_aux_fit_hfx)
CALL timeset(routineN, handle)
CALL cite_reference(Guidon2008)
CALL cite_reference(Guidon2009)
ehfx = 0.0_dp
! ** This is not very clean, but effective. ispin can only be 2 if we do the beta spin part in core
my_geo_change = geometry_did_change
IF (ispin == 2) my_geo_change = .FALSE.
logger => cp_get_default_logger()
is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
IF (my_geo_change) THEN
CALL m_memory(memsize_before)
CALL para_env%max(memsize_before)
iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
extension=".scfLog")
IF (iw > 0) THEN
WRITE (UNIT=iw, FMT="(/,(T3,A,T60,I21))") &
"HFX_MEM_INFO| Est. max. program size before HFX [MiB]:", memsize_before/(1024*1024)
CALL m_flush(iw)
END IF
CALL cp_print_key_finished_output(iw, logger, hfx_section, &
"HF_INFO")
END IF
CALL get_qs_env(qs_env=qs_env, atomic_kind_set=atomic_kind_set, cell=cell)
NULLIFY (cell_to_index)
CALL get_qs_env(qs_env=qs_env, do_kpoints=do_kpoints)
IF (do_kpoints) THEN
CALL get_qs_env(qs_env=qs_env, ks_env=ks_env)
CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
CALL get_kpoint_info(kpoint=kpoints, cell_to_index=cell_to_index)
END IF
!! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
nkind = SIZE(atomic_kind_set, 1)
l_max = 0
DO ikind = 1, nkind
l_max = MAX(l_max, MAXVAL(x_data(1, 1)%basis_parameter(ikind)%lmax))
END DO
l_max = 4*l_max
CALL init_md_ftable(l_max)
IF (x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
IF (l_max > init_t_c_g0_lmax) THEN
IF (para_env%is_source()) THEN
CALL open_file(unit_number=unit_id, file_name=x_data(1, 1)%potential_parameter%filename)
END IF
CALL init(l_max, unit_id, para_env%mepos, para_env)
IF (para_env%is_source()) THEN
CALL close_file(unit_id)
END IF
init_t_c_g0_lmax = l_max
END IF
END IF
n_threads = 1
!$ n_threads = omp_get_max_threads()
shm_neris_total = 0
shm_nprim_ints = 0
shm_neris_onthefly = 0
shm_storage_counter_integrals = 0
shm_stor_count_int_disk = 0
shm_neris_incore = 0
shm_neris_disk = 0
shm_stor_count_max_val = 0
!$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
!$OMP SHARED(qs_env,&
!$OMP x_data,&
!$OMP ks_matrix,&
!$OMP ehfx,&
!$OMP rho_ao,&
!$OMP matrix_ks_aux_fit_hfx,&
!$OMP hfx_section,&
!$OMP para_env,&
!$OMP my_geo_change,&
!$OMP irep,&
!$OMP distribute_fock_matrix,&
!$OMP cell_to_index,&
!$OMP ncoset,&
!$OMP nso,&
!$OMP nco,&
!$OMP full_ks_alpha,&
!$OMP full_ks_beta,&
!$OMP n_threads,&
!$OMP full_density_alpha,&
!$OMP full_density_beta,&
!$OMP shm_initial_p,&
!$OMP shm_is_assoc_atomic_block,&
!$OMP shm_number_of_p_entries,&
!$OMP shm_neris_total,&
!$OMP shm_neris_onthefly,&
!$OMP shm_storage_counter_integrals,&
!$OMP shm_stor_count_int_disk,&
!$OMP shm_neris_incore,&
!$OMP shm_neris_disk,&
!$OMP shm_nprim_ints,&
!$OMP shm_stor_count_max_val,&
!$OMP cell,&
!$OMP screen_coeffs_set,&
!$OMP screen_coeffs_kind,&
!$OMP screen_coeffs_pgf,&
!$OMP pgf_product_list_size,&
!$OMP radii_pgf,&
!$OMP nkind,&
!$OMP ispin,&
!$OMP is_anti_symmetric,&
!$OMP shm_atomic_block_offset,&
!$OMP shm_set_offset,&
!$OMP shm_block_offset,&
!$OMP shm_task_counter,&
!$OMP shm_task_list,&
!$OMP shm_total_bins,&
!$OMP shm_master_x_data,&
!$OMP shm_pmax_atom,&
!$OMP shm_pmax_block,&
!$OMP shm_atomic_pair_list,&
!$OMP shm_mem_compression_counter,&
!$OMP do_print_load_balance_info) &
!$OMP PRIVATE(ln_10,i_thread,actual_x_data,do_periodic,screening_parameter,potential_parameter,&
!$OMP general_parameter,load_balance_parameter,memory_parameter,cache_size,bits_max_val,&
!$OMP basis_parameter,basis_info,treat_lsd_in_core,ncpu,n_processes,neris_total,neris_incore,&
!$OMP neris_disk,neris_onthefly,mem_eris,mem_eris_disk,mem_max_val,compression_factor,&
!$OMP compression_factor_disk,nprim_ints,neris_tmp,max_val_memory,max_am,do_p_screening,&
!$OMP max_set,particle_set,atomic_kind_set,natom,kind_of,ncos_max,nsgf_max,ikind,&
!$OMP nseta,npgfa,la_max,nsgfa,primitive_integrals,pbd_buf,pbc_buf,pad_buf,pac_buf,kbd_buf,kbc_buf,&
!$OMP kad_buf,kac_buf,ee_work,ee_work2,ee_buffer1,ee_buffer2,ee_primitives_tmp,nspins,max_contraction,&
!$OMP max_pgf,jkind,lb_max,nsetb,npgfb,first_sgfb,sphi_b,nsgfb,ncob,sgfb,nneighbors,pgf_list_ij,pgf_list_kl,&
!$OMP pgf_product_list,nimages,ks_fully_occ,subtr_size_mb,use_disk_storage,counter,do_disk_storage,&
!$OMP maxval_container_disk,maxval_cache_disk,integral_containers_disk,integral_caches_disk,eps_schwarz,&
!$OMP log10_eps_schwarz,eps_storage,hf_fraction,buffer_overflow,logger,private_lib,last_sgf_global,handle_getp,&
!$OMP p_work,fac,handle_load,do_dynamic_load_balancing,my_bin_size,maxval_container,integral_containers,maxval_cache,&
!$OMP integral_caches,tmp_task_list,tmp_task_list_cost,tmp_index,handle_main,coeffs_kind_max0,set_list_ij,&
!$OMP set_list_kl,iatom_start,iatom_end,jatom_start,jatom_end,nblocks,bins_left,do_it,distribution_energy,&
!$OMP my_thread_id,my_bin_id,handle_bin,bintime_start,my_istart,my_current_counter,latom_block,tmp_block,&
!$OMP katom_block,katom_start,katom_end,latom_start,latom_end,pmax_blocks,list_ij,list_kl,i_set_list_ij_start,&
!$OMP i_set_list_ij_stop,ra,rb,rab2,la_min,zeta,sphi_a_ext,nsgfl_a,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
!$OMP lb_min,zetb,sphi_b_ext,nsgfl_b,sphi_b_u1,sphi_b_u2,sphi_b_u3,katom,latom,i_set_list_kl_start,i_set_list_kl_stop,&
!$OMP kkind,lkind,rc,rd,rcd2,pmax_atom,screen_kind_ij,screen_kind_kl,symm_fac,lc_max,lc_min,npgfc,zetc,nsgfc,sphi_c_ext,&
!$OMP nsgfl_c,sphi_c_u1,sphi_c_u2,sphi_c_u3,ld_max,ld_min,npgfd,zetd,nsgfd,sphi_d_ext,nsgfl_d,sphi_d_u1,sphi_d_u2,&
!$OMP sphi_d_u3,atomic_offset_bd,atomic_offset_bc,atomic_offset_ad,atomic_offset_ac,offset_bd_set,offset_bc_set,&
!$OMP offset_ad_set,offset_ac_set,swap_id,kind_kind_idx,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,mem_compression_counter,&
!$OMP mem_compression_counter_disk,max_val1,sphi_a_ext_set,sphi_b_ext_set,kset,lset,max_val2_set,max_val2,&
!$OMP sphi_c_ext_set,sphi_d_ext_set,pmax_entry,log10_pmax,current_counter,nints,estimate_to_store_int,&
!$OMP spherical_estimate,nbits,buffer_left,buffer_start,buffer_size,max_contraction_val,tmp_r_1,tmp_r_2,&
!$OMP tmp_screen_pgf1,tmp_screen_pgf2,cartesian_estimate,bintime_stop,iw,memsize_after,storage_counter_integrals,&
!$OMP stor_count_int_disk,stor_count_max_val,ene_x_aa,ene_x_bb,mb_size_p,mb_size_f,mb_size_buffers,afac,ene_x_aa_diag,&
!$OMP ene_x_bb_diag,act_atomic_block_offset,act_set_offset,j,handle_dist_ks,tmp_i8,tmp_i4,dft_control,&
!$OMP etmp,nkimages,img,bin,eps_scaling_str,eps_schwarz_min_str)
ln_10 = LOG(10.0_dp)
i_thread = 0
!$ i_thread = omp_get_thread_num()
actual_x_data => x_data(irep, i_thread + 1)
!$OMP MASTER
shm_master_x_data => x_data(irep, 1)
!$OMP END MASTER
!$OMP BARRIER
do_periodic = actual_x_data%periodic_parameter%do_periodic
IF (do_periodic) THEN
! ** Rebuild neighbor lists in case the cell has changed (i.e. NPT MD)
actual_x_data%periodic_parameter%number_of_shells = actual_x_data%periodic_parameter%mode
CALL hfx_create_neighbor_cells(actual_x_data, actual_x_data%periodic_parameter%number_of_shells_from_input, &
cell, i_thread)
END IF
screening_parameter = actual_x_data%screening_parameter
potential_parameter = actual_x_data%potential_parameter
general_parameter = actual_x_data%general_parameter
load_balance_parameter => actual_x_data%load_balance_parameter
memory_parameter => actual_x_data%memory_parameter
cache_size = memory_parameter%cache_size
bits_max_val = memory_parameter%bits_max_val
basis_parameter => actual_x_data%basis_parameter
basis_info => actual_x_data%basis_info
treat_lsd_in_core = general_parameter%treat_lsd_in_core
ncpu = para_env%num_pe
n_processes = ncpu*n_threads
!! initialize some counters
neris_total = 0_int_8
neris_incore = 0_int_8
neris_disk = 0_int_8
neris_onthefly = 0_int_8
mem_eris = 0_int_8
mem_eris_disk = 0_int_8
mem_max_val = 0_int_8
compression_factor = 0.0_dp
compression_factor_disk = 0.0_dp
nprim_ints = 0_int_8
neris_tmp = 0_int_8
max_val_memory = 1_int_8
max_am = basis_info%max_am
CALL get_qs_env(qs_env=qs_env, &
atomic_kind_set=atomic_kind_set, &
particle_set=particle_set, &
dft_control=dft_control)
IF (dft_control%do_admm) CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_hfx=matrix_ks_aux_fit_hfx)
do_p_screening = screening_parameter%do_initial_p_screening
! Special treatment for MP2 with initial density screening
IF (do_p_screening) THEN
IF (ASSOCIATED(qs_env%mp2_env)) THEN
IF ((qs_env%mp2_env%ri_grad%free_hfx_buffer)) THEN
do_p_screening = ((qs_env%mp2_env%p_screen) .AND. (qs_env%mp2_env%not_last_hfx))
ELSE
do_p_screening = .FALSE.
END IF
END IF
END IF
max_set = basis_info%max_set
natom = SIZE(particle_set, 1)
! Number of image matrices in k-point calculations (nkimages==1 -> no kpoints)
nkimages = dft_control%nimages
CPASSERT(nkimages == 1)
CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
!! precompute maximum nco and allocate scratch
ncos_max = 0
nsgf_max = 0
DO iatom = 1, natom
ikind = kind_of(iatom)
nseta = basis_parameter(ikind)%nset
npgfa => basis_parameter(ikind)%npgf
la_max => basis_parameter(ikind)%lmax
nsgfa => basis_parameter(ikind)%nsgf
DO iset = 1, nseta
ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
nsgf_max = MAX(nsgf_max, nsgfa(iset))
END DO
END DO
!! Allocate the arrays for the integrals.
ALLOCATE (primitive_integrals(nsgf_max**4))
primitive_integrals = 0.0_dp
ALLOCATE (pbd_buf(nsgf_max**2))
ALLOCATE (pbc_buf(nsgf_max**2))
ALLOCATE (pad_buf(nsgf_max**2))
ALLOCATE (pac_buf(nsgf_max**2))
ALLOCATE (kbd_buf(nsgf_max**2))
ALLOCATE (kbc_buf(nsgf_max**2))
ALLOCATE (kad_buf(nsgf_max**2))
ALLOCATE (kac_buf(nsgf_max**2))
ALLOCATE (ee_work(ncos_max**4))
ALLOCATE (ee_work2(ncos_max**4))
ALLOCATE (ee_buffer1(ncos_max**4))
ALLOCATE (ee_buffer2(ncos_max**4))
ALLOCATE (ee_primitives_tmp(nsgf_max**4))
nspins = dft_control%nspins
ALLOCATE (max_contraction(max_set, natom))
max_contraction = 0.0_dp
max_pgf = 0
DO jatom = 1, natom
jkind = kind_of(jatom)
lb_max => basis_parameter(jkind)%lmax
nsetb = basis_parameter(jkind)%nset
npgfb => basis_parameter(jkind)%npgf
first_sgfb => basis_parameter(jkind)%first_sgf
sphi_b => basis_parameter(jkind)%sphi
nsgfb => basis_parameter(jkind)%nsgf
DO jset = 1, nsetb
! takes the primitive to contracted transformation into account
ncob = npgfb(jset)*ncoset(lb_max(jset))
sgfb = first_sgfb(1, jset)
! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
! the maximum value after multiplication with sphi_b
max_contraction(jset, jatom) = MAXVAL((/(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
max_pgf = MAX(max_pgf, npgfb(jset))
END DO
END DO
! ** Allocate buffers for pgf_lists
nneighbors = SIZE(actual_x_data%neighbor_cells)
ALLOCATE (pgf_list_ij(max_pgf**2))
ALLOCATE (pgf_list_kl(max_pgf**2))
! the size of pgf_product_list is allocated and resized as needed. The initial guess grows as needed
!$OMP ATOMIC READ
tmp_i4 = pgf_product_list_size
ALLOCATE (pgf_product_list(tmp_i4))
ALLOCATE (nimages(max_pgf**2))
DO i = 1, max_pgf**2
ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
END DO
!$OMP BARRIER
!$OMP MASTER
!! Calculate helper array that stores if a certain atomic pair is associated in the KS matrix
IF (my_geo_change) THEN
CALL get_atomic_block_maps(ks_matrix(1, 1)%matrix, basis_parameter, kind_of, &
shm_master_x_data%is_assoc_atomic_block, &
shm_master_x_data%number_of_p_entries, &
para_env, &
shm_master_x_data%atomic_block_offset, &
shm_master_x_data%set_offset, &
shm_master_x_data%block_offset, &
shm_master_x_data%map_atoms_to_cpus, &
nkind)
shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
!! Get occupation of KS-matrix
ks_fully_occ = .TRUE.
outer: DO iatom = 1, natom
DO jatom = iatom, natom
IF (shm_is_assoc_atomic_block(jatom, iatom) == 0) THEN
ks_fully_occ = .FALSE.
EXIT outer
END IF
END DO
END DO outer
IF (.NOT. ks_fully_occ) THEN
CALL cp_warn(__LOCATION__, &
"The Kohn Sham matrix is not 100% occupied. This "// &
"may result in incorrect Hartree-Fock results. Setting "// &
"MIN_PAIR_LIST_RADIUS to -1 in the QS section ensures a "// &
"fully occupied KS matrix. For more information "// &
"see FAQ: https://www.cp2k.org/faq:hfx_eps_warning")
END IF
END IF
! ** Set pointers
shm_number_of_p_entries = shm_master_x_data%number_of_p_entries
shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
shm_atomic_block_offset => shm_master_x_data%atomic_block_offset
shm_set_offset => shm_master_x_data%set_offset
shm_block_offset => shm_master_x_data%block_offset
!$OMP END MASTER
!$OMP BARRIER
! ** Reset storage counter given by MAX_MEMORY by subtracting all buffers
! ** Fock and density Matrices (shared)
subtr_size_mb = 2_int_8*shm_block_offset(ncpu + 1)
! ** if non restricted ==> alpha/beta spin
IF (.NOT. treat_lsd_in_core) THEN
IF (nspins == 2) subtr_size_mb = subtr_size_mb*2_int_8
END IF
! ** Initial P only MAX(alpha,beta) (shared)
IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
END IF
! ** In core forces require their own initial P
IF (screening_parameter%do_p_screening_forces) THEN
IF (memory_parameter%treat_forces_in_core) THEN
subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
END IF
END IF
! ** primitive buffer (not shared by the threads)
subtr_size_mb = subtr_size_mb + nsgf_max**4*n_threads
! ** density + fock buffers
subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
! ** screening functions (shared)
! ** coeffs_pgf
subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
! ** coeffs_set
subtr_size_mb = subtr_size_mb + max_set**2*nkind**2
! ** coeffs_kind
subtr_size_mb = subtr_size_mb + nkind**2
! ** radii_pgf
subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
! ** is_assoc (shared)
subtr_size_mb = subtr_size_mb + natom**2
! ** pmax_atom (shared)
IF (do_p_screening) THEN
subtr_size_mb = subtr_size_mb + natom**2
END IF
IF (screening_parameter%do_p_screening_forces) THEN
IF (memory_parameter%treat_forces_in_core) THEN
subtr_size_mb = subtr_size_mb + natom**2
END IF
END IF
! ** Convert into MiB's
subtr_size_mb = subtr_size_mb*8_int_8/1024_int_8/1024_int_8
! ** Subtracting all these buffers from MAX_MEMORY yields the amount
! ** of RAM that is left for the compressed integrals. When using threads
! ** all the available memory is shared among all n_threads. i.e. the faster
! ** ones can steal from the slower ones
CALL hfx_reset_memory_usage_counter(memory_parameter, subtr_size_mb)
use_disk_storage = .FALSE.
counter = 0_int_8
do_disk_storage = memory_parameter%do_disk_storage
IF (do_disk_storage) THEN
maxval_container_disk => actual_x_data%store_ints%maxval_container_disk
maxval_cache_disk => actual_x_data%store_ints%maxval_cache_disk
integral_containers_disk => actual_x_data%store_ints%integral_containers_disk
integral_caches_disk => actual_x_data%store_ints%integral_caches_disk
END IF
IF (my_geo_change) THEN
memory_parameter%ram_counter = HUGE(memory_parameter%ram_counter)
END IF
IF (my_geo_change) THEN
memory_parameter%recalc_forces = .TRUE.
ELSE
IF (.NOT. memory_parameter%treat_forces_in_core) memory_parameter%recalc_forces = .TRUE.
END IF
!! Get screening parameter
eps_schwarz = screening_parameter%eps_schwarz
IF (eps_schwarz <= 0.0_dp) THEN
log10_eps_schwarz = log_zero
ELSE
log10_eps_schwarz = LOG10(eps_schwarz)
END IF
!! get storage epsilon
eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
!! If we have a hybrid functional, we may need only a fraction of exact exchange
hf_fraction = general_parameter%fraction
!! The number of integrals that fit into the given MAX_MEMORY
!! Parameters related to the potential 1/r, erf(wr)/r, erfc(wr/r)
potential_parameter = actual_x_data%potential_parameter
!! Variable to check if we calculate the integrals in-core or on the fly
!! If TRUE -> on the fly
IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
buffer_overflow = .FALSE.
ELSE
buffer_overflow = .TRUE.
END IF
logger => cp_get_default_logger()
private_lib = actual_x_data%lib
!! Helper array to map local basis function indices to global ones
ALLOCATE (last_sgf_global(0:natom))
last_sgf_global(0) = 0
DO iatom = 1, natom
ikind = kind_of(iatom)
last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
END DO
!$OMP BARRIER
!$OMP MASTER
!! Let master thread get the density (avoid problems with MPI)
!! Get the full density from all the processors
NULLIFY (full_density_alpha, full_density_beta)
ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
IF (.NOT. treat_lsd_in_core .OR. nspins == 1) THEN
CALL timeset(routineN//"_getP", handle_getP)
DO img = 1, nkimages
CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
shm_master_x_data%block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
END DO
IF (nspins == 2) THEN
ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), nkimages))
DO img = 1, nkimages
CALL get_full_density(para_env, full_density_beta(:, img), rho_ao(2, img)%matrix, shm_number_of_p_entries, &
shm_master_x_data%block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
END DO
END IF
CALL timestop(handle_getP)
!! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
!! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
!! a changed geometry
NULLIFY (shm_initial_p)
IF (do_p_screening) THEN
shm_initial_p => shm_master_x_data%initial_p
shm_pmax_atom => shm_master_x_data%pmax_atom
IF (my_geo_change) THEN
CALL update_pmax_mat(shm_master_x_data%initial_p, &
shm_master_x_data%map_atom_to_kind_atom, &
shm_master_x_data%set_offset, &
shm_master_x_data%atomic_block_offset, &
shm_pmax_atom, &
full_density_alpha, full_density_beta, &
natom, kind_of, basis_parameter, &
nkind, shm_master_x_data%is_assoc_atomic_block)
END IF
END IF
ELSE
IF (do_p_screening) THEN
CALL timeset(routineN//"_getP", handle_getP)
DO img = 1, nkimages
CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(1, img)%matrix, shm_number_of_p_entries, &
shm_master_x_data%block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.TRUE., &
rho_beta=rho_ao(2, img)%matrix, antisymmetric=is_anti_symmetric)
END DO
CALL timestop(handle_getP)
!! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
!! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
!! a changed geometry
NULLIFY (shm_initial_p)
shm_initial_p => actual_x_data%initial_p
shm_pmax_atom => shm_master_x_data%pmax_atom
IF (my_geo_change) THEN
CALL update_pmax_mat(shm_master_x_data%initial_p, &
shm_master_x_data%map_atom_to_kind_atom, &
shm_master_x_data%set_offset, &
shm_master_x_data%atomic_block_offset, &
shm_pmax_atom, &
full_density_alpha, full_density_beta, &
natom, kind_of, basis_parameter, &
nkind, shm_master_x_data%is_assoc_atomic_block)
END IF
END IF
! ** Now get the density(ispin)
DO img = 1, nkimages
CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
shm_master_x_data%block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
antisymmetric=is_anti_symmetric)
END DO
END IF
NULLIFY (full_ks_alpha, full_ks_beta)
ALLOCATE (shm_master_x_data%full_ks_alpha(shm_block_offset(ncpu + 1), nkimages))
full_ks_alpha => shm_master_x_data%full_ks_alpha
full_ks_alpha = 0.0_dp
IF (.NOT. treat_lsd_in_core) THEN
IF (nspins == 2) THEN
ALLOCATE (shm_master_x_data%full_ks_beta(shm_block_offset(ncpu + 1), nkimages))
full_ks_beta => shm_master_x_data%full_ks_beta
full_ks_beta = 0.0_dp
END IF
END IF
!! Initialize schwarz screening matrices for near field estimates and boxing screening matrices
!! for far field estimates. The update is only performed if the geomtry of the system changed.
!! If the system is periodic, then the corresponding routines are called and some variables
!! are initialized
!$OMP END MASTER
!$OMP BARRIER
IF (.NOT. shm_master_x_data%screen_funct_is_initialized) THEN
CALL calc_pair_dist_radii(qs_env, basis_parameter, &
shm_master_x_data%pair_dist_radii_pgf, max_set, max_pgf, eps_schwarz, &
n_threads, i_thread)
!$OMP BARRIER
CALL calc_screening_functions(qs_env, basis_parameter, private_lib, shm_master_x_data%potential_parameter, &
shm_master_x_data%screen_funct_coeffs_set, &
shm_master_x_data%screen_funct_coeffs_kind, &
shm_master_x_data%screen_funct_coeffs_pgf, &
shm_master_x_data%pair_dist_radii_pgf, &
max_set, max_pgf, n_threads, i_thread, p_work)
!$OMP MASTER
shm_master_x_data%screen_funct_is_initialized = .TRUE.
!$OMP END MASTER
END IF
!$OMP BARRIER
!$OMP MASTER
screen_coeffs_set => shm_master_x_data%screen_funct_coeffs_set
screen_coeffs_kind => shm_master_x_data%screen_funct_coeffs_kind
screen_coeffs_pgf => shm_master_x_data%screen_funct_coeffs_pgf
radii_pgf => shm_master_x_data%pair_dist_radii_pgf
!$OMP END MASTER
!$OMP BARRIER
!! Initialize a prefactor depending on the fraction and the number of spins
IF (nspins == 1) THEN
fac = 0.5_dp*hf_fraction
ELSE
fac = 1.0_dp*hf_fraction
END IF
!! Call routines that distribute the load on all processes. If we want to screen on a initial density matrix, there is
!! an optional parameter. Of course, this is only done if the geometry did change
!$OMP BARRIER
!$OMP MASTER
CALL timeset(routineN//"_load", handle_load)
!$OMP END MASTER
!$OMP BARRIER
IF (my_geo_change) THEN
IF (actual_x_data%b_first_load_balance_energy) THEN
CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
screen_coeffs_set, screen_coeffs_kind, &
shm_is_assoc_atomic_block, do_periodic, load_balance_parameter, &
kind_of, basis_parameter, shm_initial_p, shm_pmax_atom, i_thread, n_threads, &
cell, do_p_screening, actual_x_data%map_atom_to_kind_atom, &
nkind, hfx_do_eval_energy, shm_pmax_block, use_virial=.FALSE.)
actual_x_data%b_first_load_balance_energy = .FALSE.
ELSE
CALL hfx_update_load_balance(actual_x_data, para_env, &
load_balance_parameter, &
i_thread, n_threads, hfx_do_eval_energy)
END IF
END IF
!$OMP BARRIER
!$OMP MASTER
CALL timestop(handle_load)
!$OMP END MASTER
!$OMP BARRIER
!! Start caluclating integrals of the form (ab|cd) or (ij|kl)
!! In order to do so, there is a main four-loop structure that takes into account the two symmetries
!!
!! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
!!
!! by iterating in the following way
!!
!! DO iatom=1,natom and DO katom=1,natom
!! DO jatom=iatom,natom DO latom=katom,natom
!!
!! The third symmetry
!!
!! (ab|cd) = (cd|ab)
!!
!! is taken into account by the following criterion:
!!
!! IF(katom+latom<=iatom+jatom) THEN
!! IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
!!
!! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
!! factor ( symm_fac ).
!!
!! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
!! different hierarchies of short range screening matrices.
!!
!! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
!! defined as :
!!
!! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
!!
!! This tells the process where to start the main loops and how many bunches of integrals it has to
!! calculate. The original parallelization is a simple modulo distribution that is binned and
!! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
!! we need to know which was the initial cpu_id.
!! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
!! lstart only the first time the loop is executed. All subsequent loops have to start with one or
!! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
do_dynamic_load_balancing = .TRUE.
IF (n_threads == 1 .OR. do_disk_storage) do_dynamic_load_balancing = .FALSE.
IF (do_dynamic_load_balancing) THEN
my_bin_size = SIZE(actual_x_data%distribution_energy)
ELSE
my_bin_size = 1
END IF
!! We do not need the containers if MAX_MEM = 0
IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
!! IF new md step -> reinitialize containers
IF (my_geo_change) THEN
CALL dealloc_containers(actual_x_data%store_ints, memory_parameter%actual_memory_usage)
CALL alloc_containers(actual_x_data%store_ints, my_bin_size)
DO bin = 1, my_bin_size
maxval_container => actual_x_data%store_ints%maxval_container(bin)
integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
DO i = 1, 64
CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
END DO
END DO
END IF
!! Decompress the first cache for maxvals and integrals
IF (.NOT. my_geo_change) THEN
DO bin = 1, my_bin_size
maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
maxval_container => actual_x_data%store_ints%maxval_container(bin)
integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
memory_parameter%actual_memory_usage, .FALSE.)
DO i = 1, 64
CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
memory_parameter%actual_memory_usage, .FALSE.)
END DO
END DO
END IF
END IF
!! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
!$OMP CRITICAL(hfxenergy_io_critical)
!! If we do disk storage, we have to initialize the containers/caches
IF (do_disk_storage) THEN
!! IF new md step -> reinitialize containers
IF (my_geo_change) THEN
CALL hfx_init_container(maxval_container_disk, memory_parameter%actual_memory_usage_disk, do_disk_storage)
DO i = 1, 64
CALL hfx_init_container(integral_containers_disk(i), memory_parameter%actual_memory_usage_disk, do_disk_storage)
END DO
END IF
!! Decompress the first cache for maxvals and integrals
IF (.NOT. my_geo_change) THEN
CALL hfx_decompress_first_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
memory_parameter%actual_memory_usage_disk, .TRUE.)
DO i = 1, 64
CALL hfx_decompress_first_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
memory_parameter%actual_memory_usage_disk, .TRUE.)
END DO
END IF
END IF
!$OMP END CRITICAL(hfxenergy_io_critical)
!$OMP BARRIER
!$OMP MASTER
IF (do_dynamic_load_balancing) THEN
! ** Lets construct the task list
shm_total_bins = 0
DO i = 1, n_threads
shm_total_bins = shm_total_bins + SIZE(x_data(irep, i)%distribution_energy)
END DO
ALLOCATE (tmp_task_list(shm_total_bins))
shm_task_counter = 0
DO i = 1, n_threads
DO bin = 1, SIZE(x_data(irep, i)%distribution_energy)
shm_task_counter = shm_task_counter + 1
tmp_task_list(shm_task_counter)%thread_id = i
tmp_task_list(shm_task_counter)%bin_id = bin
tmp_task_list(shm_task_counter)%cost = x_data(irep, i)%distribution_energy(bin)%cost
END DO
END DO