-
Notifications
You must be signed in to change notification settings - Fork 1
/
hfx_derivatives.F
2389 lines (2185 loc) · 119 KB
/
hfx_derivatives.F
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
!--------------------------------------------------------------------------------------------------!
! CP2K: A general program to perform molecular dynamics simulations !
! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
! !
! SPDX-License-Identifier: GPL-2.0-or-later !
!--------------------------------------------------------------------------------------------------!
! **************************************************************************************************
!> \brief Routines to calculate derivatives with respect to basis function origin
!> \par History
!> 04.2008 created [Manuel Guidon]
!> \author Manuel Guidon
! **************************************************************************************************
MODULE hfx_derivatives
USE admm_types, ONLY: admm_type
USE atomic_kind_types, ONLY: atomic_kind_type, &
get_atomic_kind_set
USE cell_types, ONLY: cell_type, &
pbc
USE cp_control_types, ONLY: dft_control_type
USE cp_files, ONLY: close_file, &
open_file
USE cp_log_handling, ONLY: cp_get_default_logger, &
cp_logger_type
USE cp_output_handling, ONLY: cp_p_file, &
cp_print_key_finished_output, &
cp_print_key_should_output, &
cp_print_key_unit_nr
USE message_passing, ONLY: mp_para_env_type
USE cp_dbcsr_api, ONLY: dbcsr_get_matrix_type, &
dbcsr_p_type, &
dbcsr_type_antisymmetric
USE gamma, ONLY: init_md_ftable
USE hfx_communication, ONLY: get_full_density
USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
hfx_add_single_cache_element, &
hfx_decompress_first_cache, &
hfx_flush_last_cache, &
hfx_get_mult_cache_elements, &
hfx_get_single_cache_element, &
hfx_reset_cache_and_container
USE hfx_libint_interface, ONLY: evaluate_deriv_eri
USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
hfx_load_balance, &
hfx_update_load_balance
USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
build_pair_list, &
build_pair_list_pgf, &
build_pgf_product_list, &
pgf_product_list_size
USE hfx_screening_methods, ONLY: update_pmax_mat
USE hfx_types, ONLY: &
alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
hfx_cell_type, hfx_container_type, hfx_distribution, hfx_general_type, hfx_init_container, &
hfx_load_balance_type, hfx_memory_type, hfx_p_kind, hfx_pgf_list, hfx_pgf_product_list, &
hfx_potential_type, hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, &
hfx_type, init_t_c_g0_lmax, log_zero, pair_list_type, pair_set_list_type
USE input_constants, ONLY: do_potential_mix_cl_trunc, &
do_potential_truncated, &
hfx_do_eval_forces
USE input_section_types, ONLY: section_vals_type
USE kinds, ONLY: dp, &
int_8
USE libint_wrapper, ONLY: cp_libint_t
USE machine, ONLY: m_walltime
USE mathconstants, ONLY: fac
USE orbital_pointers, ONLY: nco, &
ncoset, &
nso
USE particle_types, ONLY: particle_type
USE qs_environment_types, ONLY: get_qs_env, &
qs_environment_type
USE qs_force_types, ONLY: qs_force_type
USE t_c_g0, ONLY: init
USE util, ONLY: sort
USE virial_types, ONLY: virial_type
!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
#include "./base/base_uses.f90"
IMPLICIT NONE
PRIVATE
PUBLIC :: derivatives_four_center
CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_derivatives'
!***
CONTAINS
! **************************************************************************************************
!> \brief computes four center derivatives for a full basis set and updates the
!> forces%fock_4c arrays. Uses all 8 eri symmetries
!> \param qs_env ...
!> \param rho_ao density matrix
!> \param rho_ao_resp relaxed density matrix from response
!> \param hfx_section HFX input section
!> \param para_env para_env
!> \param irep ID of HFX replica
!> \param use_virial ...
!> \param adiabatic_rescale_factor parameter used for MCY3 hybrid
!> \param resp_only Calculate only forces from response density
!> \par History
!> 06.2007 created [Manuel Guidon]
!> 08.2007 optimized load balance [Manuel Guidon]
!> 02.2009 completely rewritten screening part [Manuel Guidon]
!> 12.2017 major bug fix. removed wrong cycle that was causing segfault.
!> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
!> [Tobias Binninger + Valery Weber]
!> \author Manuel Guidon
! **************************************************************************************************
SUBROUTINE derivatives_four_center(qs_env, rho_ao, rho_ao_resp, hfx_section, para_env, &
irep, use_virial, adiabatic_rescale_factor, resp_only, &
external_x_data)
TYPE(qs_environment_type), POINTER :: qs_env
TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: rho_ao_resp
TYPE(section_vals_type), POINTER :: hfx_section
TYPE(mp_para_env_type), POINTER :: para_env
INTEGER, INTENT(IN) :: irep
LOGICAL, INTENT(IN) :: use_virial
REAL(dp), INTENT(IN), OPTIONAL :: adiabatic_rescale_factor
LOGICAL, INTENT(IN), OPTIONAL :: resp_only
TYPE(hfx_type), DIMENSION(:, :), POINTER, OPTIONAL :: external_x_data
CHARACTER(LEN=*), PARAMETER :: routineN = 'derivatives_four_center'
INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
forces_map(4, 2), handle, handle_bin, handle_getP, handle_load, handle_main, i, i_atom, &
i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
nsetb, nsgf_max, nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
storage_counter_integrals, tmp_block, tmp_i8(6)
INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, kind_of, last_sgf_global, &
nimages, tmp_index
INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
INTEGER, SAVE :: shm_number_of_p_entries
LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
treat_forces_in_core, use_disk_storage, with_resp_density
LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
REAL(dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
compression_factor, eps_schwarz, eps_storage, fac, hf_fraction, ln_10, log10_eps_schwarz, &
log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
tmp_virial(3, 3)
REAL(dp), ALLOCATABLE, DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET :: primitive_forces, primitive_forces_virial
REAL(dp), DIMENSION(:), POINTER :: full_density_resp, &
full_density_resp_beta, T2
REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, &
max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
sphi_b, zeta, zetb, zetc, zetd
REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
sphi_c_ext_set, sphi_d_ext_set
REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
sphi_d_ext
REAL(KIND=dp) :: coeffs_kind_max0
TYPE(admm_type), POINTER :: admm_env
TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
TYPE(cell_type), POINTER :: cell
TYPE(cp_libint_t) :: private_deriv
TYPE(cp_logger_type), POINTER :: logger
TYPE(dft_control_type), POINTER :: dft_control
TYPE(hfx_basis_info_type), POINTER :: basis_info
TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches
TYPE(hfx_cache_type), POINTER :: maxval_cache
TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers
TYPE(hfx_container_type), POINTER :: maxval_container
TYPE(hfx_distribution), POINTER :: distribution_forces
TYPE(hfx_general_type) :: general_parameter
TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
TYPE(hfx_memory_type), POINTER :: memory_parameter
TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
TYPE(hfx_pgf_product_list), ALLOCATABLE, &
DIMENSION(:) :: pgf_product_list
TYPE(hfx_potential_type) :: potential_parameter
TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
tmp_screen_pgf1, tmp_screen_pgf2
TYPE(hfx_screen_coeff_type), &
DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
TYPE(hfx_screen_coeff_type), &
DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
TYPE(hfx_screening_type) :: screening_parameter
TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
TYPE(hfx_type), POINTER :: actual_x_data
TYPE(hfx_type), DIMENSION(:, :), POINTER :: my_x_data
TYPE(pair_list_type) :: list_ij, list_kl
TYPE(pair_set_list_type), ALLOCATABLE, &
DIMENSION(:) :: set_list_ij, set_list_kl
TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
TYPE(qs_force_type), DIMENSION(:), POINTER :: force
TYPE(virial_type), POINTER :: virial
NULLIFY (dft_control, admm_env)
with_resp_density = .FALSE.
IF (ASSOCIATED(rho_ao_resp)) with_resp_density = .TRUE.
my_resp_only = .FALSE.
IF (PRESENT(resp_only)) my_resp_only = resp_only
is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) .EQ. dbcsr_type_antisymmetric
CALL get_qs_env(qs_env, &
admm_env=admm_env, &
particle_set=particle_set, &
atomic_kind_set=atomic_kind_set, &
cell=cell, &
dft_control=dft_control)
nspins = dft_control%nspins
nkimages = dft_control%nimages
CPASSERT(nkimages == 1)
my_x_data => qs_env%x_data
IF (PRESENT(external_x_data)) my_x_data => external_x_data
!! One atom systems have no contribution to forces
IF (SIZE(particle_set, 1) == 1) THEN
IF (.NOT. use_virial) RETURN
END IF
CALL timeset(routineN, handle)
!! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
nkind = SIZE(atomic_kind_set, 1)
l_max = 0
DO ikind = 1, nkind
l_max = MAX(l_max, MAXVAL(my_x_data(1, 1)%basis_parameter(ikind)%lmax))
END DO
l_max = 4*l_max + 1
CALL init_md_ftable(l_max)
IF (my_x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
my_x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
IF (l_max > init_t_c_g0_lmax) THEN
CALL open_file(unit_number=unit_id, file_name=my_x_data(1, 1)%potential_parameter%filename)
CALL init(l_max, unit_id, para_env%mepos, para_env)
CALL close_file(unit_id)
init_t_c_g0_lmax = l_max
END IF
END IF
n_threads = 1
!$ n_threads = omp_get_max_threads()
shm_neris_total = 0
shm_nprim_ints = 0
shm_neris_onthefly = 0
shm_storage_counter_integrals = 0
shm_neris_incore = 0
shm_stor_count_max_val = 0
!! get force array
CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
my_adiabatic_rescale_factor = 1.0_dp
IF (PRESENT(adiabatic_rescale_factor)) THEN
my_adiabatic_rescale_factor = adiabatic_rescale_factor
END IF
!$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
!$OMP SHARED(qs_env,&
!$OMP rho_ao,&
!$OMP rho_ao_resp,&
!$OMP with_resp_density,&
!$OMP hfx_section,&
!$OMP para_env,&
!$OMP irep,&
!$OMP my_resp_only,&
!$OMP ncoset,&
!$OMP nco,&
!$OMP nso,&
!$OMP n_threads,&
!$OMP full_density_alpha,&
!$OMP full_density_resp,&
!$OMP full_density_resp_beta,&
!$OMP full_density_beta,&
!$OMP shm_initial_p,&
!$OMP shm_is_assoc_atomic_block,&
!$OMP shm_number_of_p_entries,&
!$OMP force,&
!$OMP virial,&
!$OMP my_adiabatic_rescale_factor,&
!$OMP shm_neris_total,&
!$OMP shm_neris_onthefly,&
!$OMP shm_storage_counter_integrals,&
!$OMP shm_neris_incore,&
!$OMP shm_nprim_ints,&
!$OMP shm_stor_count_max_val,&
!$OMP cell,&
!$OMP screen_coeffs_set,&
!$OMP screen_coeffs_kind,&
!$OMP screen_coeffs_pgf,&
!$OMP pgf_product_list_size,&
!$OMP nkind,&
!$OMP is_anti_symmetric,&
!$OMP radii_pgf,&
!$OMP shm_atomic_block_offset,&
!$OMP shm_set_offset,&
!$OMP shm_block_offset,&
!$OMP shm_task_counter,&
!$OMP shm_task_list,&
!$OMP shm_total_bins,&
!$OMP shm_pmax_block,&
!$OMP shm_atomic_pair_list,&
!$OMP shm_pmax_atom,&
!$OMP use_virial,&
!$OMP do_print_load_balance_info,&
!$OMP nkimages,nspins,my_x_data)&
!$OMP PRIVATE(actual_x_data,atomic_kind_set,atomic_offset_ac,atomic_offset_ad,atomic_offset_bc,&
!$OMP atomic_offset_bd,atom_of_kind,basis_info,&
!$OMP basis_parameter,bin,bins_left,bintime_start,bintime_stop,bits_max_val,buffer_left,buffer_overflow,buffer_size,&
!$OMP buffer_start,cache_size,cartesian_estimate,cartesian_estimate_virial,&
!$OMP coeffs_kind_max0,compression_factor,counter,current_counter,&
!$OMP distribution_forces,do_dynamic_load_balancing,do_it,do_periodic,ede_buffer1,ede_buffer2,&
!$OMP ede_primitives_tmp,ede_primitives_tmp_virial,&
!$OMP ede_work,ede_work2,ede_work2_virial,ede_work_forces,ede_work_virial,eps_schwarz,eps_storage,estimate_to_store_int,&
!$OMP fac,first_sgfb,forces_map,general_parameter,handle_bin,handle_getp,handle_load,&
!$OMP handle_main,hf_fraction,i_atom,iatom_end,iatom_start,ikind,integral_caches,integral_containers,&
!$OMP i_set_list_ij_start,i_set_list_ij_stop,i_set_list_kl_start,i_set_list_kl_stop,i_thread,iw,j_atom,jatom_end,&
!$OMP jatom_start,jkind,katom,k_atom,katom_block,katom_end,katom_start,kind_kind_idx,&
!$OMP kind_of,kkind,kset,la_max,la_min,last_sgf_global,latom,l_atom,&
!$OMP latom_block,latom_end,latom_start,lb_max,lb_min,lc_max,lc_min,ld_max,&
!$OMP ld_min,list_ij,list_kl,lkind,ln_10,load_balance_parameter,log10_eps_schwarz,log10_pmax,&
!$OMP log_2,logger,lset,max_am,max_contraction,max_contraction_val,max_pgf,max_set,&
!$OMP max_val1,max_val2,max_val2_set,maxval_cache,maxval_container,max_val_memory,mem_compression_counter,mem_eris,&
!$OMP mem_max_val,memory_parameter,my_bin_id,my_bin_size,my_current_counter,my_istart,my_thread_id,natom,&
!$OMP nbits,nblocks,ncob,ncos_max,ncpu,neris_incore,neris_onthefly,neris_tmp,&
!$OMP neris_total,nimages,nints,nneighbors,npgfa,npgfb,npgfc,npgfd,&
!$OMP nprim_ints,n_processes,nseta,nsetb,nsgfa,nsgfb,nsgfc,nsgfd,&
!$OMP nsgfl_a,nsgfl_b,nsgfl_c,nsgfl_d,nsgf_max,offset_ac_set,offset_ad_set,&
!$OMP offset_bc_set,offset_bd_set,pac_buf,pac_buf_beta,pac_buf_resp,pac_buf_resp_beta,pad_buf,pad_buf_beta,pad_buf_resp,&
!$OMP pad_buf_resp_beta,particle_set,pbc_buf,pbc_buf_beta,pbc_buf_resp,pbc_buf_resp_beta,pbd_buf,pbd_buf_beta, &
!$OMP pbd_buf_resp, pbd_buf_resp_beta, pgf_list_ij,&
!$OMP pgf_list_kl,pgf_product_list,pmax_atom,pmax_blocks,pmax_entry,pmax_tmp,potential_parameter,primitive_forces,&
!$OMP primitive_forces_virial,private_deriv,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,ra,rab2,&
!$OMP rb,rc,rcd2,rd,screening_parameter,screen_pmat_forces,set_list_ij,set_list_kl,&
!$OMP sgfb,spherical_estimate,spherical_estimate_virial,sphi_a_ext,sphi_a_ext_set,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
!$OMP sphi_b,sphi_b_ext,sphi_b_ext_set,sphi_b_u1,sphi_b_u2,sphi_b_u3,sphi_c_ext,sphi_c_ext_set,&
!$OMP sphi_c_u1,sphi_c_u2,sphi_c_u3,sphi_d_ext,sphi_d_ext_set,sphi_d_u1,sphi_d_u2,sphi_d_u3,&
!$OMP storage_counter_integrals,stor_count_max_val,swap_id,symm_fac,t2,tmp_block,tmp_i8, tmp_i4,&
!$OMP tmp_index,tmp_r_1,tmp_r_2,tmp_screen_pgf1,tmp_screen_pgf2,tmp_task_list,tmp_task_list_cost,tmp_virial,&
!$OMP treat_forces_in_core,use_disk_storage,zeta,zetb,zetc,zetd)
i_thread = 0
!$ i_thread = omp_get_thread_num()
ln_10 = LOG(10.0_dp)
log_2 = LOG10(2.0_dp)
actual_x_data => my_x_data(irep, i_thread + 1)
do_periodic = actual_x_data%periodic_parameter%do_periodic
screening_parameter = actual_x_data%screening_parameter
general_parameter = actual_x_data%general_parameter
potential_parameter = actual_x_data%potential_parameter
basis_info => actual_x_data%basis_info
load_balance_parameter => actual_x_data%load_balance_parameter
basis_parameter => actual_x_data%basis_parameter
! IF(with_resp_density) THEN
! ! here we also do a copy of the original load_balance_parameter
! ! since if MP2 forces are required after the calculation of the HFX
! ! derivatives we need to calculate again the SCF energy.
! ALLOCATE(load_balance_parameter_energy)
! load_balance_parameter_energy = actual_x_data%load_balance_parameter
! END IF
memory_parameter => actual_x_data%memory_parameter
cache_size = memory_parameter%cache_size
bits_max_val = memory_parameter%bits_max_val
use_disk_storage = .FALSE.
CALL get_qs_env(qs_env=qs_env, &
atomic_kind_set=atomic_kind_set, &
particle_set=particle_set)
max_set = basis_info%max_set
max_am = basis_info%max_am
natom = SIZE(particle_set, 1)
treat_forces_in_core = memory_parameter%treat_forces_in_core
hf_fraction = general_parameter%fraction
hf_fraction = hf_fraction*my_adiabatic_rescale_factor
eps_schwarz = screening_parameter%eps_schwarz_forces
IF (eps_schwarz < 0.0_dp) THEN
log10_eps_schwarz = log_zero
ELSE
!! ** make eps_schwarz tighter in case of a stress calculation
IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
log10_eps_schwarz = LOG10(eps_schwarz)
END IF
screen_pmat_forces = screening_parameter%do_p_screening_forces
! don't do density screening in case of response forces
IF (with_resp_density) screen_pmat_forces = .FALSE.
eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
counter = 0_int_8
!! Copy the libint_guy
private_deriv = actual_x_data%lib_deriv
!! Get screening parameter
CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
atom_of_kind=atom_of_kind, &
kind_of=kind_of)
!! Create helper arrray for mapping local basis functions to global ones
ALLOCATE (last_sgf_global(0:natom))
last_sgf_global(0) = 0
DO iatom = 1, natom
ikind = kind_of(iatom)
last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
END DO
ALLOCATE (max_contraction(max_set, natom))
max_contraction = 0.0_dp
max_pgf = 0
DO jatom = 1, natom
jkind = kind_of(jatom)
lb_max => basis_parameter(jkind)%lmax
npgfb => basis_parameter(jkind)%npgf
nsetb = basis_parameter(jkind)%nset
first_sgfb => basis_parameter(jkind)%first_sgf
sphi_b => basis_parameter(jkind)%sphi
nsgfb => basis_parameter(jkind)%nsgf
DO jset = 1, nsetb
! takes the primitive to contracted transformation into account
ncob = npgfb(jset)*ncoset(lb_max(jset))
sgfb = first_sgfb(1, jset)
! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
! the maximum value after multiplication with sphi_b
max_contraction(jset, jatom) = MAXVAL((/(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)/))
max_pgf = MAX(max_pgf, npgfb(jset))
END DO
END DO
ncpu = para_env%num_pe
n_processes = ncpu*n_threads
!! initialize some counters
neris_total = 0_int_8
neris_incore = 0_int_8
neris_onthefly = 0_int_8
mem_eris = 0_int_8
mem_max_val = 0_int_8
compression_factor = 0.0_dp
nprim_ints = 0_int_8
neris_tmp = 0_int_8
max_val_memory = 1_int_8
!$OMP MASTER
!! Set pointer for is_assoc helper
shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
shm_number_of_p_entries = actual_x_data%number_of_p_entries
shm_atomic_block_offset => actual_x_data%atomic_block_offset
shm_set_offset => actual_x_data%set_offset
shm_block_offset => actual_x_data%block_offset
NULLIFY (full_density_alpha)
NULLIFY (full_density_beta)
ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
!! Get the full density from all the processors
CALL timeset(routineN//"_getP", handle_getP)
CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
shm_block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
antisymmetric=is_anti_symmetric)
! for now only closed shell case
IF (with_resp_density) THEN
NULLIFY (full_density_resp)
ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
shm_block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
antisymmetric=is_anti_symmetric)
END IF
IF (nspins == 2) THEN
ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
shm_block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
antisymmetric=is_anti_symmetric)
! With resp density
IF (with_resp_density) THEN
NULLIFY (full_density_resp_beta)
ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
shm_block_offset, &
kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
antisymmetric=is_anti_symmetric)
END IF
END IF
CALL timestop(handle_getP)
!! Calculate max entries for screening on actual density. If screen_p_mat_forces = FALSE, the
!! matrix is initialized to 1.0
IF (screen_pmat_forces) THEN
NULLIFY (shm_initial_p)
shm_initial_p => actual_x_data%initial_p_forces
shm_pmax_atom => actual_x_data%pmax_atom_forces
IF (memory_parameter%recalc_forces) THEN
CALL update_pmax_mat(shm_initial_p, &
actual_x_data%map_atom_to_kind_atom, &
actual_x_data%set_offset, &
actual_x_data%atomic_block_offset, &
shm_pmax_atom, &
full_density_alpha, full_density_beta, &
natom, kind_of, basis_parameter, &
nkind, actual_x_data%is_assoc_atomic_block)
END IF
END IF
! restore as full density the HF density
! maybe in the future
IF (with_resp_density .AND. .NOT. my_resp_only) THEN
full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
IF (nspins == 2) THEN
full_density_beta(:, 1) = &
full_density_beta(:, 1) - full_density_resp_beta
END IF
! full_density_resp=full_density+full_density_resp
END IF
screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
radii_pgf => actual_x_data%pair_dist_radii_pgf
shm_pmax_block => actual_x_data%pmax_block
!$OMP END MASTER
!$OMP BARRIER
!$OMP MASTER
CALL timeset(routineN//"_load", handle_load)
!$OMP END MASTER
!! Load balance the work
IF (actual_x_data%memory_parameter%recalc_forces) THEN
IF (actual_x_data%b_first_load_balance_forces) THEN
CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
screen_coeffs_set, screen_coeffs_kind, &
shm_is_assoc_atomic_block, do_periodic, &
load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
shm_pmax_atom, i_thread, n_threads, &
cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
nkind, hfx_do_eval_forces, shm_pmax_block, use_virial)
actual_x_data%b_first_load_balance_forces = .FALSE.
ELSE
CALL hfx_update_load_balance(actual_x_data, para_env, &
load_balance_parameter, &
i_thread, n_threads, hfx_do_eval_forces)
END IF
END IF
!$OMP MASTER
CALL timestop(handle_load)
!$OMP END MASTER
!! precompute maximum nco and allocate scratch
ncos_max = 0
nsgf_max = 0
DO iatom = 1, natom
ikind = kind_of(iatom)
nseta = basis_parameter(ikind)%nset
npgfa => basis_parameter(ikind)%npgf
la_max => basis_parameter(ikind)%lmax
nsgfa => basis_parameter(ikind)%nsgf
DO iset = 1, nseta
ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
nsgf_max = MAX(nsgf_max, nsgfa(iset))
END DO
END DO
!! Allocate work arrays
ALLOCATE (primitive_forces(12*nsgf_max**4))
primitive_forces = 0.0_dp
! ** Allocate buffers for pgf_lists
nneighbors = SIZE(actual_x_data%neighbor_cells)
ALLOCATE (pgf_list_ij(max_pgf**2))
ALLOCATE (pgf_list_kl(max_pgf**2))
!$OMP ATOMIC READ
tmp_i4 = pgf_product_list_size
ALLOCATE (pgf_product_list(tmp_i4))
ALLOCATE (nimages(max_pgf**2))
DO i = 1, max_pgf**2
ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
END DO
ALLOCATE (pbd_buf(nsgf_max**2))
ALLOCATE (pbc_buf(nsgf_max**2))
ALLOCATE (pad_buf(nsgf_max**2))
ALLOCATE (pac_buf(nsgf_max**2))
IF (with_resp_density) THEN
ALLOCATE (pbd_buf_resp(nsgf_max**2))
ALLOCATE (pbc_buf_resp(nsgf_max**2))
ALLOCATE (pad_buf_resp(nsgf_max**2))
ALLOCATE (pac_buf_resp(nsgf_max**2))
END IF
IF (nspins == 2) THEN
ALLOCATE (pbd_buf_beta(nsgf_max**2))
ALLOCATE (pbc_buf_beta(nsgf_max**2))
ALLOCATE (pad_buf_beta(nsgf_max**2))
ALLOCATE (pac_buf_beta(nsgf_max**2))
IF (with_resp_density) THEN
ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
END IF
END IF
ALLOCATE (ede_work(ncos_max**4*12))
ALLOCATE (ede_work2(ncos_max**4*12))
ALLOCATE (ede_work_forces(ncos_max**4*12))
ALLOCATE (ede_buffer1(ncos_max**4))
ALLOCATE (ede_buffer2(ncos_max**4))
ALLOCATE (ede_primitives_tmp(nsgf_max**4))
IF (use_virial) THEN
ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
primitive_forces_virial = 0.0_dp
ALLOCATE (ede_work_virial(ncos_max**4*12*3))
ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
tmp_virial = 0.0_dp
ELSE
! dummy allocation
ALLOCATE (primitive_forces_virial(1))
primitive_forces_virial = 0.0_dp
ALLOCATE (ede_work_virial(1))
ALLOCATE (ede_work2_virial(1))
ALLOCATE (ede_primitives_tmp_virial(1))
END IF
!! Start caluclating integrals of the form (ab|cd) or (ij|kl)
!! In order to do so, there is a main four-loop structure that takes into account the two symmetries
!!
!! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
!!
!! by iterating in the following way
!!
!! DO iatom=1,natom and DO katom=1,natom
!! DO jatom=iatom,natom DO latom=katom,natom
!!
!! The third symmetry
!!
!! (ab|cd) = (cd|ab)
!!
!! is taken into account by the following criterion:
!!
!! IF(katom+latom<=iatom+jatom) THEN
!! IF( ((iatom+jatom).EQ.(katom+latom) ) .AND.(katom<iatom)) CYCLE
!!
!! Furthermore, if iatom==jatom==katom==latom we cycle, because the derivatives are zero anyway.
!!
!! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
!! factor ( symm_fac ).
!!
!! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
!! different hierarchies of short range screening matrices.
!!
!! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
!! defined as :
!!
!! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
!!
!! This tells the process where to start the main loops and how many bunches of integrals it has to
!! calculate. The original parallelization is a simple modulo distribution that is binned and
!! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
!! we need to know which was the initial cpu_id.
!! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
!! lstart only the first time the loop is executed. All subsequent loops have to start with one or
!! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
!$OMP BARRIER
!$OMP MASTER
CALL timeset(routineN//"_main", handle_main)
!$OMP END MASTER
!$OMP BARRIER
coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
!$OMP BARRIER
IF (actual_x_data%memory_parameter%recalc_forces) THEN
actual_x_data%distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
memory_parameter%ram_counter_forces = HUGE(memory_parameter%ram_counter_forces)
END IF
IF (treat_forces_in_core) THEN
buffer_overflow = .FALSE.
ELSE
buffer_overflow = .TRUE.
END IF
do_dynamic_load_balancing = .TRUE.
IF (n_threads == 1) do_dynamic_load_balancing = .FALSE.
IF (do_dynamic_load_balancing) THEN
my_bin_size = SIZE(actual_x_data%distribution_forces)
ELSE
my_bin_size = 1
END IF
!! We do not need the containers if MAX_MEM = 0
IF (treat_forces_in_core) THEN
!! IF new md step -> reinitialize containers
IF (actual_x_data%memory_parameter%recalc_forces) THEN
CALL dealloc_containers(actual_x_data%store_forces, memory_parameter%actual_memory_usage)
CALL alloc_containers(actual_x_data%store_forces, my_bin_size)
DO bin = 1, my_bin_size
maxval_container => actual_x_data%store_forces%maxval_container(bin)
integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
DO i = 1, 64
CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
END DO
END DO
END IF
!! Decompress the first cache for maxvals and integrals
IF (.NOT. actual_x_data%memory_parameter%recalc_forces) THEN
DO bin = 1, my_bin_size
maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
maxval_container => actual_x_data%store_forces%maxval_container(bin)
integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
memory_parameter%actual_memory_usage, .FALSE.)
DO i = 1, 64
CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
memory_parameter%actual_memory_usage, .FALSE.)
END DO
END DO
END IF
END IF
!$OMP BARRIER
!$OMP MASTER
IF (do_dynamic_load_balancing) THEN
! ** Lets construct the task list
shm_total_bins = 0
DO i = 1, n_threads
shm_total_bins = shm_total_bins + SIZE(my_x_data(irep, i)%distribution_forces)
END DO
ALLOCATE (tmp_task_list(shm_total_bins))
shm_task_counter = 0
DO i = 1, n_threads
DO bin = 1, SIZE(my_x_data(irep, i)%distribution_forces)
shm_task_counter = shm_task_counter + 1
tmp_task_list(shm_task_counter)%thread_id = i
tmp_task_list(shm_task_counter)%bin_id = bin
tmp_task_list(shm_task_counter)%cost = my_x_data(irep, i)%distribution_forces(bin)%cost
END DO
END DO
! ** Now sort the task list
ALLOCATE (tmp_task_list_cost(shm_total_bins))
ALLOCATE (tmp_index(shm_total_bins))
DO i = 1, shm_total_bins
tmp_task_list_cost(i) = tmp_task_list(i)%cost
END DO
CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
ALLOCATE (actual_x_data%task_list(shm_total_bins))
DO i = 1, shm_total_bins
actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
END DO
shm_task_list => actual_x_data%task_list
shm_task_counter = 0
DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
END IF
!! precalculate maximum density matrix elements in blocks
shm_pmax_block => actual_x_data%pmax_block
actual_x_data%pmax_block = 0.0_dp
IF (screen_pmat_forces) THEN
DO iatom_block = 1, SIZE(actual_x_data%blocks)
iatom_start = actual_x_data%blocks(iatom_block)%istart
iatom_end = actual_x_data%blocks(iatom_block)%iend
DO jatom_block = 1, SIZE(actual_x_data%blocks)
jatom_start = actual_x_data%blocks(jatom_block)%istart
jatom_end = actual_x_data%blocks(jatom_block)%iend
shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
END DO
END DO
END IF
shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
IF (actual_x_data%memory_parameter%recalc_forces) THEN
CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
actual_x_data%blocks)
END IF
my_bin_size = SIZE(actual_x_data%distribution_forces)
DO bin = 1, my_bin_size
actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
END DO
!$OMP END MASTER
!$OMP BARRIER
IF (treat_forces_in_core) THEN
IF (my_bin_size > 0) THEN
maxval_container => actual_x_data%store_forces%maxval_container(1)
maxval_cache => actual_x_data%store_forces%maxval_cache(1)
integral_containers => actual_x_data%store_forces%integral_containers(:, 1)
integral_caches => actual_x_data%store_forces%integral_caches(:, 1)
END IF
END IF
nblocks = load_balance_parameter%nblocks
bins_left = .TRUE.
do_it = .TRUE.
bin = 0
DO WHILE (bins_left)
IF (.NOT. do_dynamic_load_balancing) THEN
bin = bin + 1
IF (bin > my_bin_size) THEN
do_it = .FALSE.
bins_left = .FALSE.
ELSE
do_it = .TRUE.
bins_left = .TRUE.
distribution_forces => actual_x_data%distribution_forces(bin)
END IF
ELSE
!$OMP CRITICAL(hfxderivatives_critical)
shm_task_counter = shm_task_counter + 1
IF (shm_task_counter <= shm_total_bins) THEN
my_thread_id = shm_task_list(shm_task_counter)%thread_id
my_bin_id = shm_task_list(shm_task_counter)%bin_id
IF (treat_forces_in_core) THEN
maxval_cache => my_x_data(irep, my_thread_id)%store_forces%maxval_cache(my_bin_id)
maxval_container => my_x_data(irep, my_thread_id)%store_forces%maxval_container(my_bin_id)
integral_caches => my_x_data(irep, my_thread_id)%store_forces%integral_caches(:, my_bin_id)
integral_containers => my_x_data(irep, my_thread_id)%store_forces%integral_containers(:, my_bin_id)
END IF
distribution_forces => my_x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
do_it = .TRUE.
bins_left = .TRUE.
IF (actual_x_data%memory_parameter%recalc_forces) THEN
distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
END IF
counter = 0_Int_8
ELSE
do_it = .FALSE.
bins_left = .FALSE.
END IF
!$OMP END CRITICAL(hfxderivatives_critical)
END IF
IF (.NOT. do_it) CYCLE
!$OMP MASTER
CALL timeset(routineN//"_bin", handle_bin)
!$OMP END MASTER
bintime_start = m_walltime()
my_istart = distribution_forces%istart
my_current_counter = 0
IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
my_istart == -1_int_8) my_istart = nblocks**4
atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
latom_block = INT(MODULO(atom_block, nblocks)) + 1
tmp_block = atom_block/nblocks
katom_block = INT(MODULO(tmp_block, nblocks)) + 1
IF (latom_block < katom_block) CYCLE
tmp_block = tmp_block/nblocks
jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
tmp_block = tmp_block/nblocks
iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
IF (jatom_block < iatom_block) CYCLE
my_current_counter = my_current_counter + 1
IF (my_current_counter > distribution_forces%number_of_atom_quartets) EXIT atomic_blocks
iatom_start = actual_x_data%blocks(iatom_block)%istart
iatom_end = actual_x_data%blocks(iatom_block)%iend
jatom_start = actual_x_data%blocks(jatom_block)%istart
jatom_end = actual_x_data%blocks(jatom_block)%iend
katom_start = actual_x_data%blocks(katom_block)%istart
katom_end = actual_x_data%blocks(katom_block)%iend
latom_start = actual_x_data%blocks(latom_block)%istart
latom_end = actual_x_data%blocks(latom_block)%iend
pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block) + &
shm_pmax_block(latom_block, jatom_block), &
shm_pmax_block(latom_block, iatom_block) + &
shm_pmax_block(katom_block, jatom_block))
IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE
CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
jatom_start, jatom_end, &
kind_of, basis_parameter, particle_set, &
do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
latom_start, latom_end, &
kind_of, basis_parameter, particle_set, &
do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
DO i_list_ij = 1, list_ij%n_element
iatom = list_ij%elements(i_list_ij)%pair(1)
jatom = list_ij%elements(i_list_ij)%pair(2)
i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
ikind = list_ij%elements(i_list_ij)%kind_pair(1)
jkind = list_ij%elements(i_list_ij)%kind_pair(2)
ra = list_ij%elements(i_list_ij)%r1
rb = list_ij%elements(i_list_ij)%r2
rab2 = list_ij%elements(i_list_ij)%dist2
la_max => basis_parameter(ikind)%lmax
la_min => basis_parameter(ikind)%lmin
npgfa => basis_parameter(ikind)%npgf
nseta = basis_parameter(ikind)%nset
zeta => basis_parameter(ikind)%zet
nsgfa => basis_parameter(ikind)%nsgf
sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
nsgfl_a => basis_parameter(ikind)%nsgfl
sphi_a_u1 = UBOUND(sphi_a_ext, 1)
sphi_a_u2 = UBOUND(sphi_a_ext, 2)
sphi_a_u3 = UBOUND(sphi_a_ext, 3)
lb_max => basis_parameter(jkind)%lmax
lb_min => basis_parameter(jkind)%lmin
npgfb => basis_parameter(jkind)%npgf
nsetb = basis_parameter(jkind)%nset
zetb => basis_parameter(jkind)%zet
nsgfb => basis_parameter(jkind)%nsgf
sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
nsgfl_b => basis_parameter(jkind)%nsgfl
sphi_b_u1 = UBOUND(sphi_b_ext, 1)
sphi_b_u2 = UBOUND(sphi_b_ext, 2)
sphi_b_u3 = UBOUND(sphi_b_ext, 3)
i_atom = atom_of_kind(iatom)
forces_map(1, 1) = ikind
forces_map(1, 2) = i_atom
j_atom = atom_of_kind(jatom)
forces_map(2, 1) = jkind
forces_map(2, 2) = j_atom
DO i_list_kl = 1, list_kl%n_element
katom = list_kl%elements(i_list_kl)%pair(1)
latom = list_kl%elements(i_list_kl)%pair(2)
!All four centers equivalent => zero-contribution
!VIIRAL IF((iatom==jatom .AND. iatom==katom .AND. iatom==latom)) CYCLE