From 578b4e2f43b8c669f1bb13009a252e3e04935afa Mon Sep 17 00:00:00 2001 From: Frederick Stein <43850145+fstein93@users.noreply.github.com> Date: Wed, 12 Jun 2024 21:44:46 +0200 Subject: [PATCH] Fix AXK implementation and extend it to SOSEX (#3468) --- src/CMakeLists.txt | 2 +- src/aobasis/basis_set_types.F | 12 +- src/bse_util.F | 63 +- src/common/bibliography.F | 42 +- src/fm/cp_cfm_basic_linalg.F | 13 +- src/fm/cp_fm_basic_linalg.F | 10 +- src/fm/cp_fm_diag.F | 32 +- src/fm/cp_fm_struct.F | 398 +++++-- src/fm/cp_fm_types.F | 188 +--- src/group_dist_types.F | 6 +- src/hfx_admm_utils.F | 7 +- src/input_constants.F | 4 + src/input_cp2k_mp2.F | 67 +- src/local_gemm_api.F | 55 +- src/mp2.F | 24 +- src/mp2_gpw.F | 45 +- src/mp2_ri_2c.F | 23 +- src/mp2_ri_gpw.F | 182 ++-- src/mp2_ri_grad.F | 77 +- src/mp2_ri_grad_util.F | 113 +- src/mp2_setup.F | 7 +- src/mp2_types.F | 36 +- src/qs_active_space_types.F | 6 +- src/qs_density_matrices.F | 7 +- src/qs_kind_types.F | 14 +- src/qs_linres_op.F | 28 +- src/qs_localization_methods.F | 5 +- src/qs_matrix_pools.F | 50 +- src/rpa_axk.F | 493 --------- src/rpa_communication.F | 71 +- src/rpa_exchange.F | 977 ++++++++++++++++++ src/rpa_grad.F | 216 ++-- src/rpa_main.F | 80 +- src/rpa_rse.F | 1 + src/rpa_util.F | 10 +- tests/QS/regtest-ri-rpa-axk/TEST_FILES | 3 - tests/QS/regtest-ri-rpa-exchange/CH3.xyz | 7 + .../H2O_gas.xyz | 0 .../regtest-ri-rpa-exchange/RPA_AXK_CH3.inp | 100 ++ .../RPA_AXK_H2O.inp | 4 +- .../RPA_AXK_H2O_hfx.inp | 89 ++ .../RPA_AXK_H2O_svd.inp | 4 +- .../regtest-ri-rpa-exchange/RPA_SOSEX_H2O.inp | 89 ++ tests/QS/regtest-ri-rpa-exchange/TEST_FILES | 6 + tests/QS/regtest-rpa-lr/H2O-rpa-axk-lr.inp | 5 +- tests/QS/regtest-rpa-lr/TEST_FILES | 2 +- tests/TEST_DIRS | 2 +- 47 files changed, 2230 insertions(+), 1445 deletions(-) delete mode 100644 src/rpa_axk.F create mode 100644 src/rpa_exchange.F delete mode 100644 tests/QS/regtest-ri-rpa-axk/TEST_FILES create mode 100644 tests/QS/regtest-ri-rpa-exchange/CH3.xyz rename tests/QS/{regtest-ri-rpa-axk => regtest-ri-rpa-exchange}/H2O_gas.xyz (100%) create mode 100644 tests/QS/regtest-ri-rpa-exchange/RPA_AXK_CH3.inp rename tests/QS/{regtest-ri-rpa-axk => regtest-ri-rpa-exchange}/RPA_AXK_H2O.inp (94%) create mode 100644 tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O_hfx.inp rename tests/QS/{regtest-ri-rpa-axk => regtest-ri-rpa-exchange}/RPA_AXK_H2O_svd.inp (94%) create mode 100644 tests/QS/regtest-ri-rpa-exchange/RPA_SOSEX_H2O.inp create mode 100644 tests/QS/regtest-ri-rpa-exchange/TEST_FILES diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e2899e4edd..893285a8df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -710,7 +710,7 @@ list( restraint.F ri_environment_methods.F rmsd.F - rpa_axk.F + rpa_exchange.F rpa_communication.F rpa_grad.F rpa_gw.F diff --git a/src/aobasis/basis_set_types.F b/src/aobasis/basis_set_types.F index e703256550..fca9410a0b 100644 --- a/src/aobasis/basis_set_types.F +++ b/src/aobasis/basis_set_types.F @@ -45,7 +45,8 @@ MODULE basis_set_types nsoset USE orbital_symbols, ONLY: cgf_symbol,& sgf_symbol - USE orbital_transformation_matrices, ONLY: orbtramat + USE orbital_transformation_matrices, ONLY: init_spherical_harmonics,& + orbtramat USE sto_ng, ONLY: get_sto_ng USE string_utilities, ONLY: integer_to_string,& remove_word,& @@ -822,6 +823,15 @@ SUBROUTINE init_cphi_and_sphi(gto_basis_set) gto_basis_set%sphi = 0.0_dp IF (n .GT. 0) THEN + lmax = -1 + ! 
Ensure proper setup of orbtramat + DO iset = 1, gto_basis_set%nset + DO ishell = 1, gto_basis_set%nshell(iset) + lmax = MAX(lmax, gto_basis_set%l(ishell, iset)) + END DO + END DO + CALL init_spherical_harmonics(lmax, -1) + DO iset = 1, gto_basis_set%nset DO ishell = 1, gto_basis_set%nshell(iset) l = gto_basis_set%l(ishell, iset) diff --git a/src/bse_util.F b/src/bse_util.F index 23bf60b171..a868b06c1a 100644 --- a/src/bse_util.F +++ b/src/bse_util.F @@ -20,8 +20,6 @@ MODULE bse_util cp_fm_struct_type USE cp_fm_types, ONLY: cp_fm_create,& cp_fm_get_info,& - cp_fm_indxg2l,& - cp_fm_indxg2p,& cp_fm_release,& cp_fm_set_all,& cp_fm_to_fm_submat_general,& @@ -160,9 +158,9 @@ SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_i CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_general_add_bse' INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, & - iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, npcol, nprocs, & - nprow, nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, & - row_idx_loc, send_pcol, send_prow + iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, nprocs, & + nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, row_idx_loc, & + send_pcol, send_prow INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, & num_entries_send INTEGER, DIMENSION(4) :: indices_in @@ -187,9 +185,6 @@ SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_i nrow_block=nrow_block_out, & ncol_block=ncol_block_out) - nprow = fm_out%matrix_struct%context%num_pe(1) - npcol = fm_out%matrix_struct%context%num_pe(2) - ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1)) ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1)) @@ -237,11 +232,8 @@ SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_i idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out - send_prow = cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(1), nprow) - - send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(2), npcol) + send_prow = fm_out%matrix_struct%g2p_row(idx_row_out) + send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out) proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol) @@ -320,11 +312,8 @@ SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_i idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out - send_prow = cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(1), nprow) - - send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(2), npcol) + send_prow = fm_out%matrix_struct%g2p_row(idx_row_out) + send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out) proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol) entry_counter(proc_send) = entry_counter(proc_send) + 1 @@ -362,15 +351,12 @@ SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_i nprocs = para_env_out%num_pe !$OMP PARALLEL DO DEFAULT(NONE) & -!$OMP SHARED(fm_out, nprocs, nrow_block_out, ncol_block_out, & -!$OMP num_entries_rec, buffer_rec, beta, dummy, nprow, npcol) & 
+!$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec, beta) & !$OMP PRIVATE(iproc, i_entry_rec, ii, jj) DO iproc = 0, nprocs - 1 DO i_entry_rec = 1, num_entries_rec(iproc) - ii = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, & - dummy, dummy, nprow) - jj = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, & - dummy, dummy, npcol) + ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1)) + jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2)) fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec) END DO @@ -430,8 +416,8 @@ SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, & INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, & idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, & - ncol_local_in, ncol_local_out, npcol, nprocs, nprow, nrow_block_in, nrow_block_out, & - nrow_local_in, nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow + ncol_local_in, ncol_local_out, nprocs, nrow_block_in, nrow_block_out, nrow_local_in, & + nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, & num_entries_send INTEGER, DIMENSION(:), POINTER :: col_indices_in, col_indices_out, & @@ -465,9 +451,6 @@ SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, & nrow_block=nrow_block_out, & ncol_block=ncol_block_out) - nprow = fm_out%matrix_struct%context%num_pe(1) - npcol = fm_out%matrix_struct%context%num_pe(2) - ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1)) ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1)) @@ -529,11 +512,8 @@ SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, & DO row_idx_loc = 1, nrow_local_in idx_row_in = row_indices_in(row_idx_loc) - send_prow = cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(1), nprow) - - send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(2), npcol) + send_prow = fm_out%matrix_struct%g2p_row(idx_row_in) + send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out) proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol) @@ -628,11 +608,9 @@ SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, & DO row_idx_loc = 1, nrow_local_in idx_row_in = row_indices_in(row_idx_loc) - send_prow = cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(1), nprow) + send_prow = fm_out%matrix_struct%g2p_row(idx_row_in) - send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, & - fm_out%matrix_struct%first_p_pos(2), npcol) + send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out) proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol) entry_counter(proc_send) = entry_counter(proc_send) + 1 @@ -671,15 +649,12 @@ SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, & nprocs = para_env_out%num_pe !$OMP PARALLEL DO DEFAULT(NONE) & -!$OMP SHARED(fm_out, nprocs, nrow_block_out, ncol_block_out, & -!$OMP num_entries_rec, buffer_rec, nprow, npcol, dummy) & +!$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec) & !$OMP PRIVATE(iproc, i_entry_rec, ii, jj) DO iproc = 0, nprocs - 1 DO i_entry_rec = 1, num_entries_rec(iproc) - ii = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, & - dummy, dummy, nprow) - jj = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, & - dummy, dummy, npcol) + ii = 
fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1)) + jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2)) fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec) END DO diff --git a/src/common/bibliography.F b/src/common/bibliography.F index 5642df6be4..d34ca18336 100644 --- a/src/common/bibliography.F +++ b/src/common/bibliography.F @@ -91,7 +91,7 @@ MODULE bibliography Rycroft2009, Thomas2015, Brehm2018, Brehm2020, Shigeta2001, Heinecke2016, & Brehm2021, Bussy2021a, Bussy2021b, Ditler2021, Ditler2022, Mattiat2019, & Mattiat2022, Belleflamme2023, Knizia2013, Musaelian2023, Eriksen2020, & - Bussy2023, Wang2018, Zeng2023, Graml2024, Solca2024 + Bussy2023, Wang2018, Zeng2023, Graml2024, Solca2024, Freeman1977, Gruneis2009 CONTAINS @@ -5010,6 +5010,46 @@ SUBROUTINE add_all_references() "ER"), & DOI="10.1007/978-3-031-61763-8_13") + CALL add_reference(key=Freeman1977, ISI_record=s2a( & + "TY JOUR", & + "PB American Physical Society", & + "ID 10.1103/PhysRevB.15.5512", & + "DO 10.1103/PhysRevB.15.5512", & + "TI Coupled-cluster expansion applied to the electron gas: Inclusion of ring and exchange effects", & + "PY 1977", & + "UR https://link.aps.org/doi/10.1103/PhysRevB.15.5512", & + "JF Physical Review B", & + "JA Phys. Rev. B", & + "J1 PRB", & + "VL 15", & + "IS 12", & + "SP 5512", & + "EP 5521", & + "AU Freeman, David L.", & + "ER"), & + DOI="10.1103/PhysRevB.15.5512") + + CALL add_reference(key=Gruneis2009, ISI_record=s2a( & + "TY JOUR", & + "AU Grüneis, Andreas", & + " Marsman, Martijn", & + " Harl, Judith", & + " Schimka, Laurids", & + " Kresse, Georg", & + "TI Making the random phase approximation to electronic correlation accurate", & + "PY 2009", & + "Y1 2009/10/21", & + "JO The Journal of Chemical Physics", & + "JA J. Chem. 
Phys.", & + "VL 131", & + "IS 15", & + "SP 154115", & + "SN 0021-9606", & + "Y2 5/1/2024", & + "UR https://doi.org/10.1063/1.3250347", & + "ER"), & + DOI="10.1063/1.3250347") + END SUBROUTINE add_all_references END MODULE bibliography diff --git a/src/fm/cp_cfm_basic_linalg.F b/src/fm/cp_cfm_basic_linalg.F index a719d6c3ce..2b0e5fb14e 100644 --- a/src/fm/cp_cfm_basic_linalg.F +++ b/src/fm/cp_cfm_basic_linalg.F @@ -84,9 +84,8 @@ SUBROUTINE cp_cfm_det(matrix_a, det_a) COMPLEX(KIND=dp), DIMENSION(:), POINTER :: diag #if defined(__SCALAPACK) - INTEGER, EXTERNAL :: indxl2g - INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local, & - mypcol, ncol_local, ncol_block, icol_local, j + INTEGER :: myprow, nprow, npcol, nrow_local, irow_local, & + mypcol, ncol_local, icol_local, j INTEGER, DIMENSION(9) :: desca #endif @@ -111,21 +110,19 @@ SUBROUTINE cp_cfm_det(matrix_a, det_a) nprow = matrix_lu%matrix_struct%context%num_pe(1) npcol = matrix_lu%matrix_struct%context%num_pe(2) nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow) - nrow_block = matrix_lu%matrix_struct%nrow_block - ncol_block = matrix_lu%matrix_struct%ncol_block ncol_local = matrix_lu%matrix_struct%ncol_locals(mypcol) DO irow_local = 1, nrow_local - i = indxl2g(irow_local, nrow_block, myprow, matrix_lu%matrix_struct%first_p_pos(1), nprow) + i = matrix_lu%matrix_struct%row_indices(irow_local) DO icol_local = 1, ncol_local - j = indxl2g(icol_local, ncol_block, mypcol, matrix_lu%matrix_struct%first_p_pos(2), npcol) + j = matrix_lu%matrix_struct%col_indices(icol_local) IF (i == j) diag(i) = matrix_lu%local_data(irow_local, icol_local) END DO END DO CALL matrix_lu%matrix_struct%para_env%sum(diag) determinant = PRODUCT(diag) DO irow_local = 1, nrow_local - i = indxl2g(irow_local, nrow_block, myprow, matrix_lu%matrix_struct%first_p_pos(1), nprow) + i = matrix_lu%matrix_struct%row_indices(irow_local) IF (ipivot(irow_local) /= i) P = P + 1 END DO CALL matrix_lu%matrix_struct%para_env%sum(P) diff --git a/src/fm/cp_fm_basic_linalg.F b/src/fm/cp_fm_basic_linalg.F index e03e1b9f7b..94e879c0c9 100644 --- a/src/fm/cp_fm_basic_linalg.F +++ b/src/fm/cp_fm_basic_linalg.F @@ -105,7 +105,6 @@ SUBROUTINE cp_fm_det(matrix_a, det_a) REAL(KIND=dp), DIMENSION(:), POINTER :: diag #if defined(__SCALAPACK) - INTEGER, EXTERNAL :: indxl2g INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local INTEGER, DIMENSION(9) :: desca #endif @@ -134,7 +133,7 @@ SUBROUTINE cp_fm_det(matrix_a, det_a) nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow) nrow_block = matrix_lu%matrix_struct%nrow_block DO irow_local = 1, nrow_local - i = indxl2g(irow_local, nrow_block, myprow, matrix_lu%matrix_struct%first_p_pos(1), nprow) + i = matrix_lu%matrix_struct%row_indices(irow_local) IF (ipivot(irow_local) /= i) P = P + 1 END DO CALL matrix_lu%matrix_struct%para_env%sum(P) @@ -1305,7 +1304,6 @@ SUBROUTINE cp_fm_upper_to_full(matrix, work) ncol_block, ncol_local, & nrow_block, nrow_local INTEGER, DIMENSION(9) :: desca, descc - INTEGER, EXTERNAL :: indxl2g REAL(KIND=dp), DIMENSION(:, :), POINTER :: c REAL(KIND=sp), DIMENSION(:, :), POINTER :: c_sp #endif @@ -1342,11 +1340,9 @@ SUBROUTINE cp_fm_upper_to_full(matrix, work) descc(:) = matrix%matrix_struct%descriptor(:) DO icol_local = 1, ncol_local - icol_global = indxl2g(icol_local, ncol_block, mypcol, & - matrix%matrix_struct%first_p_pos(2), npcol) + icol_global = matrix%matrix_struct%col_indices(icol_local) DO irow_local = 1, nrow_local - irow_global = indxl2g(irow_local, nrow_block, myprow, & - 
matrix%matrix_struct%first_p_pos(1), nprow) + irow_global = matrix%matrix_struct%row_indices(irow_local) IF (irow_global > icol_global) THEN IF (matrix%use_sp) THEN c_sp(irow_local, icol_local) = 0.0_sp diff --git a/src/fm/cp_fm_diag.F b/src/fm/cp_fm_diag.F index 92a7e2cc34..7baffb358f 100644 --- a/src/fm/cp_fm_diag.F +++ b/src/fm/cp_fm_diag.F @@ -922,8 +922,7 @@ SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, #if defined(__SCALAPACK) INTEGER :: icol_local, ipcol, iprow, irow_global, & - irow_local, ncol_block, nrow_block - INTEGER, EXTERNAL :: indxg2l, indxg2p + irow_local #endif CALL timeset(routineN, handle) @@ -956,9 +955,6 @@ SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, END IF #if defined(__SCALAPACK) - nrow_block = work%matrix_struct%nrow_block - ncol_block = work%matrix_struct%ncol_block - eigenvectors => work%local_data ! Build matrix**exponent with eigenvector quenching @@ -969,18 +965,14 @@ SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, n_dependent = n_dependent + 1 - ipcol = indxg2p(icol_global, ncol_block, mypcol, & - work%matrix_struct%first_p_pos(2), npcol) + ipcol = work%matrix_struct%g2p_col(icol_global) IF (mypcol == ipcol) THEN - icol_local = indxg2l(icol_global, ncol_block, mypcol, & - work%matrix_struct%first_p_pos(2), npcol) + icol_local = work%matrix_struct%g2l_col(icol_global) DO irow_global = 1, nrow_global - iprow = indxg2p(irow_global, nrow_block, myprow, & - work%matrix_struct%first_p_pos(1), nprow) + iprow = work%matrix_struct%g2p_row(irow_global) IF (myprow == iprow) THEN - irow_local = indxg2l(irow_global, nrow_block, myprow, & - work%matrix_struct%first_p_pos(1), nprow) + irow_local = work%matrix_struct%g2l_row(irow_global) eigenvectors(irow_local, icol_local) = 0.0_dp END IF END DO @@ -990,18 +982,14 @@ SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, f = eigenvalues(icol_global)**p - ipcol = indxg2p(icol_global, ncol_block, mypcol, & - work%matrix_struct%first_p_pos(2), npcol) + ipcol = work%matrix_struct%g2p_col(icol_global) IF (mypcol == ipcol) THEN - icol_local = indxg2l(icol_global, ncol_block, mypcol, & - work%matrix_struct%first_p_pos(2), npcol) + icol_local = work%matrix_struct%g2l_col(icol_global) DO irow_global = 1, nrow_global - iprow = indxg2p(irow_global, nrow_block, myprow, & - work%matrix_struct%first_p_pos(1), nprow) + iprow = work%matrix_struct%g2p_row(irow_global) IF (myprow == iprow) THEN - irow_local = indxg2l(irow_global, nrow_block, myprow, & - work%matrix_struct%first_p_pos(1), nprow) + irow_local = work%matrix_struct%g2l_row(irow_global) eigenvectors(irow_local, icol_local) = & f*eigenvectors(irow_local, icol_local) END IF @@ -1107,7 +1095,7 @@ SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, & desc_ev_loc TYPE(mp_comm_type):: allgrp TYPE(cp_blacs_type) :: ictxt_loc - INTEGER, EXTERNAL :: numroc, indxl2g, indxg2l + INTEGER, EXTERNAL :: numroc #endif ! 
------------------------------------------------------------------------- diff --git a/src/fm/cp_fm_struct.F b/src/fm/cp_fm_struct.F index 7781ef7797..7622cd89e7 100644 --- a/src/fm/cp_fm_struct.F +++ b/src/fm/cp_fm_struct.F @@ -88,6 +88,13 @@ MODULE cp_fm_struct INTEGER, DIMENSION(:), POINTER :: row_indices => NULL(), col_indices => NULL(), & nrow_locals => NULL(), ncol_locals => NULL() INTEGER :: ref_count = -1, local_leading_dimension = -1 + CONTAINS + PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2p_row => cp_fm_indxg2p_row + PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2p_col => cp_fm_indxg2p_col + PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2l_row => cp_fm_indxg2l_row + PROCEDURE, PASS(struct), NON_OVERRIDABLE :: g2l_col => cp_fm_indxg2l_col + PROCEDURE, PASS(struct), NON_OVERRIDABLE :: l2g_row => cp_fm_indxl2g_row + PROCEDURE, PASS(struct), NON_OVERRIDABLE :: l2g_col => cp_fm_indxl2g_col END TYPE cp_fm_struct_type ! ************************************************************************************************** TYPE cp_fm_struct_p_type @@ -135,7 +142,7 @@ SUBROUTINE cp_fm_struct_create(fmstruct, para_env, context, nrow_global, & LOGICAL, OPTIONAL, INTENT(in) :: square_blocks LOGICAL, OPTIONAL, INTENT(in) :: force_block - INTEGER :: dumblock + INTEGER :: dumblock, i #if defined(__SCALAPACK) INTEGER :: iunit, stat INTEGER, EXTERNAL :: numroc @@ -277,6 +284,25 @@ SUBROUTINE cp_fm_struct_create(fmstruct, para_env, context, nrow_global, & END IF NULLIFY (fmstruct%row_indices, fmstruct%col_indices) + + ! the max should go away + ALLOCATE (fmstruct%row_indices(MAX(fmstruct%nrow_locals(fmstruct%context%mepos(1)), 1))) + DO i = 1, SIZE(fmstruct%row_indices) +#ifdef __SCALAPACK + fmstruct%row_indices(i) = fmstruct%l2g_row(i, fmstruct%context%mepos(1)) +#else + fmstruct%row_indices(i) = i +#endif + END DO + ALLOCATE (fmstruct%col_indices(MAX(fmstruct%ncol_locals(fmstruct%context%mepos(2)), 1))) + DO i = 1, SIZE(fmstruct%col_indices) +#ifdef __SCALAPACK + fmstruct%col_indices(i) = fmstruct%l2g_col(i, fmstruct%context%mepos(2)) +#else + fmstruct%col_indices(i) = i +#endif + END DO + fmstruct%ref_count = 1 IF (PRESENT(descriptor)) THEN @@ -406,21 +432,17 @@ SUBROUTINE cp_fm_struct_get(fmstruct, para_env, context, & ncol_global, first_p_pos, row_indices, & col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, & local_leading_dimension) - TYPE(cp_fm_struct_type), INTENT(INOUT) :: fmstruct - TYPE(mp_para_env_type), POINTER, OPTIONAL :: para_env - TYPE(cp_blacs_env_type), POINTER, OPTIONAL :: context - INTEGER, DIMENSION(9), INTENT(OUT), OPTIONAL :: descriptor - INTEGER, INTENT(out), OPTIONAL :: ncol_block, nrow_block, nrow_global, & - ncol_global, nrow_local, ncol_local, & - local_leading_dimension - INTEGER, DIMENSION(2), INTENT(out), OPTIONAL :: first_p_pos - INTEGER, DIMENSION(:), POINTER, OPTIONAL :: row_indices, col_indices, & - nrow_locals, ncol_locals - - INTEGER i, nprow, npcol, myprow, mypcol -#if defined(__SCALAPACK) - INTEGER, EXTERNAL :: indxl2g -#endif + TYPE(cp_fm_struct_type), INTENT(IN) :: fmstruct + TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env + TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context + INTEGER, DIMENSION(9), INTENT(OUT), OPTIONAL :: descriptor + INTEGER, INTENT(out), OPTIONAL :: ncol_block, nrow_block, nrow_global, & + ncol_global + INTEGER, DIMENSION(2), INTENT(out), OPTIONAL :: first_p_pos + INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices + INTEGER, INTENT(out), OPTIONAL :: nrow_local, ncol_local + INTEGER, DIMENSION(:), 
OPTIONAL, POINTER :: nrow_locals, ncol_locals + INTEGER, INTENT(out), OPTIONAL :: local_leading_dimension IF (PRESENT(para_env)) para_env => fmstruct%para_env IF (PRESENT(context)) context => fmstruct%context @@ -435,51 +457,11 @@ SUBROUTINE cp_fm_struct_get(fmstruct, para_env, context, & IF (PRESENT(local_leading_dimension)) local_leading_dimension = & fmstruct%local_leading_dimension - myprow = fmstruct%context%mepos(1) - mypcol = fmstruct%context%mepos(2) - nprow = fmstruct%context%num_pe(1) - npcol = fmstruct%context%num_pe(2) - - IF (PRESENT(nrow_local)) nrow_local = fmstruct%nrow_locals(myprow) - IF (PRESENT(ncol_local)) ncol_local = fmstruct%ncol_locals(mypcol) + IF (PRESENT(nrow_local)) nrow_local = fmstruct%nrow_locals(fmstruct%context%mepos(1)) + IF (PRESENT(ncol_local)) ncol_local = fmstruct%ncol_locals(fmstruct%context%mepos(2)) - IF (PRESENT(row_indices)) THEN - row_indices => fmstruct%row_indices - IF (.NOT. ASSOCIATED(row_indices)) THEN - ! the max should go away - ALLOCATE (fmstruct%row_indices(MAX(fmstruct%nrow_locals(myprow), 1))) - row_indices => fmstruct%row_indices -#ifdef __SCALAPACK - DO i = 1, SIZE(row_indices) - row_indices(i) = & - indxl2g(i, fmstruct%nrow_block, myprow, fmstruct%first_p_pos(1), nprow) - END DO -#else - DO i = 1, SIZE(row_indices) - row_indices(i) = i - END DO -#endif - END IF - END IF - - IF (PRESENT(col_indices)) THEN - col_indices => fmstruct%col_indices - IF (.NOT. ASSOCIATED(col_indices)) THEN - ALLOCATE (fmstruct%col_indices(MAX(fmstruct%ncol_locals(mypcol), 1))) - col_indices => fmstruct%col_indices -#ifdef __SCALAPACK - DO i = 1, SIZE(col_indices) - col_indices(i) = & - indxl2g(i, fmstruct%ncol_block, mypcol, fmstruct%first_p_pos(2), npcol) - END DO -#else - DO i = 1, SIZE(col_indices) - col_indices(i) = i - END DO -#endif - END IF - - END IF + IF (PRESENT(row_indices)) row_indices => fmstruct%row_indices + IF (PRESENT(col_indices)) col_indices => fmstruct%col_indices END SUBROUTINE cp_fm_struct_get ! ************************************************************************************************** @@ -658,4 +640,300 @@ FUNCTION cp_fm_struct_get_ncol_block() RESULT(res) res = optimal_blacs_col_block_size END FUNCTION cp_fm_struct_get_ncol_block +! ************************************************************************************************** +!> \brief wrapper to scalapack function INDXG2P that computes the row process +!> coordinate which possesses the entry of a distributed matrix specified +!> by a global index INDXGLOB. +!> \param struct ... +!> \param INDXGLOB ... +!> \return ... +!> \author Mauro Del Ben [MDB] - 12.2012, modified by F. Stein +! ************************************************************************************************** + FUNCTION cp_fm_indxg2p_row(struct, INDXGLOB) RESULT(G2P) + CLASS(cp_fm_struct_type), INTENT(IN) :: struct + INTEGER, INTENT(IN) :: INDXGLOB + INTEGER :: G2P + +#if defined(__SCALAPACK) + INTEGER :: number_of_process_rows + INTEGER, EXTERNAL :: indxg2p +#endif + +#if defined(__SCALAPACK) + + CALL struct%context%get(number_of_process_rows=number_of_process_rows) + + G2P = indxg2p(INDXGLOB, struct%nrow_block, 0, struct%first_p_pos(1), number_of_process_rows) + +#else + MARK_USED(struct) + MARK_USED(indxglob) + + G2P = 0 + +#endif + + END FUNCTION cp_fm_indxg2p_row + +! 
************************************************************************************************** +!> \brief wrapper to scalapack function INDXG2P that computes the col process +!> coordinate which possesses the entry of a distributed matrix specified +!> by a global index INDXGLOB. +!> \param struct ... +!> \param INDXGLOB ... +!> \return ... +!> \author Mauro Del Ben [MDB] - 12.2012, modified by F. Stein +! ************************************************************************************************** + FUNCTION cp_fm_indxg2p_col(struct, INDXGLOB) RESULT(G2P) + CLASS(cp_fm_struct_type), INTENT(IN) :: struct + INTEGER, INTENT(IN) :: INDXGLOB + INTEGER :: G2P + +#if defined(__SCALAPACK) + INTEGER :: number_of_process_columns + INTEGER, EXTERNAL :: indxg2p +#endif + +#if defined(__SCALAPACK) + + CALL struct%context%get(number_of_process_columns=number_of_process_columns) + + G2P = indxg2p(INDXGLOB, struct%ncol_block, 0, struct%first_p_pos(2), number_of_process_columns) + +#else + MARK_USED(struct) + MARK_USED(indxglob) + + G2P = 0 + +#endif + + END FUNCTION cp_fm_indxg2p_col + +! ************************************************************************************************** +!> \brief wrapper to scalapack function INDXG2L that computes the local index +!> of a distributed matrix entry pointed to by the global index INDXGLOB. +!> +!> Arguments +!> ========= +!> +!> INDXGLOB (global input) INTEGER +!> The global index of the distributed matrix entry. +!> +!> NB (global input) INTEGER +!> Block size, size of the blocks the distributed matrix is +!> split into. +!> +!> IPROC (local dummy) INTEGER +!> Dummy argument in this case in order to unify the calling +!> sequence of the tool-routines. +!> +!> ISRCPROC (local dummy) INTEGER +!> Dummy argument in this case in order to unify the calling +!> sequence of the tool-routines. +!> +!> NPROCS (global input) INTEGER +!> The total number processes over which the distributed +!> matrix is distributed. +!> +!> \param struct ... +!> \param INDXGLOB ... +!> \return ... +!> \author Mauro Del Ben [MDB] - 12.2012 +! ************************************************************************************************** + FUNCTION cp_fm_indxg2l_row(struct, INDXGLOB) RESULT(G2L) + CLASS(cp_fm_struct_type), INTENT(IN) :: struct + INTEGER, INTENT(IN) :: INDXGLOB + INTEGER :: G2L + +#if defined(__SCALAPACK) + INTEGER :: number_of_process_rows + INTEGER, EXTERNAL :: indxg2l +#endif + +#if defined(__SCALAPACK) + + CALL struct%context%get(number_of_process_rows=number_of_process_rows) + + G2L = indxg2l(INDXGLOB, struct%nrow_block, 0, struct%first_p_pos(1), number_of_process_rows) + +#else + MARK_USED(struct) + + G2L = INDXGLOB + +#endif + + END FUNCTION cp_fm_indxg2l_row + +! ************************************************************************************************** +!> \brief wrapper to scalapack function INDXG2L that computes the local index +!> of a distributed matrix entry pointed to by the global index INDXGLOB. +!> +!> Arguments +!> ========= +!> +!> INDXGLOB (global input) INTEGER +!> The global index of the distributed matrix entry. +!> +!> NB (global input) INTEGER +!> Block size, size of the blocks the distributed matrix is +!> split into. +!> +!> IPROC (local dummy) INTEGER +!> Dummy argument in this case in order to unify the calling +!> sequence of the tool-routines. +!> +!> ISRCPROC (local dummy) INTEGER +!> Dummy argument in this case in order to unify the calling +!> sequence of the tool-routines. 
+!> +!> NPROCS (global input) INTEGER +!> The total number processes over which the distributed +!> matrix is distributed. +!> +!> \param struct ... +!> \param INDXGLOB ... +!> \return ... +!> \author Mauro Del Ben [MDB] - 12.2012 +! ************************************************************************************************** + FUNCTION cp_fm_indxg2l_col(struct, INDXGLOB) RESULT(G2L) + CLASS(cp_fm_struct_type), INTENT(IN) :: struct + INTEGER, INTENT(IN) :: INDXGLOB + INTEGER :: G2L + +#if defined(__SCALAPACK) + INTEGER :: number_of_process_columns + INTEGER, EXTERNAL :: indxg2l +#endif + +#if defined(__SCALAPACK) + + CALL struct%context%get(number_of_process_columns=number_of_process_columns) + + G2L = indxg2l(INDXGLOB, struct%ncol_block, 0, struct%first_p_pos(2), number_of_process_columns) + +#else + MARK_USED(struct) + + G2L = INDXGLOB + +#endif + + END FUNCTION cp_fm_indxg2l_col + +! ************************************************************************************************** +!> \brief wrapper to scalapack function INDXL2G that computes the global index +!> of a distributed matrix entry pointed to by the local index INDXLOC +!> of the process indicated by IPROC. +!> +!> Arguments +!> ========= +!> +!> INDXLOC (global input) INTEGER +!> The local index of the distributed matrix entry. +!> +!> NB (global input) INTEGER +!> Block size, size of the blocks the distributed matrix is +!> split into. +!> +!> IPROC (local input) INTEGER +!> The coordinate of the process whose local array row or +!> column is to be determined. +!> +!> ISRCPROC (global input) INTEGER +!> The coordinate of the process that possesses the first +!> row/column of the distributed matrix. +!> +!> NPROCS (global input) INTEGER +!> The total number processes over which the distributed +!> matrix is distributed. +!> +!> \param struct ... +!> \param INDXLOC ... +!> \param IPROC ... +!> \return ... +!> \author Mauro Del Ben [MDB] - 12.2012 +! ************************************************************************************************** + FUNCTION cp_fm_indxl2g_row(struct, INDXLOC, IPROC) RESULT(L2G) + CLASS(cp_fm_struct_type), INTENT(IN) :: struct + INTEGER, INTENT(IN) :: INDXLOC, IPROC + INTEGER :: L2G + +#if defined(__SCALAPACK) + INTEGER :: number_of_process_rows + INTEGER, EXTERNAL :: indxl2g + + CALL struct%context%get(number_of_process_rows=number_of_process_rows) + + L2G = indxl2g(INDXLOC, struct%nrow_block, IPROC, struct%first_p_pos(1), number_of_process_rows) + +#else + MARK_USED(struct) + MARK_USED(indxloc) + MARK_USED(iproc) + + L2G = INDXLOC + +#endif + + END FUNCTION cp_fm_indxl2g_row + +! ************************************************************************************************** +!> \brief wrapper to scalapack function INDXL2G that computes the global index +!> of a distributed matrix entry pointed to by the local index INDXLOC +!> of the process indicated by IPROC. +!> +!> Arguments +!> ========= +!> +!> INDXLOC (global input) INTEGER +!> The local index of the distributed matrix entry. +!> +!> NB (global input) INTEGER +!> Block size, size of the blocks the distributed matrix is +!> split into. +!> +!> IPROC (local input) INTEGER +!> The coordinate of the process whose local array row or +!> column is to be determined. +!> +!> ISRCPROC (global input) INTEGER +!> The coordinate of the process that possesses the first +!> row/column of the distributed matrix. +!> +!> NPROCS (global input) INTEGER +!> The total number processes over which the distributed +!> matrix is distributed. 
+!> +!> \param struct ... +!> \param INDXLOC ... +!> \param IPROC ... +!> \return ... +!> \author Mauro Del Ben [MDB] - 12.2012 +! ************************************************************************************************** + FUNCTION cp_fm_indxl2g_col(struct, INDXLOC, IPROC) RESULT(L2G) + CLASS(cp_fm_struct_type), INTENT(IN) :: struct + INTEGER, INTENT(IN) :: INDXLOC, IPROC + INTEGER :: L2G + +#if defined(__SCALAPACK) + INTEGER :: number_of_process_columns + INTEGER, EXTERNAL :: indxl2g + + CALL struct%context%get(number_of_process_columns=number_of_process_columns) + + L2G = indxl2g(INDXLOC, struct%ncol_block, IPROC, struct%first_p_pos(2), number_of_process_columns) + +#else + MARK_USED(struct) + MARK_USED(indxloc) + MARK_USED(iproc) + + L2G = INDXLOC + +#endif + + END FUNCTION cp_fm_indxl2g_col + END MODULE cp_fm_struct diff --git a/src/fm/cp_fm_types.F b/src/fm/cp_fm_types.F index fff7532c34..6425e0cb31 100644 --- a/src/fm/cp_fm_types.F +++ b/src/fm/cp_fm_types.F @@ -77,10 +77,7 @@ MODULE cp_fm_types cp_fm_write_info, & cp_fm_to_fm_submat_general ! copy matrix across different contexts - PUBLIC :: cp_fm_indxg2p, & - cp_fm_indxg2l, & - cp_fm_indxl2g, & - cp_fm_pilaenv + PUBLIC :: cp_fm_pilaenv INTERFACE cp_fm_to_fm MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix @@ -2256,8 +2253,7 @@ SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format) CHARACTER(LEN=21) :: my_value_format INTEGER :: handle, i, j, max_block, & - ncol_global, nrow_global, & - nrow_block + ncol_global, nrow_global TYPE(mp_para_env_type), POINTER :: para_env #if defined(__SCALAPACK) INTEGER :: i_block, icol_local, & @@ -2274,7 +2270,7 @@ SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format) CALL timeset(routineN, handle) CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, & - nrow_block=nrow_block, para_env=para_env) + para_env=para_env) IF (PRESENT(value_format)) THEN CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11) @@ -2427,184 +2423,6 @@ SUBROUTINE cp_fm_read_unformatted(fm, unit) END SUBROUTINE cp_fm_read_unformatted -! ************************************************************************************************** -!> \brief wrapper to scalapack function INDXG2P that computes the process -!> coordinate which possesses the entry of a distributed matrix specified -!> by a global index INDXGLOB. -!> -!> Arguments -!> ========= -!> -!> INDXGLOB (global input) INTEGER -!> The global index of the element. -!> -!> NB (global input) INTEGER -!> Block size, size of the blocks the distributed matrix is -!> split into. -!> -!> IPROC (local dummy) INTEGER -!> Dummy argument in this case in order to unify the calling -!> sequence of the tool-routines. -!> -!> ISRCPROC (global input) INTEGER -!> The coordinate of the process that possesses the first -!> row/column of the distributed matrix. -!> -!> NPROCS (global input) INTEGER -!> The total number processes over which the matrix is -!> distributed. -!> -!> \param INDXGLOB ... -!> \param NB ... -!> \param IPROC ... -!> \param ISRCPROC ... -!> \param NPROCS ... -!> \return ... -!> \author Mauro Del Ben [MDB] - 12.2012 -! 
************************************************************************************************** - FUNCTION cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2P) - INTEGER, INTENT(IN) :: INDXGLOB, NB, IPROC, ISRCPROC, NPROCS - INTEGER :: G2P - -#if defined(__SCALAPACK) - INTEGER, EXTERNAL :: indxg2p -#endif - -#if defined(__SCALAPACK) - - G2P = indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) - -#else - MARK_USED(indxglob) - MARK_USED(iproc) - MARK_USED(isrcproc) - MARK_USED(nb) - MARK_USED(nprocs) - - G2P = 0 - -#endif - - END FUNCTION cp_fm_indxg2p - -! ************************************************************************************************** -!> \brief wrapper to scalapack function INDXG2L that computes the local index -!> of a distributed matrix entry pointed to by the global index INDXGLOB. -!> -!> Arguments -!> ========= -!> -!> INDXGLOB (global input) INTEGER -!> The global index of the distributed matrix entry. -!> -!> NB (global input) INTEGER -!> Block size, size of the blocks the distributed matrix is -!> split into. -!> -!> IPROC (local dummy) INTEGER -!> Dummy argument in this case in order to unify the calling -!> sequence of the tool-routines. -!> -!> ISRCPROC (local dummy) INTEGER -!> Dummy argument in this case in order to unify the calling -!> sequence of the tool-routines. -!> -!> NPROCS (global input) INTEGER -!> The total number processes over which the distributed -!> matrix is distributed. -!> -!> \param INDXGLOB ... -!> \param NB ... -!> \param IPROC ... -!> \param ISRCPROC ... -!> \param NPROCS ... -!> \return ... -!> \author Mauro Del Ben [MDB] - 12.2012 -! ************************************************************************************************** - FUNCTION cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2L) - INTEGER, INTENT(IN) :: INDXGLOB, NB, IPROC, ISRCPROC, NPROCS - INTEGER :: G2L - -#if defined(__SCALAPACK) - INTEGER, EXTERNAL :: indxg2l -#endif - -#if defined(__SCALAPACK) - - G2L = indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) - -#else - MARK_USED(iproc) - MARK_USED(isrcproc) - MARK_USED(nb) - MARK_USED(nprocs) - MARK_USED(indxglob) - - G2L = INDXGLOB - -#endif - - END FUNCTION cp_fm_indxg2l - -! ************************************************************************************************** -!> \brief wrapper to scalapack function INDXL2G that computes the global index -!> of a distributed matrix entry pointed to by the local index INDXLOC -!> of the process indicated by IPROC. -!> -!> Arguments -!> ========= -!> -!> INDXLOC (global input) INTEGER -!> The local index of the distributed matrix entry. -!> -!> NB (global input) INTEGER -!> Block size, size of the blocks the distributed matrix is -!> split into. -!> -!> IPROC (local input) INTEGER -!> The coordinate of the process whose local array row or -!> column is to be determined. -!> -!> ISRCPROC (global input) INTEGER -!> The coordinate of the process that possesses the first -!> row/column of the distributed matrix. -!> -!> NPROCS (global input) INTEGER -!> The total number processes over which the distributed -!> matrix is distributed. -!> -!> \param INDXLOC ... -!> \param NB ... -!> \param IPROC ... -!> \param ISRCPROC ... -!> \param NPROCS ... -!> \return ... -!> \author Mauro Del Ben [MDB] - 12.2012 -! 
************************************************************************************************** - FUNCTION cp_fm_indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS) RESULT(L2G) - INTEGER, INTENT(IN) :: INDXLOC, NB, IPROC, ISRCPROC, NPROCS - INTEGER :: L2G - -#if defined(__SCALAPACK) - INTEGER, EXTERNAL :: indxl2g -#endif - -#if defined(__SCALAPACK) - - L2G = indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS) - -#else - MARK_USED(iproc) - MARK_USED(isrcproc) - MARK_USED(nb) - MARK_USED(nprocs) - - L2G = INDXLOC - -#endif - - END FUNCTION cp_fm_indxl2g - ! ************************************************************************************************** !> \brief ... !> \param mult_type ... diff --git a/src/group_dist_types.F b/src/group_dist_types.F index 9d69b8170f..7cfc2dc3e8 100644 --- a/src/group_dist_types.F +++ b/src/group_dist_types.F @@ -255,9 +255,9 @@ END SUBROUTINE get_group_dist_gd1 PURE SUBROUTINE release_group_dist_d1(this) TYPE(group_dist_d1_type), INTENT(INOUT) :: this - DEALLOCATE (this%starts) - DEALLOCATE (this%ends) - DEALLOCATE (this%sizes) + IF (ALLOCATED(this%starts)) DEALLOCATE (this%starts) + IF (ALLOCATED(this%ends)) DEALLOCATE (this%ends) + IF (ALLOCATED(this%sizes)) DEALLOCATE (this%sizes) END SUBROUTINE release_group_dist_d1 diff --git a/src/hfx_admm_utils.F b/src/hfx_admm_utils.F index 4408be603e..3719ee8619 100644 --- a/src/hfx_admm_utils.F +++ b/src/hfx_admm_utils.F @@ -530,7 +530,7 @@ SUBROUTINE init_admm_gapw(qs_env) admm_kind_set(ikind)%natom = qs_kind_set(ikind)%natom admm_kind_set(ikind)%hard_radius = qs_kind_set(ikind)%hard_radius admm_kind_set(ikind)%max_rad_local = qs_kind_set(ikind)%max_rad_local - admm_kind_set(ikind)%gpw_r3d_rs_type_forced = qs_kind_set(ikind)%gpw_r3d_rs_type_forced + admm_kind_set(ikind)%gpw_type_forced = qs_kind_set(ikind)%gpw_type_forced admm_kind_set(ikind)%ngrid_rad = qs_kind_set(ikind)%ngrid_rad admm_kind_set(ikind)%ngrid_ang = qs_kind_set(ikind)%ngrid_ang @@ -2469,17 +2469,19 @@ END SUBROUTINE create_admm_xc_section !> \param recalc_integrals whether to recalculate integrals (default: value of HF%TREAT_LSD_IN_CORE) !> \param external_hfx_sections ... !> \param external_x_data ... +!> \param external_para_env ... !> \note !> Simplified version of subroutine hfx_ks_matrix() ! ************************************************************************************************** SUBROUTINE tddft_hfx_matrix(matrix_ks, rho_ao, qs_env, update_energy, recalc_integrals, & - external_hfx_sections, external_x_data) + external_hfx_sections, external_x_data, external_para_env) TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT), & TARGET :: matrix_ks, rho_ao TYPE(qs_environment_type), POINTER :: qs_env LOGICAL, INTENT(IN), OPTIONAL :: update_energy, recalc_integrals TYPE(section_vals_type), OPTIONAL, POINTER :: external_hfx_sections TYPE(hfx_type), DIMENSION(:, :), OPTIONAL, TARGET :: external_x_data + TYPE(mp_para_env_type), OPTIONAL, POINTER :: external_para_env CHARACTER(LEN=*), PARAMETER :: routineN = 'tddft_hfx_matrix' @@ -2513,6 +2515,7 @@ SUBROUTINE tddft_hfx_matrix(matrix_ks, rho_ao, qs_env, update_energy, recalc_int IF (PRESENT(external_hfx_sections)) hfx_sections => external_hfx_sections IF (PRESENT(external_x_data)) x_data => external_x_data + IF (PRESENT(external_para_env)) para_env => external_para_env my_update_energy = .TRUE. 
IF (PRESENT(update_energy)) my_update_energy = update_energy diff --git a/src/input_constants.F b/src/input_constants.F index 55af41ab69..f321b41bf7 100644 --- a/src/input_constants.F +++ b/src/input_constants.F @@ -1053,6 +1053,10 @@ MODULE input_constants wfc_mm_style_gemm = 11, & wfc_mm_style_syrk = 12 + ! RPA exchange corrections + INTEGER, PARAMETER, PUBLIC :: rpa_exchange_none = 0, & + rpa_exchange_axk = 1, rpa_exchange_sosex = 2 + ! solvers of the z-vector equations INTEGER, PARAMETER, PUBLIC :: z_solver_pople = 1, & z_solver_cg = 2, & diff --git a/src/input_cp2k_mp2.F b/src/input_cp2k_mp2.F index e893af12db..d9beac99e6 100644 --- a/src/input_cp2k_mp2.F +++ b/src/input_cp2k_mp2.F @@ -37,7 +37,7 @@ MODULE input_cp2k_mp2 ot_precond_s_inverse, ri_default, ri_rpa_g0w0_crossing_bisection, & ri_rpa_g0w0_crossing_newton, ri_rpa_g0w0_crossing_z_shot, soc_lda, soc_none, soc_pbe, & wfc_mm_style_gemm, wfc_mm_style_syrk, z_solver_cg, z_solver_pople, z_solver_richardson, & - z_solver_sd + z_solver_sd, rpa_exchange_none, rpa_exchange_axk, rpa_exchange_sosex USE input_cp2k_hfx, ONLY: create_hfx_section USE input_cp2k_kpoints, ONLY: create_kpoint_set_section USE input_keyword_types, ONLY: keyword_create, & @@ -559,23 +559,12 @@ SUBROUTINE create_ri_rpa(section) CALL section_add_keyword(section, keyword) CALL keyword_release(keyword) - CALL keyword_create( & - keyword, __LOCATION__, & - name="RI_AXK", & - variants=(/"AXK"/), & - description="Decide whether to perform an RPA-AXK calculation.", & - usage="RI_AXK", & - default_l_val=.FALSE., & - lone_keyword_l_val=.TRUE.) - CALL section_add_keyword(section, keyword) - CALL keyword_release(keyword) - CALL keyword_create( & keyword, __LOCATION__, & name="RSE", & variants=(/"SE"/), & description="Decide whether to add singles correction.", & - usage="RI_AXK", & + usage="RSE", & default_l_val=.FALSE., & lone_keyword_l_val=.TRUE.) CALL section_add_keyword(section, keyword) @@ -598,7 +587,7 @@ SUBROUTINE create_ri_rpa(section) CALL keyword_create( & keyword, __LOCATION__, & name="SCALE_RPA", & - description="Scales RPA energy contributions (RPA, AXK).", & + description="Scales RPA energy contributions (RPA, exchange correction).", & usage="SCALE_RPA 1.0", & default_r_val=1.0_dp) CALL section_add_keyword(section, keyword) @@ -623,8 +612,8 @@ SUBROUTINE create_ri_rpa(section) CALL section_add_subsection(section, subsection) CALL section_release(subsection) - ! here we generate a RI_AXK subsection - CALL create_ri_axk(subsection) + ! here we the RPA exchange section + CALL create_rpa_exchange(subsection) CALL section_add_subsection(section, subsection) CALL section_release(subsection) @@ -634,20 +623,50 @@ END SUBROUTINE create_ri_rpa !> \brief ... !> \param section ... ! ************************************************************************************************** - SUBROUTINE create_ri_axk(section) + SUBROUTINE create_rpa_exchange(section) TYPE(section_type), POINTER :: section - !TYPE(section_type), POINTER :: subsection + TYPE(keyword_type), POINTER :: keyword CPASSERT(.NOT. ASSOCIATED(section)) - CALL section_create(section, __LOCATION__, name="RI_AXK", & - description="Parameters influencing the RI-RPA-AXK method", & - n_keywords=0, n_subsections=0, repeats=.FALSE., & - citations=(/Bates2013/)) + CALL section_create(section, __LOCATION__, name="EXCHANGE_CORRECTION", & + description="Parameters influencing exchange corrections to RPA. No gradients available.", & + n_keywords=3, n_subsections=1, repeats=.FALSE.) 
+ + NULLIFY (keyword) + CALL keyword_create(keyword, __LOCATION__, name="_SECTION_PARAMETERS_", & + description="Choose the kind of exchange correction.", & + usage="&EXCHANGE_CORRECTION AXK", & + enum_c_vals=s2a("NONE", "AXK", "SOSEX"), & + enum_i_vals=(/rpa_exchange_none, rpa_exchange_axk, rpa_exchange_sosex/), & + enum_desc=s2a("Apply no exchange correction.", & + "Apply Approximate eXchange Kernel (AXK) correction.", & + "Apply Second Order Screened eXchange (SOSEX) correction."), & + default_i_val=rpa_exchange_none) + CALL section_add_keyword(section, keyword) + CALL keyword_release(keyword) + + CALL keyword_create( & + keyword, __LOCATION__, & + name="BLOCK_SIZE", & + description="Choose the block size of the contraction step. Larger block sizes improve performance but "// & + "require more memory (quadratically!, number of stored elements: $o^2\cdot N_B^2$). "// & + "Nonpositive numbers turn off blocking.", & + usage="BLOCK_SIZE 1", & + default_i_val=1) + CALL section_add_keyword(section, keyword) + CALL keyword_release(keyword) - !NULLIFY (keyword, subsection) + CALL keyword_create( & + keyword, __LOCATION__, & + name="USE_HFX_IMPLEMENTATION", & + description="Use a HF-based implementation with RI_RPA%HF section. Recommended for large systems.", & + usage="USE_HFX_IMPLEMENTATION T", & + default_l_val=.FALSE., lone_keyword_l_val=.TRUE.) + CALL section_add_keyword(section, keyword) + CALL keyword_release(keyword) - END SUBROUTINE create_ri_axk + END SUBROUTINE create_rpa_exchange ! ************************************************************************************************** !> \brief ... diff --git a/src/local_gemm_api.F b/src/local_gemm_api.F index 2d894afc98..4f9a897467 100644 --- a/src/local_gemm_api.F +++ b/src/local_gemm_api.F @@ -6,11 +6,11 @@ !--------------------------------------------------------------------------------------------------! MODULE local_gemm_api - USE ISO_C_BINDING, ONLY: C_LOC, & - C_NULL_PTR, & + USE ISO_C_BINDING, ONLY: C_NULL_PTR, & C_PTR #if defined(__SPLA) && defined(__OFFLOAD_GEMM) USE input_constants, ONLY: do_dgemm_spla + USE ISO_C_BINDING, ONLY: IS_C_ASSOCIATED USE spla, ONLY: SPLA_PU_HOST, & SPLA_PU_GPU, & SPLA_OP_NONE, & @@ -36,10 +36,7 @@ MODULE local_gemm_api CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'local_gemm_api' - PUBLIC :: local_gemm, & - local_gemm_create, & - local_gemm_destroy, & - local_gemm_set_op_threshold_gpu, & + PUBLIC :: local_gemm_ctxt_type, & local_gemm_set_library INTEGER, PARAMETER, PUBLIC :: & @@ -48,6 +45,15 @@ MODULE local_gemm_api INTEGER, PRIVATE :: do_dgemm = 1 + TYPE local_gemm_ctxt_type + TYPE(C_PTR) :: spla_context = C_NULL_PTR + CONTAINS + PROCEDURE, PASS(ctx), NON_OVERRIDABLE :: create => local_gemm_create + PROCEDURE, PASS(ctx), NON_OVERRIDABLE :: destroy => local_gemm_destroy + PROCEDURE, PASS(ctx), NON_OVERRIDABLE :: set_op_threshold_gpu => local_gemm_set_op_threshold_gpu + PROCEDURE, PASS(ctx), NON_OVERRIDABLE :: gemm => local_gemm + END TYPE + CONTAINS ! ************************************************************************************************** @@ -96,7 +102,7 @@ SUBROUTINE local_gemm(opA, opB, m, n, k, & REAL(8), DIMENSION(:, :), INTENT(inout), TARGET :: C #endif INTEGER, INTENT(in) :: ldc - TYPE(C_ptr), OPTIONAL, INTENT(inout) :: ctx + CLASS(local_gemm_ctxt_type), INTENT(inout) :: ctx INTEGER :: handle ! no point of using SPLA offloading on CPU ONLY nodes @@ -108,7 +114,7 @@ SUBROUTINE local_gemm(opA, opB, m, n, k, & ! 
no point of using SPLA offloading on CPU ONLY nodes #if defined(__SPLA) && defined(__OFFLOAD_GEMM) - IF (PRESENT(ctx) .AND. do_dgemm == do_dgemm_spla) THEN + IF (do_dgemm == do_dgemm_spla) THEN IF (opA == 'N') spla_op_A = SPLA_OP_NONE IF (opA == 'T') spla_op_A = SPLA_OP_TRANSPOSE @@ -127,7 +133,7 @@ SUBROUTINE local_gemm(opA, opB, m, n, k, & m, n, k, alpha, & c_loc(A), lda, & c_loc(B), ldb, & - beta, c_loc(C), ldc, ctx) + beta, c_loc(C), ldc, ctx%spla_context) CPASSERT(spla_error == SPLA_SUCCESS) ELSE #endif @@ -149,24 +155,25 @@ END SUBROUTINE local_gemm !> \param pu processing unit to run the (s,d,c,z}dgemm ! ************************************************************************************************** SUBROUTINE local_gemm_create(ctx, pu) - TYPE(c_ptr), INTENT(out) :: ctx + CLASS(local_gemm_ctxt_type), INTENT(out) :: ctx INTEGER, INTENT(in) :: pu #if defined(__SPLA) && defined(__OFFLOAD_GEMM) INTEGER :: error_ - IF (do_dgemm == do_dgemm_spla) THEN - CALL offload_activate_chosen_device() + IF (.NOT. IS_C_ASSOCIATED(ctx%spla_context)) THEN + IF (do_dgemm == do_dgemm_spla) THEN + CALL offload_activate_chosen_device() - error_ = spla_ctx_create(ctx, pu) - CPASSERT(error_ == SPLA_SUCCESS) - ELSE - ctx = C_NULL_PTR + error_ = spla_ctx_create(ctx%spla_context, pu) + CPASSERT(error_ == SPLA_SUCCESS) + ELSE + ctx%spla_context = C_NULL_PTR + END IF END IF #else MARK_USED(pu) - MARK_USED(ctx) - ctx = C_NULL_PTR + ctx%spla_context = C_NULL_PTR #endif END SUBROUTINE local_gemm_create @@ -175,7 +182,7 @@ END SUBROUTINE local_gemm_create !> \param ctx handle ! ************************************************************************************************** SUBROUTINE local_gemm_destroy(ctx) - TYPE(c_ptr), INTENT(inout) :: ctx + CLASS(local_gemm_ctxt_type), INTENT(inout) :: ctx #if defined(__SPLA) && defined(__OFFLOAD_GEMM) INTEGER :: error_ @@ -183,13 +190,11 @@ SUBROUTINE local_gemm_destroy(ctx) IF (do_dgemm == do_dgemm_spla) THEN CALL offload_activate_chosen_device() - error_ = spla_ctx_destroy(ctx) + error_ = spla_ctx_destroy(ctx%spla_context) CPASSERT(error_ == SPLA_SUCCESS) END IF -#else - MARK_USED(ctx) #endif - ctx = C_NULL_PTR + ctx%spla_context = C_NULL_PTR END SUBROUTINE local_gemm_destroy ! ************************************************************************************************** @@ -198,14 +203,14 @@ END SUBROUTINE local_gemm_destroy !> \param opThresholdGPU ... ! 
************************************************************************************************** SUBROUTINE local_gemm_set_op_threshold_gpu(ctx, opThresholdGPU) - TYPE(c_ptr) :: ctx + CLASS(local_gemm_ctxt_type), INTENT(INOUT) :: ctx INTEGER, INTENT(in) :: opThresholdGPU #if defined(__SPLA) && defined(__OFFLOAD_GEMM) INTEGER :: error__ CALL offload_activate_chosen_device() - error__ = spla_ctx_set_op_threshold_gpu(ctx, opThresholdGPU) + error__ = spla_ctx_set_op_threshold_gpu(ctx%spla_context, opThresholdGPU) #else MARK_USED(ctx) MARK_USED(opThresholdGPU) diff --git a/src/mp2.F b/src/mp2.F index f20b80e04f..158dde9aac 100644 --- a/src/mp2.F +++ b/src/mp2.F @@ -33,7 +33,6 @@ MODULE mp2 cp_fm_struct_release,& cp_fm_struct_type USE cp_fm_types, ONLY: cp_fm_create,& - cp_fm_get_info,& cp_fm_get_submatrix,& cp_fm_release,& cp_fm_set_all,& @@ -50,7 +49,10 @@ MODULE mp2 USE input_constants, ONLY: cholesky_inverse,& cholesky_off,& do_eri_gpw,& - do_eri_mme + do_eri_mme,& + rpa_exchange_axk,& + rpa_exchange_none,& + rpa_exchange_sosex USE input_section_types, ONLY: section_vals_get,& section_vals_get_subs_vals,& section_vals_type @@ -116,8 +118,8 @@ SUBROUTINE mp2_main(qs_env, calc_forces) CHARACTER(len=*), PARAMETER :: routineN = 'mp2_main' INTEGER :: bin, cholesky_method, dimen, handle, handle2, i, i_thread, iatom, ikind, irep, & - ispin, max_nset, my_bin_size, n_rep_hf, n_threads, nao, natom, ncol_block, ndep, & - nfullcols_total, nfullrows_total, nkind, nrow_block, nspins, unit_nr + ispin, max_nset, my_bin_size, n_rep_hf, n_threads, nao, natom, ndep, nfullcols_total, & + nfullrows_total, nkind, nspins, unit_nr INTEGER(KIND=int_8) :: mem INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, nelec LOGICAL :: calc_ex, do_admm, do_admm_rpa_exx, do_dynamic_load_balancing, do_exx, do_gw, & @@ -326,8 +328,6 @@ SUBROUTINE mp2_main(qs_env, calc_forces) CALL cp_fm_struct_release(fm_struct) - CALL cp_fm_get_info(matrix=fm_matrix_ks, nrow_block=nrow_block, ncol_block=ncol_block) - IF (scf_env%cholesky_method == cholesky_off) THEN CALL cp_fm_power(fm_matrix_s, fm_matrix_work, -0.5_dp, scf_control%eps_eigval, ndep) cholesky_method = cholesky_off @@ -602,7 +602,7 @@ SUBROUTINE mp2_main(qs_env, calc_forces) ! Scale energy contributions Emp2 = Emp2*mp2_env%ri_rpa%scale_rpa - mp2_env%ri_rpa%ener_axk = mp2_env%ri_rpa%ener_axk*mp2_env%ri_rpa%scale_rpa + mp2_env%ri_rpa%ener_exchange = mp2_env%ri_rpa%ener_exchange*mp2_env%ri_rpa%scale_rpa CASE (ri_mp2_laplace) ! perform RI-SOS-Laplace-MP2 energy calculation, most part of the code in common @@ -658,8 +658,10 @@ SUBROUTINE mp2_main(qs_env, calc_forces) IF (.NOT. update_xc_energy) Emp2 = 0.0_dp IF (unit_nr > 0 .AND. update_xc_energy) WRITE (unit_nr, '(T3,A,T56,F25.14)') 'RI-RPA energy = ', Emp2 - IF (mp2_env%ri_rpa%do_ri_axk) THEN - IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T56,F25.14)') 'RI-RPA-AXK energy=', mp2_env%ri_rpa%ener_axk + IF (mp2_env%ri_rpa%exchange_correction == rpa_exchange_axk) THEN + IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T56,F25.14)') 'RI-RPA-AXK energy=', mp2_env%ri_rpa%ener_exchange + ELSE IF (mp2_env%ri_rpa%exchange_correction == rpa_exchange_sosex) THEN + IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T56,F25.14)') 'RI-RPA-SOSEX energy=', mp2_env%ri_rpa%ener_exchange END IF IF (mp2_env%ri_rpa%do_rse) THEN IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T56,F25.14)') 'Diagonal singles correction (dRSE) = ', & @@ -672,8 +674,8 @@ SUBROUTINE mp2_main(qs_env, calc_forces) IF (unit_nr > 0) WRITE (unit_nr, *) ! we have it !!!! 
- IF (mp2_env%ri_rpa%do_ri_axk) THEN - Emp2 = Emp2 + mp2_env%ri_rpa%ener_axk + IF (mp2_env%ri_rpa%exchange_correction /= rpa_exchange_none) THEN + Emp2 = Emp2 + mp2_env%ri_rpa%ener_exchange END IF IF (mp2_env%ri_rpa%do_rse) THEN Emp2 = Emp2 + mp2_env%ri_rpa%rse_corr diff --git a/src/mp2_gpw.F b/src/mp2_gpw.F index 8c90b2d7d9..373a4b96de 100644 --- a/src/mp2_gpw.F +++ b/src/mp2_gpw.F @@ -54,7 +54,7 @@ MODULE mp2_gpw hfx_compression_type USE input_constants, ONLY: & do_eri_gpw, do_eri_os, do_potential_coulomb, do_potential_id, do_potential_truncated, & - eri_default, mp2_method_gpw, ri_default, ri_mp2_method_gpw + eri_default, mp2_method_gpw, ri_default, ri_mp2_method_gpw, rpa_exchange_none USE input_section_types, ONLY: section_vals_val_get USE kinds, ONLY: dp USE kpoint_types, ONLY: kpoint_type @@ -473,14 +473,13 @@ SUBROUTINE mp2_gpw_main(qs_env, mp2_env, Emp2, Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T END DO CALL get_group_dist(gd_array, color_sub, my_group_L_start, my_group_L_end, my_group_L_size) END IF - ! Copy mo coeffs for RPA AXK - IF (mp2_env%ri_rpa%do_ri_axk) THEN - NULLIFY (mp2_env%ri_rpa%mo_coeff_o) - CALL dbcsr_init_p(mp2_env%ri_rpa%mo_coeff_o) - CALL dbcsr_copy(mp2_env%ri_rpa%mo_coeff_o, mo_coeff_o(1)%matrix, name="mo_coeff_o") - NULLIFY (mp2_env%ri_rpa%mo_coeff_v) - CALL dbcsr_init_p(mp2_env%ri_rpa%mo_coeff_v) - CALL dbcsr_copy(mp2_env%ri_rpa%mo_coeff_v, mo_coeff_v(1)%matrix, name="mo_coeff_v") + ! Copy mo coeffs for RPA exchange correction + IF (mp2_env%ri_rpa%exchange_correction /= rpa_exchange_none) THEN + ALLOCATE (mp2_env%ri_rpa%mo_coeff_o(nspins), mp2_env%ri_rpa%mo_coeff_v(nspins)) + DO ispin = 1, nspins + CALL dbcsr_copy(mp2_env%ri_rpa%mo_coeff_o(ispin), mo_coeff_o(ispin)%matrix, name="mo_coeff_o") + CALL dbcsr_copy(mp2_env%ri_rpa%mo_coeff_v(ispin), mo_coeff_v(ispin)%matrix, name="mo_coeff_v") + END DO END IF IF (.NOT. do_im_time) THEN @@ -500,28 +499,21 @@ SUBROUTINE mp2_gpw_main(qs_env, mp2_env, Emp2, Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T END IF - IF (.NOT. calc_forces) THEN - IF (.NOT. mp2_env%ri_rpa%do_ri_axk) THEN + ! Release some memory for RPA exchange correction + IF (calc_forces .AND. do_im_time .OR. & + (.NOT. calc_forces .AND. mp2_env%ri_rpa%exchange_correction == rpa_exchange_none)) THEN - CALL dbcsr_release(mat_munu%matrix) - DEALLOCATE (mat_munu%matrix) - - CALL release_neighbor_list_sets(sab_orb_sub) + CALL dbcsr_release(mat_munu%matrix) + DEALLOCATE (mat_munu%matrix) - END IF + CALL release_neighbor_list_sets(sab_orb_sub) END IF ! decide if to do RI-RPA or RI-MP2 IF (my_do_ri_rpa .OR. my_do_ri_sos_laplace_mp2) THEN - IF (do_im_time) THEN - - CALL create_matrix_P(mat_P_global, qs_env, mp2_env, para_env) - - ! To be removed later - ALLOCATE (BIb_C(nspins)) - END IF + IF (do_im_time) CALL create_matrix_P(mat_P_global, qs_env, mp2_env, para_env) IF (.NOT. ALLOCATED(BIb_C)) ALLOCATE (BIb_C(nspins)) IF (.NOT. 
ALLOCATED(BIb_C_gw)) ALLOCATE (BIb_C_gw(nspins)) @@ -535,8 +527,7 @@ SUBROUTINE mp2_gpw_main(qs_env, mp2_env, Emp2, Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T fm_matrix_Minv, fm_matrix_Minv_Vtrunc_Minv, kpoints, & Eigenval, nmo, homo, dimen_RI, dimen_RI_red, gw_corr_lev_occ, gw_corr_lev_virt, & unit_nr, my_do_ri_sos_laplace_mp2, my_do_gw, do_im_time, do_bse, matrix_s, & - mat_munu, mat_P_global, & - t_3c_M, t_3c_O, t_3c_O_compressed, t_3c_O_ind, & + mat_munu, mat_P_global, t_3c_M, t_3c_O, t_3c_O_compressed, t_3c_O_ind, & starts_array_mc, ends_array_mc, & starts_array_mc_block, ends_array_mc_block, calc_forces) @@ -553,8 +544,8 @@ SUBROUTINE mp2_gpw_main(qs_env, mp2_env, Emp2, Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T IF (calc_forces) CALL cp_fm_release(fm_matrix_PQ) END IF - ! Release some memory for AXK - IF (mp2_env%ri_rpa%do_ri_axk .OR. (calc_forces .AND. do_im_time)) THEN + ! Release some memory for RPA exchange correction + IF (mp2_env%ri_rpa%exchange_correction /= rpa_exchange_none) THEN CALL dbcsr_release(mat_munu%matrix) DEALLOCATE (mat_munu%matrix) diff --git a/src/mp2_ri_2c.F b/src/mp2_ri_2c.F index 652eb720c8..479884d4af 100644 --- a/src/mp2_ri_2c.F +++ b/src/mp2_ri_2c.F @@ -52,7 +52,6 @@ MODULE mp2_ri_2c USE cp_fm_types, ONLY: cp_fm_copy_general,& cp_fm_create,& cp_fm_get_info,& - cp_fm_indxg2p,& cp_fm_release,& cp_fm_set_all,& cp_fm_to_fm,& @@ -1219,8 +1218,7 @@ SUBROUTINE fill_fm_L_from_L_loc_non_blocking(fm_matrix_L, L_local_col, para_env, CHARACTER(LEN=*), PARAMETER :: routineN = 'fill_fm_L_from_L_loc_non_blocking' INTEGER :: dummy_proc, handle, handle2, i_entry_rec, i_row, i_row_global, iproc, j_col, & - j_col_global, LLL, MMM, ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, & - proc_send, send_pcol, send_prow + j_col_global, LLL, MMM, ncol_local, nrow_local, proc_send, send_pcol, send_prow INTEGER, ALLOCATABLE, DIMENSION(:) :: entry_counter, num_entries_rec, & num_entries_send INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices @@ -1237,12 +1235,7 @@ SUBROUTINE fill_fm_L_from_L_loc_non_blocking(fm_matrix_L, L_local_col, para_env, nrow_local=nrow_local, & ncol_local=ncol_local, & row_indices=row_indices, & - col_indices=col_indices, & - nrow_block=nrow_block, & - ncol_block=ncol_block) - - nprow = fm_matrix_L%matrix_struct%context%num_pe(1) - npcol = fm_matrix_L%matrix_struct%context%num_pe(2) + col_indices=col_indices) ALLOCATE (num_entries_rec(0:para_env%num_pe - 1)) ALLOCATE (num_entries_send(0:para_env%num_pe - 1)) @@ -1255,13 +1248,11 @@ SUBROUTINE fill_fm_L_from_L_loc_non_blocking(fm_matrix_L, L_local_col, para_env, ! get the process, where the elements from L_local_col have to be sent DO LLL = 1, dimen_RI - send_prow = cp_fm_indxg2p(LLL, nrow_block, dummy_proc, & - fm_matrix_L%matrix_struct%first_p_pos(1), nprow) + send_prow = fm_matrix_L%matrix_struct%g2p_row(LLL) DO MMM = my_group_L_start, my_group_L_end - send_pcol = cp_fm_indxg2p(MMM, ncol_block, dummy_proc, & - fm_matrix_L%matrix_struct%first_p_pos(2), npcol) + send_pcol = fm_matrix_L%matrix_struct%g2p_col(MMM) proc_send = fm_matrix_L%matrix_struct%context%blacs2mpi(send_prow, send_pcol) @@ -1337,13 +1328,11 @@ SUBROUTINE fill_fm_L_from_L_loc_non_blocking(fm_matrix_L, L_local_col, para_env, ! 
write the data and the info to buffer_send DO LLL = 1, dimen_RI - send_prow = cp_fm_indxg2p(LLL, nrow_block, dummy_proc, & - fm_matrix_L%matrix_struct%first_p_pos(1), nprow) + send_prow = fm_matrix_L%matrix_struct%g2p_row(LLL) DO MMM = my_group_L_start, my_group_L_end - send_pcol = cp_fm_indxg2p(MMM, ncol_block, dummy_proc, & - fm_matrix_L%matrix_struct%first_p_pos(2), npcol) + send_pcol = fm_matrix_L%matrix_struct%g2p_col(MMM) proc_send = fm_matrix_L%matrix_struct%context%blacs2mpi(send_prow, send_pcol) diff --git a/src/mp2_ri_gpw.F b/src/mp2_ri_gpw.F index f274eb3b33..69cefdeced 100644 --- a/src/mp2_ri_gpw.F +++ b/src/mp2_ri_gpw.F @@ -12,8 +12,6 @@ !> 03.2019 Refactored from mp2_ri_gpw [Frederick Stein] ! ************************************************************************************************** MODULE mp2_ri_gpw - USE ISO_C_BINDING, ONLY: C_NULL_PTR,& - c_associated USE cp_log_handling, ONLY: cp_to_string USE dgemm_counter_types, ONLY: dgemm_counter_init,& dgemm_counter_start,& @@ -27,11 +25,7 @@ MODULE mp2_ri_gpw USE kinds, ONLY: dp,& int_8 USE libint_2c_3c, ONLY: compare_potential_types - USE local_gemm_api, ONLY: LOCAL_GEMM_PU_GPU,& - local_gemm,& - local_gemm_create,& - local_gemm_destroy,& - local_gemm_set_op_threshold_gpu + USE local_gemm_api, ONLY: LOCAL_GEMM_PU_GPU USE machine, ONLY: m_flush,& m_memory,& m_walltime @@ -143,10 +137,8 @@ SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_e ! local_gemm_ctx has a very footprint the first time this routine is ! called. - IF (.NOT. c_associated(mp2_env%local_gemm_ctx)) THEN - CALL local_gemm_create(mp2_env%local_gemm_ctx, LOCAL_GEMM_PU_GPU) - CALL local_gemm_set_op_threshold_gpu(mp2_env%local_gemm_ctx, 128*128*128*2) - END IF + CALL mp2_env%local_gemm_ctx%create(LOCAL_GEMM_PU_GPU) + CALL mp2_env%local_gemm_ctx%set_op_threshold_gpu(128*128*128*2) CALL mp2_ri_get_integ_group_size( & mp2_env, para_env, para_env_sub, gd_array, gd_B_virtual, & @@ -346,10 +338,10 @@ SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_e local_ba = 0.0_dp END IF CALL dgemm_counter_start(dgemm_counter) - CALL local_gemm('T', 'N', my_B_size(ispin), my_B_size(jspin), dimen_RI, 1.0_dp, & - my_local_i_aL, dimen_RI, my_local_j_aL, dimen_RI, & - 0.0_dp, local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), & - my_B_size(ispin), mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', my_B_size(ispin), my_B_size(jspin), dimen_RI, 1.0_dp, & + my_local_i_aL, dimen_RI, my_local_j_aL, dimen_RI, & + 0.0_dp, local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), & + my_B_size(ispin)) ! Additional integrals only for alpha_beta case and forces IF (my_alpha_beta_case .AND. calc_forces) THEN local_ba(my_B_virtual_start(jspin):my_B_virtual_end(jspin), :) = & @@ -369,10 +361,10 @@ SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_e CALL para_env_sub%sendrecv(my_local_i_aL, proc_send, & external_i_aL, proc_receive, tag) - CALL local_gemm('T', 'N', rec_B_size, my_B_size(jspin), dimen_RI, 1.0_dp, & - external_i_aL, dimen_RI, my_local_j_aL, dimen_RI, & - 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size(jspin)), rec_B_size, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm( & + 'T', 'N', rec_B_size, my_B_size(jspin), dimen_RI, 1.0_dp, & + external_i_aL, dimen_RI, my_local_j_aL, dimen_RI, & + 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size(jspin)), rec_B_size) ! 
Additional integrals only for alpha_beta case and forces IF (my_alpha_beta_case .AND. calc_forces) THEN @@ -386,10 +378,9 @@ SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_e CALL para_env_sub%sendrecv(my_local_j_aL, proc_send, & external_i_aL, proc_receive, tag) - CALL local_gemm('T', 'N', rec_B_size, my_B_size(ispin), dimen_RI, 1.0_dp, & - external_i_aL, dimen_RI, my_local_i_aL, dimen_RI, & - 0.0_dp, local_ba(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size(ispin)), rec_B_size, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', rec_B_size, my_B_size(ispin), dimen_RI, 1.0_dp, & + external_i_aL, dimen_RI, my_local_i_aL, dimen_RI, & + 0.0_dp, local_ba(rec_B_virtual_start:rec_B_virtual_end, 1:my_B_size(ispin)), rec_B_size) END IF END DO IF (my_alpha_beta_case .AND. calc_forces) THEN @@ -688,8 +679,7 @@ SUBROUTINE mp2_ri_gpw_compute_en(Emp2_Cou, Emp2_EX, Emp2_S, Emp2_T, BIb_C, mp2_e CALL dgemm_counter_write(dgemm_counter, para_env) ! release memory allocated by local_gemm when run on GPU. local_gemm_ctx is null on cpu only runs - CALL local_gemm_destroy(mp2_env%local_gemm_ctx) - mp2_env%local_gemm_ctx = C_NULL_PTR + CALL mp2_env%local_gemm_ctx%destroy() CALL timestop(handle) END SUBROUTINE mp2_ri_gpw_compute_en @@ -1676,25 +1666,23 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & CALL dgemm_counter_start(dgemm_counter) IF (.NOT. (alpha_beta)) THEN - CALL local_gemm('T', 'N', my_B_size(ispin), my_B_size(ispin), virtual(ispin), 1.0_dp, & - t_ab, virtual(ispin), local_ab, virtual(ispin), & - 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, & - my_B_virtual_start(ispin):my_B_virtual_end(ispin)), my_B_size(ispin), mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', my_B_size(ispin), my_B_size(ispin), virtual(ispin), 1.0_dp, & + t_ab, virtual(ispin), local_ab, virtual(ispin), & + 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, & + my_B_virtual_start(ispin):my_B_virtual_end(ispin)), my_B_size(ispin)) mp2_env%ri_grad%P_ij(ispin)%array(my_i + iiB - 1, my_i + iiB - 1) = & mp2_env%ri_grad%P_ij(ispin)%array(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag ELSE - CALL local_gemm('T', 'N', my_B_size(ispin), my_B_size(ispin), virtual(jspin), mp2_env%scale_S, & - local_ba, virtual(jspin), local_ba, virtual(jspin), 1.0_dp, & - mp2_env%ri_grad%P_ab(ispin)%array(:, my_B_virtual_start(ispin):my_B_virtual_end(ispin)), my_B_size(ispin), & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', my_B_size(ispin), my_B_size(ispin), virtual(jspin), mp2_env%scale_S, & + local_ba, virtual(jspin), local_ba, virtual(jspin), 1.0_dp, & + mp2_env%ri_grad%P_ab(ispin)%array(:, my_B_virtual_start(ispin):my_B_virtual_end(ispin)), my_B_size(ispin)) mp2_env%ri_grad%P_ij(ispin)%array(my_i + iiB - 1, my_i + iiB - 1) = & mp2_env%ri_grad%P_ij(ispin)%array(my_i + iiB - 1, my_i + iiB - 1) + P_ij_diag - CALL local_gemm('T', 'N', my_B_size(jspin), my_B_size(jspin), virtual(ispin), mp2_env%scale_S, & - local_ab, virtual(ispin), local_ab, virtual(ispin), 1.0_dp, & - mp2_env%ri_grad%P_ab(jspin)%array(:, my_B_virtual_start(jspin):my_B_virtual_end(jspin)), my_B_size(jspin), & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', my_B_size(jspin), my_B_size(jspin), virtual(ispin), mp2_env%scale_S, & + local_ab, virtual(ispin), local_ab, virtual(ispin), 1.0_dp, & + mp2_env%ri_grad%P_ab(jspin)%array(:, my_B_virtual_start(jspin):my_B_virtual_end(jspin)), my_B_size(jspin)) mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjB - 1, my_j + jjB - 
1) = & mp2_env%ri_grad%P_ij(jspin)%array(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag @@ -1704,11 +1692,10 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & ! due to spin in alpha-beta case. IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN - CALL local_gemm('N', 'T', my_B_size(ispin), virtual(ispin), my_B_size(ispin), 1.0_dp, & - t_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & - local_ab, virtual(ispin), & - 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array, my_B_size(ispin), & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('N', 'T', my_B_size(ispin), virtual(ispin), my_B_size(ispin), 1.0_dp, & + t_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & + local_ab, virtual(ispin), & + 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array, my_B_size(ispin)) mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjB - 1, my_j + jjB - 1) = & mp2_env%ri_grad%P_ij(ispin)%array(my_j + jjB - 1, my_j + jjB - 1) + P_ij_diag @@ -1727,15 +1714,14 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & external_ab, proc_receive) IF (.NOT. (alpha_beta)) THEN - CALL local_gemm('T', 'N', my_B_size(ispin), rec_B_size, virtual(ispin), 1.0_dp, & - t_ab, virtual(ispin), external_ab, virtual(ispin), & - 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_B_virtual_start:rec_B_virtual_end), my_B_size(ispin), & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', my_B_size(ispin), rec_B_size, virtual(ispin), 1.0_dp, & + t_ab, virtual(ispin), external_ab, virtual(ispin), & + 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_B_virtual_start:rec_B_virtual_end), my_B_size(ispin)) ELSE - CALL local_gemm('T', 'N', my_B_size(jspin), rec_B_size, virtual(ispin), mp2_env%scale_S, & - local_ab, virtual(ispin), external_ab, virtual(ispin), & - 1.0_dp, mp2_env%ri_grad%P_ab(jspin)%array(:, rec_B_virtual_start:rec_B_virtual_end), & - my_B_size(jspin), mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', my_B_size(jspin), rec_B_size, virtual(ispin), mp2_env%scale_S, & + local_ab, virtual(ispin), external_ab, virtual(ispin), & + 1.0_dp, mp2_env%ri_grad%P_ab(jspin)%array(:, rec_B_virtual_start:rec_B_virtual_end), & + my_B_size(jspin)) ! For alpha-beta part of alpha-density we need a new parallel code ! And new external_ab (of a different size) @@ -1745,10 +1731,9 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & external_ab = 0.0_dp CALL para_env_sub%sendrecv(local_ba, proc_send, & external_ab, proc_receive) - CALL local_gemm('T', 'N', my_B_size(ispin), rec_B_size, virtual(jspin), mp2_env%scale_S, & - local_ba, virtual(jspin), external_ab, virtual(jspin), & - 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_B_virtual_start:rec_B_virtual_end), my_B_size(ispin), & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', my_B_size(ispin), rec_B_size, virtual(jspin), mp2_env%scale_S, & + local_ba, virtual(jspin), external_ab, virtual(jspin), & + 1.0_dp, mp2_env%ri_grad%P_ab(ispin)%array(:, rec_B_virtual_start:rec_B_virtual_end), my_B_size(ispin)) END IF IF ((my_i /= my_j) .AND. (.NOT. 
alpha_beta)) THEN @@ -1761,11 +1746,9 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & send_ab(1:send_B_size, 1:virtual(ispin)) => buffer_1D(offset + 1:offset + INT(send_B_size, int_8)*virtual(ispin)) send_ab = 0.0_dp - CALL local_gemm('N', 'T', send_B_size, virtual(ispin), my_B_size(ispin), 1.0_dp, & - t_ab(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & - local_ab, virtual(ispin), & - 0.0_dp, send_ab, send_B_size, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('N', 'T', send_B_size, virtual(ispin), my_B_size(ispin), 1.0_dp, & + t_ab(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & + local_ab, virtual(ispin), 0.0_dp, send_ab, send_B_size) CALL para_env_sub%sendrecv(send_ab, proc_send, & external_ab, proc_receive) @@ -1790,17 +1773,16 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & CALL dgemm_counter_start(dgemm_counter) IF (.NOT. alpha_beta) THEN ! Alpha-alpha, beta-beta and closed shell - CALL local_gemm('N', 'T', my_B_size(ispin), dimen_RI, my_B_size(ispin), 1.0_dp, & - t_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & - my_local_j_aL, dimen_RI, 1.0_dp, Y_i_aP, my_B_size(ispin), & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('N', 'T', my_B_size(ispin), dimen_RI, my_B_size(ispin), 1.0_dp, & + t_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & + my_local_j_aL, dimen_RI, 1.0_dp, Y_i_aP, my_B_size(ispin)) ELSE ! Alpha-beta - CALL local_gemm('N', 'T', my_B_size(ispin), dimen_RI, my_B_size(jspin), mp2_env%scale_S, & - local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & - my_local_j_aL, dimen_RI, 1.0_dp, Y_i_aP, my_B_size(ispin), mp2_env%local_gemm_ctx) - CALL local_gemm('T', 'T', my_B_size(jspin), dimen_RI, my_B_size(ispin), mp2_env%scale_S, & - local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & - my_local_i_aL, dimen_RI, 1.0_dp, Y_j_aP, my_B_size(jspin), mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('N', 'T', my_B_size(ispin), dimen_RI, my_B_size(jspin), mp2_env%scale_S, & + local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & + my_local_j_aL, dimen_RI, 1.0_dp, Y_i_aP, my_B_size(ispin)) + CALL mp2_env%local_gemm_ctx%gemm('T', 'T', my_B_size(jspin), dimen_RI, my_B_size(ispin), mp2_env%scale_S, & + local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & + my_local_i_aL, dimen_RI, 1.0_dp, Y_j_aP, my_B_size(jspin)) END IF IF (para_env_sub%num_pe > 1) THEN @@ -1820,20 +1802,18 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & send_ab(1:send_B_size, 1:dimen_RI) => buffer_1D(offset + 1:offset + INT(dimen_RI, int_8)*send_B_size) send_ab = 0.0_dp IF (.NOT. alpha_beta) THEN - CALL local_gemm('N', 'T', send_B_size, dimen_RI, my_B_size(ispin), 1.0_dp, & - t_ab(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & - my_local_j_aL, dimen_RI, 0.0_dp, send_ab, send_B_size, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('N', 'T', send_B_size, dimen_RI, my_B_size(ispin), 1.0_dp, & + t_ab(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & + my_local_j_aL, dimen_RI, 0.0_dp, send_ab, send_B_size) CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive) Y_i_aP(:, :) = Y_i_aP + external_ab ELSE ! Alpha-beta case ! 
Alpha-alpha part - CALL local_gemm('N', 'T', send_B_size, dimen_RI, my_B_size(jspin), mp2_env%scale_S, & - local_ab(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & - my_local_j_aL, dimen_RI, 0.0_dp, send_ab, send_B_size, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('N', 'T', send_B_size, dimen_RI, my_B_size(jspin), mp2_env%scale_S, & + local_ab(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & + my_local_j_aL, dimen_RI, 0.0_dp, send_ab, send_B_size) CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive) Y_i_aP(:, :) = Y_i_aP + external_ab END IF @@ -1854,10 +1834,9 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & CALL get_group_dist(gd_B_virtual(jspin), proc_send, send_B_virtual_start, send_B_virtual_end, send_B_size) send_ab(1:send_B_size, 1:dimen_RI) => buffer_1D(offset + 1:offset + INT(dimen_RI, int_8)*send_B_size) send_ab = 0.0_dp - CALL local_gemm('N', 'T', send_B_size, dimen_RI, my_B_size(ispin), mp2_env%scale_S, & - local_ba(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & - my_local_i_aL, dimen_RI, 0.0_dp, send_ab, send_B_size, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('N', 'T', send_B_size, dimen_RI, my_B_size(ispin), mp2_env%scale_S, & + local_ba(send_B_virtual_start:send_B_virtual_end, :), send_B_size, & + my_local_i_aL, dimen_RI, 0.0_dp, send_ab, send_B_size) CALL para_env_sub%sendrecv(send_ab, proc_send, external_ab, proc_receive) Y_j_aP(:, :) = Y_j_aP + external_ab @@ -1872,10 +1851,9 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & IF ((my_i /= my_j) .AND. (.NOT. alpha_beta)) THEN ! Alpha-alpha, beta-beta and closed shell CALL dgemm_counter_start(dgemm_counter) - CALL local_gemm('T', 'T', my_B_size(ispin), dimen_RI, my_B_size(ispin), 1.0_dp, & - t_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & - my_local_i_aL, dimen_RI, 1.0_dp, Y_j_aP, my_B_size(ispin), & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'T', my_B_size(ispin), dimen_RI, my_B_size(ispin), 1.0_dp, & + t_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), :), my_B_size(ispin), & + my_local_i_aL, dimen_RI, 1.0_dp, Y_j_aP, my_B_size(ispin)) DO proc_shift = 1, para_env_sub%num_pe - 1 proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe) proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe) @@ -1889,9 +1867,9 @@ SUBROUTINE mp2_update_P_gamma(mp2_env, para_env_sub, gd_B_virtual, & external_ab, proc_receive) ! 
Alpha-alpha, beta-beta and closed shell - CALL local_gemm('T', 'T', my_B_size(ispin), dimen_RI, rec_B_size, 1.0_dp, & - t_ab(rec_B_virtual_start:rec_B_virtual_end, :), rec_B_size, & - external_ab, dimen_RI, 1.0_dp, Y_j_aP, my_B_size(ispin), mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'T', my_B_size(ispin), dimen_RI, rec_B_size, 1.0_dp, & + t_ab(rec_B_virtual_start:rec_B_virtual_end, :), rec_B_size, & + external_ab, dimen_RI, 1.0_dp, Y_j_aP, my_B_size(ispin)) END DO CALL dgemm_counter_stop(dgemm_counter, my_B_size(ispin), dimen_RI, virtual(ispin)) @@ -2341,10 +2319,9 @@ SUBROUTINE quasi_degenerate_P_ij(mp2_env, Eigenval, homo, virtual, open_shell, & CALL dgemm_counter_start(dgemm_counter) ALLOCATE (local_ab(my_virtual, size_B_k)) local_ab = 0.0_dp - CALL local_gemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, & - local_aL_i, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & - 0.0_dp, local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), 1:size_B_k), size_B_i, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, & + local_aL_i, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & + 0.0_dp, local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), 1:size_B_k), size_B_i) DO proc_shift = 1, para_env_sub%num_pe - 1 proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe) proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe) @@ -2356,10 +2333,9 @@ SUBROUTINE quasi_degenerate_P_ij(mp2_env, Eigenval, homo, virtual, open_shell, & CALL comm_exchange%sendrecv(local_aL_i, proc_send, & external_aL, proc_receive, tag) - CALL local_gemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, & - external_aL, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & - 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, & + external_aL, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & + 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size) END DO CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_B_k, dimen_RI) CALL timestop(handle2) @@ -2418,10 +2394,9 @@ SUBROUTINE quasi_degenerate_P_ij(mp2_env, Eigenval, homo, virtual, open_shell, & CALL timeset(routineN//"_exp_jk", handle2) local_ab = 0.0_dp CALL dgemm_counter_start(dgemm_counter) - CALL local_gemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, & - local_aL_j, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & - 0.0_dp, local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), 1:size_B_k), size_B_i, & - mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%gemm('T', 'N', size_B_i, size_B_k, dimen_RI, 1.0_dp, & + local_aL_j, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & + 0.0_dp, local_ab(my_B_virtual_start(ispin):my_B_virtual_end(ispin), 1:size_B_k), size_B_i) DO proc_shift = 1, para_env_sub%num_pe - 1 proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe) proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe) @@ -2432,10 +2407,9 @@ SUBROUTINE quasi_degenerate_P_ij(mp2_env, Eigenval, homo, virtual, open_shell, & CALL comm_exchange%sendrecv(local_aL_j, proc_send, & external_aL, proc_receive, tag) - CALL local_gemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, & - external_aL, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & - 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size, & - mp2_env%local_gemm_ctx) + CALL 
mp2_env%local_gemm_ctx%gemm('T', 'N', rec_B_size, size_B_k, dimen_RI, 1.0_dp, & + external_aL, dimen_RI, local_aL_k(:, :, kkB), dimen_RI, & + 0.0_dp, local_ab(rec_B_virtual_start:rec_B_virtual_end, 1:size_B_k), rec_B_size) END DO CALL dgemm_counter_stop(dgemm_counter, my_virtual, size_B_k, dimen_RI) CALL timestop(handle2) diff --git a/src/mp2_ri_grad.F b/src/mp2_ri_grad.F index fd1780e554..2c8809251c 100644 --- a/src/mp2_ri_grad.F +++ b/src/mp2_ri_grad.F @@ -25,9 +25,12 @@ MODULE mp2_ri_grad USE cp_fm_struct, ONLY: cp_fm_struct_create,& cp_fm_struct_release,& cp_fm_struct_type - USE cp_fm_types, ONLY: & - cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_indxl2g, cp_fm_release, & - cp_fm_set_all, cp_fm_to_fm_submat, cp_fm_type + USE cp_fm_types, ONLY: cp_fm_create,& + cp_fm_get_info,& + cp_fm_release,& + cp_fm_set_all,& + cp_fm_to_fm_submat,& + cp_fm_type USE input_constants, ONLY: do_eri_gpw,& do_eri_mme,& ri_mp2_laplace,& @@ -652,14 +655,12 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, & iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, & - my_B_virtual_end, my_B_virtual_start, mypcol, myprow, ncol_block, ncol_block_1i, & - ncol_block_2a, ncol_local, ncol_local_1i, ncol_local_2a, npcol, npcol_1i, npcol_2a, & - nprow, nprow_1i, nprow_2a, nrow_block, nrow_block_1i, nrow_block_2a, nrow_local, & - nrow_local_1i, nrow_local_2a, number_of_rec, number_of_send, proc_receive, & - proc_receive_static, proc_send, proc_send_ex, proc_send_static, proc_send_sub, & - proc_shift, rec_col_size - INTEGER :: rec_counter, rec_row_size, send_col_size, send_counter, send_pcol, send_prow, & - send_row_size, size_rec_buffer, size_send_buffer + my_B_virtual_end, my_B_virtual_start, mypcol, myprow, ncol_local, ncol_local_1i, & + ncol_local_2a, npcol, nprow, nrow_local, nrow_local_1i, nrow_local_2a, number_of_rec, & + number_of_send, proc_receive, proc_receive_static, proc_send, proc_send_ex, & + proc_send_static, proc_send_sub, proc_shift, rec_col_size, rec_counter, rec_row_size, & + send_col_size, send_counter, send_pcol, send_prow, send_row_size, size_rec_buffer, & + size_send_buffer INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size, & pos_info, pos_info_ex, proc_2_send_pos INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, & @@ -705,9 +706,7 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, nrow_local=nrow_local, & ncol_local=ncol_local, & row_indices=row_indices, & - col_indices=col_indices, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + col_indices=col_indices) myprow = L_mu_q%matrix_struct%context%mepos(1) mypcol = L_mu_q%matrix_struct%context%mepos(2) nprow = L_mu_q%matrix_struct%context%num_pe(1) @@ -723,11 +722,7 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, nrow_local=nrow_local_1i, & ncol_local=ncol_local_1i, & row_indices=row_indices_1i, & - col_indices=col_indices_1i, & - nrow_block=nrow_block_1i, & - ncol_block=ncol_block_1i) - nprow_1i = L1_mu_i%matrix_struct%context%num_pe(1) - npcol_1i = L1_mu_i%matrix_struct%context%num_pe(2) + col_indices=col_indices_1i) ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1)) CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i) @@ -737,11 +732,7 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, 
nrow_local=nrow_local_2a, & ncol_local=ncol_local_2a, & row_indices=row_indices_2a, & - col_indices=col_indices_2a, & - nrow_block=nrow_block_2a, & - ncol_block=ncol_block_2a) - nprow_2a = L2_nu_a%matrix_struct%context%num_pe(1) - npcol_2a = L2_nu_a%matrix_struct%context%num_pe(2) + col_indices=col_indices_2a) ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1)) CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a) @@ -778,20 +769,16 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, ! row DO iiB = 1, nrow_local_1i i_global = row_indices_1i(iiB) - send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(1), nprow) - i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(1), nprow) + send_prow = L_mu_q%matrix_struct%g2p_row(i_global) + i_local = L_mu_q%matrix_struct%g2l_row(i_global) my_row_indeces_info_1i(1, iiB) = send_prow my_row_indeces_info_1i(2, iiB) = i_local END DO ! col DO jjB = 1, ncol_local_1i j_global = col_indices_1i(jjB) - send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(2), npcol) - j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(2), npcol) + send_pcol = L_mu_q%matrix_struct%g2p_col(j_global) + j_local = L_mu_q%matrix_struct%g2l_col(j_global) my_col_indeces_info_1i(1, jjB) = send_pcol my_col_indeces_info_1i(2, jjB) = j_local END DO @@ -811,20 +798,16 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, ! row DO iiB = 1, nrow_local_2a i_global = row_indices_2a(iiB) - send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(1), nprow) - i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(1), nprow) + send_prow = L_mu_q%matrix_struct%g2p_row(i_global) + i_local = L_mu_q%matrix_struct%g2l_row(i_global) my_row_indeces_info_2a(1, iiB) = send_prow my_row_indeces_info_2a(2, iiB) = i_local END DO ! col DO jjB = 1, ncol_local_2a j_global = col_indices_2a(jjB) + homo - send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(2), npcol) - j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & - L_mu_q%matrix_struct%first_p_pos(2), npcol) + send_pcol = L_mu_q%matrix_struct%g2p_col(j_global) + j_local = L_mu_q%matrix_struct%g2l_col(j_global) my_col_indeces_info_2a(1, jjB) = send_pcol my_col_indeces_info_2a(2, jjB) = j_local END DO @@ -1189,13 +1172,7 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, nrow_local=nrow_local, & ncol_local=ncol_local, & row_indices=row_indices, & - col_indices=col_indices, & - nrow_block=nrow_block, & - ncol_block=ncol_block) - myprow = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%mepos(1) - mypcol = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%mepos(2) - nprow = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%num_pe(1) - npcol = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%num_pe(2) + col_indices=col_indices) IF (mp2_env%method == ri_mp2_laplace) THEN CALL parallel_gemm('T', 'N', virtual, virtual, dimen, 1.0_dp, & @@ -1278,14 +1255,12 @@ SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, ! 
first loop over row since in this way we can cycle DO iiB = 1, send_row_size - i_global = cp_fm_indxl2g(iiB, nrow_block, send_prow, & - mp2_env%ri_grad%P_mo(kspin)%matrix_struct%first_p_pos(1), nprow) + i_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_row(iiB, send_prow) IF (i_global <= homo) CYCLE i_global = i_global - homo IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE DO jjB = 1, send_col_size - j_global = cp_fm_indxl2g(jjB, ncol_block, send_pcol, & - mp2_env%ri_grad%P_mo(kspin)%matrix_struct%first_p_pos(2), npcol) + j_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_col(jjB, send_pcol) IF (j_global <= homo) CYCLE j_global = j_global - homo ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global) diff --git a/src/mp2_ri_grad_util.F b/src/mp2_ri_grad_util.F index 47cca499bb..ff5f9aaaa0 100644 --- a/src/mp2_ri_grad_util.F +++ b/src/mp2_ri_grad_util.F @@ -25,9 +25,12 @@ MODULE mp2_ri_grad_util cp_fm_struct_get,& cp_fm_struct_release,& cp_fm_struct_type - USE cp_fm_types, ONLY: & - cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_indxl2g, cp_fm_release, & - cp_fm_set_all, cp_fm_to_fm, cp_fm_type + USE cp_fm_types, ONLY: cp_fm_create,& + cp_fm_get_info,& + cp_fm_release,& + cp_fm_set_all,& + cp_fm_to_fm,& + cp_fm_type USE group_dist_types, ONLY: create_group_dist,& get_group_dist,& group_dist_d1_type,& @@ -509,12 +512,11 @@ SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, & INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, & i_sub, iiB, iii, itmp(2), j_global, j_local, j_sub, jjB, my_num_col_blocks, & - my_num_row_blocks, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, & - nrow_local, num_cols, num_rec_cols, num_rows, number_of_rec, number_of_send, & - proc_receive, proc_send, proc_shift, rec_col_end, rec_col_size, rec_col_start, & - rec_counter, rec_pcol, rec_prow, rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, & - ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer - INTEGER :: start_col_block, start_row_block + my_num_row_blocks, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, num_cols, & + num_rec_cols, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, & + proc_shift, rec_col_end, rec_col_size, rec_col_start, rec_counter, rec_pcol, rec_prow, & + rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, ref_send_prow, send_counter, & + send_pcol, send_prow, size_rec_buffer, size_send_buffer, start_col_block, start_row_block INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, & map_send_size INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, & @@ -545,9 +547,7 @@ SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, & nrow_local=nrow_local, & ncol_local=ncol_local, & row_indices=row_indices, & - col_indices=col_indices, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + col_indices=col_indices) myprow = fm_mat%matrix_struct%context%mepos(1) mypcol = fm_mat%matrix_struct%context%mepos(2) nprow = fm_mat%matrix_struct%context%num_pe(1) @@ -570,11 +570,9 @@ SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, & map_send_size = 0 dummy_proc = 0 DO jjB = my_start_col, my_end_col - send_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(2), npcol) + send_pcol = fm_mat%matrix_struct%g2p_col(jjB) DO iiB = my_start_row, my_end_row - send_prow = 
cp_fm_indxg2p(iiB, nrow_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(1), nprow) + send_prow = fm_mat%matrix_struct%g2p_row(iiB) proc_send = grid_2_mepos(send_prow, send_pcol) map_send_size(proc_send) = map_send_size(proc_send) + 1 END DO @@ -691,11 +689,9 @@ SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, & ALLOCATE (iii_vet(number_of_send)) iii_vet = 0 DO iiB = my_start_row, my_end_row - send_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(1), nprow) + send_prow = fm_mat%matrix_struct%g2p_row(iiB) DO jjB = my_start_col, my_end_col - send_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(2), npcol) + send_pcol = fm_mat%matrix_struct%g2p_col(jjB) ! we don't need to send to ourselves IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE @@ -823,8 +819,7 @@ SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, & end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end) DO j_sub = start_col_block, end_col_block iii = iii + 1 - j_local = cp_fm_indxg2l(j_sub, ncol_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(2), npcol) + j_local = fm_mat%matrix_struct%g2l_col(j_sub) index_col_rec(iii) = j_local END DO END DO @@ -840,8 +835,7 @@ SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, & start_row_block = MAX(blocks_ranges_row(1, iiB), rec_row_start) end_row_block = MIN(blocks_ranges_row(2, iiB), rec_row_end) DO i_sub = start_row_block, end_row_block - i_local = cp_fm_indxg2l(i_sub, nrow_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(1), nprow) + i_local = fm_mat%matrix_struct%g2l_row(i_sub) DO jjB = 1, num_rec_cols iii = iii + 1 j_local = index_col_rec(jjB) @@ -911,10 +905,10 @@ SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, & CHARACTER(LEN=*), PARAMETER :: routineN = 'fm2array' INTEGER :: dummy_proc, handle, handle2, i_global, iiB, iii, itmp(2), j_global, jjB, mypcol, & - myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, num_cols, & - num_rec_rows, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, & - proc_shift, rec_col_size, rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, & - ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer + myprow, ncol_local, npcol, nprow, nrow_local, num_cols, num_rec_rows, num_rows, & + number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_col_size, & + rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, ref_send_prow, & + send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, & map_send_size INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, & @@ -939,8 +933,6 @@ SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, & ncol_local=ncol_local, & row_indices=row_indices, & col_indices=col_indices, & - nrow_block=nrow_block, & - ncol_block=ncol_block, & nrow_global=num_rows, & ncol_global=num_cols, & para_env=para_env) @@ -1009,11 +1001,9 @@ SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, & map_rec_size = 0 dummy_proc = 0 DO jjB = my_start_col, my_end_col - rec_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(2), npcol) + rec_pcol = fm_mat%matrix_struct%g2p_col(jjB) DO iiB = my_start_row, my_end_row - rec_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, & - fm_mat%matrix_struct%first_p_pos(1), nprow) + rec_prow = 
fm_mat%matrix_struct%g2p_row(iiB) proc_receive = grid_2_mepos(rec_prow, rec_pcol) map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1 END DO @@ -1190,8 +1180,7 @@ SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, & ! precompute the indeces of the rows num_rec_rows = 0 DO iiB = 1, rec_row_size - i_global = cp_fm_indxl2g(iiB, nrow_block, mepos_2_grid(1, proc_receive), & - fm_mat%matrix_struct%first_p_pos(1), nprow) + i_global = fm_mat%matrix_struct%l2g_row(iiB, mepos_2_grid(1, proc_receive)) IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN num_rec_rows = num_rec_rows + 1 index_row_rec(num_rec_rows) = i_global @@ -1202,8 +1191,7 @@ SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, & iii = 0 DO jjB = 1, rec_col_size - j_global = cp_fm_indxl2g(jjB, ncol_block, mepos_2_grid(2, proc_receive), & - fm_mat%matrix_struct%first_p_pos(2), npcol) + j_global = fm_mat%matrix_struct%l2g_col(jjB, mepos_2_grid(2, proc_receive)) IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN DO iiB = 1, num_rec_rows i_global = index_row_rec(iiB) @@ -1262,11 +1250,10 @@ SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, & CHARACTER(LEN=*), PARAMETER :: routineN = 'create_dbcsr_gamma' INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iiB, iii, itmp(2), j_global, & - j_local, jjB, jjj, kkB, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, & - nrow_local, number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, & - rec_counter, rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, & - ref_send_pcol, ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, & - size_send_buffer + j_local, jjB, jjj, kkB, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, & + number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_counter, & + rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, & + ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, & indeces_map_my, mepos_2_grid @@ -1301,9 +1288,7 @@ SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, & nrow_local=nrow_local, & ncol_local=ncol_local, & row_indices=row_indices, & - col_indices=col_indices, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + col_indices=col_indices) myprow = fm_ia%matrix_struct%context%mepos(1) mypcol = fm_ia%matrix_struct%context%mepos(2) nprow = fm_ia%matrix_struct%context%num_pe(1) @@ -1326,10 +1311,8 @@ SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, & DO iaia = my_ia_start, my_ia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + send_prow = fm_ia%matrix_struct%g2p_row(i_global) + send_pcol = fm_ia%matrix_struct%g2p_col(j_global) proc_send = grid_2_mepos(send_prow, send_pcol) map_send_size(proc_send) = map_send_size(proc_send) + 1 END DO @@ -1419,16 +1402,12 @@ SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, & DO iaia = rec_iaia_start, rec_iaia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - rec_prow = cp_fm_indxg2p(i_global, 
nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + rec_prow = fm_ia%matrix_struct%g2p_row(i_global) + rec_pcol = fm_ia%matrix_struct%g2p_col(j_global) IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE iii = iii + 1 - i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + i_local = fm_ia%matrix_struct%g2l_row(i_global) + j_local = fm_ia%matrix_struct%g2l_col(j_global) indeces_rec(rec_counter)%map(1, iii) = i_local indeces_rec(rec_counter)%map(2, iii) = j_local END DO @@ -1443,16 +1422,12 @@ SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, & DO iaia = my_ia_start, my_ia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + rec_prow = fm_ia%matrix_struct%g2p_row(i_global) + rec_pcol = fm_ia%matrix_struct%g2p_col(j_global) IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE iii = iii + 1 - i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + i_local = fm_ia%matrix_struct%g2l_row(i_global) + j_local = fm_ia%matrix_struct%g2l_col(j_global) indeces_map_my(1, iii) = i_local indeces_map_my(2, iii) = j_local END DO @@ -1486,10 +1461,8 @@ SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, & DO iaia = my_ia_start, my_ia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + send_prow = fm_ia%matrix_struct%g2p_row(i_global) + send_pcol = fm_ia%matrix_struct%g2p_col(j_global) proc_send = grid_2_mepos(send_prow, send_pcol) ! we don't need to send to ourselves IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN diff --git a/src/mp2_setup.F b/src/mp2_setup.F index 16dc8c01a9..acbe5aa8ee 100644 --- a/src/mp2_setup.F +++ b/src/mp2_setup.F @@ -110,7 +110,12 @@ SUBROUTINE read_mp2_section(input, mp2_env) CALL section_vals_val_get(mp2_section, "RI_RPA%SCALE_RPA", r_val=mp2_env%ri_rpa%scale_rpa) mp2_env%ri_rpa%reuse_hfx = .FALSE. 
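The hunks above repeatedly replace per-element cp_fm_indxg2p/cp_fm_indxg2l/cp_fm_indxl2g calls with precomputed g2p_row/g2l_row/l2g_row (and the corresponding column) lookups on the matrix struct. The self-contained sketch below tabulates the standard ScaLAPACK block-cyclic mappings for a toy 1D distribution, which is presumably what those lookup arrays store; the array and helper names are assumptions for illustration, not the actual cp_fm_struct implementation.

! Sketch: build global->process and global->local lookup tables for a
! 1D block-cyclic distribution (block size nb, nproc processes, first
! process src), mirroring the ScaLAPACK INDXG2P/INDXG2L/INDXL2G formulas.
PROGRAM block_cyclic_lookup_sketch
   IMPLICIT NONE
   INTEGER, PARAMETER :: n = 10, nb = 3, nproc = 2, src = 0
   INTEGER :: g2p(n), g2l(n), g
   DO g = 1, n
      g2p(g) = MOD(src + (g - 1)/nb, nproc)                 ! owning process row/col
      g2l(g) = nb*((g - 1)/(nb*nproc)) + MOD(g - 1, nb) + 1 ! local index on that process
   END DO
   DO g = 1, n
      ! round trip: local index plus owning process must give back the global index
      IF (l2g(g2l(g), g2p(g)) /= g) STOP 'mapping inconsistent'
      PRINT '(A,I3,A,I2,A,I3)', 'global ', g, ' -> proc ', g2p(g), ', local ', g2l(g)
   END DO
CONTAINS
   INTEGER FUNCTION l2g(il, p)
      INTEGER, INTENT(IN) :: il, p
      l2g = nb*nproc*((il - 1)/nb) + MOD(il - 1, nb) + MOD(nproc + p - src, nproc)*nb + 1
   END FUNCTION l2g
END PROGRAM block_cyclic_lookup_sketch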
- CALL section_vals_val_get(mp2_section, "RI_RPA%AXK", l_val=mp2_env%ri_rpa%do_ri_axk) + CALL section_vals_val_get(mp2_section, "RI_RPA%EXCHANGE_CORRECTION%_SECTION_PARAMETERS_", & + i_val=mp2_env%ri_rpa%exchange_correction) + CALL section_vals_val_get(mp2_section, "RI_RPA%EXCHANGE_CORRECTION%BLOCK_SIZE", & + i_val=mp2_env%ri_rpa%exchange_block_size) + CALL section_vals_val_get(mp2_section, "RI_RPA%EXCHANGE_CORRECTION%USE_HFX_IMPLEMENTATION", & + l_val=mp2_env%ri_rpa%use_hfx_implementation) CALL section_vals_val_get(mp2_section, "RI_RPA%RSE", l_val=mp2_env%ri_rpa%do_rse) CALL section_vals_val_get(mp2_section, "RI_RPA%PRINT_DGEMM_INFO", l_val=mp2_env%ri_rpa%print_dgemm_info) diff --git a/src/mp2_types.F b/src/mp2_types.F index a667c4e230..418f5c17bd 100644 --- a/src/mp2_types.F +++ b/src/mp2_types.F @@ -23,18 +23,14 @@ MODULE mp2_types USE input_constants, ONLY: & do_eri_mme, eri_default, gw_pade_approx, kp_weights_W_auto, mp2_method_direct, & mp2_method_gpw, mp2_method_none, mp2_ri_optimize_basis, ri_mp2_laplace, ri_mp2_method_gpw, & - ri_rpa_g0w0_crossing_z_shot, ri_rpa_method_gpw, soc_none, wfc_mm_style_gemm + ri_rpa_g0w0_crossing_z_shot, ri_rpa_method_gpw, rpa_exchange_none, soc_none, & + wfc_mm_style_gemm USE input_section_types, ONLY: section_vals_release,& section_vals_type - USE iso_c_binding, ONLY: c_null_ptr,& - c_ptr USE kinds, ONLY: dp USE kpoint_types, ONLY: kpoint_type USE libint_2c_3c, ONLY: libint_potential_type - USE local_gemm_api, ONLY: LOCAL_GEMM_PU_GPU,& - local_gemm_create,& - local_gemm_destroy,& - local_gemm_set_op_threshold_gpu + USE local_gemm_api, ONLY: local_gemm_ctxt_type USE message_passing, ONLY: mp_request_type USE qs_force_types, ONLY: qs_force_type USE qs_p_env_types, ONLY: qs_p_env_type @@ -105,7 +101,7 @@ MODULE mp2_types LOGICAL :: big_send = .FALSE. END TYPE - TYPE mp2_gpw_r3d_rs_type + TYPE mp2_gpw_type REAL(KIND=dp) :: eps_grid = 0.0_dp, & eps_filter = 0.0_dp, & eps_pgf_orb_S = 0.0_dp @@ -113,7 +109,7 @@ MODULE mp2_types REAL(KIND=dp) :: cutoff = 0.0_dp, & relative_cutoff = 0.0_dp INTEGER :: size_lattice_sum = 0 - END TYPE mp2_gpw_r3d_rs_type + END TYPE TYPE ri_mp2_type INTEGER :: block_size = 0, & @@ -132,12 +128,14 @@ MODULE mp2_types minimax_quad = .FALSE., & do_ri_g0w0 = .FALSE., & do_admm = .FALSE., & - do_ri_axk = .FALSE., & do_rse = .FALSE., & print_dgemm_info = .FALSE. - TYPE(dbcsr_type), POINTER :: mo_coeff_o => NULL(), & - mo_coeff_v => NULL() - REAL(KIND=dp) :: ener_axk = 0.0_dp, & + TYPE(dbcsr_type), DIMENSION(:), ALLOCATABLE :: mo_coeff_o, & + mo_coeff_v + INTEGER :: exchange_correction = rpa_exchange_none, & + exchange_block_size = -1 + LOGICAL :: use_hfx_implementation = .FALSE. + REAL(KIND=dp) :: ener_exchange = 0.0_dp, & rse_corr_diag = 0.0_dp, & rse_corr = 0.0_dp, & scale_rpa = 0.0_dp @@ -318,7 +316,7 @@ MODULE mp2_types TYPE(mp2_laplace_type) :: ri_laplace = mp2_laplace_type() TYPE(mp2_direct_type) :: direct_canonical = mp2_direct_type() TYPE(libint_potential_type) :: potential_parameter = libint_potential_type() - TYPE(mp2_gpw_r3d_rs_type) :: mp2_gpw = mp2_gpw_r3d_rs_type() + TYPE(mp2_gpw_type) :: mp2_gpw = mp2_gpw_type() TYPE(ri_mp2_type) :: ri_mp2 = ri_mp2_type() TYPE(ri_rpa_type) :: ri_rpa = ri_rpa_type() ! There is a bug with some older compilers preventing requiring an explicit initialization of allocatable components @@ -368,7 +366,7 @@ MODULE mp2_types LOGICAL :: do_svd = .FALSE. 
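The mp2_types.F hunk above (and the mp2_ri_gpw.F hunks further up) route all local GEMM work through a local_gemm_ctxt_type component with type-bound procedures such as create, gemm and destroy. A minimal sketch of that derived-type wrapper pattern follows; the names echo the diff, but the simplified argument list and the MATMUL body are purely illustrative stand-ins for the SpLA/DGEMM backend.

! Sketch of a derived type with create/gemm/destroy type-bound procedures;
! the real local_gemm_ctxt_type dispatches to SpLA or DGEMM instead of the
! naive MATMUL used here.
MODULE local_gemm_sketch
   IMPLICIT NONE
   PRIVATE
   PUBLIC :: gemm_ctxt_sketch

   TYPE gemm_ctxt_sketch
      LOGICAL :: initialized = .FALSE.
   CONTAINS
      PROCEDURE :: create => ctxt_create
      PROCEDURE :: gemm => ctxt_gemm
      PROCEDURE :: destroy => ctxt_destroy
   END TYPE gemm_ctxt_sketch

CONTAINS

   SUBROUTINE ctxt_create(ctx)
      CLASS(gemm_ctxt_sketch), INTENT(INOUT) :: ctx
      ctx%initialized = .TRUE. ! here a GPU/SpLA context would be set up
   END SUBROUTINE ctxt_create

   SUBROUTINE ctxt_gemm(ctx, a, b, c)
      CLASS(gemm_ctxt_sketch), INTENT(INOUT) :: ctx
      REAL, INTENT(IN) :: a(:, :), b(:, :)
      REAL, INTENT(OUT) :: c(:, :)
      IF (.NOT. ctx%initialized) ERROR STOP 'context not created'
      c = MATMUL(a, b) ! stand-in for the SpLA/DGEMM call
   END SUBROUTINE ctxt_gemm

   SUBROUTINE ctxt_destroy(ctx)
      CLASS(gemm_ctxt_sketch), INTENT(INOUT) :: ctx
      ctx%initialized = .FALSE. ! here the GPU/SpLA context would be freed
   END SUBROUTINE ctxt_destroy

END MODULE local_gemm_sketch

PROGRAM use_local_gemm_sketch
   USE local_gemm_sketch, ONLY: gemm_ctxt_sketch
   IMPLICIT NONE
   TYPE(gemm_ctxt_sketch) :: ctx
   REAL :: a(2, 3), b(3, 2), c(2, 2)
   a = 1.0; b = 2.0
   CALL ctx%create()
   CALL ctx%gemm(a, b, c) ! analogous to CALL mp2_env%local_gemm_ctx%gemm(...), simplified arguments
   CALL ctx%destroy()
   PRINT *, c
END PROGRAM use_local_gemm_sketch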
REAL(KIND=dp) :: eps_range = 0.0_dp TYPE(libint_potential_type) :: ri_metric = libint_potential_type() - TYPE(C_ptr) :: local_gemm_ctx = C_NULL_PTR + TYPE(local_gemm_ctxt_type) :: local_gemm_ctx = local_gemm_ctxt_type() REAL(dp) :: e_gap = 0.0_dp, & e_range = 0.0_dp END TYPE mp2_type @@ -424,7 +422,7 @@ SUBROUTINE mp2_env_release(mp2_env) IF (ASSOCIATED(mp2_env%ri_rpa_im_time%weights_cos_tf_t_to_w)) DEALLOCATE (mp2_env%ri_rpa_im_time%weights_cos_tf_t_to_w) IF (ASSOCIATED(mp2_env%ri_rpa_im_time%weights_cos_tf_w_to_t)) DEALLOCATE (mp2_env%ri_rpa_im_time%weights_cos_tf_w_to_t) - CALL local_gemm_destroy(mp2_env%local_gemm_ctx) + CALL mp2_env%local_gemm_ctx%destroy() CALL timestop(handle) @@ -447,12 +445,6 @@ SUBROUTINE mp2_env_create(mp2_env) ALLOCATE (mp2_env) - ! these two functions are empty if spla is build without gpu support and - ! OFFLOAD_GEMM is not given at compilation time - - CALL local_gemm_create(mp2_env%local_gemm_ctx, LOCAL_GEMM_PU_GPU) - CALL local_gemm_set_op_threshold_gpu(mp2_env%local_gemm_ctx, 128*128*128*2) - NULLIFY (mp2_env%ri_rpa%x_data) CALL timestop(handle) diff --git a/src/qs_active_space_types.F b/src/qs_active_space_types.F index 1f145c5f95..6386caa9e1 100644 --- a/src/qs_active_space_types.F +++ b/src/qs_active_space_types.F @@ -40,7 +40,7 @@ MODULE qs_active_space_types !> \brief Quantities needed for AS determination !> \author JGH ! ************************************************************************************************** - TYPE eri_gpw_r3d_rs_type + TYPE eri_gpw_type LOGICAL :: redo_poisson = .FALSE. LOGICAL :: store_wfn = .FALSE. REAL(KIND=dp) :: cutoff = 0.0_dp @@ -49,7 +49,7 @@ MODULE qs_active_space_types REAL(KIND=dp) :: eps_filter = 0.0_dp INTEGER :: print_level = 0 INTEGER :: group_size = 0 - END TYPE eri_gpw_r3d_rs_type + END TYPE eri_gpw_type TYPE eri_type INTEGER :: method = 0 @@ -58,7 +58,7 @@ MODULE qs_active_space_types INTEGER, DIMENSION(3) :: periodicity = 0 REAL(KIND=dp) :: cutoff_radius = 0.0_dp REAL(KIND=dp) :: eps_integral = 0.0_dp - TYPE(eri_gpw_r3d_rs_type) :: eri_gpw = eri_gpw_r3d_rs_type() + TYPE(eri_gpw_type) :: eri_gpw = eri_gpw_type() TYPE(dbcsr_csr_p_type), & DIMENSION(:), POINTER :: eri => NULL() INTEGER :: norb = 0 diff --git a/src/qs_density_matrices.F b/src/qs_density_matrices.F index c970065f8c..6a5d8af903 100644 --- a/src/qs_density_matrices.F +++ b/src/qs_density_matrices.F @@ -215,8 +215,7 @@ SUBROUTINE calculate_w_matrix_ot(mo_set, mo_deriv, w_matrix, s_matrix) LOGICAL, PARAMETER :: check_gradient = .FALSE., & do_symm = .FALSE. - INTEGER :: handle, iounit, ncol_block, ncol_global, & - nrow_block, nrow_global + INTEGER :: handle, iounit, ncol_global, nrow_global REAL(KIND=dp), DIMENSION(:), POINTER :: occupation_numbers, scaling_factor TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp TYPE(cp_fm_type) :: gradient, h_block, h_block_t, & @@ -228,9 +227,7 @@ SUBROUTINE calculate_w_matrix_ot(mo_set, mo_deriv, w_matrix, s_matrix) CALL cp_fm_get_info(matrix=mo_set%mo_coeff, & ncol_global=ncol_global, & - nrow_global=nrow_global, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + nrow_global=nrow_global) CALL cp_fm_create(weighted_vectors, mo_set%mo_coeff%matrix_struct, "weighted_vectors") CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol_global, ncol_global=ncol_global, & diff --git a/src/qs_kind_types.F b/src/qs_kind_types.F index d5f84054eb..e646baf006 100644 --- a/src/qs_kind_types.F +++ b/src/qs_kind_types.F @@ -193,7 +193,7 @@ MODULE qs_kind_types REAL(KIND=dp) :: hard0_radius = 0.8_dp*bohr ! 
for hard exp of rho0 REAL(KIND=dp) :: max_rad_local = 13.2_dp*bohr ! max GTO radius used in GAPW LOGICAL :: paw_atom = .FALSE. ! needs atomic rho1 - LOGICAL :: gpw_r3d_rs_type_forced = .FALSE. ! gpw atom even if with hard exponents + LOGICAL :: gpw_type_forced = .FALSE. ! gpw atom even if with hard exponents ! LOGICAL :: ghost = .FALSE. LOGICAL :: floating = .FALSE. @@ -388,7 +388,7 @@ END SUBROUTINE deallocate_qs_kind_set !> \param max_rad_local ... !> \param covalent_radius ... !> \param vdw_radius ... -!> \param gpw_r3d_rs_type_forced ... +!> \param gpw_type_forced ... !> \param harmonics ... !> \param max_iso_not0 ... !> \param max_s_harm ... @@ -439,7 +439,7 @@ SUBROUTINE get_qs_kind(qs_kind, & alpha_core_charge, ccore_charge, core_charge, core_charge_radius, & paw_proj_set, paw_atom, hard_radius, hard0_radius, max_rad_local, & covalent_radius, vdw_radius, & - gpw_r3d_rs_type_forced, harmonics, max_iso_not0, max_s_harm, grid_atom, & + gpw_type_forced, harmonics, max_iso_not0, max_s_harm, grid_atom, & ngrid_ang, ngrid_rad, lmax_rho0, & dft_plus_u_atom, l_of_dft_plus_u, n_of_dft_plus_u, & u_minus_j, U_of_dft_plus_u, J_of_dft_plus_u, & @@ -471,7 +471,7 @@ SUBROUTINE get_qs_kind(qs_kind, & REAL(KIND=dp), INTENT(OUT), OPTIONAL :: hard_radius, hard0_radius, & max_rad_local, covalent_radius, & vdw_radius - LOGICAL, INTENT(OUT), OPTIONAL :: gpw_r3d_rs_type_forced + LOGICAL, INTENT(OUT), OPTIONAL :: gpw_type_forced TYPE(harmonics_atom_type), OPTIONAL, POINTER :: harmonics INTEGER, INTENT(OUT), OPTIONAL :: max_iso_not0, max_s_harm TYPE(grid_atom_type), OPTIONAL, POINTER :: grid_atom @@ -640,7 +640,7 @@ SUBROUTINE get_qs_kind(qs_kind, & IF (PRESENT(paw_proj_set)) paw_proj_set => qs_kind%paw_proj_set IF (PRESENT(paw_atom)) paw_atom = qs_kind%paw_atom - IF (PRESENT(gpw_r3d_rs_type_forced)) gpw_r3d_rs_type_forced = qs_kind%gpw_r3d_rs_type_forced + IF (PRESENT(gpw_type_forced)) gpw_type_forced = qs_kind%gpw_type_forced IF (PRESENT(hard_radius)) hard_radius = qs_kind%hard_radius IF (PRESENT(hard0_radius)) hard0_radius = qs_kind%hard0_radius IF (PRESENT(max_rad_local)) max_rad_local = qs_kind%max_rad_local @@ -1326,7 +1326,7 @@ SUBROUTINE init_gapw_basis_set(qs_kind_set, qs_control, force_env_section, modif CALL get_qs_kind(qs_kind=qs_kind, basis_set=orb_basis) CALL get_qs_kind(qs_kind=qs_kind, hard_radius=rc, & - max_rad_local=max_rad_local_type, gpw_r3d_rs_type_forced=gpw) + max_rad_local=max_rad_local_type, gpw_type_forced=gpw) NULLIFY (soft_basis) CALL allocate_gto_basis_set(soft_basis) @@ -1794,7 +1794,7 @@ SUBROUTINE read_qs_kind(qs_kind, kind_section, para_env, force_env_section, no_f IF (qs_kind%ngrid_rad <= 0) & CPABORT("# point radial grid < 0") CALL section_vals_val_get(kind_section, i_rep_section=k_rep, & - keyword_name="GPW_TYPE", l_val=qs_kind%gpw_r3d_rs_type_forced) + keyword_name="GPW_TYPE", l_val=qs_kind%gpw_type_forced) CALL section_vals_val_get(kind_section, i_rep_section=k_rep, & keyword_name="GHOST", l_val=qs_kind%ghost) CALL section_vals_val_get(kind_section, i_rep_section=k_rep, & diff --git a/src/qs_linres_op.F b/src/qs_linres_op.F index 505fc0ed69..00b28f349b 100644 --- a/src/qs_linres_op.F +++ b/src/qs_linres_op.F @@ -41,9 +41,14 @@ MODULE qs_linres_op USE cp_fm_struct, ONLY: cp_fm_struct_create,& cp_fm_struct_release,& cp_fm_struct_type - USE cp_fm_types, ONLY: & - cp_fm_create, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_indxl2g, cp_fm_release, & - cp_fm_set_all, cp_fm_set_submatrix, cp_fm_to_fm, cp_fm_type + USE cp_fm_types, ONLY: cp_fm_create,& + cp_fm_get_info,& + 
cp_fm_get_submatrix,& + cp_fm_release,& + cp_fm_set_all,& + cp_fm_set_submatrix,& + cp_fm_to_fm,& + cp_fm_type USE cp_log_handling, ONLY: cp_get_default_logger,& cp_logger_type,& cp_to_string @@ -1208,8 +1213,9 @@ SUBROUTINE fm_scale_by_pbc_AC(matrix, ra, rc, cell, ixyz) CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_scale_by_pbc_AC' - INTEGER :: handle, icol_global, icol_local, irow_global, irow_local, m, mypcol, myprow, n, & - ncol_block, ncol_global, ncol_local, npcol, nprow, nrow_block, nrow_global, nrow_local + INTEGER :: handle, icol_global, icol_local, & + irow_global, irow_local, m, mypcol, & + myprow, n, ncol_local, nrow_local REAL(KIND=dp) :: dist(3), rra(3), rrc(3) REAL(KIND=dp), DIMENSION(:, :), POINTER :: a @@ -1217,13 +1223,7 @@ SUBROUTINE fm_scale_by_pbc_AC(matrix, ra, rc, cell, ixyz) myprow = matrix%matrix_struct%context%mepos(1) mypcol = matrix%matrix_struct%context%mepos(2) - nprow = matrix%matrix_struct%context%num_pe(1) - npcol = matrix%matrix_struct%context%num_pe(2) - nrow_block = matrix%matrix_struct%nrow_block - ncol_block = matrix%matrix_struct%ncol_block - nrow_global = matrix%matrix_struct%nrow_global - ncol_global = matrix%matrix_struct%ncol_global nrow_local = matrix%matrix_struct%nrow_locals(myprow) ncol_local = matrix%matrix_struct%ncol_locals(mypcol) @@ -1232,13 +1232,11 @@ SUBROUTINE fm_scale_by_pbc_AC(matrix, ra, rc, cell, ixyz) a => matrix%local_data DO icol_local = 1, ncol_local - icol_global = cp_fm_indxl2g(icol_local, ncol_block, mypcol, & - matrix%matrix_struct%first_p_pos(2), npcol) + icol_global = matrix%matrix_struct%col_indices(icol_local) IF (icol_global .GT. n) CYCLE rrc = rc(:, icol_global) DO irow_local = 1, nrow_local - irow_global = cp_fm_indxl2g(irow_local, nrow_block, myprow, & - matrix%matrix_struct%first_p_pos(1), nprow) + irow_global = matrix%matrix_struct%row_indices(irow_local) IF (irow_global .GT. 
m) CYCLE rra = ra(:, irow_global) dist = pbc(rrc, rra, cell) diff --git a/src/qs_localization_methods.F b/src/qs_localization_methods.F index bb1d679cd0..31f27e092e 100644 --- a/src/qs_localization_methods.F +++ b/src/qs_localization_methods.F @@ -3042,9 +3042,8 @@ SUBROUTINE scdm_qrfact(vectors) nrowT = vectors%matrix_struct%ncol_global ncolT = vectors%matrix_struct%nrow_global - CALL cp_fm_struct_create(cstruct, vectors%matrix_struct%para_env, & - vectors%matrix_struct%context, nrowT, ncolT, vectors%matrix_struct%ncol_block, & - vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos) + CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, & + nrow_global=nrowT, ncol_global=ncolT) CALL cp_fm_create(CTp, cstruct) CALL cp_fm_struct_release(cstruct) diff --git a/src/qs_matrix_pools.F b/src/qs_matrix_pools.F index 72aa8f6af7..debf17adb0 100644 --- a/src/qs_matrix_pools.F +++ b/src/qs_matrix_pools.F @@ -22,8 +22,6 @@ MODULE qs_matrix_pools fm_pools_dealloc USE cp_fm_struct, ONLY: cp_fm_struct_create,& cp_fm_struct_get,& - cp_fm_struct_get_ncol_block,& - cp_fm_struct_get_nrow_block,& cp_fm_struct_release,& cp_fm_struct_type USE message_passing, ONLY: mp_para_env_type @@ -217,17 +215,13 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & CHARACTER(len=*), PARAMETER :: routineN = 'mpools_rebuild_fm_pools' INTEGER :: handle, ispin, max_nmo, min_nmo, nao, & - ncg, ncol_block, nmo, nrg, nrow_block, & - nspins + ncg, nmo, nrg, nspins LOGICAL :: prepare_subset, should_rebuild TYPE(cp_fm_pool_type), POINTER :: p_att TYPE(cp_fm_struct_type), POINTER :: fmstruct CALL timeset(routineN, handle) - nrow_block = cp_fm_struct_get_nrow_block() - ncol_block = cp_fm_struct_get_ncol_block() - NULLIFY (fmstruct, p_att) prepare_subset = .FALSE. IF (PRESENT(nmosub)) THEN @@ -320,8 +314,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & should_rebuild = (should_rebuild .OR. (.NOT. ASSOCIATED(p_att))) IF (.NOT. should_rebuild) THEN fmstruct => fm_pool_get_el_struct(mpools%ao_ao_fm_pools(ispin)%pool) - CALL cp_fm_struct_get(fmstruct, nrow_global=nrg, & - ncol_global=ncg) + CALL cp_fm_struct_get(fmstruct, nrow_global=nrg, ncol_global=ncg) CALL get_mo_set(mos(1), nao=nao, nmo=nmo) should_rebuild = nao /= nrg .OR. nao /= ncg END IF @@ -333,9 +326,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & CALL cp_fm_struct_create(fmstruct, nrow_global=nao, & ncol_global=nao, para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%ao_ao_fm_pools(1)%pool, fmstruct) CALL cp_fm_struct_release(fmstruct) DO ispin = 2, SIZE(mos) @@ -352,8 +343,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & IF (.NOT. should_rebuild) THEN fmstruct => fm_pool_get_el_struct(mpools%ao_mo_fm_pools(ispin) & %pool) - CALL cp_fm_struct_get(fmstruct, nrow_global=nrg, & - ncol_global=ncg) + CALL cp_fm_struct_get(fmstruct, nrow_global=nrg, ncol_global=ncg) CALL get_mo_set(mos(1), nao=nao, nmo=nmo) should_rebuild = nao /= nrg .OR. 
nmo /= ncg END IF @@ -366,9 +356,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & IF (max_nmo == min_nmo) THEN CALL cp_fm_struct_create(fmstruct, nrow_global=nao, & ncol_global=max_nmo, para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%ao_mo_fm_pools(1)%pool, fmstruct) CALL cp_fm_struct_release(fmstruct) DO ispin = 2, SIZE(mos) @@ -380,9 +368,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & CALL get_mo_set(mos(ispin), nmo=nmo, nao=nao) CALL cp_fm_struct_create(fmstruct, nrow_global=nao, & ncol_global=nmo, para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%ao_mo_fm_pools(ispin)%pool, & fmstruct) CALL cp_fm_struct_release(fmstruct) @@ -411,9 +397,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & IF (max_nmo == min_nmo) THEN CALL cp_fm_struct_create(fmstruct, nrow_global=max_nmo, & ncol_global=max_nmo, para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%mo_mo_fm_pools(1)%pool, & fmstruct) CALL cp_fm_struct_release(fmstruct) @@ -427,9 +411,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & CALL get_mo_set(mos(ispin), nmo=nmo, nao=nao) CALL cp_fm_struct_create(fmstruct, nrow_global=nmo, & ncol_global=nmo, para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%mo_mo_fm_pools(ispin)%pool, & fmstruct) CALL cp_fm_struct_release(fmstruct) @@ -460,9 +442,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & IF (nspins == 1 .OR. nmosub(1) == nmosub(2)) THEN CALL cp_fm_struct_create(fmstruct, nrow_global=nao, & ncol_global=nmosub(1), para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%ao_mosub_fm_pools(1)%pool, fmstruct) CALL cp_fm_struct_release(fmstruct) DO ispin = 2, SIZE(mos) @@ -474,9 +454,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & CALL get_mo_set(mos(ispin), nao=nao) CALL cp_fm_struct_create(fmstruct, nrow_global=nao, & ncol_global=nmosub(1), para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%ao_mosub_fm_pools(ispin)%pool, & fmstruct) CALL cp_fm_struct_release(fmstruct) @@ -504,9 +482,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & IF (nspins == 1 .OR. 
nmosub(1) == nmosub(2)) THEN CALL cp_fm_struct_create(fmstruct, nrow_global=nmosub(1), & ncol_global=nmosub(1), para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%mosub_mosub_fm_pools(1)%pool, & fmstruct) CALL cp_fm_struct_release(fmstruct) @@ -519,9 +495,7 @@ SUBROUTINE mpools_rebuild_fm_pools(mpools, mos, blacs_env, para_env, & NULLIFY (mpools%mosub_mosub_fm_pools(ispin)%pool) CALL cp_fm_struct_create(fmstruct, nrow_global=nmosub(ispin), & ncol_global=nmosub(ispin), para_env=para_env, & - context=blacs_env, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + context=blacs_env) CALL fm_pool_create(mpools%mosub_mosub_fm_pools(ispin)%pool, & fmstruct) CALL cp_fm_struct_release(fmstruct) diff --git a/src/rpa_axk.F b/src/rpa_axk.F deleted file mode 100644 index ec83a6fc5f..0000000000 --- a/src/rpa_axk.F +++ /dev/null @@ -1,493 +0,0 @@ -!--------------------------------------------------------------------------------------------------! -! CP2K: A general program to perform molecular dynamics simulations ! -! Copyright 2000-2024 CP2K developers group ! -! ! -! SPDX-License-Identifier: GPL-2.0-or-later ! -!--------------------------------------------------------------------------------------------------! - -! ************************************************************************************************** -!> \brief Auxiliary routines needed for RPA-AXK -!> given blacs_env to another -!> \par History -!> 09.2016 created [Vladimir Rybkin] -!> 03.2019 Renamed [Frederick Stein] -!> 03.2019 Moved Functions from rpa_ri_gpw.F [Frederick Stein] -!> \author Vladimir Rybkin -! ************************************************************************************************** -MODULE rpa_axk - USE atomic_kind_types, ONLY: atomic_kind_type - USE cell_types, ONLY: cell_type - USE cp_control_types, ONLY: dft_control_type - USE cp_dbcsr_api, ONLY: & - dbcsr_copy, dbcsr_create, dbcsr_init_p, dbcsr_multiply, dbcsr_p_type, dbcsr_release, & - dbcsr_set, dbcsr_trace, dbcsr_type, dbcsr_type_no_symmetry - USE cp_dbcsr_operations, ONLY: dbcsr_allocate_matrix_set - USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale,& - cp_fm_scale - USE cp_fm_diag, ONLY: choose_eigv_solver - USE cp_fm_struct, ONLY: cp_fm_struct_create,& - cp_fm_struct_release,& - cp_fm_struct_type - USE cp_fm_types, ONLY: cp_fm_create,& - cp_fm_get_info,& - cp_fm_release,& - cp_fm_set_all,& - cp_fm_to_fm,& - cp_fm_to_fm_submat_general,& - cp_fm_type - USE hfx_energy_potential, ONLY: integrate_four_center - USE hfx_ri, ONLY: hfx_ri_update_ks - USE hfx_types, ONLY: hfx_create,& - hfx_release,& - hfx_type - USE input_section_types, ONLY: section_vals_get,& - section_vals_get_subs_vals,& - section_vals_type - USE kinds, ONLY: dp - USE message_passing, ONLY: mp_para_env_type - USE mp2_types, ONLY: mp2_type - USE parallel_gemm_api, ONLY: parallel_gemm - USE particle_types, ONLY: particle_type - USE qs_energy_types, ONLY: qs_energy_type - USE qs_environment_types, ONLY: get_qs_env,& - qs_environment_type - USE qs_kind_types, ONLY: qs_kind_type - USE qs_subsys_types, ONLY: qs_subsys_get,& - qs_subsys_type - USE rpa_communication, ONLY: gamma_fm_to_dbcsr - USE rpa_util, ONLY: calc_fm_mat_S_rpa,& - remove_scaling_factor_rpa - USE scf_control_types, ONLY: scf_control_type - USE util, ONLY: get_limit -#include "./base/base_uses.f90" - - IMPLICIT NONE - - PRIVATE - - CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'rpa_axk' - - PUBLIC :: compute_axk_ener - -CONTAINS - 
-! ************************************************************************************************** -!> \brief Main driver for RPA-AXK energies -!> \param qs_env ... -!> \param fm_mat_Q ... -!> \param fm_mat_Q_gemm ... -!> \param dimen_RI ... -!> \param dimen_ia ... -!> \param para_env_sub ... -!> \param para_env_RPA ... -!> \param eig ... -!> \param fm_mat_S ... -!> \param homo ... -!> \param virtual ... -!> \param omega ... -!> \param mp2_env ... -!> \param mat_munu ... -!> \param unit_nr ... -!> \param e_axk_corr ... AXK energy correctrion for a quadrature point -!> \author Vladimir Rybkin, 07/2016 -! ************************************************************************************************** - SUBROUTINE compute_axk_ener(qs_env, fm_mat_Q, fm_mat_Q_gemm, dimen_RI, dimen_ia, & - para_env_sub, para_env_RPA, & - eig, fm_mat_S, homo, virtual, omega, & - mp2_env, mat_munu, unit_nr, e_axk_corr) - TYPE(qs_environment_type), POINTER :: qs_env - TYPE(cp_fm_type), INTENT(IN) :: fm_mat_Q, fm_mat_Q_gemm - INTEGER, INTENT(IN) :: dimen_RI, dimen_ia - TYPE(mp_para_env_type), POINTER :: para_env_sub, para_env_RPA - REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eig - TYPE(cp_fm_type), INTENT(IN) :: fm_mat_S - INTEGER, INTENT(IN) :: homo, virtual - REAL(KIND=dp), INTENT(IN) :: omega - TYPE(mp2_type) :: mp2_env - TYPE(dbcsr_p_type), INTENT(IN) :: mat_munu - INTEGER, INTENT(IN) :: unit_nr - REAL(KIND=dp), INTENT(INOUT) :: e_axk_corr - - CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_axk_ener' - REAL(KIND=dp), PARAMETER :: thresh = 0.0000001_dp - - INTEGER :: color_sub, handle, iib, iitmp(2), kkb, L_counter, my_group_L_end, & - my_group_L_size, my_group_L_start, ncol_local, ngroup - INTEGER, DIMENSION(:), POINTER :: col_indices - REAL(KIND=dp) :: eps_filter, trace_corr - REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenval - TYPE(cp_fm_struct_type), POINTER :: fm_struct - TYPE(cp_fm_type) :: fm_mat_Gamma_3, fm_mat_Q_tmp, & - fm_mat_R_half, fm_mat_R_half_gemm, & - fm_mat_U - TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: dbcsr_Gamma_3, dbcsr_Gamma_inu_P, & - dbcsr_Gamma_munu_P - TYPE(dbcsr_type), POINTER :: mo_coeff_o, mo_coeff_v - - CALL timeset(routineN, handle) - - ! Eigenvalues - ALLOCATE (eigenval(dimen_RI)) - eigenval = 0.0_dp - ! create the R_half and U matrices with a different blacs env similar to Q - ! and a tmp_Q needed for diagonalization - - NULLIFY (fm_struct) - - CALL cp_fm_get_info(matrix=fm_mat_Q, & - matrix_struct=fm_struct) - CALL cp_fm_create(fm_mat_U, fm_struct, name="fm_mat_U") - CALL cp_fm_create(fm_mat_R_half, fm_struct, name="fm_mat_R_half") - CALL cp_fm_create(fm_mat_Q_tmp, fm_struct, name="fm_mat_Q_tmp") - CALL cp_fm_set_all(matrix=fm_mat_Q_tmp, alpha=0.0_dp) - CALL cp_fm_set_all(matrix=fm_mat_U, alpha=0.0_dp) - CALL cp_fm_set_all(matrix=fm_mat_R_half, alpha=0.0_dp) - - ! Copy Q to Q_tmp - CALL cp_fm_to_fm(fm_mat_Q, fm_mat_Q_tmp) - - CALL cp_fm_scale(0.50_dp, fm_mat_Q_tmp) - ! Diagonalize Q - CALL choose_eigv_solver(fm_mat_Q_tmp, fm_mat_U, eigenval) - - !Calculate diagonal matrix for R_half - - ! U*diag stored in U, whereas eigenvectors are in fm_mat_Q_tmp - !CALL cp_fm_to_fm(fm_mat_Q_tmp, fm_mat_U) - CALL cp_fm_to_fm(fm_mat_U, fm_mat_Q_tmp) - - ! Manipulate eigenvalues to get diagonal matrix - DO iib = 1, dimen_RI - IF (ABS(eigenval(iib)) .GE. 
thresh) THEN - eigenval(iib) = & - SQRT((1.0_dp/(eigenval(iib)**2))*LOG(1.0_dp + eigenval(iib)) & - - 1.0_dp/(eigenval(iib)*(eigenval(iib) + 1.0_dp))) - ELSE - eigenval(iib) = 0.707_dp - END IF - END DO - - CALL cp_fm_column_scale(fm_mat_U, eigenval) - - ! Release memory - DEALLOCATE (eigenval) - - ! Get R_half by multiplication - CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, & - matrix_a=fm_mat_U, matrix_b=fm_mat_Q_tmp, beta=0.0_dp, & - matrix_c=fm_mat_R_half) - - ! get info of fm_mat_S and initialize Gamma_3 - NULLIFY (fm_struct) - CALL cp_fm_struct_create(fm_struct, template_fmstruct=fm_mat_S%matrix_struct, nrow_global=dimen_ia, ncol_global=dimen_RI) - CALL cp_fm_create(fm_mat_Gamma_3, fm_struct) - CALL cp_fm_struct_release(fm_struct) - CALL cp_fm_set_all(matrix=fm_mat_Gamma_3, alpha=0.0_dp) - CALL cp_fm_get_info(matrix=fm_mat_S, & - ncol_local=ncol_local, & - col_indices=col_indices) - - ! Update G with a new value of Omega: in practice, it is G*S - - ! Here eig are orbital energies, don't confuse with eigenval, which are eigenvalues of Q! - - ! Scale fm_work_iaP - CALL calc_fm_mat_S_rpa(fm_mat_S, .TRUE., virtual, eig, & - homo, omega, 0.0_dp) - - ! Redistribute fm_mat_R_half for "rectangular" multiplication: ia*P P*P - CALL cp_fm_create(fm_mat_R_half_gemm, fm_mat_Q_gemm%matrix_struct) - CALL cp_fm_set_all(matrix=fm_mat_R_half_gemm, alpha=0.0_dp) - - CALL cp_fm_to_fm_submat_general(fm_mat_R_half, fm_mat_R_half_gemm, dimen_RI, dimen_RI, 1, 1, 1, 1, & - fm_mat_R_half%matrix_struct%context) - - ! Calculate Gamma_3: Gamma_3 = G*S*R^(1/2) = G*S*R^(1/2) ) - CALL parallel_gemm(transa="T", transb="N", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, & - matrix_a=fm_mat_S, matrix_b=fm_mat_R_half_gemm, beta=0.0_dp, & - matrix_c=fm_mat_Gamma_3) - - ! Remove extra factor from S after the multiplication - CALL remove_scaling_factor_rpa(fm_mat_S, virtual, eig, homo, omega) - - ! Release full matrix stuff - CALL cp_fm_release(fm_mat_Q_tmp) - CALL cp_fm_release(fm_mat_U) - CALL cp_fm_release(fm_mat_R_half) - CALL cp_fm_release(fm_mat_R_half_gemm) - - ! Retrieve mo coefficients in dbcsr format - NULLIFY (mo_coeff_o, mo_coeff_v) - mo_coeff_o => mp2_env%ri_rpa%mo_coeff_o - mo_coeff_v => mp2_env%ri_rpa%mo_coeff_v - - ! Get aux sizes - ngroup = para_env_RPA%num_pe/para_env_sub%num_pe - - color_sub = para_env_RPA%mepos/para_env_sub%num_pe - - iitmp = get_limit(dimen_RI, ngroup, color_sub) - my_group_L_start = iitmp(1) - my_group_L_end = iitmp(2) - my_group_L_size = iitmp(2) - iitmp(1) + 1 - - ! Copy Gamma_ia_P^3 to dbcsr matrix set - CALL gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_env_sub, & - homo, virtual, mo_coeff_o, ngroup, my_group_L_start, & - my_group_L_end, my_group_L_size) - - ! Create more dbcsr matrices - - NULLIFY (dbcsr_Gamma_inu_P) - !CALL dbcsr_allocate_matrix_set(dbcsr_Gamma_inu_P, ncol_local) - CALL dbcsr_allocate_matrix_set(dbcsr_Gamma_inu_P, my_group_L_size) - NULLIFY (dbcsr_Gamma_munu_P) - !CALL dbcsr_allocate_matrix_set(dbcsr_Gamma_munu_P, ncol_local) - CALL dbcsr_allocate_matrix_set(dbcsr_Gamma_munu_P, my_group_L_size) - eps_filter = mp2_env%mp2_gpw%eps_filter - - L_counter = 0 - DO kkb = my_group_L_start, my_group_L_end - L_counter = L_counter + 1 - ! 
One-index transformed Gamma_3 - ALLOCATE (dbcsr_Gamma_inu_P(L_counter)%matrix) - CALL dbcsr_init_p(dbcsr_Gamma_inu_P(L_counter)%matrix) - CALL dbcsr_create(dbcsr_Gamma_inu_P(L_counter)%matrix, template=mo_coeff_o) - CALL dbcsr_copy(dbcsr_Gamma_inu_P(L_counter)%matrix, mo_coeff_o) - CALL dbcsr_set(dbcsr_Gamma_inu_P(L_counter)%matrix, 0.0_dp) - ! Init Gamma_3 in AO basis - ALLOCATE (dbcsr_Gamma_munu_P(L_counter)%matrix) - CALL dbcsr_init_p(dbcsr_Gamma_munu_P(L_counter)%matrix) - CALL dbcsr_create(dbcsr_Gamma_munu_P(L_counter)%matrix, template=mat_munu%matrix, & - matrix_type=dbcsr_type_no_symmetry) - CALL dbcsr_copy(dbcsr_Gamma_munu_P(L_counter)%matrix, mat_munu%matrix) - CALL dbcsr_set(dbcsr_Gamma_munu_P(L_counter)%matrix, 0.0_dp) - END DO - - !! Loup over auxiliary basis functions: multiplication - L_counter = 0 - DO kkb = my_group_L_start, my_group_L_end - L_counter = L_counter + 1 - ! Do dbcsr multiplication: transform the virtual index - CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v, dbcsr_Gamma_3(L_counter)%matrix, & - 0.0_dp, dbcsr_Gamma_inu_P(L_counter)%matrix, filter_eps=eps_filter) - - !Do dbcsr multiplication: transform the occupied index - CALL dbcsr_multiply("N", "T", 1.0_dp, dbcsr_Gamma_inu_P(L_counter)%matrix, mo_coeff_o, & - 0.0_dp, dbcsr_Gamma_munu_P(L_counter)%matrix, filter_eps=eps_filter) - ! - CALL dbcsr_trace(dbcsr_Gamma_munu_P(L_counter)%matrix, trace_corr) - END DO - - ! Gamma_3 not needed anymore - L_counter = 0 - DO kkb = my_group_L_start, my_group_L_end - L_counter = L_counter + 1 - CALL dbcsr_release(dbcsr_Gamma_3(L_counter)%matrix) - DEALLOCATE (dbcsr_Gamma_3(L_counter)%matrix) - END DO - DEALLOCATE (dbcsr_Gamma_3) - - ! Contract DM with exchange integrals - !CALL integrate_exchange(qs_env, dbcsr_Gamma_munu_P, mat_munu, para_env_sub, ncol_local, eps_filter, e_axk_corr) - CALL integrate_exchange(qs_env, dbcsr_Gamma_munu_P, mat_munu, para_env_sub, my_group_L_size, eps_filter, e_axk_corr, & - my_group_L_start, my_group_L_end) - - !CALL para_env_RPA%sum(e_axk_corr) - - ! Print AXK correlation energy to the file - IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T68,F25.14,A4)') 'AXK correlation energy for a quadrature point:', & - e_axk_corr, ' a.u.' - - L_counter = 0 - DO kkb = my_group_L_start, my_group_L_end - L_counter = L_counter + 1 - CALL dbcsr_release(dbcsr_Gamma_inu_P(L_counter)%matrix) - CALL dbcsr_release(dbcsr_Gamma_munu_P(L_counter)%matrix) - DEALLOCATE (dbcsr_Gamma_inu_P(L_counter)%matrix) - DEALLOCATE (dbcsr_Gamma_munu_P(L_counter)%matrix) - END DO - DEALLOCATE (dbcsr_Gamma_inu_P) - DEALLOCATE (dbcsr_Gamma_munu_P) - - CALL timestop(handle) - - END SUBROUTINE compute_axk_ener - -! ************************************************************************************************** -!> \brief Contract RPA-AXK density matrix with HF exchange integrals and evaluate the correction -!> \param qs_env ... -!> \param dbcsr_Gamma_munu_P ... AXK density matrix in AO basis to be contracted -!> \param mat_munu ... -!> \param para_env_sub ... -!> \param P_stack_size ... -!> \param eps_filter ... -!> \param axk_corr ... The AXK energy correction -!> \param my_group_L_start ... -!> \param my_group_L_end ... -!> \author Vladimir Rybkin, 08/2016 -! 
************************************************************************************************** - SUBROUTINE integrate_exchange(qs_env, dbcsr_Gamma_munu_P, mat_munu, para_env_sub, P_stack_size, & - eps_filter, axk_corr, & - my_group_L_start, my_group_L_end) - TYPE(qs_environment_type), POINTER :: qs_env - TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: dbcsr_Gamma_munu_P - TYPE(dbcsr_p_type), INTENT(IN) :: mat_munu - TYPE(mp_para_env_type), POINTER :: para_env_sub - INTEGER, INTENT(INOUT) :: P_stack_size - REAL(KIND=dp), INTENT(IN) :: eps_filter - REAL(KIND=dp), INTENT(OUT) :: axk_corr - INTEGER, INTENT(IN) :: my_group_L_start, my_group_L_end - - CHARACTER(LEN=*), PARAMETER :: routineN = 'integrate_exchange' - - INTEGER :: aux, handle, irep, kkb, n_rep_hf, ns - LOGICAL :: my_recalc_hfx_integrals - REAL(KIND=dp) :: e_axk_P, ehfx - TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: rho_work_ao - TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: mat_2d, rho_ao_2d - TYPE(hfx_type), DIMENSION(:, :), POINTER :: x_data - TYPE(qs_energy_type), POINTER :: energy - TYPE(section_vals_type), POINTER :: hfx_sections - - CALL timeset(routineN, handle) - - ! Get qs environment - NULLIFY (energy) - CALL get_qs_env(qs_env, & - energy=energy) - - ! hfx section - CALL hfx_create_subgroup(qs_env, para_env_sub, hfx_sections, x_data, n_rep_hf) - - ! create a working rho environment - NULLIFY (rho_work_ao) - CALL dbcsr_allocate_matrix_set(rho_work_ao, 1) - ALLOCATE (rho_work_ao(1)%matrix) - CALL dbcsr_init_p(rho_work_ao(1)%matrix) - CALL dbcsr_create(rho_work_ao(1)%matrix, template=mat_munu%matrix) - - ! For the first aux function in the group we recalculate integrals, but only for the first - my_recalc_hfx_integrals = .TRUE. - - NULLIFY (mat_2d) - CALL dbcsr_allocate_matrix_set(mat_2d, 1, 1) - ALLOCATE (mat_2d(1, 1)%matrix) - CALL dbcsr_init_p(mat_2d(1, 1)%matrix) - CALL dbcsr_create(mat_2d(1, 1)%matrix, template=mat_munu%matrix, & - matrix_type=dbcsr_type_no_symmetry) - CALL dbcsr_copy(mat_2d(1, 1)%matrix, mat_munu%matrix) - - ! The loop over auxiliary basis functions - axk_corr = 0.0_dp - !DO aux = 1, P_stack_size - P_stack_size = P_stack_size - aux = 0 - DO kkb = my_group_L_start, my_group_L_end - aux = aux + 1 - - CALL dbcsr_copy(rho_work_ao(1)%matrix, dbcsr_Gamma_munu_P(aux)%matrix) - - DO irep = 1, n_rep_hf - ns = SIZE(rho_work_ao) - rho_ao_2d(1:ns, 1:1) => rho_work_ao(1:ns) - - CALL dbcsr_set(mat_2d(1, 1)%matrix, 0.0_dp) - - IF (x_data(irep, 1)%do_hfx_ri) THEN - CALL hfx_ri_update_ks(qs_env, x_data(irep, 1)%ri_data, mat_2d, ehfx, & - rho_ao=rho_ao_2d, geometry_did_change=my_recalc_hfx_integrals, & - nspins=ns, hf_fraction=x_data(irep, 1)%general_parameter%fraction) - - ELSE - CALL integrate_four_center(qs_env, x_data, mat_2d, ehfx, rho_ao_2d, hfx_sections, & - para_env_sub, my_recalc_hfx_integrals, irep, .TRUE., & - ispin=1) - END IF - END DO - - my_recalc_hfx_integrals = .FALSE. - ! One more dbcsr multiplication and trace - CALL dbcsr_multiply("T", "N", 1.0_dp, mat_2d(1, 1)%matrix, rho_work_ao(1)%matrix, & - 0.0_dp, dbcsr_Gamma_munu_P(aux)%matrix, filter_eps=eps_filter) - CALL dbcsr_trace(dbcsr_Gamma_munu_P(aux)%matrix, e_axk_p) - axk_corr = axk_corr + e_axk_P - END DO - - CALL dbcsr_release(mat_2d(1, 1)%matrix) - ! 
release rho stuff - CALL dbcsr_release(mat_2d(1, 1)%matrix) - DEALLOCATE (mat_2d(1, 1)%matrix) - DEALLOCATE (mat_2d) - CALL dbcsr_release(rho_work_ao(1)%matrix) - DEALLOCATE (rho_work_ao(1)%matrix) - DEALLOCATE (rho_work_ao) - CALL hfx_release(x_data) - - CALL timestop(handle) - - END SUBROUTINE integrate_exchange - -! ************************************************************************************************** -!> \brief ... Initializes x_data on a subgroup -!> \param qs_env ... -!> \param para_env_sub ... -!> \param hfx_section ... -!> \param x_data ... -!> \param n_rep_hf ... -!> \author Vladimir Rybkin -! ************************************************************************************************** - SUBROUTINE hfx_create_subgroup(qs_env, para_env_sub, hfx_section, x_data, n_rep_hf) - TYPE(qs_environment_type), POINTER :: qs_env - TYPE(mp_para_env_type), POINTER :: para_env_sub - TYPE(section_vals_type), POINTER :: hfx_section - TYPE(hfx_type), DIMENSION(:, :), POINTER :: x_data - INTEGER, INTENT(OUT) :: n_rep_hf - - CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_create_subgroup' - - INTEGER :: handle, nelectron_total - LOGICAL :: do_hfx - TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set - TYPE(cell_type), POINTER :: my_cell - TYPE(dft_control_type), POINTER :: dft_control - TYPE(particle_type), DIMENSION(:), POINTER :: particle_set - TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set - TYPE(qs_subsys_type), POINTER :: subsys - TYPE(scf_control_type), POINTER :: scf_control - TYPE(section_vals_type), POINTER :: input - - CALL timeset(routineN, handle) - - NULLIFY (my_cell, atomic_kind_set, particle_set, dft_control, x_data, qs_kind_set, scf_control) - - CALL get_qs_env(qs_env, & - subsys=subsys, & - input=input, & - scf_control=scf_control, & - nelectron_total=nelectron_total) - - CALL qs_subsys_get(subsys, & - cell=my_cell, & - atomic_kind_set=atomic_kind_set, & - qs_kind_set=qs_kind_set, & - particle_set=particle_set) - - do_hfx = .TRUE. - hfx_section => section_vals_get_subs_vals(input, "DFT%XC%WF_CORRELATION%RI_RPA%HF") - !hfx_section => section_vals_get_subs_vals(input, "DFT%XC%HF") - CALL section_vals_get(hfx_section, explicit=do_hfx, n_repetition=n_rep_hf) - CALL get_qs_env(qs_env, dft_control=dft_control) - - IF (do_hfx) THEN - ! 
Retrieve particle_set and atomic_kind_set - CALL hfx_create(x_data, para_env_sub, hfx_section, atomic_kind_set, & - qs_kind_set, particle_set, dft_control, my_cell, orb_basis='ORB', & - nelectron_total=nelectron_total) - END IF - - CALL timestop(handle) - - END SUBROUTINE hfx_create_subgroup - -END MODULE rpa_axk diff --git a/src/rpa_communication.F b/src/rpa_communication.F index 44f0ddbed3..bbd1bf0b0b 100644 --- a/src/rpa_communication.F +++ b/src/rpa_communication.F @@ -15,19 +15,15 @@ MODULE rpa_communication USE cp_blacs_env, ONLY: cp_blacs_env_create,& cp_blacs_env_release,& cp_blacs_env_type - USE cp_dbcsr_api, ONLY: dbcsr_p_type,& - dbcsr_type,& + USE cp_dbcsr_api, ONLY: dbcsr_type,& dbcsr_type_no_symmetry USE cp_dbcsr_operations, ONLY: copy_fm_to_dbcsr,& - cp_dbcsr_m_by_n_from_template,& - dbcsr_allocate_matrix_set + cp_dbcsr_m_by_n_from_template USE cp_fm_struct, ONLY: cp_fm_struct_create,& cp_fm_struct_release,& cp_fm_struct_type USE cp_fm_types, ONLY: cp_fm_create,& cp_fm_get_info,& - cp_fm_indxg2l,& - cp_fm_indxg2p,& cp_fm_release,& cp_fm_set_all,& cp_fm_type @@ -80,11 +76,11 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e homo, virtual, mo_coeff_o, ngroup, my_group_L_start, my_group_L_end, & my_group_L_size) TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_Gamma_3 - TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: dbcsr_Gamma_3 + TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: dbcsr_Gamma_3 TYPE(mp_para_env_type), INTENT(IN) :: para_env_RPA TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub INTEGER, INTENT(IN) :: homo, virtual - TYPE(dbcsr_type), POINTER :: mo_coeff_o + TYPE(dbcsr_type), INTENT(INOUT) :: mo_coeff_o INTEGER, INTENT(IN) :: ngroup, my_group_L_start, & my_group_L_end, my_group_L_size @@ -92,10 +88,10 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e INTEGER :: dimen_ia, dummy_proc, handle, i_global, i_local, iaia, iib, iii, itmp(2), & j_global, j_local, jjb, jjj, kkb, my_ia_end, my_ia_size, my_ia_start, mypcol, myprow, & - ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, number_of_rec, & - number_of_send, proc_receive, proc_send, proc_shift, rec_counter, rec_iaia_end, & - rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, ref_send_prow, & - send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer + ncol_local, npcol, nprow, nrow_local, number_of_rec, number_of_send, proc_receive, & + proc_send, proc_shift, rec_counter, rec_iaia_end, rec_iaia_size, rec_iaia_start, & + rec_pcol, rec_prow, ref_send_pcol, ref_send_prow, send_counter, send_pcol, send_prow, & + size_rec_buffer, size_send_buffer INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, & group_grid_2_mepos, indices_map_my, & @@ -152,9 +148,7 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e nrow_local=nrow_local, & ncol_local=ncol_local, & row_indices=row_indices, & - col_indices=col_indices, & - nrow_block=nrow_block, & - ncol_block=ncol_block) + col_indices=col_indices) myprow = fm_ia%matrix_struct%context%mepos(1) mypcol = fm_ia%matrix_struct%context%mepos(2) nprow = fm_ia%matrix_struct%context%num_pe(1) @@ -177,10 +171,8 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e DO iaia = my_ia_start, my_ia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & 
- fm_ia%matrix_struct%first_p_pos(1), nprow) - send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + send_prow = fm_ia%matrix_struct%g2p_row(i_global) + send_pcol = fm_ia%matrix_struct%g2p_col(j_global) proc_send = grid_2_mepos(send_prow, send_pcol) map_send_size(proc_send) = map_send_size(proc_send) + 1 END DO @@ -271,16 +263,12 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e DO iaia = rec_iaia_start, rec_iaia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + rec_prow = fm_ia%matrix_struct%g2p_row(i_global) + rec_pcol = fm_ia%matrix_struct%g2p_col(j_global) IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE iii = iii + 1 - i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + i_local = fm_ia%matrix_struct%g2l_row(i_global) + j_local = fm_ia%matrix_struct%g2l_col(j_global) indices_rec(rec_counter)%map(1, iii) = i_local indices_rec(rec_counter)%map(2, iii) = j_local END DO @@ -296,26 +284,19 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e DO iaia = my_ia_start, my_ia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + rec_prow = fm_ia%matrix_struct%g2p_row(i_global) + rec_pcol = fm_ia%matrix_struct%g2p_col(j_global) IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE iii = iii + 1 - i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + i_local = fm_ia%matrix_struct%g2l_row(i_global) + j_local = fm_ia%matrix_struct%g2l_col(j_global) indices_map_my(1, iii) = i_local indices_map_my(2, iii) = j_local END DO END IF ! Allocate dbcsr_Gamma_3 - NULLIFY (dbcsr_Gamma_3) - - !CALL dbcsr_allocate_matrix_set(dbcsr_Gamma_3, ncol_local) - CALL dbcsr_allocate_matrix_set(dbcsr_Gamma_3, my_group_L_size) + ALLOCATE (dbcsr_Gamma_3(my_group_L_size)) ! auxiliary vector of indices for the send buffer ALLOCATE (iii_vet(number_of_send)) @@ -347,10 +328,8 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e DO iaia = my_ia_start, my_ia_end i_global = (iaia - 1)/virtual + 1 j_global = MOD(iaia - 1, virtual) + 1 - send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(1), nprow) - send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, & - fm_ia%matrix_struct%first_p_pos(2), npcol) + send_prow = fm_ia%matrix_struct%g2p_row(i_global) + send_pcol = fm_ia%matrix_struct%g2p_col(j_global) proc_send = grid_2_mepos(send_prow, send_pcol) ! we don't need to send to ourselves IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN @@ -402,11 +381,9 @@ SUBROUTINE gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3, para_env_RPA, para_e CALL mp_waitall(req_send(:)) ! 
now create the DBCSR matrix and copy fm_ia into it - ALLOCATE (dbcsr_Gamma_3(kkB)%matrix) - CALL cp_dbcsr_m_by_n_from_template(dbcsr_Gamma_3(kkB)%matrix, & - template=mo_coeff_o, & + CALL cp_dbcsr_m_by_n_from_template(dbcsr_Gamma_3(kkB), template=mo_coeff_o, & m=homo, n=virtual, sym=dbcsr_type_no_symmetry) - CALL copy_fm_to_dbcsr(fm_ia, dbcsr_Gamma_3(kkB)%matrix, keep_sparsity=.FALSE.) + CALL copy_fm_to_dbcsr(fm_ia, dbcsr_Gamma_3(kkB), keep_sparsity=.FALSE.) END DO diff --git a/src/rpa_exchange.F b/src/rpa_exchange.F new file mode 100644 index 0000000000..0d5520356b --- /dev/null +++ b/src/rpa_exchange.F @@ -0,0 +1,977 @@ +!--------------------------------------------------------------------------------------------------! +! CP2K: A general program to perform molecular dynamics simulations ! +! Copyright 2000-2024 CP2K developers group ! +! ! +! SPDX-License-Identifier: GPL-2.0-or-later ! +!--------------------------------------------------------------------------------------------------! + +! ************************************************************************************************** +!> \brief Auxiliary routines needed for RPA-exchange +!> given blacs_env to another +!> \par History +!> 09.2016 created [Vladimir Rybkin] +!> 03.2019 Renamed [Frederick Stein] +!> 03.2019 Moved Functions from rpa_ri_gpw.F [Frederick Stein] +!> 04.2024 Added open-shell calculations, SOSEX [Frederick Stein] +!> \author Vladimir Rybkin +! ************************************************************************************************** +MODULE rpa_exchange + USE atomic_kind_types, ONLY: atomic_kind_type + USE cell_types, ONLY: cell_type + USE cp_blacs_env, ONLY: cp_blacs_env_type + USE cp_control_types, ONLY: dft_control_type + USE cp_dbcsr_api, ONLY: & + dbcsr_copy, dbcsr_create, dbcsr_get_info, dbcsr_init_p, dbcsr_multiply, dbcsr_p_type, & + dbcsr_release, dbcsr_set, dbcsr_trace, dbcsr_type, dbcsr_type_no_symmetry + USE cp_dbcsr_operations, ONLY: dbcsr_allocate_matrix_set + USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale + USE cp_fm_diag, ONLY: choose_eigv_solver + USE cp_fm_struct, ONLY: cp_fm_struct_create,& + cp_fm_struct_p_type,& + cp_fm_struct_release + USE cp_fm_types, ONLY: cp_fm_create,& + cp_fm_get_info,& + cp_fm_release,& + cp_fm_set_all,& + cp_fm_to_fm,& + cp_fm_to_fm_submat_general,& + cp_fm_type + USE group_dist_types, ONLY: create_group_dist,& + get_group_dist,& + group_dist_d1_type,& + group_dist_proc,& + maxsize,& + release_group_dist + USE hfx_admm_utils, ONLY: tddft_hfx_matrix + USE hfx_types, ONLY: hfx_create,& + hfx_release,& + hfx_type + USE input_constants, ONLY: rpa_exchange_axk,& + rpa_exchange_none,& + rpa_exchange_sosex + USE input_section_types, ONLY: section_vals_get_subs_vals,& + section_vals_type + USE kinds, ONLY: dp,& + int_8 + USE local_gemm_api, ONLY: LOCAL_GEMM_PU_GPU + USE mathconstants, ONLY: sqrthalf + USE message_passing, ONLY: mp_para_env_type,& + mp_proc_null + USE mp2_types, ONLY: mp2_type + USE parallel_gemm_api, ONLY: parallel_gemm + USE particle_types, ONLY: particle_type + USE qs_environment_types, ONLY: get_qs_env,& + qs_environment_type + USE qs_kind_types, ONLY: qs_kind_type + USE qs_subsys_types, ONLY: qs_subsys_get,& + qs_subsys_type + USE rpa_communication, ONLY: gamma_fm_to_dbcsr + USE rpa_util, ONLY: calc_fm_mat_S_rpa,& + remove_scaling_factor_rpa + USE scf_control_types, ONLY: scf_control_type +#include "./base/base_uses.f90" + + IMPLICIT NONE + + PRIVATE + + CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'rpa_exchange' + + PUBLIC :: 
rpa_exchange_work_type, rpa_exchange_needed_mem + + TYPE rpa_exchange_env_type + PRIVATE + TYPE(qs_environment_type), POINTER :: qs_env => NULL() + TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_hfx => NULL() + TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: dbcsr_Gamma_munu_P => NULL() + TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: dbcsr_Gamma_inu_P + TYPE(dbcsr_type), DIMENSION(:), ALLOCATABLE :: mo_coeff_o + TYPE(dbcsr_type), DIMENSION(:), ALLOCATABLE :: mo_coeff_v + TYPE(dbcsr_type) :: work_ao + TYPE(hfx_type), DIMENSION(:, :), POINTER :: x_data => NULL() + TYPE(mp_para_env_type), POINTER :: para_env => NULL() + TYPE(section_vals_type), POINTER :: hfx_sections => NULL() + LOGICAL :: my_recalc_hfx_integrals = .FALSE. + REAL(KIND=dp) :: eps_filter = 0.0_dp + TYPE(cp_fm_struct_p_type), DIMENSION(:), ALLOCATABLE :: struct_Gamma + CONTAINS + PROCEDURE, PASS(exchange_env), NON_OVERRIDABLE :: create => hfx_create_subgroup + !PROCEDURE, PASS(exchange_env), NON_OVERRIDABLE :: integrate => integrate_exchange + PROCEDURE, PASS(exchange_env), NON_OVERRIDABLE :: release => hfx_release_subgroup + END TYPE + + TYPE dbcsr_matrix_p_set + TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: matrix_set + END TYPE + + TYPE rpa_exchange_work_type + PRIVATE + INTEGER :: exchange_correction = rpa_exchange_none + TYPE(rpa_exchange_env_type) :: exchange_env + INTEGER, DIMENSION(:), ALLOCATABLE :: homo, virtual, dimen_ia + TYPE(group_dist_d1_type) :: aux_func_dist = group_dist_d1_type() + INTEGER, DIMENSION(:), ALLOCATABLE :: aux2send + INTEGER :: dimen_RI = 0 + INTEGER :: block_size = 0 + INTEGER :: color_sub = 0 + INTEGER :: ngroup = 0 + TYPE(cp_fm_type) :: fm_mat_Q_tmp = cp_fm_type() + TYPE(cp_fm_type) :: fm_mat_R_half_gemm = cp_fm_type() + TYPE(cp_fm_type) :: fm_mat_U = cp_fm_type() + TYPE(mp_para_env_type), POINTER :: para_env_sub => NULL() + CONTAINS + PROCEDURE, PUBLIC, PASS(exchange_work), NON_OVERRIDABLE :: create => rpa_exchange_work_create + PROCEDURE, PUBLIC, PASS(exchange_work), NON_OVERRIDABLE :: compute => rpa_exchange_work_compute + PROCEDURE, PUBLIC, PASS(exchange_work), NON_OVERRIDABLE :: release => rpa_exchange_work_release + PROCEDURE, PRIVATE, PASS(exchange_work), NON_OVERRIDABLE :: redistribute_into_subgroups + PROCEDURE, PRIVATE, PASS(exchange_work), NON_OVERRIDABLE :: compute_fm => rpa_exchange_work_compute_fm + PROCEDURE, PRIVATE, PASS(exchange_work), NON_OVERRIDABLE :: compute_hfx => rpa_exchange_work_compute_hfx + END TYPE + +CONTAINS + +! ************************************************************************************************** +!> \brief ... +!> \param mp2_env ... +!> \param homo ... +!> \param virtual ... +!> \param dimen_RI ... +!> \param para_env ... +!> \param mem_per_rank ... +!> \param mem_per_repl ... +! ************************************************************************************************** + SUBROUTINE rpa_exchange_needed_mem(mp2_env, homo, virtual, dimen_RI, para_env, mem_per_rank, mem_per_repl) + TYPE(mp2_type), INTENT(IN) :: mp2_env + INTEGER, DIMENSION(:), INTENT(IN) :: homo, virtual + INTEGER, INTENT(IN) :: dimen_RI + TYPE(mp_para_env_type), INTENT(IN) :: para_env + REAL(KIND=dp), INTENT(INOUT) :: mem_per_rank, mem_per_repl + + INTEGER :: block_size + + ! We need the block size and if it is unknown, an upper bound + block_size = mp2_env%ri_rpa%exchange_block_size + IF (block_size <= 0) block_size = MAX(1, (dimen_RI + para_env%num_pe - 1)/para_env%num_pe) + + ! 
storage of product matrix (upper bound only as it depends on the square of the potential still unknown block size) + mem_per_rank = mem_per_rank + REAL(MAXVAL(homo), KIND=dp)**2*block_size**2*8.0_dp/(1024_dp**2) + + ! work arrays R (2x) and U, copies of Gamma (2x), communication buffer (as expensive as Gamma) + mem_per_repl = mem_per_repl + 3.0_dp*dimen_RI*dimen_RI*8.0_dp/(1024_dp**2) & + + 3.0_dp*MAXVAL(homo*virtual)*dimen_RI*8.0_dp/(1024_dp**2) + END SUBROUTINE rpa_exchange_needed_mem + +! ************************************************************************************************** +!> \brief ... +!> \param exchange_work ... +!> \param qs_env ... +!> \param para_env_sub ... +!> \param mat_munu ... +!> \param dimen_RI ... +!> \param fm_mat_S ... +!> \param fm_mat_Q ... +!> \param fm_mat_Q_gemm ... +!> \param homo ... +!> \param virtual ... +! ************************************************************************************************** + SUBROUTINE rpa_exchange_work_create(exchange_work, qs_env, para_env_sub, mat_munu, dimen_RI, & + fm_mat_S, fm_mat_Q, fm_mat_Q_gemm, homo, virtual) + CLASS(rpa_exchange_work_type), INTENT(INOUT) :: exchange_work + TYPE(qs_environment_type), POINTER :: qs_env + TYPE(mp_para_env_type), POINTER, INTENT(IN) :: para_env_sub + TYPE(dbcsr_p_type), INTENT(IN) :: mat_munu + INTEGER, INTENT(IN) :: dimen_RI + TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_S + TYPE(cp_fm_type), INTENT(IN) :: fm_mat_Q, fm_mat_Q_gemm + INTEGER, DIMENSION(SIZE(fm_mat_S)), INTENT(IN) :: homo, virtual + + INTEGER :: nspins, aux_global, aux_local, my_process_row, proc, ispin + INTEGER, DIMENSION(:), POINTER :: row_indices, aux_distribution_fm + TYPE(cp_blacs_env_type), POINTER :: context + + exchange_work%exchange_correction = qs_env%mp2_env%ri_rpa%exchange_correction + + IF (exchange_work%exchange_correction == rpa_exchange_none) RETURN + + ASSOCIATE (para_env => fm_mat_S(1)%matrix_struct%para_env) + exchange_work%para_env_sub => para_env_sub + exchange_work%ngroup = para_env%num_pe/para_env_sub%num_pe + exchange_work%color_sub = para_env%mepos/para_env_sub%num_pe + END ASSOCIATE + + CALL cp_fm_get_info(fm_mat_S(1), row_indices=row_indices, nrow_locals=aux_distribution_fm, context=context) + CALL context%get(my_process_row=my_process_row) + + CALL create_group_dist(exchange_work%aux_func_dist, exchange_work%ngroup, dimen_RI) + ALLOCATE (exchange_work%aux2send(0:exchange_work%ngroup - 1)) + exchange_work%aux2send = 0 + DO aux_local = 1, aux_distribution_fm(my_process_row) + aux_global = row_indices(aux_local) + proc = group_dist_proc(exchange_work%aux_func_dist, aux_global) + exchange_work%aux2send(proc) = exchange_work%aux2send(proc) + 1 + END DO + + nspins = SIZE(fm_mat_S) + + ALLOCATE (exchange_work%homo(nspins), exchange_work%virtual(nspins), exchange_work%dimen_ia(nspins)) + exchange_work%homo(:) = homo + exchange_work%virtual(:) = virtual + exchange_work%dimen_ia(:) = homo*virtual + exchange_work%dimen_RI = dimen_RI + + exchange_work%block_size = qs_env%mp2_env%ri_rpa%exchange_block_size + IF (exchange_work%block_size <= 0) exchange_work%block_size = dimen_RI + + CALL cp_fm_create(exchange_work%fm_mat_U, fm_mat_Q%matrix_struct, name="fm_mat_U") + CALL cp_fm_create(exchange_work%fm_mat_Q_tmp, fm_mat_Q%matrix_struct, name="fm_mat_Q_tmp") + CALL cp_fm_create(exchange_work%fm_mat_R_half_gemm, fm_mat_Q_gemm%matrix_struct) + + IF (qs_env%mp2_env%ri_rpa%use_hfx_implementation) THEN + CALL exchange_work%exchange_env%create(qs_env, mat_munu%matrix, para_env_sub, fm_mat_S) + END 
IF + + IF (ALLOCATED(qs_env%mp2_env%ri_rpa%mo_coeff_o)) THEN + DO ispin = 1, SIZE(qs_env%mp2_env%ri_rpa%mo_coeff_o) + CALL dbcsr_release(qs_env%mp2_env%ri_rpa%mo_coeff_o(ispin)) + END DO + DEALLOCATE (qs_env%mp2_env%ri_rpa%mo_coeff_o) + END IF + + IF (ALLOCATED(qs_env%mp2_env%ri_rpa%mo_coeff_v)) THEN + DO ispin = 1, SIZE(qs_env%mp2_env%ri_rpa%mo_coeff_v) + CALL dbcsr_release(qs_env%mp2_env%ri_rpa%mo_coeff_v(ispin)) + END DO + DEALLOCATE (qs_env%mp2_env%ri_rpa%mo_coeff_v) + END IF + END SUBROUTINE + +! ************************************************************************************************** +!> \brief ... Initializes x_data on a subgroup +!> \param exchange_env ... +!> \param qs_env ... +!> \param mat_munu ... +!> \param para_env_sub ... +!> \param fm_mat_S ... +!> \author Vladimir Rybkin +! ************************************************************************************************** + SUBROUTINE hfx_create_subgroup(exchange_env, qs_env, mat_munu, para_env_sub, fm_mat_S) + CLASS(rpa_exchange_env_type), INTENT(INOUT) :: exchange_env + TYPE(dbcsr_type), INTENT(IN) :: mat_munu + TYPE(qs_environment_type), POINTER, INTENT(IN) :: qs_env + TYPE(mp_para_env_type), POINTER, INTENT(IN) :: para_env_sub + TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_S + + CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_create_subgroup' + + INTEGER :: handle, nelectron_total, ispin, & + number_of_aos, nspins, dimen_RI, dimen_ia + TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set + TYPE(cell_type), POINTER :: my_cell + TYPE(dft_control_type), POINTER :: dft_control + TYPE(particle_type), DIMENSION(:), POINTER :: particle_set + TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set + TYPE(qs_subsys_type), POINTER :: subsys + TYPE(scf_control_type), POINTER :: scf_control + TYPE(section_vals_type), POINTER :: input + + CALL timeset(routineN, handle) + + CALL MOVE_ALLOC(qs_env%mp2_env%ri_rpa%mo_coeff_o, exchange_env%mo_coeff_o) + CALL MOVE_ALLOC(qs_env%mp2_env%ri_rpa%mo_coeff_v, exchange_env%mo_coeff_v) + + nspins = SIZE(exchange_env%mo_coeff_o) + + exchange_env%qs_env => qs_env + exchange_env%para_env => para_env_sub + exchange_env%eps_filter = qs_env%mp2_env%mp2_gpw%eps_filter + + NULLIFY (my_cell, atomic_kind_set, particle_set, dft_control, qs_kind_set, scf_control) + + CALL get_qs_env(qs_env, & + subsys=subsys, & + input=input, & + scf_control=scf_control, & + nelectron_total=nelectron_total) + + CALL qs_subsys_get(subsys, & + cell=my_cell, & + atomic_kind_set=atomic_kind_set, & + qs_kind_set=qs_kind_set, & + particle_set=particle_set) + + exchange_env%hfx_sections => section_vals_get_subs_vals(input, "DFT%XC%WF_CORRELATION%RI_RPA%HF") + CALL get_qs_env(qs_env, dft_control=dft_control) + + ! Retrieve particle_set and atomic_kind_set + CALL hfx_create(exchange_env%x_data, para_env_sub, exchange_env%hfx_sections, atomic_kind_set, & + qs_kind_set, particle_set, dft_control, my_cell, orb_basis='ORB', & + nelectron_total=nelectron_total) + + exchange_env%my_recalc_hfx_integrals = .TRUE. 
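+      ! Descriptive note (derived from this patch): the flag requests a full
+      ! (re)calculation of the HFX integrals for the first contraction; it is
+      ! reset to .FALSE. after the first call to tddft_hfx_matrix in
+      ! rpa_exchange_work_compute_hfx, so later quadrature points skip the recalculation.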
+ + CALL dbcsr_allocate_matrix_set(exchange_env%mat_hfx, nspins) + DO ispin = 1, nspins + ALLOCATE (exchange_env%mat_hfx(ispin)%matrix) + CALL dbcsr_init_p(exchange_env%mat_hfx(ispin)%matrix) + CALL dbcsr_create(exchange_env%mat_hfx(ispin)%matrix, template=mat_munu, & + matrix_type=dbcsr_type_no_symmetry) + CALL dbcsr_copy(exchange_env%mat_hfx(ispin)%matrix, mat_munu) + END DO + + CALL dbcsr_get_info(mat_munu, nfullcols_total=number_of_aos) + + CALL dbcsr_create(exchange_env%work_ao, template=mat_munu, & + matrix_type=dbcsr_type_no_symmetry) + + ALLOCATE (exchange_env%dbcsr_Gamma_inu_P(nspins)) + CALL dbcsr_allocate_matrix_set(exchange_env%dbcsr_Gamma_munu_P, nspins) + DO ispin = 1, nspins + ALLOCATE (exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix) + CALL dbcsr_create(exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix, template=mat_munu, & + matrix_type=dbcsr_type_no_symmetry) + CALL dbcsr_copy(exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix, mat_munu) + CALL dbcsr_set(exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix, 0.0_dp) + + CALL dbcsr_create(exchange_env%dbcsr_Gamma_inu_P(ispin), template=exchange_env%mo_coeff_o(ispin)) + CALL dbcsr_copy(exchange_env%dbcsr_Gamma_inu_P(ispin), exchange_env%mo_coeff_o(ispin)) + CALL dbcsr_set(exchange_env%dbcsr_Gamma_inu_P(ispin), 0.0_dp) + END DO + + ALLOCATE (exchange_env%struct_Gamma(nspins)) + DO ispin = 1, nspins + CALL cp_fm_get_info(fm_mat_S(ispin), nrow_global=dimen_RI, ncol_global=dimen_ia) + CALL cp_fm_struct_create(exchange_env%struct_Gamma(ispin)%struct, template_fmstruct=fm_mat_S(ispin)%matrix_struct, & + nrow_global=dimen_ia, ncol_global=dimen_RI) + END DO + + CALL timestop(handle) + + END SUBROUTINE hfx_create_subgroup + +! ************************************************************************************************** +!> \brief ... +!> \param exchange_work ... +! ************************************************************************************************** + SUBROUTINE rpa_exchange_work_release(exchange_work) + CLASS(rpa_exchange_work_type), INTENT(INOUT) :: exchange_work + + IF (ALLOCATED(exchange_work%homo)) DEALLOCATE (exchange_work%homo) + IF (ALLOCATED(exchange_work%virtual)) DEALLOCATE (exchange_work%virtual) + IF (ALLOCATED(exchange_work%dimen_ia)) DEALLOCATE (exchange_work%dimen_ia) + NULLIFY (exchange_work%para_env_sub) + CALL release_group_dist(exchange_work%aux_func_dist) + IF (ALLOCATED(exchange_work%aux2send)) DEALLOCATE (exchange_work%aux2send) + CALL cp_fm_release(exchange_work%fm_mat_Q_tmp) + CALL cp_fm_release(exchange_work%fm_mat_U) + CALL cp_fm_release(exchange_work%fm_mat_R_half_gemm) + + CALL exchange_work%exchange_env%release() + END SUBROUTINE + +! ************************************************************************************************** +!> \brief ... +!> \param exchange_env ... +! 
************************************************************************************************** + SUBROUTINE hfx_release_subgroup(exchange_env) + CLASS(rpa_exchange_env_type), INTENT(INOUT) :: exchange_env + + INTEGER :: ispin + + NULLIFY (exchange_env%para_env, exchange_env%hfx_sections) + + IF (ASSOCIATED(exchange_env%x_data)) THEN + CALL hfx_release(exchange_env%x_data) + NULLIFY (exchange_env%x_data) + END IF + + CALL dbcsr_release(exchange_env%work_ao) + + IF (ASSOCIATED(exchange_env%dbcsr_Gamma_munu_P)) THEN + DO ispin = 1, SIZE(exchange_env%mat_hfx, 1) + CALL dbcsr_release(exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix) + CALL dbcsr_release(exchange_env%mat_hfx(ispin)%matrix) + CALL dbcsr_release(exchange_env%dbcsr_Gamma_inu_P(ispin)) + CALL dbcsr_release(exchange_env%mo_coeff_o(ispin)) + CALL dbcsr_release(exchange_env%mo_coeff_v(ispin)) + DEALLOCATE (exchange_env%mat_hfx(ispin)%matrix) + DEALLOCATE (exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix) + END DO + DEALLOCATE (exchange_env%mat_hfx, exchange_env%dbcsr_Gamma_munu_P) + DEALLOCATE (exchange_env%dbcsr_Gamma_inu_P, exchange_env%mo_coeff_o, exchange_env%mo_coeff_v) + NULLIFY (exchange_env%mat_hfx, exchange_env%dbcsr_Gamma_munu_P) + END IF + IF (ALLOCATED(exchange_env%struct_Gamma)) THEN + DO ispin = 1, SIZE(exchange_env%struct_Gamma) + CALL cp_fm_struct_release(exchange_env%struct_Gamma(ispin)%struct) + END DO + DEALLOCATE (exchange_env%struct_Gamma) + END IF + END SUBROUTINE hfx_release_subgroup + +! ************************************************************************************************** +!> \brief Main driver for RPA-exchange energies +!> \param exchange_work ... +!> \param fm_mat_Q ... +!> \param eig ... +!> \param fm_mat_S ... +!> \param omega ... +!> \param e_exchange_corr exchange energy correction for a quadrature point +!> \param mp2_env ... +!> \author Vladimir Rybkin, 07/2016 +! ************************************************************************************************** + SUBROUTINE rpa_exchange_work_compute(exchange_work, fm_mat_Q, eig, fm_mat_S, omega, & + e_exchange_corr, mp2_env) + CLASS(rpa_exchange_work_type), INTENT(INOUT) :: exchange_work + TYPE(cp_fm_type), INTENT(IN) :: fm_mat_Q + REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: eig + TYPE(cp_fm_type), DIMENSION(:), INTENT(INOUT) :: fm_mat_S + REAL(KIND=dp), INTENT(IN) :: omega + REAL(KIND=dp), INTENT(INOUT) :: e_exchange_corr + TYPE(mp2_type), INTENT(INOUT) :: mp2_env + + CHARACTER(LEN=*), PARAMETER :: routineN = 'rpa_exchange_work_compute' + REAL(KIND=dp), PARAMETER :: thresh = 0.0000001_dp + + INTEGER :: handle, nspins, dimen_RI, iiB + REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenval + + IF (exchange_work%exchange_correction == rpa_exchange_none) RETURN + + CALL timeset(routineN, handle) + + CALL cp_fm_get_info(fm_mat_Q, ncol_global=dimen_RI) + + nspins = SIZE(fm_mat_S) + + ! Eigenvalues + ALLOCATE (eigenval(dimen_RI)) + eigenval = 0.0_dp + + CALL cp_fm_set_all(matrix=exchange_work%fm_mat_Q_tmp, alpha=0.0_dp) + CALL cp_fm_set_all(matrix=exchange_work%fm_mat_U, alpha=0.0_dp) + + ! Copy Q to Q_tmp + CALL cp_fm_to_fm(fm_mat_Q, exchange_work%fm_mat_Q_tmp) + ! Diagonalize Q + CALL choose_eigv_solver(exchange_work%fm_mat_Q_tmp, exchange_work%fm_mat_U, eigenval) + + ! Calculate diagonal matrix for R_half + + ! Manipulate eigenvalues to get diagonal matrix + IF (exchange_work%exchange_correction == rpa_exchange_axk) THEN + DO iib = 1, dimen_RI + IF (ABS(eigenval(iib)) .GE. 
thresh) THEN + eigenval(iib) = & + SQRT((1.0_dp/(eigenval(iib)**2))*LOG(1.0_dp + eigenval(iib)) & + - 1.0_dp/(eigenval(iib)*(eigenval(iib) + 1.0_dp))) + ELSE + eigenval(iib) = sqrthalf + END IF + END DO + ELSE IF (exchange_work%exchange_correction == rpa_exchange_sosex) THEN + DO iib = 1, dimen_RI + IF (ABS(eigenval(iib)) .GE. thresh) THEN + eigenval(iib) = & + SQRT(-(1.0_dp/(eigenval(iib)**2))*LOG(1.0_dp + eigenval(iib)) & + + 1.0_dp/eigenval(iib)) + ELSE + eigenval(iib) = sqrthalf + END IF + END DO + ELSE + CPABORT("Unknown RPA exchange correction") + END IF + + ! fm_mat_U now contains some sqrt of the required matrix-valued function + CALL cp_fm_column_scale(exchange_work%fm_mat_U, eigenval) + + ! Release memory + DEALLOCATE (eigenval) + + ! Redistribute fm_mat_U for "rectangular" multiplication: ia*P P*P + CALL cp_fm_set_all(matrix=exchange_work%fm_mat_R_half_gemm, alpha=0.0_dp) + + CALL cp_fm_to_fm_submat_general(exchange_work%fm_mat_U, exchange_work%fm_mat_R_half_gemm, dimen_RI, & + dimen_RI, 1, 1, 1, 1, exchange_work%fm_mat_U%matrix_struct%context) + + IF (mp2_env%ri_rpa%use_hfx_implementation) THEN + CALL exchange_work%compute_hfx(fm_mat_S, eig, omega, e_exchange_corr) + ELSE + CALL exchange_work%compute_fm(fm_mat_S, eig, omega, e_exchange_corr, mp2_env) + END IF + + CALL timestop(handle) + + END SUBROUTINE rpa_exchange_work_compute + +! ************************************************************************************************** +!> \brief Main driver for RPA-exchange energies +!> \param exchange_work ... +!> \param fm_mat_S ... +!> \param eig ... +!> \param omega ... +!> \param e_exchange_corr exchange energy correction for a quadrature point +!> \param mp2_env ... +!> \author Frederick Stein, May-June 2024 +! ************************************************************************************************** + SUBROUTINE rpa_exchange_work_compute_fm(exchange_work, fm_mat_S, eig, omega, & + e_exchange_corr, mp2_env) + CLASS(rpa_exchange_work_type), INTENT(INOUT) :: exchange_work + TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_S + REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: eig + REAL(KIND=dp), INTENT(IN) :: omega + REAL(KIND=dp), INTENT(INOUT) :: e_exchange_corr + TYPE(mp2_type), INTENT(INOUT) :: mp2_env + + CHARACTER(LEN=*), PARAMETER :: routineN = 'rpa_exchange_work_compute_fm' + + INTEGER :: handle, ispin, nspins, P, Q, L_size_Gamma, hom, virt, i, & + send_proc, recv_proc, recv_size, max_aux_size, proc_shift, dimen_ia, & + block_size, P_start, P_end, P_size, Q_start, Q_size, Q_end, handle2, my_aux_size, my_virt + REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: mat_Gamma_3_3D + REAL(KIND=dp), POINTER, DIMENSION(:), CONTIGUOUS :: mat_Gamma_3_1D + REAL(KIND=dp), POINTER, DIMENSION(:, :), CONTIGUOUS :: mat_Gamma_3_2D + REAL(KIND=dp), ALLOCATABLE, TARGET, DIMENSION(:) :: recv_buffer_1D + REAL(KIND=dp), POINTER, DIMENSION(:, :), CONTIGUOUS :: recv_buffer_2D + REAL(KIND=dp), POINTER, DIMENSION(:, :, :), CONTIGUOUS :: recv_buffer_3D + REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: mat_B_iaP + REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), TARGET :: product_matrix_1D + REAL(KIND=dp), POINTER, DIMENSION(:, :), CONTIGUOUS :: product_matrix_2D + REAL(KIND=dp), POINTER, DIMENSION(:, :, :, :), CONTIGUOUS :: product_matrix_4D + TYPE(cp_fm_type) :: fm_mat_Gamma_3 + TYPE(mp_para_env_type), POINTER :: para_env + TYPE(group_dist_d1_type) :: virt_dist + + CALL timeset(routineN, handle) + + nspins = SIZE(fm_mat_S) + + CALL get_group_dist(exchange_work%aux_func_dist, 
exchange_work%color_sub, sizes=my_aux_size) + + e_exchange_corr = 0.0_dp + max_aux_size = maxsize(exchange_work%aux_func_dist) + + ! local_gemm_ctx has a very large footprint the first time this routine is + ! called. + CALL mp2_env%local_gemm_ctx%create(LOCAL_GEMM_PU_GPU) + CALL mp2_env%local_gemm_ctx%set_op_threshold_gpu(128*128*128*2) + + DO ispin = 1, nspins + hom = exchange_work%homo(ispin) + virt = exchange_work%virtual(ispin) + dimen_ia = hom*virt + IF (hom < 1 .OR. virt < 1) CYCLE + + CALL cp_fm_get_info(fm_mat_S(ispin), para_env=para_env) + + CALL cp_fm_create(fm_mat_Gamma_3, fm_mat_S(ispin)%matrix_struct) + CALL cp_fm_set_all(matrix=fm_mat_Gamma_3, alpha=0.0_dp) + + ! Update G with a new value of Omega: in practice, it is G*S + + ! Scale fm_work_iaP + CALL calc_fm_mat_S_rpa(fm_mat_S(ispin), .TRUE., virt, eig(:, ispin), & + hom, omega, 0.0_dp) + + ! Calculate Gamma_3: Gamma_3 = G*S*R^(1/2) = G*S*R^(1/2) + CALL parallel_gemm(transa="T", transb="N", m=exchange_work%dimen_RI, n=dimen_ia, k=exchange_work%dimen_RI, alpha=1.0_dp, & + matrix_a=exchange_work%fm_mat_R_half_gemm, matrix_b=fm_mat_S(ispin), beta=0.0_dp, & + matrix_c=fm_mat_Gamma_3) + + CALL create_group_dist(virt_dist, exchange_work%para_env_sub%num_pe, virt) + + ! Remove extra factor from S after the multiplication (to return to the original matrix) + CALL remove_scaling_factor_rpa(fm_mat_S(ispin), virt, eig(:, ispin), hom, omega) + + CALL exchange_work%redistribute_into_subgroups(fm_mat_Gamma_3, mat_Gamma_3_3D, ispin, virt_dist) + CALL cp_fm_release(fm_mat_Gamma_3) + + ! We need only the pure matrix + CALL remove_scaling_factor_rpa(fm_mat_S(ispin), virt, eig(:, ispin), hom, omega) + + ! Reorder matrix from (P, i*a) -> (a, i, P) with P being distributed within subgroups + CALL exchange_work%redistribute_into_subgroups(fm_mat_S(ispin), mat_B_iaP, ispin, virt_dist) + + ! Return to the original tensor + CALL calc_fm_mat_S_rpa(fm_mat_S(ispin), .TRUE., virt, eig(:, ispin), hom, omega, 0.0_dp) + + L_size_Gamma = SIZE(mat_Gamma_3_3D, 3) + my_virt = SIZE(mat_Gamma_3_3D, 1) + block_size = exchange_work%block_size + + mat_Gamma_3_1D(1:INT(my_virt, KIND=int_8)*hom*my_aux_size) => mat_Gamma_3_3D(:, :, 1:my_aux_size) + mat_Gamma_3_2D(1:my_virt, 1:hom*my_aux_size) => mat_Gamma_3_1D(1:INT(my_virt, KIND=int_8)*hom*my_aux_size) + + ALLOCATE (product_matrix_1D(INT(hom*MIN(block_size, L_size_gamma), KIND=int_8)* & + INT(hom*MIN(block_size, max_aux_size), KIND=int_8))) + ALLOCATE (recv_buffer_1D(INT(virt, KIND=int_8)*hom*max_aux_size)) + recv_buffer_2D(1:my_virt, 1:hom*max_aux_size) => recv_buffer_1D(1:INT(virt, KIND=int_8)*hom*max_aux_size) + recv_buffer_3D(1:my_virt, 1:hom, 1:max_aux_size) => recv_buffer_1D(1:INT(virt, KIND=int_8)*hom*max_aux_size) + DO proc_shift = 0, para_env%num_pe - 1, exchange_work%para_env_sub%num_pe + send_proc = MODULO(para_env%mepos + proc_shift, para_env%num_pe) + recv_proc = MODULO(para_env%mepos - proc_shift, para_env%num_pe) + + CALL get_group_dist(exchange_work%aux_func_dist, recv_proc/exchange_work%para_env_sub%num_pe, sizes=recv_size) + + IF (recv_size == 0) recv_proc = mp_proc_null + + CALL para_env%sendrecv(mat_B_iaP, send_proc, recv_buffer_3D(:, :, 1:recv_size), recv_proc) + + IF (recv_size == 0) CYCLE + + DO P_start = 1, L_size_Gamma, block_size + P_end = MIN(L_size_Gamma, P_start + block_size - 1) + P_size = P_end - P_start + 1 + DO Q_start = 1, recv_size, block_size + Q_end = MIN(recv_size, Q_start + block_size - 1) + Q_size = Q_end - Q_start + 1 + + ! 
Reassign product_matrix pointers to enforce contiguity of target array + product_matrix_2D(1:hom*P_size, 1:hom*Q_size) => & + product_matrix_1D(1:INT(hom*P_size, KIND=int_8)*INT(hom*Q_size, KIND=int_8)) + product_matrix_4D(1:hom, 1:P_size, 1:hom, 1:Q_size) => & + product_matrix_1D(1:INT(hom*P_size, KIND=int_8)*INT(hom*Q_size, KIND=int_8)) + + CALL timeset(routineN//"_gemm", handle2) + CALL mp2_env%local_gemm_ctx%gemm("T", "N", hom*P_size, hom*Q_size, my_virt, 1.0_dp, & + mat_Gamma_3_2D(:, hom*(P_start - 1) + 1:hom*P_end), my_virt, & + recv_buffer_2D(:, hom*(Q_start - 1) + 1:hom*Q_end), my_virt, & + 0.0_dp, product_matrix_2D, hom*P_size) + CALL timestop(handle2) + + CALL timeset(routineN//"_energy", handle2) +!$OMP PARALLEL DO DEFAULT(NONE) SHARED(P_size, Q_size, hom, product_matrix_4D) & +!$OMP COLLAPSE(3) REDUCTION(+: e_exchange_corr) PRIVATE(P, Q, i) + DO P = 1, P_size + DO Q = 1, Q_size + DO i = 1, hom + e_exchange_corr = e_exchange_corr + DOT_PRODUCT(product_matrix_4D(i, P, :, Q), product_matrix_4D(:, P, i, Q)) + END DO + END DO + END DO + CALL timestop(handle2) + END DO + END DO + END DO + + CALL release_group_dist(virt_dist) + IF (ALLOCATED(mat_B_iaP)) DEALLOCATE (mat_B_iaP) + IF (ALLOCATED(mat_Gamma_3_3D)) DEALLOCATE (mat_Gamma_3_3D) + IF (ALLOCATED(product_matrix_1D)) DEALLOCATE (product_matrix_1D) + IF (ALLOCATED(recv_buffer_1D)) DEALLOCATE (recv_buffer_1D) + END DO + + CALL mp2_env%local_gemm_ctx%destroy() + + IF (nspins == 2) e_exchange_corr = e_exchange_corr*2.0_dp + IF (nspins == 1) e_exchange_corr = e_exchange_corr*4.0_dp + + CALL timestop(handle) + + END SUBROUTINE rpa_exchange_work_compute_fm + +! ************************************************************************************************** +!> \brief Contract RPA-exchange density matrix with HF exchange integrals and evaluate the correction +!> \param exchange_work ... +!> \param fm_mat_S ... +!> \param eig ... +!> \param omega ... +!> \param e_exchange_corr ... +!> \author Vladimir Rybkin, 08/2016 +! ************************************************************************************************** + SUBROUTINE rpa_exchange_work_compute_hfx(exchange_work, fm_mat_S, eig, omega, e_exchange_corr) + CLASS(rpa_exchange_work_type), INTENT(INOUT) :: exchange_work + TYPE(cp_fm_type), DIMENSION(:), INTENT(INOUT) :: fm_mat_S + REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: eig + REAL(KIND=dp), INTENT(IN) :: omega + REAL(KIND=dp), INTENT(OUT) :: e_exchange_corr + + CHARACTER(LEN=*), PARAMETER :: routineN = 'rpa_exchange_work_compute_hfx' + + INTEGER :: handle, ispin, my_aux_start, my_aux_end, & + my_aux_size, nspins, L_counter, dimen_ia, hom, virt + REAL(KIND=dp) :: e_exchange_P + TYPE(dbcsr_matrix_p_set), DIMENSION(:), ALLOCATABLE :: dbcsr_Gamma_3 + TYPE(cp_fm_type) :: fm_mat_Gamma_3 + TYPE(mp_para_env_type), POINTER :: para_env + + CALL timeset(routineN, handle) + + e_exchange_corr = 0.0_dp + + nspins = SIZE(fm_mat_S) + + CALL get_group_dist(exchange_work%aux_func_dist, exchange_work%color_sub, my_aux_start, my_aux_end, my_aux_size) + + ALLOCATE (dbcsr_Gamma_3(nspins)) + DO ispin = 1, nspins + hom = exchange_work%homo(ispin) + virt = exchange_work%virtual(ispin) + dimen_ia = hom*virt + IF (hom < 1 .OR. virt < 1) CYCLE + + CALL cp_fm_get_info(fm_mat_S(ispin), para_env=para_env) + + CALL cp_fm_create(fm_mat_Gamma_3, exchange_work%exchange_env%struct_Gamma(ispin)%struct) + CALL cp_fm_set_all(matrix=fm_mat_Gamma_3, alpha=0.0_dp) + + ! Update G with a new value of Omega: in practice, it is G*S + + ! 
Scale fm_work_iaP + CALL calc_fm_mat_S_rpa(fm_mat_S(ispin), .TRUE., virt, eig(:, ispin), & + hom, omega, 0.0_dp) + + ! Calculate Gamma_3: Gamma_3 = G*S*R^(1/2) = G*S*R^(1/2) + CALL parallel_gemm(transa="T", transb="N", m=dimen_ia, n=exchange_work%dimen_RI, & + k=exchange_work%dimen_RI, alpha=1.0_dp, & + matrix_a=fm_mat_S(ispin), matrix_b=exchange_work%fm_mat_R_half_gemm, beta=0.0_dp, & + matrix_c=fm_mat_Gamma_3) + + ! Remove extra factor from S after the multiplication (to return to the original matrix) + CALL remove_scaling_factor_rpa(fm_mat_S(ispin), virt, eig(:, ispin), hom, omega) + + ! Copy Gamma_ia_P^3 to dbcsr matrix set + CALL gamma_fm_to_dbcsr(fm_mat_Gamma_3, dbcsr_Gamma_3(ispin)%matrix_set, & + para_env, exchange_work%para_env_sub, hom, virt, & + exchange_work%exchange_env%mo_coeff_o(ispin), & + exchange_work%ngroup, my_aux_start, my_aux_end, my_aux_size) + END DO + + DO L_counter = 1, my_aux_size + DO ispin = 1, nspins + ! Do dbcsr multiplication: transform the virtual index + CALL dbcsr_multiply("N", "T", 1.0_dp, exchange_work%exchange_env%mo_coeff_v(ispin), & + dbcsr_Gamma_3(ispin)%matrix_set(L_counter), & + 0.0_dp, exchange_work%exchange_env%dbcsr_Gamma_inu_P(ispin), & + filter_eps=exchange_work%exchange_env%eps_filter) + + CALL dbcsr_release(dbcsr_Gamma_3(ispin)%matrix_set(L_counter)) + + ! Do dbcsr multiplication: transform the occupied index + CALL dbcsr_multiply("N", "T", 0.5_dp, exchange_work%exchange_env%dbcsr_Gamma_inu_P(ispin), & + exchange_work%exchange_env%mo_coeff_o(ispin), & + 0.0_dp, exchange_work%exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix, & + filter_eps=exchange_work%exchange_env%eps_filter) + CALL dbcsr_multiply("N", "T", 0.5_dp, exchange_work%exchange_env%mo_coeff_o(ispin), & + exchange_work%exchange_env%dbcsr_Gamma_inu_P(ispin), & + 1.0_dp, exchange_work%exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix, & + filter_eps=exchange_work%exchange_env%eps_filter) + + CALL dbcsr_set(exchange_work%exchange_env%mat_hfx(ispin)%matrix, 0.0_dp) + END DO + + CALL tddft_hfx_matrix(exchange_work%exchange_env%mat_hfx, exchange_work%exchange_env%dbcsr_Gamma_munu_P, & + exchange_work%exchange_env%qs_env, .FALSE., & + exchange_work%exchange_env%my_recalc_hfx_integrals, & + exchange_work%exchange_env%hfx_sections, exchange_work%exchange_env%x_data, & + exchange_work%exchange_env%para_env) + + exchange_work%exchange_env%my_recalc_hfx_integrals = .FALSE. + DO ispin = 1, nspins + CALL dbcsr_multiply("N", "T", 1.0_dp, exchange_work%exchange_env%mat_hfx(ispin)%matrix, & + exchange_work%exchange_env%dbcsr_Gamma_munu_P(ispin)%matrix, & + 0.0_dp, exchange_work%exchange_env%work_ao, filter_eps=exchange_work%exchange_env%eps_filter) + CALL dbcsr_trace(exchange_work%exchange_env%work_ao, e_exchange_P) + e_exchange_corr = e_exchange_corr - e_exchange_P + END DO + END DO + + IF (nspins == 2) e_exchange_corr = e_exchange_corr + IF (nspins == 1) e_exchange_corr = e_exchange_corr*4.0_dp + + CALL timestop(handle) + + END SUBROUTINE rpa_exchange_work_compute_hfx + +! ************************************************************************************************** +!> \brief ... +!> \param exchange_work ... +!> \param fm_mat ... +!> \param mat ... +!> \param ispin ... +!> \param virt_dist ... +! 
************************************************************************************************** + SUBROUTINE redistribute_into_subgroups(exchange_work, fm_mat, mat, ispin, virt_dist) + CLASS(rpa_exchange_work_type), INTENT(IN) :: exchange_work + TYPE(cp_fm_type), INTENT(IN) :: fm_mat + REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), & + INTENT(OUT) :: mat + INTEGER, INTENT(IN) :: ispin + TYPE(group_dist_d1_type), INTENT(IN) :: virt_dist + + CHARACTER(LEN=*), PARAMETER :: routineN = 'redistribute_into_subgroups' + + INTEGER :: aux_counter, aux_global, aux_local, aux_proc, avirt, dimen_RI, handle, handle2, & + ia_global, ia_local, iocc, max_number_recv, max_number_send, my_aux_end, my_aux_size, & + my_aux_start, my_process_column, my_process_row, my_virt_end, my_virt_size, & + my_virt_start, proc, proc_shift, recv_proc, send_proc, virt_counter, virt_proc, group_size + INTEGER, ALLOCATABLE, DIMENSION(:) :: data2send, recv_col_indices, & + recv_row_indices, send_aux_indices, send_virt_indices, virt2send + INTEGER, DIMENSION(2) :: recv_shape + INTEGER, DIMENSION(:), POINTER :: aux_distribution_fm, col_indices, & + ia_distribution_fm, row_indices + INTEGER, DIMENSION(:, :), POINTER :: mpi2blacs + REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), TARGET :: recv_buffer, send_buffer + REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), & + POINTER :: recv_ptr, send_ptr + TYPE(cp_blacs_env_type), POINTER :: context + TYPE(mp_para_env_type), POINTER :: para_env + + CALL timeset(routineN, handle) + + CALL cp_fm_get_info(matrix=fm_mat, & + nrow_locals=aux_distribution_fm, & + col_indices=col_indices, & + row_indices=row_indices, & + ncol_locals=ia_distribution_fm, & + context=context, & + nrow_global=dimen_RI, & + para_env=para_env) + + IF (exchange_work%homo(ispin) <= 0 .OR. exchange_work%virtual(ispin) <= 0) THEN + CALL get_group_dist(virt_dist, exchange_work%para_env_sub%mepos, my_virt_start, my_virt_end, my_virt_size) + ALLOCATE (mat(exchange_work%homo(ispin), my_virt_size, dimen_RI)) + CALL timestop(handle) + RETURN + END IF + + group_size = exchange_work%para_env_sub%num_pe + + CALL timeset(routineN//"_prep", handle2) + CALL get_group_dist(exchange_work%aux_func_dist, exchange_work%color_sub, my_aux_start, my_aux_end, my_aux_size) + CALL get_group_dist(virt_dist, exchange_work%para_env_sub%mepos, my_virt_start, my_virt_end, my_virt_size) + CALL context%get(my_process_column=my_process_column, my_process_row=my_process_row, mpi2blacs=mpi2blacs) + + ! 
Determine the number of columns to send + ALLOCATE (send_aux_indices(MAXVAL(exchange_work%aux2send))) + ALLOCATE (virt2send(0:group_size - 1)) + virt2send = 0 + DO ia_local = 1, ia_distribution_fm(my_process_column) + ia_global = col_indices(ia_local) + avirt = MOD(ia_global - 1, exchange_work%virtual(ispin)) + 1 + proc = group_dist_proc(virt_dist, avirt) + virt2send(proc) = virt2send(proc) + 1 + END DO + + ALLOCATE (data2send(0:para_env%num_pe - 1)) + DO aux_proc = 0, exchange_work%ngroup - 1 + DO virt_proc = 0, group_size - 1 + data2send(aux_proc*group_size + virt_proc) = exchange_work%aux2send(aux_proc)*virt2send(virt_proc) + END DO + END DO + + ALLOCATE (send_virt_indices(MAXVAL(virt2send))) + max_number_send = MAXVAL(data2send) + + ALLOCATE (send_buffer(INT(max_number_send, KIND=int_8)*exchange_work%homo(ispin))) + max_number_recv = max_number_send + CALL para_env%max(max_number_recv) + ALLOCATE (recv_buffer(max_number_recv)) + + ALLOCATE (mat(my_virt_size, exchange_work%homo(ispin), my_aux_size)) + + CALL timestop(handle2) + + CALL timeset(routineN//"_own", handle2) + ! Start with own data + DO aux_local = 1, aux_distribution_fm(my_process_row) + aux_global = row_indices(aux_local) + IF (aux_global < my_aux_start .OR. aux_global > my_aux_end) CYCLE + DO ia_local = 1, ia_distribution_fm(my_process_column) + ia_global = fm_mat%matrix_struct%col_indices(ia_local) + + iocc = (ia_global - 1)/exchange_work%virtual(ispin) + 1 + avirt = MOD(ia_global - 1, exchange_work%virtual(ispin)) + 1 + + IF (my_virt_start > avirt .OR. my_virt_end < avirt) CYCLE + + mat(avirt - my_virt_start + 1, iocc, aux_global - my_aux_start + 1) = fm_mat%local_data(aux_local, ia_local) + END DO + END DO + CALL timestop(handle2) + + DO proc_shift = 1, para_env%num_pe - 1 + send_proc = MODULO(para_env%mepos + proc_shift, para_env%num_pe) + recv_proc = MODULO(para_env%mepos - proc_shift, para_env%num_pe) + + CALL timeset(routineN//"_pack_buffer", handle2) + send_ptr(1:virt2send(MOD(send_proc, group_size)), & + 1:exchange_work%aux2send(send_proc/group_size)) => & + send_buffer(1:INT(virt2send(MOD(send_proc, group_size)), KIND=int_8)* & + exchange_work%aux2send(send_proc/group_size)) +! 
Pack send buffer + aux_counter = 0 + DO aux_local = 1, aux_distribution_fm(my_process_row) + aux_global = row_indices(aux_local) + proc = group_dist_proc(exchange_work%aux_func_dist, aux_global) + IF (proc /= send_proc/group_size) CYCLE + aux_counter = aux_counter + 1 + virt_counter = 0 + DO ia_local = 1, ia_distribution_fm(my_process_column) + ia_global = col_indices(ia_local) + avirt = MOD(ia_global - 1, exchange_work%virtual(ispin)) + 1 + + proc = group_dist_proc(virt_dist, avirt) + IF (proc /= MOD(send_proc, group_size)) CYCLE + virt_counter = virt_counter + 1 + send_ptr(virt_counter, aux_counter) = fm_mat%local_data(aux_local, ia_local) + send_virt_indices(virt_counter) = ia_global + END DO + send_aux_indices(aux_counter) = aux_global + END DO + CALL timestop(handle2) + + CALL timeset(routineN//"_ex_size", handle2) + recv_shape = [1, 1] + CALL para_env%sendrecv(SHAPE(send_ptr), send_proc, recv_shape, recv_proc) + CALL timestop(handle2) + + IF (SIZE(send_ptr) == 0) send_proc = mp_proc_null + IF (PRODUCT(recv_shape) == 0) recv_proc = mp_proc_null + + CALL timeset(routineN//"_ex_idx", handle2) + ALLOCATE (recv_row_indices(recv_shape(1)), recv_col_indices(recv_shape(2))) + CALL para_env%sendrecv(send_virt_indices(1:virt_counter), send_proc, recv_row_indices, recv_proc) + CALL para_env%sendrecv(send_aux_indices(1:aux_counter), send_proc, recv_col_indices, recv_proc) + CALL timestop(handle2) + + ! Prepare pointer to recv buffer (consider transposition while packing the send buffer) + recv_ptr(1:recv_shape(1), 1:MAX(1, recv_shape(2))) => recv_buffer(1:recv_shape(1)*MAX(1, recv_shape(2))) + + CALL timeset(routineN//"_sendrecv", handle2) +! Perform communication + CALL para_env%sendrecv(send_ptr, send_proc, recv_ptr, recv_proc) + CALL timestop(handle2) + + IF (recv_proc == mp_proc_null) THEN + DEALLOCATE (recv_row_indices, recv_col_indices) + CYCLE + END IF + + CALL timeset(routineN//"_unpack", handle2) +! Unpack receive buffer + DO aux_local = 1, SIZE(recv_col_indices) + aux_global = recv_col_indices(aux_local) + + DO ia_local = 1, SIZE(recv_row_indices) + ia_global = recv_row_indices(ia_local) + + iocc = (ia_global - 1)/exchange_work%virtual(ispin) + 1 + avirt = MOD(ia_global - 1, exchange_work%virtual(ispin)) + 1 + + mat(avirt - my_virt_start + 1, iocc, aux_global - my_aux_start + 1) = recv_ptr(ia_local, aux_local) + END DO + END DO + CALL timestop(handle2) + + IF (ALLOCATED(recv_row_indices)) DEALLOCATE (recv_row_indices) + IF (ALLOCATED(recv_col_indices)) DEALLOCATE (recv_col_indices) + END DO + + DEALLOCATE (send_aux_indices, send_virt_indices) + + CALL timestop(handle) + + END SUBROUTINE redistribute_into_subgroups + +END MODULE rpa_exchange diff --git a/src/rpa_grad.F b/src/rpa_grad.F index cc7ddf1453..925023c7f1 100644 --- a/src/rpa_grad.F +++ b/src/rpa_grad.F @@ -11,9 +11,6 @@ !> 10.2021 created [Frederick Stein] ! 
************************************************************************************************** MODULE rpa_grad - USE ISO_C_BINDING, ONLY: C_NULL_PTR,& - C_PTR,& - c_associated USE cp_array_utils, ONLY: cp_1d_r_cp_type,& cp_3d_r_cp_type USE cp_blacs_env, ONLY: cp_blacs_env_type @@ -25,9 +22,13 @@ MODULE rpa_grad cp_fm_struct_get,& cp_fm_struct_release,& cp_fm_struct_type - USE cp_fm_types, ONLY: & - cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_release, cp_fm_set_all, & - cp_fm_to_fm, cp_fm_to_fm_submat_general, cp_fm_type + USE cp_fm_types, ONLY: cp_fm_create,& + cp_fm_get_info,& + cp_fm_release,& + cp_fm_set_all,& + cp_fm_to_fm,& + cp_fm_to_fm_submat_general,& + cp_fm_type USE dgemm_counter_types, ONLY: dgemm_counter_start,& dgemm_counter_stop,& dgemm_counter_type @@ -43,10 +44,7 @@ MODULE rpa_grad int_8 USE libint_2c_3c, ONLY: compare_potential_types USE local_gemm_api, ONLY: LOCAL_GEMM_PU_GPU,& - local_gemm,& - local_gemm_create,& - local_gemm_destroy,& - local_gemm_set_op_threshold_gpu + local_gemm_ctxt_type USE machine, ONLY: m_flush,& m_memory USE mathconstants, ONLY: pi @@ -281,10 +279,10 @@ SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual) CHARACTER(LEN=*), PARAMETER :: routineN = 'rpa_work_type_create' - INTEGER :: avirt, col_global, col_local, first_p_pos(2), first_p_pos_col, handle, iocc, & - ispin, my_a, my_a_end, my_a_size, my_a_start, my_i, my_i_end, my_i_size, my_i_start, & - my_pcol, ncol_block, ncol_local, nspins, num_pe_col, proc_homo, proc_homo_send, & - proc_recv, proc_send, proc_virtual, proc_virtual_send + INTEGER :: avirt, col_global, col_local, handle, iocc, ispin, my_a, my_a_end, my_a_size, & + my_a_start, my_i, my_i_end, my_i_size, my_i_start, my_pcol, ncol_local, nspins, & + num_pe_col, proc_homo, proc_homo_send, proc_recv, proc_send, proc_virtual, & + proc_virtual_send INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send INTEGER, DIMENSION(:), POINTER :: col_indices @@ -321,9 +319,7 @@ SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual) CALL create_group_dist(rpa_work%gd_homo(ispin), proc_homo, homo(ispin)) CALL create_group_dist(rpa_work%gd_virtual(ispin), proc_virtual, virtual(ispin)) - CALL cp_fm_struct_get(fm_mat_S(ispin)%matrix_struct, ncol_local=ncol_local, col_indices=col_indices, & - first_p_pos=first_p_pos, ncol_block=ncol_block) - first_p_pos_col = first_p_pos(2) + CALL cp_fm_struct_get(fm_mat_S(ispin)%matrix_struct, ncol_local=ncol_local, col_indices=col_indices) data2send = 0 ! 
Count the amount of data2send to each process @@ -370,7 +366,7 @@ SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual) data2recv = 0 DO my_i = my_i_start, my_i_end DO my_a = my_a_start, my_a_end - proc_recv = cp_fm_indxg2p((my_i - 1)*virtual(ispin) + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + proc_recv = fm_mat_S(ispin)%matrix_struct%g2p_col((my_i - 1)*virtual(ispin) + my_a) data2recv(proc_recv) = data2recv(proc_recv) + 1 END DO END DO @@ -382,7 +378,7 @@ SUBROUTINE rpa_work_type_create(rpa_work, fm_mat_Q, fm_mat_S, homo, virtual) data2recv = 0 DO my_i = my_i_start, my_i_end DO my_a = my_a_start, my_a_end - proc_recv = cp_fm_indxg2p((my_i - 1)*virtual(ispin) + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + proc_recv = fm_mat_S(ispin)%matrix_struct%g2p_col((my_i - 1)*virtual(ispin) + my_a) data2recv(proc_recv) = data2recv(proc_recv) + 1 rpa_work%index2recv(proc_recv, ispin)%array(2, data2recv(proc_recv)) = my_i - my_i_start + 1 @@ -458,9 +454,9 @@ SUBROUTINE prepare_comm_Pij(sos_mp2_work, virtual, fm_mat_S) CHARACTER(LEN=*), PARAMETER :: routineN = 'prepare_comm_Pij' - INTEGER :: avirt, col_global, col_local, counter, first_p_pos(2), first_p_pos_col, handle, & - ij_counter, iocc, my_i, my_j, my_pcol, my_prow, ncol_block, ncol_local, nrow_local, & - num_ij_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, tag + INTEGER :: avirt, col_global, col_local, counter, handle, ij_counter, iocc, my_i, my_j, & + my_pcol, my_prow, ncol_local, nrow_local, num_ij_pairs, num_pe_col, pcol, pcol_recv, & + pcol_send, proc_shift, tag INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi @@ -481,10 +477,9 @@ SUBROUTINE prepare_comm_Pij(sos_mp2_work, virtual, fm_mat_S) CALL cp_fm_struct_get(fm_mat_S%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, & ncol_local=ncol_local, col_indices=col_indices, & - context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local) + context=context, nrow_local=nrow_local) CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, & blacs2mpi=blacs2mpi) - first_p_pos_col = first_p_pos(2) num_ij_pairs = SIZE(sos_mp2_work%pair_list, 2) @@ -511,7 +506,7 @@ SUBROUTINE prepare_comm_Pij(sos_mp2_work, virtual, fm_mat_S) my_j = sos_mp2_work%pair_list(2, ij_counter) IF (iocc /= my_j) CYCLE - pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_mat_S%matrix_struct%g2p_col((my_i - 1)*virtual + avirt) IF (pcol /= pcol_send) CYCLE counter = counter + 1 @@ -545,7 +540,7 @@ SUBROUTINE prepare_comm_Pij(sos_mp2_work, virtual, fm_mat_S) my_j = sos_mp2_work%pair_list(2, ij_counter) IF (iocc /= my_j) CYCLE - pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_mat_S%matrix_struct%g2p_col((my_i - 1)*virtual + avirt) IF (pcol /= pcol_send) CYCLE counter = counter + 1 @@ -565,8 +560,7 @@ SUBROUTINE prepare_comm_Pij(sos_mp2_work, virtual, fm_mat_S) ! 
Convert to global coordinates to local coordinates as we always work with them DO counter = 1, data2send(pcol_send) sos_mp2_work%index2send(pcol_send)%array(counter) = & - cp_fm_indxg2l(sos_mp2_work%index2send(pcol_send)%array(counter), & - ncol_block, 0, first_p_pos_col, num_pe_col) + fm_mat_S%matrix_struct%g2l_col(sos_mp2_work%index2send(pcol_send)%array(counter)) END DO END DO @@ -592,9 +586,9 @@ SUBROUTINE prepare_comm_Pab(sos_mp2_work, virtual, fm_mat_S) CHARACTER(LEN=*), PARAMETER :: routineN = 'prepare_comm_Pab' - INTEGER :: ab_counter, avirt, col_global, col_local, counter, first_p_pos(2), & - first_p_pos_col, handle, iocc, my_a, my_b, my_pcol, my_prow, ncol_block, ncol_local, & - nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, tag + INTEGER :: ab_counter, avirt, col_global, col_local, counter, handle, iocc, my_a, my_b, & + my_pcol, my_prow, ncol_local, nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, & + pcol_send, proc_shift, tag INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv, data2send INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi @@ -615,10 +609,9 @@ SUBROUTINE prepare_comm_Pab(sos_mp2_work, virtual, fm_mat_S) CALL cp_fm_struct_get(fm_mat_S%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, & ncol_local=ncol_local, col_indices=col_indices, & - context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local) + context=context, nrow_local=nrow_local) CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, & blacs2mpi=blacs2mpi) - first_p_pos_col = first_p_pos(2) CALL comm_exchange%from_split(para_env, my_prow) @@ -644,7 +637,7 @@ SUBROUTINE prepare_comm_Pab(sos_mp2_work, virtual, fm_mat_S) my_b = sos_mp2_work%pair_list(2, ab_counter) IF (avirt /= my_b) CYCLE - pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_mat_S%matrix_struct%g2p_col((iocc - 1)*virtual + my_a) IF (pcol /= pcol_send) CYCLE counter = counter + 1 @@ -678,7 +671,7 @@ SUBROUTINE prepare_comm_Pab(sos_mp2_work, virtual, fm_mat_S) my_b = sos_mp2_work%pair_list(2, ab_counter) IF (avirt /= my_b) CYCLE - pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_mat_S%matrix_struct%g2p_col((iocc - 1)*virtual + my_a) IF (pcol /= pcol_send) CYCLE counter = counter + 1 @@ -698,8 +691,7 @@ SUBROUTINE prepare_comm_Pab(sos_mp2_work, virtual, fm_mat_S) ! Convert to global coordinates to local coordinates as we always work with them DO counter = 1, data2send(pcol_send) sos_mp2_work%index2send(pcol_send)%array(counter) = & - cp_fm_indxg2l(sos_mp2_work%index2send(pcol_send)%array(counter), & - ncol_block, 0, first_p_pos_col, num_pe_col) + fm_mat_S%matrix_struct%g2l_col(sos_mp2_work%index2send(pcol_send)%array(counter)) END DO END DO @@ -1034,10 +1026,8 @@ SUBROUTINE calc_P_rpa(mat_S_1D, mat_work_iaP_3D, gd_homo, gd_virtual, grid, mepo ! We allocate it at every step to reduce potential memory conflicts with COSMA IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN - IF (.NOT. 
c_associated(mp2_env%local_gemm_ctx)) THEN - CALL local_gemm_create(mp2_env%local_gemm_ctx, LOCAL_GEMM_PU_GPU) - CALL local_gemm_set_op_threshold_gpu(mp2_env%local_gemm_ctx, spla_threshold) - END IF + CALL mp2_env%local_gemm_ctx%create(LOCAL_GEMM_PU_GPU) + CALL mp2_env%local_gemm_ctx%set_op_threshold_gpu(spla_threshold) END IF tag = 47 @@ -1269,8 +1259,7 @@ SUBROUTINE calc_P_rpa(mat_S_1D, mat_work_iaP_3D, gd_homo, gd_virtual, grid, mepo IF (mp2_env%ri_grad%dot_blksize >= blksize_threshold) THEN ! release memory allocated by local_gemm when run on GPU. local_gemm_ctx is null on cpu only runs - CALL local_gemm_destroy(mp2_env%local_gemm_ctx) - mp2_env%local_gemm_ctx = C_NULL_PTR + CALL mp2_env%local_gemm_ctx%destroy() DEALLOCATE (buffer_compens_1D) END IF @@ -1305,7 +1294,7 @@ SUBROUTINE calc_P_rpa_a(P_ab, mat_S, mat_work, dot_blksize, buffer_1D, local_gem INTEGER, INTENT(IN) :: dot_blksize REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), & INTENT(INOUT), TARGET :: buffer_1D - TYPE(C_PTR), INTENT(INOUT) :: local_gemm_ctx + TYPE(local_gemm_ctxt_type), INTENT(INOUT) :: local_gemm_ctx REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_virt REAL(KIND=dp), INTENT(IN) :: omega, weight @@ -1334,10 +1323,10 @@ SUBROUTINE calc_P_rpa_a(P_ab, mat_S, mat_work, dot_blksize, buffer_1D, local_gem stripesize = MIN(dot_blksize, my_P_size - P_start + 1) P_end = P_start + stripesize - 1 - CALL local_gemm("T", "N", my_a_size, recv_a_size, stripesize, & - -weight, mat_S(P_start:P_end, :, my_i), stripesize, & - mat_work(P_start:P_end, :, my_i), stripesize, & - 0.0_dp, buffer_unscaled, my_a_size, local_gemm_ctx) + CALL local_gemm_ctx%gemm("T", "N", my_a_size, recv_a_size, stripesize, & + -weight, mat_S(P_start:P_end, :, my_i), stripesize, & + mat_work(P_start:P_end, :, my_i), stripesize, & + 0.0_dp, buffer_unscaled, my_a_size) CALL scale_buffer_and_add_compens_virt(buffer_unscaled, buffer_compens, omega, & my_eval_virt, recv_eval_virt, my_eval_occ(my_i)) @@ -1399,7 +1388,7 @@ SUBROUTINE calc_P_rpa_i(P_ij, mat_S, mat_work, dot_blksize, buffer_1D, local_gem INTEGER, INTENT(IN) :: dot_blksize REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), & INTENT(INOUT), TARGET :: buffer_1D - TYPE(C_PTR), INTENT(INOUT) :: local_gemm_ctx + TYPE(local_gemm_ctxt_type), INTENT(INOUT) :: local_gemm_ctx REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: my_eval_virt, my_eval_occ, recv_eval_occ REAL(KIND=dp), INTENT(IN) :: omega, weight @@ -1428,10 +1417,10 @@ SUBROUTINE calc_P_rpa_i(P_ij, mat_S, mat_work, dot_blksize, buffer_1D, local_gem stripesize = MIN(dot_blksize, my_P_size - P_start + 1) P_end = P_start + stripesize - 1 - CALL local_gemm("T", "N", my_i_size, recv_i_size, stripesize, & - weight, mat_S(P_start:P_end, my_a, :), stripesize, & - mat_work(P_start:P_end, my_a, :), stripesize, & - 0.0_dp, buffer_unscaled, my_i_size, local_gemm_ctx) + CALL local_gemm_ctx%gemm("T", "N", my_i_size, recv_i_size, stripesize, & + weight, mat_S(P_start:P_end, my_a, :), stripesize, & + mat_work(P_start:P_end, my_a, :), stripesize, & + 0.0_dp, buffer_unscaled, my_i_size) CALL scale_buffer_and_add_compens_occ(buffer_unscaled, buffer_compens, omega, & my_eval_occ, recv_eval_occ, my_eval_virt(my_a)) @@ -1625,10 +1614,10 @@ SUBROUTINE calc_Pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigen CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_Pij_degen' - INTEGER :: avirt, col_global, col_local, counter, first_p_pos(2), first_p_pos_col, handle, & - handle2, ij_counter, iocc, my_col_local, my_i, my_j, my_pcol, my_prow, ncol_block, & - 
ncol_local, nrow_local, num_ij_pairs, num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, & - recv_size, send_size, size_recv_buffer, size_send_buffer, tag + INTEGER :: avirt, col_global, col_local, counter, handle, handle2, ij_counter, iocc, & + my_col_local, my_i, my_j, my_pcol, my_prow, ncol_local, nrow_local, num_ij_pairs, & + num_pe_col, pcol, pcol_recv, pcol_send, proc_shift, recv_size, send_size, & + size_recv_buffer, size_send_buffer, tag INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi REAL(KIND=dp) :: trace @@ -1640,10 +1629,9 @@ SUBROUTINE calc_Pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigen CALL cp_fm_struct_get(fm_work_iaP%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, & ncol_local=ncol_local, col_indices=col_indices, & - context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local) + context=context, nrow_local=nrow_local) CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, & number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi) - first_p_pos_col = first_p_pos(2) num_ij_pairs = SIZE(pair_list, 2) @@ -1663,10 +1651,10 @@ SUBROUTINE calc_Pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigen avirt = col_global - (iocc - 1)*virtual IF (iocc /= my_j) CYCLE - pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_work_iaP%matrix_struct%g2p_col((my_i - 1)*virtual + avirt) IF (pcol /= my_pcol) CYCLE - my_col_local = cp_fm_indxg2l((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col) + my_col_local = fm_work_iaP%matrix_struct%g2l_col((my_i - 1)*virtual + avirt) trace = trace + accurate_dot_product_2(fm_mat_S%local_data(:, my_col_local), fm_work_iaP%local_data(:, col_local), & dot_blksize) @@ -1730,10 +1718,10 @@ SUBROUTINE calc_Pij_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ij, Eigen iocc = MAX(1, col_global - 1)/virtual + 1 IF (iocc /= my_j) CYCLE avirt = col_global - (iocc - 1)*virtual - pcol = cp_fm_indxg2p((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_work_iaP%matrix_struct%g2p_col((my_i - 1)*virtual + avirt) IF (pcol /= my_pcol) CYCLE - my_col_local = cp_fm_indxg2l((my_i - 1)*virtual + avirt, ncol_block, 0, first_p_pos_col, num_pe_col) + my_col_local = fm_work_iaP%matrix_struct%g2l_col((my_i - 1)*virtual + avirt) trace = trace + accurate_dot_product_2(fm_mat_S%local_data(:, my_col_local), buffer_recv(:, col_local), & dot_blksize) @@ -1783,10 +1771,10 @@ SUBROUTINE calc_Pab_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ab, Eigen CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_Pab_degen' - INTEGER :: ab_counter, avirt, col_global, col_local, counter, first_p_pos(2), & - first_p_pos_col, handle, handle2, iocc, my_a, my_b, my_col_local, my_pcol, my_prow, & - ncol_block, ncol_local, nrow_local, num_ab_pairs, num_pe_col, pcol, pcol_recv, pcol_send, & - proc_shift, recv_size, send_size, size_recv_buffer, size_send_buffer, tag + INTEGER :: ab_counter, avirt, col_global, col_local, counter, handle, handle2, iocc, my_a, & + my_b, my_col_local, my_pcol, my_prow, ncol_local, nrow_local, num_ab_pairs, num_pe_col, & + pcol, pcol_recv, pcol_send, proc_shift, recv_size, send_size, size_recv_buffer, & + size_send_buffer, tag INTEGER, DIMENSION(:), POINTER :: col_indices, ncol_locals INTEGER, DIMENSION(:, :), POINTER :: blacs2mpi REAL(KIND=dp) :: trace @@ -1798,10 +1786,9 @@ SUBROUTINE calc_Pab_degen(fm_work_iaP, fm_mat_S, pair_list, 
virtual, P_ab, Eigen CALL cp_fm_struct_get(fm_work_iaP%matrix_struct, para_env=para_env, ncol_locals=ncol_locals, & ncol_local=ncol_local, col_indices=col_indices, & - context=context, ncol_block=ncol_block, first_p_pos=first_p_pos, nrow_local=nrow_local) + context=context, nrow_local=nrow_local) CALL context%get(my_process_row=my_prow, my_process_column=my_pcol, & number_of_process_columns=num_pe_col, blacs2mpi=blacs2mpi) - first_p_pos_col = first_p_pos(2) num_ab_pairs = SIZE(pair_list, 2) @@ -1821,9 +1808,9 @@ SUBROUTINE calc_Pab_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ab, Eigen avirt = col_global - (iocc - 1)*virtual IF (avirt /= my_b) CYCLE - pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_work_iaP%matrix_struct%g2p_col((iocc - 1)*virtual + my_a) IF (pcol /= my_pcol) CYCLE - my_col_local = cp_fm_indxg2l((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + my_col_local = fm_work_iaP%matrix_struct%g2l_col((iocc - 1)*virtual + my_a) trace = trace + accurate_dot_product_2(fm_mat_S%local_data(:, my_col_local), fm_work_iaP%local_data(:, col_local), & dot_blksize) @@ -1889,10 +1876,10 @@ SUBROUTINE calc_Pab_degen(fm_work_iaP, fm_mat_S, pair_list, virtual, P_ab, Eigen iocc = MAX(1, col_global - 1)/virtual + 1 avirt = col_global - (iocc - 1)*virtual IF (avirt /= my_b) CYCLE - pcol = cp_fm_indxg2p((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + pcol = fm_work_iaP%matrix_struct%g2p_col((iocc - 1)*virtual + my_a) IF (pcol /= my_pcol) CYCLE - my_col_local = cp_fm_indxg2l((iocc - 1)*virtual + my_a, ncol_block, 0, first_p_pos_col, num_pe_col) + my_col_local = fm_work_iaP%matrix_struct%g2l_col((iocc - 1)*virtual + my_a) trace = trace + accurate_dot_product_2(fm_mat_S%local_data(:, my_col_local), buffer_recv(:, col_local), & dot_blksize) @@ -2610,11 +2597,10 @@ SUBROUTINE dereplicate_and_sum_fm(fm_sub, fm_global) CHARACTER(LEN=*), PARAMETER :: routineN = 'dereplicate_and_sum_fm' INTEGER :: col_local, elements2recv_col, elements2recv_row, elements2send_col, & - elements2send_row, first_p_pos_global(2), first_p_pos_sub(2), handle, handle2, & - mypcol_global, myprow_global, ncol_block_global, ncol_block_sub, ncol_local_global, & - ncol_local_sub, npcol_global, npcol_sub, nprow_global, nprow_sub, nrow_block_global, & - nrow_block_sub, nrow_local_global, nrow_local_sub, pcol_recv, pcol_send, proc_recv, & - proc_send, proc_send_global, proc_shift, prow_recv, prow_send, row_local, tag + elements2send_row, handle, handle2, mypcol_global, myprow_global, ncol_local_global, & + ncol_local_sub, npcol_global, npcol_sub, nprow_global, nprow_sub, nrow_local_global, & + nrow_local_sub, pcol_recv, pcol_send, proc_recv, proc_send, proc_send_global, proc_shift, & + prow_recv, prow_send, row_local, tag INTEGER(int_8) :: size_recv_buffer, size_send_buffer INTEGER, ALLOCATABLE, DIMENSION(:) :: data2recv_col, data2recv_row, & data2send_col, data2send_row, & @@ -2643,10 +2629,8 @@ SUBROUTINE dereplicate_and_sum_fm(fm_sub, fm_global) CALL cp_fm_get_info(fm_sub, col_indices=col_indices_sub, row_indices=row_indices_sub, & nrow_local=nrow_local_sub, ncol_local=ncol_local_sub) - CALL cp_fm_struct_get(fm_sub%matrix_struct, para_env=para_env_sub, first_p_pos=first_p_pos_sub, & - nrow_block=nrow_block_sub, ncol_block=ncol_block_sub) - CALL cp_fm_struct_get(fm_global%matrix_struct, first_p_pos=first_p_pos_global, nrow_block=nrow_block_global, & - ncol_block=ncol_block_global, para_env=para_env, & + CALL 
cp_fm_struct_get(fm_sub%matrix_struct, para_env=para_env_sub) + CALL cp_fm_struct_get(fm_global%matrix_struct, para_env=para_env, & col_indices=col_indices_global, row_indices=row_indices_global, & nrow_local=nrow_local_global, ncol_local=ncol_local_global) CALL fm_sub%matrix_struct%context%get(blacs2mpi=blacs2mpi_sub, mpi2blacs=mpi2blacs_sub) @@ -2666,13 +2650,13 @@ SUBROUTINE dereplicate_and_sum_fm(fm_sub, fm_global) CALL timeset(routineN//"_data2", handle2) ! Create a map how much data has to be sent to what process coordinate, interchange rows and columns to transpose the matrices - CALL get_elements2send(data2send_col, npcol_global, row_indices_sub, ncol_block_global, first_p_pos_global(2), index2send_col) - CALL get_elements2send(data2send_row, nprow_global, col_indices_sub, nrow_block_global, first_p_pos_global(1), index2send_row) + CALL get_elements2send_col(data2send_col, fm_global%matrix_struct, row_indices_sub, index2send_col) + CALL get_elements2send_row(data2send_row, fm_global%matrix_struct, col_indices_sub, index2send_row) ! Create a map how much data has to be sent to what process coordinate, interchange rows and columns to transpose the matrices ! Do the reverse for the recieve processes - CALL get_elements2send(data2recv_col, npcol_sub, row_indices_global, ncol_block_sub, first_p_pos_sub(2), index2recv_col) - CALL get_elements2send(data2recv_row, nprow_sub, col_indices_global, nrow_block_sub, first_p_pos_sub(1), index2recv_row) + CALL get_elements2send_col(data2recv_col, fm_sub%matrix_struct, row_indices_global, index2recv_col) + CALL get_elements2send_row(data2recv_row, fm_sub%matrix_struct, col_indices_global, index2recv_row) CALL timestop(handle2) CALL timeset(routineN//"_local", handle2) @@ -2809,27 +2793,69 @@ END SUBROUTINE dereplicate_and_sum_fm ! ************************************************************************************************** !> \brief ... !> \param data2send ... -!> \param np_global ... +!> \param struct_global ... !> \param indices_sub ... -!> \param n_block_global ... -!> \param first_p_pos_global ... !> \param index2send ... ! ************************************************************************************************** - SUBROUTINE get_elements2send(data2send, np_global, indices_sub, n_block_global, first_p_pos_global, index2send) + SUBROUTINE get_elements2send_col(data2send, struct_global, indices_sub, index2send) INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: data2send - INTEGER, INTENT(IN) :: np_global + TYPE(cp_fm_struct_type), INTENT(INOUT) :: struct_global INTEGER, DIMENSION(:), INTENT(IN) :: indices_sub - INTEGER, INTENT(IN) :: n_block_global, first_p_pos_global TYPE(one_dim_int_array), ALLOCATABLE, & DIMENSION(:), INTENT(OUT) :: index2send - INTEGER :: i_global, i_local, proc + INTEGER :: i_global, i_local, np_global, proc + + CALL struct_global%context%get(number_of_process_rows=np_global) + + ALLOCATE (data2send(0:np_global - 1)) + data2send = 0 + DO i_local = 1, SIZE(indices_sub) + i_global = indices_sub(i_local) + proc = struct_global%g2p_col(i_global) + data2send(proc) = data2send(proc) + 1 + END DO + + ALLOCATE (index2send(0:np_global - 1)) + DO proc = 0, np_global - 1 + ALLOCATE (index2send(proc)%array(data2send(proc))) + ! 
We want to crash if there is an error + index2send(proc)%array = -1 + END DO + + data2send = 0 + DO i_local = 1, SIZE(indices_sub) + i_global = indices_sub(i_local) + proc = struct_global%g2p_col(i_global) + data2send(proc) = data2send(proc) + 1 + index2send(proc)%array(data2send(proc)) = i_local + END DO + + END SUBROUTINE + +! ************************************************************************************************** +!> \brief ... +!> \param data2send ... +!> \param struct_global ... +!> \param indices_sub ... +!> \param index2send ... +! ************************************************************************************************** + SUBROUTINE get_elements2send_row(data2send, struct_global, indices_sub, index2send) + INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: data2send + TYPE(cp_fm_struct_type), INTENT(INOUT) :: struct_global + INTEGER, DIMENSION(:), INTENT(IN) :: indices_sub + TYPE(one_dim_int_array), ALLOCATABLE, & + DIMENSION(:), INTENT(OUT) :: index2send + + INTEGER :: i_global, i_local, np_global, proc + + CALL struct_global%context%get(number_of_process_rows=np_global) ALLOCATE (data2send(0:np_global - 1)) data2send = 0 DO i_local = 1, SIZE(indices_sub) i_global = indices_sub(i_local) - proc = cp_fm_indxg2p(i_global, n_block_global, 0, first_p_pos_global, np_global) + proc = struct_global%g2p_row(i_global) data2send(proc) = data2send(proc) + 1 END DO @@ -2843,7 +2869,7 @@ SUBROUTINE get_elements2send(data2send, np_global, indices_sub, n_block_global, data2send = 0 DO i_local = 1, SIZE(indices_sub) i_global = indices_sub(i_local) - proc = cp_fm_indxg2p(i_global, n_block_global, 0, first_p_pos_global, np_global) + proc = struct_global%g2p_row(i_global) data2send(proc) = data2send(proc) + 1 index2send(proc)%array(data2send(proc)) = i_local END DO diff --git a/src/rpa_main.F b/src/rpa_main.F index a5d81d9f2a..2703568bbb 100644 --- a/src/rpa_main.F +++ b/src/rpa_main.F @@ -16,8 +16,8 @@ ! 
************************************************************************************************** MODULE rpa_main USE bibliography, ONLY: & - Bates2013, DelBen2013, DelBen2015, Ren2011, Ren2013, Wilhelm2016a, Wilhelm2016b, & - Wilhelm2017, Wilhelm2018, cite_reference + Bates2013, DelBen2013, DelBen2015, Freeman1977, Gruneis2009, Ren2011, Ren2013, & + Wilhelm2016a, Wilhelm2016b, Wilhelm2017, Wilhelm2018, cite_reference USE bse_main, ONLY: start_bse_calculation USE cp_blacs_env, ONLY: cp_blacs_env_create,& cp_blacs_env_release,& @@ -27,7 +27,6 @@ MODULE rpa_main dbcsr_clear,& dbcsr_get_info,& dbcsr_p_type,& - dbcsr_release,& dbcsr_type USE cp_fm_basic_linalg, ONLY: cp_fm_scale_and_add USE cp_fm_struct, ONLY: cp_fm_struct_create,& @@ -49,7 +48,10 @@ MODULE rpa_main release_group_dist USE hfx_types, ONLY: block_ind_type,& hfx_compression_type - USE input_constants, ONLY: wfc_mm_style_gemm + USE input_constants, ONLY: rpa_exchange_axk,& + rpa_exchange_none,& + rpa_exchange_sosex,& + wfc_mm_style_gemm USE kinds, ONLY: dp,& int_8 USE kpoint_types, ONLY: kpoint_type @@ -71,7 +73,8 @@ MODULE rpa_main two_dim_real_array USE qs_environment_types, ONLY: get_qs_env,& qs_environment_type - USE rpa_axk, ONLY: compute_axk_ener + USE rpa_exchange, ONLY: rpa_exchange_needed_mem,& + rpa_exchange_work_type USE rpa_grad, ONLY: rpa_grad_copy_Q,& rpa_grad_create,& rpa_grad_finalize,& @@ -174,8 +177,7 @@ SUBROUTINE rpa_ri_compute_en(qs_env, Erpa, mp2_env, BIb_C, BIb_C_gw, BIb_C_bse_i fm_matrix_Minv, fm_matrix_Minv_Vtrunc_Minv, kpoints, & Eigenval, nmo, homo, dimen_RI, dimen_RI_red, gw_corr_lev_occ, gw_corr_lev_virt, & unit_nr, do_ri_sos_laplace_mp2, my_do_gw, do_im_time, do_bse, matrix_s, & - mat_munu, mat_P_global, & - t_3c_M, t_3c_O, t_3c_O_compressed, t_3c_O_ind, & + mat_munu, mat_P_global, t_3c_M, t_3c_O, t_3c_O_compressed, t_3c_O_ind, & starts_array_mc, ends_array_mc, & starts_array_mc_block, ends_array_mc_block, calc_forces) @@ -253,8 +255,11 @@ SUBROUTINE rpa_ri_compute_en(qs_env, Erpa, mp2_env, BIb_C, BIb_C_gw, BIb_C_bse_i CALL cite_reference(DelBen2013) CALL cite_reference(DelBen2015) - IF (mp2_env%ri_rpa%do_ri_axk) THEN + IF (mp2_env%ri_rpa%exchange_correction == rpa_exchange_axk) THEN CALL cite_reference(Bates2013) + ELSE IF (mp2_env%ri_rpa%exchange_correction == rpa_exchange_sosex) THEN + CALL cite_reference(Freeman1977) + CALL cite_reference(Gruneis2009) END IF IF (mp2_env%ri_rpa%do_rse) THEN CALL cite_reference(Ren2011) @@ -366,6 +371,7 @@ SUBROUTINE rpa_ri_compute_en(qs_env, Erpa, mp2_env, BIb_C, BIb_C_gw, BIb_C_bse_i mem_per_repl = mem_per_repl + 2.0_dp*mem_for_QK IF (calc_forces) CALL rpa_grad_needed_mem(homo, virtual, dimen_RI_red, mem_per_rank, mem_per_repl, do_ri_sos_laplace_mp2) + CALL rpa_exchange_needed_mem(mp2_env, homo, virtual, dimen_RI_red, para_env, mem_per_rank, mem_per_repl) mem_min = mem_per_repl/para_env%num_pe + mem_per_rank @@ -615,11 +621,9 @@ SUBROUTINE rpa_ri_compute_en(qs_env, Erpa, mp2_env, BIb_C, BIb_C_gw, BIb_C_bse_i fm_matrix_PQ, fm_mat_S, fm_mat_Q_gemm, fm_mat_Q, fm_mat_S_gw, fm_mat_R_gw(1), & fm_mat_S_ij_bse(1), fm_mat_S_ab_bse(1), & my_do_gw, do_bse, gw_corr_lev_occ, gw_corr_lev_virt, & - do_minimax_quad, & - do_im_time, mo_coeff, & + do_minimax_quad, do_im_time, mo_coeff, & fm_matrix_L_kpoints, fm_matrix_Minv_L_kpoints, & - fm_matrix_Minv, fm_matrix_Minv_Vtrunc_Minv, & - mat_munu, mat_P_global, & + fm_matrix_Minv, fm_matrix_Minv_Vtrunc_Minv, mat_munu, mat_P_global, & t_3c_M, t_3c_O, t_3c_O_compressed, t_3c_O_ind, & starts_array_mc, ends_array_mc, & 
starts_array_mc_block, ends_array_mc_block, & @@ -646,13 +650,6 @@ SUBROUTINE rpa_ri_compute_en(qs_env, Erpa, mp2_env, BIb_C, BIb_C_gw, BIb_C_bse_i CALL cp_fm_release(fm_mat_S_ab_bse(1)) END IF - IF (mp2_env%ri_rpa%do_ri_axk) THEN - CALL dbcsr_release(mp2_env%ri_rpa%mo_coeff_o) - DEALLOCATE (mp2_env%ri_rpa%mo_coeff_o) - CALL dbcsr_release(mp2_env%ri_rpa%mo_coeff_v) - DEALLOCATE (mp2_env%ri_rpa%mo_coeff_v) - END IF - CALL timestop(handle) END SUBROUTINE rpa_ri_compute_en @@ -1042,8 +1039,7 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s my_do_gw, do_bse, gw_corr_lev_occ, gw_corr_lev_virt, & do_minimax_quad, do_im_time, mo_coeff, & fm_matrix_L_kpoints, fm_matrix_Minv_L_kpoints, & - fm_matrix_Minv, fm_matrix_Minv_Vtrunc_Minv, & - mat_munu, mat_P_global, & + fm_matrix_Minv, fm_matrix_Minv_Vtrunc_Minv, mat_munu, mat_P_global, & t_3c_M, t_3c_O, t_3c_O_compressed, t_3c_O_ind, & starts_array_mc, ends_array_mc, & starts_array_mc_block, ends_array_mc_block, & @@ -1064,8 +1060,8 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s INTEGER, INTENT(IN) :: num_integ_points, num_integ_group, & color_rpa_group TYPE(cp_fm_type), INTENT(IN) :: fm_matrix_PQ - TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_S, fm_mat_Q_gemm, fm_mat_Q, & - fm_mat_S_gw + TYPE(cp_fm_type), DIMENSION(:), INTENT(INOUT) :: fm_mat_S + TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: fm_mat_Q_gemm, fm_mat_Q, fm_mat_S_gw TYPE(cp_fm_type), INTENT(IN) :: fm_mat_R_gw, fm_mat_S_ij_bse, & fm_mat_S_ab_bse LOGICAL, INTENT(IN) :: my_do_gw, do_bse @@ -1112,7 +1108,7 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s do_periodic, do_print, do_ri_Sigma_x, exit_ev_gw, first_cycle, & first_cycle_periodic_correction, my_open_shell, print_ic_values LOGICAL, ALLOCATABLE, DIMENSION(:, :, :, :, :) :: has_mat_P_blocks - REAL(KIND=dp) :: a_scaling, alpha, dbcsr_time, e_axk, e_axk_corr, eps_filter, & + REAL(KIND=dp) :: a_scaling, alpha, dbcsr_time, e_exchange, e_exchange_corr, eps_filter, & eps_filter_im_time, ext_scaling, fermi_level_offset, fermi_level_offset_input, & my_flop_rate, omega, omega_max_fit, omega_old, tau, tau_old REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: delta_corr, e_fermi, tau_tj, tau_wj, tj, & @@ -1147,6 +1143,7 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s TYPE(hfx_compression_type), ALLOCATABLE, & DIMENSION(:) :: t_3c_O_mo_compressed TYPE(im_time_force_type) :: force_data + TYPE(rpa_exchange_work_type) :: exchange_work TYPE(rpa_grad_type) :: rpa_grad TYPE(two_dim_int_array), ALLOCATABLE, DIMENSION(:) :: t_3c_O_mo_ind @@ -1325,8 +1322,12 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s fm_mat_S, homo, virtual, mp2_env, Eigenval(:, 1, :), & unit_nr, do_ri_sos_laplace_mp2) + IF (.NOT. do_im_time .AND. .NOT. do_ri_sos_laplace_mp2) & + CALL exchange_work%create(qs_env, para_env_sub, mat_munu, dimen_RI_red, & + fm_mat_S, fm_mat_Q(1), fm_mat_Q_gemm(1), homo, virtual) + Erpa = 0.0_dp - IF (mp2_env%ri_rpa%do_ri_axk) e_axk = 0.0_dp + IF (mp2_env%ri_rpa%exchange_correction /= rpa_exchange_none) e_exchange = 0.0_dp first_cycle = .TRUE. omega_old = 0.0_dp CALL dgemm_counter_init(dgemm_counter, unit_nr, mp2_env%ri_rpa%print_dgemm_info) @@ -1489,15 +1490,14 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s END IF ! im time - ! 
Calculate AXK energy correction - IF (mp2_env%ri_rpa%do_ri_axk) THEN - CALL compute_axk_ener(qs_env, fm_mat_Q(1), fm_mat_Q_gemm(1), dimen_RI_red, dimen_ia(1), para_env_sub, & - para_env_RPA, Eigenval(:, 1, 1), fm_mat_S(1), homo(1), virtual(1), omega, & - mp2_env, mat_munu, unit_nr, e_axk_corr) + ! Calculate RPA exchange energy correction + IF (mp2_env%ri_rpa%exchange_correction /= rpa_exchange_none) THEN + e_exchange_corr = 0.0_dp + CALL exchange_work%compute(fm_mat_Q(1), Eigenval(:, 1, :), fm_mat_S, omega, e_exchange_corr, mp2_env) - ! Evaluate the final AXK energy correction - e_axk = e_axk + e_axk_corr*wj(jquad) - END IF ! do_ri_axk + ! Evaluate the final exchange energy correction + e_exchange = e_exchange + e_exchange_corr*wj(jquad) + END IF IF (do_ri_sos_laplace_mp2) THEN CALL SOS_MP2_postprocessing(fm_mat_Q, Erpa, tau_wj(jquad)) @@ -1508,7 +1508,7 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s ELSE IF (calc_forces .AND. .NOT. do_im_time) CALL rpa_grad_copy_Q(fm_mat_Q(1), rpa_grad) - CALL Q_trace_and_add_unit_matrix(dimen_RI_red, trace_Qomega, fm_mat_Q(1), para_env_RPA) + CALL Q_trace_and_add_unit_matrix(dimen_RI_red, trace_Qomega, fm_mat_Q(1)) IF (do_kpoints_cubic_RPA .OR. do_kpoints_from_Gamma) THEN CALL invert_eps_compute_W_and_Erpa_kp(dimen_RI, num_integ_points, jquad, nkp, count_ev_sc_GW, para_env, & @@ -1570,11 +1570,11 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s IF (do_minimax_quad) Erpa = Erpa/2.0_dp END IF - IF (mp2_env%ri_rpa%do_ri_axk) THEN - CALL para_env%sum(E_axk) - E_axk = E_axk/(pi*2.0_dp) - IF (do_minimax_quad) E_axk = E_axk/2.0_dp - mp2_env%ri_rpa%ener_axk = E_axk + IF (mp2_env%ri_rpa%exchange_correction /= rpa_exchange_none) THEN + CALL para_env%sum(E_exchange) + E_exchange = E_exchange/(pi*2.0_dp) + IF (do_minimax_quad) E_exchange = E_exchange/2.0_dp + mp2_env%ri_rpa%ener_exchange = E_exchange END IF IF (calc_forces .AND. do_ri_sos_laplace_mp2 .AND. do_im_time) THEN @@ -1759,6 +1759,8 @@ SUBROUTINE rpa_num_int(qs_env, Erpa, mp2_env, para_env, para_env_RPA, para_env_s END IF + IF (.NOT. do_im_time .AND. .NOT. do_ri_sos_laplace_mp2) CALL exchange_work%release() + IF (.NOT. do_ri_sos_laplace_mp2) THEN DEALLOCATE (tj) DEALLOCATE (wj) diff --git a/src/rpa_rse.F b/src/rpa_rse.F index 5c1fa03ed6..3f953a767c 100644 --- a/src/rpa_rse.F +++ b/src/rpa_rse.F @@ -531,6 +531,7 @@ SUBROUTINE non_diag_rse(fm_F_mo, eigenval, dimen, homo, para_env, & rse_corr = 0.0_dp DO ispin = 1, nspins + IF (homo(ispin) <= 0 .OR. homo(ispin) >= dimen) CYCLE ! Create the occupied-occupied and virtual-virtual blocks, eigenvectors NULLIFY (fm_struct_tmp) CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, & diff --git a/src/rpa_util.F b/src/rpa_util.F index 56aa78f3a5..fb0210bb14 100644 --- a/src/rpa_util.F +++ b/src/rpa_util.F @@ -878,20 +878,19 @@ END SUBROUTINE contract_S_to_Q !> \param dimen_RI ... !> \param trace_Qomega ... !> \param fm_mat_Q ... -!> \param para_env_RPA ... ! 
************************************************************************************************** - SUBROUTINE Q_trace_and_add_unit_matrix(dimen_RI, trace_Qomega, fm_mat_Q, para_env_RPA) + SUBROUTINE Q_trace_and_add_unit_matrix(dimen_RI, trace_Qomega, fm_mat_Q) INTEGER, INTENT(IN) :: dimen_RI REAL(KIND=dp), DIMENSION(dimen_RI), INTENT(OUT) :: trace_Qomega TYPE(cp_fm_type), INTENT(IN) :: fm_mat_Q - TYPE(mp_para_env_type), INTENT(IN) :: para_env_RPA CHARACTER(LEN=*), PARAMETER :: routineN = 'Q_trace_and_add_unit_matrix' INTEGER :: handle, i_global, iiB, j_global, jjB, & ncol_local, nrow_local INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices + TYPE(mp_para_env_type), POINTER :: para_env CALL timeset(routineN, handle) @@ -899,7 +898,8 @@ SUBROUTINE Q_trace_and_add_unit_matrix(dimen_RI, trace_Qomega, fm_mat_Q, para_en nrow_local=nrow_local, & ncol_local=ncol_local, & row_indices=row_indices, & - col_indices=col_indices) + col_indices=col_indices, & + para_env=para_env) ! calculate the trace of Q and add 1 on the diagonal trace_Qomega = 0.0_dp @@ -915,7 +915,7 @@ SUBROUTINE Q_trace_and_add_unit_matrix(dimen_RI, trace_Qomega, fm_mat_Q, para_en END IF END DO END DO - CALL para_env_RPA%sum(trace_Qomega) + CALL para_env%sum(trace_Qomega) CALL timestop(handle) diff --git a/tests/QS/regtest-ri-rpa-axk/TEST_FILES b/tests/QS/regtest-ri-rpa-axk/TEST_FILES deleted file mode 100644 index 26c4f4c539..0000000000 --- a/tests/QS/regtest-ri-rpa-axk/TEST_FILES +++ /dev/null @@ -1,3 +0,0 @@ -RPA_AXK_H2O.inp 11 1e-09 -17.314245724313864 -RPA_AXK_H2O_svd.inp 11 1e-09 -17.314244711493288 -#EOF diff --git a/tests/QS/regtest-ri-rpa-exchange/CH3.xyz b/tests/QS/regtest-ri-rpa-exchange/CH3.xyz new file mode 100644 index 0000000000..0783fd1310 --- /dev/null +++ b/tests/QS/regtest-ri-rpa-exchange/CH3.xyz @@ -0,0 +1,7 @@ + 4 + + C 0.000 0.000 0.300 + H 0.000 1.173 0.000 + H 0.929 -0.536 0.000 + H -0.929 -0.536 0.000 + diff --git a/tests/QS/regtest-ri-rpa-axk/H2O_gas.xyz b/tests/QS/regtest-ri-rpa-exchange/H2O_gas.xyz similarity index 100% rename from tests/QS/regtest-ri-rpa-axk/H2O_gas.xyz rename to tests/QS/regtest-ri-rpa-exchange/H2O_gas.xyz diff --git a/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_CH3.inp b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_CH3.inp new file mode 100644 index 0000000000..d476b72295 --- /dev/null +++ b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_CH3.inp @@ -0,0 +1,100 @@ +&GLOBAL + PRINT_LEVEL MEDIUM + PROJECT RPA_AXK_H2O + RUN_TYPE ENERGY + &TIMINGS + THRESHOLD 0.01 + &END TIMINGS +&END GLOBAL + +&FORCE_EVAL + METHOD Quickstep + &DFT + BASIS_SET_FILE_NAME HFX_BASIS + MULTIPLICITY 2 + POTENTIAL_FILE_NAME GTH_POTENTIALS + UKS + &MGRID + CUTOFF 100 + REL_CUTOFF 50 + &END MGRID + &POISSON + PERIODIC XYZ + POISSON_SOLVER WAVELET + &END POISSON + &QS + EPS_DEFAULT 1.0E-10 + METHOD GPW + &END QS + &SCF + EPS_SCF 1.0E-8 + MAX_SCF 100 + SCF_GUESS ATOMIC + &PRINT + &RESTART OFF + &END RESTART + &END PRINT + &END SCF + &XC + &WF_CORRELATION + MEMORY 200. 
+ NUMBER_PROC 1 + &INTEGRALS + ERI_METHOD GPW + &WFC_GPW + CUTOFF 100 + EPS_FILTER 1.0E-6 + EPS_GRID 1.0E-6 + REL_CUTOFF 25 + &END WFC_GPW + &END INTEGRALS + &RI_RPA + MINIMAX + # 6-8 are sufficient + RPA_NUM_QUAD_POINTS 1 + &EXCHANGE_CORRECTION AXK + USE_HFX_IMPLEMENTATION F + &END EXCHANGE_CORRECTION + &HF + FRACTION 1.0000000 + &INTERACTION_POTENTIAL + # Use half the system size + CUTOFF_RADIUS 1.99 + POTENTIAL_TYPE TRUNCATED + &END INTERACTION_POTENTIAL + &SCREENING + EPS_SCHWARZ 1.0E-2 + SCREEN_ON_INITIAL_P FALSE + &END SCREENING + &END HF + &END RI_RPA + &END WF_CORRELATION + &XC_FUNCTIONAL PBE + &END XC_FUNCTIONAL + &END XC + &END DFT + &SUBSYS + &CELL + ABC [angstrom] 4.000 4.000 4.000 + MULTIPLE_UNIT_CELL 1 1 1 + PERIODIC XYZ + &END CELL + &KIND H + BASIS_SET DZVP-GTH + BASIS_SET RI_AUX RI_DZVP-GTH + POTENTIAL GTH-PBE-q1 + &END KIND + &KIND C + BASIS_SET DZVP-GTH + BASIS_SET RI_AUX RI_DZVP-GTH + POTENTIAL GTH-PBE-q4 + &END KIND + &TOPOLOGY + COORD_FILE_FORMAT xyz + COORD_FILE_NAME CH3.xyz + MULTIPLE_UNIT_CELL 1 1 1 + &CENTER_COORDINATES + &END CENTER_COORDINATES + &END TOPOLOGY + &END SUBSYS +&END FORCE_EVAL diff --git a/tests/QS/regtest-ri-rpa-axk/RPA_AXK_H2O.inp b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O.inp similarity index 94% rename from tests/QS/regtest-ri-rpa-axk/RPA_AXK_H2O.inp rename to tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O.inp index bbe78c6468..59516504db 100644 --- a/tests/QS/regtest-ri-rpa-axk/RPA_AXK_H2O.inp +++ b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O.inp @@ -47,8 +47,10 @@ &END WFC_GPW &END INTEGRALS &RI_RPA - RI_AXK .TRUE. RPA_NUM_QUAD_POINTS 1 + &EXCHANGE_CORRECTION AXK + USE_HFX_IMPLEMENTATION F + &END EXCHANGE_CORRECTION &HF FRACTION 1.0000000 &SCREENING diff --git a/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O_hfx.inp b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O_hfx.inp new file mode 100644 index 0000000000..85c3410ef4 --- /dev/null +++ b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O_hfx.inp @@ -0,0 +1,89 @@ +&GLOBAL + PRINT_LEVEL MEDIUM + PROJECT RPA_AXK_H2O + RUN_TYPE ENERGY + &TIMINGS + THRESHOLD 0.01 + &END TIMINGS +&END GLOBAL + +&FORCE_EVAL + METHOD Quickstep + &DFT + BASIS_SET_FILE_NAME HFX_BASIS + POTENTIAL_FILE_NAME GTH_POTENTIALS + &MGRID + CUTOFF 100 + REL_CUTOFF 20 + &END MGRID + &POISSON + PERIODIC XYZ + POISSON_SOLVER WAVELET + &END POISSON + &QS + EPS_DEFAULT 1.0E-11 + METHOD GPW + &END QS + &SCF + EPS_SCF 1.0E-6 + MAX_SCF 100 + SCF_GUESS ATOMIC + &PRINT + &RESTART OFF + &END RESTART + &END PRINT + &END SCF + &XC + &WF_CORRELATION + MEMORY 200. 
+ NUMBER_PROC 1 + &INTEGRALS + ERI_METHOD GPW + &WFC_GPW + CUTOFF 50 + EPS_FILTER 1.0E-6 + EPS_GRID 1.0E-6 + REL_CUTOFF 20 + &END WFC_GPW + &END INTEGRALS + &RI_RPA + RPA_NUM_QUAD_POINTS 1 + &EXCHANGE_CORRECTION AXK + USE_HFX_IMPLEMENTATION T + &END EXCHANGE_CORRECTION + &HF + FRACTION 1.0000000 + &SCREENING + EPS_SCHWARZ 1.0E-6 + SCREEN_ON_INITIAL_P FALSE + &END SCREENING + &END HF + &END RI_RPA + &END WF_CORRELATION + &XC_FUNCTIONAL PBE + &END XC_FUNCTIONAL + &END XC + &END DFT + &SUBSYS + &CELL + ABC [angstrom] 4.000 4.000 4.000 + PERIODIC XYZ + &END CELL + &KIND H + BASIS_SET DZVP-GTH + BASIS_SET RI_AUX RI_DZVP-GTH + POTENTIAL GTH-PBE-q1 + &END KIND + &KIND O + BASIS_SET DZVP-GTH + BASIS_SET RI_AUX RI_DZVP-GTH + POTENTIAL GTH-PBE-q6 + &END KIND + &TOPOLOGY + COORD_FILE_FORMAT xyz + COORD_FILE_NAME H2O_gas.xyz + &CENTER_COORDINATES + &END CENTER_COORDINATES + &END TOPOLOGY + &END SUBSYS +&END FORCE_EVAL diff --git a/tests/QS/regtest-ri-rpa-axk/RPA_AXK_H2O_svd.inp b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O_svd.inp similarity index 94% rename from tests/QS/regtest-ri-rpa-axk/RPA_AXK_H2O_svd.inp rename to tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O_svd.inp index 56e1cd81f6..e762eb2369 100644 --- a/tests/QS/regtest-ri-rpa-axk/RPA_AXK_H2O_svd.inp +++ b/tests/QS/regtest-ri-rpa-exchange/RPA_AXK_H2O_svd.inp @@ -50,8 +50,10 @@ DO_SVD &END RI &RI_RPA - RI_AXK .TRUE. RPA_NUM_QUAD_POINTS 1 + &EXCHANGE_CORRECTION AXK + USE_HFX_IMPLEMENTATION F + &END EXCHANGE_CORRECTION &HF FRACTION 1.0000000 &SCREENING diff --git a/tests/QS/regtest-ri-rpa-exchange/RPA_SOSEX_H2O.inp b/tests/QS/regtest-ri-rpa-exchange/RPA_SOSEX_H2O.inp new file mode 100644 index 0000000000..8d84d1a85e --- /dev/null +++ b/tests/QS/regtest-ri-rpa-exchange/RPA_SOSEX_H2O.inp @@ -0,0 +1,89 @@ +&GLOBAL + PRINT_LEVEL MEDIUM + PROJECT RPA_AXK_H2O + RUN_TYPE ENERGY + &TIMINGS + THRESHOLD 0.01 + &END TIMINGS +&END GLOBAL + +&FORCE_EVAL + METHOD Quickstep + &DFT + BASIS_SET_FILE_NAME HFX_BASIS + POTENTIAL_FILE_NAME GTH_POTENTIALS + &MGRID + CUTOFF 100 + REL_CUTOFF 20 + &END MGRID + &POISSON + PERIODIC XYZ + POISSON_SOLVER WAVELET + &END POISSON + &QS + EPS_DEFAULT 1.0E-11 + METHOD GPW + &END QS + &SCF + EPS_SCF 1.0E-6 + MAX_SCF 100 + SCF_GUESS ATOMIC + &PRINT + &RESTART OFF + &END RESTART + &END PRINT + &END SCF + &XC + &WF_CORRELATION + MEMORY 200. 
+ NUMBER_PROC 1 + &INTEGRALS + ERI_METHOD GPW + &WFC_GPW + CUTOFF 50 + EPS_FILTER 1.0E-6 + EPS_GRID 1.0E-6 + REL_CUTOFF 20 + &END WFC_GPW + &END INTEGRALS + &RI_RPA + RPA_NUM_QUAD_POINTS 1 + &EXCHANGE_CORRECTION SOSEX + USE_HFX_IMPLEMENTATION F + &END EXCHANGE_CORRECTION + &HF + FRACTION 1.0000000 + &SCREENING + EPS_SCHWARZ 1.0E-6 + SCREEN_ON_INITIAL_P FALSE + &END SCREENING + &END HF + &END RI_RPA + &END WF_CORRELATION + &XC_FUNCTIONAL PBE + &END XC_FUNCTIONAL + &END XC + &END DFT + &SUBSYS + &CELL + ABC [angstrom] 4.000 4.000 4.000 + PERIODIC XYZ + &END CELL + &KIND H + BASIS_SET DZVP-GTH + BASIS_SET RI_AUX RI_DZVP-GTH + POTENTIAL GTH-PBE-q1 + &END KIND + &KIND O + BASIS_SET DZVP-GTH + BASIS_SET RI_AUX RI_DZVP-GTH + POTENTIAL GTH-PBE-q6 + &END KIND + &TOPOLOGY + COORD_FILE_FORMAT xyz + COORD_FILE_NAME H2O_gas.xyz + &CENTER_COORDINATES + &END CENTER_COORDINATES + &END TOPOLOGY + &END SUBSYS +&END FORCE_EVAL diff --git a/tests/QS/regtest-ri-rpa-exchange/TEST_FILES b/tests/QS/regtest-ri-rpa-exchange/TEST_FILES new file mode 100644 index 0000000000..cb73a90265 --- /dev/null +++ b/tests/QS/regtest-ri-rpa-exchange/TEST_FILES @@ -0,0 +1,6 @@ +RPA_AXK_H2O.inp 11 1e-09 -17.160620212320048 +RPA_AXK_H2O_svd.inp 11 1e-09 -17.160620212321199 +RPA_AXK_H2O_hfx.inp 11 1e-09 -17.122001748342520 +RPA_SOSEX_H2O.inp 11 1e-09 -17.132129178553146 +RPA_AXK_CH3.inp 11 3e-05 -7.177515835708672 +#EOF diff --git a/tests/QS/regtest-rpa-lr/H2O-rpa-axk-lr.inp b/tests/QS/regtest-rpa-lr/H2O-rpa-axk-lr.inp index e662ed2868..f069a303f0 100644 --- a/tests/QS/regtest-rpa-lr/H2O-rpa-axk-lr.inp +++ b/tests/QS/regtest-rpa-lr/H2O-rpa-axk-lr.inp @@ -53,8 +53,9 @@ DO_SVD &END RI &RI_RPA - RI_AXK .TRUE. - RPA_NUM_QUAD_POINTS 5 + RPA_NUM_QUAD_POINTS 1 + &EXCHANGE_CORRECTION AXK + &END EXCHANGE_CORRECTION &HF FRACTION 1.0000000 &SCREENING diff --git a/tests/QS/regtest-rpa-lr/TEST_FILES b/tests/QS/regtest-rpa-lr/TEST_FILES index 5bbcb087c7..2eca0399d2 100644 --- a/tests/QS/regtest-rpa-lr/TEST_FILES +++ b/tests/QS/regtest-rpa-lr/TEST_FILES @@ -1,6 +1,6 @@ H2O-ri-rpa-lr.inp 11 1e-8 -16.847828959621648 H2O-ri-rpa-lr-mme.inp 11 1e-5 -16.851159582018898 -H2O-rpa-axk-lr.inp 11 1e-8 -16.840956522844035 +H2O-rpa-axk-lr.inp 11 1e-8 -16.825928320373368 H2O-rpa-cubic-lr.inp 11 1e-8 -16.867724875643752 H2O-rpa-cubic-lr-mme.inp 11 1e-8 -16.857667483475399 #EOF diff --git a/tests/TEST_DIRS b/tests/TEST_DIRS index 405f328579..0262fe27c5 100644 --- a/tests/TEST_DIRS +++ b/tests/TEST_DIRS @@ -61,7 +61,7 @@ QS/regtest-xastdp libint libxc QS/regtest-gw2x libint libxc QS/regtest-negf QS/regtest-negf-fft fftw3 -QS/regtest-ri-rpa-axk libint +QS/regtest-ri-rpa-exchange libint QS/regtest-ri-rpa-rse libint QS/regtest-cdft-hirshfeld-3 QS/regtest-cdft-hirshfeld-2 parallel mpiranks>1
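
The index handling changed throughout this patch replaces per-element calls to cp_fm_indxg2p/cp_fm_indxg2l with lookup tables kept on the matrix struct (g2p_row, g2p_col, g2l_col), so the owning process and the local index of a global row or column come from a single array access. A minimal, self-contained sketch of how such tables can be precomputed from the standard ScaLAPACK block-cyclic mapping is given below; the program and its parameters (n_global, nblock, nproc, first_p) are illustrative only and are not the cp_fm_struct API.

PROGRAM blacs_index_maps_sketch
   IMPLICIT NONE
   INTEGER, PARAMETER :: n_global = 10   ! global matrix dimension (illustrative)
   INTEGER, PARAMETER :: nblock = 3      ! block size of the block-cyclic layout
   INTEGER, PARAMETER :: nproc = 4       ! number of processes in this grid dimension
   INTEGER, PARAMETER :: first_p = 0     ! process holding the first block
   INTEGER :: g2p(n_global), g2l(n_global), i

   DO i = 1, n_global
      ! standard ScaLAPACK INDXG2P: process owning global index i
      g2p(i) = MOD(first_p + (i - 1)/nblock, nproc)
      ! standard ScaLAPACK INDXG2L: local index of i on its owning process
      g2l(i) = nblock*((i - 1)/(nblock*nproc)) + MOD(i - 1, nblock) + 1
   END DO

   DO i = 1, n_global
      WRITE (*, '(A,I3,A,I2,A,I3)') 'global ', i, ' -> process ', g2p(i), ', local index ', g2l(i)
   END DO
END PROGRAM blacs_index_maps_sketch

The redistribution routines above decompose the flattened column index of fm_mat_S into an occupied and a virtual orbital index via iocc = (ia - 1)/virtual + 1 and avirt = MOD(ia - 1, virtual) + 1, i.e. the virtual index runs fastest in the composite ia index. A small round-trip check of that decomposition (the homo and virtual values here are made up for illustration):

PROGRAM composite_index_sketch
   IMPLICIT NONE
   INTEGER, PARAMETER :: virtual = 5, homo = 3   ! illustrative orbital counts
   INTEGER :: ia_global, iocc, avirt

   ! the (i,a) pair is flattened as ia_global = (iocc - 1)*virtual + avirt
   DO ia_global = 1, homo*virtual
      iocc = (ia_global - 1)/virtual + 1
      avirt = MOD(ia_global - 1, virtual) + 1
      IF ((iocc - 1)*virtual + avirt /= ia_global) STOP 'round trip failed'
      WRITE (*, '(A,I3,A,I2,A,I2,A)') 'ia = ', ia_global, '  ->  (i = ', iocc, ', a = ', avirt, ')'
   END DO
END PROGRAM composite_index_sketch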