From e682d7672aef3e0d6d2420306a9919bb103e1066 Mon Sep 17 00:00:00 2001 From: Frederick Stein <43850145+fstein93@users.noreply.github.com> Date: Wed, 23 Oct 2024 09:12:00 +0200 Subject: [PATCH] Fix pw_derive with the Intel compilers (#3736) --- src/pw/pw_methods.F | 153 ++++++++++++++++++++++--------------------- src/qs_loc_methods.F | 2 + 2 files changed, 81 insertions(+), 74 deletions(-) diff --git a/src/pw/pw_methods.F b/src/pw/pw_methods.F index ed9cf9dc65..2dee727fa8 100644 --- a/src/pw/pw_methods.F +++ b/src/pw/pw_methods.F @@ -1014,7 +1014,7 @@ FUNCTION pw_integral_ab_${kind}$_${kind2}$_${space}$ (pw1, pw2, sumtype, just_su LOGICAL, INTENT(IN), OPTIONAL :: just_sum, local_only REAL(KIND=dp) :: integral_value - CHARACTER(len=*), PARAMETER :: routineN = 'pw_integral_ab' + CHARACTER(len=*), PARAMETER :: routineN = 'pw_integral_ab_${kind}$_${kind2}$_${space}$' INTEGER :: handle, loc_sumtype LOGICAL :: my_just_sum, my_local_only @@ -1062,8 +1062,7 @@ FUNCTION pw_integral_ab_${kind}$_${kind2}$_${space}$ (pw1, pw2, sumtype, just_su #:elif kind2[0]=="r" integral_value = accurate_sum(REAL(pw1%array, KIND=dp)*pw2%array) !? complex bit #:else - integral_value = accurate_sum(REAL(CONJG(pw1%array) & - *pw2%array, KIND=dp)) !? complex bit + integral_value = accurate_sum(pw1%array%re*pw2%array%re + pw1%array%im*pw2%array%im) !? complex bit #:endif END IF @@ -1676,8 +1675,8 @@ SUBROUTINE pw_gauss_damp(pw, omega) CHARACTER(len=*), PARAMETER :: routineN = 'pw_gauss_damp' - INTEGER :: handle - REAL(KIND=dp) :: omega_2 + INTEGER :: handle, i + REAL(KIND=dp) :: omega_2, tmp CALL timeset(routineN, handle) CPASSERT(omega >= 0) @@ -1685,9 +1684,12 @@ SUBROUTINE pw_gauss_damp(pw, omega) omega_2 = omega*omega omega_2 = 0.25_dp/omega_2 -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(pw,omega_2) - pw%array = pw%array*EXP(-pw%pw_grid%gsq*omega_2) -!$OMP END PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp) SHARED(pw,omega_2) + DO i = 1, SIZE(pw%array) + tmp = EXP(-pw%pw_grid%gsq(i)*omega_2) + pw%array(i) = CMPLX(pw%array(i)%re*tmp, pw%array(i)%im*tmp, KIND=dp) + END DO +!$OMP END PARALLEL DO CALL timestop(handle) @@ -1707,8 +1709,8 @@ SUBROUTINE pw_log_deriv_gauss(pw, omega) CHARACTER(len=*), PARAMETER :: routineN = 'pw_log_deriv_gauss' - INTEGER :: handle - REAL(KIND=dp) :: omega_2 + INTEGER :: handle, i + REAL(KIND=dp) :: omega_2, tmp CALL timeset(routineN, handle) CPASSERT(omega >= 0) @@ -1716,9 +1718,12 @@ SUBROUTINE pw_log_deriv_gauss(pw, omega) omega_2 = omega*omega omega_2 = 0.25_dp/omega_2 -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(pw,omega_2) - pw%array = pw%array*(1.0_dp + omega_2*pw%pw_grid%gsq) -!$OMP END PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp) SHARED(pw,omega_2) + DO i = 1, SIZE(pw%array) + tmp = 1.0_dp + omega_2*pw%pw_grid%gsq(i) + pw%array(i) = CMPLX(pw%array(i)%re*tmp, pw%array(i)%im*tmp, KIND=dp) + END DO +!$OMP END PARALLEL DO CALL timestop(handle) END SUBROUTINE pw_log_deriv_gauss @@ -1744,7 +1749,7 @@ SUBROUTINE pw_compl_gauss_damp(pw, omega) CHARACTER(len=*), PARAMETER :: routineN = 'pw_compl_gauss_damp' INTEGER :: cnt, handle, i - REAL(KIND=dp) :: omega_2, tmp + REAL(KIND=dp) :: omega_2, tmp, tmp2 CALL timeset(routineN, handle) @@ -1753,14 +1758,15 @@ SUBROUTINE pw_compl_gauss_damp(pw, omega) cnt = SIZE(pw%array) -!$OMP PARALLEL DO PRIVATE(i, tmp) DEFAULT(NONE) SHARED(cnt, pw,omega_2) +!$OMP PARALLEL DO PRIVATE(i, tmp, tmp2) DEFAULT(NONE) SHARED(cnt, pw,omega_2) DO i = 1, cnt tmp = -omega_2*pw%pw_grid%gsq(i) IF (ABS(tmp) > 1.0E-5_dp) THEN - pw%array(i) = pw%array(i)*(1.0_dp - EXP(tmp)) + tmp2 = 1.0_dp - EXP(tmp) ELSE - pw%array(i) = pw%array(i)*(tmp + 0.5_dp*tmp*(tmp + (1.0_dp/3.0_dp)*tmp**2)) + tmp2 = tmp + 0.5_dp*tmp*(tmp + (1.0_dp/3.0_dp)*tmp**2) END IF + pw%array(i) = CMPLX(pw%array(i)%re*tmp2, pw%array(i)%im*tmp2, KIND=dp) END DO !$OMP END PARALLEL DO @@ -1782,23 +1788,24 @@ SUBROUTINE pw_log_deriv_compl_gauss(pw, omega) CHARACTER(len=*), PARAMETER :: routineN = 'pw_log_deriv_compl_gauss' INTEGER :: handle, i - REAL(KIND=dp) :: omega_2, tmp + REAL(KIND=dp) :: omega_2, tmp, tmp2 CALL timeset(routineN, handle) omega_2 = omega*omega omega_2 = 0.25_dp/omega_2 -!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp) & +!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp,tmp2) & !$OMP SHARED(pw,omega_2) DO i = 1, SIZE(pw%array) tmp = omega_2*pw%pw_grid%gsq(i) ! For too small arguments, use the Taylor polynomial to prevent division by zero IF (ABS(tmp) >= 0.003_dp) THEN - pw%array(i) = pw%array(i)*(1.0_dp - tmp*EXP(-tmp)/(1.0_dp - EXP(-tmp))) + tmp2 = 1.0_dp - tmp*EXP(-tmp)/(1.0_dp - EXP(-tmp)) ELSE - pw%array(i) = pw%array(i)*(0.5_dp*tmp - tmp**2/12.0_dp) + tmp2 = 0.5_dp*tmp - tmp**2/12.0_dp END IF + pw%array(i) = CMPLX(pw%array(i)%re*tmp2, pw%array(i)%im*tmp2, KIND=dp) END DO !$OMP END PARALLEL DO @@ -1828,17 +1835,20 @@ SUBROUTINE pw_gauss_damp_mix(pw, omega, scale_coul, scale_long) CHARACTER(len=*), PARAMETER :: routineN = 'pw_gauss_damp_mix' - INTEGER :: handle - REAL(KIND=dp) :: omega_2 + INTEGER :: handle, i + REAL(KIND=dp) :: omega_2, tmp CALL timeset(routineN, handle) omega_2 = omega*omega omega_2 = 0.25_dp/omega_2 -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(pw, omega_2, scale_coul, scale_long) - pw%array = pw%array*(scale_coul + scale_long*EXP(-pw%pw_grid%gsq*omega_2)) -!$OMP END PARALLEL WORKSHARE +!$OMP PARALLEL DO DEFAULT(NONE) SHARED(pw, omega_2, scale_coul, scale_long) PRIVATE(i,tmp) + DO i = 1, SIZE(pw%array) + tmp = scale_coul + scale_long*EXP(-pw%pw_grid%gsq(i)*omega_2) + pw%array(i) = CMPLX(pw%array(i)%re*tmp, pw%array(i)%im*tmp, KIND=dp) + END DO +!$OMP END PARALLEL DO CALL timestop(handle) @@ -1861,18 +1871,19 @@ SUBROUTINE pw_log_deriv_mix_cl(pw, omega, scale_coul, scale_long) CHARACTER(len=*), PARAMETER :: routineN = 'pw_log_deriv_mix_cl' INTEGER :: handle, i - REAL(KIND=dp) :: omega_2, tmp + REAL(KIND=dp) :: omega_2, tmp, tmp2 CALL timeset(routineN, handle) omega_2 = omega*omega omega_2 = 0.25_dp/omega_2 -!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp) & +!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp,tmp2) & !$OMP SHARED(pw,omega_2,scale_long,scale_coul) DO i = 1, SIZE(pw%array) tmp = omega_2*pw%pw_grid%gsq(i) - pw%array(i) = pw%array(i)*(1.0_dp + scale_long*tmp*EXP(-tmp)/(scale_coul + scale_long*EXP(-tmp))) + tmp2 = 1.0_dp + scale_long*tmp*EXP(-tmp)/(scale_coul + scale_long*EXP(-tmp)) + pw%array(i) = CMPLX(pw%array(i)%re*tmp2, pw%array(i)%im*tmp2, KIND=dp) END DO !$OMP END PARALLEL DO @@ -1901,19 +1912,20 @@ SUBROUTINE pw_truncated(pw, rcutoff) CHARACTER(len=*), PARAMETER :: routineN = 'pw_truncated' INTEGER :: handle, i - REAL(KIND=dp) :: tmp + REAL(KIND=dp) :: tmp, tmp2 CALL timeset(routineN, handle) CPASSERT(rcutoff >= 0) -!$OMP PARALLEL DO PRIVATE(i,tmp) DEFAULT(NONE) SHARED(pw, rcutoff) +!$OMP PARALLEL DO PRIVATE(i,tmp,tmp2) DEFAULT(NONE) SHARED(pw, rcutoff) DO i = 1, SIZE(pw%array) tmp = SQRT(pw%pw_grid%gsq(i))*rcutoff IF (tmp >= 0.005_dp) THEN - pw%array(i) = pw%array(i)*(1.0_dp - COS(tmp)) + tmp2 = 1.0_dp - COS(tmp) ELSE - pw%array(i) = pw%array(i)*tmp**2/2.0_dp*(1.0 - tmp**2/12.0_dp) + tmp2 = tmp**2/2.0_dp*(1.0 - tmp**2/12.0_dp) END IF + pw%array(i) = CMPLX(pw%array(i)%re*tmp2, pw%array(i)%im*tmp2, KIND=dp) END DO !$OMP END PARALLEL DO @@ -1936,22 +1948,23 @@ SUBROUTINE pw_log_deriv_trunc(pw, rcutoff) CHARACTER(len=*), PARAMETER :: routineN = 'pw_log_deriv_trunc' INTEGER :: handle, i - REAL(KIND=dp) :: rchalf, tmp + REAL(KIND=dp) :: rchalf, tmp, tmp2 CALL timeset(routineN, handle) CPASSERT(rcutoff >= 0) rchalf = 0.5_dp*rcutoff -!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp) & +!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,tmp,tmp2) & !$OMP SHARED(pw,rchalf) DO i = 1, SIZE(pw%array) tmp = rchalf*SQRT(pw%pw_grid%gsq(i)) ! For too small arguments, use the Taylor polynomial to prevent division by zero IF (ABS(tmp) >= 0.0001_dp) THEN - pw%array(i) = pw%array(i)*(1.0_dp - tmp/TAN(tmp)) + tmp2 = 1.0_dp - tmp/TAN(tmp) ELSE - pw%array(i) = pw%array(i)*tmp**2*(1.0_dp + tmp**2/15.0_dp)/3.0_dp + tmp2 = tmp**2*(1.0_dp + tmp**2/15.0_dp)/3.0_dp END IF + pw%array(i) = CMPLX(pw%array(i)%re*tmp2, pw%array(i)%im*tmp2, KIND=dp) END DO !$OMP END PARALLEL DO @@ -1977,7 +1990,7 @@ SUBROUTINE pw_derive(pw, n) CHARACTER(len=*), PARAMETER :: routineN = 'pw_derive' COMPLEX(KIND=dp) :: im - INTEGER :: handle, m + INTEGER :: handle, m, idx, idir CALL timeset(routineN, handle) @@ -1987,35 +2000,23 @@ SUBROUTINE pw_derive(pw, n) m = SUM(n) im = CMPLX(0.0_dp, 1.0_dp, KIND=dp)**m - IF (n(1) == 1) THEN -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(pw) - pw%array(:) = pw%array(:)*pw%pw_grid%g(1, :) -!$OMP END PARALLEL WORKSHARE - ELSE IF (n(1) > 1) THEN -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(n, pw) - pw%array(:) = pw%array(:)*(pw%pw_grid%g(1, :)**n(1)) -!$OMP END PARALLEL WORKSHARE - END IF - - IF (n(2) == 1) THEN -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(pw) - pw%array(:) = pw%array(:)*pw%pw_grid%g(2, :) -!$OMP END PARALLEL WORKSHARE - ELSE IF (n(2) > 1) THEN -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(n, pw) - pw%array(:) = pw%array(:)*(pw%pw_grid%g(2, :)**n(2)) -!$OMP END PARALLEL WORKSHARE - END IF - - IF (n(3) == 1) THEN -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(pw) - pw%array(:) = pw%array(:)*pw%pw_grid%g(3, :) -!$OMP END PARALLEL WORKSHARE - ELSE IF (n(3) > 1) THEN -!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(n, pw) - pw%array(:) = pw%array(:)*(pw%pw_grid%g(3, :)**n(3)) -!$OMP END PARALLEL WORKSHARE - END IF + DO idir = 1, 3 + IF (n(idir) == 1) THEN +!$OMP PARALLEL DO DEFAULT(NONE) SHARED(pw,idir) PRIVATE(idx) + DO idx = 1, SIZE(pw%array) + pw%array(idx) = CMPLX(pw%array(idx)%re*pw%pw_grid%g(idir, idx), & + pw%array(idx)%im*pw%pw_grid%g(idir, idx), KIND=dp) + END DO +!$OMP END PARALLEL DO + ELSE IF (n(idir) > 1) THEN +!$OMP PARALLEL DO DEFAULT(NONE) SHARED(n, pw,idir) PRIVATE(idx) + DO idx = 1, SIZE(pw%array) + pw%array(idx) = CMPLX(pw%array(idx)%re*pw%pw_grid%g(idir, idx)**n(idir), & + pw%array(idx)%im*pw%pw_grid%g(idir, idx)**n(idir), KIND=dp) + END DO +!$OMP END PARALLEL DO + END IF + END DO ! im can take the values 1, -1, i, -i ! skip this if im == 1 @@ -2049,7 +2050,7 @@ SUBROUTINE pw_laplace(pw) CALL timeset(routineN, handle) !$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(pw) - pw%array(:) = -pw%array(:)*pw%pw_grid%gsq(:) + pw%array(:) = -CMPLX(pw%array(:)%re*pw%pw_grid%gsq(:), pw%array(:)%im*pw%pw_grid%gsq(:), KIND=dp) !$OMP END PARALLEL WORKSHARE CALL timestop(handle) @@ -2087,13 +2088,14 @@ SUBROUTINE pw_dr2(pw, pwdr2, i, j) !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(ig,gg) SHARED(cnt, i, o3, pw, pwdr2) DO ig = 1, cnt gg = pw%pw_grid%g(i, ig)**2 - o3*pw%pw_grid%gsq(ig) - pwdr2%array(ig) = gg*pw%array(ig) + pwdr2%array(ig) = CMPLX(gg*pw%array(ig)%re, gg*pw%array(ig)%im, KIND=dp) END DO !$OMP END PARALLEL DO ELSE !$OMP PARALLEL DO PRIVATE (ig) DEFAULT(NONE) SHARED(cnt, i, j, pw, pwdr2) DO ig = 1, cnt - pwdr2%array(ig) = pw%array(ig)*(pw%pw_grid%g(i, ig)*pw%pw_grid%g(j, ig)) + pwdr2%array(ig) = CMPLX(pw%array(ig)%re*(pw%pw_grid%g(i, ig)*pw%pw_grid%g(j, ig)), & + pw%array(ig)%im*(pw%pw_grid%g(i, ig)*pw%pw_grid%g(j, ig)), KIND=dp) END DO !$OMP END PARALLEL DO END IF @@ -2134,14 +2136,17 @@ SUBROUTINE pw_dr2_gg(pw, pwdr2_gg, i, j) !$OMP PARALLEL DO PRIVATE (ig) DEFAULT(NONE) PRIVATE(gg) SHARED(cnt, i, o3, pw, pwdr2_gg) DO ig = pw%pw_grid%first_gne0, cnt gg = pw%pw_grid%g(i, ig)**2 - o3*pw%pw_grid%gsq(ig) - pwdr2_gg%array(ig) = gg*pw%array(ig)/pw%pw_grid%gsq(ig) + pwdr2_gg%array(ig) = CMPLX(gg/pw%pw_grid%gsq(ig)*pw%array(ig)%re, & + gg/pw%pw_grid%gsq(ig)*pw%array(ig)%im, KIND=dp) END DO !$OMP END PARALLEL DO ELSE !$OMP PARALLEL DO PRIVATE (ig) DEFAULT(NONE) SHARED(cnt, i, j, pw, pwdr2_gg) DO ig = pw%pw_grid%first_gne0, cnt - pwdr2_gg%array(ig) = pw%array(ig)*(pw%pw_grid%g(i, ig)*pw%pw_grid%g(j, ig)) & - /pw%pw_grid%gsq(ig) + pwdr2_gg%array(ig) = CMPLX(pw%array(ig)%re*((pw%pw_grid%g(i, ig)*pw%pw_grid%g(j, ig)) & + /pw%pw_grid%gsq(ig)), & + pw%array(ig)%im*((pw%pw_grid%g(i, ig)*pw%pw_grid%g(j, ig)) & + /pw%pw_grid%gsq(ig)), KIND=dp) END DO !$OMP END PARALLEL DO END IF @@ -2181,7 +2186,7 @@ SUBROUTINE pw_smoothing(pw, ecut, sigma) DO ig = 1, cnt arg = (ecut - pw%pw_grid%gsq(ig))/sigma f = EXP(arg)/(1 + EXP(arg)) - pw%array(ig) = f*pw%array(ig) + pw%array(ig) = CMPLX(f*pw%array(ig)%re, f*pw%array(ig)%im, KIND=dp) END DO !$OMP END PARALLEL DO diff --git a/src/qs_loc_methods.F b/src/qs_loc_methods.F index 8de5f6cef8..f1c53caaf2 100644 --- a/src/qs_loc_methods.F +++ b/src/qs_loc_methods.F @@ -208,6 +208,8 @@ SUBROUTINE optimize_loc_berry(method, qs_loc_env, vectors, op_sm_set, & CALL centers_spreads_berry(qs_loc_env, zij_fm_set, nmoloc, cell, weights, & ispin, print_loc_section, only_initial_out=.TRUE.) + sweeps = 0 + SELECT CASE (method) CASE (do_loc_jacobi) CALL jacobi_rotations(weights, zij_fm_set, vectors, para_env, max_iter=max_iter, &