diff --git a/src/cp_realspace_grid_cube.F b/src/cp_realspace_grid_cube.F index abf48f340c..c5b2f4017c 100644 --- a/src/cp_realspace_grid_cube.F +++ b/src/cp_realspace_grid_cube.F @@ -122,7 +122,7 @@ SUBROUTINE cp_cube_to_pw(grid, filename, scaling, silent) ! Parallel routine falls back to stream read in serial mode, ! but it has slight overhead compared to sequential read ! Therefore, we use sequential version in serial mode - IF (grid%pw_grid%para%group_size == 1) parallel_read = .FALSE. + IF (grid%pw_grid%para%group%num_pe == 1) parallel_read = .FALSE. ! Check if MPI I/O was disabled in GLOBAL section IF (.NOT. cp_mpi_io_get()) parallel_read = .FALSE. diff --git a/src/ewald_spline_util.F b/src/ewald_spline_util.F index 59efda93b9..a05709c1a0 100644 --- a/src/ewald_spline_util.F +++ b/src/ewald_spline_util.F @@ -217,15 +217,15 @@ SUBROUTINE eval_pw_TabLR(grid, pw_pool, TabLR, Lg, gx, gy, gz, hmat_mm, & !NB use DGEMM to compute sum over kg for each i, j, k ! number of elements per node, round down - NLg_loc = SIZE(Lg)/grid%pw_grid%para%group_size + NLg_loc = SIZE(Lg)/grid%pw_grid%para%group%num_pe ! number of extra elements not yet accounted for - n_extra = MOD(SIZE(Lg), grid%pw_grid%para%group_size) + n_extra = MOD(SIZE(Lg), grid%pw_grid%para%group%num_pe) ! first n_extra nodes get NLg_loc+1, remaining get NLg_loc - IF (grid%pw_grid%para%my_pos < n_extra) THEN - Lg_loc_min = (NLg_loc + 1)*grid%pw_grid%para%my_pos + 1 + IF (grid%pw_grid%para%group%mepos < n_extra) THEN + Lg_loc_min = (NLg_loc + 1)*grid%pw_grid%para%group%mepos + 1 Lg_loc_max = Lg_loc_min + (NLg_loc + 1) - 1 ELSE - Lg_loc_min = (NLg_loc + 1)*n_extra + NLg_loc*(grid%pw_grid%para%my_pos - n_extra) + 1 + Lg_loc_min = (NLg_loc + 1)*n_extra + NLg_loc*(grid%pw_grid%para%group%mepos - n_extra) + 1 Lg_loc_max = Lg_loc_min + NLg_loc - 1 END IF ! shouldn't be necessary diff --git a/src/maxwell_solver_interface.F b/src/maxwell_solver_interface.F index 0faf527e70..6c3257b1a4 100644 --- a/src/maxwell_solver_interface.F +++ b/src/maxwell_solver_interface.F @@ -101,8 +101,8 @@ SUBROUTINE maxwell_solver(maxwell_control, v_ee, sim_step, sim_time, scaling_fac logger => cp_get_default_logger() iounit = cp_logger_get_default_io_unit(logger) - my_rank = v_ee%pw_grid%para%my_pos - num_pe = v_ee%pw_grid%para%group_size + my_rank = v_ee%pw_grid%para%group%mepos + num_pe = v_ee%pw_grid%para%group%num_pe gid = v_ee%pw_grid%para%group tag = 1 diff --git a/src/mixed_cdft_utils.F b/src/mixed_cdft_utils.F index 7edd4bb3f8..d772564cb1 100644 --- a/src/mixed_cdft_utils.F +++ b/src/mixed_cdft_utils.F @@ -223,7 +223,7 @@ SUBROUTINE mixed_cdft_parse_settings(force_env, mixed_env, mixed_cdft, & settings%cutoff(iforce_eval) = auxbas_pw_pool%pw_grid%cutoff settings%rel_cutoff(iforce_eval) = dft_control%qs_control%relative_cutoff IF (auxbas_pw_pool%pw_grid%spherical) settings%spherical(iforce_eval) = 1 - settings%rs_dims(:, iforce_eval) = auxbas_pw_pool%pw_grid%para%rs_dims + settings%rs_dims(:, iforce_eval) = auxbas_pw_pool%pw_grid%para%group%num_pe_cart IF (auxbas_pw_pool%pw_grid%grid_span == HALFSPACE) settings%odd(iforce_eval) = 1 ! Becke constraint atoms/coeffs IF (cdft_control%natoms .GT. SIZE(settings%atoms, 1)) & @@ -733,11 +733,11 @@ SUBROUTINE mixed_cdft_init_structures(force_env, force_env_qs, mixed_env, mixed_ iounit=iounit) ! Check if the layout was successfully created IF (mixed_cdft%is_special) THEN - IF (.NOT. pw_grid%para%rs_dims(2) /= 1) is_match = .FALSE. + IF (.NOT. pw_grid%para%group%num_pe_cart(2) /= 1) is_match = .FALSE. 
ELSE IF (mixed_cdft%is_pencil) THEN - IF (.NOT. pw_grid%para%rs_dims(1) == mixed_rs_dims(1)) is_match = .FALSE. + IF (.NOT. pw_grid%para%group%num_pe_cart(1) == mixed_rs_dims(1)) is_match = .FALSE. ELSE - IF (.NOT. pw_grid%para%rs_dims(2) == 1) is_match = .FALSE. + IF (.NOT. pw_grid%para%group%num_pe_cart(2) == 1) is_match = .FALSE. END IF IF (.NOT. is_match) & CALL cp_abort(__LOCATION__, & @@ -856,8 +856,8 @@ SUBROUTINE mixed_cdft_init_structures(force_env, force_env_qs, mixed_env, mixed_ CALL pw_env_get(pw_env, auxbas_pw_pool=auxbas_pw_pool) ! work out the pw grid points each proc holds in the two (identical) parallel proc groups ! note we only care about the x dir since we assume the y dir is not subdivided - ALLOCATE (bounds(0:auxbas_pw_pool%pw_grid%para%group_size - 1, 1:2)) - DO i = 0, auxbas_pw_pool%pw_grid%para%group_size - 1 + ALLOCATE (bounds(0:auxbas_pw_pool%pw_grid%para%group%num_pe - 1, 1:2)) + DO i = 0, auxbas_pw_pool%pw_grid%para%group%num_pe - 1 bounds(i, 1:2) = auxbas_pw_pool%pw_grid%para%bo(1:2, 1, i, 1) bounds(i, 1:2) = bounds(i, 1:2) - auxbas_pw_pool%pw_grid%npts(1)/2 - 1 END DO @@ -865,7 +865,7 @@ SUBROUTINE mixed_cdft_init_structures(force_env, force_env_qs, mixed_env, mixed_ ! first get the number of target procs per group ntargets = 0 offset = -1 - DO i = 0, auxbas_pw_pool%pw_grid%para%group_size - 1 + DO i = 0, auxbas_pw_pool%pw_grid%para%group%num_pe - 1 IF ((bounds(i, 1) .GE. bo_mixed(1, 1) .AND. bounds(i, 1) .LE. bo_mixed(2, 1)) .OR. & (bounds(i, 2) .GE. bo_mixed(1, 1) .AND. bounds(i, 2) .LE. bo_mixed(2, 1))) THEN ntargets = ntargets + 1 @@ -893,8 +893,8 @@ SUBROUTINE mixed_cdft_init_structures(force_env, force_env_qs, mixed_env, mixed_ ! finally determine which procs will send me grid points ! now we need info about y dir also DEALLOCATE (bounds) - ALLOCATE (bounds(0:pw_pools(1)%pool%pw_grid%para%group_size - 1, 1:4)) - DO i = 0, pw_pools(1)%pool%pw_grid%para%group_size - 1 + ALLOCATE (bounds(0:pw_pools(1)%pool%pw_grid%para%group%num_pe - 1, 1:4)) + DO i = 0, pw_pools(1)%pool%pw_grid%para%group%num_pe - 1 bounds(i, 1:2) = pw_pools(1)%pool%pw_grid%para%bo(1:2, 1, i, 1) bounds(i, 3:4) = pw_pools(1)%pool%pw_grid%para%bo(1:2, 2, i, 1) bounds(i, 1:2) = bounds(i, 1:2) - pw_pools(1)%pool%pw_grid%npts(1)/2 - 1 @@ -902,7 +902,7 @@ SUBROUTINE mixed_cdft_init_structures(force_env, force_env_qs, mixed_env, mixed_ END DO ntargets = 0 offset = -1 - DO i = 0, pw_pools(1)%pool%pw_grid%para%group_size - 1 + DO i = 0, pw_pools(1)%pool%pw_grid%para%group%num_pe - 1 IF ((bo(1, 1) .GE. bounds(i, 1) .AND. bo(1, 1) .LE. bounds(i, 2)) .OR. & (bo(2, 1) .GE. bounds(i, 1) .AND. bo(2, 1) .LE. bounds(i, 2))) THEN ntargets = ntargets + 1 diff --git a/src/pw/dct.F b/src/pw/dct.F index 53eef30808..cb2d3d1922 100644 --- a/src/pw/dct.F +++ b/src/pw/dct.F @@ -228,9 +228,9 @@ SUBROUTINE setup_dct_pw_grids(pw_grid, cell_hmat, neumann_directions, dct_pw_gri blocked = 0 END IF - CALL pw_grid_create(dct_pw_grid, pw_grid%para%rs_group, hmat2, & + CALL pw_grid_create(dct_pw_grid, pw_grid%para%group, hmat2, & bounds=bounds_new, & - rs_dims=pw_grid%para%rs_dims, & + rs_dims=pw_grid%para%group%num_pe_cart, & blocked=blocked, & bounds_local=bounds_local_new) @@ -320,12 +320,12 @@ SUBROUTINE set_dests_srcs_pid(pw_grid, neumann_directions, dests_expand, srcs_ex ! rs_dim1 = 3 --> src_pos1 = [1 2 3] ! 
rs_dim2 = 4 --> src_pos2 = [1 2 3 4] - rs_group = pw_grid%para%rs_group - rs_mpo = pw_grid%para%rs_mpo - group_size = pw_grid%para%group_size - rs_dims = pw_grid%para%rs_dims + rs_group = pw_grid%para%group + rs_mpo = pw_grid%para%group%mepos + group_size = pw_grid%para%group%num_pe + rs_dims = pw_grid%para%group%num_pe_cart - rs_pos = pw_grid%para%rs_pos + rs_pos = pw_grid%para%group%mepos_cart rs_dim1 = rs_dims(1); rs_dim2 = rs_dims(2) ! prepare srcs_coord @@ -434,7 +434,7 @@ SUBROUTINE set_dests_srcs_pid(pw_grid, neumann_directions, dests_expand, srcs_ex DO k = 1, maxn_sendrecv ! convert srcs_coord to pid - CALL pw_grid%para%rs_group%rank_cart(ABS(srcs_coord(:, k)) - 1, srcs_expand(k)) + CALL pw_grid%para%group%rank_cart(ABS(srcs_coord(:, k)) - 1, srcs_expand(k)) ! find out the flipping status IF ((srcs_coord(1, k) .GT. 0) .AND. (srcs_coord(2, k) .GT. 0)) THEN flipg_stat(k) = NOT_FLIPPED @@ -538,9 +538,9 @@ SUBROUTINE pw_expand(neumann_directions, recv_msgs_bnds, dests_expand, srcs_expa CALL timeset(routineN, handle) pw_grid => pw_in%pw_grid - rs_group = pw_grid%para%rs_group - rs_mpo = pw_grid%para%my_pos - group_size = pw_grid%para%group_size + rs_group = pw_grid%para%group + rs_mpo = pw_grid%para%group%mepos + group_size = pw_grid%para%group%num_pe bounds_local_new = pw_expanded%pw_grid%bounds_local @@ -719,9 +719,9 @@ SUBROUTINE pw_shrink(neumann_directions, dests_shrink, srcs_shrink, bounds_local CALL timeset(routineN, handle) pw_grid_orig => pw_shrinked%pw_grid - rs_group = pw_grid_orig%para%rs_group - rs_mpo = pw_grid_orig%para%my_pos - group_size = pw_grid_orig%para%group_size + rs_group = pw_grid_orig%para%group + rs_mpo = pw_grid_orig%para%group%mepos + group_size = pw_grid_orig%para%group%num_pe bounds_local_xpnd = pw_in%pw_grid%bounds_local tag = 1 @@ -1054,9 +1054,9 @@ SUBROUTINE expansion_bounds(pw_grid, neumann_directions, srcs_expand, flipg_stat CALL timeset(routineN, handle) - rs_group = pw_grid%para%rs_group - rs_mpo = pw_grid%para%my_pos - group_size = pw_grid%para%group_size + rs_group = pw_grid%para%group + rs_mpo = pw_grid%para%group%mepos + group_size = pw_grid%para%group%num_pe bounds = pw_grid%bounds bounds_local = pw_grid%bounds_local diff --git a/src/pw/fft_tools.F b/src/pw/fft_tools.F index b517ba49d3..ea1d561a5e 100644 --- a/src/pw/fft_tools.F +++ b/src/pw/fft_tools.F @@ -68,7 +68,6 @@ MODULE fft_tools INTEGER :: lg = 0, mg = 0 INTEGER :: nbx = 0, nbz = 0 INTEGER :: nmray = 0, nyzray = 0 - TYPE(mp_comm_type) :: gs_group = mp_comm_type() TYPE(mp_cart_type) :: rs_group = mp_cart_type() INTEGER, DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0 INTEGER :: numtask = 0 @@ -485,7 +484,6 @@ END SUBROUTINE fft3d_s !> \param n ... !> \param cin ... !> \param gin ... -!> \param gs_group ... !> \param rs_group ... !> \param yzp ... !> \param nyzray ... @@ -494,7 +492,7 @@ END SUBROUTINE fft3d_s !> \param status ... !> \param debug ... ! 
************************************************************************************************** - SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & + SUBROUTINE fft3d_ps(fsign, n, cin, gin, rs_group, yzp, nyzray, & bo, scale, status, debug) INTEGER, INTENT(IN) :: fsign @@ -503,7 +501,6 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & INTENT(INOUT) :: cin COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), & INTENT(INOUT) :: gin - TYPE(mp_comm_type), INTENT(IN) :: gs_group TYPE(mp_cart_type), INTENT(IN) :: rs_group INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), & INTENT(IN) :: yzp @@ -521,8 +518,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), & POINTER :: tbuf INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, & - nmax, numtask, numtask_g, numtask_r, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, & - sign, stat + nmax, numtask, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, sign, stat INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p LOGICAL :: test REAL(KIND=dp) :: norm, sum_data @@ -538,15 +534,10 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & test = .FALSE. END IF - numtask_g = gs_group%num_pe - g_pos = gs_group%mepos - numtask_r = rs_group%num_pe + g_pos = rs_group%mepos + numtask = rs_group%num_pe r_dim = rs_group%num_pe_cart r_pos = rs_group%mepos_cart - IF (numtask_g /= numtask_r) THEN - CPABORT("Real space and G space groups are different.") - END IF - numtask = numtask_r IF (PRESENT(scale)) THEN norm = scale @@ -572,7 +563,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & ALLOCATE (p2p(0:numtask - 1)) - CALL gs_group%rank_compare(rs_group, p2p) + CALL rs_group%rank_compare(rs_group, p2p) rp = p2p(g_pos) mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1 @@ -609,7 +600,6 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & fft_scratch_size%nmax = nmax fft_scratch_size%nmray = MAXVAL(nyzray) fft_scratch_size%nyzray = nyzray(g_pos) - fft_scratch_size%gs_group = gs_group fft_scratch_size%rs_group = rs_group fft_scratch_size%g_pos = g_pos fft_scratch_size%r_pos = r_pos @@ -642,7 +632,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(cin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A)') " Two step communication algorithm " WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1 @@ -662,7 +652,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(qbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data END IF @@ -673,7 +663,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(rbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) T", sum_data END IF @@ -688,7 +678,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(pbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. 
output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) TS", sum_data END IF @@ -700,7 +690,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(qbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) TS", sum_data END IF @@ -711,7 +701,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(gin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data END IF @@ -722,7 +712,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(gin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A)') " Two step communication algorithm " WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos) @@ -741,7 +731,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(pbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data END IF @@ -753,7 +743,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(qbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data END IF @@ -768,7 +758,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(rbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data END IF @@ -779,7 +769,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(pbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) T", sum_data END IF @@ -792,7 +782,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(cin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data END IF @@ -827,7 +817,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(cin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A)') " One step communication algorithm " WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx @@ -842,19 +832,19 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(tbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data END IF END IF ! 
Exchange data ( transpose of matrix ) and sort - CALL yz_to_x(tbuf, gs_group, g_pos, p2p, yzp, nyzray, & + CALL yz_to_x(tbuf, rs_group, g_pos, p2p, yzp, nyzray, & bo(:, :, :, 2), sbuf, fft_scratch) IF (test) THEN sum_data = ABS(SUM(sbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data END IF @@ -864,7 +854,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(gin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data END IF @@ -875,7 +865,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(gin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A)') " One step communication algorithm " WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos) @@ -889,19 +879,19 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(sbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data END IF END IF ! Exchange data ( transpose of matrix ) and sort - CALL x_to_yz(sbuf, gs_group, g_pos, p2p, yzp, nyzray, & + CALL x_to_yz(sbuf, rs_group, g_pos, p2p, yzp, nyzray, & bo(:, :, :, 2), tbuf, fft_scratch) IF (test) THEN sum_data = ABS(SUM(tbuf)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data END IF @@ -913,7 +903,7 @@ SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, & IF (test) THEN sum_data = ABS(SUM(cin)) - CALL gs_group%sum(sum_data) + CALL rs_group%sum(sum_data) IF (g_pos == 0 .AND. 
output_unit > 0) THEN WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data END IF @@ -1056,7 +1046,6 @@ SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, scale, status, debug) fft_scratch_size%mcx2 = mcx2 fft_scratch_size%mcz2 = mcz2 fft_scratch_size%mcy3 = mcy3 - fft_scratch_size%gs_group = group fft_scratch_size%rs_group = group fft_scratch_size%g_pos = my_pos fft_scratch_size%numtask = DIM(1)*DIM(2) @@ -1382,7 +1371,8 @@ SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch) COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), & INTENT(IN) :: sb - TYPE(mp_comm_type), INTENT(IN) :: group + + CLASS(mp_comm_type), INTENT(IN) :: group INTEGER, INTENT(IN) :: my_pos INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), & @@ -1497,7 +1487,8 @@ SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch) COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), & INTENT(IN) :: tb - TYPE(mp_comm_type), INTENT(IN) :: group + + CLASS(mp_comm_type), INTENT(IN) :: group INTEGER, INTENT(IN) :: my_pos INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), & @@ -2923,7 +2914,7 @@ SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes) CYCLE END IF IF (PRESENT(fft_sizes)) THEN - IF (fft_sizes%gs_group /= fft_scratch_current%fft_scratch%group) THEN + IF (fft_sizes%rs_group /= fft_scratch_current%fft_scratch%group) THEN fft_scratch_last => fft_scratch_current fft_scratch_current => fft_scratch_current%fft_scratch_next CYCLE @@ -2969,7 +2960,7 @@ SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes) CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2]) CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)]) CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3]) - fft_scratch_new%fft_scratch%group = fft_sizes%gs_group + fft_scratch_new%fft_scratch%group = fft_sizes%rs_group dim = fft_sizes%rs_group%num_pe_cart pos = fft_sizes%rs_group%mepos_cart @@ -3022,7 +3013,7 @@ SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes) mz3 = fft_sizes%mz3 CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)]) CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1]) - fft_scratch_new%fft_scratch%group = fft_sizes%gs_group + fft_scratch_new%fft_scratch%group = fft_sizes%rs_group CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)]) CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1]) CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)]) @@ -3076,7 +3067,7 @@ SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes) CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax]) CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx]) #endif - fft_scratch_new%fft_scratch%group = fft_sizes%gs_group + fft_scratch_new%fft_scratch%group = fft_sizes%rs_group CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg]) nm = nmray*mx2 IF (alltoall_sgl) THEN @@ -3160,7 +3151,7 @@ SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes) fft_scratch_new%fft_scratch%yzcount(0:np - 1)) ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), & fft_scratch_new%fft_scratch%yzdispl(0:np - 1)) - fft_scratch_new%fft_scratch%group = fft_sizes%gs_group + fft_scratch_new%fft_scratch%group = fft_sizes%rs_group dim = fft_sizes%rs_group%num_pe_cart pos = fft_sizes%rs_group%mepos_cart @@ -3375,7 +3366,6 @@ SUBROUTINE is_equal(fft_size_1, fft_size_2, equal) equal = equal .AND. 
fft_size_1%nmray == fft_size_2%nmray equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray - equal = equal .AND. fft_size_1%gs_group == fft_size_2%gs_group equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group equal = equal .AND. ALL(fft_size_1%g_pos == fft_size_2%g_pos) diff --git a/src/pw/ps_wavelet_methods.F b/src/pw/ps_wavelet_methods.F index 0601531ca5..ab4bd5ed3c 100644 --- a/src/pw/ps_wavelet_methods.F +++ b/src/pw/ps_wavelet_methods.F @@ -83,9 +83,9 @@ SUBROUTINE ps_wavelet_create(poisson_params, wavelet, pw_grid) hy = pw_grid%dr(2) hz = pw_grid%dr(3) - nproc = PRODUCT(pw_grid%para%rs_dims) + nproc = PRODUCT(pw_grid%para%group%num_pe_cart) - iproc = pw_grid%para%rs_mpo + iproc = pw_grid%para%group%mepos NULLIFY (wavelet%karray, wavelet%rho_z_sliced) @@ -126,8 +126,8 @@ SUBROUTINE RS_z_slice_distribution(wavelet, pw_grid) REAL(KIND=dp) :: hx, hy, hz CALL timeset(routineN, handle) - nproc = PRODUCT(pw_grid%para%rs_dims) - iproc = pw_grid%para%rs_mpo + nproc = PRODUCT(pw_grid%para%group%num_pe_cart) + iproc = pw_grid%para%group%mepos geocode = wavelet%geocode nx = pw_grid%npts(1) ny = pw_grid%npts(2) @@ -154,7 +154,7 @@ SUBROUTINE RS_z_slice_distribution(wavelet, pw_grid) ALLOCATE (wavelet%rho_z_sliced(md1, md3, z_dim)) CALL createKernel(geocode, nx, ny, nz, hx, hy, hz, wavelet%itype_scf, iproc, nproc, wavelet%karray, & - pw_grid%para%rs_group) + pw_grid%para%group) CALL timestop(handle) END SUBROUTINE RS_z_slice_distribution @@ -186,8 +186,8 @@ SUBROUTINE cp2k_distribution_to_z_slices(density, wavelet, pw_grid) CPASSERT(ASSOCIATED(wavelet)) - nproc = PRODUCT(pw_grid%para%rs_dims) - iproc = pw_grid%para%rs_mpo + nproc = PRODUCT(pw_grid%para%group%num_pe_cart) + iproc = pw_grid%para%group%mepos md2 = wavelet%PS_grid(3) m2 = pw_grid%npts(3) lb(:) = pw_grid%bounds_local(1, :) @@ -237,10 +237,10 @@ SUBROUTINE cp2k_distribution_to_z_slices(density, wavelet, pw_grid) IF (should_warn > 0 .AND. iproc == 0) & CPWARN("Density non-zero on the edges of the unit cell: wrong results in WAVELET solver") - DO i = 0, pw_grid%para%rs_dims(1) - 1 - DO j = 0, pw_grid%para%rs_dims(2) - 1 + DO i = 0, pw_grid%para%group%num_pe_cart(1) - 1 + DO j = 0, pw_grid%para%group%num_pe_cart(2) - 1 cart_pos = (/i, j/) - CALL pw_grid%para%rs_group%rank_cart(cart_pos, dest) + CALL pw_grid%para%group%rank_cart(cart_pos, dest) IF ((ub(1) .GE. lb(1)) .AND. (ub(2) .GE. lb(2))) THEN IF (dest*local_z_dim .LE. m2) THEN IF ((dest + 1)*local_z_dim .LE. m2) THEN @@ -254,8 +254,8 @@ SUBROUTINE cp2k_distribution_to_z_slices(density, wavelet, pw_grid) ELSE scount(dest + 1) = 0 END IF - lox = get_limit(pw_grid%npts(1), pw_grid%para%rs_dims(1), i) - loy = get_limit(pw_grid%npts(2), pw_grid%para%rs_dims(2), j) + lox = get_limit(pw_grid%npts(1), pw_grid%para%group%num_pe_cart(1), i) + loy = get_limit(pw_grid%npts(2), pw_grid%para%group%num_pe_cart(2), j) IF ((lox(2) .GE. lox(1)) .AND. (loy(2) .GE. loy(1))) THEN IF (iproc*local_z_dim .LE. m2) THEN IF ((iproc + 1)*local_z_dim .LE. m2) THEN @@ -278,18 +278,18 @@ SUBROUTINE cp2k_distribution_to_z_slices(density, wavelet, pw_grid) sdispl(i) = sdispl(i - 1) + scount(i - 1) rdispl(i) = rdispl(i - 1) + rcount(i - 1) END DO - CALL pw_grid%para%rs_group%alltoall(sbuf, scount, sdispl, rbuf, rcount, rdispl) + CALL pw_grid%para%group%alltoall(sbuf, scount, sdispl, rbuf, rcount, rdispl) !!!! and now, how to put the right cubes to the right position!!!!!! 
wavelet%rho_z_sliced = 0.0_dp - DO i = 0, pw_grid%para%rs_dims(1) - 1 - DO j = 0, pw_grid%para%rs_dims(2) - 1 + DO i = 0, pw_grid%para%group%num_pe_cart(1) - 1 + DO j = 0, pw_grid%para%group%num_pe_cart(2) - 1 cart_pos = (/i, j/) - CALL pw_grid%para%rs_group%rank_cart(cart_pos, dest) + CALL pw_grid%para%group%rank_cart(cart_pos, dest) - lox = get_limit(pw_grid%npts(1), pw_grid%para%rs_dims(1), i) - loy = get_limit(pw_grid%npts(2), pw_grid%para%rs_dims(2), j) + lox = get_limit(pw_grid%npts(1), pw_grid%para%group%num_pe_cart(1), i) + loy = get_limit(pw_grid%npts(2), pw_grid%para%group%num_pe_cart(2), j) IF (iproc*local_z_dim .LE. m2) THEN IF ((iproc + 1)*local_z_dim .LE. m2) THEN loz = local_z_dim @@ -336,8 +336,8 @@ SUBROUTINE z_slices_to_cp2k_distribution(density, wavelet, pw_grid) CPASSERT(ASSOCIATED(wavelet)) - nproc = PRODUCT(pw_grid%para%rs_dims) - iproc = pw_grid%para%rs_mpo + nproc = PRODUCT(pw_grid%para%group%num_pe_cart) + iproc = pw_grid%para%group%mepos md2 = wavelet%PS_grid(3) m2 = pw_grid%npts(3) @@ -363,12 +363,12 @@ SUBROUTINE z_slices_to_cp2k_distribution(density, wavelet, pw_grid) loz = 0 END IF - min_x = get_limit(pw_grid%npts(1), pw_grid%para%rs_dims(1), 0) - min_y = get_limit(pw_grid%npts(2), pw_grid%para%rs_dims(2), 0) - DO i = 0, pw_grid%para%rs_dims(1) - 1 - DO j = 0, pw_grid%para%rs_dims(2) - 1 + min_x = get_limit(pw_grid%npts(1), pw_grid%para%group%num_pe_cart(1), 0) + min_y = get_limit(pw_grid%npts(2), pw_grid%para%group%num_pe_cart(2), 0) + DO i = 0, pw_grid%para%group%num_pe_cart(1) - 1 + DO j = 0, pw_grid%para%group%num_pe_cart(2) - 1 cart_pos = (/i, j/) - CALL pw_grid%para%rs_group%rank_cart(cart_pos, dest) + CALL pw_grid%para%group%rank_cart(cart_pos, dest) IF ((ub(1) .GE. lb(1)) .AND. (ub(2) .GE. lb(2))) THEN IF (dest*local_z_dim .LE. m2) THEN IF ((dest + 1)*local_z_dim .LE. m2) THEN @@ -382,8 +382,8 @@ SUBROUTINE z_slices_to_cp2k_distribution(density, wavelet, pw_grid) ELSE rcount(dest + 1) = 0 END IF - lox = get_limit(pw_grid%npts(1), pw_grid%para%rs_dims(1), i) - loy = get_limit(pw_grid%npts(2), pw_grid%para%rs_dims(2), j) + lox = get_limit(pw_grid%npts(1), pw_grid%para%group%num_pe_cart(1), i) + loy = get_limit(pw_grid%npts(2), pw_grid%para%group%num_pe_cart(2), j) IF ((lox(2) .GE. lox(1)) .AND. (loy(2) .GE. loy(1))) THEN scount(dest + 1) = ABS((lox(2) - lox(1) + 1)*(loy(2) - loy(1) + 1)*loz) DO k = lox(1) - min_x(1) + 1, lox(2) - min_x(1) + 1 @@ -405,14 +405,14 @@ SUBROUTINE z_slices_to_cp2k_distribution(density, wavelet, pw_grid) sdispl(i) = sdispl(i - 1) + scount(i - 1) rdispl(i) = rdispl(i - 1) + rcount(i - 1) END DO - CALL pw_grid%para%rs_group%alltoall(sbuf, scount, sdispl, rbuf, rcount, rdispl) + CALL pw_grid%para%group%alltoall(sbuf, scount, sdispl, rbuf, rcount, rdispl) !!!! and now, how to put the right cubes to the right position!!!!!! - DO i = 0, pw_grid%para%rs_dims(1) - 1 - DO j = 0, pw_grid%para%rs_dims(2) - 1 + DO i = 0, pw_grid%para%group%num_pe_cart(1) - 1 + DO j = 0, pw_grid%para%group%num_pe_cart(2) - 1 cart_pos = (/i, j/) - CALL pw_grid%para%rs_group%rank_cart(cart_pos, dest) + CALL pw_grid%para%group%rank_cart(cart_pos, dest) IF (dest*local_z_dim .LE. m2) THEN IF ((dest + 1)*local_z_dim .LE. 
m2) THEN loz = local_z_dim @@ -454,8 +454,8 @@ SUBROUTINE ps_wavelet_solve(wavelet, pw_grid) REAL(KIND=dp) :: hx, hy, hz CALL timeset(routineN, handle) - nproc = PRODUCT(pw_grid%para%rs_dims) - iproc = pw_grid%para%rs_mpo + nproc = PRODUCT(pw_grid%para%group%num_pe_cart) + iproc = pw_grid%para%group%mepos geocode = wavelet%geocode nx = pw_grid%npts(1) ny = pw_grid%npts(2) diff --git a/src/pw/ps_wavelet_util.F b/src/pw/ps_wavelet_util.F index 56c203f4fc..e68e033a4f 100644 --- a/src/pw/ps_wavelet_util.F +++ b/src/pw/ps_wavelet_util.F @@ -167,17 +167,17 @@ SUBROUTINE PSolver(geocode, iproc, nproc, n01, n02, n03, hx, hy, hz, & !no powers of hgrid because they are incorporated in the plane wave treatment scal = 1._dp/REAL(n1*n2*n3, KIND=dp) CALL P_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf, & - scal, hx, hy, hz, pw_grid%para%rs_group) + scal, hx, hy, hz, pw_grid%para%group) ELSE IF (geocode == 'S') THEN !only one power of hgrid scal = hy/REAL(n1*n2*n3, KIND=dp) CALL S_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, karray, zf, & - scal, pw_grid%para%rs_group) + scal, pw_grid%para%group) ELSE IF (geocode == 'F') THEN hgrid = MAX(hx, hy, hz) scal = hgrid**3/REAL(n1*n2*n3, KIND=dp) CALL F_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, karray, zf, & - scal, pw_grid%para%rs_group) + scal, pw_grid%para%group) factor = 0.5_dp*hgrid**3 END IF diff --git a/src/pw/pw_copy_all.F b/src/pw/pw_copy_all.F index f9e586a4ee..3cef31219b 100644 --- a/src/pw/pw_copy_all.F +++ b/src/pw/pw_copy_all.F @@ -58,8 +58,8 @@ SUBROUTINE pw_copy_match(pw1, pw2) pg2 => pw2%pw_grid group = pg1%para%group - group_size = pg1%para%group_size - me = pg1%para%my_pos + group_size = pg1%para%group%num_pe + me = pg1%para%group%mepos ALLOCATE (ngr(group_size)) ngr = 0 ngr(me + 1) = pg1%ngpts_cut_local diff --git a/src/pw/pw_gpu.F b/src/pw/pw_gpu.F index fa08cb7036..25236009a7 100644 --- a/src/pw/pw_gpu.F +++ b/src/pw/pw_gpu.F @@ -33,8 +33,7 @@ MODULE pw_gpu release_fft_scratch, x_to_yz, xz_to_yz, yz_to_x, yz_to_xz USE kinds, ONLY: dp USE mathconstants, ONLY: z_zero - USE message_passing, ONLY: mp_cart_type,& - mp_comm_type + USE message_passing, ONLY: mp_cart_type USE pw_grid_types, ONLY: FULLSPACE USE pw_types, ONLY: pw_c1d_gs_type,& pw_r3d_rs_type @@ -226,8 +225,7 @@ SUBROUTINE pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale) COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: grays, pbuf, qbuf, rbuf, sbuf COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER :: tbuf INTEGER :: g_pos, handle, lg, lmax, mg, mmax, mx2, & - mz2, n1, n2, ngpts, nmax, numtask, & - numtask_g, numtask_r, rp + mz2, n1, n2, ngpts, nmax, numtask, rp INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p INTEGER, DIMENSION(2) :: r_dim, r_pos INTEGER, DIMENSION(:), POINTER :: n, nloc, nyzray @@ -235,7 +233,6 @@ SUBROUTINE pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale) TYPE(fft_scratch_sizes) :: fft_scratch_size TYPE(fft_scratch_type), POINTER :: fft_scratch TYPE(mp_cart_type) :: rs_group - TYPE(mp_comm_type) :: gs_group CALL timeset(routineN, handle) @@ -247,20 +244,14 @@ SUBROUTINE pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale) !..transform IF (pw1%pw_grid%para%ray_distribution) THEN - gs_group = pw1%pw_grid%para%group - rs_group = pw1%pw_grid%para%rs_group + rs_group = pw1%pw_grid%para%group nyzray => pw1%pw_grid%para%nyzray bo => pw1%pw_grid%para%bo - numtask_g = gs_group%num_pe - g_pos = gs_group%mepos - numtask_r = rs_group%num_pe + g_pos = rs_group%mepos + numtask = rs_group%num_pe r_dim = rs_group%num_pe_cart r_pos = rs_group%mepos_cart - 
IF (numtask_g /= numtask_r) THEN - CPABORT("Real space and G space groups are different.") - END IF - numtask = numtask_r lg = SIZE(grays, 1) mg = SIZE(grays, 2) @@ -269,7 +260,7 @@ SUBROUTINE pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale) ALLOCATE (p2p(0:numtask - 1)) - CALL gs_group%rank_compare(rs_group, p2p) + CALL rs_group%rank_compare(rs_group, p2p) rp = p2p(g_pos) mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1 @@ -298,7 +289,6 @@ SUBROUTINE pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale) fft_scratch_size%nmax = nmax fft_scratch_size%nmray = MAXVAL(nyzray) fft_scratch_size%nyzray = nyzray(g_pos) - fft_scratch_size%gs_group = gs_group fft_scratch_size%rs_group = rs_group fft_scratch_size%g_pos = g_pos fft_scratch_size%r_pos = r_pos @@ -359,7 +349,7 @@ SUBROUTINE pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale) CALL pw_gpu_cff(pw1, tbuf) ! Exchange data ( transpose of matrix ) and sort - CALL yz_to_x(tbuf, gs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, & + CALL yz_to_x(tbuf, rs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, & bo(:, :, :, 2), sbuf, fft_scratch) ! FFT along x @@ -374,7 +364,7 @@ SUBROUTINE pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale) !-------------------------------------------------------------------------- ELSE CPABORT("Not implemented (no ray_distr.) in: pw_gpu_r3dc1d_3d_ps.") - !CALL fft3d ( dir, n, pwin, grays, pw1%pw_grid%para%rs_group, & + !CALL fft3d ( dir, n, pwin, grays, pw1%pw_grid%para%group, & ! pw1%pw_grid%para%bo, scale = scale, debug=test ) END IF @@ -398,8 +388,7 @@ SUBROUTINE pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale) COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: grays, pbuf, qbuf, rbuf, sbuf COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER :: tbuf INTEGER :: g_pos, handle, lg, lmax, mg, mmax, mx2, & - mz2, n1, n2, ngpts, nmax, numtask, & - numtask_g, numtask_r, rp + mz2, n1, n2, ngpts, nmax, numtask, rp INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p INTEGER, DIMENSION(2) :: r_dim, r_pos INTEGER, DIMENSION(:), POINTER :: n, nloc, nyzray @@ -407,7 +396,6 @@ SUBROUTINE pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale) TYPE(fft_scratch_sizes) :: fft_scratch_size TYPE(fft_scratch_type), POINTER :: fft_scratch TYPE(mp_cart_type) :: rs_group - TYPE(mp_comm_type) :: gs_group CALL timeset(routineN, handle) @@ -419,20 +407,14 @@ SUBROUTINE pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale) !..transform IF (pw1%pw_grid%para%ray_distribution) THEN - gs_group = pw1%pw_grid%para%group - rs_group = pw1%pw_grid%para%rs_group + rs_group = pw1%pw_grid%para%group nyzray => pw1%pw_grid%para%nyzray bo => pw1%pw_grid%para%bo - numtask_g = gs_group%num_pe - g_pos = gs_group%mepos - numtask_r = rs_group%num_pe + g_pos = rs_group%mepos + numtask = rs_group%num_pe r_dim = rs_group%num_pe_cart r_pos = rs_group%mepos_cart - IF (numtask_g /= numtask_r) THEN - CPABORT("Real space and G space groups are different.") - END IF - numtask = numtask_r lg = SIZE(grays, 1) mg = SIZE(grays, 2) @@ -441,7 +423,7 @@ SUBROUTINE pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale) ALLOCATE (p2p(0:numtask - 1)) - CALL gs_group%rank_compare(rs_group, p2p) + CALL rs_group%rank_compare(rs_group, p2p) rp = p2p(g_pos) mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1 @@ -470,7 +452,6 @@ SUBROUTINE pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale) fft_scratch_size%nmax = nmax fft_scratch_size%nmray = MAXVAL(nyzray) fft_scratch_size%nyzray = nyzray(g_pos) - fft_scratch_size%gs_group = gs_group fft_scratch_size%rs_group = rs_group fft_scratch_size%g_pos = g_pos fft_scratch_size%r_pos = r_pos @@ -535,7 +516,7 @@ SUBROUTINE pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale) ! 
Exchange data ( transpose of matrix ) and sort IF (pw1%pw_grid%grid_span /= FULLSPACE) tbuf = z_zero - CALL x_to_yz(sbuf, gs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, & + CALL x_to_yz(sbuf, rs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, & bo(:, :, :, 2), tbuf, fft_scratch) ! FFT along y and z @@ -550,7 +531,7 @@ SUBROUTINE pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale) !-------------------------------------------------------------------------- ELSE CPABORT("Not implemented (no ray_distr.) in: pw_gpu_c1dr3d_3d_ps.") - !CALL fft3d ( dir, n, pwin, grays, pw1%pw_grid%para%rs_group, & + !CALL fft3d ( dir, n, pwin, grays, pw1%pw_grid%para%group, & ! pw1%pw_grid%para%bo, scale = scale, debug=test ) END IF diff --git a/src/pw/pw_grid_types.F b/src/pw/pw_grid_types.F index 0b08bc6d8a..b6b124c512 100644 --- a/src/pw/pw_grid_types.F +++ b/src/pw/pw_grid_types.F @@ -14,9 +14,7 @@ MODULE pw_grid_types USE kinds, ONLY: dp,& int_8 - USE message_passing, ONLY: mp_cart_type,& - mp_comm_null,& - mp_comm_type + USE message_passing, ONLY: mp_cart_type #include "../base/base_uses.f90" IMPLICIT NONE @@ -41,18 +39,10 @@ MODULE pw_grid_types INTEGER :: mode = PW_MODE_LOCAL ! 0 = local = PW_MODE_LOCAL ; 1 = distributed = PW_MODE_DISTRIBUTED LOGICAL :: ray_distribution = .FALSE. ! block or pencil distribution LOGICAL :: blocked = .FALSE. ! block or pencil distribution - TYPE(mp_comm_type) :: group = mp_comm_null ! MPI group - INTEGER :: my_pos = -1! Position within group - INTEGER :: group_size = -1! # of Processors in group - LOGICAL :: group_head = .FALSE. ! Master process within group - INTEGER :: group_head_id = 0 ! Id of group_head INTEGER, DIMENSION(:, :, :), ALLOCATABLE :: yzp ! g-space rays (xy,k,pe) INTEGER, DIMENSION(:, :), ALLOCATABLE :: yzq ! local inverse pointer of yzp INTEGER, DIMENSION(:), ALLOCATABLE :: nyzray ! number of g-space rays (pe) - TYPE(mp_cart_type) :: rs_group = mp_cart_type() ! real space group (2-dim cart) - INTEGER :: rs_mpo = -1 ! real space group position - INTEGER, DIMENSION(2) :: rs_dims = 0 ! real space group dimensions - INTEGER, DIMENSION(2) :: rs_pos = -1 ! real space group positions in grid + TYPE(mp_cart_type) :: group = mp_cart_type() ! real space group (2-dim cart) INTEGER, DIMENSION(:, :, :, :), ALLOCATABLE :: bo ! list of axis distribution INTEGER, DIMENSION(:), ALLOCATABLE :: pos_of_x ! what my_pos holds a given x plane....should go: hard-codes to plane distributed END TYPE pw_para_type diff --git a/src/pw/pw_grids.F b/src/pw/pw_grids.F index b48e77cecf..eba2a9bef4 100644 --- a/src/pw/pw_grids.F +++ b/src/pw/pw_grids.F @@ -98,6 +98,8 @@ SUBROUTINE pw_grid_create_local(pw_grid, bounds) TYPE(pw_grid_type), POINTER :: pw_grid INTEGER, DIMENSION(2, 3), INTENT(IN) :: bounds + INTEGER, DIMENSION(2) :: rs_dims + CPASSERT(.NOT. ASSOCIATED(pw_grid)) ALLOCATE (pw_grid) pw_grid%bounds = bounds @@ -110,7 +112,6 @@ SUBROUTINE pw_grid_create_local(pw_grid, bounds) pw_grid%ngpts_cut_local = pw_grid%ngpts_local pw_grid%grid_span = FULLSPACE pw_grid%para%mode = PW_MODE_LOCAL - pw_grid%para%rs_dims = 0 pw_grid%reference = 0 pw_grid%ref_count = 1 NULLIFY (pw_grid%g) @@ -124,13 +125,9 @@ SUBROUTINE pw_grid_create_local(pw_grid, bounds) pw_grid%id_nr = grid_tag ! 
parallel info - CALL pw_grid%para%group%from_dup(mp_comm_self) - pw_grid%para%group_size = pw_grid%para%group%num_pe - pw_grid%para%my_pos = pw_grid%para%group%mepos - pw_grid%para%group_head_id = 0 - pw_grid%para%group_head = & - (pw_grid%para%group_head_id == pw_grid%para%my_pos) - IF (pw_grid%para%group_size > 1) THEN + rs_dims = 1 + CALL pw_grid%para%group%create(mp_comm_self, 2, rs_dims) + IF (pw_grid%para%group%num_pe > 1) THEN pw_grid%para%mode = PW_MODE_DISTRIBUTED ELSE pw_grid%para%mode = PW_MODE_LOCAL @@ -320,7 +317,6 @@ SUBROUTINE pw_grid_create_extended(pw_grid, mp_comm, cell_hmat, grid_span, cutof pw_grid%cutoff = 0.0_dp pw_grid%grid_span = FULLSPACE pw_grid%para%mode = PW_MODE_LOCAL - pw_grid%para%rs_dims = 0 pw_grid%reference = 0 pw_grid%ref_count = 1 NULLIFY (pw_grid%g) @@ -334,13 +330,7 @@ SUBROUTINE pw_grid_create_extended(pw_grid, mp_comm, cell_hmat, grid_span, cutof pw_grid%id_nr = grid_tag ! parallel info - CALL pw_grid%para%group%from_dup(mp_comm) - pw_grid%para%group_size = pw_grid%para%group%num_pe - pw_grid%para%my_pos = pw_grid%para%group%mepos - pw_grid%para%group_head_id = 0 - pw_grid%para%group_head = & - (pw_grid%para%group_head_id == pw_grid%para%my_pos) - IF (pw_grid%para%group_size > 1) THEN + IF (mp_comm%num_pe > 1) THEN pw_grid%para%mode = PW_MODE_DISTRIBUTED ELSE pw_grid%para%mode = PW_MODE_LOCAL @@ -426,7 +416,7 @@ SUBROUTINE pw_grid_create_extended(pw_grid, mp_comm, cell_hmat, grid_span, cutof CPABORT("BOUNDS, NPTS or CUTOFF have to be specified") END IF - CALL pw_grid_setup_internal(cell_hmat, cell_h_inv, cell_deth, pw_grid, bounds_local=bounds_local, & + CALL pw_grid_setup_internal(cell_hmat, cell_h_inv, cell_deth, pw_grid, mp_comm, bounds_local=bounds_local, & blocked=blocked, ref_grid=ref_grid, rs_dims=rs_dims, iounit=iounit) #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW) @@ -520,6 +510,7 @@ END SUBROUTINE pw_grid_create_ghatmap !> \param cell_h_inv ... !> \param cell_deth ... !> \param pw_grid ... +!> \param mp_comm ... !> \param bounds_local ... !> \param blocked ... !> \param ref_grid ... @@ -539,11 +530,13 @@ END SUBROUTINE pw_grid_create_ghatmap !> \note !> this is the function that should be used in the future ! ************************************************************************************************** - SUBROUTINE pw_grid_setup_internal(cell_hmat, cell_h_inv, cell_deth, pw_grid, bounds_local, & + SUBROUTINE pw_grid_setup_internal(cell_hmat, cell_h_inv, cell_deth, pw_grid, mp_comm, bounds_local, & blocked, ref_grid, rs_dims, iounit) REAL(KIND=dp), DIMENSION(3, 3), INTENT(IN) :: cell_hmat, cell_h_inv REAL(KIND=dp), INTENT(IN) :: cell_deth TYPE(pw_grid_type), INTENT(INOUT) :: pw_grid + + CLASS(mp_comm_type), INTENT(IN) :: mp_comm INTEGER, DIMENSION(2, 3), INTENT(IN), OPTIONAL :: bounds_local INTEGER, INTENT(in), OPTIONAL :: blocked TYPE(pw_grid_type), INTENT(in), OPTIONAL :: ref_grid @@ -580,7 +573,7 @@ SUBROUTINE pw_grid_setup_internal(cell_hmat, cell_h_inv, cell_deth, pw_grid, bou ! the indices in yz_mask are from -n/2 .. n/2 shifted by n/2 + 1 ! these are not mapped indices ! ALLOCATE (yz_mask(n(2), n(3))) - CALL pw_grid_count(cell_h_inv, pw_grid, ecut, yz_mask) + CALL pw_grid_count(cell_h_inv, pw_grid, mp_comm, ecut, yz_mask) ! Check if reference grid is compatible IF (PRESENT(ref_grid)) THEN @@ -590,7 +583,7 @@ SUBROUTINE pw_grid_setup_internal(cell_hmat, cell_h_inv, cell_deth, pw_grid, bou END IF ! 
Distribute grid - CALL pw_grid_distribute(pw_grid, yz_mask, bounds_local=bounds_local, ref_grid=ref_grid, blocked=blocked, & + CALL pw_grid_distribute(pw_grid, mp_comm, yz_mask, bounds_local=bounds_local, ref_grid=ref_grid, blocked=blocked, & rs_dims=rs_dims) ! Allocate the grid fields @@ -714,7 +707,7 @@ SUBROUTINE pw_grid_print(pw_grid, info) n(2) = pw_grid%ngpts_local CALL pw_grid%para%group%sum(n(1:2)) n(3) = SUM(pw_grid%para%nyzray) - rv(:, 1) = REAL(n, KIND=dp)/REAL(pw_grid%para%group_size, KIND=dp) + rv(:, 1) = REAL(n, KIND=dp)/REAL(pw_grid%para%group%num_pe, KIND=dp) n(1) = pw_grid%ngpts_cut_local n(2) = pw_grid%ngpts_local CALL pw_grid%para%group%max(n(1:2)) @@ -726,7 +719,7 @@ SUBROUTINE pw_grid_print(pw_grid, info) n(3) = MINVAL(pw_grid%para%nyzray) rv(:, 3) = REAL(n, KIND=dp) - IF (pw_grid%para%group_head .AND. info > 0) THEN + IF (info > 0) THEN WRITE (info, '(/,A,T71,I10)') & " PW_GRID| Information for grid number ", pw_grid%id_nr IF (pw_grid%reference > 0) THEN @@ -734,10 +727,10 @@ SUBROUTINE pw_grid_print(pw_grid, info) " PW_GRID| Number of the reference grid ", pw_grid%reference END IF WRITE (info, '(A,T60,I10,A)') & - " PW_GRID| Grid distributed over ", pw_grid%para%group_size, & + " PW_GRID| Grid distributed over ", pw_grid%para%group%num_pe, & " processors" WRITE (info, '(A,T71,2I5)') & - " PW_GRID| Real space group dimensions ", pw_grid%para%rs_dims + " PW_GRID| Real space group dimensions ", pw_grid%para%group%num_pe_cart IF (pw_grid%para%blocked) THEN WRITE (info, '(A,T78,A)') " PW_GRID| the grid is blocked: ", "YES" ELSE @@ -780,6 +773,7 @@ END SUBROUTINE pw_grid_print ! ************************************************************************************************** !> \brief Distribute grids in real and Fourier Space to the processors in group !> \param pw_grid ... +!> \param mp_comm ... !> \param yz_mask ... !> \param bounds_local ... !> \param ref_grid ... @@ -791,9 +785,11 @@ END SUBROUTINE pw_grid_print !> JGH (09-Sep-2003) reduce scaling for distribution !> \author JGH (22-12-2000) ! 
************************************************************************************************** - SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, rs_dims) + SUBROUTINE pw_grid_distribute(pw_grid, mp_comm, yz_mask, bounds_local, ref_grid, blocked, rs_dims) TYPE(pw_grid_type), INTENT(INOUT) :: pw_grid + + CLASS(mp_comm_type), INTENT(IN) :: mp_comm INTEGER, DIMENSION(:, :), INTENT(INOUT) :: yz_mask INTEGER, DIMENSION(2, 3), INTENT(IN), OPTIONAL :: bounds_local TYPE(pw_grid_type), INTENT(IN), OPTIONAL :: ref_grid @@ -809,6 +805,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, INTEGER, ALLOCATABLE, DIMENSION(:) :: pemap INTEGER, ALLOCATABLE, DIMENSION(:, :) :: yz_index INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: axis_dist_all + INTEGER, DIMENSION(2) :: my_rs_dims INTEGER, DIMENSION(2, 3) :: axis_dist LOGICAL :: blocking @@ -820,9 +817,10 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, lbz = pw_grid%bounds(1, 3) pw_grid%ngpts = PRODUCT(INT(pw_grid%npts, KIND=int_8)) - CPASSERT(ALL(pw_grid%para%rs_dims == 0)) + + my_rs_dims = 0 IF (PRESENT(rs_dims)) THEN - pw_grid%para%rs_dims = rs_dims + my_rs_dims = rs_dims END IF IF (PRESENT(blocked)) THEN @@ -843,13 +841,9 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, pw_grid%ngpts_cut_local = INT(pw_grid%ngpts_cut) CPASSERT(pw_grid%ngpts < HUGE(pw_grid%ngpts_local)) pw_grid%ngpts_local = INT(pw_grid%ngpts) - pw_grid%para%rs_dims = 1 - CALL pw_grid%para%rs_group%create(pw_grid%para%group, 2, & - pw_grid%para%rs_dims) - pw_grid%para%rs_dims = pw_grid%para%rs_group%num_pe_cart - pw_grid%para%rs_pos = pw_grid%para%rs_group%mepos_cart - CALL pw_grid%para%rs_group%rank_cart(pw_grid%para%rs_pos, pw_grid%para%rs_mpo) - pw_grid%para%group_size = 1 + my_rs_dims = 1 + CALL pw_grid%para%group%create(mp_comm, 2, my_rs_dims) + ALLOCATE (pw_grid%para%bo(2, 3, 0:0, 3)) DO i = 1, 3 pw_grid%para%bo(1, 1:3, 0, i) = 1 @@ -862,7 +856,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, nx = pw_grid%npts(1) ny = pw_grid%npts(2) nz = pw_grid%npts(3) - np = pw_grid%para%group_size + np = mp_comm%num_pe ! The user can specify 2 strictly positive indices => specific layout ! 1 strictly positive index => the other is fixed by the number of CPUs @@ -873,42 +867,42 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, ! if blocking is free: ! 1) blocked=.FALSE. for plane distributions ! 2) blocked=.TRUE. for non-plane distributions - IF (ANY(pw_grid%para%rs_dims <= 0)) THEN - IF (ALL(pw_grid%para%rs_dims <= 0)) THEN - pw_grid%para%rs_dims = (/0, 0/) + IF (ANY(my_rs_dims <= 0)) THEN + IF (ALL(my_rs_dims <= 0)) THEN + my_rs_dims = [0, 0] ELSE - IF (pw_grid%para%rs_dims(1) > 0) THEN - pw_grid%para%rs_dims(2) = np/pw_grid%para%rs_dims(1) + IF (my_rs_dims(1) > 0) THEN + my_rs_dims(2) = np/my_rs_dims(1) ELSE - pw_grid%para%rs_dims(1) = np/pw_grid%para%rs_dims(2) + my_rs_dims(1) = np/my_rs_dims(2) END IF END IF END IF ! reset if the distribution can not be fulfilled - IF (PRODUCT(pw_grid%para%rs_dims) .NE. np) pw_grid%para%rs_dims = (/0, 0/) - ! reset if the distribution can not be dealt with (/1,np/) - IF (ALL(pw_grid%para%rs_dims == (/1, np/))) pw_grid%para%rs_dims = (/0, 0/) + IF (PRODUCT(my_rs_dims) .NE. np) my_rs_dims = [0, 0] + ! reset if the distribution can not be dealt with [1,np] + IF (ALL(my_rs_dims == [1, np])) my_rs_dims = [0, 0] - ! 
if (/0,0/) now, we can optimize it ourselves - IF (ALL(pw_grid%para%rs_dims == (/0, 0/))) THEN + ! if [0,0] now, we can optimize it ourselves + IF (ALL(my_rs_dims == [0, 0])) THEN ! only small grids have a chance to be 2d distributed IF (nx < np) THEN ! gives the most square looking distribution - CALL mp_dims_create(np, pw_grid%para%rs_dims) + CALL mp_dims_create(np, my_rs_dims) ! we tend to like the first index being smaller than the second - IF (pw_grid%para%rs_dims(1) > pw_grid%para%rs_dims(2)) THEN - itmp = pw_grid%para%rs_dims(1) - pw_grid%para%rs_dims(1) = pw_grid%para%rs_dims(2) - pw_grid%para%rs_dims(2) = itmp + IF (my_rs_dims(1) > my_rs_dims(2)) THEN + itmp = my_rs_dims(1) + my_rs_dims(1) = my_rs_dims(2) + my_rs_dims(2) = itmp END IF ! but should avoid having the first index 1 in all cases - IF (pw_grid%para%rs_dims(1) == 1) THEN - itmp = pw_grid%para%rs_dims(1) - pw_grid%para%rs_dims(1) = pw_grid%para%rs_dims(2) - pw_grid%para%rs_dims(2) = itmp + IF (my_rs_dims(1) == 1) THEN + itmp = my_rs_dims(1) + my_rs_dims(1) = my_rs_dims(2) + my_rs_dims(2) = itmp END IF ELSE - pw_grid%para%rs_dims = (/np, 1/) + my_rs_dims = [np, 1] END IF END IF @@ -919,7 +913,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, CASE (do_pw_grid_blocked_true) blocking = .TRUE. CASE (do_pw_grid_blocked_free) - IF (ALL(pw_grid%para%rs_dims == (/np, 1/))) THEN + IF (ALL(my_rs_dims == [np, 1])) THEN blocking = .FALSE. ELSE blocking = .TRUE. @@ -929,20 +923,16 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, END SELECT !..create group for real space distribution - CALL pw_grid%para%rs_group%create(pw_grid%para%group, 2, & - pw_grid%para%rs_dims) - pw_grid%para%rs_dims = pw_grid%para%rs_group%num_pe_cart - pw_grid%para%rs_pos = pw_grid%para%rs_group%mepos_cart - CALL pw_grid%para%rs_group%rank_cart(pw_grid%para%rs_pos, pw_grid%para%rs_mpo) + CALL pw_grid%para%group%create(mp_comm, 2, my_rs_dims) IF (PRESENT(bounds_local)) THEN pw_grid%bounds_local = bounds_local ELSE - lo = get_limit(nx, pw_grid%para%rs_dims(1), & - pw_grid%para%rs_pos(1)) + lo = get_limit(nx, pw_grid%para%group%num_pe_cart(1), & + pw_grid%para%group%mepos_cart(1)) pw_grid%bounds_local(:, 1) = lo + pw_grid%bounds(1, 1) - 1 - lo = get_limit(ny, pw_grid%para%rs_dims(2), & - pw_grid%para%rs_pos(2)) + lo = get_limit(ny, pw_grid%para%group%num_pe_cart(2), & + pw_grid%para%group%mepos_cart(2)) pw_grid%bounds_local(:, 2) = lo + pw_grid%bounds(1, 2) - 1 pw_grid%bounds_local(:, 3) = pw_grid%bounds(:, 3) END IF @@ -952,7 +942,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, !..the third distribution is needed for the second step in the FFT ALLOCATE (pw_grid%para%bo(2, 3, 0:np - 1, 3)) - rsd = pw_grid%para%rs_dims + rsd = pw_grid%para%group%num_pe_cart IF (PRESENT(bounds_local)) THEN ! axis_dist tells what portion of 1 .. nx , 1 .. ny , 1 .. nz are in the current process @@ -960,9 +950,9 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, axis_dist(:, i) = bounds_local(:, i) - pw_grid%bounds(1, i) + 1 END DO ALLOCATE (axis_dist_all(2, 3, np)) - CALL pw_grid%para%rs_group%allgather(axis_dist, axis_dist_all) + CALL pw_grid%para%group%allgather(axis_dist, axis_dist_all) DO ip = 0, np - 1 - CALL pw_grid%para%rs_group%coords(ip, coor) + CALL pw_grid%para%group%coords(ip, coor) ! 
distribution xyZ pw_grid%para%bo(1:2, 1, ip, 1) = axis_dist_all(1:2, 1, ip + 1) pw_grid%para%bo(1:2, 2, ip, 1) = axis_dist_all(1:2, 2, ip + 1) @@ -982,7 +972,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, DEALLOCATE (axis_dist_all) ELSE DO ip = 0, np - 1 - CALL pw_grid%para%rs_group%coords(ip, coor) + CALL pw_grid%para%group%coords(ip, coor) ! distribution xyZ pw_grid%para%bo(1:2, 1, ip, 1) = get_limit(nx, rsd(1), coor(1)) pw_grid%para%bo(1:2, 2, ip, 1) = get_limit(ny, rsd(2), coor(2)) @@ -1035,7 +1025,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, yz_mask(lo(1), lo(2)) = 0 ip = MOD(i - 1, 2*np) IF (ip > np - 1) ip = 2*np - ip - 1 - IF (ip == pw_grid%para%my_pos) THEN + IF (ip == pw_grid%para%group%mepos) THEN pw_grid%ngpts_cut_local = pw_grid%ngpts_cut_local + gmax END IF pw_grid%para%yzq(lo(1), lo(2)) = ip @@ -1074,7 +1064,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, ns = pw_grid%para%nyzray(ip) pw_grid%para%yzp(1, ns, ip) = j pw_grid%para%yzp(2, ns, ip) = i - IF (ip == pw_grid%para%my_pos) THEN + IF (ip == pw_grid%para%group%mepos) THEN pw_grid%para%yzq(j, i) = ns ELSE pw_grid%para%yzq(j, i) = -1 @@ -1103,7 +1093,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, pw_grid%para%nyzray(ip) = n*m END DO - ipl = pw_grid%para%rs_mpo + ipl = pw_grid%para%group%mepos l = pw_grid%para%bo(2, 1, ipl, 3) - & pw_grid%para%bo(1, 1, ipl, 3) + 1 m = pw_grid%para%bo(2, 2, ipl, 3) - & @@ -1133,7 +1123,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, ALLOCATE (pemap(0:np - 1)) pemap = 0 - pemap(pw_grid%para%my_pos) = pw_grid%para%rs_mpo + pemap(pw_grid%para%group%mepos) = pw_grid%para%group%mepos CALL pw_grid%para%group%sum(pemap) DO ip = 0, np - 1 @@ -1164,7 +1154,7 @@ SUBROUTINE pw_grid_distribute(pw_grid, yz_mask, bounds_local, ref_grid, blocked, IF (pw_grid%para%mode .EQ. PW_MODE_DISTRIBUTED) THEN ALLOCATE (pw_grid%para%pos_of_x(pw_grid%bounds(1, 1):pw_grid%bounds(2, 1))) pw_grid%para%pos_of_x = 0 - pw_grid%para%pos_of_x(pw_grid%bounds_local(1, 1):pw_grid%bounds_local(2, 1)) = pw_grid%para%my_pos + pw_grid%para%pos_of_x(pw_grid%bounds_local(1, 1):pw_grid%bounds_local(2, 1)) = pw_grid%para%group%mepos CALL pw_grid%para%group%sum(pw_grid%para%pos_of_x) ELSE ! this should not be needed @@ -1203,7 +1193,7 @@ SUBROUTINE pre_tag(pw_grid, yz_mask, ref_grid) mz = SIZE(yz_mask, 2) ! loop over all processors and all g vectors yz lines on this processor - DO ip = 0, ref_grid%para%group_size - 1 + DO ip = 0, ref_grid%para%group%num_pe - 1 DO ig = 1, ref_grid%para%nyzray(ip) ! go from mapped coordinates to original coordinates ! 1, 2, ..., n-1, n -> 0, 1, ..., (n/2)-1, -(n/2), -(n/2)+1, ..., -2, -1 @@ -1235,7 +1225,7 @@ SUBROUTINE pre_tag(pw_grid, yz_mask, ref_grid) yz_mask(y, z) = 0 pw_grid%para%yzq(y, z) = ip END IF - IF (ip == pw_grid%para%my_pos) THEN + IF (ip == pw_grid%para%group%mepos) THEN pw_grid%ngpts_cut_local = pw_grid%ngpts_cut_local + gmax END IF END DO @@ -1370,6 +1360,7 @@ PURE SUBROUTINE pw_vec_length(h_inv, length_x, length_y, length_z, length, l, m, !> \brief Count total number of g vectors !> \param h_inv ... !> \param pw_grid ... +!> \param mp_comm ... !> \param cutoff ... !> \param yz_mask ... !> \par History @@ -1377,10 +1368,12 @@ PURE SUBROUTINE pw_vec_length(h_inv, length_x, length_y, length_z, length, l, m, !> \author apsi !> Christopher Mundy ! 
************************************************************************************************** - SUBROUTINE pw_grid_count(h_inv, pw_grid, cutoff, yz_mask) + SUBROUTINE pw_grid_count(h_inv, pw_grid, mp_comm, cutoff, yz_mask) REAL(KIND=dp), DIMENSION(3, 3) :: h_inv TYPE(pw_grid_type), INTENT(INOUT) :: pw_grid + + CLASS(mp_comm_type), INTENT(IN) :: mp_comm REAL(KIND=dp), INTENT(IN) :: cutoff INTEGER, DIMENSION(:, :), INTENT(OUT) :: yz_mask @@ -1405,7 +1398,7 @@ SUBROUTINE pw_grid_count(h_inv, pw_grid, cutoff, yz_mask) nlim(2) = n_upperlimit ELSE IF (pw_grid%para%mode == PW_MODE_DISTRIBUTED) THEN n = n_upperlimit - bounds(1, 3) + 1 - nlim = get_limit(n, pw_grid%para%group_size, pw_grid%para%my_pos) + nlim = get_limit(n, mp_comm%num_pe, mp_comm%mepos) nlim = nlim + bounds(1, 3) - 1 ELSE CPABORT("para % mode not specified") @@ -1435,8 +1428,8 @@ SUBROUTINE pw_grid_count(h_inv, pw_grid, cutoff, yz_mask) ! number of g-vectors for grid IF (pw_grid%para%mode == PW_MODE_DISTRIBUTED) THEN - CALL pw_grid%para%group%sum(gpt) - CALL pw_grid%para%group%sum(yz_mask) + CALL mp_comm%sum(gpt) + CALL mp_comm%sum(yz_mask) END IF pw_grid%ngpts_cut = gpt @@ -1512,7 +1505,7 @@ SUBROUTINE pw_grid_assign(h_inv, pw_grid, cutoff) IF (pw_grid%para%ray_distribution) THEN gpt = 0 - ip = pw_grid%para%my_pos + ip = pw_grid%para%group%mepos DO i = 1, pw_grid%para%nyzray(ip) n = pw_grid%para%yzp(2, i, ip) + lbz - 1 m = pw_grid%para%yzp(1, i, ip) + lby - 1 @@ -1540,7 +1533,7 @@ SUBROUTINE pw_grid_assign(h_inv, pw_grid, cutoff) ELSE - bol = pw_grid%para%bo(:, :, pw_grid%para%rs_mpo, 3) + bol = pw_grid%para%bo(:, :, pw_grid%para%group%mepos, 3) gpt = 0 DO n = bounds(1, 3), bounds(2, 3) IF (n < 0) THEN @@ -1731,7 +1724,7 @@ SUBROUTINE pw_grid_allocate(pw_grid, ng, bounds) IF (pw_grid%para%mode == PW_MODE_DISTRIBUTED) THEN ALLOCATE (pw_grid%grays(pw_grid%npts(1), & - pw_grid%para%nyzray(pw_grid%para%my_pos))) + pw_grid%para%nyzray(pw_grid%para%group%mepos))) END IF ALLOCATE (pw_grid%mapl%pos(bounds(1, 1):bounds(2, 1))) @@ -2067,7 +2060,7 @@ SUBROUTINE pw_grid_remap(pw_grid, yz) END DO END IF - ip = pw_grid%para%my_pos + ip = pw_grid%para%group%mepos is = 0 DO i = 1, nz DO j = 1, ny @@ -2237,8 +2230,6 @@ SUBROUTINE pw_grid_release(pw_grid) END IF ! also release groups CALL pw_grid%para%group%free() - IF (PRODUCT(pw_grid%para%rs_dims) /= 0) & - CALL pw_grid%para%rs_group%free() IF (ALLOCATED(pw_grid%para%pos_of_x)) THEN DEALLOCATE (pw_grid%para%pos_of_x) END IF diff --git a/src/pw/pw_methods.F b/src/pw/pw_methods.F index c5bb95b5f2..2cea393adc 100644 --- a/src/pw/pw_methods.F +++ b/src/pw/pw_methods.F @@ -1351,7 +1351,7 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de !..parallel FFT ! - IF (test .AND. pw1%pw_grid%para%group_head .AND. out_unit > 0) THEN + IF (test .AND. 
out_unit > 0) THEN WRITE (out_unit, '(A)') " FFT Protocol " #:if space=="rs" WRITE (out_unit, '(A,T76,A)') " Transform direction ", "FWFFT" @@ -1365,7 +1365,7 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de WRITE (out_unit, '(A,T66,E15.6)') " scale factor", norm END IF - my_pos = pw1%pw_grid%para%my_pos + my_pos = pw1%pw_grid%para%group%mepos nrays = pw1%pw_grid%para%nyzray(my_pos) grays => pw1%pw_grid%grays @@ -1377,15 +1377,14 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de !..transform IF (pw1%pw_grid%para%ray_distribution) THEN CALL fft3d(FWFFT, n, c_in, grays, pw1%pw_grid%para%group, & - pw1%pw_grid%para%rs_group, & pw1%pw_grid%para%yzp, pw1%pw_grid%para%nyzray, & pw1%pw_grid%para%bo, scale=norm, debug=test) ELSE - CALL fft3d(FWFFT, n, c_in, grays, pw1%pw_grid%para%rs_group, & + CALL fft3d(FWFFT, n, c_in, grays, pw1%pw_grid%para%group, & pw1%pw_grid%para%bo, scale=norm, debug=test) END IF !..prepare output - IF (test .AND. pw1%pw_grid%para%group_head .AND. out_unit > 0) & + IF (test .AND. out_unit > 0) & WRITE (out_unit, '(A)') " PW_GATHER : 2d -> 1d " CALL pw_gather_p_${kind2}$ (pw2, grays) #:elif kind=="r3d" and kind2=="c1d" @@ -1404,15 +1403,14 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de !..transform IF (pw1%pw_grid%para%ray_distribution) THEN CALL fft3d(FWFFT, n, c_in, grays, pw1%pw_grid%para%group, & - pw1%pw_grid%para%rs_group, & pw1%pw_grid%para%yzp, pw1%pw_grid%para%nyzray, & pw1%pw_grid%para%bo, scale=norm, debug=test) ELSE - CALL fft3d(FWFFT, n, c_in, grays, pw1%pw_grid%para%rs_group, & + CALL fft3d(FWFFT, n, c_in, grays, pw1%pw_grid%para%group, & pw1%pw_grid%para%bo, scale=norm, debug=test) END IF !..prepare output - IF (test .AND. pw1%pw_grid%para%group_head .AND. out_unit > 0) & + IF (test .AND. out_unit > 0) & WRITE (out_unit, '(A)') " PW_GATHER : 2d -> 1d " CALL pw_gather_p_${kind2}$ (pw2, grays) DEALLOCATE (c_in) @@ -1424,7 +1422,7 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de #:else #:if kind=="c1d" and kind2=="c3d" !..prepare input - IF (test .AND. pw1%pw_grid%para%group_head .AND. out_unit > 0) & + IF (test .AND. out_unit > 0) & WRITE (out_unit, '(A)') " PW_SCATTER : 2d -> 1d " grays = z_zero CALL pw_scatter_p_${kind}$ (pw1, grays) @@ -1432,11 +1430,10 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de !..transform IF (pw1%pw_grid%para%ray_distribution) THEN CALL fft3d(BWFFT, n, c_in, grays, pw1%pw_grid%para%group, & - pw1%pw_grid%para%rs_group, & pw1%pw_grid%para%yzp, pw1%pw_grid%para%nyzray, & pw1%pw_grid%para%bo, scale=norm, debug=test) ELSE - CALL fft3d(BWFFT, n, c_in, grays, pw1%pw_grid%para%rs_group, & + CALL fft3d(BWFFT, n, c_in, grays, pw1%pw_grid%para%group, & pw1%pw_grid%para%bo, scale=norm, debug=test) END IF !..prepare output (nothing to do) @@ -1449,7 +1446,7 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de ELSE #endif !.. prepare input - IF (test .AND. pw1%pw_grid%para%group_head .AND. out_unit > 0) & + IF (test .AND. 
out_unit > 0) & WRITE (out_unit, '(A)') " PW_SCATTER : 2d -> 1d " grays = z_zero CALL pw_scatter_p_${kind}$ (pw1, grays) @@ -1458,15 +1455,14 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de !..transform IF (pw1%pw_grid%para%ray_distribution) THEN CALL fft3d(BWFFT, n, c_in, grays, pw1%pw_grid%para%group, & - pw1%pw_grid%para%rs_group, & pw1%pw_grid%para%yzp, pw1%pw_grid%para%nyzray, & pw1%pw_grid%para%bo, scale=norm, debug=test) ELSE - CALL fft3d(BWFFT, n, c_in, grays, pw1%pw_grid%para%rs_group, & + CALL fft3d(BWFFT, n, c_in, grays, pw1%pw_grid%para%group, & pw1%pw_grid%para%bo, scale=norm, debug=test) END IF !..prepare output - IF (test .AND. pw1%pw_grid%para%group_head .AND. out_unit > 0) & + IF (test .AND. out_unit > 0) & WRITE (out_unit, '(A)') " Real part " CALL pw_copy_from_array(pw2, c_in) DEALLOCATE (c_in) @@ -1477,7 +1473,7 @@ SUBROUTINE fft_wrap_pw1pw2_${kind}$_${kind2}$_${space}$_${space2}$ (pw1, pw2, de #:endif END IF - IF (test .AND. pw1%pw_grid%para%group_head .AND. out_unit > 0) THEN + IF (test .AND. out_unit > 0) THEN WRITE (out_unit, '(A)') " End of FFT Protocol " END IF diff --git a/src/pw/pw_spline_utils.F b/src/pw/pw_spline_utils.F index 68bababb1f..cde177ba1f 100644 --- a/src/pw/pw_spline_utils.F +++ b/src/pw/pw_spline_utils.F @@ -1050,7 +1050,7 @@ SUBROUTINE add_coarse2fine(coarse_coeffs_pw, fine_values_pw, & CALL timeset(routineN//"_comm", handle2) coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* & (coarse_bo(2, 3) - coarse_bo(1, 3) + 1) - n_procs = coarse_coeffs_pw%pw_grid%para%group_size + n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), & sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), & rcv_offset(0:n_procs - 1), real_rcv_size(0:n_procs - 1)) @@ -2278,7 +2278,7 @@ SUBROUTINE add_fine2coarse(fine_values_pw, coarse_coeffs_pw, & CALL timeset(routineN//"_comm", handle2) coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* & (coarse_bo(2, 3) - coarse_bo(1, 3) + 1) - n_procs = coarse_coeffs_pw%pw_grid%para%group_size + n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), & sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), & rcv_offset(0:n_procs - 1), pp_lb(0:n_procs - 1), & diff --git a/src/pw/realspace_grid_cube.F b/src/pw/realspace_grid_cube.F index 11c333a622..0f818e7917 100644 --- a/src/pw/realspace_grid_cube.F +++ b/src/pw/realspace_grid_cube.F @@ -141,9 +141,9 @@ SUBROUTINE pw_to_cube(pw, unit_nr, title, particles_r, particles_z, stride, zero ALLOCATE (buf(L3:U3)) - my_rank = pw%pw_grid%para%my_pos + my_rank = pw%pw_grid%para%group%mepos gid = pw%pw_grid%para%group - num_pe = pw%pw_grid%para%group_size + num_pe = pw%pw_grid%para%group%num_pe tag = 1 rank (1) = unit_nr @@ -265,8 +265,8 @@ SUBROUTINE cube_to_pw(grid, filename, scaling, parallel_read, silent) END IF !get rs grids and parallel environment gid = grid%pw_grid%para%group - my_rank = grid%pw_grid%para%my_pos - num_pe = grid%pw_grid%para%group_size + my_rank = grid%pw_grid%para%group%mepos + num_pe = grid%pw_grid%para%group%num_pe tag = 1 lbounds_local = grid%pw_grid%bounds_local(1, :) @@ -402,8 +402,8 @@ SUBROUTINE cube_to_pw_parallel(grid, filename, scaling, msglen, silent) !get rs grids and parallel envnment gid = grid%pw_grid%para%group - my_rank = grid%pw_grid%para%my_pos - num_pe = grid%pw_grid%para%group_size + my_rank = grid%pw_grid%para%group%mepos + num_pe = grid%pw_grid%para%group%num_pe tag = 1 DO i = 1, 3 
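
The pw_grid_count hunk above now takes the rank and group size for its z-plane partition directly from the passed communicator, via get_limit(n, mp_comm%num_pe, mp_comm%mepos). As a rough, self-contained illustration of that kind of contiguous block partition, here is a minimal sketch; block_limits is a hypothetical helper, only assumed to follow the usual convention that the first MOD(n, nranks) ranks receive one extra element.

PROGRAM block_partition_sketch
   ! Illustrative only: block_limits is a stand-in, not CP2K's get_limit.
   IMPLICIT NONE
   INTEGER, PARAMETER :: ntot = 10, nranks = 4
   INTEGER :: mepos, lims(2)
   DO mepos = 0, nranks - 1
      lims = block_limits(ntot, nranks, mepos)
      WRITE (*, '(A,I0,A,I0,A,I0)') "rank ", mepos, " owns ", lims(1), "..", lims(2)
   END DO
CONTAINS
   PURE FUNCTION block_limits(n, num_pe, mepos) RESULT(lims)
      ! Split 1..n into num_pe contiguous blocks; the first MOD(n, num_pe)
      ! ranks get one element more than the rest (assumed convention).
      INTEGER, INTENT(IN) :: n, num_pe, mepos
      INTEGER :: lims(2)
      INTEGER :: nloc, nextra
      nloc = n/num_pe
      nextra = MOD(n, num_pe)
      IF (mepos < nextra) THEN
         lims(1) = mepos*(nloc + 1) + 1
         lims(2) = lims(1) + nloc
      ELSE
         lims(1) = nextra*(nloc + 1) + (mepos - nextra)*nloc + 1
         lims(2) = lims(1) + nloc - 1
      END IF
   END FUNCTION block_limits
END PROGRAM block_partition_sketch
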
@@ -457,7 +457,7 @@ SUBROUTINE cube_to_pw_parallel(grid, filename, scaling, msglen, silent) CALL close_file(unit_number=extunit_handle) END IF ! Sync offset and start parallel read - CALL gid%bcast(offset_global, grid%pw_grid%para%group_head_id) + CALL gid%bcast(offset_global, grid%pw_grid%para%group%source) BOF = offset_global CALL extunit%open(groupid=gid, filepath=filename, amode_status=file_amode_rdonly) ! Determine byte offsets for each grid z-slice which are local to a process @@ -655,7 +655,7 @@ SUBROUTINE pw_to_cube_parallel(grid, unit_nr, title, particles_r, particles_z, s !get rs grids and parallel envnment gid = grid%pw_grid%para%group - my_rank = grid%pw_grid%para%my_pos + my_rank = grid%pw_grid%para%group%mepos ! Shortcut lbounds = grid%pw_grid%bounds(1, :) @@ -743,7 +743,7 @@ SUBROUTINE pw_to_cube_parallel(grid, unit_nr, title, particles_r, particles_z, s END IF END IF ! Sync offset - CALL gid%bcast(BOF, grid%pw_grid%para%group_head_id) + CALL gid%bcast(BOF, grid%pw_grid%para%group%source) ! Determine byte offsets for each grid z-slice which are local to a process ! and convert z-slices to cube format compatible strings ALLOCATE (displacements(nslices)) @@ -874,9 +874,9 @@ SUBROUTINE pw_to_simple_volumetric(pw, unit_nr, stride, pw2) ALLOCATE (buf(L3:U3)) IF (DOUBLE) ALLOCATE (buf2(L3:U3)) - my_rank = pw%pw_grid%para%my_pos + my_rank = pw%pw_grid%para%group%mepos gid = pw%pw_grid%para%group - num_pe = pw%pw_grid%para%group_size + num_pe = pw%pw_grid%para%group%num_pe tag = 1 rank (1) = unit_nr diff --git a/src/pw/realspace_grid_types.F b/src/pw/realspace_grid_types.F index fc21f14b46..6ce818cc94 100644 --- a/src/pw/realspace_grid_types.F +++ b/src/pw/realspace_grid_types.F @@ -110,7 +110,6 @@ MODULE realspace_grid_types ! they are most useful for fully distributed grids, where they reflect the topology of the grid TYPE(mp_comm_type) :: group = mp_comm_null INTEGER :: my_pos = -1 - LOGICAL :: group_head = .FALSE. INTEGER :: group_size = 0 INTEGER, DIMENSION(3) :: group_dim = -1 INTEGER, DIMENSION(3) :: group_coor = -1 @@ -250,14 +249,13 @@ SUBROUTINE rs_grid_create_descriptor(desc, pw_grid, input_settings, border_point desc%distributed = .FALSE. desc%group = mp_comm_null desc%group_size = 1 - desc%group_head = .TRUE. desc%group_dim = 1 desc%group_coor = 0 desc%my_pos = 0 ELSE ! group size of desc grid ! global grid dimensions are still the same - desc%group_size = pw_grid%para%group_size + desc%group_size = pw_grid%para%group%num_pe desc%npts = pw_grid%npts desc%ngpts = PRODUCT(INT(desc%npts, KIND=int_8)) desc%lb = pw_grid%bounds(1, :) @@ -359,7 +357,6 @@ SUBROUTINE rs_grid_create_descriptor(desc, pw_grid, input_settings, border_point END IF desc%distributed = .FALSE. desc%parallel = .TRUE. - desc%group_head = pw_grid%para%group_head desc%group_coor(:) = 0 desc%my_virtual_pos = 0 @@ -386,7 +383,6 @@ SUBROUTINE rs_grid_create_descriptor(desc, pw_grid, input_settings, border_point ! we are going parallel on the real space grid desc%parallel = .TRUE. desc%distributed = .TRUE. - desc%group_head = (desc%my_pos == 0) ! 
set up global info about the distribution ALLOCATE (desc%rank2coord(3, 0:desc%group_size - 1)) @@ -772,8 +768,8 @@ SUBROUTINE transfer_rs2pw_replicated(rs, pw) INTEGER, DIMENSION(3) :: lb, ub REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: recvbuf, sendbuf, swaparray - ASSOCIATE (np => pw%pw_grid%para%group_size, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group_size - 1, 1), & - pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%rs_group, mepos => pw%pw_grid%para%rs_mpo, & + ASSOCIATE (np => pw%pw_grid%para%group%num_pe, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group%num_pe - 1, 1), & + pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%group, mepos => pw%pw_grid%para%group%mepos, & grid => rs%r) ALLOCATE (rcount(0:np - 1)) DO ip = 1, np @@ -842,8 +838,8 @@ SUBROUTINE transfer_pw2rs_replicated(rs, pw) REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: recvbuf, sendbuf, swaparray TYPE(mp_request_type), DIMENSION(2) :: req - ASSOCIATE (np => pw%pw_grid%para%group_size, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group_size - 1, 1), & - pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%rs_group, mepos => pw%pw_grid%para%rs_mpo, & + ASSOCIATE (np => pw%pw_grid%para%group%num_pe, bo => pw%pw_grid%para%bo(1:2, 1:3, 0:pw%pw_grid%para%group%num_pe - 1, 1), & + pbo => pw%pw_grid%bounds, group => pw%pw_grid%para%group, mepos => pw%pw_grid%para%group%mepos, & grid => rs%r) ALLOCATE (rcount(0:np - 1)) DO ip = 1, np @@ -1206,22 +1202,22 @@ SUBROUTINE transfer_rs2pw_distributed(rs, pw) END DO ! This is the real redistribution - ALLOCATE (bounds(0:pw%pw_grid%para%group_size - 1, 1:4)) + ALLOCATE (bounds(0:pw%pw_grid%para%group%num_pe - 1, 1:4)) ! work out the pw grid points each proc holds - DO i = 0, pw%pw_grid%para%group_size - 1 + DO i = 0, pw%pw_grid%para%group%num_pe - 1 bounds(i, 1:2) = pw%pw_grid%para%bo(1:2, 1, i, 1) bounds(i, 3:4) = pw%pw_grid%para%bo(1:2, 2, i, 1) bounds(i, 1:2) = bounds(i, 1:2) - pw%pw_grid%npts(1)/2 - 1 bounds(i, 3:4) = bounds(i, 3:4) - pw%pw_grid%npts(2)/2 - 1 END DO - ALLOCATE (send_tasks(0:pw%pw_grid%para%group_size - 1, 1:6)) - ALLOCATE (send_sizes(0:pw%pw_grid%para%group_size - 1)) - ALLOCATE (send_disps(0:pw%pw_grid%para%group_size - 1)) - ALLOCATE (recv_tasks(0:pw%pw_grid%para%group_size - 1, 1:6)) - ALLOCATE (recv_sizes(0:pw%pw_grid%para%group_size - 1)) - ALLOCATE (recv_disps(0:pw%pw_grid%para%group_size - 1)) + ALLOCATE (send_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6)) + ALLOCATE (send_sizes(0:pw%pw_grid%para%group%num_pe - 1)) + ALLOCATE (send_disps(0:pw%pw_grid%para%group%num_pe - 1)) + ALLOCATE (recv_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6)) + ALLOCATE (recv_sizes(0:pw%pw_grid%para%group%num_pe - 1)) + ALLOCATE (recv_disps(0:pw%pw_grid%para%group%num_pe - 1)) send_tasks(:, 1) = 1 send_tasks(:, 2) = 0 send_tasks(:, 3) = 1 @@ -1232,7 +1228,7 @@ SUBROUTINE transfer_rs2pw_distributed(rs, pw) recv_sizes = 0 my_rs_rank = rs%desc%my_pos - my_pw_rank = pw%pw_grid%para%rs_mpo + my_pw_rank = pw%pw_grid%para%group%mepos ! find the processors that should hold our data ! should be part of the rs grid type @@ -1284,7 +1280,7 @@ SUBROUTINE transfer_rs2pw_distributed(rs, pw) ub_recv(:) = ub_send(:) !$OMP PARALLEL DO DEFAULT(NONE), & !$OMP SHARED(pw,lb_send,ub_send,bounds,send_tasks,send_sizes) - DO j = 0, pw%pw_grid%para%group_size - 1 + DO j = 0, pw%pw_grid%para%group%num_pe - 1 IF (lb_send(1) .GT. bounds(j, 2)) CYCLE IF (ub_send(1) .LT. 
bounds(j, 1)) CYCLE @@ -1305,7 +1301,7 @@ SUBROUTINE transfer_rs2pw_distributed(rs, pw) send_disps(0) = 0 recv_disps(0) = 0 - DO i = 1, pw%pw_grid%para%group_size - 1 + DO i = 1, pw%pw_grid%para%group%num_pe - 1 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1) recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1) END DO @@ -1465,21 +1461,21 @@ SUBROUTINE transfer_pw2rs_distributed(rs, pw) ! This is the real redistribution - ALLOCATE (bounds(0:pw%pw_grid%para%group_size - 1, 1:4)) + ALLOCATE (bounds(0:pw%pw_grid%para%group%num_pe - 1, 1:4)) - DO i = 0, pw%pw_grid%para%group_size - 1 + DO i = 0, pw%pw_grid%para%group%num_pe - 1 bounds(i, 1:2) = pw%pw_grid%para%bo(1:2, 1, i, 1) bounds(i, 3:4) = pw%pw_grid%para%bo(1:2, 2, i, 1) bounds(i, 1:2) = bounds(i, 1:2) - pw%pw_grid%npts(1)/2 - 1 bounds(i, 3:4) = bounds(i, 3:4) - pw%pw_grid%npts(2)/2 - 1 END DO - ALLOCATE (send_tasks(0:pw%pw_grid%para%group_size - 1, 1:6)) - ALLOCATE (send_sizes(0:pw%pw_grid%para%group_size - 1)) - ALLOCATE (send_disps(0:pw%pw_grid%para%group_size - 1)) - ALLOCATE (recv_tasks(0:pw%pw_grid%para%group_size - 1, 1:6)) - ALLOCATE (recv_sizes(0:pw%pw_grid%para%group_size - 1)) - ALLOCATE (recv_disps(0:pw%pw_grid%para%group_size - 1)) + ALLOCATE (send_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6)) + ALLOCATE (send_sizes(0:pw%pw_grid%para%group%num_pe - 1)) + ALLOCATE (send_disps(0:pw%pw_grid%para%group%num_pe - 1)) + ALLOCATE (recv_tasks(0:pw%pw_grid%para%group%num_pe - 1, 1:6)) + ALLOCATE (recv_sizes(0:pw%pw_grid%para%group%num_pe - 1)) + ALLOCATE (recv_disps(0:pw%pw_grid%para%group%num_pe - 1)) send_tasks = 0 send_tasks(:, 1) = 1 @@ -1500,7 +1496,7 @@ SUBROUTINE transfer_pw2rs_distributed(rs, pw) recv_sizes = 0 my_rs_rank = rs%desc%my_pos - my_pw_rank = pw%pw_grid%para%rs_mpo + my_pw_rank = pw%pw_grid%para%group%mepos ! find the processors that should hold our data ! should be part of the rs grid type @@ -1513,7 +1509,7 @@ SUBROUTINE transfer_pw2rs_distributed(rs, pw) !$OMP PARALLEL DO DEFAULT(NONE), & !$OMP PRIVATE(coords,idir,pos,lb_send,ub_send), & !$OMP SHARED(rs,bounds,my_rs_rank,send_tasks,send_sizes,pw) - DO i = 0, pw%pw_grid%para%group_size - 1 + DO i = 0, pw%pw_grid%para%group%num_pe - 1 coords(:) = rs%desc%rank2coord(:, rs%desc%real2virtual(i)) !calculate the real rs grid points on each processor @@ -1555,7 +1551,7 @@ SUBROUTINE transfer_pw2rs_distributed(rs, pw) !$OMP PARALLEL DO DEFAULT(NONE), & !$OMP SHARED(pw,lb_send,ub_send,bounds,recv_tasks,recv_sizes) - DO j = 0, pw%pw_grid%para%group_size - 1 + DO j = 0, pw%pw_grid%para%group%num_pe - 1 IF (ub_send(1) .LT. bounds(j, 1)) CYCLE IF (lb_send(1) .GT. bounds(j, 2)) CYCLE @@ -1576,7 +1572,7 @@ SUBROUTINE transfer_pw2rs_distributed(rs, pw) send_disps(0) = 0 recv_disps(0) = 0 - DO i = 1, pw%pw_grid%para%group_size - 1 + DO i = 1, pw%pw_grid%para%group%num_pe - 1 send_disps(i) = send_disps(i - 1) + send_sizes(i - 1) recv_disps(i) = recv_disps(i - 1) + recv_sizes(i - 1) END DO diff --git a/src/qmmm_gpw_forces.F b/src/qmmm_gpw_forces.F index 2dd7dfe825..ad507b877c 100644 --- a/src/qmmm_gpw_forces.F +++ b/src/qmmm_gpw_forces.F @@ -460,7 +460,7 @@ SUBROUTINE qmmm_forces_with_gaussian(rho, qmmm_env, mm_particles, & END DO pos_of_x => grids(auxbas_grid)%pw_grid%para%pos_of_x group = grids(auxbas_grid)%pw_grid%para%group - me = grids(auxbas_grid)%pw_grid%para%my_pos + me = grids(auxbas_grid)%pw_grid%para%group%mepos glb = rho%pw_grid%bounds(1, :) gub = rho%pw_grid%bounds(2, :) IF ((pos_of_x(glb(1)) .EQ. me) .AND. (pos_of_x(gub(1)) .EQ. 
me)) THEN diff --git a/src/qmmm_pw_grid.F b/src/qmmm_pw_grid.F index 98d316d99f..2132f5e004 100644 --- a/src/qmmm_pw_grid.F +++ b/src/qmmm_pw_grid.F @@ -159,22 +159,11 @@ SUBROUTINE pw_grid_create_copy_no_pbc(pw_grid_in, pw_grid_out, pw_mode) pw_grid_out%cutoff = pw_grid_in%cutoff !para - pw_grid_out%para%group_size = pw_grid_out%para%group%num_pe - pw_grid_out%para%my_pos = pw_grid_out%para%group%mepos - pw_grid_out%para%group_head_id = pw_grid_in%para%group_head_id - pw_grid_out%para%group_head = & - (pw_grid_out%para%group_head_id == pw_grid_out%para%my_pos) pw_grid_out%para%mode = pw_mode_loc ALLOCATE (pos_of_x(pw_grid_out%bounds(1, 1):pw_grid_out%bounds(2, 1))) pos_of_x(:pw_grid_out%bounds(2, 1) - 1) = pw_grid_in%para%pos_of_x pos_of_x(pw_grid_out%bounds(2, 1)) = pos_of_x(pw_grid_out%bounds(2, 1) - 1) CALL MOVE_ALLOC(pos_of_x, pw_grid_out%para%pos_of_x) - pw_grid_out%para%rs_dims = pw_grid_in%para%rs_dims - IF (PRODUCT(pw_grid_in%para%rs_dims) /= 0) THEN - CALL pw_grid_out%para%rs_group%from_dup(pw_grid_in%para%rs_group) - END IF - pw_grid_out%para%rs_pos = pw_grid_in%para%rs_pos - pw_grid_out%para%rs_mpo = pw_grid_in%para%rs_mpo NULLIFY (pw_grid_out%g, pw_grid_out%gsq) CPASSERT(pw_grid_in%grid_span == FULLSPACE) diff --git a/src/qs_collocate_density.F b/src/qs_collocate_density.F index b2c51d2c60..c7b28fc36c 100644 --- a/src/qs_collocate_density.F +++ b/src/qs_collocate_density.F @@ -592,8 +592,8 @@ SUBROUTINE calculate_lri_rho_elec(lri_rho_g, lri_rho_r, qs_env, & ALLOCATE (pab(maxco, 1)) offset = 0 - my_pos = mgrid_rspace(1)%pw_grid%para%my_pos - group_size = mgrid_rspace(1)%pw_grid%para%group_size + my_pos = mgrid_rspace(1)%pw_grid%para%group%mepos + group_size = mgrid_rspace(1)%pw_grid%para%group%num_pe DO ikind = 1, SIZE(atomic_kind_set) @@ -2500,8 +2500,8 @@ SUBROUTINE collocate_single_gaussian(rho, rho_gspace, & offset = 0 group = mgrid_rspace(1)%pw_grid%para%group - my_pos = mgrid_rspace(1)%pw_grid%para%my_pos - group_size = mgrid_rspace(1)%pw_grid%para%group_size + my_pos = mgrid_rspace(1)%pw_grid%para%group%mepos + group_size = mgrid_rspace(1)%pw_grid%para%group%num_pe ALLOCATE (where_is_the_point(0:group_size - 1)) DO iatom = 1, natom @@ -2706,8 +2706,8 @@ SUBROUTINE calculate_wavefunction(mo_vectors, ivector, rho, rho_gspace, & offset = 0 group = mgrid_rspace(1)%pw_grid%para%group - my_pos = mgrid_rspace(1)%pw_grid%para%my_pos - group_size = mgrid_rspace(1)%pw_grid%para%group_size + my_pos = mgrid_rspace(1)%pw_grid%para%group%mepos + group_size = mgrid_rspace(1)%pw_grid%para%group%num_pe ALLOCATE (where_is_the_point(0:group_size - 1)) DO iatom = 1, natom diff --git a/src/qs_external_density.F b/src/qs_external_density.F index 2fdbffd6f7..1769abc335 100644 --- a/src/qs_external_density.F +++ b/src/qs_external_density.F @@ -115,8 +115,8 @@ SUBROUTINE external_read_density(qs_env) CALL section_vals_val_get(ext_den_section, "FILE_DENSITY", c_val=filename) tag = 1 - ASSOCIATE (gid => rho_ext_r(1)%pw_grid%para%group, my_rank => rho_ext_r(1)%pw_grid%para%my_pos, & - num_pe => rho_ext_r(1)%pw_grid%para%group_size) + ASSOCIATE (gid => rho_ext_r(1)%pw_grid%para%group, my_rank => rho_ext_r(1)%pw_grid%para%group%mepos, & + num_pe => rho_ext_r(1)%pw_grid%para%group%num_pe) IF (dft_control%read_external_density) THEN diff --git a/src/qs_external_potential.F b/src/qs_external_potential.F index 67ad371ba8..71e99584e3 100644 --- a/src/qs_external_potential.F +++ b/src/qs_external_potential.F @@ -412,8 +412,8 @@ SUBROUTINE interpolate_external_potential(r, grid, func, dfunc, 
calc_derivatives ! The values of external potential on grid are distributed among the ! processes, so first we have to gather them up gid = grid%pw_grid%para%group - my_rank = grid%pw_grid%para%my_pos - num_pe = grid%pw_grid%para%group_size + my_rank = grid%pw_grid%para%group%mepos + num_pe = grid%pw_grid%para%group%num_pe tag = 1 dr = grid%pw_grid%dr diff --git a/src/qs_integrate_potential_single.F b/src/qs_integrate_potential_single.F index ac31cbb024..4b825f87d1 100644 --- a/src/qs_integrate_potential_single.F +++ b/src/qs_integrate_potential_single.F @@ -828,8 +828,8 @@ SUBROUTINE integrate_v_rspace_one_center(v_rspace, qs_env, int_res, & eps_rho_rspace = dft_control%qs_control%eps_rho_rspace offset = 0 - my_pos = v_rspace%pw_grid%para%my_pos - group_size = v_rspace%pw_grid%para%group_size + my_pos = v_rspace%pw_grid%para%group%mepos + group_size = v_rspace%pw_grid%para%group%num_pe DO ikind = 1, nkind @@ -1029,8 +1029,8 @@ SUBROUTINE integrate_v_rspace_diagonal(v_rspace, ksmat, pmat, qs_env, calculate_ use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer) offset = 0 - my_pos = v_rspace%pw_grid%para%my_pos - group_size = v_rspace%pw_grid%para%group_size + my_pos = v_rspace%pw_grid%para%group%mepos + group_size = v_rspace%pw_grid%para%group%num_pe DO ikind = 1, nkind
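
Several of the hunks above read the calling process's rank and the group size straight from the communicator object (para%group%mepos, para%group%num_pe) instead of from separately stored counters. The toy type below is a hypothetical illustration of how such fields are typically filled from MPI; it is not CP2K's mp_comm_type and only models the two components used here.

PROGRAM comm_accessor_sketch
   USE mpi_f08, ONLY: MPI_Init, MPI_Finalize, MPI_Comm_rank, MPI_Comm_size, MPI_COMM_WORLD
   IMPLICIT NONE
   TYPE :: toy_comm_type
      INTEGER :: mepos = -1   ! rank of this process within the group
      INTEGER :: num_pe = 0   ! number of processes in the group
   END TYPE toy_comm_type
   TYPE(toy_comm_type) :: group
   CALL MPI_Init()
   CALL MPI_Comm_rank(MPI_COMM_WORLD, group%mepos)
   CALL MPI_Comm_size(MPI_COMM_WORLD, group%num_pe)
   WRITE (*, '(A,I0,A,I0)') "rank ", group%mepos, " of ", group%num_pe
   CALL MPI_Finalize()
END PROGRAM comm_accessor_sketch
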
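
The distributed transfer routines above (transfer_rs2pw_distributed, transfer_pw2rs_distributed) turn per-rank element counts into packed-buffer displacements by a running sum, send_disps(i) = send_disps(i - 1) + send_sizes(i - 1). A small self-contained illustration of that bookkeeping, with made-up sizes:

PROGRAM displacement_sketch
   IMPLICIT NONE
   INTEGER, PARAMETER :: num_pe = 4
   INTEGER :: send_sizes(0:num_pe - 1), send_disps(0:num_pe - 1), i
   ! hypothetical number of grid points destined for each rank
   send_sizes = [3, 0, 5, 2]
   ! displacement of each rank's block in the packed send buffer
   send_disps(0) = 0
   DO i = 1, num_pe - 1
      send_disps(i) = send_disps(i - 1) + send_sizes(i - 1)
   END DO
   WRITE (*, '(A,4I4)') "sizes:", send_sizes
   WRITE (*, '(A,4I4)') "disps:", send_disps   ! yields 0 3 3 8
END PROGRAM displacement_sketch
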