diff --git a/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/CMakeLists.txt b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/CMakeLists.txt
new file mode 100644
index 0000000000..5365f33d30
--- /dev/null
+++ b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_fortran_example(do_concurrent)
+add_fortran_example(hybrid_do_concurrent)
+add_fortran_example(omp6_do_concurrent)
diff --git a/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/do_concurrent.f90 b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/do_concurrent.f90
new file mode 100644
index 0000000000..6a37db7726
--- /dev/null
+++ b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/do_concurrent.f90
@@ -0,0 +1,35 @@
+! Baseline: times 24000 repetitions of two dependent DO CONCURRENT loops
+! (y = x + 1, then z = y + 1) with no explicit OpenMP data mapping and
+! prints the elapsed wall-clock time in seconds.
+! Snippet begin0
+program do_concurrent
+  use omp_lib
+  implicit none
+
+  integer :: i, outer
+  integer, dimension(10000) :: x, y, z
+  double precision :: t0, t1, time
+  x = 1
+  y = 0
+  z = 0
+  ! Dummy offload to warm up the device
+  !$omp target
+  !$omp end target
+
+  t0 = omp_get_wtime()
+  do outer = 1, 24000
+    !call do_work_on_host_updating_x(x,...)
+    do concurrent (i = 1:10000)
+      y(i) = x(i) + 1
+    enddo
+
+    do concurrent (i = 1:10000)
+      z(i) = y(i) + 1
+    enddo
+    !call do_work_on_host_using_z(z,...)
+  enddo
+  t1 = omp_get_wtime()
+  time = t1-t0
+  print *, time
+end program do_concurrent
+! Snippet end
diff --git a/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/hybrid_do_concurrent.f90 b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/hybrid_do_concurrent.f90
new file mode 100644
index 0000000000..07c1b923d3
--- /dev/null
+++ b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/hybrid_do_concurrent.f90
@@ -0,0 +1,38 @@
+! Hybrid variant: the two DO CONCURRENT loops are enclosed in an OpenMP
+! TARGET DATA region. Per outer iteration the region copies x to the device
+! on entry, gives y device storage with no host transfer, and copies z back
+! to the host on exit. Prints the elapsed wall-clock time in seconds.
+program hybrid_do_concurrent
+  use omp_lib
+  implicit none
+
+  integer :: i, outer
+  integer, dimension(10000) :: x, y, z
+  double precision :: t0, t1, time
+  x = 1
+  y = 0
+  z = 0
+  ! Dummy offload to warm up the device
+  !$omp target
+  !$omp end target
+
+  t0 = omp_get_wtime()
+! Snippet begin
+  do outer = 1, 24000
+    !call do_work_on_host_updating_x(x,...)
+    !$omp target data map(to: x) map(alloc: y) map(from: z)
+    do concurrent (i = 1:10000)
+      y(i) = x(i) + 1
+    enddo
+
+    do concurrent (i = 1:10000)
+      z(i) = y(i) + 1
+    enddo
+    !$omp end target data
+    !call do_work_on_host_using_z(z,...)
+  enddo
+! Snippet end
+  t1 = omp_get_wtime()
+  time = t1-t0
+  print *, time
+end program hybrid_do_concurrent
diff --git a/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/omp6_do_concurrent.f90 b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/omp6_do_concurrent.f90
new file mode 100644
index 0000000000..cddb5703c2
--- /dev/null
+++ b/Publications/GPU-Opt-Guide/OpenMP/28_offload_do_concurrent/omp6_do_concurrent.f90
@@ -0,0 +1,41 @@
+! Explicit-offload variant: each DO CONCURRENT loop carries its own
+! TARGET TEAMS LOOP directive inside a TARGET DATA region (x copied to the
+! device on entry, y given device storage with no host transfer, z copied
+! back on exit). The outer trip count is 24000, the same as in the other two
+! examples, so the printed wall-clock times (seconds) are comparable.
+program omp6_do_concurrent
+  use omp_lib
+  implicit none
+
+  integer :: i, outer
+  integer, dimension(10000) :: x, y, z
+  double precision :: t0, t1, time
+  x = 1
+  y = 0
+  z = 0
+  ! Dummy offload to warm up the device
+  !$omp target
+  !$omp end target
+
+  t0 = omp_get_wtime()
+! Snippet begin
+  do outer = 1, 24000
+    !call do_work_on_host_updating_x(x,...)
+    !$omp target data map(to: x) map(alloc: y) map(from: z)
+    !$omp target teams loop
+    do concurrent (i = 1:10000)
+      y(i) = x(i) + 1
+    enddo
+    !$omp target teams loop
+    do concurrent (i = 1:10000)
+      z(i) = y(i) + 1
+    enddo
+    !$omp end target data
+    !call do_work_on_host_using_z(z,...)
+  enddo
+! Snippet end
+  t1 = omp_get_wtime()
+  time = t1-t0
+
+  print *, time
+end program omp6_do_concurrent
diff --git a/Publications/GPU-Opt-Guide/OpenMP/CMakeLists.txt b/Publications/GPU-Opt-Guide/OpenMP/CMakeLists.txt
index 53a6c589cb..efaeca084c 100644
--- a/Publications/GPU-Opt-Guide/OpenMP/CMakeLists.txt
+++ b/Publications/GPU-Opt-Guide/OpenMP/CMakeLists.txt
@@ -18,3 +18,4 @@ add_subdirectory(23_omp_work_group)
 add_subdirectory(24_device_ptr_addr_clauses)
 add_subdirectory(25_fortran_example)
 add_subdirectory(26_omp_prefetch)
+add_subdirectory(28_offload_do_concurrent)