From b664c118cbd8eda70ead02081c92cea3882686bc Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Thu, 29 Feb 2024 10:31:09 +0100 Subject: [PATCH 1/3] Various smaller fixes or improvements * Avoid calling offloadEnsureMallocHeapSize (if offloading the GRID code is disabled). * Suppress error message when using "which" for an unknown command (exts/build_dbcsr). * Improved finding OpenCL header file (exts/build_dbcsr). --- exts/build_dbcsr/Makefile | 24 ++++++++++++++---------- src/grid/common/grid_library.c | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/exts/build_dbcsr/Makefile b/exts/build_dbcsr/Makefile index 5bfaf89d50..eb0ad246ea 100644 --- a/exts/build_dbcsr/Makefile +++ b/exts/build_dbcsr/Makefile @@ -299,22 +299,26 @@ OPENCL_DEFAULT := $(wildcard $(LIBSMM_ACC_ABS_DIR)/../opencl/smm/tune_multiply.c OPENCL_WITHGPU := $(wildcard $(LIBSMM_ACC_ABS_DIR)/../opencl/smm/params/tune_multiply_$(GPUVER).csv) OPENCL_PARAMS := $(if $(OPENCL_WITHGPU),$(OPENCL_WITHGPU),$(OPENCL_DEFAULT)) OPENCL_PARDEP := $(if $(ACC_MAKEDEP),$(shell $(ACC_MAKEDEP) $(LIBSMM_ACC_ABS_DIR)/../opencl/smm/.with_gpu $(GPUVER))) -$(LIBSMM_ACC_ABS_DIR)/../opencl/smm/opencl_kernels.h: $(OPENCL_KRNLGEN) $(OPENCL_KERNELS) $(OPENCL_PARAMS) $(OPENCL_PARDEP) +OPENCL_COMMON := $(wildcard $(LIBSMM_ACC_ABS_DIR)/../opencl/common/*.h) +$(LIBSMM_ACC_ABS_DIR)/../opencl/smm/opencl_kernels.h: $(OPENCL_KRNLGEN) $(OPENCL_KERNELS) $(OPENCL_COMMON) \ + $(OPENCL_PARAMS) $(OPENCL_PARDEP) $(OPENCL_KRNLGEN) $(OPENCL_KERNELS) $(OPENCL_PARAMS) $@ opencl_libsmm.o: opencl_libsmm.c $(LIBSMM_ACC_ABS_DIR)/../opencl/smm/opencl_kernels.h ifeq (Darwin,$(shell uname)) LDFLAGS += -framework OpenCL else # OpenCL include directory (cl.h not installed per "opencl-headers" package) - ifeq (,$(CUDATOOLKIT_HOME)) - CUDATOOLKIT_HOME := $(NVSDKCOMPUTE_ROOT) - endif - ifeq (,$(CUDATOOLKIT_HOME)) - NVCC := $(call which,nvcc) - CUDATOOLKIT_HOME := $(if $(NVCC),$(abspath $(dir $(NVCC))/..)) - endif - ifneq 
(,$(CUDATOOLKIT_HOME)) - CFLAGS += -I$(CUDATOOLKIT_HOME)/include + NVCC := $(shell which nvcc 2>/dev/null) + NVCC_PATH := $(if $(NVCC),$(wildcard $(dir $(NVCC))/..)) + CUDA_FILE := $(if $(NVCC_PATH),$(wildcard $(NVCC_PATH)/../cuda/include/cuda.h)) + CUDA_PATH := $(if $(CUDA_FILE),$(NVCC_PATH)/../cuda,$(NVCC_PATH)) + CUDA_LIBS := $(if $(wildcard $(CUDA_PATH)/lib64),lib64,lib) + ifneq (,$(CUDA_PATH)) + CFLAGS += -I$(CUDA_PATH)/include + else ifneq (,$(wildcard $(OPENCL_ROOT)/include/CL/cl.h)) + CFLAGS += -I$(OPENCL_ROOT)/include + else ifneq (,$(wildcard $(OCL_ROOT)/include/CL/cl.h)) + CFLAGS += -I$(OCL_ROOT)/include endif endif endif diff --git a/src/grid/common/grid_library.c b/src/grid/common/grid_library.c index 34dc3fd1b4..8cfef47ee6 100644 --- a/src/grid/common/grid_library.c +++ b/src/grid/common/grid_library.c @@ -52,7 +52,7 @@ void grid_library_init(void) { abort(); } -#if defined(__OFFLOAD) +#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID) // Reserve global GPU memory for storing the intermediate Cab matrix blocks. // CUDA does not allow to increase this limit after a kernel was launched. // Unfortunately, the required memory is hard to predict because we neither From 17b9f55875b8621fd59aa803f773f85fe58e4c89 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Thu, 29 Feb 2024 11:14:49 +0100 Subject: [PATCH 2/3] Make use of SIMD directives - Compilers allow to toggle OpenMP SIMD separate from targeting multicore. - Note: -fopenmp may not enable SIMD directives (-fopenmp-simd). 
--- src/grid/cpu/grid_cpu_collint.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/grid/cpu/grid_cpu_collint.h b/src/grid/cpu/grid_cpu_collint.h index 828d96159c..b5a4937887 100644 --- a/src/grid/cpu/grid_cpu_collint.h +++ b/src/grid/cpu/grid_cpu_collint.h @@ -46,6 +46,7 @@ ortho_cx_to_grid_scalar(const int lp, const int cmax, const int i, #if (GRID_DO_COLLOCATE) // collocate double reg[4] = {0.0, 0.0, 0.0, 0.0}; +#pragma omp simd reduction(+ : reg) for (int lxp = 0; lxp <= lp; lxp++) { const double p = pol[0][lxp][i + cmax]; reg[0] += cx[lxp * 4 + 0] * p; @@ -61,6 +62,7 @@ ortho_cx_to_grid_scalar(const int lp, const int cmax, const int i, #else // integrate const double reg[4] = {*grid_0, *grid_1, *grid_2, *grid_3}; +#pragma omp simd for (int lxp = 0; lxp <= lp; lxp++) { const double p = pol[0][lxp][i + cmax]; cx[lxp * 4 + 0] += reg[0] * p; @@ -454,7 +456,7 @@ ortho_cxyz_to_grid(const int lp, const double zetp, const double dh[3][3], } const int(*map)[2 * cmax + 1] = (const int(*)[2 * cmax + 1]) map_mutable; - // Precompute lenght of sections with homogeneous cube to grid mapping. + // Precompute length of sections with homogeneous cube to grid mapping. int sections_mutable[3][2 * cmax + 1]; for (int i = 0; i < 3; i++) { for (int kg = 2 * cmax; kg >= 0; kg--) { @@ -739,7 +741,7 @@ general_precompute_mapping(const int index_min, const int index_max, } } - // Precompute lenght of sections with homogeneous cube to grid mapping. + // Precompute length of sections with homogeneous cube to grid mapping. 
const int range = index_max - index_min + 1; for (int kg = range - 1; kg >= 0; kg--) { if (kg == range - 1 || map[kg] != map[kg + 1] - 1) { From 5d0137bad4b144682bef61de2426227b81169af3 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Thu, 29 Feb 2024 11:23:47 +0100 Subject: [PATCH 3/3] Fixed some typos in code comments and abort messages --- src/efield_utils.F | 4 ++-- src/motion/mc/mc_moves.F | 2 +- src/start/input_cp2k_motion.F | 2 +- src/tmc/input_cp2k_tmc.F | 2 +- src/tmc/tmc_setup.F | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/efield_utils.F b/src/efield_utils.F index b5f2f9a468..5959812ebf 100644 --- a/src/efield_utils.F +++ b/src/efield_utils.F @@ -54,9 +54,9 @@ MODULE efield_utils ! ************************************************************************************************** !> \brief Replace the original implementation of the electric-electronic -!> interaction in the lenght gauge. This calculation is no longer done in +!> interaction in the length gauge. This calculation is no longer done in !> the grid but using matrices to match the velocity gauge implementation. -!> Note: The energy is store in energy%core and computed later on. +!> Note: The energy is stored in energy%core and computed later on. !> \param qs_env ... !> \author Guillaume Le Breton (02.23) ! ************************************************************************************************** diff --git a/src/motion/mc/mc_moves.F b/src/motion/mc/mc_moves.F index 853b7d83f3..150e186ec4 100644 --- a/src/motion/mc/mc_moves.F +++ b/src/motion/mc/mc_moves.F @@ -1051,7 +1051,7 @@ SUBROUTINE mc_volume_move(mc_par, force_env, moves, move_updates, & IF (ionode) rand = rng_stream%next() CALL group%bcast(rand, source) -! find the test cell lenghts for the discrete volume move +! find the test cell lengths for the discrete volume move IF (ldiscrete) THEN IF (rand .LT. 0.5_dp) THEN lincrease = .TRUE. 
diff --git a/src/start/input_cp2k_motion.F b/src/start/input_cp2k_motion.F index 83f88b807a..44fbebacf8 100644 --- a/src/start/input_cp2k_motion.F +++ b/src/start/input_cp2k_motion.F @@ -1039,7 +1039,7 @@ SUBROUTINE create_cell_opt_section(section) CALL keyword_create( & keyword, __LOCATION__, name="KEEP_ANGLES", & - description="Keep angles between the cell vectors constant, but allow the lenghts of the"// & + description="Keep angles between the cell vectors constant, but allow the lengths of the"// & " cell vectors to change independently."// & " Albeit general, this is most useful for triclinic cells, to enforce higher symmetry, see KEEP_SYMMETRY.", & usage="KEEP_ANGLES TRUE", default_l_val=.FALSE., lone_keyword_l_val=.TRUE.) diff --git a/src/tmc/input_cp2k_tmc.F b/src/tmc/input_cp2k_tmc.F index 5fc464836c..25a77fb621 100644 --- a/src/tmc/input_cp2k_tmc.F +++ b/src/tmc/input_cp2k_tmc.F @@ -649,7 +649,7 @@ SUBROUTINE create_TMC_ana_kinds(section) CALL keyword_create(keyword, __LOCATION__, & name="G_R", & description="Radial Distribution Function for each pair of atoms "// & - "using the amount of specified bins within MAX(cell_lenght)/2", & + "using the amount of specified bins within MAX(cell_length)/2", & usage="G_R or G_R {INTEGER}", & default_i_val=-1, lone_keyword_i_val=-1) CALL section_add_keyword(section, keyword) diff --git a/src/tmc/tmc_setup.F b/src/tmc/tmc_setup.F index f48c61b6dc..5fc6f842ec 100644 --- a/src/tmc/tmc_setup.F +++ b/src/tmc/tmc_setup.F @@ -753,7 +753,7 @@ SUBROUTINE tmc_read_input(input, tmc_env) IF (SIZE(r_arr_tmp) .NE. tmc_env%params%dim_per_elem) & CPABORT("The entered sub box sizes does not fit in number of dimensions.") IF (ANY(r_arr_tmp .LE. 0.0_dp)) & - CPABORT("The entered sub box lenghts should be greater than 0.") + CPABORT("The entered sub box lengths should be greater than 0.") DO itmp = 1, SIZE(tmc_env%params%sub_box_size) tmc_env%params%sub_box_size(itmp) = r_arr_tmp(itmp)/au2a END DO