From b440d11408a4cbb75496e878c221513de2a7c420 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 7 Dec 2021 15:41:11 +0100 Subject: [PATCH 1/9] fixups for llvm --- CMake/OpenAccHelper.cmake | 5 ++++- CMakeLists.txt | 14 +++++++------- coreneuron/CMakeLists.txt | 2 ++ coreneuron/permute/cellorder.cpp | 4 +++- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 5838742f8..817b3da06 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -73,14 +73,17 @@ if(CORENRN_ENABLE_GPU) else() message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers") endif() + set(NVHPC_ACC_COMP_FLAGS) + set(NVHPC_ACC_LINK_FLAGS) # avoid PGI adding standard compliant "-A" flags - set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14) + # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14) string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}") # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors. # Use of `-Mautoinline` ensure that the necessary functions like `net_receive_kernel` are inlined # for OpenACC code generation. set(NVHPC_CXX_INLINE_FLAGS "-Mautoinline") + set(NVHPC_CXX_INLINE_FLAGS) endif() # ============================================================================= diff --git a/CMakeLists.txt b/CMakeLists.txt index d3e1950d0..c6fdebd11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,13 +127,13 @@ if(CORENRN_ENABLE_GPU) # CUDA_RESOLVE_DEVICE_SYMBOLS OFF) # Fail hard and early if we don't have the PGI/NVHPC compiler. - if(NOT CORENRN_HAVE_NVHPC_COMPILER) - message( - FATAL_ERROR - "GPU support is available via OpenACC using PGI/NVIDIA compilers." - " Use NVIDIA HPC SDK with -DCMAKE_C_COMPILER=nvc -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvc++" - ) - endif() + # if(NOT CORENRN_HAVE_NVHPC_COMPILER) + # message( + # FATAL_ERROR + # "GPU support is available via OpenACC using PGI/NVIDIA compilers." + # " Use NVIDIA HPC SDK with -DCMAKE_C_COMPILER=nvc -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvc++" + # ) + # endif() # Set some sensible default CUDA architectures. if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index d370df1df..7e4112a17 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -198,6 +198,8 @@ if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) target_link_libraries(coreneuron ${MPI_CXX_LIBRARIES}) endif() +target_link_libraries(coreneuron CUDA::cudart) + # this is where we handle dynamic mpi library build if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) # ~~~ diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp index 14feb31de..d9e559616 100644 --- a/coreneuron/permute/cellorder.cpp +++ b/coreneuron/permute/cellorder.cpp @@ -490,6 +490,7 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid bool has_subtrees_to_compute = true; // clang-format off + // OL211207: check if we need an OpenMP directive here. nrn_pragma_acc(loop seq) for (; has_subtrees_to_compute; ) { // ncycle loop #ifndef CORENEURON_ENABLE_GPU @@ -538,6 +539,7 @@ static void bksub_interleaved2(NrnThread* nt, #ifndef CORENEURON_ENABLE_GPU for (int i = root; i < lastroot; i += 1) { #else + // OL211207: check if we need an OpenMP directive here. nrn_pragma_acc(loop seq) for (int i = root; i < lastroot; i += warpsize) { #endif @@ -661,7 +663,7 @@ void solve_interleaved1(int ith) { lastnode [0:ncell], cellsize [0:ncell]) if (nt->compute_gpu) async(nt->stream_id)) - nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)) + // nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)) for (int icell = 0; icell < ncell; ++icell) { int icellsize = cellsize[icell]; triang_interleaved(nt, icell, icellsize, nstride, stride, lastnode); From 7e3e2c7c54e90ddbe9720643b9fc0a7238500372 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 7 Dec 2021 16:37:23 +0100 Subject: [PATCH 2/9] Tweaks towards an LLVM build. --- CMake/MakefileBuildOptions.cmake | 5 +++++ CMake/OpenAccHelper.cmake | 2 +- coreneuron/sim/scopmath/sparse_thread.cpp | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index fc0b0b551..0ebc7bea9 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -50,6 +50,11 @@ list(REMOVE_ITEM CORENRN_LINK_LIBS "Threads::Threads") # replicate CMake magic to transform system libs to -l foreach(link_lib ${CORENRN_LINK_LIBS}) + if(TARGET ${link_lib}) + get_property(link_flag TARGET ${link_lib} PROPERTY INTERFACE_LINK_LIBRARIES) + string(APPEND CORENRN_COMMON_LDFLAGS " ${link_flag}") + continue() + endif() if(${link_lib} MATCHES "\-l.*") string(APPEND CORENRN_COMMON_LDFLAGS " ${link_lib}") continue() diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 817b3da06..cdf2162c3 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -73,7 +73,7 @@ if(CORENRN_ENABLE_GPU) else() message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers") endif() - set(NVHPC_ACC_COMP_FLAGS) + set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}") set(NVHPC_ACC_LINK_FLAGS) # avoid PGI adding standard compliant "-A" flags # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14) diff --git a/coreneuron/sim/scopmath/sparse_thread.cpp b/coreneuron/sim/scopmath/sparse_thread.cpp index 71643430a..70da9aa21 100644 --- a/coreneuron/sim/scopmath/sparse_thread.cpp +++ b/coreneuron/sim/scopmath/sparse_thread.cpp @@ -211,8 +211,8 @@ static int matsol(SparseObj* so, int _iml) { /* Upper triangularization */ so->numop = 0; for (unsigned i = 1; i <= so->neqn; i++) { - Elm* pivot; - if (fabs((pivot = so->diag[i])->value[_iml]) <= ROUNDOFF) { + Elm* pivot{so->diag[i]}; + if (fabs(pivot->value[_iml]) <= ROUNDOFF) { return SINGULAR; } /* Eliminate all elements in pivot column */ From 72cb02a800af7e1a247910f432180cef5f6d8912 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 7 Dec 2021 16:47:03 +0100 Subject: [PATCH 3/9] Declare cnrn_ methods. --- coreneuron/utils/offload.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 078990107..08a04c868 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -6,6 +6,8 @@ # ============================================================================= */ #pragma once +#include + #define nrn_pragma_stringify(x) #x #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) #define nrn_pragma_acc(x) From fbf95519c4568de19df58fd2adeb87bbf2fd202a Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 9 Dec 2021 19:31:29 -0500 Subject: [PATCH 4/9] Various changes (including temporary) to make XL OpenMP offload build working * todo: temporary changes to OpenAccHelper.cmake, needs refinement * todo: see caliper linkling issue * todo: _OPENACC needs to be renamed CORENRN_ENABLE_GPU so that OpenMP based builds can use GPU offload. * todo: hardcoded CXX flags for quick build --- CMake/OpenAccHelper.cmake | 59 +++++++++++++++++++------------- coreneuron/utils/offload.hpp | 2 +- extra/nrnivmodl_core_makefile.in | 2 +- 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index cdf2162c3..76d9eb3ce 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -50,31 +50,44 @@ if(CORENRN_ENABLE_GPU) endif() set(CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}") endif() - # -cuda links CUDA libraries and also seems to be important to make the NVHPC do the device code - # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code - # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in - # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP - # code is compiled with the same CUDA version as the explicit CUDA code. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") - # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA - # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the - # same default compute capabilities as each other, particularly on GPU-less build machines. - foreach(compute_capability ${CMAKE_CUDA_ARCHITECTURES}) - string(APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}") - endforeach() - if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP") - # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available - # for a region then prefer OpenMP. - add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD) - string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu") - elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC") - # Only enable OpenACC offload for GPU - string(APPEND NVHPC_ACC_COMP_FLAGS " -acc") + if(CORENRN_HAVE_NVHPC_COMPILER) + # -cuda links CUDA libraries and also seems to be important to make the NVHPC do the device code + # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code + # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in + # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP + # code is compiled with the same CUDA version as the explicit CUDA code. + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") + # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA + # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the + # same default compute capabilities as each other, particularly on GPU-less build machines. + foreach(compute_capability ${CMAKE_CUDA_ARCHITECTURES}) + string(APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}") + endforeach() + if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP") + # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available + # for a region then prefer OpenMP. + add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD) + string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu") + elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC") + # Only enable OpenACC offload for GPU + string(APPEND NVHPC_ACC_COMP_FLAGS " -acc") + else() + message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers") + endif() + elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "XLClang") + set(NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport") + set(NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper") + + if(CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD) + # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available + # for a region then prefer OpenMP. + add_compile_definitions(CORENRN_PREFER_OPENMP_OFFLOAD) + endif() + elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}") else() - message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers") + message(FATAL_ERROR "${CMAKE_CXX_COMPILER_ID} is not supported in GPU builds.") endif() - set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}") - set(NVHPC_ACC_LINK_FLAGS) # avoid PGI adding standard compliant "-A" flags # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14) string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}") diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 08a04c868..b974c5463 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -9,7 +9,7 @@ #include #define nrn_pragma_stringify(x) #x -#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) +#if defined(CORENEURON_ENABLE_GPU) && defined(CORENRN_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) #define nrn_pragma_acc(x) #define nrn_pragma_omp(x) _Pragma(nrn_pragma_stringify(omp x)) #include diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 5bd424865..cf303d5d2 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -71,7 +71,7 @@ ifeq ($(wildcard $(CORENRN_PERLEXE)),) CORENRN_PERLEXE=perl endif -CXXFLAGS = @CORENRN_CXX_FLAGS@ +CXXFLAGS = @CORENRN_CXX_FLAGS@ -lcaliper -qcuda CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ @CORENRN_COMMON_COMPILE_DEFS@ $(INCLUDES) CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@ CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@ From 877988853cc1139c05ac964a311f3c25eedca39f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 10 Dec 2021 07:14:51 +0100 Subject: [PATCH 5/9] Use CORENEURON_PREFER_OPENMP_OFFLOAD. --- CMake/OpenAccHelper.cmake | 20 ++++++++++++++------ coreneuron/utils/offload.hpp | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 76d9eb3ce..44c8904a7 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -75,16 +75,24 @@ if(CORENRN_ENABLE_GPU) message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers") endif() elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "XLClang") - set(NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport") - set(NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper") - - if(CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD) + if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP") + set(NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport") + set(NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper") # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available # for a region then prefer OpenMP. - add_compile_definitions(CORENRN_PREFER_OPENMP_OFFLOAD) + add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD) + else() + message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with XLClang compilers") endif() elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") - set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}") + if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP") + set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}") + # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available + # for a region then prefer OpenMP. + add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD) + else() + message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with Clang compilers") + endif() else() message(FATAL_ERROR "${CMAKE_CXX_COMPILER_ID} is not supported in GPU builds.") endif() diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index b974c5463..08a04c868 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -9,7 +9,7 @@ #include #define nrn_pragma_stringify(x) #x -#if defined(CORENEURON_ENABLE_GPU) && defined(CORENRN_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) +#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) #define nrn_pragma_acc(x) #define nrn_pragma_omp(x) _Pragma(nrn_pragma_stringify(omp x)) #include From 4b8b09307a0d895a8dd7040205510360fdccc877 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 10 Dec 2021 08:49:08 +0100 Subject: [PATCH 6/9] Disable CUDA complation of partial piv and nrnran123. --- CMake/OpenAccHelper.cmake | 12 ++++++------ CMakeLists.txt | 6 +++--- coreneuron/CMakeLists.txt | 9 +++++---- extra/nrnivmodl_core_makefile.in | 2 +- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 44c8904a7..f439bb0a2 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -42,12 +42,12 @@ if(CORENRN_ENABLE_GPU) # ${CMAKE_CUDA_COMPILER_VERSION} find_package(CUDAToolkit 9.0 REQUIRED) # Be a bit paranoid - if(NOT ${CMAKE_CUDA_COMPILER_VERSION} STREQUAL ${CUDAToolkit_VERSION}) - message( - FATAL_ERROR - "CUDA compiler (${CMAKE_CUDA_COMPILER_VERSION}) and toolkit (${CUDAToolkit_VERSION}) versions are not the same!" - ) - endif() + # if(NOT ${CMAKE_CUDA_COMPILER_VERSION} STREQUAL ${CUDAToolkit_VERSION}) + # message( + # FATAL_ERROR + # "CUDA compiler (${CMAKE_CUDA_COMPILER_VERSION}) and toolkit (${CUDAToolkit_VERSION}) versions are not the same!" + # ) + # endif() set(CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}") endif() if(CORENRN_HAVE_NVHPC_COMPILER) diff --git a/CMakeLists.txt b/CMakeLists.txt index c6fdebd11..0c9ed89c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,9 +184,9 @@ if(CORENRN_ENABLE_GPU) # 3057 : Warning on ignoring __host__ annotation in some functions # 3085 : Warning on redeclaring a __host__ function as __host__ __device__ # ~~~ - set(CMAKE_CUDA_FLAGS - "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe --diag_suppress=3057,--diag_suppress=3085" - ) + # set(CMAKE_CUDA_FLAGS + # "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe --diag_suppress=3057,--diag_suppress=3085" + # ) if(CORENRN_ENABLE_NMODL) # NMODL supports both OpenACC and OpenMP target offload diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 7e4112a17..e0ba45bb1 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -32,6 +32,7 @@ file( "utils/*/*.c" "utils/*/*.cpp") file(GLOB_RECURSE CORENEURON_CUDA_FILES "*.cu") +list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cu") set(SCOPMATH_CODE_FILES "sim/scopmath/abort.cpp" "sim/scopmath/crout_thread.cpp" "sim/scopmath/newton_thread.cpp" "sim/scopmath/sparse_thread.cpp" "sim/scopmath/ssimplic_thread.cpp") @@ -129,16 +130,16 @@ if(CORENRN_ENABLE_GPU) # CMake options based on the LANGUAGE property # (https://cmake.org/cmake/help/latest/policy/CMP0119.html), so using a single .cu file and # setting LANGUAGE=CXX in non-GPU builds does not work. - list(REMOVE_ITEM CORENEURON_CODE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cpp") + #list(REMOVE_ITEM CORENEURON_CODE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cpp") list(APPEND CORENEURON_CODE_FILES ${CORENEURON_CUDA_FILES}) # Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. Calling # these functions from CUDA kernels presents no issue ... - if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) - list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) - endif() + # if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) + # list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) + # endif() endif() # ============================================================================= diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index cf303d5d2..5bd424865 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -71,7 +71,7 @@ ifeq ($(wildcard $(CORENRN_PERLEXE)),) CORENRN_PERLEXE=perl endif -CXXFLAGS = @CORENRN_CXX_FLAGS@ -lcaliper -qcuda +CXXFLAGS = @CORENRN_CXX_FLAGS@ CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ @CORENRN_COMMON_COMPILE_DEFS@ $(INCLUDES) CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@ CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@ From 439c5600d52e8184065abc7bdcf68375074e131e Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 17 Dec 2021 16:48:12 +0100 Subject: [PATCH 7/9] fixup. --- coreneuron/utils/offload.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 08a04c868..078990107 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -6,8 +6,6 @@ # ============================================================================= */ #pragma once -#include - #define nrn_pragma_stringify(x) #x #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) #define nrn_pragma_acc(x) From 6ff530c3beb6722b4f8f041e1b332b7d9443b91e Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Mon, 3 Jan 2022 16:53:17 +0100 Subject: [PATCH 8/9] Avoid setting additional linker flags at global scope * Setting CMAKE_EXE_LINKER_FLAGS was used also for NMODL when NMODL is built from submodule * In case of LLVM OpenMP offload, if binary is being created from the object that is not compiled with OpenMP flags, it causes link error: echo "int main() { return 0; } " > foo.cpp clang++ -c foo.cpp # adding openmp flags here doesn't cause below error clang++ foo.o -o foo -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda nvlink fatal : Could not open input file '/tmp/foo-0b9a1a.cubin' clang-12: error: nvlink command failed with exit code 1 (use -v to see invocation) --- CMake/OpenAccHelper.cmake | 1 - CMakeLists.txt | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index f439bb0a2..8f14fb754 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -98,7 +98,6 @@ if(CORENRN_ENABLE_GPU) endif() # avoid PGI adding standard compliant "-A" flags # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14) - string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}") # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors. # Use of `-Mautoinline` ensure that the necessary functions like `net_receive_kernel` are inlined diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c9ed89c7..4ceb1d147 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -419,6 +419,9 @@ endif() # ============================================================================= set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IGNORE_UNKNOWN_PRAGMA_FLAGS}") +# Set coreneuron specific linker flag after NMODL submodule is added +string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}") + # ============================================================================= # Add main directories # ============================================================================= From 57cecb59d051b83adbca41fbe49680521b6d719f Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Mon, 3 Jan 2022 17:00:52 +0100 Subject: [PATCH 9/9] Skip cuda files in the build (temporarily) for LLVM only build --- coreneuron/CMakeLists.txt | 2 ++ coreneuron/utils/memory_utils.cpp | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index e0ba45bb1..c9af89553 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -32,6 +32,8 @@ file( "utils/*/*.c" "utils/*/*.cpp") file(GLOB_RECURSE CORENEURON_CUDA_FILES "*.cu") +list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu") +list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/profile/cuda_profile.cu") list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cu") set(SCOPMATH_CODE_FILES "sim/scopmath/abort.cpp" "sim/scopmath/crout_thread.cpp" "sim/scopmath/newton_thread.cpp" diff --git a/coreneuron/utils/memory_utils.cpp b/coreneuron/utils/memory_utils.cpp index e066e1627..da0ef9455 100644 --- a/coreneuron/utils/memory_utils.cpp +++ b/coreneuron/utils/memory_utils.cpp @@ -108,7 +108,8 @@ void report_mem_usage(const char* message, bool all_ranks) { mem_avg); #ifdef CORENEURON_ENABLE_GPU if (corenrn_param.gpu) { - print_gpu_memory_usage(); + // TODO: temporary to avoid CUDA code usage with LLVM build + //print_gpu_memory_usage(); } #endif }