From b440d11408a4cbb75496e878c221513de2a7c420 Mon Sep 17 00:00:00 2001
From: Olli Lupton <oliver.lupton@epfl.ch>
Date: Tue, 7 Dec 2021 15:41:11 +0100
Subject: [PATCH 1/9] fixups for llvm

---
 CMake/OpenAccHelper.cmake        |  5 ++++-
 CMakeLists.txt                   | 14 +++++++-------
 coreneuron/CMakeLists.txt        |  2 ++
 coreneuron/permute/cellorder.cpp |  4 +++-
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
index 5838742f8..817b3da06 100644
--- a/CMake/OpenAccHelper.cmake
+++ b/CMake/OpenAccHelper.cmake
@@ -73,14 +73,17 @@ if(CORENRN_ENABLE_GPU)
   else()
     message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
   endif()
+  set(NVHPC_ACC_COMP_FLAGS)
+  set(NVHPC_ACC_LINK_FLAGS)
   # avoid PGI adding standard compliant "-A" flags
-  set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
+  # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
   string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}")
   # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
   # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
   # Use of `-Mautoinline` ensure that the necessary functions like `net_receive_kernel` are inlined
   # for OpenACC code generation.
   set(NVHPC_CXX_INLINE_FLAGS "-Mautoinline")
+  set(NVHPC_CXX_INLINE_FLAGS)
 endif()
 
 # =============================================================================
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3e1950d0..c6fdebd11 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,13 +127,13 @@ if(CORENRN_ENABLE_GPU)
   # CUDA_RESOLVE_DEVICE_SYMBOLS OFF)
 
   # Fail hard and early if we don't have the PGI/NVHPC compiler.
-  if(NOT CORENRN_HAVE_NVHPC_COMPILER)
-    message(
-      FATAL_ERROR
-        "GPU support is available via OpenACC using PGI/NVIDIA compilers."
-        " Use NVIDIA HPC SDK with -DCMAKE_C_COMPILER=nvc -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvc++"
-    )
-  endif()
+  # if(NOT CORENRN_HAVE_NVHPC_COMPILER)
+  #   message(
+  #     FATAL_ERROR
+  #       "GPU support is available via OpenACC using PGI/NVIDIA compilers."
+  #       " Use NVIDIA HPC SDK with -DCMAKE_C_COMPILER=nvc -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvc++"
+  #   )
+  # endif()
 
   # Set some sensible default CUDA architectures.
   if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt
index d370df1df..7e4112a17 100644
--- a/coreneuron/CMakeLists.txt
+++ b/coreneuron/CMakeLists.txt
@@ -198,6 +198,8 @@ if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC)
   target_link_libraries(coreneuron ${MPI_CXX_LIBRARIES})
 endif()
 
+target_link_libraries(coreneuron CUDA::cudart)
+
 # this is where we handle dynamic mpi library build
 if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC)
   # ~~~
diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp
index 14feb31de..d9e559616 100644
--- a/coreneuron/permute/cellorder.cpp
+++ b/coreneuron/permute/cellorder.cpp
@@ -490,6 +490,7 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid
     bool has_subtrees_to_compute = true;
 
     // clang-format off
+    // OL211207: check if we need an OpenMP directive here.
     nrn_pragma_acc(loop seq)
     for (; has_subtrees_to_compute; ) {  // ncycle loop
 #ifndef CORENEURON_ENABLE_GPU
@@ -538,6 +539,7 @@ static void bksub_interleaved2(NrnThread* nt,
 #ifndef CORENEURON_ENABLE_GPU
     for (int i = root; i < lastroot; i += 1) {
 #else
+    // OL211207: check if we need an OpenMP directive here.
     nrn_pragma_acc(loop seq)
     for (int i = root; i < lastroot; i += warpsize) {
 #endif
@@ -661,7 +663,7 @@ void solve_interleaved1(int ith) {
                                          lastnode [0:ncell],
                                          cellsize [0:ncell]) if (nt->compute_gpu)
                        async(nt->stream_id))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
+    // nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
     for (int icell = 0; icell < ncell; ++icell) {
         int icellsize = cellsize[icell];
         triang_interleaved(nt, icell, icellsize, nstride, stride, lastnode);

From 7e3e2c7c54e90ddbe9720643b9fc0a7238500372 Mon Sep 17 00:00:00 2001
From: Olli Lupton <oliver.lupton@epfl.ch>
Date: Tue, 7 Dec 2021 16:37:23 +0100
Subject: [PATCH 2/9] Tweaks towards an LLVM build.

---
 CMake/MakefileBuildOptions.cmake          | 5 +++++
 CMake/OpenAccHelper.cmake                 | 2 +-
 coreneuron/sim/scopmath/sparse_thread.cpp | 4 ++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake
index fc0b0b551..0ebc7bea9 100644
--- a/CMake/MakefileBuildOptions.cmake
+++ b/CMake/MakefileBuildOptions.cmake
@@ -50,6 +50,11 @@ list(REMOVE_ITEM CORENRN_LINK_LIBS "Threads::Threads")
 
 # replicate CMake magic to transform system libs to -l<libname>
 foreach(link_lib ${CORENRN_LINK_LIBS})
+  if(TARGET ${link_lib})
+    get_property(link_flag TARGET ${link_lib} PROPERTY INTERFACE_LINK_LIBRARIES)
+    string(APPEND CORENRN_COMMON_LDFLAGS " ${link_flag}")
+    continue()
+  endif()
   if(${link_lib} MATCHES "\-l.*")
     string(APPEND CORENRN_COMMON_LDFLAGS " ${link_lib}")
     continue()
diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
index 817b3da06..cdf2162c3 100644
--- a/CMake/OpenAccHelper.cmake
+++ b/CMake/OpenAccHelper.cmake
@@ -73,7 +73,7 @@ if(CORENRN_ENABLE_GPU)
   else()
     message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
   endif()
-  set(NVHPC_ACC_COMP_FLAGS)
+  set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}")
   set(NVHPC_ACC_LINK_FLAGS)
   # avoid PGI adding standard compliant "-A" flags
   # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
diff --git a/coreneuron/sim/scopmath/sparse_thread.cpp b/coreneuron/sim/scopmath/sparse_thread.cpp
index 71643430a..70da9aa21 100644
--- a/coreneuron/sim/scopmath/sparse_thread.cpp
+++ b/coreneuron/sim/scopmath/sparse_thread.cpp
@@ -211,8 +211,8 @@ static int matsol(SparseObj* so, int _iml) {
     /* Upper triangularization */
     so->numop = 0;
     for (unsigned i = 1; i <= so->neqn; i++) {
-        Elm* pivot;
-        if (fabs((pivot = so->diag[i])->value[_iml]) <= ROUNDOFF) {
+        Elm* pivot{so->diag[i]};
+        if (fabs(pivot->value[_iml]) <= ROUNDOFF) {
             return SINGULAR;
         }
         /* Eliminate all elements in pivot column */

From 72cb02a800af7e1a247910f432180cef5f6d8912 Mon Sep 17 00:00:00 2001
From: Olli Lupton <oliver.lupton@epfl.ch>
Date: Tue, 7 Dec 2021 16:47:03 +0100
Subject: [PATCH 3/9] Declare cnrn_ methods.

---
 coreneuron/utils/offload.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp
index 078990107..08a04c868 100644
--- a/coreneuron/utils/offload.hpp
+++ b/coreneuron/utils/offload.hpp
@@ -6,6 +6,8 @@
 # =============================================================================
 */
 #pragma once
+#include <cstddef>
+
 #define nrn_pragma_stringify(x) #x
 #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
 #define nrn_pragma_acc(x)

From fbf95519c4568de19df58fd2adeb87bbf2fd202a Mon Sep 17 00:00:00 2001
From: Pramod Kumbhar <kumbhar@login1.ascent.olcf.ornl.gov>
Date: Thu, 9 Dec 2021 19:31:29 -0500
Subject: [PATCH 4/9] Various changes (including temporary) to make XL OpenMP
 offload build working  * todo: temporary changes to OpenAccHelper.cmake,
 needs refinement  * todo: see caliper linkling issue  * todo: _OPENACC needs
 to be renamed CORENRN_ENABLE_GPU so that OpenMP          based builds can use
 GPU offload.  * todo: hardcoded CXX flags for quick build

---
 CMake/OpenAccHelper.cmake        | 59 +++++++++++++++++++-------------
 coreneuron/utils/offload.hpp     |  2 +-
 extra/nrnivmodl_core_makefile.in |  2 +-
 3 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
index cdf2162c3..76d9eb3ce 100644
--- a/CMake/OpenAccHelper.cmake
+++ b/CMake/OpenAccHelper.cmake
@@ -50,31 +50,44 @@ if(CORENRN_ENABLE_GPU)
     endif()
     set(CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
   endif()
-  # -cuda links CUDA libraries and also seems to be important to make the NVHPC do the device code
-  # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code
-  # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in
-  # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP
-  # code is compiled with the same CUDA version as the explicit CUDA code.
-  set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo")
-  # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
-  # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
-  # same default compute capabilities as each other, particularly on GPU-less build machines.
-  foreach(compute_capability ${CMAKE_CUDA_ARCHITECTURES})
-    string(APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}")
-  endforeach()
-  if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
-    # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
-    # for a region then prefer OpenMP.
-    add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD)
-    string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu")
-  elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC")
-    # Only enable OpenACC offload for GPU
-    string(APPEND NVHPC_ACC_COMP_FLAGS " -acc")
+  if(CORENRN_HAVE_NVHPC_COMPILER)
+    # -cuda links CUDA libraries and also seems to be important to make the NVHPC do the device code
+    # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code
+    # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in
+    # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP
+    # code is compiled with the same CUDA version as the explicit CUDA code.
+    set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo")
+    # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
+    # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
+    # same default compute capabilities as each other, particularly on GPU-less build machines.
+    foreach(compute_capability ${CMAKE_CUDA_ARCHITECTURES})
+      string(APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}")
+    endforeach()
+    if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
+      # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
+      # for a region then prefer OpenMP.
+      add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD)
+      string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu")
+    elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC")
+      # Only enable OpenACC offload for GPU
+      string(APPEND NVHPC_ACC_COMP_FLAGS " -acc")
+    else()
+      message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
+    endif()
+  elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "XLClang")
+    set(NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport")
+    set(NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper")
+
+    if(CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD)
+      # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
+      # for a region then prefer OpenMP.
+      add_compile_definitions(CORENRN_PREFER_OPENMP_OFFLOAD)
+    endif()
+  elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
+    set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}")
   else()
-    message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
+    message(FATAL_ERROR "${CMAKE_CXX_COMPILER_ID} is not supported in GPU builds.")
   endif()
-  set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}")
-  set(NVHPC_ACC_LINK_FLAGS)
   # avoid PGI adding standard compliant "-A" flags
   # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
   string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}")
diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp
index 08a04c868..b974c5463 100644
--- a/coreneuron/utils/offload.hpp
+++ b/coreneuron/utils/offload.hpp
@@ -9,7 +9,7 @@
 #include <cstddef>
 
 #define nrn_pragma_stringify(x) #x
-#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
+#if defined(CORENEURON_ENABLE_GPU) && defined(CORENRN_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
 #define nrn_pragma_acc(x)
 #define nrn_pragma_omp(x) _Pragma(nrn_pragma_stringify(omp x))
 #include <omp.h>
diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in
index 5bd424865..cf303d5d2 100644
--- a/extra/nrnivmodl_core_makefile.in
+++ b/extra/nrnivmodl_core_makefile.in
@@ -71,7 +71,7 @@ ifeq ($(wildcard $(CORENRN_PERLEXE)),)
   CORENRN_PERLEXE=perl
 endif
 
-CXXFLAGS = @CORENRN_CXX_FLAGS@
+CXXFLAGS = @CORENRN_CXX_FLAGS@ -lcaliper -qcuda
 CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ @CORENRN_COMMON_COMPILE_DEFS@ $(INCLUDES)
 CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@
 CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@

From 877988853cc1139c05ac964a311f3c25eedca39f Mon Sep 17 00:00:00 2001
From: Olli Lupton <oliver.lupton@epfl.ch>
Date: Fri, 10 Dec 2021 07:14:51 +0100
Subject: [PATCH 5/9] Use CORENEURON_PREFER_OPENMP_OFFLOAD.

---
 CMake/OpenAccHelper.cmake    | 20 ++++++++++++++------
 coreneuron/utils/offload.hpp |  2 +-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
index 76d9eb3ce..44c8904a7 100644
--- a/CMake/OpenAccHelper.cmake
+++ b/CMake/OpenAccHelper.cmake
@@ -75,16 +75,24 @@ if(CORENRN_ENABLE_GPU)
       message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
     endif()
   elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "XLClang")
-    set(NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport")
-    set(NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper")
-
-    if(CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD)
+    if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
+      set(NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport")
+      set(NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper")
       # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
       # for a region then prefer OpenMP.
-      add_compile_definitions(CORENRN_PREFER_OPENMP_OFFLOAD)
+      add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD)
+    else()
+      message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with XLClang compilers")
     endif()
   elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
-    set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}")
+    if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
+      set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}")
+      # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
+      # for a region then prefer OpenMP.
+      add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD)
+    else()
+      message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with Clang compilers")
+    endif()
   else()
     message(FATAL_ERROR "${CMAKE_CXX_COMPILER_ID} is not supported in GPU builds.")
   endif()
diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp
index b974c5463..08a04c868 100644
--- a/coreneuron/utils/offload.hpp
+++ b/coreneuron/utils/offload.hpp
@@ -9,7 +9,7 @@
 #include <cstddef>
 
 #define nrn_pragma_stringify(x) #x
-#if defined(CORENEURON_ENABLE_GPU) && defined(CORENRN_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
+#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
 #define nrn_pragma_acc(x)
 #define nrn_pragma_omp(x) _Pragma(nrn_pragma_stringify(omp x))
 #include <omp.h>

From 4b8b09307a0d895a8dd7040205510360fdccc877 Mon Sep 17 00:00:00 2001
From: Olli Lupton <oliver.lupton@epfl.ch>
Date: Fri, 10 Dec 2021 08:49:08 +0100
Subject: [PATCH 6/9] Disable CUDA complation of partial piv and nrnran123.

---
 CMake/OpenAccHelper.cmake        | 12 ++++++------
 CMakeLists.txt                   |  6 +++---
 coreneuron/CMakeLists.txt        |  9 +++++----
 extra/nrnivmodl_core_makefile.in |  2 +-
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
index 44c8904a7..f439bb0a2 100644
--- a/CMake/OpenAccHelper.cmake
+++ b/CMake/OpenAccHelper.cmake
@@ -42,12 +42,12 @@ if(CORENRN_ENABLE_GPU)
     # ${CMAKE_CUDA_COMPILER_VERSION}
     find_package(CUDAToolkit 9.0 REQUIRED)
     # Be a bit paranoid
-    if(NOT ${CMAKE_CUDA_COMPILER_VERSION} STREQUAL ${CUDAToolkit_VERSION})
-      message(
-        FATAL_ERROR
-          "CUDA compiler (${CMAKE_CUDA_COMPILER_VERSION}) and toolkit (${CUDAToolkit_VERSION}) versions are not the same!"
-      )
-    endif()
+    # if(NOT ${CMAKE_CUDA_COMPILER_VERSION} STREQUAL ${CUDAToolkit_VERSION})
+    #   message(
+    #     FATAL_ERROR
+    #       "CUDA compiler (${CMAKE_CUDA_COMPILER_VERSION}) and toolkit (${CUDAToolkit_VERSION}) versions are not the same!"
+    #   )
+    # endif()
     set(CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
   endif()
   if(CORENRN_HAVE_NVHPC_COMPILER)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c6fdebd11..0c9ed89c7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -184,9 +184,9 @@ if(CORENRN_ENABLE_GPU)
   # 3057 : Warning on ignoring __host__ annotation in some functions
   # 3085 : Warning on redeclaring a __host__ function as __host__ __device__
   # ~~~
-  set(CMAKE_CUDA_FLAGS
-      "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe --diag_suppress=3057,--diag_suppress=3085"
-  )
+  # set(CMAKE_CUDA_FLAGS
+  #     "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe --diag_suppress=3057,--diag_suppress=3085"
+  # )
 
   if(CORENRN_ENABLE_NMODL)
     # NMODL supports both OpenACC and OpenMP target offload
diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt
index 7e4112a17..e0ba45bb1 100644
--- a/coreneuron/CMakeLists.txt
+++ b/coreneuron/CMakeLists.txt
@@ -32,6 +32,7 @@ file(
   "utils/*/*.c"
   "utils/*/*.cpp")
 file(GLOB_RECURSE CORENEURON_CUDA_FILES "*.cu")
+list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cu")
 set(SCOPMATH_CODE_FILES
     "sim/scopmath/abort.cpp" "sim/scopmath/crout_thread.cpp" "sim/scopmath/newton_thread.cpp"
     "sim/scopmath/sparse_thread.cpp" "sim/scopmath/ssimplic_thread.cpp")
@@ -129,16 +130,16 @@ if(CORENRN_ENABLE_GPU)
   # CMake <v3.20 does not pass explicit -x <lang> options based on the LANGUAGE property
   # (https://cmake.org/cmake/help/latest/policy/CMP0119.html), so using a single .cu file and
   # setting LANGUAGE=CXX in non-GPU builds does not work.
-  list(REMOVE_ITEM CORENEURON_CODE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cpp")
+  #list(REMOVE_ITEM CORENEURON_CODE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cpp")
   list(APPEND CORENEURON_CODE_FILES ${CORENEURON_CUDA_FILES})
 
   # Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from
   # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with
   # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. Calling
   # these functions from CUDA kernels presents no issue ...
-  if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu)
-    list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu)
-  endif()
+  # if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu)
+  #   list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu)
+  # endif()
 endif()
 
 # =============================================================================
diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in
index cf303d5d2..5bd424865 100644
--- a/extra/nrnivmodl_core_makefile.in
+++ b/extra/nrnivmodl_core_makefile.in
@@ -71,7 +71,7 @@ ifeq ($(wildcard $(CORENRN_PERLEXE)),)
   CORENRN_PERLEXE=perl
 endif
 
-CXXFLAGS = @CORENRN_CXX_FLAGS@ -lcaliper -qcuda
+CXXFLAGS = @CORENRN_CXX_FLAGS@
 CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ @CORENRN_COMMON_COMPILE_DEFS@ $(INCLUDES)
 CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@
 CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@

From 439c5600d52e8184065abc7bdcf68375074e131e Mon Sep 17 00:00:00 2001
From: Olli Lupton <oliver.lupton@epfl.ch>
Date: Fri, 17 Dec 2021 16:48:12 +0100
Subject: [PATCH 7/9] fixup.

---
 coreneuron/utils/offload.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp
index 08a04c868..078990107 100644
--- a/coreneuron/utils/offload.hpp
+++ b/coreneuron/utils/offload.hpp
@@ -6,8 +6,6 @@
 # =============================================================================
 */
 #pragma once
-#include <cstddef>
-
 #define nrn_pragma_stringify(x) #x
 #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
 #define nrn_pragma_acc(x)

From 6ff530c3beb6722b4f8f041e1b332b7d9443b91e Mon Sep 17 00:00:00 2001
From: Pramod Kumbhar <pramod.s.kumbhar@gmail.com>
Date: Mon, 3 Jan 2022 16:53:17 +0100
Subject: [PATCH 8/9] Avoid setting additional linker flags at global scope  *
 Setting CMAKE_EXE_LINKER_FLAGS was used also for NMODL    when NMODL is built
 from submodule  * In case of LLVM OpenMP offload, if binary is being created 
   from the object that is not compiled with OpenMP flags,    it causes link
 error:

   echo "int main() { return 0; } " > foo.cpp
   clang++ -c foo.cpp                                           # adding openmp flags here doesn't cause below error
   clang++ foo.o -o foo -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda

     nvlink fatal   : Could not open input file '/tmp/foo-0b9a1a.cubin'
     clang-12: error: nvlink command failed with exit code 1 (use -v to see invocation)
---
 CMake/OpenAccHelper.cmake | 1 -
 CMakeLists.txt            | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
index f439bb0a2..8f14fb754 100644
--- a/CMake/OpenAccHelper.cmake
+++ b/CMake/OpenAccHelper.cmake
@@ -98,7 +98,6 @@ if(CORENRN_ENABLE_GPU)
   endif()
   # avoid PGI adding standard compliant "-A" flags
   # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
-  string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}")
   # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
   # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
   # Use of `-Mautoinline` ensure that the necessary functions like `net_receive_kernel` are inlined
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c9ed89c7..4ceb1d147 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -419,6 +419,9 @@ endif()
 # =============================================================================
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IGNORE_UNKNOWN_PRAGMA_FLAGS}")
 
+# Set coreneuron specific linker flag after NMODL submodule is added
+string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}")
+
 # =============================================================================
 # Add main directories
 # =============================================================================

From 57cecb59d051b83adbca41fbe49680521b6d719f Mon Sep 17 00:00:00 2001
From: Pramod Kumbhar <pramod.s.kumbhar@gmail.com>
Date: Mon, 3 Jan 2022 17:00:52 +0100
Subject: [PATCH 9/9] Skip cuda files in the build (temporarily) for LLVM only
 build

---
 coreneuron/CMakeLists.txt         | 2 ++
 coreneuron/utils/memory_utils.cpp | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt
index e0ba45bb1..c9af89553 100644
--- a/coreneuron/CMakeLists.txt
+++ b/coreneuron/CMakeLists.txt
@@ -32,6 +32,8 @@ file(
   "utils/*/*.c"
   "utils/*/*.cpp")
 file(GLOB_RECURSE CORENEURON_CUDA_FILES "*.cu")
+list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu")
+list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/profile/cuda_profile.cu")
 list(REMOVE_ITEM CORENEURON_CUDA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cu")
 set(SCOPMATH_CODE_FILES
     "sim/scopmath/abort.cpp" "sim/scopmath/crout_thread.cpp" "sim/scopmath/newton_thread.cpp"
diff --git a/coreneuron/utils/memory_utils.cpp b/coreneuron/utils/memory_utils.cpp
index e066e1627..da0ef9455 100644
--- a/coreneuron/utils/memory_utils.cpp
+++ b/coreneuron/utils/memory_utils.cpp
@@ -108,7 +108,8 @@ void report_mem_usage(const char* message, bool all_ranks) {
                mem_avg);
 #ifdef CORENEURON_ENABLE_GPU
         if (corenrn_param.gpu) {
-            print_gpu_memory_usage();
+            // TODO: temporary to avoid CUDA code usage with LLVM build
+            //print_gpu_memory_usage();
         }
 #endif
     }