Skip to content

Commit

Permalink
Merge pull request #546 from DrTimothyAldenDavis/dev2
Browse files Browse the repository at this point in the history
GraphBLAS: CUDA fixes, demo output
  • Loading branch information
DrTimothyAldenDavis authored Nov 29, 2023
2 parents b0c0d61 + ea2d215 commit 2895129
Show file tree
Hide file tree
Showing 29 changed files with 5,890 additions and 3,263 deletions.
6 changes: 3 additions & 3 deletions GraphBLAS/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ if ( SUITESPARSE_CUDA )
# with CUDA and RMM
add_subdirectory ( CUDA )
set ( GB_CUDA GraphBLAS_CUDA ${CUDA_LIBRARIES} )
set ( GB_RMM rmm_wrap ${CUDA_LIBRARIES} )
set ( GB_RMM RMM_wrap ${CUDA_LIBRARIES} )
add_subdirectory ( rmm_wrap )
include_directories ( "rmm_wrap" ${CUDA_INCLUDE_DIRS}
"/usr/local/cuda/include/cub" )
Expand Down Expand Up @@ -259,7 +259,7 @@ if ( BUILD_SHARED_LIBS )

if ( SUITESPARSE_CUDA )
add_dependencies ( GraphBLAS GraphBLAS_CUDA )
# add_dependencies ( GraphBLAS rmm_wrap )
add_dependencies ( GraphBLAS RMM_wrap )
target_compile_definitions ( GraphBLAS PRIVATE "SUITESPARSE_CUDA" )
endif ( )

Expand Down Expand Up @@ -300,7 +300,7 @@ if ( BUILD_STATIC_LIBS )
if ( SUITESPARSE_CUDA )
add_dependencies ( GraphBLAS_static GraphBLAS_CUDA )
set ( GRAPHBLAS_STATIC_MODULES "${GRAPHBLAS_STATIC_MODULES} GraphBLAS_CUDA" )
# add_dependencies ( GraphBLAS_static rmm_wrap )
add_dependencies ( GraphBLAS_static RMM_wrap )
target_compile_definitions ( GraphBLAS_static PRIVATE "SUITESPARSE_CUDA" )
endif ( )

Expand Down
5 changes: 5 additions & 0 deletions GraphBLAS/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set ( CMAKE_CUDA_FLAGS "-cudart=static -lineinfo " )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++17 -fPIC " )

add_compile_definitions ( GBNCPUFEAT )
add_compile_definitions ( GBCUDA_CPLUSPLUS )

message ( STATUS "C++ flags for CUDA: ${CMAKE_CXX_FLAGS}" )

Expand All @@ -54,6 +55,7 @@ set ( GRAPHBLAS_CUDA_INCLUDES
../Source/Shared
../Source/Template
../Source/Factories
Template
../Include
../CUDA )

Expand Down Expand Up @@ -149,6 +151,8 @@ endif ( )
# test suite for the CUDA kernels
#-------------------------------------------------------------------------------

if ( 0 )

# 1. Execute enumify/stringify/jitify logic to compile ptx kernels and
# compile/link w/ relevant *.cu files.

Expand Down Expand Up @@ -266,3 +270,4 @@ target_include_directories ( graphblascuda_test
${CUDAToolkit_INCLUDE_DIRS}
${GRAPHBLAS_CUDA_INCLUDES} )

endif ( )
1 change: 1 addition & 0 deletions GraphBLAS/CUDA/GB_AxB_dot3_cuda_branch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

// Decide branch direction for GPU use for the dot-product MxM

#include "GraphBLAS.h"
extern "C"
{
#include "GB_mxm.h"
Expand Down
5 changes: 3 additions & 2 deletions GraphBLAS/CUDA/GB_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ extern "C"
#include "GB_compiler.h"
#include "GB_cpu_features.h"
#include "GB_warnings.h"
#define GB_LIBRARY
#include "GraphBLAS.h"
}

#define GB_LIBRARY
#include "GraphBLAS.h"

extern "C"
{
#include <cassert>
Expand Down
5 changes: 3 additions & 2 deletions GraphBLAS/CUDA/GB_cuda_init.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//------------------------------------------------------------------------------
// GB_cuda_init: initialize the GPUs for use by GraphBLAS
// GraphBLAS/CUDA/GB_cuda_init: initialize the GPUs for use by GraphBLAS
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2022, All Rights Reserved.
// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2023, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------
Expand All @@ -12,6 +12,7 @@
// assumed. Then each GPU is "warmed up" by allocating a small amount of
// memory.

#undef GBCUDA_CPLUSPLUS
#include "GB.h"

GrB_Info GB_cuda_init (void)
Expand Down
2 changes: 2 additions & 0 deletions GraphBLAS/CUDA/GB_cuda_type_bits.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: Apache-2.0

#if 0
#include "GB.h"

size_t GB_cuda_type_bits (GB_Type_code);
Expand All @@ -25,3 +26,4 @@ size_t GB_cuda_type_bits (GB_Type_code type_code)
}
}

#endif
1 change: 1 addition & 0 deletions GraphBLAS/CUDA/GB_cuda_type_branch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

// All built-in types pass this rule.

#include "GraphBLAS.h"
extern "C"
{
#include "GB.h"
Expand Down
1 change: 1 addition & 0 deletions GraphBLAS/CUDA/GB_reduce_to_scalar_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// threadblock. Then GB_reduce_to_scalar on the CPU sees this V as the result,
// and calls itself recursively to continue the reduction.

#include "GraphBLAS.h"
extern "C"
{
#include "GB_reduce.h"
Expand Down
52 changes: 36 additions & 16 deletions GraphBLAS/Config/GraphBLAS.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// This GraphBLAS.h file contains GraphBLAS definitions for user applications
// to #include. A few functions and variables with the prefix GB_ need to be
// defined in this file and are thus technically visible to the user, but they
// must not be accessed in user code. They are here only so that the ANSI C11
// must not be accessed in user code. They are here only so that the C11
// _Generic feature can be used in the user-accessible polymorphic functions,
// or to implement a fast GxB_Iterator using macros.

Expand Down Expand Up @@ -110,24 +110,32 @@
#define GB_GLOBAL extern
#endif

// GraphBLAS requires an ANSI C11 compiler for its polymorphic functions (using
// GraphBLAS requires a C11 compiler for its polymorphic functions (using
// the _Generic keyword), but it can be used in a C90 compiler if those
// functions are disabled.

// With ANSI C11 and later, _Generic keyword and polymorphic functions can be
// With C11 and later, the _Generic keyword and polymorphic functions can be
// used. Earlier versions of the language do not have this feature.

#ifdef __STDC_VERSION__
// ANSI C17: 201710L
// ANSI C11: 201112L
// ANSI C99: 199901L
// ANSI C95: 199409L
// C17: 201710L
// C11: 201112L
// C99: 199901L
// C95: 199409L
#define GxB_STDC_VERSION __STDC_VERSION__
#else
// assume ANSI C90 / C89
// assume C90 / C89
#define GxB_STDC_VERSION 199001L
#endif

//------------------------------------------------------------------------------
// CUDA (currently experimental, not for production use)
//------------------------------------------------------------------------------

#ifndef SUITESPARSE_CUDA
#cmakedefine SUITESPARSE_CUDA
#endif

//------------------------------------------------------------------------------
// definitions for complex types
//------------------------------------------------------------------------------
Expand All @@ -138,7 +146,19 @@
#ifndef GXB_COMPLEX_H
#define GXB_COMPLEX_H

#if defined (_MSC_VER) && !(defined (__INTEL_COMPILER) || defined(__INTEL_CLANG_COMPILER))
#if defined ( GBCUDA_CPLUSPLUS )

// C++ complex types for CUDA
#include <cmath>
#include <complex>
#undef I
typedef std::complex<float> GxB_FC32_t ;
typedef std::complex<double> GxB_FC64_t ;
#define GxB_CMPLXF(r,i) GxB_FC32_t(r,i)
#define GxB_CMPLX(r,i) GxB_FC64_t(r,i)
#define GB_HAS_CMPLX_MACROS 1

#elif defined (_MSC_VER) && !(defined (__INTEL_COMPILER) || defined(__INTEL_CLANG_COMPILER))

// Microsoft Windows complex types for C
#include <complex.h>
Expand All @@ -151,13 +171,13 @@

#else

// ANSI C11 complex types
// C11 complex types
#include <complex.h>
#undef I
typedef float _Complex GxB_FC32_t ;
typedef double _Complex GxB_FC64_t ;
#if (defined (CMPLX) && defined (CMPLXF))
// use the ANSI C11 CMPLX and CMPLXF macros
// use the C11 CMPLX and CMPLXF macros
#define GxB_CMPLX(r,i) CMPLX (r,i)
#define GxB_CMPLXF(r,i) CMPLXF (r,i)
#define GB_HAS_CMPLX_MACROS 1
Expand Down Expand Up @@ -185,10 +205,10 @@
// NVIDIA nvcc
#define GB_restrict __restrict__
#elif GxB_STDC_VERSION >= 199901L
// ANSI C99 or later
// C99 or later
#define GB_restrict restrict
#else
// ANSI C95 and earlier: no restrict keyword
// C95 and earlier: no restrict keyword
#define GB_restrict
#endif

Expand Down Expand Up @@ -885,7 +905,7 @@ GB_GLOBAL GrB_UnaryOp
// Unary operators for floating-point types only
//------------------------------------------------------------------------------

// The following floating-point unary operators and their ANSI C11 equivalents,
// The following floating-point unary operators and their C11 equivalents,
// are only defined for floating-point (real and complex) types.

GB_GLOBAL GrB_UnaryOp
Expand Down Expand Up @@ -949,7 +969,7 @@ GB_GLOBAL GrB_UnaryOp
GxB_CBRT_FP64,

// frexpx and frexpe return the mantissa and exponent, respectively,
// from the ANSI C11 frexp function. The exponent is returned as a
// from the C11 frexp function. The exponent is returned as a
// floating-point value, not an integer.

// z = frexpx (x) z = frexpe (x)
Expand Down Expand Up @@ -10737,7 +10757,7 @@ GrB_Info GrB_Matrix_exportHint // suggest the best export format

// GrB_Matrix_serialize/deserialize are slightly different from their GxB*
// counterparts. The blob is allocated by GxB_Matrix_serialize, and must be
// freed by the same free() method passed to GxB_init (or the ANSI C11 free()
// freed by the same free() method passed to GxB_init (or the C11 free()
// if GrB_init was used). By contrast, the GrB* methods require the user
// application to pass in a preallocated blob to GrB_Matrix_serialize, whose
// size can be given by GrB_Matrix_serializeSize (as a loose upper bound).
Expand Down
4 changes: 2 additions & 2 deletions GraphBLAS/Config/README.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ QUICK START: To compile and install, do these commands in this directory:
make
sudo make install

Please be patient; some files can take several minutes to compile. Requires an
ANSI C11 compiler, so cmake will fail if your compiler is not C11 compliant.
Please be patient; some files can take several minutes to compile. Requires a
C11 compiler, so cmake will fail if your compiler is not C11 compliant.
See the User Guide PDF in Doc/ for directions on how to use another compiler.

For faster compilation, do this instead of just "make", which uses 32
Expand Down
Loading

0 comments on commit 2895129

Please sign in to comment.