-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Integrate Modified AC-SpGEMM / GALATIC #26
base: master
Are you sure you want to change the base?
Changes from 11 commits
b9bd4bc
179666b
5080cc3
4cb56c7
ca71c72
b9e075c
2c7bdb9
99c74ba
04138ae
50fa929
ce4a855
dac2a22
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[submodule "ext/moderngpu"] | ||
path = ext/moderngpu | ||
url = https://ctcyang@github.com/ctcyang/moderngpu.git | ||
[submodule "ext/cub"] | ||
path = ext/cub | ||
url = https://ctcyang@github.com/NVlabs/cub.git | ||
url = git@github.com:ctcyang/moderngpu.git | ||
[submodule "ext/GALATIC"] | ||
path = ext/GALATIC | ||
url = git@github.com:richardlett/GALATIC.git |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,22 +13,22 @@ set( PROJ_PATH ${CMAKE_SOURCE_DIR}) | |
set( PROJ_OUT_PATH ${CMAKE_BINARY_DIR}) | ||
set( PROJ_HEADERS "" ) | ||
set( PROJ_LIBRARIES "" ) | ||
set( PROJ_INCLUDES "./" "ext/moderngpu/include" "ext/cub/cub") | ||
set( PROJ_INCLUDES "./" "ext/moderngpu/include" "ext") | ||
set( mgpu_SRC_FILES "ext/moderngpu/src/mgpucontext.cu" "ext/moderngpu/src/mgpuutil.cpp") | ||
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin ) | ||
#set( CUDA_CURAND_LIBRARY "$ENV{CUDA_HOME}/lib64/libcurand.so" ) | ||
#set( CUDA_CUBLAS_LIBRARY "$ENV{CUDA_HOME}/lib64/libcublas.so" ) | ||
set( CUDA_CUSPARSE_LIBRARY "$ENV{CUDA_HOME}/lib64/libcusparse.so" ) | ||
#FILE( GLOB_RECURSE PROJ_SOURCES graphblas/*.cu ../graphblas/*.cpp ) | ||
#FILE( G LOB_RECURSE PROJ_SOURCES graphblas/*.cu ../graphblas/*.cpp ) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: extra tab here |
||
#FILE( GLOB_RECURSE PROJ_LIBRARIES ext/cublas1.1/*.cu ) | ||
FILE( GLOB_RECURSE PROJ_HEADERS graphblas/*.hpp) | ||
# nvcc flags | ||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=sm_35 -lineinfo -O3 -use_fast_math -Xptxas=-v") | ||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=sm_35 -lineinfo -O3 -use_fast_math -Xptxas=-v --expt-relaxed-constexpr ") | ||
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-fpermissive;-arch=sm_35;-lineinfo;-Xptxas=-v;-dlcm=ca;-maxrregcount=64) | ||
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_21) | ||
# needed for cudamalloc | ||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") | ||
set(CMAKE_CXX_FLAGS "-fpermissive -g -m64 -std=c++11" ) | ||
set(CMAKE_CXX_FLAGS "-fpermissive -g -std=c++14" ) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one space instead of two between |
||
#set(CMAKE_CXX_FLAGS "-fpermissive -pg -m64 -std=c++11" ) | ||
#set(CMAKE_CXX_FLAGS "-fpermissive -g -m64 -std=c++11 -H" ) | ||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ include common.mk | |
#------------------------------------------------------------------------------- | ||
|
||
# Includes | ||
INC += -I$(MGPU_DIR) -I$(CUB_DIR) -I$(BOOST_DIR) -I$(GRB_DIR) | ||
INC += -I$(MGPU_DIR) -I$(BOOST_DIR) -I$(GRB_DIR) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one space instead of two between |
||
|
||
#------------------------------------------------------------------------------- | ||
# Dependency Lists | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,11 +43,30 @@ Info mxm(Matrix<c>* C, | |
desc)); | ||
} else if (typeid(c) == typeid(float) && typeid(a) == typeid(float) && | ||
typeid(b) == typeid(float)) { | ||
CHECK(cusparse_spgemm2(&C->sparse_, mask, accum, op, &A->sparse_, | ||
&B->sparse_, desc)); | ||
|
||
Desc_value s_mode; | ||
CHECK(desc->get(GrB_MODE, &s_mode)); | ||
|
||
if (s_mode == GrB_CUSPARSE2) | ||
CHECK(cusparse_spgemm2(&C->sparse_, mask, accum, op, &A->sparse_, | ||
&B->sparse_, desc)); | ||
else { | ||
if (s_mode != GrB_GALATIC) { | ||
std::cout << R"(Unknown mode (options are: "cusparse2" and "galatic"); defaulting to galatic)" << std::endl; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: cusparse2 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🤦 wow |
||
} | ||
CHECK(GALATIC_spgemm(&C->sparse_, | ||
op, | ||
&A->sparse_, | ||
&B->sparse_, | ||
desc)); | ||
|
||
} | ||
} else { | ||
std::cout << "Error: Unmasked SpGEMM not implemented yet!\n"; | ||
return GrB_NOT_IMPLEMENTED; | ||
CHECK(GALATIC_spgemm(&C->sparse_, | ||
op, | ||
&A->sparse_, | ||
&B->sparse_, | ||
desc)); | ||
} | ||
} else { | ||
std::cout << "Error: SpMM and GEMM not implemented yet!\n"; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
#ifndef GRAPHBLAS_BACKEND_CUDA_SPGEMM_HPP_ | ||
#define GRAPHBLAS_BACKEND_CUDA_SPGEMM_HPP_ | ||
|
||
#include <GALATIC/GALATICMinimumIncludes.cuh> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mind changing this to That way, the PROJ_INCLUDES can be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whoops, bad inference made me think that was for files that were resolved based on relative path to current file |
||
|
||
#include "graphblas/backend/cuda/sparse_matrix.hpp" | ||
|
||
#include <cuda.h> | ||
|
@@ -108,6 +110,182 @@ Info spgemmMasked(SparseMatrix<c>* C, | |
C->csc_initialized_ = false; | ||
return GrB_SUCCESS; | ||
} | ||
// Shallow copy graphblast sparsematrix -> Galatic dCSR format | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: please add a newline between following template function and above function. |
||
template<typename T> | ||
static void matrixToGalatic(const SparseMatrix<T> *input , dCSR<T>& output) { | ||
output.col_ids = reinterpret_cast<unsigned int*>(input->d_csrColInd_); | ||
output.data = input->d_csrVal_; | ||
output.row_offsets = reinterpret_cast<unsigned int*>(input->d_csrRowPtr_); | ||
output.rows = input->nrows_; | ||
output.cols = input->ncols_; | ||
output.nnz = input->nvals_; | ||
} | ||
|
||
// Shallow copy Galatic dCSR format -> graphblast sparsematrix | ||
template<typename T> | ||
static void galaticToSparse(SparseMatrix<T> *output , const dCSR<T>& input) { | ||
output->d_csrColInd_ = reinterpret_cast<Index*>(input.col_ids); | ||
output->d_csrVal_ = input.data; | ||
output->d_csrRowPtr_ = reinterpret_cast<Index*>(input.row_offsets); | ||
output->nvals_ = input.nnz; | ||
output->ncapacity_ = input.nnz; | ||
} | ||
|
||
// Nullize pointers in Galatic's sparse matrices; | ||
// Galatic's destructors check for null. Doing this will prevent double | ||
// freeing when shallow copying with matrixToGalatic & galaticToSparse | ||
template<typename T> | ||
static void nullizeGalaticMatrix(dCSR<T>& m) { | ||
m.data = nullptr; | ||
m.col_ids = nullptr; | ||
m.row_offsets = nullptr; | ||
} | ||
|
||
// A generic shim between graphblast's and GALATIC's semiring interfaces | ||
template<typename NativeSR, typename a, typename b, typename c> | ||
struct GalaticSemiring : SemiRing<a, b, c> { | ||
NativeSR nativeSemiring; | ||
|
||
__device__ c multiply(const a& left, const b& right) const | ||
{ return nativeSemiring.mul_op(left, right); } | ||
__device__ c add(const c& left,const c& right) const | ||
{ return nativeSemiring.add_op(left, right); } | ||
__device__ static c AdditiveIdentity() | ||
{ return NativeSR::identity(); } | ||
}; | ||
|
||
template <typename c, typename a, typename b, typename SemiringT> | ||
Info GALATIC_spgemm(SparseMatrix<c>* C, | ||
SemiringT op, | ||
const SparseMatrix<a>* A, | ||
const SparseMatrix<b>* B, | ||
Descriptor* desc) { | ||
|
||
Index A_nrows, A_ncols, A_nvals; | ||
Index B_nrows, B_ncols, B_nvals; | ||
Index C_nrows, C_ncols, C_nvals; | ||
|
||
A_nrows = A->nrows_; | ||
A_ncols = A->ncols_; | ||
A_nvals = A->nvals_; | ||
B_nrows = B->nrows_; | ||
B_ncols = B->ncols_; | ||
B_nvals = B->nvals_; | ||
C_nrows = C->nrows_; | ||
C_ncols = C->ncols_; | ||
|
||
// Dimension compatibility check | ||
if ((A_ncols != B_nrows) || (C_ncols != B_ncols) || (C_nrows != A_nrows)) { | ||
std::cout << "Dim mismatch mxm" << std::endl; | ||
std::cout << A_ncols << " " << B_nrows << std::endl; | ||
std::cout << C_ncols << " " << B_ncols << std::endl; | ||
std::cout << C_nrows << " " << A_nrows << std::endl; | ||
return GrB_DIMENSION_MISMATCH; | ||
} | ||
|
||
if (C->d_csrColInd_ != NULL) { | ||
CUDA_CALL(cudaFree(C->d_csrColInd_)); | ||
CUDA_CALL(cudaFree(C->d_csrVal_)); | ||
C->d_csrColInd_ = NULL; | ||
C->d_csrVal_ = NULL; | ||
} | ||
|
||
if (C->d_csrRowPtr_ != NULL) { | ||
CUDA_CALL(cudaFree(C->d_csrRowPtr_)); | ||
C->d_csrRowPtr_ = NULL; | ||
} | ||
|
||
if (C->h_csrColInd_ != NULL) { | ||
free(C->h_csrColInd_); | ||
free(C->h_csrVal_); | ||
C->h_csrColInd_ = NULL; | ||
C->h_csrVal_ = NULL; | ||
} | ||
|
||
dCSR<c> outMatrixGPU; | ||
dCSR<a> leftInputMatrixGPU; | ||
dCSR<b> rightInputMatrixGPU; | ||
|
||
//shallow copy input matrices to galatic format | ||
matrixToGalatic(A, leftInputMatrixGPU); | ||
matrixToGalatic(B, rightInputMatrixGPU); | ||
|
||
GPUMatrixMatrixMultiplyTraits DefaultTraits; | ||
|
||
// GALATIC has its own semiring interface; | ||
// GalaticSemiring is a shim here for conversion of graphblast-style | ||
// SemiringT type. GalaticSemiring definition is above this function | ||
GalaticSemiring<SemiringT, a, b, c> semiring_shim; | ||
semiring_shim.nativeSemiring = op; | ||
|
||
ExecutionStats stats; | ||
try { | ||
Desc_value nt_mode; | ||
CHECK(desc->get(GrB_NT, &nt_mode)); | ||
const int num_threads = static_cast<int>(nt_mode); | ||
|
||
switch (num_threads) { | ||
case 64: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
64, 4, 2, 8, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
case 128: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
128, 4, 2, 4, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
( leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
case 512: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
512, 1, 1, 1, 2, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
default: // 256 | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
256, 4, 2, 4, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
} | ||
} catch(std::exception& e) { | ||
std::cerr | ||
<< "Exception occurred in GALATIC SpGEMM, called from GALATIC_spgemm\n" | ||
<< "Exception:\n" | ||
<< e.what() | ||
<< std::endl; | ||
return GrB_OUT_OF_MEMORY; //the most likely issue, fixme | ||
} | ||
|
||
// shallow copy to native format. | ||
galaticToSparse(C , outMatrixGPU); | ||
|
||
// prevent allocations being freed twice when destructors are run, | ||
// as we are doing shallow copies: | ||
// | ||
// A, B -> leftInputMatrixGPU, rightInputMatrixGPU | ||
// outMatrixGPU -> C. | ||
nullizeGalaticMatrix(outMatrixGPU); | ||
nullizeGalaticMatrix(leftInputMatrixGPU); | ||
nullizeGalaticMatrix(rightInputMatrixGPU); | ||
|
||
if (C->h_csrRowPtr_ == NULL) | ||
C->h_csrRowPtr_ = reinterpret_cast<Index*>(malloc((A_nrows+1)* | ||
sizeof(Index))); | ||
C->h_csrColInd_ = reinterpret_cast<Index*>(malloc(C->ncapacity_*sizeof(Index))); | ||
C->h_csrVal_ = reinterpret_cast<c*>(malloc(C->ncapacity_*sizeof(c))); | ||
|
||
C->need_update_ = true; // Set flag that we need to copy data from GPU | ||
C->csr_initialized_ = true; | ||
C->csc_initialized_ = false; | ||
return GrB_SUCCESS; | ||
} | ||
|
||
template <typename c, typename a, typename b, typename m, | ||
typename BinaryOpT, typename SemiringT> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Kind of a personal preference thing, but could you change these to use HTTPS rather than SSH? I usually don't have an SSH key installed, so I typically use HTTPS.
I've tried it with HTTPS: for people with access to the private GALATIC repo, as long as they enter my GitHub login and password, it lets them pull in the submodule.