-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Integrate Modified AC-SpGEMM / GALATIC #26
base: master
Are you sure you want to change the base?
Changes from 10 commits
b9bd4bc
179666b
5080cc3
4cb56c7
ca71c72
b9e075c
2c7bdb9
99c74ba
04138ae
50fa929
ce4a855
dac2a22
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[submodule "ext/moderngpu"] | ||
path = ext/moderngpu | ||
url = https://[email protected]/ctcyang/moderngpu.git | ||
[submodule "ext/cub"] | ||
path = ext/cub | ||
url = https://ctcyang@github.com/NVlabs/cub.git | ||
[submodule "ext/GALATIC"] | ||
path = ext/GALATIC | ||
url = git@github.com:richardlett/GALATIC.git |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ set( PROJ_PATH ${CMAKE_SOURCE_DIR}) | |
set( PROJ_OUT_PATH ${CMAKE_BINARY_DIR}) | ||
set( PROJ_HEADERS "" ) | ||
set( PROJ_LIBRARIES "" ) | ||
set( PROJ_INCLUDES "./" "ext/moderngpu/include" "ext/cub/cub") | ||
set( PROJ_INCLUDES "./" "ext/moderngpu/include") | ||
set( mgpu_SRC_FILES "ext/moderngpu/src/mgpucontext.cu" "ext/moderngpu/src/mgpuutil.cpp") | ||
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin ) | ||
#set( CUDA_CURAND_LIBRARY "$ENV{CUDA_HOME}/lib64/libcurand.so" ) | ||
|
@@ -23,12 +23,12 @@ set( CUDA_CUSPARSE_LIBRARY "$ENV{CUDA_HOME}/lib64/libcusparse.so" ) | |
#FILE( GLOB_RECURSE PROJ_LIBRARIES ext/cublas1.1/*.cu ) | ||
FILE( GLOB_RECURSE PROJ_HEADERS graphblas/*.hpp) | ||
# nvcc flags | ||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=sm_35 -lineinfo -O3 -use_fast_math -Xptxas=-v") | ||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=sm_35 -lineinfo -O3 -use_fast_math -Xptxas=-v --expt-relaxed-constexpr ") | ||
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-fpermissive;-arch=sm_35;-lineinfo;-Xptxas=-v;-dlcm=ca;-maxrregcount=64) | ||
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_21) | ||
# needed for cudamalloc | ||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") | ||
set(CMAKE_CXX_FLAGS "-fpermissive -g -m64 -std=c++11" ) | ||
set(CMAKE_CXX_FLAGS "-fpermissive -g -std=c++14" ) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one space instead of two between |
||
#set(CMAKE_CXX_FLAGS "-fpermissive -pg -m64 -std=c++11" ) | ||
#set(CMAKE_CXX_FLAGS "-fpermissive -g -m64 -std=c++11 -H" ) | ||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ include common.mk | |
#------------------------------------------------------------------------------- | ||
|
||
# Includes | ||
INC += -I$(MGPU_DIR) -I$(CUB_DIR) -I$(BOOST_DIR) -I$(GRB_DIR) | ||
INC += -I$(MGPU_DIR) -I$(BOOST_DIR) -I$(GRB_DIR) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one space instead of two between |
||
|
||
#------------------------------------------------------------------------------- | ||
# Dependency Lists | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,11 +43,30 @@ Info mxm(Matrix<c>* C, | |
desc)); | ||
} else if (typeid(c) == typeid(float) && typeid(a) == typeid(float) && | ||
typeid(b) == typeid(float)) { | ||
CHECK(cusparse_spgemm2(&C->sparse_, mask, accum, op, &A->sparse_, | ||
&B->sparse_, desc)); | ||
|
||
Desc_value s_mode; | ||
CHECK(desc->get(GrB_MODE, &s_mode)); | ||
|
||
if (s_mode == GrB_CUSPARSE2) | ||
CHECK(cusparse_spgemm2(&C->sparse_, mask, accum, op, &A->sparse_, | ||
&B->sparse_, desc)); | ||
else { | ||
if (s_mode != GrB_GALATIC) { | ||
std::cout << R"(Unknown mode (Options are: "cusspare2" and "galatic"; defaulting to galatic)" << std::endl; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick: fix typo cusspare2 -> cusparse2 |
||
} | ||
CHECK(GALATIC_spgemm(&C->sparse_, | ||
op, | ||
&A->sparse_, | ||
&B->sparse_, | ||
desc)); | ||
|
||
} | ||
} else { | ||
std::cout << "Error: Unmasked SpGEMM not implemented yet!\n"; | ||
return GrB_NOT_IMPLEMENTED; | ||
CHECK(GALATIC_spgemm(&C->sparse_, | ||
op, | ||
&A->sparse_, | ||
&B->sparse_, | ||
desc)); | ||
} | ||
} else { | ||
std::cout << "Error: SpMM and GEMM not implemented yet!\n"; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,24 @@ | ||
#ifndef GRAPHBLAS_BACKEND_CUDA_SPGEMM_HPP_ | ||
#define GRAPHBLAS_BACKEND_CUDA_SPGEMM_HPP_ | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick: stylistic thing - I like having only single-spaced lines between lines with code, so could you get rid of all the newline spacing? 1-line gaps are fine, so don't worry about those. |
||
#include "../../../ext/GALATIC/include/dCSR.cuh" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not a big fan of relative path includes that are dependent on a file being in the right folder. Could you add the galatic directory "GALATIC/include" to the CMakeLists.txt includes? Something else that would be nice to have is a single file helper file in GALATIC that includes all helper files part of your public interface. Then we can just have a one-liner Some examples for how to do that are "cub.cuh", ModernGPU's "moderngpu.cuh" and "graphblast.hpp". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for pointing this out! You can tell I don't have the most experience with C++ :p There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel that way about myself too, but most important thing is to code lots and learn something new every day =P |
||
#include "../../../ext/GALATIC/include/SemiRingInterface.h" | ||
#include "../../../ext/GALATIC/source/device/Multiply.cuh" | ||
|
||
#include "graphblas/backend/cuda/sparse_matrix.hpp" | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: get rid of empty line |
||
#include <cuda.h> | ||
#include <cusparse.h> | ||
|
||
#include <iostream> | ||
#include <vector> | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: get rid of empty lines |
||
|
||
|
||
|
||
namespace graphblas { | ||
namespace backend { | ||
template <typename T> | ||
|
@@ -108,6 +118,191 @@ Info spgemmMasked(SparseMatrix<c>* C, | |
C->csc_initialized_ = false; | ||
return GrB_SUCCESS; | ||
} | ||
// Shallow copy graphblast sparsematrix -> Galatic dCSR format | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: please add a newline between following template function and above function. |
||
template<typename T> | ||
static void matrixToGalatic(const SparseMatrix<T> *input , dCSR<T>& output) { | ||
output.col_ids = reinterpret_cast<unsigned int*>(input->d_csrColInd_); | ||
output.data = input->d_csrVal_; | ||
output.row_offsets = reinterpret_cast<unsigned int*>(input->d_csrRowPtr_); | ||
output.rows = input->nrows_; | ||
output.cols = input->ncols_; | ||
output.nnz = input->nvals_; | ||
} | ||
|
||
// Shallow copy Galatic dCSR format -> graphblast sparsematrix | ||
template<typename T> | ||
static void galaticToSparse(SparseMatrix<T> *output , const dCSR<T>& input) { | ||
output->d_csrColInd_ = reinterpret_cast<Index*>(input.col_ids); | ||
output->d_csrVal_ = input.data; | ||
output->d_csrRowPtr_ = reinterpret_cast<Index*>(input.row_offsets); | ||
output->nvals_ = input.nnz; | ||
output->ncapacity_ = input.nnz; | ||
} | ||
|
||
// Nullize pointers in Galatic's sparse matrices; | ||
// Galatic's destructors check for null. Doing this will prevent double | ||
// freeing when shallowcopying with matrixToGalatic & galaticToSparse | ||
template<typename T> | ||
static void nullizeGalaticMatrix(dCSR<T>& m) { | ||
m.data = nullptr; | ||
m.col_ids = nullptr; | ||
m.row_offsets = nullptr; | ||
} | ||
|
||
// A generic shim between graphblast's and GALATIC's semiring interfaces | ||
template<typename NativeSR, typename a, typename b, typename c> | ||
struct GalaticSemiring : SemiRing<a, b, c> { | ||
NativeSR nativeSemiring; | ||
|
||
__device__ c multiply(const a& left, const b& right) const | ||
{ return nativeSemiring.mul_op(left, right); } | ||
__device__ c add(const c& left,const c& right) const | ||
{ return nativeSemiring.add_op(left, right); } | ||
__device__ static c AdditiveIdentity() | ||
{ return NativeSR::identity(); } | ||
}; | ||
|
||
template <typename c, typename a, typename b, typename SemiringT> | ||
Info GALATIC_spgemm(SparseMatrix<c>* C, | ||
SemiringT op, | ||
const SparseMatrix<a>* A, | ||
const SparseMatrix<b>* B, | ||
Descriptor* desc) { | ||
|
||
Index A_nrows, A_ncols, A_nvals; | ||
Index B_nrows, B_ncols, B_nvals; | ||
Index C_nrows, C_ncols, C_nvals; | ||
|
||
A_nrows = A->nrows_; | ||
A_ncols = A->ncols_; | ||
A_nvals = A->nvals_; | ||
B_nrows = B->nrows_; | ||
B_ncols = B->ncols_; | ||
B_nvals = B->nvals_; | ||
C_nrows = C->nrows_; | ||
C_ncols = C->ncols_; | ||
|
||
// Dimension compatibility check | ||
if ((A_ncols != B_nrows) || (C_ncols != B_ncols) || (C_nrows != A_nrows)) { | ||
std::cout << "Dim mismatch mxm" << std::endl; | ||
std::cout << A_ncols << " " << B_nrows << std::endl; | ||
std::cout << C_ncols << " " << B_ncols << std::endl; | ||
std::cout << C_nrows << " " << A_nrows << std::endl; | ||
return GrB_DIMENSION_MISMATCH; | ||
} | ||
|
||
if (C->d_csrColInd_ != NULL) { | ||
CUDA_CALL(cudaFree(C->d_csrColInd_)); | ||
CUDA_CALL(cudaFree(C->d_csrVal_)); | ||
C->d_csrColInd_ = NULL; | ||
C->d_csrVal_ = NULL; | ||
} | ||
|
||
if (C->d_csrRowPtr_ != NULL) { | ||
CUDA_CALL(cudaFree(C->d_csrRowPtr_)); | ||
C->d_csrRowPtr_ = NULL; | ||
} | ||
|
||
if (C->h_csrColInd_ != NULL) { | ||
free(C->h_csrColInd_); | ||
free(C->h_csrVal_); | ||
C->h_csrColInd_ = NULL; | ||
C->h_csrVal_ = NULL; | ||
} | ||
|
||
dCSR<c> outMatrixGPU; | ||
dCSR<a> leftInputMatrixGPU; | ||
dCSR<b> rightInputMatrixGPU; | ||
|
||
//shallow copy input matrices to galatic format | ||
matrixToGalatic(A, leftInputMatrixGPU); | ||
matrixToGalatic(B, rightInputMatrixGPU); | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: get rid of extra lines and the rest in this file. |
||
|
||
GPUMatrixMatrixMultiplyTraits DefaultTraits; | ||
|
||
|
||
// GALATIC has its own semiring interface; | ||
// GalaticSemiring is a shim here for conversion of graphblast-style | ||
// SemiringT type. GalaticSemiring definition is above this function | ||
GalaticSemiring<SemiringT, a, b, c> semiring_shim; | ||
semiring_shim.nativeSemiring = op; | ||
|
||
ExecutionStats stats; | ||
try { | ||
|
||
|
||
Desc_value nt_mode; | ||
CHECK(desc->get(GrB_NT, &nt_mode)); | ||
const int num_threads = static_cast<int>(nt_mode); | ||
|
||
switch (num_threads) { | ||
case 64: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
64, 4, 2, 8, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
case 128: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
128, 4, 2, 4, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
( leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
case 512: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
512, 1, 1, 1, 2, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
default: // 256 | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
256, 4, 2, 4, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
} | ||
} catch(std::exception& e) { | ||
std::cerr | ||
<< "Exception occured in GALATIC SpGEMM, called from GALATIC_spgemm\n" | ||
<< "Exception:\n" | ||
<< e.what() | ||
<< std::endl; | ||
return GrB_OUT_OF_MEMORY; //the most likely issue, fixme | ||
} | ||
|
||
// shallow copy to native format. | ||
galaticToSparse(C , outMatrixGPU); | ||
|
||
// prevent allocations being freed twice when destructors are ran, | ||
// as we are doing shallow copies: | ||
// | ||
// A, B -> leftInputMatrixGPU, rightInputMatrixGPU | ||
// outMatrixGPU -> C. | ||
nullizeGalaticMatrix(outMatrixGPU); | ||
nullizeGalaticMatrix(leftInputMatrixGPU); | ||
nullizeGalaticMatrix(rightInputMatrixGPU); | ||
|
||
|
||
if (C->h_csrRowPtr_ == NULL) | ||
C->h_csrRowPtr_ = reinterpret_cast<Index*>(malloc((A_nrows+1)* | ||
sizeof(Index))); | ||
C->h_csrColInd_ = reinterpret_cast<Index*>(malloc(C->ncapacity_*sizeof(Index))); | ||
C->h_csrVal_ = reinterpret_cast<c*>(malloc(C->ncapacity_*sizeof(c))); | ||
|
||
|
||
C->need_update_ = true; // Set flag that we need to copy data from GPU | ||
C->csr_initialized_ = true; | ||
C->csc_initialized_ = false; | ||
return GrB_SUCCESS; | ||
} | ||
|
||
|
||
|
||
template <typename c, typename a, typename b, typename m, | ||
typename BinaryOpT, typename SemiringT> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nitpick: Do you mind changing both of these submodule URLs to a consistent format such as
https://github.com/ctcyang/moderngpu.git
andhttps://github.com/richardlett/GALATIC.git
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed them both to ssh as that will be more universal until repo is public (automatically uses your ssh-key)