diff --git a/CMakeLists.txt b/CMakeLists.txt index 545ec65ad..e4d8a936c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,11 +168,11 @@ option(GUNROCK_GENCODE_SM20 option(GUNROCK_GENCODE_SM30 "ON to generate code for Compute Capability 3.0 devices (e.g. Tesla K10)" - ON) + OFF) option(GUNROCK_GENCODE_SM35 "ON to generate code for Compute Capability 3.5 devices (e.g. Tesla K20)" - OFF) + ON) option(GUNROCK_GENCODE_SM37 "ON to generate code for Compute Capability 3.7 devices (e.g. Tesla K80)" diff --git a/gunrock/app/bc/bc_app.cu b/gunrock/app/bc/bc_app.cu index 01a034cbb..d1d0e4b90 100644 --- a/gunrock/app/bc/bc_app.cu +++ b/gunrock/app/bc/bc_app.cu @@ -240,18 +240,18 @@ void runBC(GRGraph* output, BC_Parameter *parameter) void dispatchBC( GRGraph* grapho, const GRGraph* graphi, - const GRSetup config, + const GRSetup* config, const GRTypes data_t, ContextPtr* context, cudaStream_t* streams) { BC_Parameter* parameter = new BC_Parameter; parameter->src = (long long*)malloc(sizeof(long long)); - parameter->g_quiet = config.quiet; + parameter->g_quiet = config -> quiet; parameter->context = context; parameter->streams = streams; - parameter->num_gpus = config.num_devices; - parameter->gpu_idx = config.device_list; + parameter->num_gpus = config -> num_devices; + parameter->gpu_idx = config -> device_list; switch (data_t.VTXID_TYPE) { @@ -286,7 +286,7 @@ void dispatchBC( parameter->graph = &csr; // determine source vertex to start - switch (config.source_mode) + switch (config -> source_mode) { case randomize: { @@ -301,7 +301,7 @@ void dispatchBC( } case manually: { - parameter->src[0] = config.source_vertex[0]; + parameter->src[0] = config -> source_vertex[0]; break; } default: @@ -341,7 +341,7 @@ void dispatchBC( void gunrock_bc( GRGraph *grapho, const GRGraph *graphi, - const GRSetup config, + const GRSetup *config, const GRTypes data_t) { // GPU-related configurations @@ -350,20 +350,20 @@ void gunrock_bc( ContextPtr *context = NULL; cudaStream_t *streams = NULL; - num_gpus = config.num_devices; + num_gpus = config -> num_devices; gpu_idx = new int [num_gpus]; for (int i = 0; i < num_gpus; ++i) { - gpu_idx[i] = config.device_list[i]; + gpu_idx[i] = config -> device_list[i]; } // Create streams and MordernGPU context for each GPU streams = new cudaStream_t[num_gpus * num_gpus * 2]; context = new ContextPtr[num_gpus * num_gpus]; - if (!config.quiet) { printf(" using %d GPUs:", num_gpus); } + if (!config -> quiet) { printf(" using %d GPUs:", num_gpus); } for (int gpu = 0; gpu < num_gpus; ++gpu) { - if (!config.quiet) { printf(" %d ", gpu_idx[gpu]); } + if (!config -> quiet) { printf(" %d ", gpu_idx[gpu]); } util::SetDevice(gpu_idx[gpu]); for (int i = 0; i < num_gpus * 2; ++i) { @@ -378,7 +378,7 @@ void gunrock_bc( } } } - if (!config.quiet) { printf("\n"); } + if (!config -> quiet) { printf("\n"); } dispatchBC(grapho, graphi, config, data_t, context, streams); } @@ -406,8 +406,8 @@ void bc( data_t.SIZET_TYPE = SIZET_INT; // integer graph size type data_t.VALUE_TYPE = VALUE_FLOAT; // float attributes type - struct GRSetup config = InitSetup(1, NULL); // primitive-specific configures - config.source_vertex[0] = source; // source vertex to start + struct GRSetup *config = InitSetup(1, NULL); // primitive-specific configures + config -> source_vertex[0] = source; // source vertex to start struct GRGraph *grapho = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graphi = (struct GRGraph*)malloc(sizeof(struct GRGraph)); diff --git a/gunrock/app/bc/bc_enactor.cuh b/gunrock/app/bc/bc_enactor.cuh index 81e7230ed..d777b55b9 100644 --- a/gunrock/app/bc/bc_enactor.cuh +++ b/gunrock/app/bc/bc_enactor.cuh @@ -2226,6 +2226,7 @@ public: else if (traversal_mode == "LB_LIGHT_CULL") return MODE_SWITCH ::Init(*this, context, problem, max_grid_size); + else printf("Traversal mode %s is not supported by BC at the moment\n", traversal_mode.c_str()); } //to reduce compile time, get rid of other architecture for now diff --git a/gunrock/app/bc/bc_functor.cuh b/gunrock/app/bc/bc_functor.cuh index 193fb871e..c85869d5c 100644 --- a/gunrock/app/bc/bc_functor.cuh +++ b/gunrock/app/bc/bc_functor.cuh @@ -16,6 +16,7 @@ #include #include +#include namespace gunrock { namespace app { @@ -92,7 +93,7 @@ struct ForwardFunctor { //VertexId label_d; //util::io::ModifiedLoad::Ld( // label_d, d_data_slice->labels + d_id); - //label_d = __ldg(d_data_slice -> labels + d_id); + //label_d = _ldg(d_data_slice -> labels + d_id); //if (label_d == label /*+ 1*/) { //Accumulate sigma value atomicAdd(d_data_slice->sigmas + d_id, d_data_slice->sigmas[s_id]); @@ -229,10 +230,10 @@ struct BackwardFunctor { VertexId d_label; //util::io::ModifiedLoad::Ld( // s_label, d_data_slice->labels + s_id); - s_label = __ldg(d_data_slice -> labels + s_id); + s_label = _ldg(d_data_slice -> labels + s_id); //util::io::ModifiedLoad::Ld( // d_label, d_data_slice->labels + d_id); - d_label = __ldg(d_data_slice -> labels + d_id); + d_label = _ldg(d_data_slice -> labels + d_id); return (d_label == s_label + 1); } @@ -265,17 +266,17 @@ struct BackwardFunctor { Value from_sigma; //util::io::ModifiedLoad::Ld( // from_sigma, d_data_slice->sigmas + s_id); - from_sigma = __ldg(d_data_slice -> sigmas + s_id); + from_sigma = _ldg(d_data_slice -> sigmas + s_id); Value to_sigma; //util::io::ModifiedLoad::Ld( // to_sigma, d_data_slice->sigmas + d_id); - to_sigma = __ldg(d_data_slice -> sigmas + d_id); + to_sigma = _ldg(d_data_slice -> sigmas + d_id); Value to_delta; //util::io::ModifiedLoad::Ld( // to_delta, d_data_slice->deltas + d_id); - to_delta = __ldg(d_data_slice -> deltas + d_id); + to_delta = _ldg(d_data_slice -> deltas + d_id); Value result = from_sigma / to_sigma * (1.0 + to_delta); diff --git a/gunrock/app/bfs/bfs_app.cu b/gunrock/app/bfs/bfs_app.cu index d4ff55a7b..dae82b2da 100644 --- a/gunrock/app/bfs/bfs_app.cu +++ b/gunrock/app/bfs/bfs_app.cu @@ -36,12 +36,14 @@ struct BFS_Parameter : gunrock::app::TestParameter_Base public: bool mark_predecessors ; // mark src-distance vs. parent vertices bool enable_idempotence; // enable idempotence operation + bool direction_optimized; // enable direction optimization double max_queue_sizing1 ; // maximum queue sizing factor BFS_Parameter() { mark_predecessors = false; enable_idempotence = false; + direction_optimized = false; max_queue_sizing1 = -1.0f; } @@ -173,10 +175,12 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter) float partition_factor = parameter -> partition_factor; int partition_seed = parameter -> partition_seed; bool g_stream_from_host = parameter -> g_stream_from_host; - int traversal_mode = parameter -> traversal_mode; + std::string traversal_mode = parameter -> traversal_mode; bool instrument = parameter -> instrumented; bool debug = parameter -> debug; bool size_check = parameter -> size_check; + bool undirected = parameter -> g_undirected; + bool direction_optimized = parameter -> direction_optimized; size_t *org_size = new size_t [num_gpus]; // Allocate host-side label array VertexId *h_labels = new VertexId[graph->nodes]; @@ -193,7 +197,7 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter) cudaSetDevice(gpu_idx[gpu]); cudaMemGetInfo(&(org_size[gpu]), &dummy); } - Problem *problem = new Problem; // Allocate problem on GPU + Problem *problem = new Problem(direction_optimized, undirected); // Allocate problem on GPU util::GRError( problem->Init( g_stream_from_host, @@ -210,7 +214,7 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter) "Problem BFS Initialization Failed", __FILE__, __LINE__); Enactor *enactor = new Enactor( - num_gpus, gpu_idx, instrument, debug, size_check); // BFS enactor map + num_gpus, gpu_idx, instrument, debug, size_check, direction_optimized); // BFS enactor map util::GRError( enactor->Init(context, problem, max_grid_size, traversal_mode), "BFS Enactor init failed", __FILE__, __LINE__); @@ -276,21 +280,21 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter) float dispatch_bfs( GRGraph* grapho, const GRGraph* graphi, - const GRSetup config, + const GRSetup* config, const GRTypes data_t, ContextPtr* context, cudaStream_t* streams) { BFS_Parameter *parameter = new BFS_Parameter; - parameter->iterations = config.num_iters; - parameter->src = (long long*)malloc(sizeof(long long)*config.num_iters); + parameter->iterations = config -> num_iters; + parameter->src = (long long*)malloc(sizeof(long long)*config -> num_iters); parameter->context = context; parameter->streams = streams; - parameter->g_quiet = config.quiet; - parameter->num_gpus = config.num_devices; - parameter->gpu_idx = config.device_list; - parameter->mark_predecessors = config.mark_predecessors; - parameter->enable_idempotence = config.enable_idempotence; + parameter->g_quiet = config -> quiet; + parameter->num_gpus = config -> num_devices; + parameter->gpu_idx = config -> device_list; + parameter->mark_predecessors = config -> mark_predecessors; + parameter->enable_idempotence = config -> enable_idempotence; float elapsed_time; @@ -315,7 +319,7 @@ float dispatch_bfs( parameter->graph = &csr; // determine source vertex to start - switch (config.source_mode) + switch (config -> source_mode) { case randomize: { @@ -329,7 +333,7 @@ float dispatch_bfs( { int max_deg = 0; int node_id = csr.GetNodeWithHighestDegree(max_deg); - for (int i = 0; i < config.num_iters; ++i) + for (int i = 0; i < config -> num_iters; ++i) { parameter->src[i] = node_id; } @@ -339,7 +343,7 @@ float dispatch_bfs( { for (int i = 0; i < parameter->iterations; ++i) { - parameter->src[i] = config.source_vertex[i]; + parameter->src[i] = config -> source_vertex[i]; } break; } @@ -355,7 +359,7 @@ float dispatch_bfs( if (!parameter->g_quiet) { printf(" source: %lld", (long long) parameter->src[0]); - for (int i = 1; i < config.num_iters; ++i) + for (int i = 1; i < config -> num_iters; ++i) { printf(",%lld", (long long) parameter->src[i]); } @@ -403,7 +407,7 @@ float dispatch_bfs( float gunrock_bfs( GRGraph* grapho, const GRGraph* graphi, - const GRSetup config, + const GRSetup* config, const GRTypes data_t) { // GPU-related configurations @@ -412,20 +416,20 @@ float gunrock_bfs( ContextPtr *context = NULL; cudaStream_t *streams = NULL; - num_gpus = config.num_devices; + num_gpus = config -> num_devices; gpu_idx = new int [num_gpus]; for (int i = 0; i < num_gpus; ++i) { - gpu_idx[i] = config.device_list[i]; + gpu_idx[i] = config -> device_list[i]; } // Create streams and MordernGPU context for each GPU streams = new cudaStream_t[num_gpus * num_gpus * 2]; context = new ContextPtr[num_gpus * num_gpus]; - if (!config.quiet) { printf(" using %d GPUs:", num_gpus); } + if (!config -> quiet) { printf(" using %d GPUs:", num_gpus); } for (int gpu = 0; gpu < num_gpus; ++gpu) { - if (!config.quiet) { printf(" %d ", gpu_idx[gpu]); } + if (!config -> quiet) { printf(" %d ", gpu_idx[gpu]); } util::SetDevice(gpu_idx[gpu]); for (int i = 0; i < num_gpus * 2; ++i) { @@ -440,7 +444,7 @@ float gunrock_bfs( } } } - if (!config.quiet) { printf("\n"); } + if (!config -> quiet) { printf("\n"); } return dispatch_bfs(grapho, graphi, config, data_t, context, streams); } @@ -476,10 +480,10 @@ float bfs( data_t.SIZET_TYPE = SIZET_INT; // integer graph size type data_t.VALUE_TYPE = VALUE_INT; // integer attributes type - struct GRSetup config = InitSetup(num_iters, source); // primitive-specific configures - config.mark_predecessors = mark_predecessors; // do not mark predecessors - config.enable_idempotence = enable_idempotence; // wether enable idempotence - config.source_mode = source_mode; + struct GRSetup* config = InitSetup(num_iters, source); // primitive-specific configures + config -> mark_predecessors = mark_predecessors; // do not mark predecessors + config -> enable_idempotence = enable_idempotence; // wether enable idempotence + config -> source_mode = source_mode; struct GRGraph *grapho = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graphi = (struct GRGraph*)malloc(sizeof(struct GRGraph)); diff --git a/gunrock/app/bfs/bfs_enactor.cuh b/gunrock/app/bfs/bfs_enactor.cuh index f77b2dbb6..2d47b7c74 100644 --- a/gunrock/app/bfs/bfs_enactor.cuh +++ b/gunrock/app/bfs/bfs_enactor.cuh @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -128,7 +127,7 @@ struct LoadLabel static __device__ __forceinline__ long long Load (long long *&d_labels, SizeT &pos) { - return __ldg(d_labels + pos); + return _ldg(d_labels + pos); } }; @@ -170,7 +169,7 @@ __global__ void Expand_Incoming_Kernel( //MaskT tex_mask_byte; if (x < num_elements) { - key = __ldg(d_keys_in + x); + key = _ldg(d_keys_in + x); if (KernelPolicy::Problem::ENABLE_IDEMPOTENCE) { mask_pos = (key & KernelPolicy::LOAD_BALANCED_CULL::ELEMENT_ID_MASK) >> (2+sizeof(MaskT)); @@ -200,7 +199,7 @@ __global__ void Expand_Incoming_Kernel( //if (to_process) //{ // if (tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, key + 1) == tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, key)) - //if (__ldg(d_row_offsets + key) == __ldg(d_row_offsets + (key+1))) + //if (_ldg(d_row_offsets + key) == _ldg(d_row_offsets + (key+1))) // to_process = false; //} } else to_process = false; @@ -317,7 +316,7 @@ __global__ void From_Unvisited_Queue_IDEM( if (key >= num_nodes) break; //if (tex1Dfetch(gunrock::oprtr::cull_filter::LabelsTex::labels, key) != util::MaxValue()) - if (__ldg(d_labels + key) != util::MaxValue()) + if (_ldg(d_labels + key) != util::MaxValue()) { //to_process = false; mask_byte |= mask_bit; @@ -327,7 +326,7 @@ __global__ void From_Unvisited_Queue_IDEM( //if (to_process) { // only works for undirected graph //if (tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, key) == tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, key+1)) to_process = false; - if (__ldg(d_row_offsets + key) == __ldg(d_row_offsets + (key+1))) + if (_ldg(d_row_offsets + key) == _ldg(d_row_offsets + (key+1))) continue; } //if (to_process) @@ -506,7 +505,7 @@ __global__ void Inverse_Expand( if (x < num_unvisited_vertices) { - key = __ldg(d_unvisited_key_in + x); + key = _ldg(d_unvisited_key_in + x); } else to_process = false; if (to_process && Problem::ENABLE_IDEMPOTENCE) @@ -537,9 +536,9 @@ __global__ void Inverse_Expand( if (to_process) { SizeT edge_start = //tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, key); - __ldg(d_inverse_row_offsets + key); + _ldg(d_inverse_row_offsets + key); SizeT edge_end = //tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, key+1); - __ldg(d_inverse_row_offsets + (key+1)); + _ldg(d_inverse_row_offsets + (key+1)); for (SizeT edge_id = edge_start; edge_id < edge_end; edge_id++) { VertexId neighbor = d_inverse_column_indices[edge_id]; @@ -618,7 +617,7 @@ __global__ void Update_Mask_Kernel( while ( x < end_mask_pos) { - MaskT mask = __ldg(visited_masks + x); + MaskT mask = _ldg(visited_masks + x); VertexId v = (x << 3) * sizeof(MaskT); bool has_change = false; #pragma unroll @@ -627,7 +626,7 @@ __global__ void Update_Mask_Kernel( MaskT mask_bit = 1 << i; if ((!(mask & mask_bit)) && (v < num_nodes)) { - if (__ldg(labels + v) != util::MaxValue()) + if (_ldg(labels + v) != util::MaxValue()) { mask |= mask_bit; has_change = true; @@ -682,7 +681,7 @@ __global__ void Combind_Masks( { MaskT in_mask = 0; if (d_out_key_length[gpu] != 0) - in_mask = __ldg(s_mask_ins[gpu] + x); + in_mask = _ldg(s_mask_ins[gpu] + x); //printf("(%d, %d) : in_mask = %#x\n", blockIdx.x, threadIdx.x, in_mask); new_mask |= in_mask; } @@ -698,7 +697,7 @@ __global__ void Combind_Masks( MaskT mask_bit = 1 << i; if ((update_mask & mask_bit) && (v < num_nodes)) { - if (__ldg(d_partition_table + v) == 0) + if (_ldg(d_partition_table + v) == 0) { l_vertices[l_vertex_counter] = v; l_vertex_counter ++; diff --git a/gunrock/app/bfs/bfs_functor.cuh b/gunrock/app/bfs/bfs_functor.cuh index d807b10d5..0422dfa33 100644 --- a/gunrock/app/bfs/bfs_functor.cuh +++ b/gunrock/app/bfs/bfs_functor.cuh @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include diff --git a/gunrock/app/cc/cc_app.cu b/gunrock/app/cc/cc_app.cu index 7b011c84f..ca406e4d9 100644 --- a/gunrock/app/cc/cc_app.cu +++ b/gunrock/app/cc/cc_app.cu @@ -94,6 +94,7 @@ void runCC(GRGraph* output, CC_Parameter *parameter) bool instrument = parameter -> instrumented; bool debug = parameter -> debug; bool size_check = parameter -> size_check; + std::string traversal_mode = parameter -> traversal_mode; size_t *org_size = new size_t [num_gpus]; // Allocate host-side label array VertexId *h_component_ids = new VertexId[graph->nodes]; @@ -105,7 +106,7 @@ void runCC(GRGraph* output, CC_Parameter *parameter) cudaMemGetInfo(&(org_size[gpu]), &dummy); } - Problem* problem = new Problem(false); // Allocate problem on GPU + Problem* problem = new Problem; // Allocate problem on GPU util::GRError( problem->Init( g_stream_from_host, @@ -124,7 +125,7 @@ void runCC(GRGraph* output, CC_Parameter *parameter) Enactor* enactor = new Enactor( num_gpus, gpu_idx, instrument, debug, size_check); // CC enactor map util::GRError( - enactor->Init(context, problem, max_grid_size), + enactor->Init(context, problem, traversal_mode, max_grid_size), "CC Enactor Init failed", __FILE__, __LINE__); // Perform CC @@ -176,7 +177,7 @@ void runCC(GRGraph* output, CC_Parameter *parameter) void dispatch_cc( GRGraph* grapho, const GRGraph* graphi, - const GRSetup config, + const GRSetup* config, const GRTypes data_t, ContextPtr* context, cudaStream_t* streams) @@ -184,9 +185,9 @@ void dispatch_cc( CC_Parameter *parameter = new CC_Parameter; parameter->context = context; parameter->streams = streams; - parameter->g_quiet = config.quiet; - parameter->num_gpus = config.num_devices; - parameter->gpu_idx = config.device_list; + parameter->g_quiet = config -> quiet; + parameter->num_gpus = config -> num_devices; + parameter->gpu_idx = config -> device_list; switch (data_t.VTXID_TYPE) { @@ -245,7 +246,7 @@ void dispatch_cc( void gunrock_cc( GRGraph *grapho, const GRGraph *graphi, - const GRSetup config, + const GRSetup *config, const GRTypes data_t) { // GPU-related configurations @@ -254,20 +255,20 @@ void gunrock_cc( ContextPtr *context = NULL; cudaStream_t *streams = NULL; - num_gpus = config.num_devices; + num_gpus = config -> num_devices; gpu_idx = new int [num_gpus]; for (int i = 0; i < num_gpus; ++i) { - gpu_idx[i] = config.device_list[i]; + gpu_idx[i] = config -> device_list[i]; } // Create streams and MordernGPU context for each GPU streams = new cudaStream_t[num_gpus * num_gpus * 2]; context = new ContextPtr[num_gpus * num_gpus]; - if (!config.quiet) { printf(" using %d GPUs:", num_gpus); } + if (!config -> quiet) { printf(" using %d GPUs:", num_gpus); } for (int gpu = 0; gpu < num_gpus; ++gpu) { - if (!config.quiet) { printf(" %d ", gpu_idx[gpu]); } + if (!config -> quiet) { printf(" %d ", gpu_idx[gpu]); } util::SetDevice(gpu_idx[gpu]); for (int i = 0; i < num_gpus * 2; ++i) { @@ -282,7 +283,7 @@ void gunrock_cc( } } } - if (!config.quiet) { printf("\n"); } + if (!config -> quiet) { printf("\n"); } dispatch_cc(grapho, graphi, config, data_t, context, streams); } @@ -309,7 +310,7 @@ int cc( data_t.SIZET_TYPE = SIZET_INT; // integer graph size type data_t.VALUE_TYPE = VALUE_INT; // integer attributes type - struct GRSetup config = InitSetup(1, NULL); // primitive-specific configures + struct GRSetup *config = InitSetup(1, NULL); // primitive-specific configures struct GRGraph *grapho = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graphi = (struct GRGraph*)malloc(sizeof(struct GRGraph)); diff --git a/gunrock/app/cc/cc_enactor.cuh b/gunrock/app/cc/cc_enactor.cuh index 67066c8c6..cfeca6a9c 100644 --- a/gunrock/app/cc/cc_enactor.cuh +++ b/gunrock/app/cc/cc_enactor.cuh @@ -49,9 +49,9 @@ __global__ void Expand_Incoming_Kernel( while (x < num_elements) { - VertexId key = __ldg(keys_in + x); - VertexId new_pred = __ldg(vertex_associate_in + x); - VertexId old_pred = __ldg(vertex_associate_org + key); + VertexId key = _ldg(keys_in + x); + VertexId new_pred = _ldg(vertex_associate_in + x); + VertexId old_pred = _ldg(vertex_associate_org + key); if (new_pred != old_pred) { if (new_pred < old_pred) vertex_associate_org[old_pred] = new_pred; @@ -140,14 +140,14 @@ __global__ void Make_Output_Kernel( VertexId old_cid = 0, new_cid = 0, min_cid = 0; if (x < num_vertices) { - old_cid = __ldg(old_component_ids + x); - new_cid = __ldg(component_ids + x); + old_cid = _ldg(old_component_ids + x); + new_cid = _ldg(component_ids + x); min_cid = min(new_cid, old_cid); if (old_cid == min_cid) to_process = false; else { old_component_ids[x] = min_cid; - VertexId old_grandparent = __ldg(component_ids + old_cid); + VertexId old_grandparent = _ldg(component_ids + old_cid); if (min_cid != old_grandparent) { //printf("%d\t Make_Output : not updated, old_cid = %d, min_cid = %d, old_grandparent = %d\n", diff --git a/gunrock/app/cc/cc_functor.cuh b/gunrock/app/cc/cc_functor.cuh index f5ead0d0f..8b87f3324 100644 --- a/gunrock/app/cc/cc_functor.cuh +++ b/gunrock/app/cc/cc_functor.cuh @@ -3,6 +3,7 @@ #include #include #include +#include namespace gunrock { namespace app { @@ -253,10 +254,10 @@ struct HookMinFunctor { VertexId to_node; util::io::ModifiedLoad::Ld( from_node, d_data_slice->froms + node); - //from_node = __ldg(d_data_slice -> froms + node); + //from_node = _ldg(d_data_slice -> froms + node); util::io::ModifiedLoad::Ld( to_node, d_data_slice->tos + node); - //to_node = __ldg(d_data_slice -> tos + node); + //to_node = _ldg(d_data_slice -> tos + node); VertexId parent_from; VertexId parent_to; util::io::ModifiedLoad::Ld( @@ -343,18 +344,18 @@ struct HookMaxFunctor { VertexId to_node; util::io::ModifiedLoad::Ld( from_node, d_data_slice->froms + node); - //from_node = __ldg(d_data_slice -> froms + node); + //from_node = _ldg(d_data_slice -> froms + node); util::io::ModifiedLoad::Ld( to_node, d_data_slice->tos + node); - //to_node = __ldg(d_data_slice -> tos + node); + //to_node = _ldg(d_data_slice -> tos + node); VertexId parent_from; VertexId parent_to; //util::io::ModifiedLoad::Ld( // parent_from, d_data_slice -> component_ids + from_node); - parent_from = __ldg(d_data_slice -> component_ids + from_node); + parent_from = _ldg(d_data_slice -> component_ids + from_node); //util::io::ModifiedLoad::Ld( // parent_to , d_data_slice -> component_ids + to_node); - parent_to = __ldg(d_data_slice -> component_ids + to_node); + parent_to = _ldg(d_data_slice -> component_ids + to_node); //VertexId max_node = parent_from > parent_to ? parent_from : parent_to; //VertexId min_node = parent_from + parent_to - max_node; //if (max_node == min_node) @@ -405,7 +406,7 @@ struct HookMaxFunctor { util::io::ModifiedLoad::Ld( mark, d_data_slice -> marks + edge_id); if (mark) return false; - //if (__ldg(d_data_slice -> marks + edge_id)) return false; + //if (_ldg(d_data_slice -> marks + edge_id)) return false; //VertexId from_node; = s_id //VertexId to_node; = d_id @@ -516,11 +517,11 @@ struct PtrJumpFunctor { VertexId parent; //util::io::ModifiedLoad::Ld( // parent, d_data_slice -> component_ids + node); - parent = __ldg(d_data_slice -> component_ids + node); + parent = _ldg(d_data_slice -> component_ids + node); VertexId grand_parent; //util::io::ModifiedLoad::Ld( // grand_parent, d_data_slice -> component_ids + parent); - grand_parent = __ldg(d_data_slice -> component_ids + parent); + grand_parent = _ldg(d_data_slice -> component_ids + parent); if (parent != grand_parent) { util::io::ModifiedStore::St( 0, d_data_slice ->vertex_flag + 0); diff --git a/gunrock/app/dobfs/dobfs_enactor.cuh b/gunrock/app/dobfs/dobfs_enactor.cuh index 4aa69e333..ca9685942 100644 --- a/gunrock/app/dobfs/dobfs_enactor.cuh +++ b/gunrock/app/dobfs/dobfs_enactor.cuh @@ -276,6 +276,7 @@ public: enactor_stats[0], frontier_attribute[0], enactor_stats->iteration+1, + data_slice, d_data_slice, (VertexId*)NULL, (bool* )NULL, @@ -494,10 +495,11 @@ public: enactor_stats -> nodes_queued[0] += frontier_attribute -> queue_length; // Edge Map gunrock::oprtr::advance::LaunchKernel - ( + ( enactor_stats[0], frontier_attribute[0], enactor_stats->iteration+1, + data_slice, d_data_slice, data_slice->d_index_queue, data_slice->d_frontier_map_in, @@ -689,9 +691,10 @@ public: enactor_stats -> nodes_queued[0] += frontier_attribute -> queue_length; // Edge Map gunrock::oprtr::advance::LaunchKernel - ( + ( enactor_stats[0], frontier_attribute[0], + data_slice, d_data_slice, (VertexId*)NULL, (bool* )NULL, diff --git a/gunrock/app/dobfs/dobfs_functor.cuh b/gunrock/app/dobfs/dobfs_functor.cuh index 60b024f5d..3b8b0e789 100644 --- a/gunrock/app/dobfs/dobfs_functor.cuh +++ b/gunrock/app/dobfs/dobfs_functor.cuh @@ -41,10 +41,10 @@ namespace dobfs { * @tparam ProblemData Problem data type which contains data slice for BFS problem * */ -template +template struct PrepareInputFrontierMapFunctor { - typedef typename ProblemData::DataSlice DataSlice; + typedef typename Problem::DataSlice DataSlice; typedef _LabelT LabelT; /** @@ -57,7 +57,14 @@ struct PrepareInputFrontierMapFunctor * * \return Whether to load the apply function for the node and include it in the outgoing vertex frontier. */ - static __device__ __forceinline__ bool CondFilter(VertexId node, DataSlice *problem, Value v =0, SizeT nid=0) + static __device__ __forceinline__ bool CondFilter(//VertexId node, DataSlice *problem, Value v =0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { return true; } @@ -65,10 +72,17 @@ struct PrepareInputFrontierMapFunctor /** * @brief Vertex mapping apply function. Set frontier_map_in */ - static __device__ __forceinline__ void ApplyFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + static __device__ __forceinline__ void ApplyFilter(//VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { - util::io::ModifiedStore::St( - true, problem->d_frontier_map_in + node); + util::io::ModifiedStore::St( + true, d_data_slice->d_frontier_map_in + node); } }; @@ -80,10 +94,10 @@ struct PrepareInputFrontierMapFunctor * @tparam ProblemData Problem data type which contains data slice for BFS problem * */ -template +template struct PrepareUnvisitedQueueFunctor { - typedef typename ProblemData::DataSlice DataSlice; + typedef typename Problem::DataSlice DataSlice; typedef _LabelT LabelT; /** @@ -96,18 +110,32 @@ struct PrepareUnvisitedQueueFunctor * * \return Whether to load the apply function for the node and include it in the outgoing vertex frontier. */ - static __device__ __forceinline__ bool CondFilter(VertexId node, DataSlice *problem, Value v =0, SizeT nid=0) + static __device__ __forceinline__ bool CondFilter(//VertexId node, DataSlice *problem, Value v =0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { - VertexId label; - util::io::ModifiedLoad::Ld( - label, problem->labels + node); - return (label == -1 || label == util::MaxValue()-1); + VertexId new_label; + util::io::ModifiedLoad::Ld( + new_label, d_data_slice->labels + node); + return (new_label == -1 || new_label == util::MaxValue()-1); } /** * @brief Vertex mapping apply function. Doing nothing. */ - static __device__ __forceinline__ void ApplyFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + static __device__ __forceinline__ void ApplyFilter(//VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { // Doing nothing here } @@ -126,10 +154,10 @@ struct PrepareUnvisitedQueueFunctor * @tparam ProblemData Problem data type which contains data slice for BFS problem * */ -template +template struct ReverseBFSFunctor { - typedef typename ProblemData::DataSlice DataSlice; + typedef typename Problem::DataSlice DataSlice; typedef _LabelT LabelT; /** * @brief Forward Edge Mapping condition function. Check if the destination node @@ -143,13 +171,21 @@ struct ReverseBFSFunctor * * \return Whether to load the apply function for the edge and include the destination node in the next frontier. */ - static __device__ __forceinline__ bool CondEdge(VertexId s_id, VertexId d_id, DataSlice *problem, VertexId e_id = 0, VertexId e_id_in = 0) + static __device__ __forceinline__ bool CondEdge(//VertexId s_id, VertexId d_id, DataSlice *problem, VertexId e_id = 0, VertexId e_id_in = 0) + VertexId s_id, + VertexId d_id, + DataSlice *d_data_slice, + SizeT edge_id , + VertexId input_item, + LabelT label , + SizeT input_pos , + SizeT &output_pos) { // Check if the destination node has been claimed as someone's child //return (atomicCAS(&problem->d_preds[d_id], -2, s_id) == -2) ? true : false; - if (ProblemData::MARK_PREDECESSORS) - util::io::ModifiedStore::St( - s_id, problem->preds + d_id); + if (Problem::MARK_PREDECESSORS) + util::io::ModifiedStore::St( + s_id, d_data_slice->preds + d_id); return true; } @@ -165,15 +201,23 @@ struct ReverseBFSFunctor * @param[in] e_id_in input edge id * */ - static __device__ __forceinline__ void ApplyEdge(VertexId s_id, VertexId d_id, DataSlice *problem, VertexId e_id = 0, VertexId e_id_in = 0) + static __device__ __forceinline__ void ApplyEdge(//VertexId s_id, VertexId d_id, DataSlice *problem, VertexId e_id = 0, VertexId e_id_in = 0) + VertexId s_id, + VertexId d_id, + DataSlice *d_data_slice, + SizeT edge_id , + VertexId input_item, + LabelT label , + SizeT input_pos , + SizeT &output_pos) { //set d_labels[d_id] to be d_labels[s_id]+1 VertexId label = s_id; - if (ProblemData::MARK_PREDECESSORS) - util::io::ModifiedLoad::Ld( - label, problem->labels + s_id); - util::io::ModifiedStore::St( - label+1, problem->labels + d_id); + if (Problem::MARK_PREDECESSORS) + util::io::ModifiedLoad::Ld( + label, d_data_slice->labels + s_id); + util::io::ModifiedStore::St( + label+1, d_data_slice->labels + d_id); //printf("src:%d, dst:%d, label:%d\n", s_id, d_id, problem->d_labels[d_id]); } @@ -188,7 +232,14 @@ struct ReverseBFSFunctor * * \return Whether to load the apply function for the node and include it in the outgoing vertex frontier. */ - static __device__ __forceinline__ bool CondFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + static __device__ __forceinline__ bool CondFilter(//VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { return (node != -1); } @@ -196,7 +247,14 @@ struct ReverseBFSFunctor /** * @brief Vertex mapping apply function. Doing nothing. */ - static __device__ __forceinline__ void ApplyFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + static __device__ __forceinline__ void ApplyFilter(//VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { // Doing nothing here } @@ -214,10 +272,10 @@ struct ReverseBFSFunctor * @tparam ProblemData Problem data type which contains data slice for BFS problem * */ -template +template struct SwitchToNormalFunctor { - typedef typename ProblemData::DataSlice DataSlice; + typedef typename Problem::DataSlice DataSlice; typedef _LabelT LabelT; /** @@ -230,18 +288,32 @@ struct SwitchToNormalFunctor * * \return Whether to load the apply function for the node and include it in the outgoing vertex frontier. */ - static __device__ __forceinline__ bool CondFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + static __device__ __forceinline__ bool CondFilter(//VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { bool flag; - util::io::ModifiedLoad::Ld( - flag, problem->d_frontier_map_out + node); + util::io::ModifiedLoad::Ld( + flag, d_data_slice->d_frontier_map_out + node); return (flag); } /** * @brief Vertex mapping apply function. Doing nothing. */ - static __device__ __forceinline__ void ApplyFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + static __device__ __forceinline__ void ApplyFilter(//VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) + VertexId v, + VertexId node, + DataSlice *d_data_slice, + SizeT nid , + LabelT label, + SizeT input_pos, + SizeT output_pos) { // Doing nothing here } diff --git a/gunrock/app/dobfs/dobfs_problem.cuh b/gunrock/app/dobfs/dobfs_problem.cuh index bdfce1b7e..0749d073c 100644 --- a/gunrock/app/dobfs/dobfs_problem.cuh +++ b/gunrock/app/dobfs/dobfs_problem.cuh @@ -55,6 +55,7 @@ struct DOBFSProblem : ProblemBase BaseProblem; typedef DataSliceBase BaseDataSlice; + typedef unsigned char MaskT; //Helper structures diff --git a/gunrock/app/enactor_loop.cuh b/gunrock/app/enactor_loop.cuh index 6dc995fe9..7e9b23a79 100644 --- a/gunrock/app/enactor_loop.cuh +++ b/gunrock/app/enactor_loop.cuh @@ -100,12 +100,12 @@ void Iteration_Loop( *scanned_edges_ = NULL; int peer, peer_, peer__, gpu_, i, iteration_, wait_count; bool over_sized; - int communicate_latency = enactor -> communicate_latency; + SizeT communicate_latency = enactor -> communicate_latency; float communicate_multipy = enactor -> communicate_multipy; - int expand_latency = enactor -> expand_latency; - int subqueue_latency = enactor -> subqueue_latency; - int fullqueue_latency = enactor -> fullqueue_latency; - int makeout_latency = enactor -> makeout_latency; + SizeT expand_latency = enactor -> expand_latency; + SizeT subqueue_latency = enactor -> subqueue_latency; + SizeT fullqueue_latency = enactor -> fullqueue_latency; + SizeT makeout_latency = enactor -> makeout_latency; if (enactor -> debug) { diff --git a/gunrock/app/pr/pr_app.cu b/gunrock/app/pr/pr_app.cu index 2e9ffdf9b..526d4c260 100644 --- a/gunrock/app/pr/pr_app.cu +++ b/gunrock/app/pr/pr_app.cu @@ -141,7 +141,7 @@ void runPageRank(GRGraph *output, PR_Parameter *parameter) Value delta = parameter -> delta; Value error = parameter -> error; SizeT max_iter = parameter -> max_iter; - int traversal_mode = parameter -> traversal_mode; + std::string traversal_mode = parameter -> traversal_mode; bool instrument = parameter -> instrumented; bool debug = parameter -> debug; bool size_check = parameter -> size_check; @@ -167,6 +167,7 @@ void runPageRank(GRGraph *output, PR_Parameter *parameter) gpu_idx, partition_method, streams, + context, max_queue_sizing, max_in_sizing, partition_factor, @@ -233,7 +234,7 @@ void runPageRank(GRGraph *output, PR_Parameter *parameter) void dispatchPageRank( GRGraph *grapho, const GRGraph *graphi, - const GRSetup config, + const GRSetup *config, const GRTypes data_t, ContextPtr* context, cudaStream_t* streams) @@ -243,13 +244,13 @@ void dispatchPageRank( parameter->src[0] = -1; parameter->context = context; parameter->streams = streams; - parameter->g_quiet = config.quiet; - parameter->num_gpus = config.num_devices; - parameter->gpu_idx = config.device_list; - parameter->delta = config.pagerank_delta; - parameter->error = config.pagerank_error; - parameter->max_iter = config.max_iters; - parameter->normalized = config.pagerank_normalized; + parameter->g_quiet = config -> quiet; + parameter->num_gpus = config -> num_devices; + parameter->gpu_idx = config -> device_list; + parameter->delta = config -> pagerank_delta; + parameter->error = config -> pagerank_error; + parameter->max_iter = config -> max_iters; + parameter->normalized = config -> pagerank_normalized; parameter->g_undirected = true; switch (data_t.VTXID_TYPE) @@ -310,7 +311,7 @@ void dispatchPageRank( void gunrock_pagerank( GRGraph *grapho, const GRGraph *graphi, - const GRSetup config, + const GRSetup *config, const GRTypes data_t) { // GPU-related configurations @@ -319,20 +320,20 @@ void gunrock_pagerank( ContextPtr *context = NULL; cudaStream_t *streams = NULL; - num_gpus = config.num_devices; + num_gpus = config -> num_devices; gpu_idx = new int [num_gpus]; for (int i = 0; i < num_gpus; ++i) { - gpu_idx[i] = config.device_list[i]; + gpu_idx[i] = config -> device_list[i]; } // Create streams and MordernGPU context for each GPU streams = new cudaStream_t[num_gpus * num_gpus * 2]; context = new ContextPtr[num_gpus * num_gpus]; - if (!config.quiet) { printf(" using %d GPUs:", num_gpus); } + if (!config -> quiet) { printf(" using %d GPUs:", num_gpus); } for (int gpu = 0; gpu < num_gpus; ++gpu) { - if (!config.quiet) { printf(" %d ", gpu_idx[gpu]); } + if (!config -> quiet) { printf(" %d ", gpu_idx[gpu]); } util::SetDevice(gpu_idx[gpu]); for (int i = 0; i < num_gpus * 2; ++i) { @@ -347,7 +348,7 @@ void gunrock_pagerank( } } } - if (!config.quiet) { printf("\n"); } + if (!config -> quiet) { printf("\n"); } dispatchPageRank(grapho, graphi, config, data_t, context, streams); } @@ -377,9 +378,9 @@ void pagerank( data_t.SIZET_TYPE = SIZET_INT; // integer graph size type data_t.VALUE_TYPE = VALUE_FLOAT; // float attributes type - struct GRSetup config = InitSetup(1, NULL); // primitive-specific configures - config.top_nodes = 10; // number of top nodes - config.pagerank_normalized = normalized; + struct GRSetup *config = InitSetup(1, NULL); // primitive-specific configures + config -> top_nodes = 10; // number of top nodes + config -> pagerank_normalized = normalized; struct GRGraph *grapho = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graphi = (struct GRGraph*)malloc(sizeof(struct GRGraph)); diff --git a/gunrock/app/problem_base.cuh b/gunrock/app/problem_base.cuh index 099cc8075..1318ee69d 100644 --- a/gunrock/app/problem_base.cuh +++ b/gunrock/app/problem_base.cuh @@ -1282,7 +1282,7 @@ public: float partition_factor ; // Partition factor int partition_seed ; // Partition seed int iterations ; // Number of repeats - int traversal_mode ; // Load-balanced or Dynamic cooperative + std::string traversal_mode ; // Load-balanced or Dynamic cooperative /** * @brief TestParameter_Base constructor @@ -1310,7 +1310,7 @@ public: partition_factor = -1; partition_seed = -1; iterations = 1; - traversal_mode = -1; + traversal_mode = "LB"; } // end TestParameter_Base() /** diff --git a/gunrock/app/sssp/sssp_app.cu b/gunrock/app/sssp/sssp_app.cu index bad85572e..ab4e45572 100644 --- a/gunrock/app/sssp/sssp_app.cu +++ b/gunrock/app/sssp/sssp_app.cu @@ -139,7 +139,7 @@ void runSSSP(GRGraph* output, SSSP_Parameter *parameter) int partition_seed = parameter -> partition_seed; bool g_stream_from_host = parameter -> g_stream_from_host; int delta_factor = parameter -> delta_factor; - int traversal_mode = parameter -> traversal_mode; + std::string traversal_mode = parameter -> traversal_mode; bool instrument = parameter -> instrumented; bool debug = parameter -> debug; bool size_check = parameter -> size_check; @@ -147,6 +147,7 @@ void runSSSP(GRGraph* output, SSSP_Parameter *parameter) // Allocate host-side distance arrays Value *h_distances = new Value[graph->nodes]; VertexId *h_preds = MARK_PREDECESSORS ? new VertexId[graph->nodes] : NULL; + if (max_queue_sizing < 1.2) max_queue_sizing=1.2; for (int gpu = 0; gpu < num_gpus; gpu++) { @@ -227,7 +228,7 @@ void runSSSP(GRGraph* output, SSSP_Parameter *parameter) void dispatchSSSP( GRGraph* grapho, const GRGraph* graphi, - const GRSetup config, + const GRSetup* config, const GRTypes data_t, ContextPtr* context, cudaStream_t* streams) @@ -236,12 +237,12 @@ void dispatchSSSP( parameter->src = (long long*)malloc(sizeof(long long)); parameter->context = context; parameter->streams = streams; - parameter->g_quiet = config.quiet; - parameter->num_gpus = config.num_devices; - parameter->gpu_idx = config.device_list; - parameter->delta_factor = config.delta_factor; - parameter->traversal_mode = config.traversal_mode; - parameter->mark_predecessors = config.mark_predecessors; + parameter->g_quiet = config -> quiet; + parameter->num_gpus = config -> num_devices; + parameter->gpu_idx = config -> device_list; + parameter->delta_factor = config -> delta_factor; + parameter->traversal_mode = std::string(config -> traversal_mode); + parameter->mark_predecessors = config -> mark_predecessors; switch (data_t.VTXID_TYPE) { @@ -264,7 +265,7 @@ void dispatchSSSP( parameter->graph = &csr; // determine source vertex to start - switch (config.source_mode) + switch (config -> source_mode) { case randomize: { @@ -279,7 +280,7 @@ void dispatchSSSP( } case manually: { - parameter->src[0] = config.source_vertex[0]; + parameter->src[0] = config -> source_vertex[0]; break; } default: @@ -335,7 +336,7 @@ void dispatchSSSP( void gunrock_sssp( GRGraph* grapho, const GRGraph* graphi, - const GRSetup config, + const GRSetup* config, const GRTypes data_t) { // GPU-related configurations @@ -344,20 +345,20 @@ void gunrock_sssp( ContextPtr *context = NULL; cudaStream_t *streams = NULL; - num_gpus = config.num_devices; + num_gpus = config -> num_devices; gpu_idx = new int [num_gpus]; for (int i = 0; i < num_gpus; ++i) { - gpu_idx[i] = config.device_list[i]; + gpu_idx[i] = config -> device_list[i]; } // Create streams and MordernGPU context for each GPU streams = new cudaStream_t[num_gpus * num_gpus * 2]; context = new ContextPtr[num_gpus * num_gpus]; - if (!config.quiet) { printf(" using %d GPUs:", num_gpus); } + if (!config -> quiet) { printf(" using %d GPUs:", num_gpus); } for (int gpu = 0; gpu < num_gpus; ++gpu) { - if (!config.quiet) { printf(" %d ", gpu_idx[gpu]); } + if (!config -> quiet) { printf(" %d ", gpu_idx[gpu]); } util::SetDevice(gpu_idx[gpu]); for (int i = 0; i < num_gpus * 2; ++i) { @@ -372,7 +373,7 @@ void gunrock_sssp( } } } - if (!config.quiet) { printf("\n"); } + if (!config -> quiet) { printf("\n"); } dispatchSSSP(grapho, graphi, config, data_t, context, streams); } @@ -401,9 +402,9 @@ void sssp( data_t.SIZET_TYPE = SIZET_INT; // integer graph size type data_t.VALUE_TYPE = VALUE_INT; // integer attributes type - struct GRSetup config = InitSetup(1, NULL); // primitive-specific configures - config.source_vertex[0] = source; // source vertex to start - config.mark_predecessors = false; // do not mark predecessors + struct GRSetup *config = InitSetup(1, NULL); // primitive-specific configures + config -> source_vertex[0] = source; // source vertex to start + config -> mark_predecessors = false; // do not mark predecessors struct GRGraph *grapho = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graphi = (struct GRGraph*)malloc(sizeof(struct GRGraph)); diff --git a/gunrock/app/sssp/sssp_enactor.cuh b/gunrock/app/sssp/sssp_enactor.cuh index d9e67d18f..a5aafb3d5 100644 --- a/gunrock/app/sssp/sssp_enactor.cuh +++ b/gunrock/app/sssp/sssp_enactor.cuh @@ -721,7 +721,7 @@ struct SSSPIteration : public IterationBase < over_sized, thread_num, iteration, peer_, false)) return; if (enactor_stats->retval = Check_Size ( - true, "queue3", graph_slice->nodes+2, + true, "queue3", graph_slice->nodes * 1.2 + 2, &frontier_queue->keys [selector ], over_sized, thread_num, iteration, peer_, true )) return; if (enactor -> problem -> use_double_buffer) @@ -733,7 +733,7 @@ struct SSSPIteration : public IterationBase < over_sized, thread_num, iteration, peer_, false)) return; if (enactor_stats->retval = Check_Size ( - true, "queue3", graph_slice->nodes+2, + true, "queue3", graph_slice->nodes * 1.2 + 2, &frontier_queue->values[selector ], over_sized, thread_num, iteration, peer_, true )) return; } @@ -1318,7 +1318,7 @@ public: else if (traversal_mode == "LB_LIGHT_CULL") return MODE_SWITCH ::Init(*this, context, problem, max_grid_size); - + else printf("Traversal_mode %s is undefined for SSSP\n", traversal_mode.c_str()); // if (traversal_mode == 0) // return InitSSSP< LBAdvanceKernelPolicy, FilterKernelPolicy>( // context, problem, max_grid_size); diff --git a/gunrock/graphio/grmat.cuh b/gunrock/graphio/grmat.cuh index eb5e4cb96..17324dacf 100644 --- a/gunrock/graphio/grmat.cuh +++ b/gunrock/graphio/grmat.cuh @@ -150,6 +150,7 @@ __global__ void Rmat_Kernel( } } +template __global__ void Rand_Init( unsigned int seed, curandState *d_states) @@ -245,6 +246,7 @@ cudaError_t BuildRmatGraph( if (retval = rand_states[gpu].Allocate(block_size * grid_size, util::DEVICE)) return retval; Rand_Init + <<>> (seed_, rand_states[gpu].GetPointer(util::DEVICE)); @@ -389,6 +391,7 @@ cudaError_t BuildMetaRmatGraph( if (retval = rand_states[gpu].Allocate(block_size * grid_size, util::DEVICE)) return retval; Rand_Init + <<>> (seed_, rand_states[gpu].GetPointer(util::DEVICE)); diff --git a/gunrock/graphio/utils.cuh b/gunrock/graphio/utils.cuh index 44d222fe3..541744cc6 100644 --- a/gunrock/graphio/utils.cuh +++ b/gunrock/graphio/utils.cuh @@ -100,9 +100,10 @@ void RemoveStandaloneNodes( if (thread_num == 0) block_offsets = new SizeT[num_threads + 1]; #pragma omp barrier - displacements[node_start] = 0; + if (node_end > node_start) displacements[node_start] = 0; for (VertexId node = node_start; node < node_end - 1; node++) displacements[node + 1] = displacements[node] + 1 - marker[node]; + #pragma omp barrier if (node_end != 0) block_offsets[thread_num + 1] = displacements[node_end - 1] + 1 - marker[node_end - 1]; else block_offsets[thread_num + 1] = 1 - marker[0]; @@ -119,10 +120,18 @@ void RemoveStandaloneNodes( { if (marker[node] == 0) continue; VertexId node_ = node - block_offsets[thread_num] - displacements[node]; + //printf("thread_num = %d, node = %d, block_offsets[] = %d, displacements[] = %d, node_ = %d\n", + // thread_num, node, block_offsets[thread_num], displacements[node], node_); new_nodes [node ] = node_; new_offsets[node_] = row_offsets[node]; if (values != NULL) new_values[node_] = values[node]; } + + //#pragma omp barrier + //for (SizeT edge = edge_start; edge < edge_end; edge++) + //{ + // column_indices[edge] = new_nodes[column_indices[edge]]; + //} } for (SizeT edge = 0; edge < edges; edge++) diff --git a/gunrock/gunrock.h b/gunrock/gunrock.h index 165f57b34..58f13ea89 100644 --- a/gunrock/gunrock.h +++ b/gunrock/gunrock.h @@ -17,6 +17,7 @@ #include #include #include +#include /** * @brief VertexId data type enumerators. @@ -103,7 +104,7 @@ struct GRSetup float pagerank_error; // PageRank specific value bool pagerank_normalized; // PageRank specific flag float max_queue_sizing; // Setting frontier queue size - int traversal_mode; // Traversal mode: 0 for LB, 1 TWC + char* traversal_mode; // Traversal mode: 0 for LB, 1 TWC enum SrcMode source_mode; // Source mode rand/largest_degree }; @@ -117,12 +118,12 @@ struct GRSetup // Link mentions is an issue with C99, not a clang specific issue static #endif -inline struct GRSetup InitSetup(int num_iters, int* source) +inline struct GRSetup* InitSetup(int num_iters, int* source) { - struct GRSetup configurations; - configurations.quiet = true; - configurations.mark_predecessors = true; - configurations.enable_idempotence = false; + struct GRSetup *configurations = (struct GRSetup*)malloc(sizeof(struct GRSetup)); + configurations -> quiet = true; + configurations -> mark_predecessors = true; + configurations -> enable_idempotence = false; int* sources = (int*)malloc(sizeof(int)*num_iters); int i; if (source == NULL) @@ -132,20 +133,22 @@ inline struct GRSetup InitSetup(int num_iters, int* source) { for (i = 0; i < num_iters; ++i) sources[i] = source[i]; } - configurations.source_vertex = sources; - configurations.delta_factor = 32; - configurations.num_devices = 1; - configurations.max_iters = 50; - configurations.num_iters = num_iters; - configurations.top_nodes = 10; - configurations.pagerank_delta = 0.85f; - configurations.pagerank_error = 0.01f; - configurations.pagerank_normalized = false; - configurations.max_queue_sizing = 1.0; - configurations.traversal_mode = 0; - configurations.source_mode = manually; + configurations -> source_vertex = sources; + configurations -> delta_factor = 32; + configurations -> num_devices = 1; + configurations -> max_iters = 50; + configurations -> num_iters = num_iters; + configurations -> top_nodes = 10; + configurations -> pagerank_delta = 0.85f; + configurations -> pagerank_error = 0.01f; + configurations -> pagerank_normalized = false; + configurations -> max_queue_sizing = 1.0; + configurations -> traversal_mode = (char*)malloc(sizeof(char) * 3); + strcpy(configurations -> traversal_mode, "LB"); + configurations -> traversal_mode[2] = '\0'; + configurations -> source_mode = manually; int* gpu_idx = (int*)malloc(sizeof(int)); gpu_idx[0] = 0; - configurations.device_list = gpu_idx; + configurations -> device_list = gpu_idx; return configurations; } @@ -164,7 +167,7 @@ extern "C" { float gunrock_bfs( struct GRGraph* grapho, // Output graph / results const struct GRGraph* graphi, // Input graph structure - const struct GRSetup config, // Flag configurations + const struct GRSetup* config, // Flag configurations const struct GRTypes data_t); // Data type Configurations /* @@ -204,7 +207,7 @@ float bfs( void gunrock_bc( struct GRGraph* grapho, // Output graph / results const struct GRGraph* graphi, // Input graph structure - const struct GRSetup config, // Flag configurations + const struct GRSetup* config, // Flag configurations const struct GRTypes data_t); // Data type Configurations /** @@ -236,7 +239,7 @@ void bc( void gunrock_cc( struct GRGraph* grapho, // Output graph / results const struct GRGraph* graphi, // Input graph structure - const struct GRSetup config, // Flag configurations + const struct GRSetup* config, // Flag configurations const struct GRTypes data_t); // Data type Configurations /** @@ -268,7 +271,7 @@ int cc( void gunrock_sssp( struct GRGraph* grapho, // Output graph / results const struct GRGraph* graphi, // Input graph structure - const struct GRSetup config, // Flag configurations + const struct GRSetup* config, // Flag configurations const struct GRTypes data_t); // Data type Configurations /** @@ -302,7 +305,7 @@ void sssp( void gunrock_pagerank( struct GRGraph* grapho, // Output graph / results const struct GRGraph* graphi, // Input graph structure - const struct GRSetup config, // Flag configurations + const struct GRSetup* config, // Flag configurations const struct GRTypes data_t); // Data type Configurations /** diff --git a/gunrock/oprtr/all_edges_advance/kernel.cuh b/gunrock/oprtr/all_edges_advance/kernel.cuh index 34e8b1420..db78eae23 100644 --- a/gunrock/oprtr/all_edges_advance/kernel.cuh +++ b/gunrock/oprtr/all_edges_advance/kernel.cuh @@ -14,6 +14,7 @@ */ #pragma once +#include #include #include @@ -78,7 +79,7 @@ struct Dispatch> 1; //printf("(%d, %d) looking for %d, current range [%d, %d], mid_point = %d\n", // blockIdx.x, threadIdx.x, item_to_find, lower_bound, upper_bound, data[mid_point]); - if (__ldg(data + mid_point) < item_to_find) + if (_ldg(data + mid_point) < item_to_find) lower_bound = mid_point + 1; else upper_bound = mid_point; } @@ -87,7 +88,7 @@ struct Dispatch(); @@ -139,7 +140,7 @@ struct Dispatch %d, input_queue_len = %d\n", diff --git a/gunrock/oprtr/edge_map_partitioned/kernel.cuh b/gunrock/oprtr/edge_map_partitioned/kernel.cuh index 0ba854592..01a45648b 100644 --- a/gunrock/oprtr/edge_map_partitioned/kernel.cuh +++ b/gunrock/oprtr/edge_map_partitioned/kernel.cuh @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -97,11 +98,11 @@ struct Dispatch= max_vertex) ? max_edge :*/ //d_row_offsets[d_vertex_id]; //tex1Dfetch(RowOffsetsTex::row_offsets, vertex_id); - __ldg(d_row_offsets + vertex_id); + _ldg(d_row_offsets + vertex_id); SizeT second = /*(d_vertex_id + 1 >= max_vertex) ? max_edge :*/ //d_row_offsets[d_vertex_id+1]; //tex1Dfetch(RowOffsetsTex::row_offsets, vertex_id + 1); - __ldg(d_row_offsets + (vertex_id + 1)); + _ldg(d_row_offsets + (vertex_id + 1)); //printf(" d_vertex_id = %d, max_vertex = %d, max_edge = %d, first = %d, second = %d\n", // d_vertex_id, max_vertex, max_edge, first, second); @@ -267,7 +268,7 @@ struct Dispatch= 0) smem_storage.row_offset[threadIdx.x]= //row_offsets[input_item]; //tex1Dfetch(RowOffsetsTex::row_offsets, input_item); - __ldg(row_offsets + input_item); + _ldg(row_offsets + input_item); else smem_storage.row_offset[threadIdx.x] = util::MaxValue(); } else if (ADVANCE_TYPE == gunrock::oprtr::advance::E2V || diff --git a/gunrock/oprtr/edge_map_partitioned_cull/kernel.cuh b/gunrock/oprtr/edge_map_partitioned_cull/kernel.cuh index c2cbeb114..fa9c4fa73 100644 --- a/gunrock/oprtr/edge_map_partitioned_cull/kernel.cuh +++ b/gunrock/oprtr/edge_map_partitioned_cull/kernel.cuh @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -52,7 +53,7 @@ struct LoadRowOffset static __device__ __forceinline__ long long Load (long long *&d_row_offsets, VertexId &pos) { - return __ldg(d_row_offsets + pos); + return _ldg(d_row_offsets + pos); } }; @@ -170,12 +171,12 @@ struct Dispatch= max_vertex) ? max_edge :*/ //d_row_offsets[d_vertex_id]; //tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, d_vertex_id); - //__ldg(d_row_offsets + (d_vertex_id)); + //_ldg(d_row_offsets + (d_vertex_id)); SizeT second = LoadRowOffset::Load(d_row_offsets, d_vertex_id + 1); /*(d_vertex_id + 1 >= max_vertex) ? max_edge :*/ //d_row_offsets[d_vertex_id+1]; //tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, d_vertex_id + 1); - //__ldg(d_row_offsets + (d_vertex_id+1)); + //_ldg(d_row_offsets + (d_vertex_id+1)); //printf(" d_vertex_id = %d, max_vertex = %d, max_edge = %d, first = %d, second = %d\n", // d_vertex_id, max_vertex, max_edge, first, second); @@ -449,12 +450,12 @@ struct Dispatch= 0) smem_storage.row_offset[threadIdx.x]= (output_inverse_graph) ? - __ldg(d_inverse_row_offsets + input_item) : + _ldg(d_inverse_row_offsets + input_item) : LoadRowOffset::Load(d_row_offsets, input_item); //row_offsets[input_item]; //tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, input_item); - //__ldg(((output_inverse_graph) ? d_inverse_row_offsets : + //_ldg(((output_inverse_graph) ? d_inverse_row_offsets : // d_row_offsets) + input_item); else smem_storage.row_offset[threadIdx.x] = util::MaxValue(); } @@ -772,11 +773,11 @@ struct Dispatch= 0) smem_storage.row_offset[threadIdx.x] = (output_inverse_graph) ? - __ldg(d_inverse_row_offsets + input_item) : + _ldg(d_inverse_row_offsets + input_item) : LoadRowOffset::Load(d_row_offsets, input_item); //row_offsets[input_item]; //tex1Dfetch(gunrock::oprtr::edge_map_partitioned::RowOffsetsTex::row_offsets, input_item); - //__ldg(row_offsets + input_item); + //_ldg(row_offsets + input_item); else smem_storage.row_offset[threadIdx.x] = util::MaxValue(); } else if (ADVANCE_TYPE == gunrock::oprtr::advance::E2V || ADVANCE_TYPE == gunrock::oprtr::advance::E2E) diff --git a/gunrock/util/device_intrinsics.cuh b/gunrock/util/device_intrinsics.cuh index dad0f8c9b..b58e1a118 100644 --- a/gunrock/util/device_intrinsics.cuh +++ b/gunrock/util/device_intrinsics.cuh @@ -12,7 +12,10 @@ * @brief Common device intrinsics (potentially specialized by architecture) */ -#pragma once +//#pragma once + +#ifndef DEVICE_INTRINSICS_CUH +#define DEVICE_INTRINSICS_CUH #include #include @@ -49,13 +52,32 @@ __device__ static long long atomicAdd(long long *addr, long long val) (unsigned long long )val); } +#if __GR_CUDA_ARCH__ <= 300 // TODO: only works if both *addr and val are non-negetive -//__device__ static long long atomicMin(long long *addr, long long val) -//{ -// return (long long)atomicMin( -// (unsigned long long*)addr, -// (unsigned long long )val); -//} +/*__device__ static signed long long int atomicMin(signed long long int* addr, signed long long int val) +{ + unsigned long long int pre_value = (unsigned long long int)val; + unsigned long long int old_value = (unsigned long long int)val; + while (true) + { + old_value = atomicCAS((unsigned long long int*)addr, pre_value, (unsigned long long int)val); + if (old_value <= (unsigned long long int)val) break; + if (old_value == pre_value) break; + pre_value = old_value; + } + return old_value; +}*/ +#endif + +template +__device__ __forceinline__ T _ldg(T* addr) +{ +#if __GR_CUDA_ARCH__ >= 350 + return __ldg(addr); +#else + return *addr; +#endif +} namespace gunrock { namespace util { @@ -159,6 +181,7 @@ __device__ int BinarySearch(KeyType i, ArrayType *queue) } // namespace util } // namespace gunrock +#endif // Leave this at the end of the file // Local Variables: // mode:c++ diff --git a/gunrock/util/latency_utils.cuh b/gunrock/util/latency_utils.cuh index dfa6a2822..8c3fd105e 100644 --- a/gunrock/util/latency_utils.cuh +++ b/gunrock/util/latency_utils.cuh @@ -21,9 +21,10 @@ namespace latency { #define NUM_BLOCKS 120 #define BLOCK_SIZE 1024 +template __global__ void Load_Kernel( - long long num_repeats, - long long num_elements, + SizeT num_repeats, + SizeT num_elements, int *d_data) { int pos = blockDim.x * blockIdx.x + threadIdx.x; @@ -45,11 +46,12 @@ __global__ void Load_Kernel( } } +template cudaError_t Get_BaseLine( //int num_blocks, //int block_size, - long long num_repeats, - long long num_elements, + SizeT num_repeats, + SizeT num_elements, cudaStream_t stream, float &elapsed_ms, int *d_data) @@ -92,11 +94,12 @@ cudaError_t Test_BaseLine( return retval; } +template cudaError_t Insert_Latency( //int num_blocks, //int block_size, - long long num_repeats, - long long num_elements, + SizeT num_repeats, + SizeT num_elements, cudaStream_t stream, int *d_data) { diff --git a/shared_lib_tests/shared_lib_bc.c b/shared_lib_tests/shared_lib_bc.c index c3bf257de..5ee445cbc 100644 --- a/shared_lib_tests/shared_lib_bc.c +++ b/shared_lib_tests/shared_lib_bc.c @@ -14,7 +14,7 @@ int main(int argc, char* argv[]) data_t.SIZET_TYPE = SIZET_INT; // graph size type data_t.VALUE_TYPE = VALUE_FLOAT; // attributes type - struct GRSetup config = InitSetup(1, NULL); // gunrock configurations + struct GRSetup *config = InitSetup(1, NULL); // gunrock configurations int num_nodes = 7, num_edges = 26; int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 26}; diff --git a/shared_lib_tests/shared_lib_bfs.c b/shared_lib_tests/shared_lib_bfs.c index 728e29e6f..dbc0fdc59 100644 --- a/shared_lib_tests/shared_lib_bfs.c +++ b/shared_lib_tests/shared_lib_bfs.c @@ -15,7 +15,7 @@ int main(int argc, char* argv[]) data_t.VALUE_TYPE = VALUE_INT; // attributes type int srcs[1] = {1}; - struct GRSetup config = InitSetup(1, srcs); // gunrock configurations + struct GRSetup *config = InitSetup(1, srcs); // gunrock configurations int num_nodes = 7, num_edges = 15; // number of nodes and edges int row_offsets[8] = {0, 3, 6, 9, 11, 14, 15, 15}; diff --git a/shared_lib_tests/shared_lib_cc.c b/shared_lib_tests/shared_lib_cc.c index 337e49e55..1a2e5cc0c 100644 --- a/shared_lib_tests/shared_lib_cc.c +++ b/shared_lib_tests/shared_lib_cc.c @@ -14,7 +14,7 @@ int main(int argc, char* argv[]) data_t.SIZET_TYPE = SIZET_INT; // graph size type data_t.VALUE_TYPE = VALUE_INT; // attributes type - struct GRSetup config = InitSetup(1, NULL); // gunrock configurations + struct GRSetup *config = InitSetup(1, NULL); // gunrock configurations int num_nodes = 7, num_edges = 26; int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 26}; diff --git a/shared_lib_tests/shared_lib_pr.c b/shared_lib_tests/shared_lib_pr.c index 6d43249b1..8c05e0bc6 100644 --- a/shared_lib_tests/shared_lib_pr.c +++ b/shared_lib_tests/shared_lib_pr.c @@ -14,7 +14,7 @@ int main(int argc, char* argv[]) data_t.SIZET_TYPE = SIZET_INT; // graph size type data_t.VALUE_TYPE = VALUE_FLOAT; // attributes type - struct GRSetup config = InitSetup(1, NULL); // gunrock configurations + struct GRSetup *config = InitSetup(1, NULL); // gunrock configurations int num_nodes = 7, num_edges = 26; int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 26}; @@ -35,7 +35,7 @@ int main(int argc, char* argv[]) float *top_ranks = (float*)malloc(sizeof(float) * graphi->num_nodes); top_nodes = ( int*)grapho->node_value2; top_ranks = (float*)grapho->node_value1; - int node; for (node = 0; node < config.top_nodes; ++node) + int node; for (node = 0; node < config -> top_nodes; ++node) printf("Node_ID [%d] : Score: [%f]\n", top_nodes[node], top_ranks[node]); if (graphi) free(graphi); diff --git a/shared_lib_tests/shared_lib_sssp.c b/shared_lib_tests/shared_lib_sssp.c index f7c07ad40..60eb589bd 100644 --- a/shared_lib_tests/shared_lib_sssp.c +++ b/shared_lib_tests/shared_lib_sssp.c @@ -14,7 +14,7 @@ int main(int argc, char* argv[]) data_t.SIZET_TYPE = SIZET_INT; // graph size type data_t.VALUE_TYPE = VALUE_INT; // attributes type - struct GRSetup config = InitSetup(1, NULL); // gunrock configurations + struct GRSetup *config = InitSetup(1, NULL); // gunrock configurations int num_nodes = 7, num_edges = 15; // number of nodes and edges int row_offsets[8] = {0, 3, 6, 9, 11, 14, 15, 15}; diff --git a/tests/bc/CMakeLists.txt b/tests/bc/CMakeLists.txt index 35c6db4f8..416714b79 100644 --- a/tests/bc/CMakeLists.txt +++ b/tests/bc/CMakeLists.txt @@ -12,6 +12,12 @@ set (mgpu_SOURCE_FILES ${mgpu_SOURCE_DIRS}/mgpucontext.cu ${mgpu_SOURCE_DIRS}/mgpuutil.cpp) +if (cub_INCLUDE_DIRS) + include_directories(${cub_INCLUDE_DIRS}) +else() + message(SEND_ERROR "CUB include directory not set.") +endif() + CUDA_ADD_EXECUTABLE(betweenness_centrality test_bc.cu ${CMAKE_SOURCE_DIR}/gunrock/util/test_utils.cu diff --git a/tests/bc/test_bc.cu b/tests/bc/test_bc.cu index 4b933ddd7..bf66bed3e 100644 --- a/tests/bc/test_bc.cu +++ b/tests/bc/test_bc.cu @@ -442,6 +442,7 @@ cudaError_t RunTests(Info *info) int fullqueue_latency = info->info["fullqueue_latency" ].get_int (); int makeout_latency = info->info["makeout_latency" ].get_int (); std::string traversal_mode = info->info["traversal_mode" ].get_str (); + if (traversal_mode == "TWC") traversal_mode = "LB"; if (communicate_multipy > 1) max_in_sizing *= communicate_multipy; CpuTimer cpu_timer; diff --git a/tests/bfs/CMakeLists.txt b/tests/bfs/CMakeLists.txt index f8f5f5e1c..1b671e53d 100644 --- a/tests/bfs/CMakeLists.txt +++ b/tests/bfs/CMakeLists.txt @@ -14,6 +14,12 @@ set (mgpu_SOURCE_FILES ${mgpu_SOURCE_DIRS}/mgpucontext.cu ${mgpu_SOURCE_DIRS}/mgpuutil.cpp) +if (cub_INCLUDE_DIRS) + include_directories(${cub_INCLUDE_DIRS}) +else() + message(SEND_ERROR "CUB include directory not set.") +endif() + CUDA_ADD_EXECUTABLE(breadth_first_search test_bfs.cu ${CMAKE_SOURCE_DIR}/gunrock/util/test_utils.cu diff --git a/tests/bfs/test_bfs.cu b/tests/bfs/test_bfs.cu index 521cd96af..3db23a1d3 100644 --- a/tests/bfs/test_bfs.cu +++ b/tests/bfs/test_bfs.cu @@ -426,7 +426,9 @@ cudaError_t RunTests(Info *info) if (!quiet_mode) printf("Using traversal-mode %s\n", traversal_mode.c_str()); - json_spirit::mArray source_list = info->info["source_list"].get_array(); + json_spirit::mArray source_list; + if (src_type == "list") + source_list = info->info["source_list"].get_array(); for (int iter = 0; iter < iterations; ++iter) { if (src_type == "random2") @@ -440,12 +442,13 @@ cudaError_t RunTests(Info *info) } } else if (src_type == "list") { - if (source_list.size() == 0) { + if (source_list.size() == 0) + { if (!quiet_mode) printf("No source list found. Use 0 as source.\n"); src = 0; } else { - src = source_list[iter].get_int(); + src = source_list[iter].get_int(); } } @@ -875,7 +878,7 @@ template < typename VertexId> int main_SizeT(CommandLineArgs *args) { -// disabled to reduce compile time +// can be disabled to reduce compile time if (args -> CheckCmdLineFlag("64bit-SizeT") || sizeof(VertexId) > 4) return main_Value(args); else @@ -884,9 +887,17 @@ int main_SizeT(CommandLineArgs *args) int main_VertexId(CommandLineArgs *args) { -// disabled, because oprtr::filter::KernelPolicy::SmemStorage is too large for 64bit VertexId +// can be disabled to reduce compile time +// atomicMin(long long) is only available for compute capability 3.5 or higher if (args -> CheckCmdLineFlag("64bit-VertexId")) +#if __GR_CUDA_ARCH__ <= 300 + { + printf("64bit-VertexId disabled, because atomicMin(long long) is only supported by compute capability 3.5 or higher\n"); + return 1; + } +#else return main_SizeT(args); +#endif else return main_SizeT(args); } diff --git a/tests/cc/CMakeLists.txt b/tests/cc/CMakeLists.txt index 2eb15b781..8b10119db 100644 --- a/tests/cc/CMakeLists.txt +++ b/tests/cc/CMakeLists.txt @@ -12,6 +12,12 @@ set (mgpu_SOURCE_FILES ${mgpu_SOURCE_DIRS}/mgpucontext.cu ${mgpu_SOURCE_DIRS}/mgpuutil.cpp) +if (cub_INCLUDE_DIRS) + include_directories(${cub_INCLUDE_DIRS}) +else() + message(SEND_ERROR "CUB include directory not set.") +endif() + CUDA_ADD_EXECUTABLE(connected_component test_cc.cu ${CMAKE_SOURCE_DIR}/gunrock/util/test_utils.cu diff --git a/tests/sssp/CMakeLists.txt b/tests/sssp/CMakeLists.txt index 849c6a176..9bbb80037 100644 --- a/tests/sssp/CMakeLists.txt +++ b/tests/sssp/CMakeLists.txt @@ -12,6 +12,12 @@ set (mgpu_SOURCE_FILES ${mgpu_SOURCE_DIRS}/mgpucontext.cu ${mgpu_SOURCE_DIRS}/mgpuutil.cpp) +if (cub_INCLUDE_DIRS) + include_directories(${cub_INCLUDE_DIRS}) +else() + message(SEND_ERROR "CUB include directory not set.") +endif() + CUDA_ADD_EXECUTABLE(single_source_shortest_path test_sssp.cu ${CMAKE_SOURCE_DIR}/gunrock/util/test_utils.cu diff --git a/tests/sssp/test_sssp.cu b/tests/sssp/test_sssp.cu index 001795130..667b36fbb 100644 --- a/tests/sssp/test_sssp.cu +++ b/tests/sssp/test_sssp.cu @@ -331,6 +331,7 @@ cudaError_t RunTests(Info *info) CpuTimer cpu_timer; cudaError_t retval = cudaSuccess; + if (max_queue_sizing < 1.2) max_queue_sizing=1.2; cpu_timer.Start(); json_spirit::mArray device_list = info->info["device_list"].get_array();