debug and interface alignment. BFS, SSSP, BC, CC, PR pass build and c…

…test; *_app.cu and shared library version still have parameter passing issues (only work with default parameters); DOBFS(old), HITS, SALSA, WTF, MST, SM haven't been aligned yet
sgpyc · Apr 27, 2016 · 403e4e7 · 403e4e7
1 parent 380efc7
commit 403e4e7
Show file tree

Hide file tree

Showing 38 changed files with 400 additions and 233 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -168,11 +168,11 @@ option(GUNROCK_GENCODE_SM20
 
 option(GUNROCK_GENCODE_SM30
   "ON to generate code for Compute Capability 3.0 devices (e.g. Tesla K10)"
-  ON)
+  OFF)
 
 option(GUNROCK_GENCODE_SM35
   "ON to generate code for Compute Capability 3.5 devices (e.g. Tesla K20)"
-  OFF)
+  ON)
 
 option(GUNROCK_GENCODE_SM37
   "ON to generate code for Compute Capability 3.7 devices (e.g. Tesla K80)"

diff --git a/gunrock/app/bc/bc_app.cu b/gunrock/app/bc/bc_app.cu
@@ -240,18 +240,18 @@ void runBC(GRGraph* output, BC_Parameter *parameter)
 void dispatchBC(
     GRGraph*        grapho,
     const GRGraph*  graphi,
-    const GRSetup   config,
+    const GRSetup*  config,
     const GRTypes   data_t,
     ContextPtr*     context,
     cudaStream_t*   streams)
 {
     BC_Parameter* parameter = new BC_Parameter;
     parameter->src = (long long*)malloc(sizeof(long long));
-    parameter->g_quiet  = config.quiet;
+    parameter->g_quiet  = config -> quiet;
     parameter->context  = context;
     parameter->streams  = streams;
-    parameter->num_gpus = config.num_devices;
-    parameter->gpu_idx  = config.device_list;
+    parameter->num_gpus = config -> num_devices;
+    parameter->gpu_idx  = config -> device_list;
 
     switch (data_t.VTXID_TYPE)
     {
@@ -286,7 +286,7 @@ void dispatchBC(
                 parameter->graph = &csr;
 
                 // determine source vertex to start
-                switch (config.source_mode)
+                switch (config -> source_mode)
                 {
                 case randomize:
                 {
@@ -301,7 +301,7 @@ void dispatchBC(
                 }
                 case manually:
                 {
-                    parameter->src[0] = config.source_vertex[0];
+                    parameter->src[0] = config -> source_vertex[0];
                     break;
                 }
                 default:
@@ -341,7 +341,7 @@ void dispatchBC(
 void gunrock_bc(
     GRGraph       *grapho,
     const GRGraph *graphi,
-    const GRSetup  config,
+    const GRSetup *config,
     const GRTypes  data_t)
 {
     // GPU-related configurations
@@ -350,20 +350,20 @@ void gunrock_bc(
     ContextPtr    *context = NULL;
     cudaStream_t  *streams = NULL;
 
-    num_gpus = config.num_devices;
+    num_gpus = config -> num_devices;
     gpu_idx  = new int [num_gpus];
     for (int i = 0; i < num_gpus; ++i)
     {
-        gpu_idx[i] = config.device_list[i];
+        gpu_idx[i] = config -> device_list[i];
     }
 
     // Create streams and MordernGPU context for each GPU
     streams = new cudaStream_t[num_gpus * num_gpus * 2];
     context = new ContextPtr[num_gpus * num_gpus];
-    if (!config.quiet) { printf(" using %d GPUs:", num_gpus); }
+    if (!config -> quiet) { printf(" using %d GPUs:", num_gpus); }
     for (int gpu = 0; gpu < num_gpus; ++gpu)
     {
-        if (!config.quiet) { printf(" %d ", gpu_idx[gpu]); }
+        if (!config -> quiet) { printf(" %d ", gpu_idx[gpu]); }
         util::SetDevice(gpu_idx[gpu]);
         for (int i = 0; i < num_gpus * 2; ++i)
         {
@@ -378,7 +378,7 @@ void gunrock_bc(
             }
         }
     }
-    if (!config.quiet) { printf("\n"); }
+    if (!config -> quiet) { printf("\n"); }
 
     dispatchBC(grapho, graphi, config, data_t, context, streams);
 }
@@ -406,8 +406,8 @@ void bc(
     data_t.SIZET_TYPE = SIZET_INT;    // integer graph size type
     data_t.VALUE_TYPE = VALUE_FLOAT;  // float attributes type
 
-    struct GRSetup config = InitSetup(1, NULL);  // primitive-specific configures
-    config.source_vertex[0] = source;        // source vertex to start
+    struct GRSetup *config = InitSetup(1, NULL);  // primitive-specific configures
+    config -> source_vertex[0] = source;        // source vertex to start
 
     struct GRGraph *grapho = (struct GRGraph*)malloc(sizeof(struct GRGraph));
     struct GRGraph *graphi = (struct GRGraph*)malloc(sizeof(struct GRGraph));

diff --git a/gunrock/app/bc/bc_enactor.cuh b/gunrock/app/bc/bc_enactor.cuh
@@ -2226,6 +2226,7 @@ public:
             else if (traversal_mode == "LB_LIGHT_CULL")
                  return MODE_SWITCH<SizeT, gunrock::oprtr::advance::LB_LIGHT_CULL>
                     ::Init(*this, context, problem, max_grid_size);
+            else printf("Traversal mode %s is not supported by BC at the moment\n", traversal_mode.c_str());
         }
 
         //to reduce compile time, get rid of other architecture for now

diff --git a/gunrock/app/bc/bc_functor.cuh b/gunrock/app/bc/bc_functor.cuh
@@ -16,6 +16,7 @@
 
 #include <gunrock/app/problem_base.cuh>
 #include <gunrock/app/bc/bc_problem.cuh>
+#include <gunrock/util/device_intrinsics.cuh>
 
 namespace gunrock {
 namespace app {
@@ -92,7 +93,7 @@ struct ForwardFunctor {
             //VertexId label_d;
             //util::io::ModifiedLoad<Problem::COLUMN_READ_MODIFIER>::Ld(
             //    label_d, d_data_slice->labels + d_id);
-            //label_d = __ldg(d_data_slice -> labels + d_id);
+            //label_d = _ldg(d_data_slice -> labels + d_id);
             //if (label_d == label /*+ 1*/) {
                 //Accumulate sigma value
                 atomicAdd(d_data_slice->sigmas + d_id, d_data_slice->sigmas[s_id]);
@@ -229,10 +230,10 @@ struct BackwardFunctor {
         VertexId d_label;
         //util::io::ModifiedLoad<Problem::COLUMN_READ_MODIFIER>::Ld(
         //    s_label, d_data_slice->labels + s_id);
-        s_label = __ldg(d_data_slice -> labels + s_id);
+        s_label = _ldg(d_data_slice -> labels + s_id);
         //util::io::ModifiedLoad<Problem::COLUMN_READ_MODIFIER>::Ld(
         //    d_label, d_data_slice->labels + d_id);
-        d_label = __ldg(d_data_slice -> labels + d_id);
+        d_label = _ldg(d_data_slice -> labels + d_id);
         return (d_label == s_label + 1);
     }
 
@@ -265,17 +266,17 @@ struct BackwardFunctor {
         Value from_sigma;
         //util::io::ModifiedLoad<Problem::COLUMN_READ_MODIFIER>::Ld(
         //    from_sigma, d_data_slice->sigmas + s_id);
-        from_sigma = __ldg(d_data_slice -> sigmas + s_id);
+        from_sigma = _ldg(d_data_slice -> sigmas + s_id);
 
         Value to_sigma;
         //util::io::ModifiedLoad<Problem::COLUMN_READ_MODIFIER>::Ld(
         //    to_sigma, d_data_slice->sigmas + d_id);
-        to_sigma = __ldg(d_data_slice -> sigmas + d_id);
+        to_sigma = _ldg(d_data_slice -> sigmas + d_id);
 
         Value to_delta;
         //util::io::ModifiedLoad<Problem::COLUMN_READ_MODIFIER>::Ld(
         //    to_delta, d_data_slice->deltas + d_id);
-        to_delta = __ldg(d_data_slice -> deltas + d_id);
+        to_delta = _ldg(d_data_slice -> deltas + d_id);
 
         Value result = from_sigma / to_sigma * (1.0 + to_delta);
 

diff --git a/gunrock/app/bfs/bfs_app.cu b/gunrock/app/bfs/bfs_app.cu
@@ -36,12 +36,14 @@ struct BFS_Parameter : gunrock::app::TestParameter_Base
 public:
     bool   mark_predecessors ;  // mark src-distance vs. parent vertices
     bool   enable_idempotence;  // enable idempotence operation
+    bool   direction_optimized; // enable direction optimization
     double max_queue_sizing1 ;  // maximum queue sizing factor
 
     BFS_Parameter()
     {
         mark_predecessors  = false;
         enable_idempotence = false;
+        direction_optimized = false;
         max_queue_sizing1  = -1.0f;
     }
 
@@ -173,10 +175,12 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter)
     float         partition_factor     = parameter -> partition_factor;
     int           partition_seed       = parameter -> partition_seed;
     bool          g_stream_from_host   = parameter -> g_stream_from_host;
-    int           traversal_mode       = parameter -> traversal_mode;
+    std::string   traversal_mode       = parameter -> traversal_mode;
     bool          instrument           = parameter -> instrumented;
     bool          debug                = parameter -> debug;
     bool          size_check           = parameter -> size_check;
+    bool          undirected           = parameter -> g_undirected;
+    bool          direction_optimized  = parameter -> direction_optimized;
     size_t       *org_size             = new size_t  [num_gpus];
     // Allocate host-side label array
     VertexId     *h_labels             = new VertexId[graph->nodes];
@@ -193,7 +197,7 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter)
         cudaSetDevice(gpu_idx[gpu]);
         cudaMemGetInfo(&(org_size[gpu]), &dummy);
     }
-    Problem *problem = new Problem;  // Allocate problem on GPU
+    Problem *problem = new Problem(direction_optimized, undirected);  // Allocate problem on GPU
 
     util::GRError( problem->Init(
         g_stream_from_host,
@@ -210,7 +214,7 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter)
         "Problem BFS Initialization Failed", __FILE__, __LINE__);
 
     Enactor *enactor = new Enactor(
-        num_gpus, gpu_idx, instrument, debug, size_check);  // BFS enactor map
+        num_gpus, gpu_idx, instrument, debug, size_check, direction_optimized);  // BFS enactor map
     util::GRError(
         enactor->Init(context, problem, max_grid_size, traversal_mode),
         "BFS Enactor init failed", __FILE__, __LINE__);
@@ -276,21 +280,21 @@ float runBFS(GRGraph* output, BFS_Parameter *parameter)
 float dispatch_bfs(
     GRGraph*       grapho,
     const GRGraph* graphi,
-    const GRSetup  config,
+    const GRSetup* config,
     const GRTypes  data_t,
     ContextPtr*    context,
     cudaStream_t*  streams)
 {
     BFS_Parameter *parameter = new BFS_Parameter;
-    parameter->iterations = config.num_iters;
-    parameter->src = (long long*)malloc(sizeof(long long)*config.num_iters);
+    parameter->iterations = config -> num_iters;
+    parameter->src = (long long*)malloc(sizeof(long long)*config -> num_iters);
     parameter->context  = context;
     parameter->streams  = streams;
-    parameter->g_quiet  = config.quiet;
-    parameter->num_gpus = config.num_devices;
-    parameter->gpu_idx  = config.device_list;
-    parameter->mark_predecessors  = config.mark_predecessors;
-    parameter->enable_idempotence = config.enable_idempotence;
+    parameter->g_quiet  = config -> quiet;
+    parameter->num_gpus = config -> num_devices;
+    parameter->gpu_idx  = config -> device_list;
+    parameter->mark_predecessors  = config -> mark_predecessors;
+    parameter->enable_idempotence = config -> enable_idempotence;
 
     float elapsed_time;
 
@@ -315,7 +319,7 @@ float dispatch_bfs(
                 parameter->graph = &csr;
 
                 // determine source vertex to start
-                switch (config.source_mode)
+                switch (config -> source_mode)
                 {
                 case randomize:
                 {
@@ -329,7 +333,7 @@ float dispatch_bfs(
                 {
                     int max_deg = 0;
                     int node_id = csr.GetNodeWithHighestDegree(max_deg);
-                    for (int i = 0; i < config.num_iters; ++i)
+                    for (int i = 0; i < config -> num_iters; ++i)
                     {
                         parameter->src[i] = node_id;
                     }
@@ -339,7 +343,7 @@ float dispatch_bfs(
                 {
                     for (int i = 0; i < parameter->iterations; ++i)
                     {
-                        parameter->src[i] = config.source_vertex[i];
+                        parameter->src[i] = config -> source_vertex[i];
                     }
                     break;
                 }
@@ -355,7 +359,7 @@ float dispatch_bfs(
                 if (!parameter->g_quiet)
                 {
                     printf(" source: %lld", (long long) parameter->src[0]);
-                    for (int i = 1; i < config.num_iters; ++i)
+                    for (int i = 1; i < config -> num_iters; ++i)
                     {
                         printf(",%lld", (long long) parameter->src[i]);
                     }
@@ -403,7 +407,7 @@ float dispatch_bfs(
 float gunrock_bfs(
     GRGraph*       grapho,
     const GRGraph* graphi,
-    const GRSetup  config,
+    const GRSetup* config,
     const GRTypes  data_t)
 {
     // GPU-related configurations
@@ -412,20 +416,20 @@ float gunrock_bfs(
     ContextPtr    *context = NULL;
     cudaStream_t  *streams = NULL;
 
-    num_gpus = config.num_devices;
+    num_gpus = config -> num_devices;
     gpu_idx  = new int [num_gpus];
     for (int i = 0; i < num_gpus; ++i)
     {
-        gpu_idx[i] = config.device_list[i];
+        gpu_idx[i] = config -> device_list[i];
     }
 
     // Create streams and MordernGPU context for each GPU
     streams = new cudaStream_t[num_gpus * num_gpus * 2];
     context = new ContextPtr[num_gpus * num_gpus];
-    if (!config.quiet) { printf(" using %d GPUs:", num_gpus); }
+    if (!config -> quiet) { printf(" using %d GPUs:", num_gpus); }
     for (int gpu = 0; gpu < num_gpus; ++gpu)
     {
-        if (!config.quiet) { printf(" %d ", gpu_idx[gpu]); }
+        if (!config -> quiet) { printf(" %d ", gpu_idx[gpu]); }
         util::SetDevice(gpu_idx[gpu]);
         for (int i = 0; i < num_gpus * 2; ++i)
         {
@@ -440,7 +444,7 @@ float gunrock_bfs(
             }
         }
     }
-    if (!config.quiet) { printf("\n"); }
+    if (!config -> quiet) { printf("\n"); }
 
     return dispatch_bfs(grapho, graphi, config, data_t, context, streams);
 }
@@ -476,10 +480,10 @@ float bfs(
     data_t.SIZET_TYPE = SIZET_INT;  // integer graph size type
     data_t.VALUE_TYPE = VALUE_INT;  // integer attributes type
 
-    struct GRSetup config = InitSetup(num_iters, source);  // primitive-specific configures
-    config.mark_predecessors  = mark_predecessors;    // do not mark predecessors
-    config.enable_idempotence = enable_idempotence;    // wether enable idempotence
-    config.source_mode = source_mode;
+    struct GRSetup* config = InitSetup(num_iters, source);  // primitive-specific configures
+    config -> mark_predecessors  = mark_predecessors;    // do not mark predecessors
+    config -> enable_idempotence = enable_idempotence;    // wether enable idempotence
+    config -> source_mode = source_mode;
 
     struct GRGraph *grapho = (struct GRGraph*)malloc(sizeof(struct GRGraph));
     struct GRGraph *graphi = (struct GRGraph*)malloc(sizeof(struct GRGraph));