From c06081a0271479aa5da587328ea2bd14dfdfb7da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CIvy=E2=80=9D?= <bopeng@kth.se>
Date: Sat, 24 Sep 2016 23:44:10 +0200
Subject: [PATCH] rm compilation warnings,separate communicators

---
 CMakeLists.txt                |  5 ++++-
 communication/Com3DNonblk.cpp | 12 ++++++++++--
 fields/EMfields3D.cpp         |  2 +-
 include/Basic.h               |  8 ++++----
 include/BlockCommunicator.h   |  4 ++--
 include/Com3DNonblk.h         |  2 --
 include/Connection.h          |  8 ++++----
 include/MPIdata.h             |  7 ++++++-
 inputoutput/ParallelIO.cpp    |  2 +-
 main/iPIC3Dlib.cpp            |  2 +-
 particles/Particles3D.cpp     |  4 ++--
 particles/Particles3Dcomm.cpp |  1 -
 performances/Timing.cpp       |  8 ++++----
 solvers/CG.cpp                | 11 ++++++++---
 solvers/GMRES.cpp             | 18 +++++++++++-------
 utility/Basic.cpp             | 15 +++++++++------
 utility/MPIdata.cpp           | 12 ++++--------
 utility/TimeTasks.cpp         |  5 +++--
 utility/debug.cpp             |  2 +-
 utility/errors.cpp            |  4 ++--
 20 files changed, 77 insertions(+), 55 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 29f7018..1112b2b 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,7 +32,10 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") ## Xeon
      set(CMAKE_CXX_COMPILER "icpc")  
    endif()
    #set(CMAKE_CXX_FLAGS "-O3 -openmp -g -xHost -fno-exceptions -vec-report")
-   set(CMAKE_CXX_FLAGS  "-O3 -h noomp -DNO_HDF5  ") # -DPRINTPCL for printing particle subcycles  -DNO_HDF5 to disable output
+   # -DPRINTPCL for printing particle subcycles 
+   #-DNO_HDF5 to disable output 
+   #-h noomp if running on bluewaters
+   set(CMAKE_CXX_FLAGS  "-O3 -DNO_HDF5 ")
 else()
    set(CMAKE_CXX_FLAGS "-O3")
 endif()
diff --git a/communication/Com3DNonblk.cpp b/communication/Com3DNonblk.cpp
index ad11caa..18c55eb 100644
--- a/communication/Com3DNonblk.cpp
+++ b/communication/Com3DNonblk.cpp
@@ -23,7 +23,10 @@
 //isCenterFlag: 1 communicateCenter; 0 communicateNode
 void NBDerivedHaloComm(int nx, int ny, int nz, double ***vector,const VirtualTopology3D * vct, EMfields3D *EMf,bool isCenterFlag, bool isFaceOnlyFlag, bool needInterp, bool isParticle)
 {
-	MPI_Errhandler_set(MPI_COMM_WORLD,MPI_ERRORS_RETURN);
+    const MPI_Comm comm       = isParticle ?vct->getParticleComm()      :vct->getFieldComm();
+#ifdef DEBUG
+	MPI_Errhandler_set(comm,MPI_ERRORS_RETURN);
+#endif
     MPI_Status  stat[12];
     MPI_Request reqList[12];				  //at most 6 requests x 2 (send recv)
     int communicationCnt[6] = {0,0,0,0,0,0};  //1 if there communication on that dir
@@ -31,7 +34,6 @@ void NBDerivedHaloComm(int nx, int ny, int nz, double ***vector,const VirtualTop
 
     const int tag_XL=1,tag_YL=2,tag_ZL=3,tag_XR=4,tag_YR=5,tag_ZR=6;//To address same rank as left and right neighbour in periodic case
     const int myrank    	  = vct->getCartesian_rank();
-    const MPI_Comm comm       = isParticle ?vct->getParticleComm()      :vct->getFieldComm();
     const int right_neighborX = isParticle ?vct->getXright_neighbor_P() :vct->getXright_neighbor();
     const int left_neighborX  = isParticle ?vct->getXleft_neighbor_P()  :vct->getXleft_neighbor();
     const int right_neighborY = isParticle ?vct->getYright_neighbor_P() :vct->getYright_neighbor();
@@ -134,12 +136,14 @@ void NBDerivedHaloComm(int nx, int ny, int nz, double ***vector,const VirtualTop
             int error_code = stat[si].MPI_ERROR;
             if (error_code != MPI_SUCCESS) {
                 stopFlag = true;
+#ifdef DEBUG
                 char error_string[100];
                 int length_of_error_string, error_class;
 
                 MPI_Error_class(error_code, &error_class);
                 MPI_Error_string(error_class, error_string, &length_of_error_string);
                 dprintf("MPI_Waitall error at %d  %s\n",si, error_string);
+#endif
             }
         }
         if(stopFlag) exit (EXIT_FAILURE);
@@ -360,12 +364,14 @@ void NBDerivedHaloComm(int nx, int ny, int nz, double ***vector,const VirtualTop
 				int error_code = stat[si].MPI_ERROR;
 				if (error_code != MPI_SUCCESS) {
 					stopFlag = true;
+#ifdef DEBUG
 					char error_string[100];
 					int length_of_error_string, error_class;
 
 					MPI_Error_class(error_code, &error_class);
 					MPI_Error_string(error_class, error_string, &length_of_error_string);
 					dprintf("MPI_Waitall error at %d  %s\n",si, error_string);
+#endif
 				}
 			}
 			if(stopFlag) exit (EXIT_FAILURE);;
@@ -467,12 +473,14 @@ void NBDerivedHaloComm(int nx, int ny, int nz, double ***vector,const VirtualTop
 				int error_code = stat[si].MPI_ERROR;
 				if (error_code != MPI_SUCCESS) {
 					stopFlag = true;
+#ifdef DEBUG
 					char error_string[100];
 					int length_of_error_string, error_class;
 
 					MPI_Error_class(error_code, &error_class);
 					MPI_Error_string(error_class, error_string, &length_of_error_string);
 					dprintf("MPI_Waitall error at %d  %s\n",si, error_string);
+#endif
 				}
 			}
 			if(stopFlag) exit (EXIT_FAILURE);
diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp
index 1d8a596..e988b73 100644
--- a/fields/EMfields3D.cpp
+++ b/fields/EMfields3D.cpp
@@ -29,13 +29,13 @@
 #include "CG.h"
 #include "GMRES.h"
 #include "Particles3Dcomm.h"
-#include "TimeTasks.h"
 #include "Moments.h"
 #include "Parameters.h"
 #include "ompdefs.h"
 #include "debug.h"
 #include "string.h"
 #include "mic_particles.h"
+#include "TimeTasks.h"
 #include "ipicmath.h" // for roundup_to_multiple
 #include "Alloc.h"
 #include "asserts.h"
diff --git a/include/Basic.h b/include/Basic.h
index 41ee374..b0c151a 100644
--- a/include/Basic.h
+++ b/include/Basic.h
@@ -50,7 +50,7 @@ developers: Stefano Markidis, Giovanni Lapenta
 
 
 /** method to calculate the parallel dot product with vect1, vect2 having the ghost cells*/
-double dotP(const double *vect1, const double *vect2, int n);
+double dotP(const double *vect1, const double *vect2, int n,MPI_Comm* comm);
 /** method to calculate dot product */
 double dot(const double *vect1, const double *vect2, int n);
 /** method to calculate the square norm of a vector */
@@ -60,11 +60,11 @@ double norm2(const arr3_double vect, int nx, int ny);
 /** method to calculate the square norm of a vector */
 double norm2(const double *vect, int nx);
 /** method to calculate the parallel dot product */
-double norm2P(const arr3_double vect, int nx, int ny, int nz);
+//double norm2P(const arr3_double vect, int nx, int ny, int nz);
 /** method to calculate the parallel norm of a vector on different processors with the ghost cell */
-double norm2P(const double *vect, int n);
+//double norm2P(const double *vect, int n);
 /** method to calculate the parallel norm of a vector on different processors with the gost cell*/
-double normP(const double *vect, int n);
+double normP(const double *vect, int n,MPI_Comm* comm);
 /** method to calculate the difference of two vectors*/
 void sub(double *res, const double *vect1, const double *vect2, int n);
 /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/
diff --git a/include/BlockCommunicator.h b/include/BlockCommunicator.h
index b9a4819..0483c47 100644
--- a/include/BlockCommunicator.h
+++ b/include/BlockCommunicator.h
@@ -159,7 +159,7 @@ struct Block
     //  listID, commID,
     //  dest.rank(), dest.tag_name(),
     //  nop);
-    MPI_Isend(&block[0], NUMBERS_PER_ELEMENT*block.size(), MPI_DOUBLE,
+    MPI_Isend((double*)&block[0], NUMBERS_PER_ELEMENT*block.size(), MPI_DOUBLE,
       dest.rank(), dest.tag(), dest.comm(), &request);
     //dprintf("finished sending block number %d", listID);
   }
@@ -188,7 +188,7 @@ struct Block
     // make sure that space exists to receive
     int newsize = signal_hack() ? capacity+1 : capacity;
     block.resize(newsize);
-    MPI_Irecv(&block[0], NUMBERS_PER_ELEMENT*block.size(), MPI_DOUBLE,
+    MPI_Irecv((double*)&block[0], NUMBERS_PER_ELEMENT*block.size(), MPI_DOUBLE,
       source.rank(), source.tag(), source.comm(), &request);
   }
   // processing received data
diff --git a/include/Com3DNonblk.h b/include/Com3DNonblk.h
index 7d25732..eff3bae 100644
--- a/include/Com3DNonblk.h
+++ b/include/Com3DNonblk.h
@@ -35,11 +35,9 @@ developers           : Stefano Markidis, Ivy Bo Peng
 #include "mpi.h"
 #include "BcFields3D.h"
 #include "VCtopology3D.h"
-#include "TimeTasks.h"
 #include "ipicdefs.h"
 #include "Alloc.h"
 #include "debug.h"
-//#include "parallel.h"
 #include "EMfields3D.h"
 
 /** communicate ghost cells (FOR NODES) */
diff --git a/include/Connection.h b/include/Connection.h
index 579b186..a728d0f 100644
--- a/include/Connection.h
+++ b/include/Connection.h
@@ -41,7 +41,7 @@
 // one way is to compare the positions of the centroids, first in
 // the x direction, then in the y direction, and then in the z
 // direction.  One might use tag=1 for downward communication and
-// tag=2 for upward communication, and use MPI_COMM_WORLD for the
+// tag=2 for upward communication, and all ways use particle communicator for the
 // group.
 //
 namespace Direction
@@ -83,14 +83,14 @@ class Connection
   Connection():
     _rank(0),
     _tag(0),
-    _comm(MPI_COMM_WORLD)
+    _comm(MPI_COMM_NULL)
   {}
-  Connection(int rank_, int tag_, MPI_Comm comm_ = MPI_COMM_WORLD)
+  Connection(int rank_, int tag_, MPI_Comm comm_ = MPI_COMM_NULL)
   {
     init(rank_,tag_,comm_);
   }
  private:
-  void init(int rank_, int tag_, MPI_Comm comm_ = MPI_COMM_WORLD)
+  void init(int rank_, int tag_, MPI_Comm comm_ = MPI_COMM_NULL)
   {
     _rank = rank_;
     _tag = tag_;
diff --git a/include/MPIdata.h b/include/MPIdata.h
index a65adc8..c2c8712 100644
--- a/include/MPIdata.h
+++ b/include/MPIdata.h
@@ -29,7 +29,9 @@ email                : markidis@lanl.gov, lapenta@lanl.gov
 
 #ifndef MPIDATA_H
 #define MPIDATA_H
-
+#ifndef NO_MPI
+#include <mpi.h>
+#endif
 /**
  * MPI Data Structure. This class contains:
  *
@@ -69,7 +71,10 @@ class MPIdata {
 public:
   static int get_rank(){return instance().rank;}
   static int get_nprocs(){return instance().nprocs;}
+  static int get_PicGlobalComm(){return instance().PIC_COMM;}
 private:
+    /*iPIC3D Global Communicator*/
+  static MPI_Comm PIC_COMM;
   /** rank of the process */
   static int rank;
   /** number of processes */
diff --git a/inputoutput/ParallelIO.cpp b/inputoutput/ParallelIO.cpp
index 47f11e2..5feeeaf 100644
--- a/inputoutput/ParallelIO.cpp
+++ b/inputoutput/ParallelIO.cpp
@@ -301,7 +301,7 @@ void ReadFieldsH5hut(int nspec, EMfields3D *EMf, Collective *col, VCtopology3D *
   infile.CloseFieldsFile();
 
   // initialize B on centers
-  MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(MPIdata::get_PicGlobalComm());
 
   // Comm ghost nodes for B-field
   communicateNodeBC(grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getBx(), col->bcBx[0],col->bcBx[1],col->bcBx[2],col->bcBx[3],col->bcBx[4],col->bcBx[5], vct);
diff --git a/main/iPIC3Dlib.cpp b/main/iPIC3Dlib.cpp
index dd8e615..0785898 100644
--- a/main/iPIC3Dlib.cpp
+++ b/main/iPIC3Dlib.cpp
@@ -111,7 +111,7 @@ int c_Solver::Init(int argc, char **argv) {
     }
   }
   // We create a new communicator with a 3D virtual Cartesian topology
-  vct->setup_vctopology(MPI_COMM_WORLD);
+    vct->setup_vctopology(MPIdata::get_PicGlobalComm());
   {
     stringstream num_proc_ss;
     num_proc_ss << vct->getCartesian_rank();
diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp
index f1f3a8f..7cd6d9d 100644
--- a/particles/Particles3D.cpp
+++ b/particles/Particles3D.cpp
@@ -1920,7 +1920,7 @@ void Particles3D::openbc_particles_inflow()
     for (int j = 1; j < grid->getNYC() - 1; j++)
       for (int k = 1; k < grid->getNZC() - 1; k++){
 
-    	/*
+    	
     	//The below is uniformly distributed in location
 		for (int ii=0; ii < npcelx; ii++)
 		  for (int jj=0; jj < npcely; jj++)
@@ -2281,7 +2281,7 @@ double Particles3D::delta_f(double u, double v, double w, double x, double y, do
     a3[l + lmax] += df0_dvpar(vpar, vperp) * bessel_Jn_array[l];
   }
 
-  deltaf = (0.0, 0.0);
+  //deltaf = (0.0, 0.0);
   for (register int l = -lmax; l < lmax + 1; l++) {
     deltaf += (a3[l + lmax] * Ex_mod * exp(I * Ex_phase) + a1[l + lmax] * Ey_mod * exp(I * Ey_phase) + a2[l + lmax] * Ez_mod * exp(I * Ez_phase)) / (kpar * vpar + l * om_c - omega) * exp(-I * phi * (double) l);
   }
diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp
index 4f018f7..05a709e 100644
--- a/particles/Particles3Dcomm.cpp
+++ b/particles/Particles3Dcomm.cpp
@@ -33,7 +33,6 @@ developers: Stefano Markidis, Giovanni Lapenta.
 #include "VCtopology3D.h"
 #include "Collective.h"
 #include "Alloc.h"
-#include "Basic.h"
 #include "Grid3DCU.h"
 #include "Field.h"
 #include "MPIdata.h"
diff --git a/performances/Timing.cpp b/performances/Timing.cpp
index b18a529..8019702 100644
--- a/performances/Timing.cpp
+++ b/performances/Timing.cpp
@@ -22,7 +22,7 @@
 #include "stdio.h"
 #include "Timing.h"
 #include "ipicdefs.h"
-
+#include "MPIdata.h"
 /**
  * 
  * series of methods for timing and profiling
@@ -56,7 +56,7 @@ Timing::Timing(int my_rank) {
   // MPE_Describe_state(event2a,event2b,"Field","blue"); // the mover is blue in the visualizer
   // MPE_Describe_state(event3a,event3b,"Interp P->G","yellow"); // the interpolation particle->Grid is yellow in the visualizer
   // }
-  former_MPI_Barrier(MPI_COMM_WORLD);
+    former_MPI_Barrier(MPIdata::get_PicGlobalComm());
   // start the log
   // MPE_Start_log();
 
@@ -65,12 +65,12 @@ Timing::Timing(int my_rank) {
 /** start the timer */
 void Timing::startTiming() {
   ttick = MPI_Wtick();
-  former_MPI_Barrier(MPI_COMM_WORLD);
+  former_MPI_Barrier(MPIdata::get_PicGlobalComm());
   tstart = MPI_Wtime();
 }
 /** stop the timer */
 void Timing::stopTiming() {
-  former_MPI_Barrier(MPI_COMM_WORLD);
+  former_MPI_Barrier(MPIdata::get_PicGlobalComm());
   tend = MPI_Wtime();
   texecution = tend - tstart;
   if (rank_id == 0) {
diff --git a/solvers/CG.cpp b/solvers/CG.cpp
index 69e3314..b581185 100644
--- a/solvers/CG.cpp
+++ b/solvers/CG.cpp
@@ -23,6 +23,9 @@
 #include "CG.h"
 #include "Basic.h"
 #include "parallel.h"
+//#include "ipicdefs.h"
+#include "EMfields3D.h"
+#include "VCtopology3D.h"
 
 /**
  * 
@@ -44,6 +47,8 @@ bool CG(double *xkrylov, int xkrylovlen, double *b, int maxit, double tol, FIELD
   eqValue(0.0, v, xkrylovlen);
   eqValue(0.0, z, xkrylovlen);
   eqValue(0.0, im, xkrylovlen);
+    
+  MPI_Comm fieldcomm = (field->get_vct()).getFieldComm();
 
   int i = 0;
   bool CONVERGED = false;
@@ -55,7 +60,7 @@ bool CG(double *xkrylov, int xkrylovlen, double *b, int maxit, double tol, FIELD
   sub(r, b, im, xkrylovlen);
   // v = r
   eq(v, r, xkrylovlen);
-  c = dotP(r, r, xkrylovlen);
+  c = dotP(r, r, xkrylovlen,&fieldcomm);
   initial_error = sqrt(c);
   if (is_output_thread())
     printf("CG Initial error: %g\n", initial_error);
@@ -64,12 +69,12 @@ bool CG(double *xkrylov, int xkrylovlen, double *b, int maxit, double tol, FIELD
     return (true);
   while (i < maxit) {
     (field->*FunctionImage) (z, v);
-    t = c / dotP(v, z, xkrylovlen);
+    t = c / dotP(v, z, xkrylovlen,&fieldcomm);
     // x(i+1) = x + t*v
     addscale(t, xkrylov, v, xkrylovlen);
     // r(i+1) = r - t*z
     addscale(-t, r, z, xkrylovlen);
-    d = dotP(r, r, xkrylovlen);
+    d = dotP(r, r, xkrylovlen,&fieldcomm);
     if (CGVERBOSE && is_output_thread())
     {
       printf("Iteration # %d - norm of residual relative to initial error %g\n",
diff --git a/solvers/GMRES.cpp b/solvers/GMRES.cpp
index 7ae6993..1376c18 100644
--- a/solvers/GMRES.cpp
+++ b/solvers/GMRES.cpp
@@ -26,7 +26,9 @@
 #include "errors.h"
 #include "Alloc.h"
 #include "TimeTasks.h"
-#include "ipicdefs.h"
+//#include "ipicdefs.h"
+#include "EMfields3D.h"
+#include "VCtopology3D.h"
 
 void GMRES(FIELD_IMAGE FunctionImage, double *xkrylov, int xkrylovlen,
   const double *b, int m, int max_iter, double tol, Field * field)
@@ -73,7 +75,9 @@ void GMRES(FIELD_IMAGE FunctionImage, double *xkrylov, int xkrylovlen,
             "------------------------------------\n\n");
   }
 
-  double normb = normP(b, xkrylovlen);
+  MPI_Comm fieldcomm = (field->get_vct()).getFieldComm();
+    
+  double normb = normP(b, xkrylovlen,&fieldcomm);
   if (normb == 0.0)
     normb = 1.0;
 
@@ -83,7 +87,7 @@ void GMRES(FIELD_IMAGE FunctionImage, double *xkrylov, int xkrylovlen,
     // r = b - A*x
     (field->*FunctionImage) (im, xkrylov);
     sub(r, b, im, xkrylovlen);
-    initial_error = normP(r, xkrylovlen);
+    initial_error = normP(r, xkrylovlen,&fieldcomm);
 
     if (itr == 0) {
       if (is_output_thread())
@@ -126,7 +130,7 @@ void GMRES(FIELD_IMAGE FunctionImage, double *xkrylov, int xkrylovlen,
       {
         
         MPI_Allreduce(MPI_IN_PLACE, y, (k+2),
-          MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+          MPI_DOUBLE, MPI_SUM, fieldcomm);
       }
       for (int j = 0; j <= k; j++) {
         H[j][k] = y[j];
@@ -134,7 +138,7 @@ void GMRES(FIELD_IMAGE FunctionImage, double *xkrylov, int xkrylovlen,
       }
       // Is there a numerically stable way to
       // eliminate this second all-reduce all?
-      H[k+1][k] = normP(V[k+1], xkrylovlen);
+      H[k+1][k] = normP(V[k+1], xkrylovlen,&fieldcomm);
       //
       // check that vectors are orthogonal
       //
@@ -150,11 +154,11 @@ void GMRES(FIELD_IMAGE FunctionImage, double *xkrylov, int xkrylovlen,
       if (av + delta * H[k + 1][k] == av)
       {
         for (int j = 0; j <= k; j++) {
-          const double htmp = dotP(w, V[j], xkrylovlen);
+          const double htmp = dotP(w, V[j], xkrylovlen,&fieldcomm);
           H[j][k] = H[j][k] + htmp;
           addscale(-htmp, w, V[j], xkrylovlen);
         }
-        H[k + 1][k] = normP(w, xkrylovlen);
+        H[k + 1][k] = normP(w, xkrylovlen,&fieldcomm);
       }
       // normalize the new vector
       scale(w, (1.0 / H[k + 1][k]), xkrylovlen);
diff --git a/utility/Basic.cpp b/utility/Basic.cpp
index cb9233b..710d500 100644
--- a/utility/Basic.cpp
+++ b/utility/Basic.cpp
@@ -27,12 +27,12 @@
 #include "errors.h"
 
 /** method to calculate the parallel dot product with vect1, vect2 having the ghost cells*/
-double dotP(const double *vect1, const double *vect2, int n) {
+double dotP(const double *vect1, const double *vect2, int n,MPI_Comm* comm) {
   double result = 0;
   double local_result = 0;
   for (register int i = 0; i < n; i++)
     local_result += vect1[i] * vect2[i];
-  MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, *comm);
   return (result);
 
 }
@@ -70,6 +70,7 @@ double norm2(const double *vect, int nx) {
 
 
 /** method to calculate the parallel dot product */
+/*
 double norm2P(const arr3_double vect, int nx, int ny, int nz) {
   double result = 0;
   double local_result = 0;
@@ -80,8 +81,10 @@ double norm2P(const arr3_double vect, int nx, int ny, int nz) {
 
   MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   return (result);
-}
+}*/
+
 /** method to calculate the parallel norm of a vector on different processors with the ghost cell */
+/*
 double norm2P(const double *vect, int n) {
   double result = 0;
   double local_result = 0;
@@ -89,14 +92,14 @@ double norm2P(const double *vect, int n) {
     local_result += vect[i] * vect[i];
   MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   return (result);
-}
+}*/
 /** method to calculate the parallel norm of a vector on different processors with the gost cell*/
-double normP(const double *vect, int n) {
+double normP(const double *vect, int n,MPI_Comm* comm) {
   double result = 0.0;
   double local_result = 0.0;
   for (register int i = 0; i < n; i++)
     local_result += vect[i] * vect[i];
-  MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, *comm);
   return (sqrt(result));
 
 }
diff --git a/utility/MPIdata.cpp b/utility/MPIdata.cpp
index a1f5ef3..116d61c 100644
--- a/utility/MPIdata.cpp
+++ b/utility/MPIdata.cpp
@@ -21,12 +21,8 @@
 #ifndef NO_MPI
   #include <mpi.h>
 #endif
-//#include <stdio.h>
 #include <assert.h>
 #include "MPIdata.h"
-#ifndef NO_MPI
-#include <mpi.h>
-#endif
 #include "ompdefs.h" // for omp_get_max_threads
 
 // code to check that init() is called before instance()
@@ -34,6 +30,7 @@
 // no need for this to have more than file scope
 int MPIdata::rank=-1;
 int MPIdata::nprocs=-1;
+int MPIdata::PIC_COMM=MPI_COMM_NULL;
 static bool MPIdata_is_initialized=false;
 bool MPIdata_assert_initialized()
 {
@@ -62,11 +59,10 @@ void MPIdata::init(int *argc, char ***argv) {
   /* Initialize the MPI API */
   MPI_Init(argc, argv);
 
-  /* Set rank */
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_dup(MPI_COMM_WORLD, &PIC_COMM);
+  MPI_Comm_rank(PIC_COMM, &rank);
+  MPI_Comm_size(PIC_COMM, &nprocs);
 
-  /* Set total number of processors */
-  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  #endif // NO_MPI
 
   MPIdata_is_initialized = true;
diff --git a/utility/TimeTasks.cpp b/utility/TimeTasks.cpp
index 19b91b2..f70c1db 100644
--- a/utility/TimeTasks.cpp
+++ b/utility/TimeTasks.cpp
@@ -311,8 +311,9 @@ void TimeTasks::print_cycle_times(int cycle,
 
 static void reduce_doubles(int len, MPI_Op mpi_op, void* inbuff, void* outbuff)
 {
+    
     MPI_Allreduce(inbuff,outbuff,
-      len,MPI_DOUBLE,mpi_op,MPI_COMM_WORLD);
+                  len,MPI_DOUBLE,mpi_op,MPIdata::get_PicGlobalComm());
 }
 
 void TimeTasks::print_cycle_times(int cycle, const char* reduce_mode)
@@ -413,7 +414,7 @@ TimeTasks_caller_to_set_main_task_for_scope::
   // assume that only one thread is active
   assert(!omp_get_thread_num());
 
-  //MPI_Barrier(MPI_COMM_WORLD);
+
   timeTasks.end_main_task(task, start_time);
 }
 
diff --git a/utility/debug.cpp b/utility/debug.cpp
index 3184903..c7b5e0a 100644
--- a/utility/debug.cpp
+++ b/utility/debug.cpp
@@ -146,7 +146,7 @@ void fprintf_fileLine(FILE * fptr,
 
   // print the message
   fflush(fptr);
-    fprintf(fptr,error_msg);
+  fprintf(fptr,"%s",error_msg);
   fflush(fptr);
 }
 
diff --git a/utility/errors.cpp b/utility/errors.cpp
index b16cfb7..2fec2b8 100644
--- a/utility/errors.cpp
+++ b/utility/errors.cpp
@@ -66,7 +66,7 @@ using namespace std;
       << ", function " << func  \
       <<"\n\t" << type << " value: " << expr << " = " << val << endl; \
     fflush(stdout); \
-      { fprintf(stdout,ss.str().c_str()); } \
+    { fprintf(stdout,"%s",ss.str().c_str()); }	\
     fflush(stdout); \
     abort(); \
   }
@@ -124,7 +124,7 @@ void eprintf_fileLine(FILE * fptr, const char *type,
   // print the message
   fflush(fptr);
     // #pragma omp critical // need this?
-    { fprintf(fptr,error_msg); }
+  { fprintf(fptr,"%s",error_msg); }
   fflush(fptr);
   abort();
 }