Merge pull request #205 from esmf-org/feature/pio_with_mpiuni
Enable PIO with mpiuni

This PR enables building and running with the internal PIO when using mpiuni. This is especially relevant for people using ESMPy, since ESMPy is sometimes built without MPI, which is apparently necessary on many HPC systems (see also conda-forge/esmpy-feedstock#70). Resolves #131.
billsacks authored Jun 27, 2024
2 parents 03d0eb5 + 4478342 commit 1e77226
Showing 10 changed files with 187 additions and 45 deletions.
6 changes: 0 additions & 6 deletions build/common.mk
@@ -1699,12 +1699,6 @@ export ESMF_PIO = $(ESMF_PIODEFAULT)
endif

ifeq ($(ESMF_PIO),internal)
ifeq ($(ESMF_COMM),mpiuni)
#TODO: This turns PIO off if it was set to internal from a default setting.
#TODO: We need to do this while our internal PIO does not support mpiuni mode,
#TODO: but want to allow external PIO or explicit ESMF_PIO setting for development.
#TODO: Eventually this should become unnecessary.
ESMF_PIO = OFF
endif
ifndef ESMF_NETCDF
# PIO, starting with version 2, depends on NetCDF. Defaulting to internal needs
# be turned off if there is no NetCDF available. Externally set PIO will be let
8 changes: 8 additions & 0 deletions src/Infrastructure/IO/PIO/makefile
@@ -17,6 +17,14 @@ else
PIO_CMAKE_OPTS += -DPIO_ENABLE_LOGGING=OFF -DCMAKE_BUILD_TYPE=release
endif

ifeq ($(ESMF_COMM),mpiuni)
# Use ESMF's mpiuni as a stand-in for the mpi-serial library that PIO expects
PIO_CMAKE_OPTS += -DPIO_USE_MPISERIAL=ON -DMPISERIAL_PATH=$(ESMF_DIR)/src/Infrastructure/stubs/mpiuni

# There are problems building PIO's tests with mpiuni; for now, just disable this internal testing
PIO_CMAKE_OPTS += -DPIO_ENABLE_TESTS=OFF
endif

ifdef ESMF_NETCDF_INCLUDE
ifneq ("$(wildcard $(ESMF_NETCDF_LIBPATH)/libnetcdf.a)","")
PIO_CMAKE_OPTS += -DNetCDF_C_INCLUDE_DIR=$(ESMF_NETCDF_INCLUDE) -DNetCDF_C_LIBRARY=$(ESMF_NETCDF_LIBPATH)/libnetcdf.a
5 changes: 0 additions & 5 deletions src/Infrastructure/IO/doc/IO_rest.tex
@@ -2,11 +2,6 @@

\begin{enumerate}

% See https://github.com/esmf-org/esmf/issues/131
\item {\bf I/O of NetCDF files requires a real MPI library.}
Currently I/O of NetCDF files (which uses PIO) requires a real MPI
library: it cannot be done with ESMF\_COMM set to "mpiuni".

\item {\bf Limited data formats supported.}
Currently a small fraction of the anticipated data formats is implemented by
ESMF. The data I/O uses NetCDF format, and ESMF Info
8 changes: 8 additions & 0 deletions src/Infrastructure/stubs/mpiuni/libmpi-serial.a
@@ -0,0 +1,8 @@
This is a dummy file needed to satisfy PIO's FindMPISERIAL.cmake.

This is needed because, when building with mpiuni, we tell PIO that this
mpiuni directory is the location of mpi-serial (since we use mpiuni for
the same purpose as mpi-serial). But for FindMPISERIAL.cmake to succeed,
it needs to find a libmpi-serial.a in the mpi-serial directory; hence,
we put this file here to trick it into thinking that this is truly an
mpi-serial installation.
102 changes: 100 additions & 2 deletions src/Infrastructure/stubs/mpiuni/mpi.c
@@ -55,8 +55,28 @@ static int num_attr = 1,mpi_tag_ub = 100000000;

/*
To avoid problems with prototypes to the system memcpy() it is duplicated here
This version also supports checking for MPI_IN_PLACE
*/
int MPIUNI_Memcpy(void *a,const void* b,int n) {
int MPIUNI_Memcpy(void *a,const void* b,int n,enum CheckForMPIInPlace_Flag check_flag) {
switch(check_flag) {
case CHECK_FOR_MPI_IN_PLACE_NONE:
// No pre-check in this case; proceed to the actual memcpy
break;
case CHECK_FOR_MPI_IN_PLACE_SOURCE:
if (b == MPI_IN_PLACE) {
// If the source is MPI_IN_PLACE, do nothing
return 0;
}
break;
case CHECK_FOR_MPI_IN_PLACE_DEST:
if (a == MPI_IN_PLACE) {
// If the dest is MPI_IN_PLACE, do nothing
return 0;
}
break;
}

int i;
char *aa= (char*)a;
char *bb= (char*)b;
@@ -179,6 +199,84 @@ int Petsc_MPI_Finalize(void)
return 0;
}

int ESMC_MPI_Alltoallw(void *sendbuf, int *sendcounts, int *sdispls,
MPI_Datatype *sendtypes, void *recvbuf, int *recvcounts,
int *rdispls, MPI_Datatype *recvtypes, MPI_Comm comm)
{
// Since we are only implementing this for the single-processor case, the counts, displs
// and types arguments should all have length 1. We assume that's the case in this
// implementation.

// Displacements are not implemented so return an error code if they are non-zero
if (sdispls[0] != 0 || rdispls[0] != 0) {
return MPI_ERR_INTERN;
}

MPIUNI_Memcpy(recvbuf, sendbuf, sendcounts[0]*sendtypes[0], CHECK_FOR_MPI_IN_PLACE_SOURCE);
return MPI_SUCCESS;
}

int ESMC_MPI_Scatterv(void *sendbuf, int *sendcounts, int *displs,
MPI_Datatype sendtype, void *recvbuf, int recvcount,
MPI_Datatype recvtype, int root, MPI_Comm comm)
{
// Since we are only implementing this for the single-processor case, the sendcounts and
// displs arguments should have length 1. We assume that's the case in this
// implementation.

// Displacements are not implemented so return an error code if they are non-zero
if (displs[0] != 0) {
return MPI_ERR_INTERN;
}

MPIUNI_Memcpy(recvbuf, sendbuf, sendcounts[0]*sendtype, CHECK_FOR_MPI_IN_PLACE_DEST);
return MPI_SUCCESS;
}

int ESMC_MPI_Type_create_hvector(int count, int blocklength, MPI_Aint stride,
MPI_Datatype oldtype, MPI_Datatype *newtype)
{
// Note mpiuni's definition of each datatype as sizeof(raw-type).
//
// From some experimentation with a real MPI library, the MPI_Type_size of newtype is
// independent of the value of stride. Since the MPI_Datatype in mpiuni is just the size
// of the datatype, we ignore the possible complexity of stride in this implementation.
*newtype = count*blocklength*oldtype;
return MPI_SUCCESS;
}

int ESMC_MPI_Type_create_indexed_block(int count, int blocklength,
const int array_of_displacements[],
MPI_Datatype oldtype,
MPI_Datatype *newtype)
{
// Note mpiuni's definition of each datatype as sizeof(raw-type).
//
// From some experimentation with a real MPI library, the MPI_Type_size of newtype is
// independent of the values in array_of_displacements. Since the MPI_Datatype in mpiuni
// is just the size of the datatype, we ignore the possible complexity of
// array_of_displacements in this implementation.
*newtype = count*blocklength*oldtype;
return MPI_SUCCESS;
}

int ESMC_MPI_Type_hvector(int count, int blocklength, MPI_Aint stride,
MPI_Datatype oldtype, MPI_Datatype *newtype)
{
// MPI_Type_hvector is a deprecated version of MPI_Type_create_hvector; the only
// difference is in how stride is specified (bytes vs. elements); since we ignore stride
// in our implementation of MPI_Type_create_hvector, we can use the same implementation
// for both.
return ESMC_MPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype);
}

int ESMC_MPI_Type_size(MPI_Datatype datatype, int *size)
{
// Note that, conveniently, mpiuni defines each datatype as sizeof(raw-type)
*size = datatype;
return MPI_SUCCESS;
}

#if !defined (ESMF_OS_MinGW)
// POSIX version
double ESMC_MPI_Wtime(void)
@@ -403,7 +501,7 @@ void MPIUNI_STDCALL mpi_allreduce(void *sendbuf,void *recvbuf,int *count,int *da
*ierr = MPI_ERR_OP;
return;
}
MPIUNI_Memcpy(recvbuf,sendbuf,(*count)*MPIUNI_DATASIZE[*datatype]);
MPIUNI_Memcpy(recvbuf,sendbuf,(*count)*MPIUNI_DATASIZE[*datatype],CHECK_FOR_MPI_IN_PLACE_SOURCE);
*ierr = MPI_SUCCESS;
}
void MPIUNI_STDCALL mpi_allreduce_(void *sendbuf,void *recvbuf,int *count,int *datatype,int *op,int *comm,int *ierr)
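The net effect of the mpi.c changes is easiest to see from the calling side. Below is a minimal sketch (a hypothetical test program, not part of this commit) showing two of the new behaviors: MPIUNI_Memcpy skipping the copy when the source buffer is MPI_IN_PLACE, and the single-task MPI_Scatterv reducing to a plain memcpy. It assumes mpiuni's mpi.h is on the include path and that MPI_INT is defined as sizeof(int), in the style of the other mpiuni datatype macros.

/* Hypothetical single-task demo of the new mpiuni behavior (not in this commit). */
#include <stdio.h>
#include "mpi.h"  /* ESMF's mpiuni stub header */

int main(int argc, char **argv) {
  int sum[2] = {3, 4};
  double src[2] = {1.5, 2.5}, dst[2] = {0.0, 0.0};
  int sendcounts[1] = {2}, displs[1] = {0};

  MPI_Init(&argc, &argv);

  /* MPI_Allreduce expands to MPIUNI_Memcpy(..., CHECK_FOR_MPI_IN_PLACE_SOURCE):
     with MPI_IN_PLACE as the source, the copy is skipped and sum keeps its data. */
  MPI_Allreduce(MPI_IN_PLACE, sum, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  printf("sum = %d %d\n", sum[0], sum[1]);   /* still 3 4 */

  /* MPI_Scatterv now maps to ESMC_MPI_Scatterv: on one task it copies
     sendcounts[0]*sendtype bytes; nonzero displacements return MPI_ERR_INTERN. */
  MPI_Scatterv(src, sendcounts, displs, MPI_DOUBLE,
               dst, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  printf("dst = %g %g\n", dst[0], dst[1]);   /* 1.5 2.5 */

  MPI_Finalize();
  return 0;
}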
71 changes: 45 additions & 26 deletions src/Infrastructure/stubs/mpiuni/mpi.h
@@ -82,12 +82,16 @@ extern void *MPIUNI_TMP;
#define MPI_COMM_WORLD 1
#define MPI_COMM_SELF MPI_COMM_WORLD
#define MPI_COMM_NULL 0
#define MPI_GROUP_EMPTY (-1)
#define MPI_GROUP_NULL 0
#define MPI_SUCCESS 0
#define MPI_IDENT 0
#define MPI_CONGRUENT 0
#define MPI_SIMILAR 0
#define MPI_UNEQUAL 3
#define MPI_ANY_SOURCE (-2)
#define MPI_PROC_NULL (-3)
#define MPI_ROOT (-4)
#define MPI_KEYVAL_INVALID 0
#define MPI_ERR_UNKNOWN 18
#define MPI_ERR_INTERN 21
@@ -109,13 +113,19 @@ typedef int MPI_Info; /* handle */

#define MPI_INFO_NULL (0)

#define MPI_IN_PLACE (void *)(-1)
enum CheckForMPIInPlace_Flag {
CHECK_FOR_MPI_IN_PLACE_NONE,
CHECK_FOR_MPI_IN_PLACE_SOURCE,
CHECK_FOR_MPI_IN_PLACE_DEST
};


extern int MPIUNI_Memcpy(void*,const void*,int);
extern int MPIUNI_Memcpy(void*,const void*,int,enum CheckForMPIInPlace_Flag);

/* In order to handle datatypes, we make them into "sizeof(raw-type)";
this allows us to do the MPIUNI_Memcpy's easily */
#define MPI_Datatype int
#define MPI_DATATYPE_NULL 0
#define MPI_FLOAT sizeof(float)
#define MPI_DOUBLE sizeof(double)
#define MPI_LONG_DOUBLE sizeof(long double)
@@ -140,6 +150,7 @@ extern int MPIUNI_Memcpy(void*,const void*,int);
#define MPI_2INTEGER (2*sizeof(int))
#define MPI_UNSIGNED_CHAR sizeof(unsigned char)
#define MPI_UNSIGNED_LONG sizeof(unsigned long)
#define MPI_OFFSET sizeof(MPI_Offset)
#define MPIU_PETSCLOGDOUBLE sizeof(PetscLogDouble)
#define MPI_REQUEST_NULL ((MPI_Request)0)

@@ -197,6 +208,14 @@ extern int Petsc_MPI_Initialized(int *);
extern int Petsc_MPI_Comm_dup(MPI_Comm,MPI_Comm *);
extern int Petsc_MPI_Finalize(void);
extern int Petsc_MPI_Finalized(int *);
extern int ESMC_MPI_Alltoallw(void *,int *,int *,MPI_Datatype *,
void *,int *,int *,MPI_Datatype *,MPI_Comm);
extern int ESMC_MPI_Scatterv(void *,int *,int *,MPI_Datatype,
void *,int,MPI_Datatype,int,MPI_Comm);
extern int ESMC_MPI_Type_create_hvector(int,int,MPI_Aint,MPI_Datatype,MPI_Datatype *);
extern int ESMC_MPI_Type_create_indexed_block(int,int,const int[],MPI_Datatype,MPI_Datatype *);
extern int ESMC_MPI_Type_hvector(int,int,MPI_Aint,MPI_Datatype,MPI_Datatype *);
extern int ESMC_MPI_Type_size(MPI_Datatype,int *);
extern double ESMC_MPI_Wtime(void);

#define MPI_Abort Petsc_MPI_Abort
@@ -210,6 +229,12 @@ extern double ESMC_MPI_Wtime(void);
#define MPI_Comm_dup Petsc_MPI_Comm_dup
#define MPI_Finalize Petsc_MPI_Finalize
#define MPI_Finalized Petsc_MPI_Finalized
#define MPI_Alltoallw ESMC_MPI_Alltoallw
#define MPI_Scatterv ESMC_MPI_Scatterv
#define MPI_Type_create_hvector ESMC_MPI_Type_create_hvector
#define MPI_Type_create_indexed_block ESMC_MPI_Type_create_indexed_block
#define MPI_Type_hvector ESMC_MPI_Type_hvector
#define MPI_Type_size ESMC_MPI_Type_size
#define MPI_Wtime ESMC_MPI_Wtime

/*
@@ -458,13 +483,12 @@ extern double ESMC_MPI_Wtime(void);
dest,sendtag,recvbuf,recvcount,\
recvtype,source,recvtag,\
comm,status) \
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount) * (sendtype))
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount) * (sendtype),CHECK_FOR_MPI_IN_PLACE_NONE)
#define MPI_Sendrecv_replace(buf,count, datatype,dest,sendtag,\
source,recvtag,comm,status) MPI_SUCCESS
#define MPI_Type_contiguous(count, oldtype,newtype) \
(*(newtype) = (count)*(oldtype),MPI_SUCCESS)
#define MPI_Type_vector(count,blocklength,stride,oldtype, newtype) MPI_SUCCESS
#define MPI_Type_hvector(count,blocklength,stride,oldtype, newtype) MPI_SUCCESS
#define MPI_Type_indexed(count,array_of_blocklengths,\
array_of_displacements, oldtype,\
newtype) MPI_SUCCESS
@@ -478,8 +502,6 @@ extern double ESMC_MPI_Wtime(void);
(*(address) = (long)(char *)(location),MPI_SUCCESS)
#define MPI_Type_extent(datatype,extent) \
MPI_Abort(MPI_COMM_WORLD,0)
#define MPI_Type_size(datatype,size) \
MPI_Abort(MPI_COMM_WORLD,0)
#define MPI_Type_lb(datatype,displacement) \
MPI_Abort(MPI_COMM_WORLD,0)
#define MPI_Type_ub(datatype,displacement) \
@@ -513,7 +535,7 @@ extern double ESMC_MPI_Wtime(void);
MPIUNI_TMP = (void*)(long) (root),\
MPIUNI_TMP = (void*)(long) (recvtype),\
MPIUNI_TMP = (void*)(long) (comm),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype)),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype),CHECK_FOR_MPI_IN_PLACE_SOURCE), \
MPI_SUCCESS)
#define MPI_Gatherv(sendbuf,sendcount, sendtype,\
recvbuf,recvcounts,displs,\
@@ -523,7 +545,7 @@ extern double ESMC_MPI_Wtime(void);
MPIUNI_TMP = (void*)(long) (recvtype),\
MPIUNI_TMP = (void*)(long) (root),\
MPIUNI_TMP = (void*)(long) (comm),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype)),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype),CHECK_FOR_MPI_IN_PLACE_SOURCE), \
MPI_SUCCESS)
#define MPI_Scatter(sendbuf,sendcount, sendtype,\
recvbuf,recvcount, recvtype,\
@@ -536,32 +558,20 @@ extern double ESMC_MPI_Wtime(void);
MPIUNI_TMP = (void*)(long) (recvtype),\
MPIUNI_TMP = (void*)(long) (root),\
MPIUNI_TMP = (void*)(long) (comm),MPI_Abort(MPI_COMM_WORLD,0))
#define MPI_Scatterv(sendbuf,sendcounts,displs,\
sendtype, recvbuf,recvcount,\
recvtype,root,comm) \
(MPIUNI_TMP = (void*)(long) (sendbuf),\
MPIUNI_TMP = (void*)(long) (sendcounts),\
MPIUNI_TMP = (void*)(long) (displs),\
MPIUNI_TMP = (void*)(long) (sendtype),\
MPIUNI_TMP = (void*)(long) (recvbuf),\
MPIUNI_TMP = (void*)(long) (recvcount),\
MPIUNI_TMP = (void*)(long) (recvtype),\
MPIUNI_TMP = (void*)(long) (root),\
MPIUNI_TMP = (void*)(long) (comm),MPI_Abort(MPI_COMM_WORLD,0))
#define MPI_Allgather(sendbuf,sendcount, sendtype,\
recvbuf,recvcount, recvtype,comm) \
(MPIUNI_TMP = (void*)(long) (recvcount),\
MPIUNI_TMP = (void*)(long) (recvtype),\
MPIUNI_TMP = (void*)(long) (comm),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype)),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype),CHECK_FOR_MPI_IN_PLACE_SOURCE), \
MPI_SUCCESS)
#define MPI_Allgatherv(sendbuf,sendcount, sendtype,\
recvbuf,recvcounts,displs,recvtype,comm) \
(MPIUNI_TMP = (void*)(long) (recvcounts),\
MPIUNI_TMP = (void*)(long) (displs),\
MPIUNI_TMP = (void*)(long) (recvtype),\
MPIUNI_TMP = (void*)(long) (comm),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype)),\
MPIUNI_Memcpy(recvbuf,sendbuf,(sendcount)* (sendtype),CHECK_FOR_MPI_IN_PLACE_SOURCE), \
MPI_SUCCESS)
#define MPI_Alltoall(sendbuf,sendcount, sendtype,\
recvbuf,recvcount, recvtype,\
@@ -571,13 +581,13 @@ extern double ESMC_MPI_Wtime(void);
rdispls, recvtype,comm) MPI_Abort(MPI_COMM_WORLD,0)
#define MPI_Reduce(sendbuf, recvbuf,count,\
datatype,op,root,comm) \
(MPIUNI_Memcpy(recvbuf,sendbuf,(count)*(datatype)),\
(MPIUNI_Memcpy(recvbuf,sendbuf,(count)*(datatype),CHECK_FOR_MPI_IN_PLACE_SOURCE), \
MPIUNI_TMP = (void*)(long) (comm),MPI_SUCCESS)
#define MPI_Allreduce(sendbuf, recvbuf,count,datatype,op,comm) \
(MPIUNI_Memcpy(recvbuf,sendbuf,(count)*(datatype)),\
(MPIUNI_Memcpy(recvbuf,sendbuf,(count)*(datatype),CHECK_FOR_MPI_IN_PLACE_SOURCE), \
MPIUNI_TMP = (void*)(long) (comm),MPI_SUCCESS)
#define MPI_Scan(sendbuf, recvbuf,count,datatype,op,comm) \
(MPIUNI_Memcpy(recvbuf,sendbuf,(count)*(datatype)),\
(MPIUNI_Memcpy(recvbuf,sendbuf,(count)*(datatype),CHECK_FOR_MPI_IN_PLACE_SOURCE), \
MPIUNI_TMP = (void*)(long) (comm),MPI_SUCCESS)
#define MPI_Reduce_scatter(sendbuf, recvbuf,recvcounts,\
datatype,op,comm) \
@@ -626,6 +636,15 @@ extern double ESMC_MPI_Wtime(void);
remote_leader,tag,newintercomm) MPI_SUCCESS
#define MPI_Intercomm_merge(intercomm,high,newintracomm) MPI_SUCCESS

#define MPI_Info_create(info) \
(MPIUNI_TMP = (void*)(long) (info),\
MPI_SUCCESS)
#define MPI_Info_set(info,key,value) \
(MPIUNI_TMP = (void*)(long) (info),\
MPIUNI_TMP = (void*)(long) (key),\
MPIUNI_TMP = (void*)(long) (value),\
MPI_SUCCESS)

#define MPI_Topo_test(comm,status) MPI_SUCCESS
#define MPI_Cart_create(comm_old,ndims,dims,periods,\
reorder,comm_cart) MPI_SUCCESS
@@ -649,7 +668,7 @@ extern double ESMC_MPI_Wtime(void);
#define MPI_Cart_map(comm,ndims,dims,periods,newrank) MPI_Abort(MPI_COMM_WORLD,0)
#define MPI_Graph_map(comm,a,b,c,d) MPI_Abort(MPI_COMM_WORLD,0)
#define MPI_Get_processor_name(name,result_len) \
(MPIUNI_Memcpy(name,"localhost",9*sizeof(char)),name[10] = 0,*(result_len) = 10)
(MPIUNI_Memcpy(name,"localhost",9*sizeof(char),CHECK_FOR_MPI_IN_PLACE_NONE),name[10] = 0,*(result_len) = 10)
#define MPI_Errhandler_create(function,errhandler) \
(MPIUNI_TMP = (void*)(long) (errhandler),\
MPI_SUCCESS)
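Because mpiuni encodes every MPI_Datatype as sizeof(raw-type), the new type-constructor stubs reduce to integer arithmetic, and MPI_Type_size can simply return the handle itself. A minimal sketch of that arithmetic follows (hypothetical, assuming mpiuni's mpi.h is on the include path):

/* Hypothetical check of mpiuni's datatype-as-size encoding (not in this commit). */
#include <assert.h>
#include "mpi.h"  /* ESMF's mpiuni stub header */

int main(void) {
  MPI_Datatype hvec, iblk;
  int size;
  int displacements[4] = {0, 8, 16, 24};

  /* The stride argument does not affect MPI_Type_size, so the stub ignores it. */
  MPI_Type_create_hvector(4, 2, 128, MPI_DOUBLE, &hvec);
  MPI_Type_size(hvec, &size);
  assert(size == 4 * 2 * (int)sizeof(double));  /* count * blocklength * oldtype */

  /* Likewise the displacements are ignored for the size computation. */
  MPI_Type_create_indexed_block(4, 2, displacements, MPI_DOUBLE, &iblk);
  MPI_Type_size(iblk, &size);
  assert(size == 4 * 2 * (int)sizeof(double));

  return 0;
}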
9 changes: 6 additions & 3 deletions src/Infrastructure/stubs/mpiuni/mpirun
@@ -33,11 +33,14 @@ do
done

if [ $# -gt 0 ]; then
# If relative path is used prepend a ./
progname=`dirname $1`/`basename $1`
progname=$1
shift
# If the given command isn't in PATH, assume relative path is used, so prepend a ./
if ! command -v $progname &> /dev/null; then
progname=`dirname $progname`/`basename $progname`
fi

# Execute the program
# Execute the program
$progname $*
exit $?
fi
1 change: 0 additions & 1 deletion src/addon/esmpy/doc/install.rst
@@ -17,7 +17,6 @@ The following packages are *optional*:
* ESMF installation with NetCDF - required to create :class:`Grids <esmpy.api.grid.Grid>`, :class:`Meshes <esmpy.api.mesh.Mesh>` and :class:`Fields <esmpy.api.field.Field>` from file, and to write regridding weights to file
- NetCDF must be built as a shared library for ESMPy installation to succeed
* ESMF installation with PIO (the Parallel IO library) - required to create :class:`Meshes <esmpy.api.mesh.Mesh>` and :class:`Fields <esmpy.api.field.Field>` from file, and to write regridding weights to file
- Note that building ESMF with PIO requires building with a real MPI library (not mpiuni)
* `mpi4py <https://mpi4py.readthedocs.io/en/stable/>`_- python bindings to MPI, needed to run some of the parallel regridding examples
* `pytest <https://docs.pytest.org/en/7.1.x/>`_ - for testing
