From 92cfdad6d02f1b3e95a6cb2311e7bb5941b61f27 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 3 Feb 2022 07:31:29 -0800 Subject: [PATCH 1/5] Ensure an error gets reported when rankfile fails If someone specifies a non-existent CPU, then generate an error message instead of just silently failing. Ditto for the sequential mapper. Signed-off-by: Ralph Castain dd Signed-off-by: Ralph Castain (cherry picked from commit 71895c5eb1aa509be26c55568342b5f2dc910a2d) --- src/hwloc/hwloc-internal.h | 3 +- src/hwloc/hwloc_base_util.c | 21 +++-- .../rmaps/rank_file/help-rmaps_rank_file.txt | 78 ++++--------------- src/mca/rmaps/rank_file/rmaps_rank_file.c | 23 +++++- src/mca/rmaps/seq/help-prte-rmaps-seq.txt | 25 +++++- src/mca/rmaps/seq/rmaps_seq.c | 41 +++++++--- 6 files changed, 105 insertions(+), 86 deletions(-) diff --git a/src/hwloc/hwloc-internal.h b/src/hwloc/hwloc-internal.h index 6fdd7c64d1..da355fbffe 100644 --- a/src/hwloc/hwloc-internal.h +++ b/src/hwloc/hwloc-internal.h @@ -389,7 +389,8 @@ PRTE_EXPORT int prte_hwloc_base_memory_set(prte_hwloc_base_memory_segment_t *seg * Make a prettyprint string for a hwloc_cpuset_t (e.g., "package * 2[core 3]"). */ -PRTE_EXPORT char *prte_hwloc_base_cset2str(hwloc_cpuset_t cpuset, bool use_hwthread_cpus, +PRTE_EXPORT char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset, + bool use_hwthread_cpus, hwloc_topology_t topo); /* get the hwloc object that corresponds to the given processor id and type */ diff --git a/src/hwloc/hwloc_base_util.c b/src/hwloc/hwloc_base_util.c index 5d88e7da5b..fe84c86ffc 100644 --- a/src/hwloc/hwloc_base_util.c +++ b/src/hwloc/hwloc_base_util.c @@ -20,7 +20,7 @@ * All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2019-2020 IBM Corporation. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -937,8 +937,8 @@ static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t top package_id = atoi(package_core[0]); /* get the object for this package id */ - if (NULL - == (package = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, 0, package_id))) { + package = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, 0, package_id); + if (NULL == package) { prte_argv_free(package_core); return PRTE_ERR_NOT_FOUND; } @@ -978,6 +978,10 @@ static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t top core_id = atoi(list[j]) + npus; /* get that object */ core = prte_hwloc_base_get_obj_by_type(topo, obj_type, 0, core_id); + if (NULL == core) { + rc = PRTE_ERR_NOT_FOUND; + break; + } /* get the cpus */ hwloc_bitmap_or(cpumask, cpumask, core->cpuset); } @@ -994,6 +998,10 @@ static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t top core_id = j + npus; /* get that object */ core = prte_hwloc_base_get_obj_by_type(topo, obj_type, 0, core_id); + if (NULL == core) { + rc = PRTE_ERR_NOT_FOUND; + break; + } /* get the cpus add them into the result */ hwloc_bitmap_or(cpumask, cpumask, core->cpuset); } @@ -1092,7 +1100,7 @@ int prte_hwloc_base_cpu_list_parse(const char *slot_str, hwloc_topology_t topo, prte_argv_free(item); prte_argv_free(rngs); prte_argv_free(list); - return PRTE_ERR_SILENT; + return PRTE_ERR_NOT_FOUND; } /* get the cpus for that object and set them in the massk*/ hwloc_bitmap_or(cpumask, cpumask, pu->cpuset); @@ -1109,7 +1117,7 @@ int prte_hwloc_base_cpu_list_parse(const char *slot_str, hwloc_topology_t topo, prte_argv_free(range); prte_argv_free(item); prte_argv_free(rngs); - return PRTE_ERR_SILENT; + return PRTE_ERR_NOT_FOUND; } /* get the cpus for that object and set them in the mask*/ hwloc_bitmap_or(cpumask, cpumask, pu->cpuset); @@ -1495,7 +1503,8 @@ static void build_map(hwloc_topology_t topo, hwloc_cpuset_t avail, bool use_hwth /* * 
Make a prettyprint string for a hwloc_cpuset_t */ -char *prte_hwloc_base_cset2str(hwloc_cpuset_t cpuset, bool use_hwthread_cpus, hwloc_topology_t topo) +char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset, + bool use_hwthread_cpus, hwloc_topology_t topo) { int n, npkgs, npus, ncores; char tmp[2048], ans[4096]; diff --git a/src/mca/rmaps/rank_file/help-rmaps_rank_file.txt b/src/mca/rmaps/rank_file/help-rmaps_rank_file.txt index 941672ff74..a38038f268 100644 --- a/src/mca/rmaps/rank_file/help-rmaps_rank_file.txt +++ b/src/mca/rmaps/rank_file/help-rmaps_rank_file.txt @@ -4,7 +4,7 @@ # Copyright (c) 2013 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2018-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2021 Nanook Consulting. All rights reserved. +# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -13,23 +13,12 @@ # # This is the US/English general help file for rankle utilities. # -# Voltaire -[no-hwloc] -A slot_list containing detailed location info was given, but -hwloc support is not available: - - Rank: %d - Slot list: %s - -Unfortunately, hwloc support is required for this action. -Please reconfigure OMPI for hwloc if binding to specified -cpus is desired. [no-rankfile] -PRTE was unable to open the rankfile: - %s +%s was unable to open the rankfile: + Filename: %s Check to make sure the path and filename are correct. 
-usage: prun --map-by rankfile:file= ./app +usage: %s -mca rmaps_rankfile_path rankfile ./app Examples of proper syntax include: cat hostfile @@ -43,43 +32,6 @@ Examples of proper syntax include: rank 2=host4 slot=1-2 rank 3=host3 slot=0:1;1:0-2 # -[parse_error_string] -PRTE detected a parse error in the rankfile (%s) -It occured on line number %d on token %d: - %s -Examples of proper syntax include: - rank 1=host1 slot=1:0,1 - rank 0=host2 slot=0:* - rank 2=host4 slot=1-2 - rank 3=host3 slot=0:1;1:0-2 -# -[parse_error_int] -PRTE detected a parse error in the rankfile (%s) -It occured on line number %d on token %d: - %d -Examples of proper syntax include: - rank 1=host1 slot=1:0,1 - rank 0=host2 slot=0:* - rank 2=host4 slot=1-2 - rank 3=host3 slot=0:1;1:0-2 -# -[parse_error] -PRTE detected a parse error in the rankfile (%s) -It occured on line number %d on token %d. Examples of -proper syntax include: - rank 1=host1 slot=1:0,1 - rank 0=host2 slot=0:* - rank 2=host4 slot=1-2 - rank 3=host3 slot=0:1;1:0-2 - -# -[not-all-mapped-alloc] -Some of the requested ranks are not included in the current allocation. - %s - -Please verify that you have specified the allocated resources properly in -the provided rankfile. -# [bad-host] The rankfile that was used claimed that a host was either not allocated or oversubscribed its slots. Please review your rank-slot @@ -92,28 +44,21 @@ some systems may require using full hostnames, such as [bad-index] Rankfile claimed host %s by index that is bigger than number of allocated hosts. 
# -[bad-rankfile] -Error, invalid rank (%d) in the rankfile (%s) -# [bad-assign] Error, rank %d is already assigned to %s, check %s # [bad-syntax] Error, invalid syntax in the rankfile (%s) -syntax must be the fallowing +syntax must be the following + rank i=host_i slot=string + Examples of proper syntax include: rank 1=host1 slot=1:0,1 rank 0=host2 slot=0:* rank 2=host4 slot=1-2 rank 3=host3 slot=0:1;1:0-2 # -[prte-rmaps-rf:multi-apps-and-zero-np] -RMAPS found multiple applications to be launched, with -at least one that failed to specify the number of processes to execute. -When specifying multiple applications, you must specify how many processes -of each to launch via the -np argument. -# [missing-rank] A rank is missing its location specification: @@ -123,3 +68,12 @@ A rank is missing its location specification: All processes must have their location specified in the rank file. Either add an entry to the file, or provide a default slot_list to use for any unspecified ranks. +# +[missing-cpu] +While parsing the rankfile, %s encountered a line that specified +a non-existent CPU: + + Slots: %s + Available CPUs: %s + +Please correct the line and try again. diff --git a/src/mca/rmaps/rank_file/rmaps_rank_file.c b/src/mca/rmaps/rank_file/rmaps_rank_file.c index 94caefec40..8ce3bc6ceb 100644 --- a/src/mca/rmaps/rank_file/rmaps_rank_file.c +++ b/src/mca/rmaps/rank_file/rmaps_rank_file.c @@ -19,7 +19,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -346,8 +346,22 @@ static int prte_rmaps_rf_map(prte_job_t *jdata) } bitmap = hwloc_bitmap_alloc(); /* parse the slot_list to find the package and core */ - if (PRTE_SUCCESS - != (rc = prte_hwloc_base_cpu_list_parse(slots, node->topology->topo, bitmap))) { + rc = prte_hwloc_base_cpu_list_parse(slots, node->topology->topo, bitmap); + if (PRTE_ERR_NOT_FOUND == rc) { + char *tmp = prte_hwloc_base_cset2str(hwloc_topology_get_allowed_cpuset(node->topology->topo), + false, node->topology->topo); + prte_show_help("help-rmaps_rank_file.txt", "missing-cpu", true, + prte_tool_basename, slots, tmp); + free(tmp); + rc = PRTE_ERR_SILENT; + hwloc_bitmap_free(bitmap); + goto error; + } else if (PRTE_ERROR == rc) { + prte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); + rc = PRTE_ERR_SILENT; + hwloc_bitmap_free(bitmap); + goto error; + } else { PRTE_ERROR_LOG(rc); hwloc_bitmap_free(bitmap); goto error; @@ -437,7 +451,8 @@ static int prte_rmaps_rank_file_parse(const char *rankfile) prte_rmaps_rank_file_in = fopen(rankfile, "r"); if (NULL == prte_rmaps_rank_file_in) { - prte_show_help("help-rmaps_rank_file.txt", "no-rankfile", true, rankfile); + prte_show_help("help-rmaps_rank_file.txt", "no-rankfile", true, + prte_tool_basename, rankfile, prte_tool_basename); rc = PRTE_ERR_NOT_FOUND; goto unlock; } diff --git a/src/mca/rmaps/seq/help-prte-rmaps-seq.txt b/src/mca/rmaps/seq/help-prte-rmaps-seq.txt index c0b6fe401a..97b89dc150 100644 --- a/src/mca/rmaps/seq/help-prte-rmaps-seq.txt +++ b/src/mca/rmaps/seq/help-prte-rmaps-seq.txt @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2017-2020 Intel, Inc. All rights reserved. # Copyright (c) 2018-2020 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2021 Nanook Consulting All rights reserved. +# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -45,3 +45,26 @@ for use by the sequential mapper in its assignments by using the appropriate modifier: Example: --map-by seq:file=myseqfile +# +[missing-cpu] +While parsing the sequential hostfile, %s encountered a line that +specified a non-existent CPU: + + Slots: %s + Available CPUs: %s + +Please correct the line and try again. +# +[bad-syntax] +Error, invalid syntax in sequential hostfile %s. + +The sequential mapper requires that there be a node entry for +every process in the job, and it processes the provided file +using each node entry to identify the node where that numbered +rank is to be placed. You can specify the CPU binding for the +process on its node entry line in the following manner: + +A 1:0,1 # put this rank on node A, bound to socket 1, CPUs 0 and 1 +B 1-4 # put this rank on node B, bound to CPUs 1-4 + +Please correct the syntax and try again. diff --git a/src/mca/rmaps/seq/rmaps_seq.c b/src/mca/rmaps/seq/rmaps_seq.c index 37239de0e1..effcc942f6 100644 --- a/src/mca/rmaps/seq/rmaps_seq.c +++ b/src/mca/rmaps/seq/rmaps_seq.c @@ -16,7 +16,7 @@ * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -202,7 +202,6 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) app->app); PRTE_CONSTRUCT(&sq_list, prte_list_t); rc = process_file(hosts, &sq_list); - free(hosts); if (PRTE_SUCCESS != rc) { PRTE_LIST_DESTRUCT(&sq_list); goto error; @@ -217,10 +216,8 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) * list */ if (PRTE_SUCCESS != (rc = prte_util_get_ordered_dash_host_list(&node_list, hosts))) { PRTE_ERROR_LOG(rc); - free(hosts); goto error; } - free(hosts); /* transfer the list to a seq_node_t list */ PRTE_CONSTRUCT(&sq_list, prte_list_t); while (NULL != (nd = (prte_node_t *) prte_list_remove_first(&node_list))) { @@ -242,7 +239,6 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) app->app); PRTE_CONSTRUCT(&sq_list, prte_list_t); rc = process_file(hosts, &sq_list); - free(hosts); if (PRTE_SUCCESS != rc) { PRTE_LIST_DESTRUCT(&sq_list); goto error; @@ -252,6 +248,7 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) prte_output_verbose(5, prte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using default hostfile nodes on app %s", app->app); seq_list = &default_seq_list; + hosts = strdup(prte_default_hostfile); } else { /* can't do anything - no nodes available! 
*/ prte_show_help("help-prte-rmaps-base.txt", "prte-rmaps-base:no-available-resources", @@ -281,7 +278,8 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) if (NULL == seq_list || 0 == (num_nodes = (int32_t) prte_list_get_size(seq_list))) { prte_show_help("help-prte-rmaps-base.txt", "prte-rmaps-base:no-available-resources", true); - return PRTE_ERR_SILENT; + rc = PRTE_ERR_SILENT; + goto error; } /* set #procs to the number of entries */ @@ -293,7 +291,8 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) } else if (num_nodes < app->num_procs) { prte_show_help("help-prte-rmaps-seq.txt", "seq:not-enough-resources", true, app->num_procs, num_nodes); - return PRTE_ERR_SILENT; + rc = PRTE_ERR_SILENT; + goto error; } if (seq_list == &default_seq_list) { @@ -357,14 +356,16 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) prte_show_help("help-prte-rmaps-base.txt", "prte-rmaps-base:alloc-error", true, app->num_procs, app->app); PRTE_UPDATE_EXIT_STATUS(PRTE_ERROR_DEFAULT_EXIT_CODE); - return PRTE_ERR_SILENT; + rc = PRTE_ERR_SILENT; + goto error; } else if (PRTE_MAPPING_NO_OVERSUBSCRIBE & PRTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { /* if we were explicitly told not to oversubscribe, then don't */ prte_show_help("help-prte-rmaps-base.txt", "prte-rmaps-base:alloc-error", true, app->num_procs, app->app); PRTE_UPDATE_EXIT_STATUS(PRTE_ERROR_DEFAULT_EXIT_CODE); - return PRTE_ERR_SILENT; + rc = PRTE_ERR_SILENT; + goto error; } } } @@ -396,9 +397,19 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) /* setup the bitmap */ bitmap = hwloc_bitmap_alloc(); /* parse the slot_list to find the package and core */ - if (PRTE_SUCCESS - != (rc = prte_hwloc_base_cpu_list_parse(sq->cpuset, node->topology->topo, - bitmap))) { + rc = prte_hwloc_base_cpu_list_parse(sq->cpuset, node->topology->topo, bitmap); + if (PRTE_ERR_NOT_FOUND == rc) { + char *tmp = prte_hwloc_base_cset2str(hwloc_topology_get_allowed_cpuset(node->topology->topo), + false, node->topology->topo); + 
prte_show_help("help-rmaps-seq.txt", "missing-cpu", true, + prte_tool_basename, sq->cpuset, tmp); + free(tmp); + } else if (PRTE_ERROR == rc) { + prte_show_help("help-rmaps-seq.txt", "bad-syntax", true, hosts); + rc = PRTE_ERR_SILENT; + hwloc_bitmap_free(bitmap); + goto error; + } else { PRTE_ERROR_LOG(rc); hwloc_bitmap_free(bitmap); goto error; @@ -453,6 +464,9 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) } else { save = sq; } + if (NULL != hosts) { + free(hosts); + } } /* mark that this job is to be fully @@ -464,6 +478,9 @@ static int prte_rmaps_seq_map(prte_job_t *jdata) error: PRTE_LIST_DESTRUCT(&default_seq_list); + if (NULL != hosts) { + free(hosts); + } return rc; } From 0c542cfb2a5c820d5c945f7643edfa8c2aa5e1ff Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 3 Feb 2022 11:26:39 -0800 Subject: [PATCH 2/5] Add missing examples to tarball Signed-off-by: Ralph Castain (cherry picked from commit d073028d05ec8b7a2313e93cbddbecc2b5199b08) --- examples/Makefile.include | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/examples/Makefile.include b/examples/Makefile.include index 3c4d6ca583..17fcc99dde 100644 --- a/examples/Makefile.include +++ b/examples/Makefile.include @@ -15,7 +15,7 @@ # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. All rights reserved. # Copyright (c) 2016-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2021 Nanook Consulting. All rights reserved. +# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -35,23 +35,32 @@ EXTRA_DIST += \ examples/README \ examples/Makefile \ + examples/examples.h \ + examples/alloc.c \ + examples/bad_exit.c \ examples/client.c \ - examples/debugger/direct.c \ - examples/debugger/direct-multi.c \ - examples/debugger/indirect.c \ - examples/debugger/indirect-multi.c \ + examples/client2.c\ + examples/daemon_error_notify.c \ examples/debugger/attach.c \ examples/debugger/daemon.c \ + examples/debugger/direct.c \ + examples/debugger/direct-multi.c \ + examples/debugger/indirect.c \ + examples/debugger/indirect-multi.c \ examples/debugger/hello.c \ examples/dmodex.c \ examples/dynamic.c \ + examples/error_notify.c \ examples/fault.c \ + examples/hello.c \ examples/jctrl.c \ examples/launcher.c \ + examples/legacy.c \ examples/log.c \ examples/pmi1client.c \ + examples/probe.c \ examples/pub.c \ - examples/tool.c \ examples/server.c \ examples/showkeys.c \ - examples/legacy.c + examples/target.c \ + examples/tool.c From d00d9b3508426b9d244dd5dd3e32b1d59bf52ec8 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 4 Feb 2022 13:06:03 -0800 Subject: [PATCH 3/5] Remove HWLOC version block PRRTE does not attempt to modify environment strings in environ, so the string literal issue that caused failures in PMIx does not impact PRRTE directly. 
Signed-off-by: Brian Barrett (cherry picked from commit 12c62fb61ce5b8c459385de9b5dbaf654946cb8e) --- config/prte_setup_hwloc.m4 | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/config/prte_setup_hwloc.m4 b/config/prte_setup_hwloc.m4 index 1e73421201..edf704df33 100644 --- a/config/prte_setup_hwloc.m4 +++ b/config/prte_setup_hwloc.m4 @@ -94,27 +94,6 @@ AC_DEFUN([PRTE_SETUP_HWLOC],[ PRTE_FLAGS_PREPEND_UNIQ([LDFLAGS], [$prte_hwloc_LDFLAGS]) PRTE_FLAGS_PREPEND_UNIQ([LIBS], [$prte_hwloc_LIBS]) - AC_MSG_CHECKING([if hwloc version is in 2.5.0-2.7.0 range]) - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([#include <hwloc.h>], - [[ - #if HWLOC_VERSION_MAJOR == 2 - #if (HWLOC_VERSION_MINOR == 7 && HWLOC_VERSION_RELEASE == 0) || HWLOC_VERSION_MINOR == 6 || HWLOC_VERSION_MINOR == 5 - #error "hwloc version is in blocklist range" - #endif - #endif - ]])], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes]) - AC_MSG_WARN([***********************************************************]) - AC_MSG_WARN([PRRTE is not compatible with HWLOC versions 2.5.0-2.7.0 (inclusive)]) - AC_MSG_WARN([due to a bug in HWLOC's setting of environmental variables.]) - AC_MSG_WARN([Please switch the HWLOC installation to a version outside]) - AC_MSG_WARN([of that range.]) - AC_MSG_WARN([***********************************************************]) - AC_MSG_ERROR([Cannot continue])]) - - AC_MSG_CHECKING([if hwloc version is 1.5 or greater]) AC_COMPILE_IFELSE( [AC_LANG_PROGRAM([#include <hwloc.h>], From 53a80729e6cecd6684895b623699f7141a9fd372 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 4 Feb 2022 18:49:18 -0800 Subject: [PATCH 4/5] Cleanup a bit in prep for PMIx sync to v5 std Use the appropriate macros instead of directly calling the backing functions so PRRTE can work across PMIx versions Signed-off-by: Ralph Castain (cherry picked from commit ee0fdaad011252643582168b2358fd3bba01b62a) --- examples/server.c | 22 ++++++++++++++++------ src/prted/pmix/pmix_server_pub.c | 9 +++++++-- 2 files changed, 
23 insertions(+), 8 deletions(-) diff --git a/examples/server.c b/examples/server.c index d9f84671d9..71f138cb94 100644 --- a/examples/server.c +++ b/examples/server.c @@ -17,7 +17,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -572,6 +572,7 @@ static pmix_status_t publish_fn(const pmix_proc_t *proc, const pmix_info_t info[ { pmix_locdat_t *p; size_t n; + pmix_status_t rc = PMIX_SUCCESS; pmix_output(0, "SERVER: PUBLISH"); @@ -580,13 +581,16 @@ static pmix_status_t publish_fn(const pmix_proc_t *proc, const pmix_info_t info[ (void) strncpy(p->pdata.proc.nspace, proc->nspace, PMIX_MAX_NSLEN); p->pdata.proc.rank = proc->rank; (void) strncpy(p->pdata.key, info[n].key, PMIX_MAX_KEYLEN); - pmix_value_xfer(&p->pdata.value, (pmix_value_t *) &info[n].value); + PMIX_VALUE_XFER(rc, &p->pdata.value, (pmix_value_t *) &info[n].value); + if (PMIX_SUCCESS != rc) { + break; + } pmix_list_append(&pubdata, &p->super); } if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); + cbfunc(rc, cbdata); } - return PMIX_SUCCESS; + return rc; } static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, const pmix_info_t info[], @@ -609,7 +613,10 @@ static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, const pmix_ (void) strncpy(p2->pdata.proc.nspace, p->pdata.proc.nspace, PMIX_MAX_NSLEN); p2->pdata.proc.rank = p->pdata.proc.rank; (void) strncpy(p2->pdata.key, p->pdata.key, PMIX_MAX_KEYLEN); - pmix_value_xfer(&p2->pdata.value, &p->pdata.value); + PMIX_VALUE_XFER(ret, &p2->pdata.value, &p->pdata.value); + if (PMIX_SUCCESS != ret) { + break; + } pmix_list_append(&results, &p2->super); break; } @@ -624,7 +631,10 @@ static pmix_status_t lookup_fn(const pmix_proc_t 
*proc, char **keys, const pmix_ (void) strncpy(pd[i].proc.nspace, p->pdata.proc.nspace, PMIX_MAX_NSLEN); pd[i].proc.rank = p->pdata.proc.rank; (void) strncpy(pd[i].key, p->pdata.key, PMIX_MAX_KEYLEN); - pmix_value_xfer(&pd[i].value, &p->pdata.value); + PMIX_VALUE_XFER(ret, &pd[i].value, &p->pdata.value); + if (PMIX_SUCCESS != ret) { + break; + } } } } diff --git a/src/prted/pmix/pmix_server_pub.c b/src/prted/pmix/pmix_server_pub.c index 1e231fd12c..8f578a5dca 100644 --- a/src/prted/pmix/pmix_server_pub.c +++ b/src/prted/pmix/pmix_server_pub.c @@ -18,7 +18,7 @@ * All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -559,7 +559,12 @@ void pmix_server_keyval_client(int status, pmix_proc_t *sender, pmix_data_buffer goto release; } PMIX_LOAD_KEY(pdata[n].key, info.key); - pmix_value_xfer(&pdata[n].value, &info.value); + PMIX_VALUE_XFER_DIRECT(ret, &pdata[n].value, &info.value); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + PMIX_DATA_BUFFER_DESTRUCT(&pbkt); + goto release; + } PMIX_INFO_DESTRUCT(&info); } } From 44c016c478219c3244873434b001eaf316736ebc Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 9 Feb 2022 07:57:26 -0800 Subject: [PATCH 5/5] Update NEWS and VERSION Signed-off-by: Ralph Castain --- NEWS | 12 ++++++++++++ VERSION | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 337eb09191..7fc0051f38 100644 --- a/NEWS +++ b/NEWS @@ -35,6 +35,18 @@ This file contains the main features as well as overviews of specific bug fixes (and other actions) for each version of PRRTE since version 1.0. 
+2.0.2: 11 Feb 2022 +------------------- +**** NOTE: This release contains a workaround that resolves the prior +**** conflict with HWLOC versions 2.5.0 through 2.7.0 (inclusive). +**** Those versions of HWLOC are now supported. + +PR #1197: Cleanup a bit in prep for PMIx v5 release +PR #1195: Remove HWLOC version block +PR #1190: Add missing examples to tarball +PR #1189: Ensure an error gets reported when rankfile fails + + 2.0.1: 1 Feb 2022 ------------------------------------ +**** NOTE: As of v2.0.1, PRRTE no longer has a dependency on "pandoc" diff --git a/VERSION b/VERSION index 496e8c68b4..56e36a895c 100644 --- a/VERSION +++ b/VERSION @@ -17,7 +17,7 @@ major=2 minor=0 -release=1 +release=2 # greek is generally used for alpha or beta release tags. If it is # non-empty, it will be appended to the version number. It does not @@ -26,7 +26,7 @@ release=1 # requirement is that it must be entirely printable ASCII characters # and have no white space. -greek=rc5 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" @@ -78,7 +78,7 @@ date="Nov 24, 2018" # Version numbers are described in the Libtool current:revision:age # format. -libprrte_so_version=2:1:0 +libprrte_so_version=2:2:0 # "Common" components install standalone libraries that are run-time # linked by one or more components. So they need to be versioned as