diff --git a/config/x_ac_platform.m4 b/config/x_ac_platform.m4 index a26bd4a..fbb7922 100644 --- a/config/x_ac_platform.m4 +++ b/config/x_ac_platform.m4 @@ -28,7 +28,8 @@ # -------------------------------------------------------------------------------- # # Update Log: -# May 02 2018 KMD: Added aarch64 support +# Jul 13 2018 DHA: Add powerle support. +# May 02 2018 KMD: Added aarch64 support. # Apr 01 2015 ADG: Added Cray CTI support. # Feb 20 2015 andrewg@cray.com: Fixes for Cray systems. # Jun 11 2008 DHA: File created. @@ -79,6 +80,12 @@ AC_DEFUN([X_AC_PLATFORM], [ AC_SUBST(LNCHR_BIT_FLAGS, -m32) AC_DEFINE(BIT64, 1, [64bit]) ;; + *powerpc64le*)AC_DEFINE(POWERLE_ARCHITECTURE,1,[Define 1 for POWERLEC_ARCHITECTURE]) + ac_have_known_isa="yes" + ac_target_isa="powerle" + AC_SUBST(LNCHR_BIT_FLAGS, -m64) + AC_DEFINE(BIT64, 1, [64bit]) + ;; *powerpc*)AC_DEFINE(PPC_ARCHITECTURE,1,[Define 1 for PPC_ARCHITECTURE]) ac_have_known_isa="yes" ac_target_isa="power" diff --git a/config/x_ac_testnnodes.m4 b/config/x_ac_testnnodes.m4 index 39be81d..a3d2cb3 100644 --- a/config/x_ac_testnnodes.m4 +++ b/config/x_ac_testnnodes.m4 @@ -79,9 +79,9 @@ AC_DEFUN([X_AC_NCORE_SMP], [ AC_DEFUN([X_AC_TEST_RM], [ - AC_MSG_CHECKING([resource manager to test @<:@slurm bgqrm alps orte mpiexec_hydra@:>@]) + AC_MSG_CHECKING([resource manager to test @<:@slurm bgqrm alps orte mpiexec_hydra ibm_spectrum@:>@]) AC_ARG_WITH([test-rm], - AS_HELP_STRING(--with-test-rm@<:@=RM@:>@,specify a resource manager type to test @<:@slurm bgqrm alps orte mpiexec_hydra@:>@ @<:@default=slurm on linux-x86 and linux-x86_64; alps on Cray; bgqrm on linux-power64@:>@), + AS_HELP_STRING(--with-test-rm@<:@=RM@:>@,specify a resource manager type to test @<:@slurm bgqrm alps orte mpiexec_hydra ibm_spectrum@:>@ @<:@default=slurm on linux-x86 and linux-x86_64; alps on Cray; bgqrm on linux-power64; ibm_spectrum on linux-power64le@:>@), [with_rm=$withval], [with_rm="check"]) @@ -182,6 +182,44 @@ AC_DEFUN([X_AC_TEST_RM], [ # 
AC_MSG_RESULT($with_rm:$rm_found) + elif test "x$with_rm" = "xibm_spectrum" ; then + # + # Configure for IBM Spectrum (jsrun) + # + if test "x$with_launcher" != "xcheck"; then + # + # launcher path given + # + if test ! -z "$with_launcher" -a -f "$with_launcher"; then + pth=`$srcdir/config/ap $with_launcher` + ac_job_launcher_path=$pth + rm_found="yes" + AC_SUBST(TARGET_JOB_LAUNCHER_PATH,$ac_job_launcher_path) + AC_SUBST(RM_TYPE, RC_ibm_spectrum) + fi + else + rm_default_dirs="/opt/ibm/spectrum_mpi/jsm_pmix/bin/stock /usr/bin /usr/local/bin" + for rm_dir in $rm_default_dirs; do + if test ! -z "$rm_dir" -a ! -d "$rm_dir" ; then + continue; + fi + + if test ! -z "$rm_dir/jsrun" -a -f "$rm_dir/jsrun"; then + pth=`$srcdir/config/ap $rm_dir/jsrun` + ac_job_launcher_path=$pth + rm_found="yes" + AC_SUBST(TARGET_JOB_LAUNCHER_PATH,$ac_job_launcher_path) + AC_SUBST(RM_TYPE, RC_ibm_spectrum) + break + fi + done + fi + + # + # This answers whether RM given and found + # + AC_MSG_RESULT($with_rm:$rm_found) + elif test "x$with_rm" = "xalps" ; then # # Configure for Cray ALPS RM diff --git a/etc/Makefile.am b/etc/Makefile.am index a44835a..ec334b0 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -47,7 +47,8 @@ etc_SCRIPTS = \ rm_mchecker.conf \ rm_openrte.conf \ rm_slurm.conf \ - rm_mpiexec_hydra.conf + rm_mpiexec_hydra.conf \ + rm_ibm_spectrum.conf EXTRA_DIST = \ rm_info.conf \ @@ -61,5 +62,6 @@ EXTRA_DIST = \ rm_mchecker.conf \ rm_openrte.conf \ rm_slurm.conf \ - rm_mpiexec_hydra.conf + rm_mpiexec_hydra.conf \ + rm_ibm_spectrum.conf diff --git a/etc/rm_ibm_spectrum.conf b/etc/rm_ibm_spectrum.conf new file mode 100644 index 0000000..0829cea --- /dev/null +++ b/etc/rm_ibm_spectrum.conf @@ -0,0 +1,54 @@ +## $Header: $ +## +## rm_ibm_spectrum.conf +## +##-------------------------------------------------------------------------------- +## Copyright (c) 2008, Lawrence Livermore National Security, LLC. Produced at +## the Lawrence Livermore National Laboratory. Written by Dong H. 
Ahn . +## LLNL-CODE-409469. All rights reserved. +## +## This file is part of LaunchMON. For details, see +## https://computing.llnl.gov/?set=resources&page=os_projects +## +## Please also read LICENSE -- Our Notice and GNU Lesser General Public License. +## +## +## This program is free software; you can redistribute it and/or modify it under the +## terms of the GNU General Public License (as published by the Free Software +## Foundation) version 2.1 dated February 1999. +## +## This program is distributed in the hope that it will be useful, but WITHOUT ANY +## WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU Lesser General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., 59 Temple +## Place, Suite 330, Boston, MA 02111-1307 USA +##-------------------------------------------------------------------------------- +## +## Update Log: +## Apr 17 2018 DHA: Created file. +## +## +## RM: the name of Resource Manager +## RM_launcher: the name of the launcher command +## RM_launcher_id: the rule to get the launcher id +## (e.g., RM_launcher|sym|srun says the launcher is identified by testing +## RM_launcher's symbol by the name of srun) +## RM_jobid: the rule to get the target jobid +## (e.g., RM_jobid=RM_launcher|sym|totalview_jobid|string says +## jobid can be obtained from the launcher's symbol, totalview_jobid, +## interpreting that as the string type.) +## RM_launch_helper= method or command to launch daemons +## RM_launch_str= options and arguments used for RM_launch_helper. 
+## + +RM=spectrum +RM_MPIR=STD_COLOC_FIFO +RM_launcher=jsrun +RM_launcher_id=RM_launcher|sym|jsm_env_get_jsrun_port +RM_launch_helper=mpir +RM_signal_for_kill=SIGINT +RM_fail_detection=false +RM_launch_str=%o --lmonsharedsec=%s --lmonsecchk=%c diff --git a/etc/rm_info.conf b/etc/rm_info.conf index 2fb4e7d..670a6c3 100644 --- a/etc/rm_info.conf +++ b/etc/rm_info.conf @@ -73,3 +73,6 @@ rm_bgq_slurm.conf rm_mchecker.conf rm_gupc.conf rm_openrte.conf + +[linux-powerle] +rm_ibm_spectrum.conf diff --git a/launchmon/src/linux/lmon_api/lmon_be_sync_mpi.cxx b/launchmon/src/linux/lmon_api/lmon_be_sync_mpi.cxx index e15bc6f..5c8a131 100644 --- a/launchmon/src/linux/lmon_api/lmon_be_sync_mpi.cxx +++ b/launchmon/src/linux/lmon_api/lmon_be_sync_mpi.cxx @@ -116,6 +116,7 @@ lmon_rc_e LMON_be_procctl_init(rm_catalogue_e rmtype, MPIR_PROCDESC_EXT *ptab, case RC_cray: case RC_gupc: case RC_mpiexec_hydra: + case RC_ibm_spectrum: // // Call generic Linux init // @@ -170,6 +171,7 @@ lmon_rc_e LMON_be_procctl_stop(rm_catalogue_e rmtype, MPIR_PROCDESC_EXT *ptab, case RC_cray: case RC_gupc: case RC_mpiexec_hydra: + case RC_ibm_spectrum: // // Call generic Linux stop // @@ -230,6 +232,7 @@ lmon_rc_e LMON_be_procctl_run(rm_catalogue_e rmtype, int signum, case RC_cray: case RC_gupc: case RC_mpiexec_hydra: + case RC_ibm_spectrum: // // Call generic Linux run // @@ -292,6 +295,7 @@ lmon_rc_e LMON_be_procctl_initdone(rm_catalogue_e rmtype, case RC_cray: case RC_gupc: case RC_mpiexec_hydra: + case RC_ibm_spectrum: // // Call generic Linux initdone // @@ -346,6 +350,7 @@ lmon_rc_e LMON_be_procctl_done(rm_catalogue_e rmtype, MPIR_PROCDESC_EXT *ptab, case RC_cray: case RC_gupc: case RC_mpiexec_hydra: + case RC_ibm_spectrum: // // You need to do nothing for these resource managers // @@ -402,6 +407,7 @@ lmon_rc_e LMON_be_procctl_perf(rm_catalogue_e rmtype, MPIR_PROCDESC_EXT *ptab, case RC_cray: case RC_gupc: case RC_mpiexec_hydra: + case RC_ibm_spectrum: // // You need to do nothing for these 
resource managers // diff --git a/launchmon/src/linux/main.cxx b/launchmon/src/linux/main.cxx index e0e6cba..67bb23d 100644 --- a/launchmon/src/linux/main.cxx +++ b/launchmon/src/linux/main.cxx @@ -54,7 +54,7 @@ int main(int argc, char* argv[]) { try { int rc = EXIT_FAILURE; #if X86_ARCHITECTURE || X86_64_ARCHITECTURE || PPC_ARCHITECTURE || \ - AARCH64_ARCHITECTURE + AARCH64_ARCHITECTURE || POWERLE_ARCHITECTURE // // driver instantiation for the linux platform. // diff --git a/launchmon/src/linux/sdbg_linux_driver_impl.hxx b/launchmon/src/linux/sdbg_linux_driver_impl.hxx index 82c9f08..e593733 100644 --- a/launchmon/src/linux/sdbg_linux_driver_impl.hxx +++ b/launchmon/src/linux/sdbg_linux_driver_impl.hxx @@ -91,7 +91,7 @@ linux_driver_t::create_process(pid_t pid, #if X86_ARCHITECTURE || X86_64_ARCHITECTURE return_proc = new linux_x86_process_t(pid, mi, md, mt, mc); -#elif PPC_ARCHITECTURE +#elif PPC_ARCHITECTURE || POWERLE_ARCHITECTURE return_proc = new linux_ppc_process_t(pid, mi, md, mt, mc); #elif IA64_ARCHITECTURE return_proc = new linux_ia64_process_t(pid, mi, md, mt, mc); @@ -122,7 +122,7 @@ linux_driver_t::create_process(pid_t pid, // #if X86_ARCHITECTURE || X86_64_ARCHITECTURE return_proc = new linux_x86_process_t(pid, mi); -#elif PPC_ARCHITECTURE +#elif PPC_ARCHITECTURE || POWERLE_ARCHITECTURE return_proc = new linux_ppc_process_t(pid, mi); #elif IA64_ARCHITECTURE return_proc = new linux_ia64_process_t(pid, mi); diff --git a/launchmon/src/linux/sdbg_linux_launchmon.cxx b/launchmon/src/linux/sdbg_linux_launchmon.cxx index ccb1ed8..0ac0b39 100644 --- a/launchmon/src/linux/sdbg_linux_launchmon.cxx +++ b/launchmon/src/linux/sdbg_linux_launchmon.cxx @@ -27,6 +27,7 @@ *-------------------------------------------------------------------------------- * * Update Log: + * Oct 10 2018 DHA: Added PowerLE support for Sierra * May 02 2018 KMD: Added aarch64 support * Jul 22 2015 ADG: Fix for on demand proctable * Feb 20 2015 andrewg@cray.com: Added support for RMs that 
build the @@ -1055,7 +1056,7 @@ bool linux_launchmon_t::handle_mpir_variables( p.set_launch_hidden_bp(NULL); } la_bp = new linux_breakpoint_t(); - la_bp->set_address_at(launch_bp_sym.get_relocated_address()); + la_bp->set_address_at(launch_bp_sym.get_relocated_lowest_address()); #if PPC_ARCHITECTURE // @@ -1063,8 +1064,8 @@ bool linux_launchmon_t::handle_mpir_variables( // PowerPC Linux has begun to change the linking convention // such that binaries no longer export direct function // symbols. (e.g., .MPIR_Breakpoint). But rather, undotted - // global data symbols (e.g., MPIR_Breakpoint) contains the - // address for the corresponding function. + // global data symbols (e.g., MPIR_Breakpoint) is the function + // descriptor // // Added indirect breakpoint support for that and use this // method on all PPC systems across the board including @@ -1487,16 +1488,15 @@ launchmon_event_e linux_launchmon_t::decipher_an_event( // // Parent gets SIGTRAP when a new thread is created // Used to be: return_ev = LM_STOP_NOT_INTERESTED; - int upper16; - upper16 = event.get_rawstatus() >> 16; - if (upper16 == LINUX_TRACER_EVENT_CLONE) { + int high = event.get_rawstatus() >> 8; + if (high == (SIGTRAP | (LINUX_TRACER_EVENT_CLONE << 8))) { return_ev = LM_STOP_AT_THREAD_CREATION; - } else { - // - // SIGTRAP due to fork for example - // - return_ev = LM_STOP_NOT_INTERESTED; - } + } else { + // + // SIGTRAP due to fork for example + // + return_ev = LM_STOP_NOT_INTERESTED; + } } else if (event.get_signum() == SIGSTOP) { return_ev = LM_RELAY_SIGNAL; @@ -1579,12 +1579,13 @@ launchmon_rc_e linux_launchmon_t::handle_trap_after_attach_event( #if MEASURE_TRACING_COST beginTS = gettimeofdayD(); +#endif + { - self_trace_t::trace(true, // print always + self_trace_t::trace(LEVELCHK(level2), MODULENAME, 0, "The RM process has just been trapped due to attach"); } -#endif bool use_cxt = true; image_base_t *dynloader_im = NULL; @@ -1650,7 +1651,7 @@ launchmon_rc_e 
linux_launchmon_t::handle_trap_after_attach_event( dynloader_im->get_a_symbol(p.get_loader_breakpoint_sym()); lo_bp = new linux_breakpoint_t(); - addr_dl_bp = dynload_sym.get_relocated_address(); + addr_dl_bp = dynload_sym.get_relocated_lowest_address(); lo_bp->set_address_at(addr_dl_bp); #if PPC_ARCHITECTURE lo_bp->set_use_indirection(); @@ -1676,11 +1677,16 @@ launchmon_rc_e linux_launchmon_t::handle_trap_after_attach_event( std::string fip = fifopathbuf; p.rmgr()->set_attach_fifo_path(fip); + get_tracer()->tracer_continue(p, use_cxt); // - // We have to continue the target process before starting FIFO - // otherwise open on the FIFO will block + // NOTE: Depending on how FIFO is polled within the target + // RM process, poking the FIFO may not be effected. RM designer + // must ensure that FIFO is being polled in a way such that + // when 1 is sent to the FIFO while it is being stopped, it + // will ultimately picked up when the process resumes execution. // - get_tracer()->tracer_continue(p, use_cxt); + self_trace_t::trace(LEVELCHK(level2), MODULENAME, + 0, "MPIR_attach_fifo: %s", fifopathbuf); int fifofd = 0; if ((fifofd = open(fifopathbuf, O_WRONLY)) >= 0) { char wakeup = (char)1; @@ -1747,12 +1753,6 @@ launchmon_rc_e linux_launchmon_t::handle_trap_after_attach_event( // is invoked just once per job #endif - { - self_trace_t::trace( - true, // print always - MODULENAME, 0, "Just continued the RM process out of the first trap"); - } - set_last_seen(gettimeofdayD()); return LAUNCHMON_OK; @@ -1782,9 +1782,9 @@ launchmon_rc_e linux_launchmon_t::handle_trap_after_exec_event( #endif { - self_trace_t::trace(true, // print always - MODULENAME, 0, - "The RM process has just been forked and exec'ed."); + self_trace_t::trace(LEVELCHK(level2), + MODULENAME, 0, "The RM process (%d) has " + "just been forked and exec'ed.", p.get_pid (true)); } bool use_cxt = true; @@ -1831,7 +1831,7 @@ launchmon_rc_e linux_launchmon_t::handle_trap_after_exec_event( // // Corner case; we 
deal with a heuristics // -#if PPC_ARCHITECTURE +#if PPC_ARCHITECTURE || POWERLE_ARCHITECTURE // // DHA Mar 05 2009 // There're systems that do not directly @@ -1859,7 +1859,7 @@ launchmon_rc_e linux_launchmon_t::handle_trap_after_exec_event( #else = p.get_gprset(use_cxt)->get_pc() & 0xffff0000; #endif -#else /* PPC_ARCHITECTURE */ +#else /* PPC_ARCHITECTURE || POWERLE_ARCHITECTURE */ // // This requires the actual page size to compute this loader load // address implictly. Just using the following bits for now. @@ -1914,15 +1914,8 @@ launchmon_rc_e linux_launchmon_t::handle_trap_after_exec_event( endTS = gettimeofdayD(); accum += endTS - beginTS; countHandler++; - // accum and countHandler now contain the cost of this handler which - // is invoked just once per job #endif - { - self_trace_t::trace( - true, // print always - MODULENAME, 0, "Just continued the RM process out of the first trap"); - } set_last_seen(gettimeofdayD()); return LAUNCHMON_OK; @@ -2653,12 +2646,19 @@ launchmon_rc_e linux_launchmon_t::handle_thrcreate_request( memset(&tinfo, '\0', sizeof(tinfo)); tinfo.ti_lid = (lwpid_t)newlwpid; + { + self_trace_t::trace(LEVELCHK(level2), MODULENAME,0, + "thread creation request event handler " + "invoked for thread (%d)", newlwpid); + } + + if (p.get_thrlist().find(tinfo.ti_lid) == p.get_thrlist().end()) { // this thread has not been seen #if X86_ARCHITECTURE || X86_64_ARCHITECTURE thread_base_t *thrinfo = new linux_x86_thread_t(); -#elif PPC_ARCHITECTURE +#elif PPC_ARCHITECTURE || POWERLE_ARCHITECTURE thread_base_t *thrinfo = new linux_ppc_thread_t(); #elif AARCH64_ARCHITECTURE diff --git a/launchmon/src/linux/sdbg_linux_mach.cxx b/launchmon/src/linux/sdbg_linux_mach.cxx index 5061acf..1c30a12 100644 --- a/launchmon/src/linux/sdbg_linux_mach.cxx +++ b/launchmon/src/linux/sdbg_linux_mach.cxx @@ -520,7 +520,7 @@ bool linux_x86_process_t::basic_init(const std::string& mi, return true; } -#elif PPC_ARCHITECTURE +#elif PPC_ARCHITECTURE || POWERLE_ARCHITECTURE 
//////////////////////////////////////////////////////////////////// // diff --git a/launchmon/src/linux/sdbg_linux_mach.hxx b/launchmon/src/linux/sdbg_linux_mach.hxx index 987efb5..afb2413 100644 --- a/launchmon/src/linux/sdbg_linux_mach.hxx +++ b/launchmon/src/linux/sdbg_linux_mach.hxx @@ -516,7 +516,7 @@ struct ps_prochandle { process_base_t* p; }; -#elif PPC_ARCHITECTURE +#elif PPC_ARCHITECTURE || POWERLE_ARCHITECTURE //! linux_ppc_gpr_set_t: /*! diff --git a/launchmon/src/linux/sdbg_linux_std.hxx b/launchmon/src/linux/sdbg_linux_std.hxx index 0fa7340..91e7af2 100644 --- a/launchmon/src/linux/sdbg_linux_std.hxx +++ b/launchmon/src/linux/sdbg_linux_std.hxx @@ -356,7 +356,7 @@ const T_VA T_UNINIT_HEX = 0xdeadbeef; #define SDBG_LINUX_DFLT_INSTANTIATION \ T_VA, T_WT, T_IT, T_GRS, T_FRS, my_thrinfo_t, elf_wrapper -#elif PPC_ARCHITECTURE +#elif PPC_ARCHITECTURE || POWERLE_ARCHITECTURE // // diff --git a/launchmon/src/linux/sdbg_linux_symtab.hxx b/launchmon/src/linux/sdbg_linux_symtab.hxx index b35fc9b..3a4172f 100644 --- a/launchmon/src/linux/sdbg_linux_symtab.hxx +++ b/launchmon/src/linux/sdbg_linux_symtab.hxx @@ -203,6 +203,8 @@ class linux_image_t : public image_base_t { level); } + VA get_local_entry_point (const unsigned char o); + // For self tracing // std::string MODULENAME; diff --git a/launchmon/src/linux/sdbg_linux_symtab_impl.hxx b/launchmon/src/linux/sdbg_linux_symtab_impl.hxx index 5eb7321..c7e864d 100644 --- a/launchmon/src/linux/sdbg_linux_symtab_impl.hxx +++ b/launchmon/src/linux/sdbg_linux_symtab_impl.hxx @@ -28,6 +28,7 @@ * * * Update Log: + * May 19 2018 DHA: Added OpenPower ABI's dual entry points. * Oct 27 2010 DHA: Added is_defined, is_globally_visible, * is_locally_visible virtual methods. * Dec 20 2009 DHA: Fixed a bug that arose when Mark's patch @@ -482,6 +483,10 @@ linux_image_t::read_linkage_symbols() throw( decode_type(first_sym->st_info, tmp); a_linksym->set_type(tmp); a_linksym->set_defined((first_sym->st_shndx != SHN_UNDEF) ? 
true : false); + a_linksym->set_info(first_sym->st_info); + a_linksym->set_other(first_sym->st_other); + a_linksym->set_local_entry_offset( + get_local_entry_point(first_sym->st_other)); string keystr(symname); @@ -864,6 +869,47 @@ void linux_image_t::decode_visibility( } } + +//! PRIVATE: linux_image_t::get_local_entry_point -- +/*! + Calculate the entry point used by an intramodule function call +*/ +template +VA +linux_image_t::get_local_entry_point (const unsigned char o) +{ + VA rc = 0; +#if POWERLE_ARCHITECTURE + /* The "OpenPOWER ABI for Linux Supplement, Power Architecture 64-Bit ELF V2 + * ABI, Advance": + * "The OpenPOWER ABI uses the three most-significant bits + * in the symbol st_other field to specify the number of instructions between a + * function's global entry point and local entry point. The global entry point + * is used when it is necessary to set up the TOC pointer (r2) for the + * function. The local entry point is used when r2 is known to already be valid + * for the function. A value of zero in these bits asserts that the function + * does not use r2." 
+ */ + const int code = (o >> 5) & 0x7; + switch (code) + { + case 2: /* 1 instruction */ + case 3: /* 2 instructions */ + case 4: /* 4 instructions */ + case 5: /* 8 instructions */ + case 6: /* 16 instructions */ + rc = (1 << (code - 2)) * 4; /* TODO: augment template param to includ IT */ + break; + case 0: /* local == global */ + case 1: /* local == global */ + case 7: /* Reserved */ + break; + } /* switch */ +#endif + return rc; +} + + template void linux_image_t::set_image_base_address(VA ba) { image_base_t::set_image_base_address(ba); diff --git a/launchmon/src/linux/sdbg_proc_service.cxx b/launchmon/src/linux/sdbg_proc_service.cxx index d9f9966..c115b4e 100644 --- a/launchmon/src/linux/sdbg_proc_service.cxx +++ b/launchmon/src/linux/sdbg_proc_service.cxx @@ -62,7 +62,7 @@ extern "C" { #include } -#if X86_ARCHITECTURE || PPC_ARCHITECTURE +#if X86_ARCHITECTURE || PPC_ARCHITECTURE || POWERLE_ARCHITECTURE #ifndef PTRACE_GET_THREAD_AREA #define PTRACE_GET_THREAD_AREA 25 @@ -243,7 +243,7 @@ extern "C" ps_err_e ps_get_thread_area(const struct ps_prochandle *ph, lwpid_t lpid, int x, psaddr_t *addr) { bool use_cxt = true; -#if X86_ARCHITECTURE || PPC_ARCHITECTURE +#if X86_ARCHITECTURE || PPC_ARCHITECTURE || POWERLE_ARCHITECTURE /* * How to fetch thread-specific area for x86/linux and powerPC/linux * diff --git a/launchmon/src/lmon_api/lmon_api_std.h b/launchmon/src/lmon_api/lmon_api_std.h index 9cdb96e..2473033 100644 --- a/launchmon/src/lmon_api/lmon_api_std.h +++ b/launchmon/src/lmon_api/lmon_api_std.h @@ -119,6 +119,7 @@ typedef enum _rm_catalogue_e RC_orte, RC_mpiexec_hydra, RC_gupc, + RC_ibm_spectrum, RC_none /* new RMs should be added here as LaunchMON is ported diff --git a/launchmon/src/sdbg_base_mach.hxx b/launchmon/src/sdbg_base_mach.hxx index 040b8fa..8f6f323 100644 --- a/launchmon/src/sdbg_base_mach.hxx +++ b/launchmon/src/sdbg_base_mach.hxx @@ -27,6 +27,7 @@ *-------------------------------------------------------------------------------- * * Update 
Log: + * Jul 13 2018 DHA: Remove event_entity support * May 02 2018 ADG: Added aarch64 support * Sep 02 2010 DHA: Added MPIR_attach_fifo support * May 08 2008 DHA: Added an alias (is_master_thread) @@ -157,8 +158,6 @@ enum debug_event_e { EV_INVALID }; -enum eventing_entity_e { EV_ENTITY_THREAD, EV_ENTITY_PROCESS, EV_ENTITY_NONE }; - class debug_event_t { public: debug_event_t() { @@ -167,13 +166,11 @@ class debug_event_t { } ~debug_event_t() {} void set_ev(const enum debug_event_e e) { ev = e; } - void set_en(const enum eventing_entity_e t) { en = t; } void set_signum(const int s) { u.signum = s; } void set_exitcode(const int ec) { u.exitcode = ec; } void set_rawstatus(const int st) { rawstatus = st; } void set_id(const int i) { id = i; } const debug_event_e get_ev() const { return ev; } - const eventing_entity_e get_en() const { return en; } const int get_signum() const { return u.signum; } const int get_exitcode() const { return u.exitcode; } const int get_rawstatus() const { return rawstatus; } @@ -181,7 +178,6 @@ class debug_event_t { private: debug_event_e ev; - eventing_entity_e en; union { int signum; int exitcode; diff --git a/launchmon/src/sdbg_base_symtab.hxx b/launchmon/src/sdbg_base_symtab.hxx index 96cfc32..262e6e2 100644 --- a/launchmon/src/sdbg_base_symtab.hxx +++ b/launchmon/src/sdbg_base_symtab.hxx @@ -27,6 +27,7 @@ *-------------------------------------------------------------------------------- * * Update Log: + * May 19 2018 DHA: Added OpenPower ABI's dual entry points. * Oct 27 2010 DHA: Added is_defined, is_globally_visible, * is_locally_visible virtual methods. 
* Feb 09 2008 DHA: Added LLNS Copyright @@ -112,7 +113,10 @@ class symbol_base_t { symbol_base_t(const std::string &n, const std::string &bln, const VA rd = SYMTAB_UNINIT_ADDR, - const VA rla = SYMTAB_UNINIT_ADDR); + const VA rla = SYMTAB_UNINIT_ADDR, + const VA lo=SYMTAB_UNINIT_ADDR, + const char i='\0', + const char o='\0'); symbol_base_t(const symbol_base_t &sobj); @@ -125,10 +129,17 @@ class symbol_base_t { void set_base_lib_name(const std::string &bln); void set_raw_address(const VA &ra); void set_relocated_address(const VA &ra); + void set_local_entry_offset (const VA &ra); + void set_other (const char o); + void set_info (const char i); const std::string &get_name() const; const std::string &get_base_lib_name() const; - const VA &get_raw_address() const; - const VA &get_relocated_address() const; + const VA get_raw_address() const; + const VA get_relocated_address() const; + const VA get_local_entry_offset() const; + const VA get_relocated_lowest_address() const; + const char get_other() const; + const char get_info() const; virtual bool is_defined() const { return false; } virtual bool is_globally_visible() const { return false; } @@ -146,6 +157,9 @@ class symbol_base_t { std::string base_lib_name; VA raw_address; VA relocated_address; + VA local_entry_offset; // Support arch like OpenPower with dual entry points + unsigned char info; + unsigned char other; }; //! ltstr diff --git a/launchmon/src/sdbg_base_symtab_impl.hxx b/launchmon/src/sdbg_base_symtab_impl.hxx index ae9afb6..b08cd3b 100644 --- a/launchmon/src/sdbg_base_symtab_impl.hxx +++ b/launchmon/src/sdbg_base_symtab_impl.hxx @@ -27,6 +27,7 @@ *-------------------------------------------------------------------------------- * * Update Log: + * May 19 2018 DHA: Added dual entry points for IBM OpenPower ABI * Feb 09 2008 DHA: Added LLNS Copyright * Jan 10 2006 DHA: Created file. 
*/ @@ -79,11 +80,17 @@ template symbol_base_t::symbol_base_t(const std::string &n, const std::string &bln, const VA rd, - const VA rla) { + const VA rla, + const VA lo, + const char i, + const char o) { name = n; base_lib_name = bln; raw_address = rd; relocated_address = rla; + local_entry_offset = lo; + info = i; + other = o; } //! symbol_base_t<> destructor @@ -122,13 +129,33 @@ const std::string &symbol_base_t::get_base_lib_name() return base_lib_name; } +template +const char symbol_base_t::get_other() const { + return other; +} + +template +void symbol_base_t::set_other(const char o) { + other = o; +} + +template +const char symbol_base_t::get_info() const { + return info; +} + +template +void symbol_base_t::set_info(const char i) { + info = i; +} + template void symbol_base_t::set_raw_address(const VA &ra) { raw_address = ra; } template -const VA &symbol_base_t::get_raw_address() const { +const VA symbol_base_t::get_raw_address() const { return raw_address; } @@ -139,10 +166,30 @@ void symbol_base_t::set_relocated_address( } template -const VA &symbol_base_t::get_relocated_address() const { +const VA symbol_base_t::get_relocated_address() const { return relocated_address; } +template +void symbol_base_t::set_local_entry_offset ( + const VA &lo) { + local_entry_offset = lo; +} + +template +const VA symbol_base_t::get_local_entry_offset () const { + return local_entry_offset; +} + +template +const VA symbol_base_t::get_relocated_lowest_address () + const { + return (local_entry_offset == SYMTAB_UNINIT_ADDR) + ? 
relocated_address + : relocated_address + local_entry_offset; +} + + //////////////////////////////////////////////////////////////////// // // PUBLIC INTERFACES (class image_base_t<>) diff --git a/launchmon/src/sdbg_event_manager_impl.hxx b/launchmon/src/sdbg_event_manager_impl.hxx index 59cd219..589bc20 100644 --- a/launchmon/src/sdbg_event_manager_impl.hxx +++ b/launchmon/src/sdbg_event_manager_impl.hxx @@ -112,23 +112,13 @@ bool monitor_proc_thread_t::wait_for_all( pid_t rpid; int status; bool rs = false; - eventing_entity_e entity = EV_ENTITY_THREAD; - rpid = waitpid(-1, &status, WNOHANG | WUNTRACED); - if (rpid <= 0) { - rpid = waitpid(-1, &status, WNOHANG | __WCLONE); - } else { - entity = EV_ENTITY_PROCESS; - } - - if (rpid <= 0) { + if ((rpid = waitpid(-1, &status, WNOHANG | WUNTRACED | __WCLONE)) <= 0) { rc.set_ev(EV_NOCHILD); - rc.set_en(EV_ENTITY_NONE); rc.set_id(rpid); return rs; } - rc.set_en(entity); rc.set_id(rpid); if (WIFEXITED(status)) { @@ -202,69 +192,52 @@ bool event_manager_t::poll_processes( launchmon_rc_e rc = LAUNCHMON_OK; launchmon_event_e ev; - if (ev_monitor->wait_for_all(event)) { - if (event.get_en() == EV_ENTITY_PROCESS) { - // - // A process event is reported - // - if (event.get_id() == p.get_pid(false)) { - // - // The target RM_process reported - // - p.make_context(event.get_id()); - ev = lm.decipher_an_event(p, event); - rc = lm.invoke_handler(p, ev, event.get_signum()); - p.check_and_undo_context(event.get_id()); - } else if (event.get_id() == lm.get_toollauncherpid()) { - // RM_process that launched tool daemons reported - // error handling semantics for C.2 - // - if ((event.get_ev() == EV_EXITED) || - (event.get_ev() == EV_TERMINATED)) { - // - // this means that back-end daemons have exited - // Enforcing C.2 error handling semantics. - // - rc = lm.handle_daemon_exit_event(p); - } - // - // in this case rpid won't be part of the thread list - // so that the following loop body won't be executed. 
- // - } else { - // - // a unknown new process reported - // - if (event.get_ev() == EV_STOPPED) { - p.make_context(event.get_id()); - rc = lm.invoke_handler(p, LM_STOP_NEW_FORKED_PROCESS, event.get_id()); - p.check_and_undo_context(event.get_id()); - } - } - } else if (event.get_en() == EV_ENTITY_THREAD) { - // - // A thread of the target RM_process reported + if (!ev_monitor->wait_for_all(event)) + goto done; + + if (event.get_id() == p.get_pid(false)) { + // + // The target RM_process reported + // + p.make_context ( event.get_id()); + ev = lm.decipher_an_event ( p, event ); + rc = lm.invoke_handler ( p, ev, event.get_signum() ); + p.check_and_undo_context ( event.get_id() ); + } else if (event.get_id() == lm.get_toollauncherpid()) { + // RM_process that launched tool daemons reported + // error handling semantics for C.2 + // + if ((event.get_ev() == EV_EXITED) + || (event.get_ev() == EV_TERMINATED)) { + // this means that back-end daemons have exited + // Enforcing C.2 error handling semantics. 
// - map*, ltstr>& tl = - p.get_thrlist(); - - if (tl.find(event.get_id()) == tl.end()) { - // - // Possibly an unknown thread to pick up - // - if (event.get_ev() == EV_STOPPED) { - rc = lm.invoke_handler(p, LM_REQUEST_NEW_THREAD, event.get_id()); - } - } - - if (tl.find(event.get_id()) != tl.end()) { - p.make_context(event.get_id()); - ev = lm.decipher_an_event(p, event); - rc = lm.invoke_handler(p, ev, event.get_signum()); - p.check_and_undo_context(event.get_id()); - } + rc = lm.handle_daemon_exit_event(p); } + } else if ( p.get_thrlist().find (event.get_id()) != p.get_thrlist().end()) { + p.make_context(event.get_id()); + ev = lm.decipher_an_event(p, event); + rc = lm.invoke_handler (p, ev, event.get_signum()); + p.check_and_undo_context(event.get_id()); + } else { + // + // a new process reported -- don't follow + // + if (event.get_ev() == EV_STOPPED) { + p.make_context (event.get_id()); + rc = lm.invoke_handler(p, + LM_STOP_NEW_FORKED_PROCESS, + event.get_id() ); + p.check_and_undo_context (event.get_id()); + } + //p.make_context(event.get_id()); + //ev = lm.decipher_an_event(p, event); + //rc = lm.invoke_handler (p, ev, event.get_signum()); + //lm.handle_thrcreate_trap_event (p); + //p.check_and_undo_context(event.get_id()); } + +done: return ((rc == LAUNCHMON_OK) ? 
true : false); } diff --git a/launchmon/src/sdbg_opt.cxx b/launchmon/src/sdbg_opt.cxx index c7fd2d6..7938bd9 100644 --- a/launchmon/src/sdbg_opt.cxx +++ b/launchmon/src/sdbg_opt.cxx @@ -352,7 +352,8 @@ bool opts_args_t::process_args(int *argc, char ***argv) { // alternative way to set the engine's verbose level // char *l; - if ((l = getenv("LMON_ENGINE_VERBOSE_LEVEL")) != NULL) { + if ((l = getenv("LMON_ENGINE_VERBOSE_LEVEL")) != NULL + || (l = getenv("LMON_VERBOSITY")) != NULL) { int il = atoi(l); self_trace_verbosity verbo; diff --git a/launchmon/src/sdbg_rm_map.cxx b/launchmon/src/sdbg_rm_map.cxx index 3428c58..3daae0a 100644 --- a/launchmon/src/sdbg_rm_map.cxx +++ b/launchmon/src/sdbg_rm_map.cxx @@ -197,6 +197,8 @@ void resource_manager_t::fill_rm_type(const std::string &v) { rm = RC_mpiexec_hydra; } else if (v == std::string("gupc")) { rm = RC_gupc; + } else if (v == std::string("spectrum")) { + rm = RC_ibm_spectrum; } else { rm = RC_none; } diff --git a/test/src/fe_launch_middleware.cxx b/test/src/fe_launch_middleware.cxx index d03cb25..115603a 100644 --- a/test/src/fe_launch_middleware.cxx +++ b/test/src/fe_launch_middleware.cxx @@ -1,40 +1,33 @@ /* - * $Header: $ *-------------------------------------------------------------------------------- - * Copyright (c) 2008-2010, Lawrence Livermore National Security, LLC. Produced - *at + * Copyright (c) 2008, Lawrence Livermore National Security, LLC. Produced at * the Lawrence Livermore National Laboratory. Written by Dong H. Ahn - *. - * LLNL-CODE-409469. All rights reserved. + * . LLNL-CODE-409469. All rights reserved. * * This file is part of LaunchMON. For details, see * https://computing.llnl.gov/?set=resources&page=os_projects * * Please also read LICENSE.txt -- Our Notice and GNU Lesser General Public - *License. + * License. 
* * * This program is free software; you can redistribute it and/or modify it under - *the - * terms of the GNU General Public License (as published by the Free Software - * Foundation) version 2.1 dated February 1999. + * the terms of the GNU General Public License (as published by the Free + * Software Foundation) version 2.1 dated February 1999. * * This program is distributed in the hope that it will be useful, but WITHOUT - *ANY - * WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or + * ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - *along - * with this program; if not, write to the Free Software Foundation, Inc., 59 - *Temple + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple * Place, Suite 330, Boston, MA 02111-1307 USA *-------------------------------------------------------------------------------- * - * - * * Update Log: + * Jul 18 2018 DHA: Add IBM JSM Spectrum support * Jun 01 2012 DHA: Copied from 0.8-middleware-support branch and merged * with 1.0-BGQ * Aug 03 2020 DHA: Created file. 
@@ -141,17 +134,7 @@ int main(int argc, char *argv[]) { launcher_argv[4] = strdup(argv[2]); launcher_argv[5] = strdup("--exe"); launcher_argv[6] = strdup(argv[1]); - // manually fill the block - // launcher_argv[7] = strdup("--block"); - // launcher_argv[8] = strdup("R00-M0-N04"); - // manually fill the corner - // launcher_argv[9] = strdup("--corner"); - // launcher_argv[10] = strdup("R00-M0-N04-J07"); - // manually fill the shape - // launcher_argv[11] = strdup("--shape"); - // launcher_argv[12] = strdup("1x1x1x1x1"); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if ((rmenv_str == std::string("RC_bgq_slurm"))) { launcher_argv = (char **)malloc(7 * sizeof(char *)); launcher_argv[0] = strdup(mylauncher); @@ -161,8 +144,6 @@ int main(int argc, char *argv[]) { launcher_argv[4] = strdup(argv[2]); launcher_argv[5] = strdup(argv[1]); launcher_argv[6] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", - "mylauncher"); } else if ((rmenv_str == std::string("RC_bglrm")) || (rmenv_str == std::string("RC_bgprm"))) { launcher_argv = (char **)malloc(8 * sizeof(char *)); @@ -174,7 +155,6 @@ int main(int argc, char *argv[]) { launcher_argv[5] = strdup("-exe"); launcher_argv[6] = strdup(argv[1]); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if (rmenv_str == std::string("RC_slurm")) { numprocs_opt = string("-n") + string(argv[2]); numnodes_opt = string("-N") + string(argv[3]); @@ -204,7 +184,6 @@ int main(int argc, char *argv[]) { launcher_argv[5] = strdup(argv[2]); launcher_argv[6] = strdup(argv[1]); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if (rmenv_str == std::string("RC_mpiexec_hydra")) { launcher_argv = (char **)malloc(5 * sizeof(char *)); launcher_argv[0] = strdup(mylauncher); @@ -212,7 +191,13 @@ int main(int argc, char *argv[]) { launcher_argv[2] = 
strdup(argv[2]); launcher_argv[3] = strdup(argv[1]); launcher_argv[4] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); + } else if (rmenv_str == std::string("RC_ibm_spectrum")) { + numprocs_opt = string("-p") + string(argv[2]); + launcher_argv = (char **) malloc (4*sizeof(char*)); + launcher_argv[0] = strdup(mylauncher); + launcher_argv[1] = strdup(numprocs_opt.c_str()); + launcher_argv[2] = strdup(argv[1]); + launcher_argv[3] = NULL; } fprintf(stderr, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); diff --git a/test/src/fe_launch_smoketest.cxx b/test/src/fe_launch_smoketest.cxx index 097f785..ca3af06 100644 --- a/test/src/fe_launch_smoketest.cxx +++ b/test/src/fe_launch_smoketest.cxx @@ -29,6 +29,7 @@ * ./fe_launch_smoketest.debug /bin/hostname 9 5 pdebug `pwd`/be_kicker.debug * * Update Log: + * Jul 16 2018 DHA: Add IBM JSM Spectrum support. * Oct 25 2011 DHA: Added BGQ support. * Oct 21 2011 DHA: Added dynamic RM support. * Nov 12 2009 DHA: Change BG mpirun options to cover /P running under @@ -167,18 +168,7 @@ int main(int argc, char *argv[]) { launcher_argv[4] = strdup(argv[2]); launcher_argv[5] = strdup("--exe"); launcher_argv[6] = strdup(argv[1]); - // manually fill the block - // launcher_argv[7] = strdup("--block"); - // launcher_argv[8] = strdup("R00-M0-N04"); - // manually fill the corner - // launcher_argv[9] = strdup("--corner"); - // launcher_argv[10] = strdup("R00-M0-N04-J07"); - // manually fill the shape - // launcher_argv[11] = strdup("--shape"); - // launcher_argv[12] = strdup("1x1x1x1x1"); - // launcher_argv[13] = NULL; launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if ((rmenv_str == std::string("RC_bgq_slurm"))) { launcher_argv = (char **)malloc(7 * sizeof(char *)); launcher_argv[0] = strdup(mylauncher); @@ -188,8 +178,6 @@ int main(int argc, char *argv[]) { launcher_argv[4] = strdup(argv[2]); launcher_argv[5] = strdup(argv[1]); 
launcher_argv[6] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", - "mylauncher"); } else if ((rmenv_str == std::string("RC_bglrm")) || (rmenv_str == std::string("RC_bgprm"))) { launcher_argv = (char **)malloc(8 * sizeof(char *)); @@ -201,7 +189,6 @@ int main(int argc, char *argv[]) { launcher_argv[5] = strdup("-exe"); launcher_argv[6] = strdup(argv[1]); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if (rmenv_str == std::string("RC_slurm")) { numprocs_opt = string("-n") + string(argv[2]); numnodes_opt = string("-N") + string(argv[3]); @@ -231,7 +218,6 @@ int main(int argc, char *argv[]) { launcher_argv[5] = strdup(argv[2]); launcher_argv[6] = strdup(argv[1]); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if (rmenv_str == std::string("RC_mpiexec_hydra")) { launcher_argv = (char **)malloc(5 * sizeof(char *)); launcher_argv[0] = strdup(mylauncher); @@ -239,9 +225,21 @@ int main(int argc, char *argv[]) { launcher_argv[2] = strdup(argv[2]); launcher_argv[3] = strdup(argv[1]); launcher_argv[4] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); + } else if (rmenv_str == std::string("RC_ibm_spectrum")) { + numprocs_opt = string("-p") + string(argv[2]); + launcher_argv = (char **) malloc (4*sizeof(char*)); + launcher_argv[0] = strdup(mylauncher); + launcher_argv[1] = strdup(numprocs_opt.c_str()); + launcher_argv[2] = strdup(argv[1]); + launcher_argv[3] = NULL; + } else { + fprintf(stdout, "[LMON FE] Unknown Resource Manger: %s\n", + rmenv_str.c_str()); + return EXIT_FAILURE; } + fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); + if ((rc = LMON_fe_init(LMON_VERSION)) != LMON_OK) { fprintf(stdout, "[LMON FE] FAILED\n"); return EXIT_FAILURE; diff --git a/test/src/fe_launch_usrpayload_test.cxx b/test/src/fe_launch_usrpayload_test.cxx index 9cccec4..24bdeab 100644 
--- a/test/src/fe_launch_usrpayload_test.cxx +++ b/test/src/fe_launch_usrpayload_test.cxx @@ -27,6 +27,7 @@ *-------------------------------------------------------------------------------- * * Update Log: + * Jul 16 2018 DHA: Add IBM JSM Spectrum support. * Mar 04 2008 DHA: Added generic BlueGene support * Jun 17 2008 DHA: Added BlueGene support * Jun 12 2008 DHA: Added GNU build system support @@ -201,17 +202,7 @@ int main(int argc, char *argv[]) { launcher_argv[4] = strdup(argv[2]); launcher_argv[5] = strdup("--exe"); launcher_argv[6] = strdup(argv[1]); - // manually fill the block - // launcher_argv[7] = strdup("--block"); - // launcher_argv[8] = strdup("R00-M0-N04"); - // manually fill the corner - // launcher_argv[9] = strdup("--corner"); - // launcher_argv[10] = strdup("R00-M0-N04-J07"); - // manually fill the shape - // launcher_argv[11] = strdup("--shape"); - // launcher_argv[12] = strdup("1x1x1x1x1"); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if ((rmenv_str == std::string("RC_bgq_slurm"))) { launcher_argv = (char **)malloc(7 * sizeof(char *)); launcher_argv[0] = strdup(mylauncher); @@ -221,8 +212,6 @@ int main(int argc, char *argv[]) { launcher_argv[4] = strdup(argv[2]); launcher_argv[5] = strdup(argv[1]); launcher_argv[6] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", - "mylauncher"); } else if ((rmenv_str == std::string("RC_bglrm")) || (rmenv_str == std::string("RC_bgprm"))) { launcher_argv = (char **)malloc(8 * sizeof(char *)); @@ -234,7 +223,6 @@ int main(int argc, char *argv[]) { launcher_argv[5] = strdup("-exe"); launcher_argv[6] = strdup(argv[1]); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if (rmenv_str == std::string("RC_slurm")) { numprocs_opt = string("-n") + string(argv[2]); numnodes_opt = string("-N") + string(argv[3]); @@ -264,7 +252,6 @@ int main(int argc, char *argv[]) { 
launcher_argv[5] = strdup(argv[2]); launcher_argv[6] = strdup(argv[1]); launcher_argv[7] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); } else if (rmenv_str == std::string("RC_mpiexec_hydra")) { launcher_argv = (char **)malloc(5 * sizeof(char *)); launcher_argv[0] = strdup(mylauncher); @@ -272,9 +259,17 @@ int main(int argc, char *argv[]) { launcher_argv[2] = strdup(argv[2]); launcher_argv[3] = strdup(argv[1]); launcher_argv[4] = NULL; - fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); + } else if (rmenv_str == std::string("RC_ibm_spectrum")) { + numprocs_opt = string("-p") + string(argv[2]); + launcher_argv = (char **) malloc (4*sizeof(char*)); + launcher_argv[0] = strdup(mylauncher); + launcher_argv[1] = strdup(numprocs_opt.c_str()); + launcher_argv[2] = strdup(argv[1]); + launcher_argv[3] = NULL; } + fprintf(stdout, "[LMON_FE] launching the job/daemons via %s\n", mylauncher); + if ((rc = LMON_fe_init(LMON_VERSION)) != LMON_OK) { fprintf(stdout, "[LMON FE] LMON_fe_init FAILED\n"); return EXIT_FAILURE; diff --git a/test/src/test.attach_1.in b/test/src/test.attach_1.in index 86dd188..d973760 100644 --- a/test/src/test.attach_1.in +++ b/test/src/test.attach_1.in @@ -30,6 +30,7 @@ # Attach the tool to a running program and unlock the hang. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Oct 21 2011 DHA: Added dynamic RM detection support. 
# Dec 17 2009 DHA: Added minimum WAITAMOUNT @@ -84,6 +85,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.attach_1_mem_fetcher.in b/test/src/test.attach_1_mem_fetcher.in index 1758d06..4e6c4e5 100644 --- a/test/src/test.attach_1_mem_fetcher.in +++ b/test/src/test.attach_1_mem_fetcher.in @@ -30,6 +30,7 @@ # Attach the tool to a running program and unlock the hang. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Oct 21 2011 DHA: Added dynamic RM detection support. # Dec 17 2009 DHA: Added minimum WAITAMOUNT @@ -82,6 +83,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.attach_1_pdebugmax.in b/test/src/test.attach_1_pdebugmax.in index f4f00d8..da19389 100644 --- a/test/src/test.attach_1_pdebugmax.in +++ b/test/src/test.attach_1_pdebugmax.in @@ -30,6 +30,7 @@ # Attach the tool to a running program and unlock the hang. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Oct 21 2011 DHA: Added dynamic RM detection support. 
# Mar 06 2009 DHA: Changed bglrm to bgrm @@ -71,6 +72,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.attach_1_remote.in b/test/src/test.attach_1_remote.in index a84ba5a..635da75 100644 --- a/test/src/test.attach_1_remote.in +++ b/test/src/test.attach_1_remote.in @@ -31,6 +31,7 @@ # of its associated job. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Oct 21 2011 DHA: Added dynamic RM detection support. # Dec 17 2009 DHA: Added minimum waittime @@ -77,6 +78,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.attach_2_uneven.in b/test/src/test.attach_2_uneven.in index a1b6656..c093fcd 100644 --- a/test/src/test.attach_2_uneven.in +++ b/test/src/test.attach_2_uneven.in @@ -31,6 +31,7 @@ # number of processors per node, and unlock the hang. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Oct 21 2011 DHA: Added dynamic RM detection support. 
# Mar 06 2009 DHA: Changed bglrm to bgrm @@ -82,6 +83,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.attach_4_detach.in b/test/src/test.attach_4_detach.in index 355d63b..1e1bbe9 100644 --- a/test/src/test.attach_4_detach.in +++ b/test/src/test.attach_4_detach.in @@ -31,6 +31,7 @@ # and kill the BE daemons. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Aug 02 2012 DHA: Created file. # @@ -75,6 +76,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.attach_4_kill.in b/test/src/test.attach_4_kill.in index 32f7eea..be891eb 100644 --- a/test/src/test.attach_4_kill.in +++ b/test/src/test.attach_4_kill.in @@ -31,6 +31,7 @@ # and kill the BE daemons. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Jun 05 2012 DHA: Added subtest support # Oct 21 2011 DHA: Added dynamic RM detection support. 
@@ -80,6 +81,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.attach_4_shutdownbe.in b/test/src/test.attach_4_shutdownbe.in index 2313b68..92e182d 100644 --- a/test/src/test.attach_4_shutdownbe.in +++ b/test/src/test.attach_4_shutdownbe.in @@ -31,6 +31,7 @@ # shut down BE daemons . # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM support. # May 04 2016 DHA: Add test in-tree and installed support # Jun 05 2012 DHA: Added subtest support # Oct 21 2011 DHA: Added dynamic RM detection support. @@ -82,6 +83,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.fe_regStatusCB.in b/test/src/test.fe_regStatusCB.in index cf6eb66..25163a6 100644 --- a/test/src/test.fe_regStatusCB.in +++ b/test/src/test.fe_regStatusCB.in @@ -77,6 +77,9 @@ elif test "x$RM_TYPE" = "xRC_orte" ; then elif test "x$RM_TYPE" = "xRC_mpiexec_hydra" ; then WAITAMOUNT=`expr $WAITAMOUNT` $MPI_JOB_LAUNCHER_PATH -n $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.jobsnap_1.in b/test/src/test.jobsnap_1.in index eba4808..d61b032 100644 --- a/test/src/test.jobsnap_1.in +++ 
b/test/src/test.jobsnap_1.in @@ -61,6 +61,9 @@ if test "x$RM_TYPE" = "xRC_slurm" ; then $MPI_JOB_LAUNCHER_PATH -n$NUMTASKS -N$NUMNODES -ppdebug `pwd`/hang_on_SIGUSR1@EXE@ & elif test "x$RM_TYPE" = "xRC_bgrm" ; then $MPI_JOB_LAUNCHER_PATH -verbose 1 -np $NUMTASKS -exe `pwd`/hang_on_SIGUSR1@EXE@ -cwd `pwd` & +elif test "x$RM_TYPE" = "xRC_ibm_spectrum" ; then + WAITAMOUNT=`expr $WAITAMOUNT` + $MPI_JOB_LAUNCHER_PATH -p $NUMTASKS `pwd`/hang_on_SIGUSR1@EXE@ & else echo "This RM is not supported yet" fi diff --git a/test/src/test.launch_7_kill.in b/test/src/test.launch_7_kill.in index 0f05cbb..f4f6a16 100644 --- a/test/src/test.launch_7_kill.in +++ b/test/src/test.launch_7_kill.in @@ -31,6 +31,7 @@ # one daemon per node, which unlocks the initial hang of all tasks. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM Spectrum support. # May 04 2016 DHA: Add test in-tree and installed support # Aug 1 2012 DHA: Created file. # @@ -48,7 +49,7 @@ export LMON_FE_KILL_TEST=1 NUMNODES=@NNODES@ NOHUP="" -if test "x$RM_TYPE" = "xRC_bglrm" -o "x$RM_TYPE" = "xRC_bgprm"; then +if test "x$RM_TYPE" = "xRC_bglrm" -o "x$RM_TYPE" = "xRC_bgprm" -o "x$RM_TYPE" = "xRC_ibm_spectrum"; then NOHUP=nohup rm -f nohup.out fi diff --git a/test/src/test.launch_7_shutdownbe.in b/test/src/test.launch_7_shutdownbe.in index ee26759..848ca15 100644 --- a/test/src/test.launch_7_shutdownbe.in +++ b/test/src/test.launch_7_shutdownbe.in @@ -31,6 +31,7 @@ # one daemon per node, which unlocks the initial hang of all tasks. # # Update Log: +# Jul 16 2018 DHA: Add IBM JSM Spectrum support # May 04 2016 DHA: Add test in-tree and installed support # Aug 01 2012 DHA: Created the file # @@ -48,7 +49,7 @@ export LMON_FE_SHUTDOWNBE_TEST=1 NUMNODES=@NNODES@ NOHUP="" -if test "x$RM_TYPE" = "xRC_bglrm" -o "x$RM_TYPE" = "xRC_bgprm"; then +if test "x$RM_TYPE" = "xRC_bglrm" -o "x$RM_TYPE" = "xRC_bgprm" -o "x$RM_TYPE" = "xRC_ibm_spectrum"; then NOHUP=nohup rm -f nohup.out fi