diff --git a/.gitignore b/.gitignore index ee1042ab..47c37851 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ MANIFEST /docs/_build .coverage .ipynb_checkpoints +.vscode +/test/tmp/ \ No newline at end of file diff --git a/payu/envmod.py b/payu/envmod.py index 14509455..a74bedea 100644 --- a/payu/envmod.py +++ b/payu/envmod.py @@ -92,21 +92,15 @@ def module(command, *args): exec(envs) -def lib_update(bin_path, lib_name): +def lib_update(required_libs, lib_name): # Local import to avoid reversion interference # TODO: Bad design, fixme! # NOTE: We may be able to move this now that reversion is going away from payu import fsops - # TODO: Use objdump instead of ldd - cmd = 'ldd {0}'.format(bin_path) - ldd_output = subprocess.check_output(shlex.split(cmd)).decode('ascii') - slibs = ldd_output.split('\n') - - for lib_entry in slibs: - if lib_name in lib_entry: - lib_path = lib_entry.split()[2] - + for lib_filename, lib_path in required_libs.items(): + if lib_filename.startswith(lib_name) and lib_path.startswith('/apps/'): + # Load nci's /apps/ version of module if required # pylint: disable=unbalanced-tuple-unpacking mod_name, mod_version = fsops.splitpath(lib_path)[2:4] @@ -115,4 +109,4 @@ def lib_update(bin_path, lib_name): return '{0}/{1}'.format(mod_name, mod_version) # If there are no libraries, return an empty string - return '' + return '' \ No newline at end of file diff --git a/payu/experiment.py b/payu/experiment.py index 3904d72c..a8bd6e00 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -25,7 +25,7 @@ # Local from payu import envmod -from payu.fsops import mkdir_p, make_symlink, read_config, movetree +from payu.fsops import mkdir_p, make_symlink, read_config, movetree, required_libs from payu.schedulers.pbs import get_job_info, pbs_env_init, get_job_id from payu.models import index as model_index import payu.profilers @@ -138,6 +138,10 @@ def init_models(self): submodels = self.config.get('submodels', []) + # Inject information about 
required dynamically loaded libraries into submodel configuration + for sm in submodels: + sm['required_libs'] = required_libs(sm['exe']) + solo_model = self.config.get('model') if not solo_model: sys.exit('payu: error: Unknown model configuration.') @@ -514,19 +518,18 @@ def run(self, *user_flags): # Update MPI library module (if not explicitly set) # TODO: Check for MPI library mismatch across multiple binaries if mpi_module is None: - mpi_module = envmod.lib_update( - model.exec_path_local, + envmod.lib_update( + model.config.get('required_libs'), 'libmpi.so' ) model_prog = [] - if mpi_module.startswith('openmpi'): - # Our MPICH wrapper does not support a working directory flag - model_prog.append('-wdir {0}'.format(model.work_path)) - elif self.config.get('scheduler') == 'slurm': - # Slurm's launcher controls the working directory - model_prog.append('--chdir {0}'.format(model.work_path)) + wdir_arg = '-wdir' + if self.config.get('scheduler') == 'slurm': + # Option to set the working directory differs in slurm + wdir_arg = '--chdir' + model_prog.append(f'{wdir_arg} {model.work_path}') # Append any model-specific MPI flags model_flags = model.config.get('mpiflags', []) @@ -590,13 +593,6 @@ def run(self, *user_flags): if self.config.get('coredump', False): enable_core_dump() - # Our MVAPICH wrapper does not support working directories - if mpi_module.startswith('mvapich'): - curdir = os.getcwd() - os.chdir(self.work_path) - else: - curdir = None - # Dump out environment with open(self.env_fname, 'w') as file: file.write(yaml.dump(dict(os.environ), default_flow_style=False)) @@ -617,10 +613,6 @@ def run(self, *user_flags): else: rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err) - # Return to control directory - if curdir: - os.chdir(curdir) - f_out.close() f_err.close() diff --git a/payu/fsops.py b/payu/fsops.py index b1b991dc..66317070 100644 --- a/payu/fsops.py +++ b/payu/fsops.py @@ -12,6 +12,8 @@ import os import shutil import sys +import shlex 
import subprocess


def parse_ldd_output(ldd_output):
    """Parse the text printed by ``ldd`` into a library lookup table.

    Only lines of the form ``<name> => <path> [(<address>)]`` contribute an
    entry.  Lines without ``=>`` (such as the vDSO or the dynamic loader)
    are ignored, as are libraries that ldd could not resolve
    (``=> not found``) and entries that carry a load address but no path.

    PARAMETERS:
        string ldd_output: complete standard output of an ldd invocation
    RETURN:
        dict: {filename-of-lib: fullpath-of-file}
    """
    needed_libs = {}
    for line in ldd_output.splitlines():
        word_list = line.split()
        if len(word_list) < 3 or word_list[1] != '=>':
            continue

        target = word_list[2:]
        # Drop the trailing load address, e.g. "(0x00007fa493665000)"
        if target[-1].startswith('(') and target[-1].endswith(')'):
            target = target[:-1]

        # "=> not found" and "=>  (0x...)" leave no real path to record
        if not target or target == ['not', 'found']:
            continue

        # Re-join so that a path containing spaces is not truncated
        needed_libs[word_list[0]] = ' '.join(target)
    return needed_libs


def required_libs(bin_path):
    """
    Runs ldd command and parses the output.
    This function should only be called once per binary
    i.e. Use a singleton pattern in the caller object.
    PARAMETERS:
        string bin_path: full path to the binary
    RETURN:
        dict: {filename-of-lib: fullpath-of-file}
    RAISES:
        subprocess.CalledProcessError: if ldd exits with a non-zero status
    """
    # Local import: only the decoding step below needs it
    import os

    # Pass an argument list rather than a formatted command string so that a
    # path containing whitespace or quotes reaches ldd as a single argument.
    ldd_out = subprocess.check_output(['ldd', str(bin_path)])

    # Library paths are filesystem names, so decode them the way the OS does
    # instead of assuming pure ASCII.
    return parse_ldd_output(os.fsdecode(ldd_out))
def test_parse_ldd_output():
    """parse_ldd_output maps every resolved library in the sample to its path."""
    # Locate the fixture relative to this file so the test does not depend
    # on the directory pytest happens to be started from.
    ldd_output_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'resources',
        'sample_ldd_output.txt'
    )
    with open(ldd_output_path, 'r') as f:
        ldd_output = f.read()
    required_libs = payu.fsops.parse_ldd_output(ldd_output)

    # The sample has five lines; the vDSO line has no "=>" and is skipped.
    # NOTE: `assert(a, b)` asserts a non-empty tuple and can never fail, so
    # the comparisons must be written out explicitly.
    assert len(required_libs) == 4
    assert (required_libs['libmpi.so.40'] ==
            '/apps/openmpi/4.0.2/lib/libmpi.so.40')


def test_lib_update_lib_if_required():
    """lib_update returns '<name>/<version>' for a libmpi found under /apps/."""
    required_libs_dict = {
        'libmpi.so.40': '/apps/openmpi/4.0.2/lib/libmpi.so.40',
        'libmpi_usempif08_Intel.so.40': '/apps/openmpi/4.0.2/lib/libmpi_usempif08_Intel.so.40'
    }
    result = payu.envmod.lib_update(required_libs_dict, 'libmpi.so')
    assert result == 'openmpi/4.0.2'


def test_lib_update_if_nci_module_not_required():
    """lib_update returns '' when no matching library lives under /apps/."""
    required_libs_dict = {
        'libmpi.so.40': '/$HOME/spack-microarchitectures.git/opt/spack/linux-rocky8-cascadelake/intel-2019.5.281/openmpi-4.1.5-ooyg5wc7sa3tvmcpazqqb44pzip3wbyo/lib/libmpi.so.40',
        'libmpi_usempif08.so.40': '/$HOME/exe/spack-microarchitectures.git/opt/spack/linux-rocky8-cascadelake/intel-2019.5.281/openmpi-4.1.5-ooyg5wc7sa3tvmcpazqqb44pzip3wbyo/lib/libmpi_usempif08.so.40',
    }
    result = payu.envmod.lib_update(required_libs_dict, 'libmpi.so')
    assert result == ''