Skip to content

Commit

Permalink
Merge pull request #366 from jo-basevi/285-fix-crash-with-restart
Browse files Browse the repository at this point in the history
Refactor experiment.set_counters() and fix some minor bugs
  • Loading branch information
jo-basevi committed Oct 4, 2023
2 parents edf8d8c + b5c0c30 commit fc83ec9
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 58 deletions.
74 changes: 37 additions & 37 deletions payu/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import errno
import getpass
import os
import re
import resource
import sys
import shlex
Expand Down Expand Up @@ -172,41 +173,39 @@ def set_counters(self):

# Initialize counter if unset
if self.counter is None:
# TODO: this logic can probably be streamlined
try:
restart_dirs = [d for d in os.listdir(self.archive_path)
if d.startswith('restart')]
except EnvironmentError as exc:
if exc.errno == errno.ENOENT:
restart_dirs = None
else:
raise

# First test for restarts
if restart_dirs:
self.counter = 1 + max([int(d.lstrip('restart'))
for d in restart_dirs
if d.startswith('restart')])
# Check for restart index
max_restart_index = self.max_output_index(output_type="restart")
if max_restart_index:
self.counter = 1 + max_restart_index
else:
# repeat runs do not generate restart files, so check outputs
try:
output_dirs = [d for d in os.listdir(self.archive_path)
if d.startswith('output')]
except EnvironmentError as exc:
if exc.errno == errno.ENOENT:
output_dirs = None
else:
raise

# First test for restarts
# Now look for output directories
if output_dirs:
self.counter = 1 + max([int(d.lstrip('output'))
for d in output_dirs
if d.startswith('output')])
# Now look for output directories,
# as repeat runs do not generate restart files.
max_output_index = self.max_output_index()
if max_output_index:
self.counter = 1 + max_output_index
else:
self.counter = 0

def max_output_index(self, output_type="output"):
    """Return the highest index among archived directories of one type.

    Args:
        output_type: Directory name prefix to look for, e.g. "output"
            or "restart".

    Returns:
        The largest integer suffix found among the directory names
        returned by ``self.list_output_dirs(output_type)``, or ``None``
        when no such directories are found or the listing fails with
        ENOENT. Note that 0 is a valid index, so callers should compare
        the result against ``None`` rather than test its truthiness.

    Raises:
        EnvironmentError: Any listing error other than ENOENT is
            re-raised unchanged.
    """
    try:
        output_dirs = self.list_output_dirs(output_type)
    except EnvironmentError as exc:
        if exc.errno != errno.ENOENT:
            raise
        # Nothing to list (ENOENT) is treated as "no directories found"
        return None

    if not output_dirs:
        return None

    # Slice the prefix off instead of using str.lstrip(): lstrip removes
    # any leading characters contained in its argument, so a type name
    # that includes digits would also swallow part of the index.
    prefix_length = len(output_type)
    return max(int(d[prefix_length:]) for d in output_dirs)

def list_output_dirs(self, output_type="output"):
    """Return the restart or output directory names found in the archive.

    Only entries of ``self.archive_path`` whose whole name is
    ``output_type`` followed by exactly three decimal digits are
    returned (e.g. "output003", "restart012"). The order is whatever
    ``os.listdir`` yields.

    Args:
        output_type: Directory name prefix, e.g. "output" or "restart".
            It is matched literally, not as a regular expression.

    Returns:
        A list of matching entry names (not full paths).

    Raises:
        OSError: Propagated from ``os.listdir`` if ``self.archive_path``
            cannot be listed.
    """
    # Escape the prefix so regex metacharacters in it are matched
    # literally, and use fullmatch so nothing may follow the three digits
    # (a "$" anchor would still accept a trailing newline).
    naming_pattern = re.compile(re.escape(output_type) + r"[0-9][0-9][0-9]")
    return [d for d in os.listdir(self.archive_path)
            if naming_pattern.fullmatch(d)]

def set_stacksize(self, stacksize):

if stacksize == 'unlimited':
Expand Down Expand Up @@ -562,7 +561,7 @@ def run(self, *user_flags):

for prof in self.profilers:
if prof.runscript:
model_prog = model_prog.append(prof.runscript)
model_prog.append(prof.runscript)

model_prog.append(model.exec_prefix)

Expand Down Expand Up @@ -749,8 +748,7 @@ def archive(self):
default_restart_history)

# Remove any outdated restart files
prior_restart_dirs = [d for d in os.listdir(self.archive_path)
if d.startswith('restart')]
prior_restart_dirs = self.list_output_dirs(output_type="restart")

for res_dir in prior_restart_dirs:

Expand All @@ -766,10 +764,12 @@ def archive(self):
shutil.rmtree(res_path)

# Ensure dynamic library support for subsequent python calls
ld_libpaths = os.environ['LD_LIBRARY_PATH']
ld_libpaths = os.environ.get('LD_LIBRARY_PATH', None)
py_libpath = sysconfig.get_config_var('LIBDIR')
if py_libpath not in ld_libpaths.split(':'):
os.environ['LD_LIBRARY_PATH'] = ':'.join([py_libpath, ld_libpaths])
if ld_libpaths is None:
os.environ['LD_LIBRARY_PATH'] = py_libpath
elif py_libpath not in ld_libpaths.split(':'):
os.environ['LD_LIBRARY_PATH'] = f'{py_libpath}:{ld_libpaths}'

collate_config = self.config.get('collate', {})
collating = collate_config.get('enable', True)
Expand Down
14 changes: 13 additions & 1 deletion payu/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,18 @@ def setup(self):

# Make symlink to executable in work directory
if self.exec_path:
# Check whether executable path exists
if not os.path.isfile(self.exec_path):
raise FileNotFoundError(
f'Executable for {self.name} model '
f'not found on path: {self.exec_path}')

# Check whether executable has executable permission
if not os.access(self.exec_path, os.X_OK):
raise PermissionError(
f'Executable for {self.name} model '
f'is not executable: {self.exec_path}')

# If have exe manifest this implies exe reproduce is True. Do not
# want to overwrite exe manifest in this case
if not self.expt.manifest.have_manifest['exe']:
Expand All @@ -293,7 +305,7 @@ def setup(self):
self.exec_path_local,
self.exec_path
)

# Populate information about required dynamically loaded libraries
self.required_libs = required_libs(self.exec_path)

Expand Down
6 changes: 4 additions & 2 deletions test/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from contextlib import contextmanager
import os
import stat
from pathlib import Path

import yaml
Expand Down Expand Up @@ -120,9 +121,10 @@ def make_exe():
# Create a fake executable file
bindir = labdir / 'bin'
bindir.mkdir(parents=True, exist_ok=True)
exe = config['exe']
exe_path = bindir / config['exe']
exe_size = 199
make_random_file(bindir/exe, exe_size)
make_random_file(exe_path, exe_size)
exe_path.chmod(exe_path.stat().st_mode | stat.S_IEXEC)


def make_payu_exe():
Expand Down
18 changes: 0 additions & 18 deletions test/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,24 +213,6 @@ def test_exe_reproduce():
# Check manifests have changed as expected
assert(not manifests == get_manifests(ctrldir/'manifests'))

# Reset manifests "truth"
manifests = get_manifests(ctrldir/'manifests')

# Make exe in config.yaml unfindable by giving it a non-existent
# path but crucially the same name as the proper executable
config['exe'] = '/bogus/test.exe'

# Change reproduce exe back to True
config['manifest']['reproduce']['exe'] = True

write_config(config)

# Run setup with changed exe but reproduce exe set to True. Should
# work fine as the exe path is in the manifest
payu_setup(lab_path=str(labdir))

assert(manifests == get_manifests(ctrldir/'manifests'))


def test_input_reproduce():

Expand Down

0 comments on commit fc83ec9

Please sign in to comment.