From 9fefd6db329084c616a471250b21e465f0ac526c Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Tue, 30 Jul 2024 09:32:17 +1000 Subject: [PATCH 01/10] Check configured exe path and restarts when manifest reproduce True Previous behaviour would only use the exe path and restarts already set in the Manifest. So they would pick up when the exe/restart that were currently pointing toin the manifest would change, but not the configured one. --- payu/manifest.py | 7 ++++++- payu/models/model.py | 23 ++++++++++------------- test/common.py | 6 ++++-- test/test_manifest.py | 30 +++++++++++++++++++++++++++--- 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/payu/manifest.py b/payu/manifest.py index 5681d8a8..6ad27007 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -286,6 +286,9 @@ def __init__(self, config, reproduce): # Set flag to auto-scan input directories self.scaninputs = self.manifest_config.get('scaninputs', True) + # TODO: With reproduce true, should scan inputs be set to True? + # This is so new input files or changed input files to different file in config.yaml + # are always picked up? if self.reproduce['input'] and self.scaninputs: print("scaninputs set to False when reproduce input is True") self.scaninputs = False @@ -351,7 +354,9 @@ def setup(self): if not self.reproduce['restart']: # Re-initialise restart manifest. Only keep restart manifest # if reproduce. Normally want to scan for new restarts - self.init_mf('restart') + self.init_mf('restart') + + #TODO: Should links only be made in add_filepath()? # Check to make all manifests that should be populated are and # make links in work directory for existing manifests diff --git a/payu/models/model.py b/payu/models/model.py index 43200eaf..b5aae7c6 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -256,8 +256,7 @@ def setup(self): self.setup_configuration_files() # Add restart files from prior run to restart manifest - if (not self.expt.manifest.have_manifest['restart'] and - self.prior_restart_path): + if self.prior_restart_path: restart_files = self.get_prior_restart_files() for f_name in restart_files: f_orig = os.path.join(self.prior_restart_path, f_name) @@ -269,8 +268,9 @@ def setup(self): self.copy_restarts ) - # Add input files to manifest if we don't already have a populated + # Add input files to manifest if we don't already have a # input manifest, or we specify scaninputs is True (default) + # TODO: If Repro inputs is True, Scan inputs is set to False so input files are not added to manifest and symlink is not set up if (not self.expt.manifest.have_manifest['input'] or self.expt.manifest.scaninputs): for input_path in self.input_paths: @@ -327,16 +327,13 @@ def setup(self): f'Executable for {self.name} model ' f'is not executable: {self.exec_path}') - # If have exe manifest this implies exe reproduce is True. Do not - # want to overwrite exe manifest in this case - if not self.expt.manifest.have_manifest['exe']: - # Add to exe manifest (this is always done so any change in exe - # path will be picked up) - self.expt.manifest.add_filepath( - 'exe', - self.exec_path_local, - self.exec_path - ) + # Add to exe manifest (this is always done so any change in exe + # path will be picked up) + self.expt.manifest.add_filepath( + 'exe', + self.exec_path_local, + self.exec_path + ) # Populate information about required dynamically loaded libraries self.required_libs = required_libs(self.exec_path) diff --git a/test/common.py b/test/common.py index cbb6b9c9..fbe05652 100644 --- a/test/common.py +++ b/test/common.py @@ -140,11 +140,13 @@ def write_config(config, path=config_path): file.write(yaml.dump(config, default_flow_style=False)) -def make_exe(): +def make_exe(exe_name=None): # Create a fake executable file bindir = labdir / 'bin' bindir.mkdir(parents=True, exist_ok=True) - exe_path = bindir / config['exe'] + if not exe_name: + exe_name = config['exe'] + exe_path = bindir / exe_name exe_size = 199 make_random_file(exe_path, exe_size) exe_path.chmod(exe_path.stat().st_mode | stat.S_IEXEC) diff --git a/test/test_manifest.py b/test/test_manifest.py index 6e7ae3e3..dcc1fdac 100644 --- a/test/test_manifest.py +++ b/test/test_manifest.py @@ -198,6 +198,7 @@ def test_exe_reproduce(): # Reinstate exe path config['exe'] = exe + write_config(config) # Recreate fake executable file make_exe() @@ -220,6 +221,20 @@ def test_exe_reproduce(): # Check manifests have changed as expected assert(not manifests == get_manifests(ctrldir/'manifests')) + # Create a separate executable file + new_exe = "mock_exe_with_a_different_name" + make_exe(new_exe) + + # Change exe path and back to exe reproduce true in config.yaml + config['manifest']['reproduce']['exe'] = True + config['exe'] = new_exe + write_config(config) + + # Run setup again, which should raise an error due to changed executable + with pytest.raises(SystemExit): + # Run setup with unchanged exe but reproduce exe set to True + payu_setup(lab_path=str(labdir)) + def test_input_reproduce(): @@ -392,7 +407,6 @@ def test_restart_reproduce(): # Set reproduce restart to True config['manifest']['reproduce']['input'] = False config['manifest']['reproduce']['restart'] = True - del(config['restart']) write_config(config) manifests = get_manifests(ctrldir/'manifests') @@ -419,8 +433,7 @@ def test_restart_reproduce(): make_restarts() # Run setup again, which should raise an error due to changed restarts - with pytest.raises(SystemExit) as pytest_wrapped_e: - # Run setup with unchanged exe but reproduce exe set to True + with pytest.raises(SystemExit): payu_setup(lab_path=str(labdir)) # Set reproduce restart to False @@ -433,6 +446,17 @@ def test_restart_reproduce(): # Manifests should have changed assert(not manifests == get_manifests(ctrldir/'manifests')) + # Set reproduce restart to True + config['manifest']['reproduce']['restart'] = True + write_config(config) + + # Add a new restart file to restart directory + make_restarts(['restart_005.bin']) + + # Run setup again, which should raise an error due to new restart file + with pytest.raises(SystemExit): + payu_setup(lab_path=str(labdir)) + def test_all_reproduce(): From f14bfa5b250fd354e91d15886c7c09c45325dd57 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 2 Aug 2024 17:50:20 +1000 Subject: [PATCH 02/10] Check configured inputs when manifest reproduce is True and set links when files are added to manifest - When reproduce is True, set scanInputs to True so new/changed inputs are discovered - When reproduce is True, only create work symlinks when filepaths are added in manifest during model.setup - Check for files that exist in the manifest, but haven't been added to work directory, or no longer exist --- docs/source/config.rst | 4 +- payu/manifest.py | 110 ++++++++++++++++++++++------------------- payu/models/model.py | 1 - test/test_manifest.py | 68 ++++++++++++++----------- 4 files changed, 102 insertions(+), 81 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index fc6d3d15..62a7d147 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -260,13 +260,13 @@ section for details. ``input`` (*Default: global reproduce flag*) Enforce input file reproducibility. If set to *True* will refuse to - run if hashes do no match. Will not search for any new files. + run if hashes do no match. It will search for new files. ``restart`` (*Default: global reproduce flag*) Enforce restart file reproducibility. ``scaninputs`` (*Default: True*) - Scan input directories for new files. Set to *False* when reproduce input + Scan input directories for new files. Set to *True* when reproduce input is *True*. If a manifest file is complete and it is desirable to not add spurious diff --git a/payu/manifest.py b/payu/manifest.py index 6ad27007..34070b67 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -50,6 +50,12 @@ def __init__(self, path, self.needsync = False self.existing_filepaths = set() + def set_existing_filepaths(self): + """ + Save existing filepaths information + """ + self.existing_filepaths = set(self.data.keys()) + def check_fast(self, reproduce=False, **args): """ Check hash value for all filepaths using a fast hash function and fall @@ -129,6 +135,15 @@ def check_fast(self, reproduce=False, **args): # Flag need to update version on disk self.needsync = True + # Check for any missing files + if reproduce and len(self.existing_filepaths) > 0: + print( + f"Run cannot reproduce: Files in {self.path} " + + "are no longer in work directory:\n - " + + "\n - ".join(self.existing_filepaths) + ) + exit(1) + def add_filepath(self, filepath, fullpath, hashes, copy=False): """ Bespoke function to add filepath & fullpath to manifest @@ -187,15 +202,11 @@ def make_link(self, filepath): Payu integration function for creating symlinks in work directories which point back to the original file. """ - # Check file exists. It may have been deleted but still in manifest if not os.path.exists(self.fullpath(filepath)): - print('File not found: {filepath}'.format( - filepath=self.fullpath(filepath))) - if self.contains(filepath): - print('removing from manifest') - self.delete(filepath) - self.needsync = True - self.existing_filepaths.discard(filepath) + raise FileNotFoundError( + "Unable to create symlink in work directory. " + f"File not found: {self.fullpath(filepath)}" + ) else: try: destdir = os.path.dirname(filepath) @@ -223,10 +234,11 @@ def make_link(self, filepath): def make_links(self): """ - Used to make all links at once for reproduce runs or scaninputs=False + Used to make all links at once for scaninputs=False """ for filepath in list(self): - self.make_link(filepath) + if os.path.exists(self.fullpath(filepath)): + self.make_link(filepath) def copy(self, path): """ @@ -286,12 +298,10 @@ def __init__(self, config, reproduce): # Set flag to auto-scan input directories self.scaninputs = self.manifest_config.get('scaninputs', True) - # TODO: With reproduce true, should scan inputs be set to True? - # This is so new input files or changed input files to different file in config.yaml - # are always picked up? - if self.reproduce['input'] and self.scaninputs: - print("scaninputs set to False when reproduce input is True") - self.scaninputs = False + # Ensure configured input files are discovered when reproduce is True + if self.reproduce['input'] and not self.scaninputs: + print("scaninputs set to True when reproduce input is True") + self.scaninputs = True def init_mf(self, mf): # Initialise a sub-manifest object @@ -332,46 +342,46 @@ def load(self): if len(self.manifests[mf]) > 0: self.have_manifest[mf] = True + def set_existing_filepaths(self): + """ + Save the existing filepath infomation to each manifest + """ + for mf in self.manifests.keys(): + if self.have_manifest[mf]: + self.manifests[mf].set_existing_filepaths() + def setup(self): # Load all available manifests self.load() - if self.have_manifest['input']: - if self.scaninputs: # Must be False for reproduce=True - # Save existing filepath information - self.manifests['input'].existing_filepaths = \ - set(self.manifests['input'].data.keys()) - - if self.have_manifest['exe']: - if not self.reproduce['exe']: - # Re-initialise exe manifest. Trivial to recreate - # and means no check required for changed executable - # paths - self.init_mf('exe') - - if self.have_manifest['restart']: - if not self.reproduce['restart']: - # Re-initialise restart manifest. Only keep restart manifest - # if reproduce. Normally want to scan for new restarts - self.init_mf('restart') - - #TODO: Should links only be made in add_filepath()? - - # Check to make all manifests that should be populated are and - # make links in work directory for existing manifests + # Save existing filepaths infomation in each manifest + self.set_existing_filepaths() + + if self.have_manifest['exe'] and not self.reproduce['exe']: + # Re-initialise exe manifest. Trivial to recreate + # and means no check required for changed executable + # paths + self.init_mf('exe') + + if self.have_manifest['restart'] and not self.reproduce['restart']: + # Re-initialise restart manifest. Only keep restart manifest + # if reproduce. Normally want to scan for new restarts + self.init_mf('restart') + + # Make links in work directory for existing input manifest + # when scan inputs is set to False + if self.have_manifest['input'] and not self.scaninputs: + print('Making input symlinks using the existing manifest') + self.manifests['input'].make_links() + + # Check manifests are populated when reproduce is configured for mf in self.manifests.keys(): - if self.have_manifest[mf]: - # Don't make links for inputs when scaninputs is True - if mf == 'input' and self.scaninputs: - continue - print('Making {} links'.format(mf)) - self.manifests[mf].make_links() - else: - if self.reproduce[mf]: - print('{} manifest must exist if reproduce is True' - ''.format(mf.capitalize())) - exit(1) + if not self.have_manifest[mf] and self.reproduce[mf]: + print( + f'{mf.capitalize()} manifest must exist if reproduce is True' + ) + sys.exit(1) def check_manifests(self): diff --git a/payu/models/model.py b/payu/models/model.py index b5aae7c6..41ac4460 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -270,7 +270,6 @@ def setup(self): # Add input files to manifest if we don't already have a # input manifest, or we specify scaninputs is True (default) - # TODO: If Repro inputs is True, Scan inputs is set to False so input files are not added to manifest and symlink is not set up if (not self.expt.manifest.have_manifest['input'] or self.expt.manifest.scaninputs): for input_path in self.input_paths: diff --git a/test/test_manifest.py b/test/test_manifest.py index dcc1fdac..eee01ed6 100644 --- a/test/test_manifest.py +++ b/test/test_manifest.py @@ -176,7 +176,7 @@ def test_exe_reproduce(): # Update the modification time of the executable, should run ok (bindir/exe).touch() - # Run setup with changed exe but reproduce exe set to False + # Run setup with changed exe payu_setup(lab_path=str(labdir)) # Manifests will have changed as fasthash is altered @@ -185,21 +185,6 @@ def test_exe_reproduce(): # Reset manifests "truth" manifests = get_manifests(ctrldir/'manifests') - # Delete exe path from config, should get it from manifest - del(config['exe']) - write_config(config) - - # Run setup with changed exe but reproduce exe set to False - payu_setup(lab_path=str(labdir)) - - # Manifests will not have changed - assert(manifests == get_manifests(ctrldir/'manifests')) - assert((workdir/exe).resolve() == (bindir/exe).resolve()) - - # Reinstate exe path - config['exe'] = exe - write_config(config) - # Recreate fake executable file make_exe() @@ -244,8 +229,9 @@ def test_input_reproduce(): # Set reproduce input to True config['manifest']['reproduce']['exe'] = False config['manifest']['reproduce']['input'] = True - # Set back to false - config['manifest']['scaninputs'] = False + + # Set scan inputs to True + config['manifest']['scaninputs'] = True config['exe'] = config_orig['exe'] write_config(config) manifests = get_manifests(ctrldir/'manifests') @@ -255,17 +241,25 @@ def test_input_reproduce(): payu_setup(lab_path=str(labdir)) assert(manifests == get_manifests(ctrldir/'manifests')) - # Delete input directory from config, should still work from - # manifest with input reproduce True + # Delete input directory from config input = config['input'] - write_config(config) del(config['input']) + write_config(config) - # Run setup, should work - payu_setup(lab_path=str(labdir)) + # Expect setup to fail + with pytest.raises(SystemExit) as pytest_wrapped_e: + payu_setup(lab_path=str(labdir)) + + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 1 + # Assert manifests have not changed assert(manifests == get_manifests(ctrldir/'manifests')) + # Add input directory back into config + config['input'] = input + write_config(config) + # Update modification times for input files for i in range(1, 4): (inputdir/'input_00{i}.bin'.format(i=i)).touch() @@ -280,11 +274,8 @@ def test_input_reproduce(): # Reset manifest "truth" manifests = get_manifests(ctrldir/'manifests') - # Re-create input files. Have to set input path for this purpose - # but not written to config.yaml, so doesn't affect payu commands - config['input'] = input + # Re-create input files make_inputs() - del(config['input']) # Run setup again, which should raise an error due to changed inputs with pytest.raises(SystemExit) as pytest_wrapped_e: @@ -312,6 +303,10 @@ def test_input_reproduce(): # Delete input manifest (ctrldir/'manifests'/'input.yaml').unlink() + # Remove input from config + del config['input'] + write_config(config) + # Setup with no input dir and no manifest. Should work ok payu_setup(lab_path=str(labdir)) @@ -324,6 +319,22 @@ def test_input_reproduce(): write_config(config) payu_setup(lab_path=str(labdir)) + # Make a new input file + (inputdir / 'newInputFile').touch() + + # Set reproduce to true + config['manifest']['reproduce']['input'] = True + # Set scan inputs to False - This should be ignored and set to true + config['manifest']['scaninputs'] = False + write_config(config) + + # Run setup again, which should raise an error due to new input file + with pytest.raises(SystemExit) as pytest_wrapped_e: + payu_setup(lab_path=str(labdir)) + + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 1 + def test_input_scaninputs(): @@ -336,6 +347,7 @@ def test_input_scaninputs(): # Set scaninputs input to True config['manifest']['scaninputs'] = True + config['manifest']['reproduce']['input'] = False write_config(config) # Run setup with unchanged input @@ -387,7 +399,7 @@ def test_input_scaninputs(): write_config(config) # Run setup again. Should be fine, but manifests changed now - # as scaninputs=False + # as scaninputs=True payu_setup(lab_path=str(labdir)) assert(not manifests == get_manifests(ctrldir/'manifests')) assert((workdir/'lala').is_file()) From 3c1185adf9d3cdfa75041936c01acab5d0b2e0d1 Mon Sep 17 00:00:00 2001 From: jo-basevi Date: Fri, 9 Aug 2024 10:20:48 +1000 Subject: [PATCH 03/10] Apply suggestions from code review Fix some typos Co-authored-by: Aidan Heerdegen --- docs/source/config.rst | 2 +- payu/models/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index 62a7d147..dbc2a03d 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -260,7 +260,7 @@ section for details. ``input`` (*Default: global reproduce flag*) Enforce input file reproducibility. If set to *True* will refuse to - run if hashes do no match. It will search for new files. + run if hashes do not match. It will search for new files. ``restart`` (*Default: global reproduce flag*) Enforce restart file reproducibility. diff --git a/payu/models/model.py b/payu/models/model.py index 41ac4460..e4a01d21 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -268,7 +268,7 @@ def setup(self): self.copy_restarts ) - # Add input files to manifest if we don't already have a + # Add input files to manifest if we don't already have an # input manifest, or we specify scaninputs is True (default) if (not self.expt.manifest.have_manifest['input'] or self.expt.manifest.scaninputs): From 9d34e85d5c0601d01fbeaf6ad5eeef4f23642ce2 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 9 Aug 2024 11:13:59 +1000 Subject: [PATCH 04/10] Add in review suggestions to manifests.py - Rename existing_filepaths to previous_filepaths - Add description that previous_filepaths keep track of filepaths from previous experiment run - Move reproduce manifest exists check to load() --- payu/manifest.py | 49 +++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/payu/manifest.py b/payu/manifest.py index 34070b67..4dd198a9 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -48,13 +48,16 @@ def __init__(self, path, self.ignore = ignore self.needsync = False - self.existing_filepaths = set() - def set_existing_filepaths(self): + self.previous_filepaths = set() + + def set_previous_filepaths(self): """ - Save existing filepaths information + Save previous filepaths information - this is obtained from the + pre-existing manifests. This is used to keep track of changes to + the manifest. """ - self.existing_filepaths = set(self.data.keys()) + self.previous_filepaths = set(self.data.keys()) def check_fast(self, reproduce=False, **args): """ @@ -136,11 +139,11 @@ def check_fast(self, reproduce=False, **args): self.needsync = True # Check for any missing files - if reproduce and len(self.existing_filepaths) > 0: - print( + if reproduce and len(self.previous_filepaths) > 0: + sys.stderr.write( f"Run cannot reproduce: Files in {self.path} " + "are no longer in work directory:\n - " + - "\n - ".join(self.existing_filepaths) + "\n - ".join(self.previous_filepaths) + '\n' ) exit(1) @@ -171,8 +174,8 @@ def add_filepath(self, filepath, fullpath, hashes, copy=False): if copy: self.data[filepath]['copy'] = copy - if filepath in self.existing_filepaths: - self.existing_filepaths.remove(filepath) + if filepath in self.previous_filepaths: + self.previous_filepaths.remove(filepath) return True @@ -230,7 +233,7 @@ def make_link(self, filepath): local=filepath)) raise finally: - self.existing_filepaths.discard(filepath) + self.previous_filepaths.discard(filepath) def make_links(self): """ @@ -342,13 +345,21 @@ def load(self): if len(self.manifests[mf]) > 0: self.have_manifest[mf] = True - def set_existing_filepaths(self): + # Check manifests are populated when reproduce is configured + if not self.have_manifest[mf] and self.reproduce[mf]: + sys.stderr.write( + f'{mf.capitalize()} manifest must exist and be populated ' + 'if reproduce is configured to True\n' + ) + sys.exit(1) + + def set_previous_filepaths(self): """ Save the existing filepath infomation to each manifest """ for mf in self.manifests.keys(): if self.have_manifest[mf]: - self.manifests[mf].set_existing_filepaths() + self.manifests[mf].set_previous_filepaths() def setup(self): @@ -356,7 +367,7 @@ def setup(self): self.load() # Save existing filepaths infomation in each manifest - self.set_existing_filepaths() + self.set_previous_filepaths() if self.have_manifest['exe'] and not self.reproduce['exe']: # Re-initialise exe manifest. Trivial to recreate @@ -375,23 +386,15 @@ def setup(self): print('Making input symlinks using the existing manifest') self.manifests['input'].make_links() - # Check manifests are populated when reproduce is configured - for mf in self.manifests.keys(): - if not self.have_manifest[mf] and self.reproduce[mf]: - print( - f'{mf.capitalize()} manifest must exist if reproduce is True' - ) - sys.exit(1) - def check_manifests(self): print("Checking exe and input manifests") self.manifests['exe'].check_fast(reproduce=self.reproduce['exe']) if not self.reproduce['input']: - if len(self.manifests['input'].existing_filepaths) > 0: + if len(self.manifests['input'].previous_filepaths) > 0: # Delete missing filepaths from input manifest - for filepath in self.manifests['input'].existing_filepaths: + for filepath in self.manifests['input'].previous_filepaths: print('File no longer in input directory: {file} ' 'removing from manifest'.format(file=filepath)) self.manifests['input'].delete(filepath) From bfb2bb0ec6c2e18a514fe9da1480656cddfa80b3 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 9 Aug 2024 12:15:10 +1000 Subject: [PATCH 05/10] Remove manifests scanInputs config option and logic --- docs/source/config.rst | 14 ++------ payu/manifest.py | 43 ++++++---------------- payu/models/model.py | 75 +++++++++++++++++++------------------- test/common.py | 1 - test/test_manifest.py | 82 ++---------------------------------------- 5 files changed, 52 insertions(+), 163 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index dbc2a03d..863a13dc 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -255,24 +255,14 @@ section for details. relevant manifest do not match. ``exe`` (*Default: global reproduce flag*) - Enforce executable reproducibility. If set to *True* will refuse to - run if hashes do not match. + Enforce executable reproducibility. ``input`` (*Default: global reproduce flag*) - Enforce input file reproducibility. If set to *True* will refuse to - run if hashes do not match. It will search for new files. + Enforce input file reproducibility. ``restart`` (*Default: global reproduce flag*) Enforce restart file reproducibility. -``scaninputs`` (*Default: True*) - Scan input directories for new files. Set to *True* when reproduce input - is *True*. - - If a manifest file is complete and it is desirable to not add spurious - files to the manifest but allow existing files to change, setting this - option to *False* would allow that behaviour. - ``ignore`` (*Default: .\**): List of ``glob`` patterns which match files to ignore when scanning input directories. This is an array, so multiple patterns can be specified on diff --git a/payu/manifest.py b/payu/manifest.py index 4dd198a9..d944cd01 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -235,14 +235,6 @@ def make_link(self, filepath): finally: self.previous_filepaths.discard(filepath) - def make_links(self): - """ - Used to make all links at once for scaninputs=False - """ - for filepath in list(self): - if os.path.exists(self.fullpath(filepath)): - self.make_link(filepath) - def copy(self, path): """ Copy myself to another location @@ -298,14 +290,6 @@ def __init__(self, config, reproduce): # Make sure the manifests directory exists mkdir_p(os.path.dirname(self.manifests['exe'].path)) - # Set flag to auto-scan input directories - self.scaninputs = self.manifest_config.get('scaninputs', True) - - # Ensure configured input files are discovered when reproduce is True - if self.reproduce['input'] and not self.scaninputs: - print("scaninputs set to True when reproduce input is True") - self.scaninputs = True - def init_mf(self, mf): # Initialise a sub-manifest object self.manifests[mf] = PayuManifest( @@ -369,22 +353,17 @@ def setup(self): # Save existing filepaths infomation in each manifest self.set_previous_filepaths() - if self.have_manifest['exe'] and not self.reproduce['exe']: - # Re-initialise exe manifest. Trivial to recreate - # and means no check required for changed executable - # paths - self.init_mf('exe') - - if self.have_manifest['restart'] and not self.reproduce['restart']: - # Re-initialise restart manifest. Only keep restart manifest - # if reproduce. Normally want to scan for new restarts - self.init_mf('restart') - - # Make links in work directory for existing input manifest - # when scan inputs is set to False - if self.have_manifest['input'] and not self.scaninputs: - print('Making input symlinks using the existing manifest') - self.manifests['input'].make_links() + # Re-initialise executable and restart manifests, unless reproduce + # is configured: + # - Executables are generally small in size so it is trivial to + # recalculate full MD5 hashes for each experiment run + # - Restarts are usually different between runs. + # - Input manifests are not re-initialised as inputs are typically + # large files that change rarely, so MD5 hashes are computed only + # if there has been a change + for mf in ['exe', 'restart']: + if self.have_manifest[mf] and not self.reproduce[mf]: + self.init_mf(mf) def check_manifests(self): diff --git a/payu/models/model.py b/payu/models/model.py index e4a01d21..103f8530 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -268,49 +268,46 @@ def setup(self): self.copy_restarts ) - # Add input files to manifest if we don't already have an - # input manifest, or we specify scaninputs is True (default) - if (not self.expt.manifest.have_manifest['input'] or - self.expt.manifest.scaninputs): - for input_path in self.input_paths: - if os.path.isfile(input_path): - # Build a mock walk iterator for a single file - fwalk = iter([( - os.path.dirname(input_path), - [], - [os.path.basename(input_path)] - )]) - # Overwrite the input_path as a directory - input_path = os.path.dirname(input_path) - else: - fwalk = os.walk(input_path) + # Add input files to input manifest + for input_path in self.input_paths: + if os.path.isfile(input_path): + # Build a mock walk iterator for a single file + fwalk = iter([( + os.path.dirname(input_path), + [], + [os.path.basename(input_path)] + )]) + # Overwrite the input_path as a directory + input_path = os.path.dirname(input_path) + else: + fwalk = os.walk(input_path) - for path, dirs, files in fwalk: - workrelpath = os.path.relpath(path, input_path) - subdir = os.path.normpath( - os.path.join(self.work_input_path_local, - workrelpath) - ) + for path, dirs, files in fwalk: + workrelpath = os.path.relpath(path, input_path) + subdir = os.path.normpath( + os.path.join(self.work_input_path_local, + workrelpath) + ) - if not os.path.exists(subdir): - os.mkdir(subdir) + if not os.path.exists(subdir): + os.mkdir(subdir) - for f_name in files: - f_orig = os.path.join(path, f_name) - f_link = os.path.join( - self.work_input_path_local, - workrelpath, - f_name + for f_name in files: + f_orig = os.path.join(path, f_name) + f_link = os.path.join( + self.work_input_path_local, + workrelpath, + f_name + ) + # Do not use input file if already linked + # as a restart file + if not os.path.exists(f_link): + self.expt.manifest.add_filepath( + 'input', + f_link, + f_orig, + self.copy_inputs ) - # Do not use input file if already linked - # as a restart file - if not os.path.exists(f_link): - self.expt.manifest.add_filepath( - 'input', - f_link, - f_orig, - self.copy_inputs - ) # Make symlink to executable in work directory if self.exec_path: diff --git a/test/common.py b/test/common.py index fbe05652..8bb06197 100644 --- a/test/common.py +++ b/test/common.py @@ -48,7 +48,6 @@ 'exe': 'test.exe', 'input': 'testrun_1', 'manifest': { - 'scaninputs': False, 'reproduce': { 'input': False, 'exe': False diff --git a/test/test_manifest.py b/test/test_manifest.py index eee01ed6..d352438e 100644 --- a/test/test_manifest.py +++ b/test/test_manifest.py @@ -154,8 +154,6 @@ def test_exe_reproduce(): # Set reproduce exe to True config['manifest']['reproduce']['exe'] = True - # Also set scaninputs to True to catch bug #388 - config['manifest']['scaninputs'] = True write_config(config) manifests = get_manifests(ctrldir/'manifests') @@ -230,8 +228,6 @@ def test_input_reproduce(): config['manifest']['reproduce']['exe'] = False config['manifest']['reproduce']['input'] = True - # Set scan inputs to True - config['manifest']['scaninputs'] = True config['exe'] = config_orig['exe'] write_config(config) manifests = get_manifests(ctrldir/'manifests') @@ -324,8 +320,6 @@ def test_input_reproduce(): # Set reproduce to true config['manifest']['reproduce']['input'] = True - # Set scan inputs to False - This should be ignored and set to true - config['manifest']['scaninputs'] = False write_config(config) # Run setup again, which should raise an error due to new input file @@ -335,83 +329,13 @@ def test_input_reproduce(): assert pytest_wrapped_e.type == SystemExit assert pytest_wrapped_e.value.code == 1 - -def test_input_scaninputs(): - - # Re-create input files - make_config_files() - make_inputs() - - inputdir = labdir / 'input' / config['input'] - inputdir.mkdir(parents=True, exist_ok=True) - - # Set scaninputs input to True - config['manifest']['scaninputs'] = True + # Set reproduce manifests back to False config['manifest']['reproduce']['input'] = False write_config(config) - # Run setup with unchanged input - payu_setup(lab_path=str(labdir)) - manifests = get_manifests(ctrldir/'manifests') - - # Set scaninputs input to False - config['manifest']['scaninputs'] = False - write_config(config) - - # Run setup, should work and manifests unchanged - payu_setup(lab_path=str(labdir)) - assert(manifests == get_manifests(ctrldir/'manifests')) - - # Update modification times for input files - for i in range(1, 4): - (inputdir/'input_00{i}.bin'.format(i=i)).touch() - - # Run setup, should work as only fasthash will differ, code then - # checks full hash and updates fasthash if fullhash matches - payu_setup(lab_path=str(labdir)) - - # Manifests should no longer match as fasthashes have been updated - assert(not manifests == get_manifests(ctrldir/'manifests')) - - # Reset manifest "truth" - manifests = get_manifests(ctrldir/'manifests') - - # Re-create input files - make_inputs() - - # Run setup again. Should be fine, but manifests changed - payu_setup(lab_path=str(labdir)) - assert(not manifests == get_manifests(ctrldir/'manifests')) - - # Reset manifest "truth" - manifests = get_manifests(ctrldir/'manifests') - - # Make a new input file - (inputdir/'lala').touch() - - # Run setup again. Should be fine, manifests unchanged as - # scaninputs=False - payu_setup(lab_path=str(labdir)) - assert(manifests == get_manifests(ctrldir/'manifests')) - - # Set scaninputs input to True - config['manifest']['scaninputs'] = True - write_config(config) - - # Run setup again. Should be fine, but manifests changed now - # as scaninputs=True + # Check setup runs without an error and new input is linked payu_setup(lab_path=str(labdir)) - assert(not manifests == get_manifests(ctrldir/'manifests')) - assert((workdir/'lala').is_file()) - - # Delete silly input file - (inputdir/'lala').unlink() - - # Re-run after removing silly input file - payu_setup(lab_path=str(labdir)) - - # Reset manifest "truth" - manifests = get_manifests(ctrldir/'manifests') + assert (workdir / 'newInputFile').is_file() def test_restart_reproduce(): From 0459afb45ec340b025144393917855388826d874 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Mon, 12 Aug 2024 14:55:00 +1000 Subject: [PATCH 06/10] Refactor manifest logic to store previous manifests to try simplify reproduce checks --- payu/manifest.py | 252 +++++++++++++++++------------------------------ 1 file changed, 90 insertions(+), 162 deletions(-) diff --git a/payu/manifest.py b/payu/manifest.py index d944cd01..2296925d 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -47,105 +47,71 @@ def __init__(self, path, if ignore is not None: self.ignore = ignore - self.needsync = False - - self.previous_filepaths = set() - - def set_previous_filepaths(self): - """ - Save previous filepaths information - this is obtained from the - pre-existing manifests. This is used to keep track of changes to - the manifest. - """ - self.previous_filepaths = set(self.data.keys()) - - def check_fast(self, reproduce=False, **args): + def calculate_fast(self, previous_manifest): """ - Check hash value for all filepaths using a fast hash function and fall - back to slower full hash functions if fast hashes fail to agree. + Calculate hash value for all filepaths using a fast hash function and + fall back to slower full hash functions if fast hashes fail to agree, + with the pre-existing manifest """ - hashvals = {} - - fast_check = self.check_file( + # Calculate all fast hashes + self.add( filepaths=self.data.keys(), - hashvals=hashvals, hashfn=self.fast_hashes, - shortcircuit=True, - **args + force=True, + fullpaths=[self.fullpath(fpath) for fpath + in list(self.data.keys())] ) - if not fast_check: - - # Save all the fast hashes for failed files that we've already - # calculated - for filepath in hashvals: - for hash, val in hashvals[filepath].items(): - self.data[filepath]['hashes'][hash] = val - - if reproduce: - for filepath in hashvals: - print('Check failed for {0} {1}' - ''.format(filepath, hashvals[filepath])) - tmphash = {} - full_check = self.check_file( - filepaths=filepath, - hashfn=self.full_hashes, - hashvals=tmphash, - shortcircuit=False, - **args - ) - - if full_check: - # File is still ok, so replace fast hashes - print('Full hashes ({0}) checked ok' - ''.format(self.full_hashes)) - print('Updating fast hashes for {0} in {1}' - ''.format(filepath, self.path)) - self.add_fast(filepath, force=True) - print('Saving updated manifest') - self.needsync = True - else: - sys.stderr.write( - 'Run cannot reproduce: manifest {0} is not ' - 'correct\n'.format(self.path) - ) - for path, hashdict in tmphash.items(): - print(' {0}:'.format(path)) - for hash, val in hashdict.items(): - hash_table = self.data[path]['hashes'] - hash_table_val = hash_table.get(hash, None) - print(' {0}: {1} != {2}' - ''.format(hash, val, hash_table_val)) - sys.exit(1) - else: - # Not relevant if full hashes are correct. Regenerate full - # hashes for all filepaths that failed fast check. - print('Updating full hashes for {0} files in {1}' - ''.format(len(hashvals), self.path)) - - # Add all full hashes at once -- much faster. Definitely want - # to force the full hash to be updated. In the specific case of - # an empty hash the value will be None, without force it will - # be written as null. - self.add( - filepaths=list(hashvals.keys()), - hashfn=self.full_hashes, - force=True, - fullpaths=[self.fullpath(fpath) for fpath - in list(hashvals.keys())] - ) - - # Flag need to update version on disk - self.needsync = True + # If fast hashes from previous manifest match, use previous full hashes + # to avoid re-calculating slow hashes + self.update_matching_hashes(other=previous_manifest) + + # Search for new files and files with changed fast hashes + changed_filepaths = set() + for filepath in self.data.keys(): + for hash in self.data[filepath]['hashes'].values(): + if hash == None: + changed_filepaths.add(filepath) + + # Calculate full hashes for these changed filepaths + if len(changed_filepaths) > 0: + self.add( + filepaths=list(changed_filepaths), + hashfn=self.full_hashes, + force=True, + fullpaths=[self.fullpath(fpath) for fpath + in list(changed_filepaths)] + ) - # Check for any missing files - if reproduce and len(self.previous_filepaths) > 0: + def check_reproduce(self, previous_manifest): + """ + Compare full hashes with previous manifest + """ + # Use paths in both manifests to pick up new and missing files + all_filepaths = set(self.data.keys()).union( + previous_manifest.data.keys() + ) + differences = [] + for filepath in all_filepaths: + for hashfn in self.full_hashes: + hash = self.get(filepath, hashfn) + previous_hash = previous_manifest.get(filepath, hashfn) + + if hash != previous_hash: + differences.append( + f"{filepath}: {hashfn}: {previous_hash} != {hash}" + ) + + if len(differences) != 0: sys.stderr.write( - f"Run cannot reproduce: Files in {self.path} " + - "are no longer in work directory:\n - " + - "\n - ".join(self.previous_filepaths) + '\n' + f'Run cannot reproduce: manifest {self.path} is not correct\n' ) - exit(1) + print("Manifest path: hash != calculated hash") + for row in differences: + print(row) + + sys.exit(1) + def add_filepath(self, filepath, fullpath, hashes, copy=False): """ @@ -157,11 +123,13 @@ def add_filepath(self, filepath, fullpath, hashes, copy=False): # Ignore directories if os.path.isdir(fullpath): + # TODO: Add debug logging return False # Ignore anything matching the ignore patterns for pattern in self.ignore: if fnmatch.fnmatch(os.path.basename(fullpath), pattern): + #TODO: Add debug logging return False if filepath not in self.data: @@ -174,9 +142,6 @@ def add_filepath(self, filepath, fullpath, hashes, copy=False): if copy: self.data[filepath]['copy'] = copy - if filepath in self.previous_filepaths: - self.previous_filepaths.remove(filepath) - return True def add_fast(self, filepath, hashfn=None, force=False): @@ -232,8 +197,6 @@ def make_link(self, filepath): orig=self.fullpath(filepath), local=filepath)) raise - finally: - self.previous_filepaths.discard(filepath) def copy(self, path): """ @@ -280,7 +243,7 @@ def __init__(self, config, reproduce): # Initialise manifests and reproduce flags self.manifests = {} - self.have_manifest = {} + self.previous_manifests = {} reproduce_config = self.manifest_config.get('reproduce', {}) self.reproduce = {} for mf in ['input', 'restart', 'exe']: @@ -298,7 +261,14 @@ def init_mf(self, mf): fast_hashes=self.fast_hashes, full_hashes=self.full_hashes ) - self.have_manifest[mf] = False + + # Initialise a sub-manifest object to store pre-existing manifests + self.previous_manifests[mf] = PayuManifest( + os.path.join('manifests', '{}.yaml'.format(mf)), + ignore=self.ignore, + fast_hashes=self.fast_hashes, + full_hashes=self.full_hashes + ) def __iter__(self): """ @@ -310,88 +280,46 @@ def __iter__(self): def __len__(self): """Return the number of manifests in the manifest class.""" return len(self.manifests) - - def load(self): + + def load_previous_manifests(self): """ - Load manifests + Load pre-existing manifests """ - for mf in self.manifests: - self.have_manifest[mf] = False - if (os.path.exists(self.manifests[mf].path)): + for mf in self.previous_manifests: + manifest_path = self.previous_manifests[mf].path + if os.path.exists(manifest_path): try: - print('Loading {mf} manifest: {path}' - ''.format(mf=mf, path=self.manifests[mf].path)) - self.manifests[mf].load() + print(f'Loading {mf} manifest: {manifest_path}') + self.previous_manifests[mf].load() except Exception as e: - print('Error loading {mf} manifest: ' - '{error}'.format(mf=mf, error=e)) - finally: - if len(self.manifests[mf]) > 0: - self.have_manifest[mf] = True + print(f'Error loading {mf} manifest: {e}') # Check manifests are populated when reproduce is configured - if not self.have_manifest[mf] and self.reproduce[mf]: + if len(self.previous_manifests[mf]) == 0 and self.reproduce[mf]: sys.stderr.write( f'{mf.capitalize()} manifest must exist and be populated ' 'if reproduce is configured to True\n' ) sys.exit(1) - def set_previous_filepaths(self): - """ - Save the existing filepath infomation to each manifest - """ - for mf in self.manifests.keys(): - if self.have_manifest[mf]: - self.manifests[mf].set_previous_filepaths() - def setup(self): - # Load all available manifests - self.load() - - # Save existing filepaths infomation in each manifest - self.set_previous_filepaths() - - # Re-initialise executable and restart manifests, unless reproduce - # is configured: - # - Executables are generally small in size so it is trivial to - # recalculate full MD5 hashes for each experiment run - # - Restarts are usually different between runs. - # - Input manifests are not re-initialised as inputs are typically - # large files that change rarely, so MD5 hashes are computed only - # if there has been a change - for mf in ['exe', 'restart']: - if self.have_manifest[mf] and not self.reproduce[mf]: - self.init_mf(mf) + self.load_previous_manifests() def check_manifests(self): - - print("Checking exe and input manifests") - self.manifests['exe'].check_fast(reproduce=self.reproduce['exe']) - - if not self.reproduce['input']: - if len(self.manifests['input'].previous_filepaths) > 0: - # Delete missing filepaths from input manifest - for filepath in self.manifests['input'].previous_filepaths: - print('File no longer in input directory: {file} ' - 'removing from manifest'.format(file=filepath)) - self.manifests['input'].delete(filepath) - self.manifests['input'].needsync = True - - self.manifests['input'].check_fast(reproduce=self.reproduce['input']) - - if self.reproduce['restart']: - print("Checking restart manifest") - else: - print("Creating restart manifest") - self.manifests['restart'].needsync = True - self.manifests['restart'].check_fast( - reproduce=self.reproduce['restart']) - - # Write updates to version on disk + print("Checking exe, input and restart manifests") + for mf in self.manifests: + # Calculate hashes in manifests + self.manifests[mf].calculate_fast(self.previous_manifests[mf]) + + if self.reproduce[mf]: + # Compare manifest with previous + self.manifests[mf].check_reproduce(self.previous_manifests[mf]) + + # Update manifests if there's any changes, or create file if empty for mf in self.manifests: - if self.manifests[mf].needsync: + if (self.manifests[mf].data != self.previous_manifests[mf].data + or len(self.manifests[mf]) == 0): print("Writing {}".format(self.manifests[mf].path)) self.manifests[mf].dump() From c94c2b2ecbe8cb70d6ac58093767b9b092a96963 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Wed, 14 Aug 2024 14:33:16 +1000 Subject: [PATCH 07/10] Update get manifests paths method to use pre-existing manifests --- payu/manifest.py | 10 +++++----- payu/schedulers/pbs.py | 4 ++-- test/test_manifest.py | 9 ++++----- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/payu/manifest.py b/payu/manifest.py index 2296925d..668daff2 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -281,7 +281,7 @@ def __len__(self): """Return the number of manifests in the manifest class.""" return len(self.manifests) - def load_previous_manifests(self): + def load_manifests(self): """ Load pre-existing manifests """ @@ -304,7 +304,7 @@ def load_previous_manifests(self): def setup(self): # Load all available manifests - self.load_previous_manifests() + self.load_manifests() def check_manifests(self): print("Checking exe, input and restart manifests") @@ -346,11 +346,11 @@ def add_filepath(self, manifest, filepath, fullpath, copy=False): # Only link if filepath was added self.manifests[manifest].make_link(filepath) - def get_all_fullpaths(self): + def get_all_previous_fullpaths(self): """ Return a list of all fullpaths in manifest files """ files = [] - for mf in self.manifests: - files.extend(self.manifests[mf].get_fullpaths()) + for mf in self.previous_manifests: + files.extend(self.previous_manifests[mf].get_fullpaths()) return files diff --git a/payu/schedulers/pbs.py b/payu/schedulers/pbs.py index 931fc891..808361b9 100644 --- a/payu/schedulers/pbs.py +++ b/payu/schedulers/pbs.py @@ -277,6 +277,6 @@ def get_manifest_paths(): storage paths """ tmpmanifest = Manifest(config={}, reproduce=False) - tmpmanifest.load() + tmpmanifest.load_manifests() - return tmpmanifest.get_all_fullpaths() + return tmpmanifest.get_all_previous_fullpaths() diff --git a/test/test_manifest.py b/test/test_manifest.py index d352438e..447dbb2d 100644 --- a/test/test_manifest.py +++ b/test/test_manifest.py @@ -420,7 +420,7 @@ def test_all_reproduce(): assert(not manifests == get_manifests(ctrldir/'manifests')) -def test_get_all_fullpaths(): +def test_get_all_previous_fullpaths(): make_all_files() make_config_files() @@ -433,10 +433,9 @@ def test_get_all_fullpaths(): sweep_work() with cd(ctrldir): - lab = payu.laboratory.Laboratory(lab_path=str(labdir)) - expt = payu.experiment.Experiment(lab, reproduce=False) - expt.setup() - files = expt.manifest.get_all_fullpaths() + manifest = payu.manifest.Manifest(config={}, reproduce=False) + manifest.load_manifests() + files = manifest.get_all_previous_fullpaths() allfiles = [] for mf in manifests: From 3b6633a901e8e37565f116485cdef1461312098b Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Thu, 15 Aug 2024 14:25:29 +1000 Subject: [PATCH 08/10] Update documentation to reflect manifest refactor changes --- docs/source/config.rst | 2 +- docs/source/manifests.rst | 24 ++++++------------------ payu/manifest.py | 2 +- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index 863a13dc..72636235 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -251,7 +251,7 @@ section for details. reproducible experiments. The default value is the value of the global ``reproduce`` flag, which is set using a command line argument and defaults to *False*. These options **override** the global ``reproduce`` - flag. If set to *True* payu will refuse to run if the hashes in the + flag. If set to *True* payu will refuse to run if the MD5 hashes in the relevant manifest do not match. ``exe`` (*Default: global reproduce flag*) diff --git a/docs/source/manifests.rst b/docs/source/manifests.rst index 877b8698..1c77a59d 100644 --- a/docs/source/manifests.rst +++ b/docs/source/manifests.rst @@ -71,24 +71,12 @@ for each model run. Manifest updates ---------------- -Each of the manifests is updated in a slightly different way which reflects -the way the files are expected to change during an experiment. - -The executable manifest is recalculated each time the model is run. -Executables are generally fairly small in size and number, so there is very -little overhead calculating full MD5 hashes. This also means there is no -need to check that exectutable paths are still correct and also any -changes to executables are automatically included in the manifest. - -The restart manifest is also recalculated for every run as there is no expectation -that restart (or pickup) files are ever the same between normal model runs. - -The input manifest changes relatively rarely and can often contain a small -number of very large files. It is this combination that can cause a significant -time overhead if full MD5 hashes have to be computed for every run. By using -binhash, a fast change-sensitive hash, these time consuming hashes only -need be computed when a change has been detected. So the slow md5 hashes -are recalculated as little as possible. +Each time the model is run, binhash for each filepath is recalculated +and compared with stored manifest values. If a new filepath has been added, +or the binhash differs from the stored value, the full MD5 hash is +recalculated. By using binhash, a fast change-sensitive hash, +these time consuming MD5 hashes only need be computed when a change has +been detected. So the slow md5 hashes are recalculated as little as possible. Manifest options ---------------- diff --git a/payu/manifest.py b/payu/manifest.py index 668daff2..bb87f915 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -313,7 +313,7 @@ def check_manifests(self): self.manifests[mf].calculate_fast(self.previous_manifests[mf]) if self.reproduce[mf]: - # Compare manifest with previous + # Compare manifest with previous manifest self.manifests[mf].check_reproduce(self.previous_manifests[mf]) # Update manifests if there's any changes, or create file if empty From 7ff9f46afadba9ab0c9cf9e3a6b32a66b9af2db4 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Thu, 15 Aug 2024 14:30:58 +1000 Subject: [PATCH 09/10] Fix pycodestyle errors --- payu/manifest.py | 14 ++++++-------- payu/models/model.py | 2 +- test/common.py | 10 +++++----- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/payu/manifest.py b/payu/manifest.py index bb87f915..bc46788c 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -59,7 +59,7 @@ def calculate_fast(self, previous_manifest): hashfn=self.fast_hashes, force=True, fullpaths=[self.fullpath(fpath) for fpath - in list(self.data.keys())] + in list(self.data.keys())] ) # If fast hashes from previous manifest match, use previous full hashes @@ -70,7 +70,7 @@ def calculate_fast(self, previous_manifest): changed_filepaths = set() for filepath in self.data.keys(): for hash in self.data[filepath]['hashes'].values(): - if hash == None: + if hash is None: changed_filepaths.add(filepath) # Calculate full hashes for these changed filepaths @@ -80,7 +80,7 @@ def calculate_fast(self, previous_manifest): hashfn=self.full_hashes, force=True, fullpaths=[self.fullpath(fpath) for fpath - in list(changed_filepaths)] + in list(changed_filepaths)] ) def check_reproduce(self, previous_manifest): @@ -101,7 +101,7 @@ def check_reproduce(self, previous_manifest): differences.append( f"{filepath}: {hashfn}: {previous_hash} != {hash}" ) - + if len(differences) != 0: sys.stderr.write( f'Run cannot reproduce: manifest {self.path} is not correct\n' @@ -123,13 +123,11 @@ def add_filepath(self, filepath, fullpath, hashes, copy=False): # Ignore directories if os.path.isdir(fullpath): - # TODO: Add debug logging return False # Ignore anything matching the ignore patterns for pattern in self.ignore: if fnmatch.fnmatch(os.path.basename(fullpath), pattern): - #TODO: Add debug logging return False if filepath not in self.data: @@ -280,7 +278,7 @@ def __iter__(self): def __len__(self): """Return the number of manifests in the manifest class.""" return len(self.manifests) - + def load_manifests(self): """ Load pre-existing manifests @@ -315,7 +313,7 @@ def check_manifests(self): if self.reproduce[mf]: # Compare manifest with previous manifest self.manifests[mf].check_reproduce(self.previous_manifests[mf]) - + # Update manifests if there's any changes, or create file if empty for mf in self.manifests: if (self.manifests[mf].data != self.previous_manifests[mf].data diff --git a/payu/models/model.py b/payu/models/model.py index 103f8530..1b9da485 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -286,7 +286,7 @@ def setup(self): workrelpath = os.path.relpath(path, input_path) subdir = os.path.normpath( os.path.join(self.work_input_path_local, - workrelpath) + workrelpath) ) if not os.path.exists(subdir): diff --git a/test/common.py b/test/common.py index 8bb06197..80206320 100644 --- a/test/common.py +++ b/test/common.py @@ -48,11 +48,11 @@ 'exe': 'test.exe', 'input': 'testrun_1', 'manifest': { - 'reproduce': { - 'input': False, - 'exe': False - } - }, + 'reproduce': { + 'input': False, + 'exe': False + } + }, 'runlog': False, "experiment": ctrldir_basename, "metadata": { From 8fae6f95b8730068456ebdeb0881535faef62d2b Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Thu, 22 Aug 2024 11:38:23 +1000 Subject: [PATCH 10/10] Manifest reproduce logs note missing or new files --- payu/manifest.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/payu/manifest.py b/payu/manifest.py index bc46788c..cd6ebc4d 100644 --- a/payu/manifest.py +++ b/payu/manifest.py @@ -97,16 +97,25 @@ def check_reproduce(self, previous_manifest): hash = self.get(filepath, hashfn) previous_hash = previous_manifest.get(filepath, hashfn) - if hash != previous_hash: + if hash is None: + differences.append( + f" {filepath}: Missing file (file not in " + + "calculated manifest)" + ) + elif previous_hash is None: + differences.append( + f" {filepath}: New file (file not in stored manifest)" + ) + elif hash != previous_hash: differences.append( - f"{filepath}: {hashfn}: {previous_hash} != {hash}" + f" {filepath}: {hashfn}: {previous_hash} != {hash}" ) if len(differences) != 0: sys.stderr.write( f'Run cannot reproduce: manifest {self.path} is not correct\n' ) - print("Manifest path: hash != calculated hash") + print(f"Manifest path: stored hash != calculated hash") for row in differences: print(row)