diff --git a/docs/source/config.rst b/docs/source/config.rst index e9c3c4d9..4f1483a3 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -189,16 +189,29 @@ configuration. ncpus: 0 ``restart_freq`` (*Default:* ``5``) - Specifies the rate of saved restart files. For the default rate of 5, we - keep the restart files for every fifth run (``restart004``, ``restart009``, - ``restart014``, etc.). - - Intermediate restarts are not deleted until a permanently archived restart - has been produced. For example, if we have just completed run ``11``, then - we keep ``restart004``, ``restart009``, ``restart010``, and ``restart011``. - Restarts 10 through 13 are not deleted until ``restart014`` has been saved. - - ``restart_freq: 1`` saves all restart files. + Specifies the rate of saved restart files. This rate can be either an + integer or date-based. For the default rate of 5, we + keep the restart files for every fifth run (``restart000``, ``restart005``, + ``restart010``, etc.). To save all restart files, set ``restart_freq: 1``. + + If ``restart_history`` is not configured, intermediate restarts are not + deleted until a permanently archived restart has been produced. + For example, if we have just completed run ``11``, then + we keep ``restart000``, ``restart005``, ``restart010``, and ``restart011``. + Restarts 11 through 14 are not deleted until ``restart015`` has been saved. + + To use a date-based restart frequency, specify a number with a time unit. + The supported time units are ``YS`` - year-start, ``MS`` - month-start, + ``W`` - week, ``D`` - day, ``H`` - hour, ``T`` - minute and ``S`` - second. + For example, ``restart_freq: 10YS`` would save the earliest restart of the year, + 10 years from the last permanently archived restart's datetime. + + Please note that currently, only ACCESS-OM2, MOM5 and MOM6 models support + date-based restart frequency, as it depends on the payu model driver being + able to parse restart files for a datetime. + +``restart_history`` + Specifies how many of the most recent restart files to retain regardless of ``restart_freq``. *The following model-based tags are typically not configured* diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 23a8db2c..086b4ca7 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -232,6 +232,12 @@ To run from an existing model run, also called a warm start, set the ``restart`` option to point to the folder containing the restart files from a previous matching experiment. +If the restart pruning configuration has changed, a warning may be given if +more restarts than expected would be pruned as a result. If this is intended, use the +``-F``/``--force-prune-restarts`` flag at the next run:: + + payu run --force-prune-restarts + Cleaning up =========== diff --git a/payu/calendar.py b/payu/calendar.py index c433cb82..7e2beab3 100644 --- a/payu/calendar.py +++ b/payu/calendar.py @@ -1,5 +1,8 @@ -from dateutil.relativedelta import relativedelta import datetime +import re + +from dateutil.relativedelta import relativedelta +import cftime NOLEAP, GREGORIAN = range(2) @@ -17,8 +20,7 @@ def int_to_date(date): def date_to_int(date): - - return (date.year * 10**4 + date.month * 10**2 + date.day) + return date.year * 10**4 + date.month * 10**2 + date.day def runtime_from_date(start_date, years, months, days, seconds, caltype): @@ -28,8 +30,9 @@ def runtime_from_date(start_date, years, months, days, seconds, caltype): Ignores Feb 29 for caltype == NOLEAP. 
""" - end_date = start_date + relativedelta(years=years, months=months, - days=days) + end_date = start_date + relativedelta( + years=years, months=months, days=days + ) runtime = end_date - start_date if caltype == NOLEAP: @@ -67,7 +70,6 @@ def get_leapdays(init_date, final_date): leap_days = 0 while curr_date != final_date: - if curr_date.month == 2 and curr_date.day == 29: leap_days += 1 @@ -86,3 +88,110 @@ def calculate_leapdays(init_date, final_date): # TODO: Internal date correction (e.g. init_date is 1-March or later) return datetime.timedelta(days=leap_days) + + +def add_year_start_offset_to_datetime(initial_dt, n): + """Return a cftime datetime at the start of the year, that is n years + from the initial datetime""" + return cftime.datetime( + year=initial_dt.year + n, + month=1, + day=1, + hour=0, + minute=0, + second=0, + calendar=initial_dt.calendar, + ) + + +def add_month_start_offset_to_datetime(initial_dt, n): + """Return a cftime datetime of the start of the month, that is n months + from the initial datetime""" + years_to_add = (initial_dt.month + n - 1) // 12 + months_to_add = n - years_to_add * 12 + + return cftime.datetime( + year=initial_dt.year + years_to_add, + month=initial_dt.month + months_to_add, + day=1, + hour=0, + minute=0, + second=0, + calendar=initial_dt.calendar, + ) + + +def add_timedelta_fn(timedelta): + """Returns a function that takes initial datetime and multiplier n, + and returns a datetime that is n * offset from the initial datetime""" + return lambda initial_dt, n: initial_dt + n * timedelta + + +class DatetimeOffset: + """A utility class for adding various time offsets to cftime datetimes. + + Parameters: + unit (str): The unit of the time offset. Supported units are: + - "YS" for years (start of the year) + - "MS" for months (start of the month) + - "W" for weeks + - "D" for days + - "H" for hours + - "T" for minutes + - "S" for seconds + magnitude (int): The magnitude of the time offset. + + Methods: + - `add_to_datetime(initial_dt: cftime.datetime) -> cftime.datetime`: + Adds the specified time offset to the given cftime datetime and + returns the resulting datetime. + + Attributes: + - unit (str): The unit of the time offset. + - magnitude (int): The magnitude of the time offset. + """ + + def __init__(self, unit, magnitude): + supported_datetime_offsets = { + "YS": add_year_start_offset_to_datetime, + "MS": add_month_start_offset_to_datetime, + "W": add_timedelta_fn(datetime.timedelta(weeks=1)), + "D": add_timedelta_fn(datetime.timedelta(days=1)), + "H": add_timedelta_fn(datetime.timedelta(hours=1)), + "T": add_timedelta_fn(datetime.timedelta(minutes=1)), + "S": add_timedelta_fn(datetime.timedelta(seconds=1)), + } + if unit not in supported_datetime_offsets: + raise ValueError( + f"Unsupported datetime offset: {unit}. " + "Supported offsets: YS, MS, W, D, H, T, S" + ) + self.unit = unit + self.magnitude = magnitude + self._add_offset_to_datetime = supported_datetime_offsets[unit] + + def add_to_datetime(self, initial_dt): + """Takes an initial cftime datetime, + and returns a datetime with the offset added""" + + if not (isinstance(initial_dt, cftime.datetime)): + raise TypeError( + f"Invalid initial datetime type: {type(initial_dt)}. 
" + "Expected type: cftime.datetime" + ) + + return self._add_offset_to_datetime( + initial_dt=initial_dt, n=self.magnitude + ) + + +def parse_date_offset(offset): + """Parse a given string date offset string and return an DatetimeOffset""" + match = re.search("[0-9]+", offset) + if match is None: + raise ValueError( + f"No numerical value given for offset: {offset}" + ) + n = match.group() + unit = offset.lstrip(n) + return DatetimeOffset(unit=unit, magnitude=int(n)) diff --git a/payu/cli.py b/payu/cli.py index 4887174d..a2876a14 100644 --- a/payu/cli.py +++ b/payu/cli.py @@ -89,7 +89,7 @@ def get_model_type(model_type, config): def set_env_vars(init_run=None, n_runs=None, lab_path=None, dir_path=None, - reproduce=False, force=False): + reproduce=False, force=False, force_prune_restarts=False): """Construct the environment variables used by payu for resubmissions.""" payu_env_vars = {} @@ -134,6 +134,9 @@ def set_env_vars(init_run=None, n_runs=None, lab_path=None, dir_path=None, if force: payu_env_vars['PAYU_FORCE'] = force + if force_prune_restarts: + payu_env_vars['PAYU_FORCE_PRUNE_RESTARTS'] = force_prune_restarts + # Pass through important module related environment variables module_env_vars = ['MODULESHOME', 'MODULES_CMD', 'MODULEPATH', 'MODULEV'] for var in module_env_vars: diff --git a/payu/experiment.py b/payu/experiment.py index 751a84cd..15f5f8cb 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -26,12 +26,13 @@ # Local from payu import envmod -from payu.fsops import mkdir_p, make_symlink, read_config, movetree, required_libs +from payu.fsops import mkdir_p, make_symlink, read_config, movetree from payu.schedulers.pbs import get_job_info, pbs_env_init, get_job_id from payu.models import index as model_index import payu.profilers from payu.runlog import Runlog from payu.manifest import Manifest +from payu.calendar import parse_date_offset # Environment module support on vayu # TODO: To be removed @@ -40,7 +41,6 @@ # Default payu parameters default_archive_url = 'dc.nci.org.au' default_restart_freq = 5 -default_restart_history = 5 class Experiment(object): @@ -88,7 +88,6 @@ def __init__(self, lab, reproduce=False, force=False): self.set_expt_pathnames() self.set_counters() - for model in self.models: model.set_input_paths() @@ -199,13 +198,15 @@ def max_output_index(self, output_type="output"): raise if output_dirs and len(output_dirs): - return max([int(d.lstrip(output_type)) for d in output_dirs]) + return int(output_dirs[-1].lstrip(output_type)) def list_output_dirs(self, output_type="output"): - """Return a list of restart or output directories in archive""" - naming_pattern = re.compile(fr"^{output_type}[0-9][0-9][0-9]$") - return [d for d in os.listdir(self.archive_path) + """Return a sorted list of restart or output directories in archive""" + naming_pattern = re.compile(fr"^{output_type}[0-9][0-9][0-9]+$") + dirs = [d for d in os.listdir(self.archive_path) if naming_pattern.match(d)] + dirs.sort(key=lambda d: int(d.lstrip(output_type))) + return dirs def set_stacksize(self, stacksize): @@ -344,7 +345,7 @@ def set_output_paths(self): user_restart_dir = self.config.get('restart') if (self.counter == 0 or self.repeat_run) and user_restart_dir: # TODO: Some user friendliness needed... 
- assert(os.path.isdir(user_restart_dir)) + assert (os.path.isdir(user_restart_dir)) self.prior_restart_path = user_restart_dir else: prior_restart_dir = 'restart{0:03}'.format(self.counter - 1) @@ -445,14 +446,19 @@ def setup(self, force_archive=False): # Testing prof.setup() + # Check restart pruning for valid configuration values and + # warns user if more restarts than expected would be pruned + if self.config.get('archive', True): + self.get_restarts_to_prune() + def run(self, *user_flags): # XXX: This was previously done in reversion envmod.setup() - + # Add any user-defined module dir(s) to MODULEPATH for module_dir in self.config.get('modules', {}).get('use', []): - envmod.module('use', module_dir) + envmod.module('use', module_dir) self.load_modules() @@ -487,7 +493,7 @@ def run(self, *user_flags): mpi_flags = self.config.get('mpirun', []) # TODO: Legacy config removal warning - if type(mpi_flags) != list: + if not isinstance(mpi_flags, list): mpi_flags = [mpi_flags] # TODO: More uniform support needed here @@ -711,7 +717,7 @@ def run(self, *user_flags): if run_script: self.run_userscript(run_script) - def archive(self): + def archive(self, force_prune_restarts=False): if not self.config.get('archive', True): print('payu: not archiving due to config.yaml setting.') return @@ -742,27 +748,21 @@ def archive(self): movetree(self.work_path, self.output_path) - # Remove old restart files - # TODO: Move to subroutine - restart_freq = self.config.get('restart_freq', default_restart_freq) - restart_history = self.config.get('restart_history', - default_restart_history) - # Remove any outdated restart files - prior_restart_dirs = self.list_output_dirs(output_type="restart") - - for res_dir in prior_restart_dirs: - - res_idx = int(res_dir.lstrip('restart')) - if (self.repeat_run or - (not res_idx % restart_freq == 0 and - res_idx <= (self.counter - restart_history))): - - res_path = os.path.join(self.archive_path, res_dir) - - # Only delete real directories; ignore symbolic restart links - if (os.path.isdir(res_path) and not os.path.islink(res_path)): - shutil.rmtree(res_path) + try: + restarts_to_prune = self.get_restarts_to_prune( + force=force_prune_restarts) + except Exception as e: + print(e) + print("payu: error: Skipping pruning restarts") + restarts_to_prune = [] + + for restart in restarts_to_prune: + restart_path = os.path.join(self.archive_path, restart) + # Only delete real directories; ignore symbolic restart links + if (os.path.isdir(restart_path) and + not os.path.islink(restart_path)): + shutil.rmtree(restart_path) # Ensure dynamic library support for subsequent python calls ld_libpaths = os.environ.get('LD_LIBRARY_PATH', None) @@ -997,6 +997,126 @@ def sweep(self, hard_sweep=False): print('Removing symlink {0}'.format(self.work_sym_path)) os.remove(self.work_sym_path) + def get_restarts_to_prune(self, + ignore_intermediate_restarts=False, + force=False): + """Returns a list of restart directories that can be pruned""" + # Check if archive path exists + if not os.path.exists(self.archive_path): + return [] + + # List all restart directories in archive + restarts = self.list_output_dirs(output_type='restart') + + # TODO: Previous logic was to prune all restarts if self.repeat_run + # Still need to figure out what should happen in this case + if self.repeat_run: + return [os.path.join(self.archive_path, restart) + for restart in restarts] + + # Use restart_freq to decide what restarts to prune + restarts_to_prune = [] + intermediate_restarts, previous_intermediate_restarts = 
[], [] + restart_freq = self.config.get('restart_freq', default_restart_freq) + if isinstance(restart_freq, int): + # Using integer frequency to prune restarts + for restart in restarts: + restart_idx = int(restart.lstrip('restart')) + if not restart_idx % restart_freq == 0: + intermediate_restarts.append(restart) + else: + # Add any intermediate restarts to restarts to prune + restarts_to_prune.extend(intermediate_restarts) + previous_intermediate_restarts = intermediate_restarts + intermediate_restarts = [] + else: + # Using date-based frequency to prune restarts + try: + date_offset = parse_date_offset(restart_freq) + except ValueError as e: + print('payu: error: Invalid configuration for restart_freq:', + restart_freq) + raise + + next_dt = None + for restart in restarts: + # Use model-driver to parse restart directory for a datetime + restart_path = os.path.join(self.archive_path, restart) + try: + restart_dt = self.model.get_restart_datetime(restart_path) + except NotImplementedError: + print('payu: error: Date-based restart pruning is not ' + f'implemented for the {self.model.model_type} ' + 'model. To use integer based restart pruning, ' + 'set restart_freq to an integer value.') + raise + except Exception: + print('payu: error: Error parsing restart directory ', + f'{restart} for a datetime to prune restarts.') + raise + + if (next_dt is not None and restart_dt < next_dt): + intermediate_restarts.append(restart) + else: + # Keep the earliest datetime and use last kept datetime + # as point of reference when adding the next time interval + next_dt = date_offset.add_to_datetime(restart_dt) + + # Add intermediate restarts to restarts to prune + restarts_to_prune.extend(intermediate_restarts) + previous_intermediate_restarts = intermediate_restarts + intermediate_restarts = [] + + if ignore_intermediate_restarts: + # Return all restarts that'll eventually be pruned + restarts_to_prune.extend(intermediate_restarts) + return restarts_to_prune + + if not force: + # check environment for --force-prune-restarts flag + force = os.environ.get('PAYU_FORCE_PRUNE_RESTARTS', False) + + # Flag to check whether more restarts than expected will be deleted + is_unexpected = restarts_to_prune != previous_intermediate_restarts + + # Restart_history override + restart_history = self.config.get('restart_history', None) + if restart_history is not None: + if not isinstance(restart_history, int): + raise ValueError("payu: error: restart_history is not an " + f"integer value: {restart_history}") + + # Keep restart_history latest restarts, in addition to the + # permanently saved restarts defined by restart_freq + restarts_to_prune.extend(intermediate_restarts) + max_index = self.max_output_index(output_type="restart") + index_bound = max_index - restart_history + restarts_to_prune = [res for res in restarts_to_prune + if int(res.lstrip('restart')) <= index_bound] + + # Only expect at most 1 restart to be pruned with restart_history + is_unexpected = len(restarts_to_prune) > 1 + + # Log out warning if more restarts than expected will be deleted + if not force and is_unexpected: + config_info = (f'restart pruning frequency of {restart_freq}') + if restart_history: + config_info += f' and restart history of {restart_history}' + + print(f'payu: warning: Current {config_info} would result in ' + 'following restarts being pruned: ' + f'{" ".join(restarts_to_prune)}\n' + 'If this is expected, use --force-prune-restarts flag at ' + 'next run or archive (if running archive manually), e.g.:\n' + ' payu run 
--force-prune-restarts, or\n' + ' payu archive --force-prune-restarts\n' + 'Otherwise, no restarts will be pruned') + + # Return empty list to prevent restarts being pruned + restarts_to_prune = [] + + return restarts_to_prune + def enable_core_dump(): # Newer Intel compilers support 'FOR_DUMP_CORE_FILE' while most support diff --git a/payu/models/accessom2.py b/payu/models/accessom2.py index 6f251df9..cfa8e930 100644 --- a/payu/models/accessom2.py +++ b/payu/models/accessom2.py @@ -86,3 +86,15 @@ def archive(self): def collate(self): pass + + def get_restart_datetime(self, restart_path): + """Given a restart path, parse the restart files and + return a cftime datetime (for date-based restart pruning)""" + for model in self.expt.models: + if model.model_type == 'mom': + mom_restart_path = os.path.join(restart_path, model.name) + return model.get_restart_datetime(mom_restart_path) + + raise NotImplementedError( + 'Cannot find mom sub-model: access-om2 date-based restart pruning ' + 'requires the mom sub-model to determine restart dates') diff --git a/payu/models/fms.py b/payu/models/fms.py index f2f3e743..5da78635 100644 --- a/payu/models/fms.py +++ b/payu/models/fms.py @@ -17,6 +17,8 @@ from itertools import count import fnmatch +import cftime + from payu.models.model import Model from payu import envmod from payu.fsops import required_libs @@ -253,3 +255,31 @@ def archive(self, **kwargs): def collate(self): fms_collate(self) + + def get_restart_datetime(self, restart_path): + """Given a restart path, parse the restart files and + return a cftime datetime (for date-based restart pruning)""" + # Check for ocean_solo.res file + ocean_solo_path = os.path.join(restart_path, 'ocean_solo.res') + if not os.path.exists(ocean_solo_path): + raise NotImplementedError( + 'Cannot find ocean_solo.res file, which is required for ' + 'date-based restart pruning') + + with open(ocean_solo_path, 'r') as ocean_solo: + lines = ocean_solo.readlines() + + calendar_int = int(lines[0].split()[0]) + cftime_calendars = { + 1: "360_day", + 2: "julian", + 3: "proleptic_gregorian", + 4: "noleap" + } + calendar = cftime_calendars[calendar_int] + + last_date_line = lines[-1].split() + date_values = [int(i) for i in last_date_line[:6]] + year, month, day, hour, minute, second = date_values + return cftime.datetime(year, month, day, hour, minute, second, + calendar=calendar) diff --git a/payu/models/model.py b/payu/models/model.py index 8c6b0875..8b49d94d 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -145,7 +145,7 @@ def set_input_paths(self): if input_dirs is None: input_dirs = [] - elif type(input_dirs) == str: + elif isinstance(input_dirs, str): input_dirs = [input_dirs] self.input_paths = [] @@ -470,3 +470,8 @@ def profile(self): if f.endswith('.cubex')][0] cmd = 'scorep-score {0}'.format(cube_path) sp.check_call(shlex.split(cmd)) + + def get_restart_datetime(self, restart_path): + """Given a restart path, parse the restart files and return a cftime + datetime (currently used for date-based restart pruning)""" + raise NotImplementedError diff --git a/payu/models/mom.py b/payu/models/mom.py index 95f958b4..6a28f2b2 100644 --- a/payu/models/mom.py +++ b/payu/models/mom.py @@ -9,7 +9,7 @@ import subprocess import f90nml -import payu.envmod + from payu.models.fms import Fms from payu.fsops import mkdir_p, make_symlink @@ -101,7 +101,7 @@ def setup(self): os.remove(mask_path) # Reference mask table - assert('layout' in input_nml['ocean_model_nml']) + assert ('layout' in input_nml['ocean_model_nml']) nx, 
ny = input_nml['ocean_model_nml'].get('layout') n_masked_cpus = nx * ny - self.config.get('ncpus') diff --git a/payu/subcommands/archive_cmd.py b/payu/subcommands/archive_cmd.py index 18dfa5a3..6d5f0131 100644 --- a/payu/subcommands/archive_cmd.py +++ b/payu/subcommands/archive_cmd.py @@ -7,15 +7,16 @@ title = 'archive' parameters = {'description': 'Archive model output after run'} -arguments = [args.model, args.config, args.laboratory] +arguments = [args.model, args.config, args.laboratory, + args.force_prune_restarts] -def runcmd(model_type, config_path, lab_path): +def runcmd(model_type, config_path, lab_path, force_prune_restarts): lab = Laboratory(model_type, config_path, lab_path) expt = Experiment(lab) - expt.archive() + expt.archive(force_prune_restarts) runscript = runcmd diff --git a/payu/subcommands/args.py b/payu/subcommands/args.py index f5f38e6d..167fbec6 100644 --- a/payu/subcommands/args.py +++ b/payu/subcommands/args.py @@ -114,3 +114,15 @@ 'help': 'Force run to proceed, overwriting existing directories', } } + +# Force restarts to be pruned despite changes to configuration +force_prune_restarts = { + 'flags': ('--force-prune-restarts', '-F'), + 'parameters': { + 'action': 'store_true', + 'dest': 'force_prune_restarts', + 'default': False, + 'help': 'Force outdated restart directories to be pruned during \ + archive, ignoring changes made to configuration.', + } +} diff --git a/payu/subcommands/run_cmd.py b/payu/subcommands/run_cmd.py index 089e07d8..d4d1d7d6 100644 --- a/payu/subcommands/run_cmd.py +++ b/payu/subcommands/run_cmd.py @@ -12,11 +12,12 @@ parameters = {'description': 'Run the model experiment'} arguments = [args.model, args.config, args.initial, args.nruns, - args.laboratory, args.reproduce, args.force] + args.laboratory, args.reproduce, args.force, + args.force_prune_restarts] def runcmd(model_type, config_path, init_run, n_runs, lab_path, - reproduce=False, force=False): + reproduce=False, force=False, force_prune_restarts=False): # Get job submission configuration pbs_config = fsops.read_config(config_path) @@ -24,7 +25,8 @@ def runcmd(model_type, config_path, init_run, n_runs, lab_path, n_runs=n_runs, lab_path=lab_path, reproduce=reproduce, - force=force) + force=force, + force_prune_restarts=force_prune_restarts) # Set the queue # NOTE: Maybe force all jobs on the normal queue @@ -130,7 +132,7 @@ def runscript(): expt.setup() expt.run() - expt.archive() + expt.archive(force_prune_restarts=run_args.force_prune_restarts) # Finished runs if expt.n_runs == 0: diff --git a/setup.py b/setup.py index 25c39a4c..5e870aa2 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ 'yamanifest', 'dateutil', 'tenacity', + 'cftime' ], install_requires=[ 'f90nml >= 0.16', @@ -44,6 +45,7 @@ 'requests[security]', 'python-dateutil', 'tenacity!=7.0.0', + 'cftime' ], tests_require=[ 'pytest', diff --git a/test/common.py b/test/common.py index 867a7938..0001dcfa 100644 --- a/test/common.py +++ b/test/common.py @@ -2,6 +2,8 @@ import os import stat from pathlib import Path +import re +import shutil import yaml @@ -22,6 +24,8 @@ workdir = ctrldir / 'work' payudir = tmpdir / 'payu' +archive_dir = labdir / 'archive' +expt_archive_dir = archive_dir / ctrldir_basename expt_workdir = labdir / 'work' / ctrldir_basename print('tmpdir: {}'.format(tmpdir)) @@ -48,6 +52,7 @@ } + @contextmanager def cd(directory): """ @@ -160,6 +165,42 @@ def make_restarts(fnames=None): make_random_file(restartdir/fname, 5000**2 + i) +def make_expt_archive_dir(type='restart', index=0, additional_path=None): 
+ """Make experiment archive directory of given type (i.e. restart or + output)""" + dir_path = os.path.join(expt_archive_dir, f'{type}{index:03d}') + if additional_path: + dir_path = os.path.join(dir_path, additional_path) + + os.makedirs(dir_path) + return dir_path + + +def list_expt_archive_dirs(type='restart', full_path=True): + """Return a list of output/restart paths in experiment archive + path""" + dirs = [] + if os.path.exists(expt_archive_dir): + if os.path.isdir(expt_archive_dir): + naming_pattern = re.compile(fr"^{type}[0-9][0-9][0-9]$") + dirs = [d for d in os.listdir(expt_archive_dir) + if naming_pattern.match(d)] + + if full_path: + dirs = [os.path.join(expt_archive_dir, d) for d in dirs] + return dirs + + +def remove_expt_archive_dirs(type='restart'): + """Remove experiment archive directories of the given type (i.e. restart + or output). Useful for cleaning up archive between tests""" + for dir_path in list_expt_archive_dirs(type): + try: + shutil.rmtree(dir_path) + except Exception as e: + print(e) + + def make_all_files(): make_inputs() make_exe() diff --git a/test/models/test_mom.py b/test/models/test_mom.py new file mode 100644 index 00000000..2847e181 --- /dev/null +++ b/test/models/test_mom.py @@ -0,0 +1,157 @@ +import copy +import os +import shutil + +import pytest +import cftime + +import payu + +from test.common import cd +from test.common import tmpdir, ctrldir, labdir +from test.common import config as config_orig +from test.common import write_config +from test.common import make_all_files +from test.common import list_expt_archive_dirs +from test.common import make_expt_archive_dir, remove_expt_archive_dirs + + +verbose = True + +# Global config +config = copy.deepcopy(config_orig) + + +def setup_module(module): + """ + Put any test-wide setup code in here, e.g. creating test files + """ + if verbose: + print("setup_module module:%s" % module.__name__) + + # Should be taken care of by teardown, in case remnants lying around + try: + shutil.rmtree(tmpdir) + except FileNotFoundError: + pass + + try: + tmpdir.mkdir() + labdir.mkdir() + ctrldir.mkdir() + make_all_files() + except Exception as e: + print(e) + + # Write config + test_config = config + test_config['model'] = 'mom' + write_config(test_config) + + +def teardown_module(module): + """ + Put any test-wide teardown code in here, e.g. 
removing test outputs + """ + if verbose: + print("teardown_module module:%s" % module.__name__) + + try: + shutil.rmtree(tmpdir) + print('removing tmp') + except Exception as e: + print(e) + + +@pytest.fixture(autouse=True) +def teardown(): + # Run test + yield + + # Remove any created restart files + remove_expt_archive_dirs(type='restart') + + +def make_ocean_restart_dir(start_dt, + run_dt, + calendar, + restart_index=0, + additional_path=None): + """Create tests restart directory with ocean_solo.res file""" + # Create restart directory + restart_path = make_expt_archive_dir(type='restart', + index=restart_index, + additional_path=additional_path) + + # Create ocean_solo.res file + lines = (f"{calendar:6d} " + "(Calendar: no_calendar=0, thirty_day_months=1, julian=2, " + "gregorian=3, noleap=4)\n") + + init_dt_desc = "Model start time: year, month, day, hour, minute, second" + lines += format_ocean_solo_datetime_line(start_dt, init_dt_desc) + + run_dt_desc = "Current model time: year, month, day, hour, minute, second" + lines += format_ocean_solo_datetime_line(run_dt, run_dt_desc) + + ocean_solo_path = os.path.join(restart_path, "ocean_solo.res") + with open(ocean_solo_path, "w") as ocean_solo_file: + ocean_solo_file.write(lines) + + +def format_ocean_solo_datetime_line(dt_string, description): + """Format datetime string to match actual output files""" + dt_array = convert_date_string_to_array(dt_string) + year, month, day, hour, minute, second = dt_array + return ( + f"{year:6d}{month:6d}{day:6d}{hour:6d}{minute:6d}{second:6d}" + f" {description}\n" + ) + + +def convert_date_string_to_array(dt_string): + """Convert string of YYYY-MM-DD hh:mm:ss to array of integers of + [year, month, day, hour, minute, second] format""" + date, time = dt_string.split(' ') + year, month, day = map(int, date.split('-')) + hour, minute, second = map(int, time.split(':')) + return [year, month, day, hour, minute, second] + + +@pytest.mark.parametrize( + "run_dt, calendar, expected_cftime", + [ + ( + "1900-02-01 00:00:00", + 4, + cftime.datetime(1900, 2, 1, calendar="noleap") + ), + ( + "1900-06-01 00:00:00", + 3, + cftime.datetime(1900, 6, 1, calendar="proleptic_gregorian") + ), + ( + "1000-11-12 12:23:34", + 2, + cftime.datetime(1000, 11, 12, 12, 23, 34, + calendar="julian") + ), + ( + "1900-02-30 00:00:00", + 1, + cftime.datetime(1900, 2, 30, calendar="360_day") + ), + ]) +def test_mom_get_restart_datetime(run_dt, calendar, expected_cftime): + # Create 1 mom restart directory + start_dt = "1900-01-01 00:00:00" + make_ocean_restart_dir(start_dt, run_dt, calendar) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + + restart_path = list_expt_archive_dirs()[0] + parsed_run_dt = expt.model.get_restart_datetime(restart_path) + assert parsed_run_dt == expected_cftime diff --git a/test/requirements_test.txt b/test/requirements_test.txt index 9f77fcef..cbfe8068 100644 --- a/test/requirements_test.txt +++ b/test/requirements_test.txt @@ -6,3 +6,4 @@ mnctools Sphinx pytest-cov numpy>=1.16.0 +cftime \ No newline at end of file diff --git a/test/test_calendar.py b/test/test_calendar.py new file mode 100644 index 00000000..8ae015e5 --- /dev/null +++ b/test/test_calendar.py @@ -0,0 +1,152 @@ +import cftime +import pytest + +from payu.calendar import parse_date_offset, DatetimeOffset + + +@pytest.mark.parametrize( + "offset, initial_dt, expected", + [ + ( + "5YS", + cftime.datetime(year=2000, month=10, day=31, + hour=10, minute=5, 
second=2, + calendar="noleap"), + cftime.datetime(year=2005, month=1, day=1, calendar="noleap"), + ), + ( + "1YS", + cftime.datetime(year=1700, month=2, day=15, + hour=11, minute=5, second=2, + calendar="proleptic_gregorian"), + cftime.datetime(year=1701, month=1, day=1, + calendar="proleptic_gregorian"), + ), + ( + "20YS", + cftime.datetime(year=2200, month=2, day=30, + hour=1, minute=4, second=20, + calendar="360_day"), + cftime.datetime(year=2220, month=1, day=1, calendar="360_day"), + ), + ], +) +def test_year_start_date_offset_add_to_datetime(offset, initial_dt, expected): + date_offset = parse_date_offset(offset) + next_dt = date_offset.add_to_datetime(initial_dt) + + assert next_dt == expected + + +@pytest.mark.parametrize( + "offset, initial_dt, expected", + [ + ( + "5MS", + cftime.datetime(year=2000, month=10, day=1, + hour=10, minute=5, second=2, + calendar="noleap"), + cftime.datetime(year=2001, month=3, day=1, calendar="noleap"), + ), + ( + "13MS", + cftime.datetime(year=1500, month=10, day=30, + hour=10, minute=5, second=2, + calendar="360_day"), + cftime.datetime(year=1501, month=11, day=1, calendar="360_day"), + ), + ( + "24MS", + cftime.datetime(year=2200, month=1, day=1, calendar="gregorian"), + cftime.datetime(year=2202, month=1, day=1, calendar="gregorian"), + ), + ], +) +def test_month_start_date_offset_add_to_datetime(offset, initial_dt, expected): + date_offset = parse_date_offset(offset) + next_dt = date_offset.add_to_datetime(initial_dt) + + assert next_dt == expected + + +@pytest.mark.parametrize( + "offset, initial_dt, expected", + [ + ( + "100S", + cftime.datetime(year=2000, month=10, day=31, + hour=10, minute=5, second=2, + calendar="julian"), + cftime.datetime(year=2000, month=10, day=31, + hour=10, minute=6, second=42, + calendar="julian"), + ), + ( + "25H", + cftime.datetime(year=1500, month=10, day=30, + hour=10, minute=5, second=2, + calendar="360_day"), + cftime.datetime(year=1500, month=11, day=1, + hour=11, minute=5, second=2, + calendar="360_day") + ), + ( + "3W", + cftime.datetime(year=2200, month=1, day=1), + cftime.datetime(year=2200, month=1, day=22), + ), + ( + "4T", + cftime.datetime( + year=2200, month=1, day=1, hour=0, minute=0, second=0 + ), + cftime.datetime( + year=2200, month=1, day=1, hour=0, minute=4, second=0 + ), + ), + ( + "30D", + cftime.datetime(year=2200, month=2, day=1, calendar="noleap"), + cftime.datetime(year=2200, month=3, day=3, calendar="noleap"), + ), + ], +) +def test_timedelta_date_offset_add_to_datetime(offset, initial_dt, expected): + # Week, Day, Minute, Hour, Second offsets + date_offset = parse_date_offset(offset) + next_dt = date_offset.add_to_datetime(initial_dt) + + assert next_dt == expected + + +def test_date_offset_add_to_datetime_invalid_dt(): + initial_dt = "stringInput" + datetime_offset = DatetimeOffset(unit="YS", magnitude=2) + + with pytest.raises(TypeError) as exc_info: + datetime_offset.add_to_datetime(initial_dt) + + expected_error = ( + "Invalid initial datetime type: <class 'str'>. " + "Expected type: cftime.datetime" + ) + assert str(exc_info.value) == expected_error + + +def test_date_offset_unsupported_offset(): + with pytest.raises(ValueError) as exc_info: + DatetimeOffset(unit="Y", magnitude=2) + + expected_error = ( + "Unsupported datetime offset: Y. 
" + "Supported offsets: YS, MS, W, D, H, T, S" + ) + assert str(exc_info.value) == expected_error + + +def test_parse_date_offset_no_offset_magnitude(): + with pytest.raises(ValueError) as exc_info: + parse_date_offset("YS") + + expected_error = "No numerical value given for offset: YS" + assert str(exc_info.value) == expected_error diff --git a/test/test_cli.py b/test/test_cli.py index b702075a..b2b8933c 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -132,6 +132,7 @@ def test_parse_run(): assert args.pop('force') is False assert args.pop('init_run') is None assert args.pop('n_runs') is None + assert args.pop('force_prune_restarts') is False assert len(args) == 0 @@ -143,7 +144,8 @@ def test_parse_run(): '--force ' '--initial 99 ' '--nruns 999 ' - '--reproduce'.format(cmd=cmd)) + '--reproduce ' + '--force-prune-restarts'.format(cmd=cmd)) args = vars(parser.parse_args(arguments[1:])) @@ -158,6 +160,7 @@ def test_parse_run(): assert args.pop('force') is True assert args.pop('init_run') == '99' assert args.pop('n_runs') == '999' + assert args.pop('force_prune_restarts') is True assert len(args) == 0 @@ -169,7 +172,8 @@ def test_parse_run(): '-f ' '-i 99 ' '-n 999 ' - '-r'.format(cmd=cmd)) + '-r ' + '-F'.format(cmd=cmd)) args = vars(parser.parse_args(arguments[1:])) @@ -184,6 +188,7 @@ def test_parse_run(): assert args.pop('force') is True assert args.pop('init_run') == '99' assert args.pop('n_runs') == '999' + assert args.pop('force_prune_restarts') is True assert len(args) == 0 diff --git a/test/test_prune_restarts.py b/test/test_prune_restarts.py new file mode 100644 index 00000000..253e02b1 --- /dev/null +++ b/test/test_prune_restarts.py @@ -0,0 +1,224 @@ +import copy +import shutil + +import pytest + +import payu + +from test.common import cd +from test.common import tmpdir, ctrldir, labdir +from test.common import config as config_orig +from test.common import write_config +from test.common import make_all_files +from test.common import remove_expt_archive_dirs +from test.models.test_mom import make_ocean_restart_dir + +verbose = True + +# Global config +config = copy.deepcopy(config_orig) + + +def setup_module(module): + """ + Put any test-wide setup code in here, e.g. creating test files + """ + if verbose: + print("setup_module module:%s" % module.__name__) + + # Should be taken care of by teardown, in case remnants lying around + try: + shutil.rmtree(tmpdir) + except FileNotFoundError: + pass + + try: + tmpdir.mkdir() + labdir.mkdir() + ctrldir.mkdir() + make_all_files() + except Exception as e: + print(e) + + +def teardown_module(module): + """ + Put any test-wide teardown code in here, e.g. removing test outputs + """ + if verbose: + print("teardown_module module:%s" % module.__name__) + + try: + shutil.rmtree(tmpdir) + print('removing tmp') + except Exception as e: + print(e) + + +@pytest.fixture(autouse=True) +def teardown(): + # Run test + yield + + # Remove any created restart files + remove_expt_archive_dirs(type='restart') + + +def create_test_2Y_1_month_frequency_restarts(): + """Create 2 years + 1 month worth of mom restarts directories + with 1 month runtimes - starting from 1900/02/01 to 1902/02/01 + e.g (run_date, restart_directory) + (1900/02/01, restart000) + (1900/03/01, restart001) + ... 
+ (1902/02/01, restart024)""" + restart_dts = [] + for year in [1900, 1901, 1902]: + for month in range(1, 13): + if (year == 1900 and month == 1) or (year == 1902 and month > 2): + # Ignore the first date and dates from 1902/03/01 onwards + continue + restart_dts.append(f"{year}-{month}-01 00:00:00") + + for index, run_dt in enumerate(restart_dts): + make_ocean_restart_dir(start_dt="1900-01-01 00:00:00", + run_dt=run_dt, + calendar=4, + restart_index=index, + additional_path='ocean') + + +def write_test_config(restart_freq, restart_history=None): + test_config = copy.deepcopy(config) + test_config['model'] = 'access-om2' + test_config['submodels'] = [ + {'name': 'atmosphere', 'model': 'yatm'}, + {'name': 'ocean', 'model': 'mom'} + ] + test_config['restart_freq'] = restart_freq + if restart_history: + test_config['restart_history'] = restart_history + + write_config(test_config) + + +@pytest.mark.parametrize( + "restart_freq, restart_history, expected_pruned_restarts_indices", + [ + ("1MS", None, []), + ("2MS", None, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23]), + ("2MS", 5, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]), + ("12MS", None, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + ("1YS", None, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + (1, 1, []), + (5, 3, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19, 21]), + (5, 7, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17]), + (5, None, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]) + ]) +def test_force_prune_restarts(restart_freq, + restart_history, + expected_pruned_restarts_indices): + # Test --force-prune-restarts with varying restart_freq and restart_history + + # Create restart files 1900/02/01-restart000 to 1902/02/01-restart024 + create_test_2Y_1_month_frequency_restarts() + + # Set up config + write_test_config(restart_freq, restart_history) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + + # Function to test + restarts_to_prune = expt.get_restarts_to_prune(force=True) + + # Extract out index + restarts_to_prune_indices = [ + int(restart.lstrip('restart')) for restart in restarts_to_prune + ] + + assert restarts_to_prune_indices == expected_pruned_restarts_indices + + +@pytest.mark.parametrize( + "restarts, restart_freq, restart_history, expected_restart_indices", + [ + ([ + (0, "1901-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00"), + (5, "1906-01-01 00:00:00"), + (6, "1907-01-01 00:00:00") + ], "3YS", None, [4, 5]), + ([ + (0, "1901-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00"), + (5, "1906-01-01 00:00:00"), + (6, "1907-01-01 00:00:00") + ], "3YS", 2, [4]), + ([ + (0, "1901-01-01 00:00:00"), + (1, "1902-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00") + ], "2YS", 1, []), + ([ + (0, "1901-01-01 00:00:00"), + (1, "1902-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00") + ], "2YS", None, []), + ([ + (0, "1901-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + ], 2, None, []), + ([ + (0, "1901-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00"), + ], 2, None, [3]), + ([ + (2, "1903-01-01 00:00:00"), + (4, "1905-01-01 00:00:00"), + (6, "1907-01-01 00:00:00"), + (8, "1909-01-01 00:00:00"), + ], 4, None, 
[]), + ]) +def test_prune_restarts(restarts, + restart_freq, + restart_history, + expected_restart_indices): + # Create restart files + for index, datetime in restarts: + make_ocean_restart_dir(start_dt="1900-01-01 00:00:00", + run_dt=datetime, + calendar=4, + restart_index=index, + additional_path='ocean') + + # Set up config + write_test_config(restart_freq, restart_history) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + + # Function to test - Note: with force=False which is default + restarts_to_prune = expt.get_restarts_to_prune() + + # Extract out index + restarts_to_prune_indices = [ + int(restart.lstrip('restart')) for restart in restarts_to_prune + ] + + assert restarts_to_prune_indices == expected_restart_indices
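For reference, a minimal sketch of how the date offsets introduced in ``payu/calendar.py`` above compose for date-based pruning; this assumes ``payu`` and ``cftime`` are importable, and the restart datetime used here is purely illustrative::

    import cftime

    from payu.calendar import parse_date_offset

    # Parse the same date-based frequency string used in the config.rst example
    offset = parse_date_offset("10YS")  # DatetimeOffset(unit="YS", magnitude=10)

    # Datetime of a hypothetical last permanently archived restart (noleap calendar)
    last_kept_dt = cftime.datetime(2000, 7, 1, calendar="noleap")

    # Next datetime from which a restart will be permanently kept:
    # 10 years on, snapped to the start of the year -> 2010-01-01 00:00:00
    print(offset.add_to_datetime(last_kept_dt))

Restarts dated before the returned datetime are treated as intermediate by ``Experiment.get_restarts_to_prune`` and become candidates for pruning.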