From 82df4f8558574c4c2336c2a98ed0d9af5280ba85 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 15 Sep 2023 20:13:09 +1000 Subject: [PATCH 1/5] Add support for date-based restart frequency - Move logic for deciding what restarts to prune to Experiment.prune_restarts() - Extend access-om2 and mom driver to parse restart files for a cftime datetime - Add functionality to calendar.py to parse date-based frequency values and add these intervals to different datetimes --- payu/calendar.py | 111 ++++++++++++++++++++++++++++++- payu/experiment.py | 76 +++++++++++++++------ payu/models/accessom2.py | 8 +++ payu/models/model.py | 5 ++ payu/models/mom.py | 24 +++++++ setup.py | 2 + test/requirements_test.txt | 1 + test/test_calendar.py | 132 +++++++++++++++++++++++++++++++++++++ 8 files changed, 338 insertions(+), 21 deletions(-) create mode 100644 test/test_calendar.py diff --git a/payu/calendar.py b/payu/calendar.py index c433cb82..1c3e5f25 100644 --- a/payu/calendar.py +++ b/payu/calendar.py @@ -1,5 +1,8 @@ -from dateutil.relativedelta import relativedelta import datetime +import re + +from dateutil.relativedelta import relativedelta +import cftime NOLEAP, GREGORIAN = range(2) @@ -86,3 +89,109 @@ def calculate_leapdays(init_date, final_date): # TODO: Internal date correction (e.g. init_date is 1-March or later) return datetime.timedelta(days=leap_days) + + +def add_year_offset_to_datetime(initial_dt, n): + """Return a datetime n years from the initial datetime""" + if isinstance(initial_dt, datetime.datetime): # Standard datetime Calendar + return initial_dt + relativedelta(years=n) + + if isinstance(initial_dt, cftime.datetime): # Non-standard Calendars + return initial_dt.replace(year=initial_dt.year + n) + + +def add_year_start_offset_to_datetime(initial_dt, n): + """Return a datetime at the start of the year - n years from the initial datetime""" + if isinstance(initial_dt, datetime.datetime): + return initial_dt + relativedelta(years=n, month=1, day=1, hour=0, minute=0, second=0) + + if isinstance(initial_dt, cftime.datetime): + return initial_dt.replace(year=initial_dt.year + n, month=1, day=1, hour=0, minute=0, second=0) + + +def add_month_start_offset_to_datetime(initial_dt, n): + """Return a datetime of the start of the month - n months from the initial datetime""" + if isinstance(initial_dt, datetime.datetime): + return initial_dt + relativedelta(months=n, day=1, hour=0, minute=0, second=0) + + if isinstance(initial_dt, cftime.datetime): + year = initial_dt.year + ((initial_dt.month + n - 1) // 12) + month = (initial_dt.month + n - 1) % 12 + 1 + + return initial_dt.replace(year=year, month=month, day=1, hour=0, minute=0, second=0) + + +def add_month_offset_to_datetime(initial_dt, n): + """Return a datetime n months from the initial datetime""" + if isinstance(initial_dt, datetime.datetime): + return initial_dt + relativedelta(months=n) + + if isinstance(initial_dt, cftime.datetime): + year = initial_dt.year + ((initial_dt.month + n - 1) // 12) + month = (initial_dt.month + n - 1) % 12 + 1 + day = initial_dt.day + + max_day_in_month = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31} + if initial_dt.calendar == "noleap": + day = initial_dt.day if initial_dt.day <= max_day_in_month[month] else max_day_in_month[month] + + if initial_dt.calendar == "all_leap": + max_day_in_month[2] = 29 # Every year is a leap year + day = initial_dt.day if initial_dt.day <= max_day_in_month[month] else max_day_in_month[month] + + return initial_dt.replace(year=year, month=month, day=day) + + +def add_timedelta_fn(timedelta): + """Returns a function that adds a timedelta - n times to an initial datetime""" + # Standard and cftime datetimes supports timedelta operations + return lambda initial_dt, n: initial_dt + n * timedelta + + +class DatetimeOffset: + + def __init__(self, unit, magnitude): + # Dictionary of 'offset units' to functions which takes an initial_dt (Standard or cftime datetime) + # and n (multiplier of the offset unit), and returns the next datetime with the offset added + supported_datetime_offsets = { + 'Y': add_year_offset_to_datetime, + 'YS': add_year_start_offset_to_datetime, + 'M': add_month_offset_to_datetime, + 'MS': add_month_start_offset_to_datetime, + 'W': add_timedelta_fn(datetime.timedelta(weeks=1)), + 'D': add_timedelta_fn(datetime.timedelta(days=1)), + 'H': add_timedelta_fn(datetime.timedelta(hours=1)), + 'T': add_timedelta_fn(datetime.timedelta(minutes=1)), + 'S': add_timedelta_fn(datetime.timedelta(seconds=1)) + } + assert unit in supported_datetime_offsets, f"payu: error: unsupported datetime offset: {unit}" + self.unit = unit + self.magnitude = magnitude + self.add_offset_to_datetime = supported_datetime_offsets[unit] + + + def add_to_datetime(self, initial_dt): + """Takes a datetime object (standard or cftime datetime), + and returns a datetime with the offset added if possible, returns None otherwise""" + + if self.unit in ['M', 'Y'] and isinstance(initial_dt, cftime.datetime): + if initial_dt.datetime_compatible: + # Transform cftime datetime to standard datetime + initial_dt = datetime.datetime(initial_dt.year, initial_dt.month, initial_dt.day, + initial_dt.hour, initial_dt.minute, initial_dt.second) + elif initial_dt.calendar not in ["360_day", "noleap", "all_leap"]: + raise ValueError(f"Calendar type {initial_dt.calendar} is unsupported for given date offset {self.unit}") + + if not (isinstance(initial_dt, cftime.datetime) or isinstance(initial_dt, datetime.datetime)): + raise TypeError(f"Invalid initial datetime type: {type(initial_dt)}. Expected types: cftime.datetime or datetime.datetime") + + return self.add_offset_to_datetime(initial_dt=initial_dt, n=self.magnitude) + + +def parse_date_offset(offset): + """Parse a given string date offset string, and returns an DatetimeOffset""" + match = re.search('[0-9]+', offset) + assert match is not None, f"payu: error: no numerical value given for offset: {offset}" + n = match.group() + unit = offset.lstrip(n) + return DatetimeOffset(unit=unit, magnitude=int(n)) \ No newline at end of file diff --git a/payu/experiment.py b/payu/experiment.py index 751a84cd..780a08f8 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -32,6 +32,7 @@ import payu.profilers from payu.runlog import Runlog from payu.manifest import Manifest +from payu.calendar import parse_date_offset # Environment module support on vayu # TODO: To be removed @@ -742,27 +743,12 @@ def archive(self): movetree(self.work_path, self.output_path) - # Remove old restart files - # TODO: Move to subroutine - restart_freq = self.config.get('restart_freq', default_restart_freq) - restart_history = self.config.get('restart_history', - default_restart_history) - # Remove any outdated restart files - prior_restart_dirs = self.list_output_dirs(output_type="restart") - - for res_dir in prior_restart_dirs: - - res_idx = int(res_dir.lstrip('restart')) - if (self.repeat_run or - (not res_idx % restart_freq == 0 and - res_idx <= (self.counter - restart_history))): - - res_path = os.path.join(self.archive_path, res_dir) - - # Only delete real directories; ignore symbolic restart links - if (os.path.isdir(res_path) and not os.path.islink(res_path)): - shutil.rmtree(res_path) + restarts_to_prune = self.prune_restarts() + for restart_path in restarts_to_prune: + # Only delete real directories; ignore symbolic restart links + if (os.path.isdir(restart_path) and not os.path.islink(restart_path)): + shutil.rmtree(restart_path) # Ensure dynamic library support for subsequent python calls ld_libpaths = os.environ.get('LD_LIBRARY_PATH', None) @@ -996,6 +982,56 @@ def sweep(self, hard_sweep=False): if os.path.islink(self.work_sym_path): print('Removing symlink {0}'.format(self.work_sym_path)) os.remove(self.work_sym_path) + + def prune_restarts(self, from_n_restart=0, to_n_restart=None): + """Returns a list of restart directories that can be pruned""" + restart_freq = self.config.get('restart_freq', 5) + restart_history = self.config.get('restart_history', default_restart_history) + + # All restarts directories + restarts = [d for d in os.listdir(self.archive_path) + if d.startswith('restart')] + # Sort restarts based on counter - in increasing date order + restarts.sort(key=lambda d: int(d.lstrip('restart'))) + + # Note: from_n_restart and to_end_restart could be useful for inspecting only the more recent restarts + if to_n_restart is None: + # Keep restart_history n restarts + to_n_restart = -restart_history + restarts = restarts[from_n_restart:to_n_restart] + + restarts_to_prune = [] + if self.repeat_run: + # TODO: Previous logic was to prune all restarts if self.repeat_run - is that still the case? + restarts_to_prune = [os.path.join(self.archive_path, restart) for restart in restarts] + elif isinstance(restart_freq, int): + # Using integer frequency to prune restarts + for restart in restarts: + restart_idx = int(restart.lstrip('restart')) + if not restart_idx % restart_freq == 0: + restart_path = os.path.join(self.archive_path, restart) + restarts_to_prune.append(restart_path) + else: + # Using date-based frequency to prune restarts + try: + date_offset = parse_date_offset(restart_freq) + + next_datetime = None + for restart in restarts: + restart_path = os.path.join(self.archive_path, restart) + + # Use model-driver to parse restart files for a datetime + restart_datetime = self.model.get_restart_datetime(restart_path) + + if next_datetime is not None and restart_datetime < next_datetime: + restarts_to_prune.append(restart_path) + else: + # Keep the earliest datetime and use last kept datetime as point of reference when adding the next time interval + next_datetime = date_offset.add_to_datetime(restart_datetime) + except Exception as e: + print("payu: error occured during date-based restart pruning:", e) + + return restarts_to_prune def enable_core_dump(): diff --git a/payu/models/accessom2.py b/payu/models/accessom2.py index 6f251df9..7c6d605d 100644 --- a/payu/models/accessom2.py +++ b/payu/models/accessom2.py @@ -86,3 +86,11 @@ def archive(self): def collate(self): pass + + def get_restart_datetime(self, restart_path): + for model in self.expt.models: + if model.model_type == 'mom': + mom_restart_path = os.path.join(restart_path, model.name) + return model.get_restart_datetime(mom_restart_path) + + raise NotImplementedError('access-om2 date-based restart pruning currently only uses the mom sub-model to find restart dates') diff --git a/payu/models/model.py b/payu/models/model.py index 8c6b0875..807b1877 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -470,3 +470,8 @@ def profile(self): if f.endswith('.cubex')][0] cmd = 'scorep-score {0}'.format(cube_path) sp.check_call(shlex.split(cmd)) + + def get_restart_datetime(self, restart_path): + """Given a restart path, parse the restart files and return a cftime or standard datetime object to be used for + date-based restart pruning""" + raise NotImplementedError \ No newline at end of file diff --git a/payu/models/mom.py b/payu/models/mom.py index 95f958b4..31c76dd1 100644 --- a/payu/models/mom.py +++ b/payu/models/mom.py @@ -10,6 +10,8 @@ import f90nml import payu.envmod +import cftime + from payu.models.fms import Fms from payu.fsops import mkdir_p, make_symlink @@ -224,3 +226,25 @@ def create_mask_table(self, input_nml): land_cells = int(fmask.readline()) return land_cells + + + def get_restart_datetime(self, restart_path): + """Given a restart path, parse the restart files and return a cftime datetime (for date-based restart pruning)""" + ocean_solo_path = os.path.join(restart_path, 'ocean_solo.res') + with open(ocean_solo_path, 'r') as ocean_solo: + lines = ocean_solo.readlines() + + calendar_int = int(lines[0].split()[0]) + cftime_calendars = { + 1: "360_day", + 2: "julian", + 3: "gregorian", + 4: "noleap" + } + calendar = cftime_calendars[calendar_int] + + last_date_line = lines[-1].split() + year, month, day, hour, minute, second = [int(i) for i in last_date_line[:6]] + return cftime.datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second, + calendar=calendar) + \ No newline at end of file diff --git a/setup.py b/setup.py index 25c39a4c..5e870aa2 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ 'yamanifest', 'dateutil', 'tenacity', + 'cftime' ], install_requires=[ 'f90nml >= 0.16', @@ -44,6 +45,7 @@ 'requests[security]', 'python-dateutil', 'tenacity!=7.0.0', + 'cftime' ], tests_require=[ 'pytest', diff --git a/test/requirements_test.txt b/test/requirements_test.txt index 9f77fcef..cbfe8068 100644 --- a/test/requirements_test.txt +++ b/test/requirements_test.txt @@ -6,3 +6,4 @@ mnctools Sphinx pytest-cov numpy>=1.16.0 +cftime \ No newline at end of file diff --git a/test/test_calendar.py b/test/test_calendar.py new file mode 100644 index 00000000..638923ab --- /dev/null +++ b/test/test_calendar.py @@ -0,0 +1,132 @@ +import datetime + +import cftime +import pytest + +from payu.calendar import parse_date_offset, DatetimeOffset + +def assert_add_date_offset(initial_dt, test_data): + """Arguments: + - initial_dt - initial datetime, + - test_data which dictionary mapping date-offset strings to expected datetime values""" + for date_offset_str, expected_next_dt in test_data.items(): + date_offset = parse_date_offset(date_offset_str) + + next_dt = date_offset.add_to_datetime(initial_dt) + + assert next_dt == expected_next_dt + + +def test_date_offset_add_to_datetime_cftime_no_leap(): + # "noleap" cftime calendar + initial_dt = cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, + calendar='noleap') + + test_data = { + '5Y': cftime.datetime(year=2005, month=10, day=31, hour=10, minute=5, second=2, + calendar='noleap'), + '5YS': cftime.datetime(year=2005, month=1, day=1, hour=0, minute=0, second=0, + calendar='noleap'), + '5M': cftime.datetime(year=2001, month=3, day=31, hour=10, minute=5, second=2, + calendar='noleap'), + '5MS': cftime.datetime(year=2001, month=3, day=1, hour=0, minute=0, second=0, + calendar='noleap'), + '4M': cftime.datetime(year=2001, month=2, day=28, hour=10, minute=5, second=2, + calendar='noleap') + } + + assert_add_date_offset(initial_dt, test_data) + + +def test_date_offset_add_to_datetime_cftime_all_leap(): + # "all_leap" cftime calendar + initial_dt = cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, + calendar='all_leap') + + test_data = { + '5Y': cftime.datetime(year=2005, month=10, day=31, hour=10, minute=5, second=2, + calendar='all_leap'), + '5YS': cftime.datetime(year=2005, month=1, day=1, hour=0, minute=0, second=0, + calendar='all_leap'), + '5M': cftime.datetime(year=2001, month=3, day=31, hour=10, minute=5, second=2, + calendar='all_leap'), + '5MS': cftime.datetime(year=2001, month=3, day=1, hour=0, minute=0, second=0, + calendar='all_leap'), + '4M': cftime.datetime(year=2001, month=2, day=29, hour=10, minute=5, second=2, + calendar='all_leap') + } + + assert_add_date_offset(initial_dt, test_data) + + +def test_date_offset_add_to_datetime_cftime_360_day(): + # "360_day" cftime calendar + initial_dt = cftime.datetime(year=2000, month=10, day=30, hour=10, minute=5, second=2, + calendar='360_day') + + test_data = { + '5Y': cftime.datetime(year=2005, month=10, day=30, hour=10, minute=5, second=2, + calendar='360_day'), + '5YS': cftime.datetime(year=2005, month=1, day=1, calendar='360_day'), + '5M': cftime.datetime(year=2001, month=3, day=30, hour=10, minute=5, second=2, + calendar='360_day'), + '5MS': cftime.datetime(year=2001, month=3, day=1, calendar='360_day') + } + + assert_add_date_offset(initial_dt, test_data) + +def test_date_offset_add_to_datetime_standard(): + # Standard datetime and cftime standard calendar + initial_dts = [ + cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, + calendar='standard'), + datetime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2) + ] + + test_data = { + '5Y': datetime.datetime(year=2005, month=10, day=31, hour=10, minute=5, second=2), + '5YS': datetime.datetime(year=2005, month=1, day=1), + '5M': datetime.datetime(year=2001, month=3, day=31, hour=10, minute=5, second=2), + '5MS': datetime.datetime(year=2001, month=3, day=1) + } + + for initial_dt in initial_dts: + assert_add_date_offset(initial_dt, test_data) + + +def test_date_offset_add_to_datetime_unsupported_calendar(): + # Currently Julian calendar isn't supported for Y, M offsets + initial_dt = cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, + calendar='julian') + + for unit in ['Y', 'M']: + datetime_offset = DatetimeOffset(unit=unit, magnitude=1) + with pytest.raises(ValueError) as exc_info: + datetime_offset.add_to_datetime(initial_dt) + + assert str(exc_info.value) == f"Calendar type julian is unsupported for given date offset {unit}" + + +def test_date_offset_add_to_datetime_invalid_dt(): + initial_dt = "stringInput" + + datetime_offset = DatetimeOffset(unit='Y', magnitude=2) + with pytest.raises(TypeError) as exc_info: + datetime_offset.add_to_datetime(initial_dt) + + expected_error = "Invalid initial datetime type: . Expected types: cftime.datetime or datetime.datetime" + assert str(exc_info.value) == expected_error + + +def test_date_offset_add_to_datetime_using_timedelta(): + initial_dt = datetime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2) + + test_data = { + "100S": datetime.datetime(year=2000, month=10, day=31, hour=10, minute=6, second=42), + "2H": datetime.datetime(year=2000, month=10, day=31, hour=12, minute=5, second=2), + "3W": datetime.datetime(year=2000, month=11, day=21, hour=10, minute=5, second=2), + "4T": datetime.datetime(year=2000, month=10, day=31, hour=10, minute=9, second=2), + "5D": datetime.datetime(year=2000, month=11, day=5, hour=10, minute=5, second=2) + } + + assert_add_date_offset(initial_dt, test_data) \ No newline at end of file From 0b922ce9ada428d483f6e52f378b5c5ee54babef Mon Sep 17 00:00:00 2001 From: jo-basevi Date: Fri, 22 Sep 2023 10:44:33 +1000 Subject: [PATCH 2/5] Add tests and documentation for date-based restart pruning - remove logic for Y,M frequency offsets as YS/MS may be sufficient in this case - rewrite/parametrize calendar tests - add tests for mom-driver parsing restart files - add integration tests for prune_restarts and access-om2 model driver - add documentation on date-based restart freq and restart_history - refactor make/list/del restart dir test code to common --- docs/source/config.rst | 24 +++- payu/calendar.py | 171 ++++++++++++------------ payu/experiment.py | 61 +++++---- payu/models/accessom2.py | 8 +- payu/models/model.py | 10 +- payu/models/mom.py | 28 ++-- test/common.py | 44 +++++++ test/models/test_mom.py | 150 +++++++++++++++++++++ test/test_calendar.py | 254 +++++++++++++++++++----------------- test/test_prune_restarts.py | 133 +++++++++++++++++++ 10 files changed, 629 insertions(+), 254 deletions(-) create mode 100644 test/models/test_mom.py create mode 100644 test/test_prune_restarts.py diff --git a/docs/source/config.rst b/docs/source/config.rst index e9c3c4d9..55991481 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -192,13 +192,23 @@ configuration. Specifies the rate of saved restart files. For the default rate of 5, we keep the restart files for every fifth run (``restart004``, ``restart009``, ``restart014``, etc.). - - Intermediate restarts are not deleted until a permanently archived restart - has been produced. For example, if we have just completed run ``11``, then - we keep ``restart004``, ``restart009``, ``restart010``, and ``restart011``. - Restarts 10 through 13 are not deleted until ``restart014`` has been saved. - - ``restart_freq: 1`` saves all restart files. + Using ``restart_freq: 1`` will save all restart files. + + For both integer and date-based restart frequency, the first restart and, + by default, the 5 latest restarts are saved. + + To use a date-based restart frequency, specify a number with a time unit. + The supported time units are ``YS`` - year-start, ``MS`` - month-start, + ``W`` - week, ``D`` - day, ``H`` - hour, ``T`` - minute and ``S`` - second. + For example, ``restart_freq: 10YS`` would save earliest restart of the year, + 10 years from the last permanently saved restart's datetime. + + Please note that currently, only ACCESS-OM2 and MOM models support + date-based restart frequency, as it depends the payu model driver being + able to parse restarts files for a datetime. + +``restart_history`` (*Default:* ``5``) + Specifies the number of latest restart files to save *The following model-based tags are typically not configured* diff --git a/payu/calendar.py b/payu/calendar.py index 1c3e5f25..9037f36e 100644 --- a/payu/calendar.py +++ b/payu/calendar.py @@ -20,8 +20,7 @@ def int_to_date(date): def date_to_int(date): - - return (date.year * 10**4 + date.month * 10**2 + date.day) + return date.year * 10**4 + date.month * 10**2 + date.day def runtime_from_date(start_date, years, months, days, seconds, caltype): @@ -31,8 +30,9 @@ def runtime_from_date(start_date, years, months, days, seconds, caltype): Ignores Feb 29 for caltype == NOLEAP. """ - end_date = start_date + relativedelta(years=years, months=months, - days=days) + end_date = start_date + relativedelta( + years=years, months=months, days=days + ) runtime = end_date - start_date if caltype == NOLEAP: @@ -70,7 +70,6 @@ def get_leapdays(init_date, final_date): leap_days = 0 while curr_date != final_date: - if curr_date.month == 2 and curr_date.day == 29: leap_days += 1 @@ -91,107 +90,109 @@ def calculate_leapdays(init_date, final_date): return datetime.timedelta(days=leap_days) -def add_year_offset_to_datetime(initial_dt, n): - """Return a datetime n years from the initial datetime""" - if isinstance(initial_dt, datetime.datetime): # Standard datetime Calendar - return initial_dt + relativedelta(years=n) - - if isinstance(initial_dt, cftime.datetime): # Non-standard Calendars - return initial_dt.replace(year=initial_dt.year + n) - - def add_year_start_offset_to_datetime(initial_dt, n): - """Return a datetime at the start of the year - n years from the initial datetime""" - if isinstance(initial_dt, datetime.datetime): - return initial_dt + relativedelta(years=n, month=1, day=1, hour=0, minute=0, second=0) - - if isinstance(initial_dt, cftime.datetime): - return initial_dt.replace(year=initial_dt.year + n, month=1, day=1, hour=0, minute=0, second=0) + """Return a cftime datetime at the start of the year, that is n years + from the initial datetime""" + return cftime.datetime( + year=initial_dt.year + n, + month=1, + day=1, + hour=0, + minute=0, + second=0, + calendar=initial_dt.calendar, + ) def add_month_start_offset_to_datetime(initial_dt, n): - """Return a datetime of the start of the month - n months from the initial datetime""" - if isinstance(initial_dt, datetime.datetime): - return initial_dt + relativedelta(months=n, day=1, hour=0, minute=0, second=0) - - if isinstance(initial_dt, cftime.datetime): - year = initial_dt.year + ((initial_dt.month + n - 1) // 12) - month = (initial_dt.month + n - 1) % 12 + 1 - - return initial_dt.replace(year=year, month=month, day=1, hour=0, minute=0, second=0) - - -def add_month_offset_to_datetime(initial_dt, n): - """Return a datetime n months from the initial datetime""" - if isinstance(initial_dt, datetime.datetime): - return initial_dt + relativedelta(months=n) - - if isinstance(initial_dt, cftime.datetime): - year = initial_dt.year + ((initial_dt.month + n - 1) // 12) - month = (initial_dt.month + n - 1) % 12 + 1 - day = initial_dt.day - - max_day_in_month = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31} - if initial_dt.calendar == "noleap": - day = initial_dt.day if initial_dt.day <= max_day_in_month[month] else max_day_in_month[month] - - if initial_dt.calendar == "all_leap": - max_day_in_month[2] = 29 # Every year is a leap year - day = initial_dt.day if initial_dt.day <= max_day_in_month[month] else max_day_in_month[month] - - return initial_dt.replace(year=year, month=month, day=day) - + """Return a cftime datetime of the start of the month, that is n months + from the initial datetime""" + years_to_add = (initial_dt.month + n - 1) // 12 + months_to_add = n - years_to_add * 12 + + return cftime.datetime( + year=initial_dt.year + years_to_add, + month=initial_dt.month + months_to_add, + day=1, + hour=0, + minute=0, + second=0, + calendar=initial_dt.calendar, + ) + def add_timedelta_fn(timedelta): - """Returns a function that adds a timedelta - n times to an initial datetime""" - # Standard and cftime datetimes supports timedelta operations + """Returns a function that takes initial datetime and multiplier n, + and returns a datetime that is n * offset from the initial datetime""" + # cftime datetimes supports timedelta operations return lambda initial_dt, n: initial_dt + n * timedelta class DatetimeOffset: - + """A utility class for adding various time offsets to cftime datetimes. + + Parameters: + unit (str): The unit of the time offset. Supported units are: + - "YS" for years (start of the year) + - "MS" for months (start of the month) + - "W" for weeks + - "D" for days + - "H" for hours + - "T" for minutes + - "S" for seconds + magnitude (int): The magnitude of the time offset. + + Methods: + - `add_to_datetime(initial_dt: cftime.datetime) -> cftime.datetime`: + Adds the specified time offset to the given cftime datetime and + returns the resulting datetime. + + Attributes: + - unit (str): The unit of the time offset. + - magnitude (int): The magnitude of the time offset. + """ + def __init__(self, unit, magnitude): - # Dictionary of 'offset units' to functions which takes an initial_dt (Standard or cftime datetime) - # and n (multiplier of the offset unit), and returns the next datetime with the offset added supported_datetime_offsets = { - 'Y': add_year_offset_to_datetime, - 'YS': add_year_start_offset_to_datetime, - 'M': add_month_offset_to_datetime, - 'MS': add_month_start_offset_to_datetime, - 'W': add_timedelta_fn(datetime.timedelta(weeks=1)), - 'D': add_timedelta_fn(datetime.timedelta(days=1)), - 'H': add_timedelta_fn(datetime.timedelta(hours=1)), - 'T': add_timedelta_fn(datetime.timedelta(minutes=1)), - 'S': add_timedelta_fn(datetime.timedelta(seconds=1)) + "YS": add_year_start_offset_to_datetime, + "MS": add_month_start_offset_to_datetime, + "W": add_timedelta_fn(datetime.timedelta(weeks=1)), + "D": add_timedelta_fn(datetime.timedelta(days=1)), + "H": add_timedelta_fn(datetime.timedelta(hours=1)), + "T": add_timedelta_fn(datetime.timedelta(minutes=1)), + "S": add_timedelta_fn(datetime.timedelta(seconds=1)), } - assert unit in supported_datetime_offsets, f"payu: error: unsupported datetime offset: {unit}" + if unit not in supported_datetime_offsets: + raise ValueError( + f"Unsupported datetime offset: {unit}. " + "Supported offsets: YS, MS, W, D, H, T, S" + ) self.unit = unit self.magnitude = magnitude - self.add_offset_to_datetime = supported_datetime_offsets[unit] - + self._add_offset_to_datetime = supported_datetime_offsets[unit] def add_to_datetime(self, initial_dt): - """Takes a datetime object (standard or cftime datetime), - and returns a datetime with the offset added if possible, returns None otherwise""" + """Takes an initial cftime datetime, + and returns a datetime with the offset added""" - if self.unit in ['M', 'Y'] and isinstance(initial_dt, cftime.datetime): - if initial_dt.datetime_compatible: - # Transform cftime datetime to standard datetime - initial_dt = datetime.datetime(initial_dt.year, initial_dt.month, initial_dt.day, - initial_dt.hour, initial_dt.minute, initial_dt.second) - elif initial_dt.calendar not in ["360_day", "noleap", "all_leap"]: - raise ValueError(f"Calendar type {initial_dt.calendar} is unsupported for given date offset {self.unit}") - - if not (isinstance(initial_dt, cftime.datetime) or isinstance(initial_dt, datetime.datetime)): - raise TypeError(f"Invalid initial datetime type: {type(initial_dt)}. Expected types: cftime.datetime or datetime.datetime") + if not (isinstance(initial_dt, cftime.datetime)): + raise TypeError( + f"Invalid initial datetime type: {type(initial_dt)}. " + "Expected type: cftime.datetime" + ) - return self.add_offset_to_datetime(initial_dt=initial_dt, n=self.magnitude) + return self._add_offset_to_datetime( + initial_dt=initial_dt, n=self.magnitude + ) def parse_date_offset(offset): - """Parse a given string date offset string, and returns an DatetimeOffset""" - match = re.search('[0-9]+', offset) - assert match is not None, f"payu: error: no numerical value given for offset: {offset}" + """Parse a given string date offset string and return an DatetimeOffset""" + match = re.search("[0-9]+", offset) + if match is None: + raise ValueError( + f"No numerical value given for offset: {offset}" + ) n = match.group() unit = offset.lstrip(n) - return DatetimeOffset(unit=unit, magnitude=int(n)) \ No newline at end of file + return DatetimeOffset(unit=unit, magnitude=int(n)) diff --git a/payu/experiment.py b/payu/experiment.py index 780a08f8..4cfe5281 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -26,7 +26,7 @@ # Local from payu import envmod -from payu.fsops import mkdir_p, make_symlink, read_config, movetree, required_libs +from payu.fsops import mkdir_p, make_symlink, read_config, movetree from payu.schedulers.pbs import get_job_info, pbs_env_init, get_job_id from payu.models import index as model_index import payu.profilers @@ -345,7 +345,7 @@ def set_output_paths(self): user_restart_dir = self.config.get('restart') if (self.counter == 0 or self.repeat_run) and user_restart_dir: # TODO: Some user friendliness needed... - assert(os.path.isdir(user_restart_dir)) + assert (os.path.isdir(user_restart_dir)) self.prior_restart_path = user_restart_dir else: prior_restart_dir = 'restart{0:03}'.format(self.counter - 1) @@ -450,10 +450,10 @@ def run(self, *user_flags): # XXX: This was previously done in reversion envmod.setup() - + # Add any user-defined module dir(s) to MODULEPATH for module_dir in self.config.get('modules', {}).get('use', []): - envmod.module('use', module_dir) + envmod.module('use', module_dir) self.load_modules() @@ -488,7 +488,7 @@ def run(self, *user_flags): mpi_flags = self.config.get('mpirun', []) # TODO: Legacy config removal warning - if type(mpi_flags) != list: + if not isinstance(mpi_flags, list): mpi_flags = [mpi_flags] # TODO: More uniform support needed here @@ -747,7 +747,8 @@ def archive(self): restarts_to_prune = self.prune_restarts() for restart_path in restarts_to_prune: # Only delete real directories; ignore symbolic restart links - if (os.path.isdir(restart_path) and not os.path.islink(restart_path)): + if (os.path.isdir(restart_path) and + not os.path.islink(restart_path)): shutil.rmtree(restart_path) # Ensure dynamic library support for subsequent python calls @@ -982,30 +983,32 @@ def sweep(self, hard_sweep=False): if os.path.islink(self.work_sym_path): print('Removing symlink {0}'.format(self.work_sym_path)) os.remove(self.work_sym_path) - + def prune_restarts(self, from_n_restart=0, to_n_restart=None): """Returns a list of restart directories that can be pruned""" - restart_freq = self.config.get('restart_freq', 5) - restart_history = self.config.get('restart_history', default_restart_history) + restart_freq = self.config.get('restart_freq', default_restart_freq) + restart_history = self.config.get('restart_history', + default_restart_history) # All restarts directories restarts = [d for d in os.listdir(self.archive_path) - if d.startswith('restart')] + if d.startswith('restart')] # Sort restarts based on counter - in increasing date order restarts.sort(key=lambda d: int(d.lstrip('restart'))) - # Note: from_n_restart and to_end_restart could be useful for inspecting only the more recent restarts if to_n_restart is None: - # Keep restart_history n restarts + # Keep restart_history n restarts to_n_restart = -restart_history - restarts = restarts[from_n_restart:to_n_restart] + restarts = restarts[from_n_restart:to_n_restart] restarts_to_prune = [] if self.repeat_run: - # TODO: Previous logic was to prune all restarts if self.repeat_run - is that still the case? - restarts_to_prune = [os.path.join(self.archive_path, restart) for restart in restarts] + # TODO: Previous logic was to prune all restarts if + # self.repeat_run - is that still the case? + restarts_to_prune = [os.path.join(self.archive_path, restart) + for restart in restarts] elif isinstance(restart_freq, int): - # Using integer frequency to prune restarts + # Using integer frequency to prune restarts for restart in restarts: restart_idx = int(restart.lstrip('restart')) if not restart_idx % restart_freq == 0: @@ -1016,21 +1019,27 @@ def prune_restarts(self, from_n_restart=0, to_n_restart=None): try: date_offset = parse_date_offset(restart_freq) - next_datetime = None + next_dt = None for restart in restarts: restart_path = os.path.join(self.archive_path, restart) - + # Use model-driver to parse restart files for a datetime - restart_datetime = self.model.get_restart_datetime(restart_path) - - if next_datetime is not None and restart_datetime < next_datetime: + restart_dt = self.model.get_restart_datetime(restart_path) + + if (next_dt is not None and restart_dt < next_dt): restarts_to_prune.append(restart_path) else: - # Keep the earliest datetime and use last kept datetime as point of reference when adding the next time interval - next_datetime = date_offset.add_to_datetime(restart_datetime) - except Exception as e: - print("payu: error occured during date-based restart pruning:", e) - + # Keep the earliest datetime and use last kept datetime + # as point of reference when adding the next time + # interval + next_dt = date_offset.add_to_datetime(restart_dt) + + except Exception as error: + print( + "payu: error occured during date-based restart pruning:", + error + ) + return restarts_to_prune diff --git a/payu/models/accessom2.py b/payu/models/accessom2.py index 7c6d605d..cfa8e930 100644 --- a/payu/models/accessom2.py +++ b/payu/models/accessom2.py @@ -88,9 +88,13 @@ def collate(self): pass def get_restart_datetime(self, restart_path): + """Given a restart path, parse the restart files and + return a cftime datetime (for date-based restart pruning)""" for model in self.expt.models: if model.model_type == 'mom': mom_restart_path = os.path.join(restart_path, model.name) return model.get_restart_datetime(mom_restart_path) - - raise NotImplementedError('access-om2 date-based restart pruning currently only uses the mom sub-model to find restart dates') + + raise NotImplementedError( + 'Cannot find mom sub-model: access-om2 date-based restart pruning ' + 'requires the mom sub-model to determine restart dates') diff --git a/payu/models/model.py b/payu/models/model.py index 807b1877..8b49d94d 100644 --- a/payu/models/model.py +++ b/payu/models/model.py @@ -145,7 +145,7 @@ def set_input_paths(self): if input_dirs is None: input_dirs = [] - elif type(input_dirs) == str: + elif isinstance(input_dirs, str): input_dirs = [input_dirs] self.input_paths = [] @@ -470,8 +470,8 @@ def profile(self): if f.endswith('.cubex')][0] cmd = 'scorep-score {0}'.format(cube_path) sp.check_call(shlex.split(cmd)) - + def get_restart_datetime(self, restart_path): - """Given a restart path, parse the restart files and return a cftime or standard datetime object to be used for - date-based restart pruning""" - raise NotImplementedError \ No newline at end of file + """Given a restart path, parse the restart files and return a cftime + datetime (currently used for date-based restart pruning)""" + raise NotImplementedError diff --git a/payu/models/mom.py b/payu/models/mom.py index 31c76dd1..0596be52 100644 --- a/payu/models/mom.py +++ b/payu/models/mom.py @@ -9,8 +9,7 @@ import subprocess import f90nml -import payu.envmod -import cftime +import cftime from payu.models.fms import Fms from payu.fsops import mkdir_p, make_symlink @@ -103,7 +102,7 @@ def setup(self): os.remove(mask_path) # Reference mask table - assert('layout' in input_nml['ocean_model_nml']) + assert ('layout' in input_nml['ocean_model_nml']) nx, ny = input_nml['ocean_model_nml'].get('layout') n_masked_cpus = nx * ny - self.config.get('ncpus') @@ -227,24 +226,29 @@ def create_mask_table(self, input_nml): return land_cells - def get_restart_datetime(self, restart_path): - """Given a restart path, parse the restart files and return a cftime datetime (for date-based restart pruning)""" + """Given a restart path, parse the restart files and + return a cftime datetime (for date-based restart pruning)""" ocean_solo_path = os.path.join(restart_path, 'ocean_solo.res') with open(ocean_solo_path, 'r') as ocean_solo: lines = ocean_solo.readlines() - + calendar_int = int(lines[0].split()[0]) cftime_calendars = { 1: "360_day", 2: "julian", - 3: "gregorian", + 3: "proleptic_gregorian", 4: "noleap" } calendar = cftime_calendars[calendar_int] - + last_date_line = lines[-1].split() - year, month, day, hour, minute, second = [int(i) for i in last_date_line[:6]] - return cftime.datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second, - calendar=calendar) - \ No newline at end of file + date_values = [int(i) for i in last_date_line[:6]] + year, month, day, hour, minute, second = date_values + return cftime.datetime(year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + calendar=calendar) diff --git a/test/common.py b/test/common.py index 867a7938..b77ed093 100644 --- a/test/common.py +++ b/test/common.py @@ -2,6 +2,8 @@ import os import stat from pathlib import Path +import re +import shutil import yaml @@ -22,6 +24,8 @@ workdir = ctrldir / 'work' payudir = tmpdir / 'payu' +archive_dir = labdir / 'archive' +expt_archive_dir = archive_dir / ctrldir_basename expt_workdir = labdir / 'work' / ctrldir_basename print('tmpdir: {}'.format(tmpdir)) @@ -48,6 +52,7 @@ } + @contextmanager def cd(directory): """ @@ -160,6 +165,45 @@ def make_restarts(fnames=None): make_random_file(restartdir/fname, 5000**2 + i) +def make_expt_archive_dirs(dir_type, num_dirs=5, additional_path=None): + """Make experiment archive directories of given type (i.e. "restart" or + "output")""" + created_dirs = [] + for i in range(num_dirs): + dir_path = os.path.join(expt_archive_dir, f'{dir_type}{i:03d}') + if additional_path: + dir_path = os.path.join(dir_path, additional_path) + + os.makedirs(dir_path) + created_dirs.append(dir_path) + return created_dirs + + +def list_expt_archive_dirs(dir_type='restart', full_path=True): + """Return a list of output/restart paths in experiment archive + path""" + dirs = [] + if os.path.exists(expt_archive_dir): + if os.path.isdir(expt_archive_dir): + naming_pattern = re.compile(fr"^{dir_type}[0-9][0-9][0-9]$") + dirs = [d for d in os.listdir(expt_archive_dir) + if naming_pattern.match(d)] + + if full_path: + dirs = [os.path.join(expt_archive_dir, d) for d in dirs] + return dirs + + +def remove_expt_archive_dirs(dir_type='restart'): + """Remove experiment archive directories of the given type (i.e. "restart" + or "output"). Useful for cleaning up archive between tests""" + for dir_path in list_expt_archive_dirs(dir_type): + try: + shutil.rmtree(dir_path) + except Exception as e: + print(e) + + def make_all_files(): make_inputs() make_exe() diff --git a/test/models/test_mom.py b/test/models/test_mom.py new file mode 100644 index 00000000..885fd01a --- /dev/null +++ b/test/models/test_mom.py @@ -0,0 +1,150 @@ +import copy +import os +import shutil + +import pytest +import cftime + +import payu + +from test.common import cd +from test.common import tmpdir, ctrldir, labdir +from test.common import config as config_orig +from test.common import write_config +from test.common import make_all_files +from test.common import list_expt_archive_dirs +from test.common import make_expt_archive_dirs, remove_expt_archive_dirs + + +verbose = True + +# Global config +config = copy.deepcopy(config_orig) + + +def setup_module(module): + """ + Put any test-wide setup code in here, e.g. creating test files + """ + if verbose: + print("setup_module module:%s" % module.__name__) + + # Should be taken care of by teardown, in case remnants lying around + try: + shutil.rmtree(tmpdir) + except FileNotFoundError: + pass + + try: + tmpdir.mkdir() + labdir.mkdir() + ctrldir.mkdir() + make_all_files() + except Exception as e: + print(e) + + +def teardown_module(module): + """ + Put any test-wide teardown code in here, e.g. removing test outputs + """ + if verbose: + print("teardown_module module:%s" % module.__name__) + + try: + shutil.rmtree(tmpdir) + print('removing tmp') + except Exception as e: + print(e) + + +@pytest.fixture(autouse=True) +def teardown(): + # Run test + yield + + # Remove any created restart files + remove_expt_archive_dirs(dir_type='restart') + + +def make_ocean_restart_files(init_dt_array, + run_dt_arrays, + calendar, + additional_path=None): + restart_paths = make_expt_archive_dirs(dir_type='restart', + num_dirs=len(run_dt_arrays), + additional_path=additional_path) + + for index, run_dt_array in enumerate(run_dt_arrays): + # Create ocean_solo.res file + make_ocean_solo_file(restart_paths[index], + init_dt_array, + run_dt_array, + calendar) + + +def make_ocean_solo_file(restart_path, init_dt_array, run_dt_array, calendar): + "Create test ocean_solo.res files in restart directories" + lines = (f"{calendar:6d} " + "(Calendar: no_calendar=0, thirty_day_months=1, julian=2, " + "gregorian=3, noleap=4)\n") + + init_dt_desc = "Model start time: year, month, day, hour, minute, second" + lines += format_ocean_solo_datetime_line(init_dt_array, init_dt_desc) + + run_dt_desc = "Current model time: year, month, day, hour, minute, second" + lines += format_ocean_solo_datetime_line(run_dt_array, run_dt_desc) + + ocean_solo_path = os.path.join(restart_path, "ocean_solo.res") + with open(ocean_solo_path, "w") as ocean_solo_file: + ocean_solo_file.write(lines) + + +def format_ocean_solo_datetime_line(dt_array, description): + year, month, day, hour, minute, second = dt_array + return ( + f"{year:6d}{month:6d}{day:6d}{hour:6d}{minute:6d}{second:6d}" + f" {description}\n" + ) + + +@pytest.mark.parametrize( + "run_dt_arrays, calendar, expected_cftimes", + [ + ( + [[1900, 2, 1, 0, 0, 0], [1900, 3, 1, 0, 0, 0]], + 4, + [ + cftime.datetime(1900, 2, 1, calendar="noleap"), + cftime.datetime(1900, 3, 1, calendar="noleap"), + ], + ), + ( + [[1900, 6, 1, 0, 0, 0], [1901, 1, 1, 0, 0, 0]], + 3, + [ + cftime.datetime(1900, 6, 1, calendar="proleptic_gregorian"), + cftime.datetime(1901, 1, 1, calendar="proleptic_gregorian"), + ], + ) + ]) +def test_mom_get_restart_datetime(run_dt_arrays, calendar, expected_cftimes): + # Create mom restart files + init_dt_array = [1900, 1, 1, 0, 0, 0] + make_ocean_restart_files(init_dt_array, run_dt_arrays, calendar) + + # Write config + test_config = config + test_config['model'] = 'mom' + write_config(test_config) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + + restart_paths = list_expt_archive_dirs(dir_type='restart') + + for index, expected_cftime in enumerate(expected_cftimes): + restart_path = restart_paths[index] + run_dt = expt.model.get_restart_datetime(restart_path) + assert run_dt == expected_cftime diff --git a/test/test_calendar.py b/test/test_calendar.py index 638923ab..8ae015e5 100644 --- a/test/test_calendar.py +++ b/test/test_calendar.py @@ -1,132 +1,152 @@ -import datetime - import cftime import pytest from payu.calendar import parse_date_offset, DatetimeOffset -def assert_add_date_offset(initial_dt, test_data): - """Arguments: - - initial_dt - initial datetime, - - test_data which dictionary mapping date-offset strings to expected datetime values""" - for date_offset_str, expected_next_dt in test_data.items(): - date_offset = parse_date_offset(date_offset_str) - - next_dt = date_offset.add_to_datetime(initial_dt) - - assert next_dt == expected_next_dt - - -def test_date_offset_add_to_datetime_cftime_no_leap(): - # "noleap" cftime calendar - initial_dt = cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, - calendar='noleap') - - test_data = { - '5Y': cftime.datetime(year=2005, month=10, day=31, hour=10, minute=5, second=2, - calendar='noleap'), - '5YS': cftime.datetime(year=2005, month=1, day=1, hour=0, minute=0, second=0, - calendar='noleap'), - '5M': cftime.datetime(year=2001, month=3, day=31, hour=10, minute=5, second=2, - calendar='noleap'), - '5MS': cftime.datetime(year=2001, month=3, day=1, hour=0, minute=0, second=0, - calendar='noleap'), - '4M': cftime.datetime(year=2001, month=2, day=28, hour=10, minute=5, second=2, - calendar='noleap') - } - - assert_add_date_offset(initial_dt, test_data) - - -def test_date_offset_add_to_datetime_cftime_all_leap(): - # "all_leap" cftime calendar - initial_dt = cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, - calendar='all_leap') - - test_data = { - '5Y': cftime.datetime(year=2005, month=10, day=31, hour=10, minute=5, second=2, - calendar='all_leap'), - '5YS': cftime.datetime(year=2005, month=1, day=1, hour=0, minute=0, second=0, - calendar='all_leap'), - '5M': cftime.datetime(year=2001, month=3, day=31, hour=10, minute=5, second=2, - calendar='all_leap'), - '5MS': cftime.datetime(year=2001, month=3, day=1, hour=0, minute=0, second=0, - calendar='all_leap'), - '4M': cftime.datetime(year=2001, month=2, day=29, hour=10, minute=5, second=2, - calendar='all_leap') - } - - assert_add_date_offset(initial_dt, test_data) - - -def test_date_offset_add_to_datetime_cftime_360_day(): - # "360_day" cftime calendar - initial_dt = cftime.datetime(year=2000, month=10, day=30, hour=10, minute=5, second=2, - calendar='360_day') - - test_data = { - '5Y': cftime.datetime(year=2005, month=10, day=30, hour=10, minute=5, second=2, - calendar='360_day'), - '5YS': cftime.datetime(year=2005, month=1, day=1, calendar='360_day'), - '5M': cftime.datetime(year=2001, month=3, day=30, hour=10, minute=5, second=2, - calendar='360_day'), - '5MS': cftime.datetime(year=2001, month=3, day=1, calendar='360_day') - } - - assert_add_date_offset(initial_dt, test_data) - -def test_date_offset_add_to_datetime_standard(): - # Standard datetime and cftime standard calendar - initial_dts = [ - cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, - calendar='standard'), - datetime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2) - ] - - test_data = { - '5Y': datetime.datetime(year=2005, month=10, day=31, hour=10, minute=5, second=2), - '5YS': datetime.datetime(year=2005, month=1, day=1), - '5M': datetime.datetime(year=2001, month=3, day=31, hour=10, minute=5, second=2), - '5MS': datetime.datetime(year=2001, month=3, day=1) - } - - for initial_dt in initial_dts: - assert_add_date_offset(initial_dt, test_data) - - -def test_date_offset_add_to_datetime_unsupported_calendar(): - # Currently Julian calendar isn't supported for Y, M offsets - initial_dt = cftime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2, - calendar='julian') - - for unit in ['Y', 'M']: - datetime_offset = DatetimeOffset(unit=unit, magnitude=1) - with pytest.raises(ValueError) as exc_info: - datetime_offset.add_to_datetime(initial_dt) - - assert str(exc_info.value) == f"Calendar type julian is unsupported for given date offset {unit}" + +@pytest.mark.parametrize( + "offset, initial_dt, expected", + [ + ( + "5YS", + cftime.datetime(year=2000, month=10, day=31, + hour=10, minute=5, second=2, + calendar="noleap"), + cftime.datetime(year=2005, month=1, day=1, calendar="noleap"), + ), + ( + "1YS", + cftime.datetime(year=1700, month=2, day=15, + hour=11, minute=5, second=2, + calendar="proleptic_gregorian"), + cftime.datetime(year=1701, month=1, day=1, + calendar="proleptic_gregorian"), + ), + ( + "20YS", + cftime.datetime(year=2200, month=2, day=30, + hour=1, minute=4, second=20, + calendar="360_day"), + cftime.datetime(year=2220, month=1, day=1, calendar="360_day"), + ), + ], +) +def test_year_start_date_offset_add_to_datetime(offset, initial_dt, expected): + date_offset = parse_date_offset(offset) + next_dt = date_offset.add_to_datetime(initial_dt) + + assert next_dt == expected + + +@pytest.mark.parametrize( + "offset, initial_dt, expected", + [ + ( + "5MS", + cftime.datetime(year=2000, month=10, day=1, + hour=10, minute=5, second=2, + calendar="noleap"), + cftime.datetime(year=2001, month=3, day=1, calendar="noleap"), + ), + ( + "13MS", + cftime.datetime(year=1500, month=10, day=30, + hour=10, minute=5, second=2, + calendar="360_day"), + cftime.datetime(year=1501, month=11, day=1, calendar="360_day"), + ), + ( + "24MS", + cftime.datetime(year=2200, month=1, day=1, calendar="gregorian"), + cftime.datetime(year=2202, month=1, day=1, calendar="gregorian"), + ), + ], +) +def test_month_start_date_offset_add_to_datetime(offset, initial_dt, expected): + date_offset = parse_date_offset(offset) + next_dt = date_offset.add_to_datetime(initial_dt) + + assert next_dt == expected + + +@pytest.mark.parametrize( + "offset, initial_dt, expected", + [ + ( + "100S", + cftime.datetime(year=2000, month=10, day=31, + hour=10, minute=5, second=2, + calendar="julian"), + cftime.datetime(year=2000, month=10, day=31, + hour=10, minute=6, second=42, + calendar="julian"), + ), + ( + "25H", + cftime.datetime(year=1500, month=10, day=30, + hour=10, minute=5, second=2, + calendar="360_day"), + cftime.datetime(year=1500, month=11, day=1, + hour=11, minute=5, second=2, + calendar="360_day") + ), + ( + "3W", + cftime.datetime(year=2200, month=1, day=1), + cftime.datetime(year=2200, month=1, day=22), + ), + ( + "4T", + cftime.datetime( + year=2200, month=1, day=1, hour=0, minute=0, second=0 + ), + cftime.datetime( + year=2200, month=1, day=1, hour=0, minute=4, second=0 + ), + ), + ( + "30D", + cftime.datetime(year=2200, month=2, day=1, calendar="noleap"), + cftime.datetime(year=2200, month=3, day=3, calendar="noleap"), + ), + ], +) +def test_timedelta_date_offset_add_to_datetime(offset, initial_dt, expected): + # Week, Day, Minute, Hour, Second offsets + date_offset = parse_date_offset(offset) + next_dt = date_offset.add_to_datetime(initial_dt) + + assert next_dt == expected def test_date_offset_add_to_datetime_invalid_dt(): initial_dt = "stringInput" - - datetime_offset = DatetimeOffset(unit='Y', magnitude=2) + datetime_offset = DatetimeOffset(unit="YS", magnitude=2) + with pytest.raises(TypeError) as exc_info: datetime_offset.add_to_datetime(initial_dt) - - expected_error = "Invalid initial datetime type: . Expected types: cftime.datetime or datetime.datetime" + + expected_error = ( + "Invalid initial datetime type: . " + "Expected type: cftime.datetime" + ) assert str(exc_info.value) == expected_error - -def test_date_offset_add_to_datetime_using_timedelta(): - initial_dt = datetime.datetime(year=2000, month=10, day=31, hour=10, minute=5, second=2) - test_data = { - "100S": datetime.datetime(year=2000, month=10, day=31, hour=10, minute=6, second=42), - "2H": datetime.datetime(year=2000, month=10, day=31, hour=12, minute=5, second=2), - "3W": datetime.datetime(year=2000, month=11, day=21, hour=10, minute=5, second=2), - "4T": datetime.datetime(year=2000, month=10, day=31, hour=10, minute=9, second=2), - "5D": datetime.datetime(year=2000, month=11, day=5, hour=10, minute=5, second=2) - } +def test_date_offset_unsupported_offset(): + with pytest.raises(ValueError) as exc_info: + DatetimeOffset(unit="Y", magnitude=2) - assert_add_date_offset(initial_dt, test_data) \ No newline at end of file + expected_error = ( + "Unsupported datetime offset: Y. " + "Supported offsets: YS, MS, W, D, H, T, S" + ) + assert str(exc_info.value) == expected_error + + +def test_parse_date_offset_no_offset_magnitude(): + with pytest.raises(ValueError) as exc_info: + parse_date_offset("YS") + + expected_error = "No numerical value given for offset: YS" + assert str(exc_info.value) == expected_error diff --git a/test/test_prune_restarts.py b/test/test_prune_restarts.py new file mode 100644 index 00000000..2fb1c79f --- /dev/null +++ b/test/test_prune_restarts.py @@ -0,0 +1,133 @@ +import copy +import shutil +import re + +import pytest + +import payu + +from test.common import cd +from test.common import tmpdir, ctrldir, labdir +from test.common import config as config_orig +from test.common import write_config +from test.common import make_all_files +from test.common import remove_expt_archive_dirs +from test.models.test_mom import make_ocean_restart_files + +verbose = True + +# Global config +config = copy.deepcopy(config_orig) + + +def setup_module(module): + """ + Put any test-wide setup code in here, e.g. creating test files + """ + if verbose: + print("setup_module module:%s" % module.__name__) + + # Should be taken care of by teardown, in case remnants lying around + try: + shutil.rmtree(tmpdir) + except FileNotFoundError: + pass + + try: + tmpdir.mkdir() + labdir.mkdir() + ctrldir.mkdir() + make_all_files() + except Exception as e: + print(e) + + +def teardown_module(module): + """ + Put any test-wide teardown code in here, e.g. removing test outputs + """ + if verbose: + print("teardown_module module:%s" % module.__name__) + + try: + shutil.rmtree(tmpdir) + print('removing tmp') + except Exception as e: + print(e) + + +@pytest.fixture(autouse=True) +def teardown(): + # Run test + yield + + # Remove any created restart files + remove_expt_archive_dirs(dir_type='restart') + + +@pytest.mark.parametrize( + "restart_freq, restart_history, expected_pruned_restarts_indices", + [ + ("1MS", 1, []), + ("2MS", 5, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]), + ("12MS", + 1, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + ("1YS", + 1, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + (1, 1, []), + (5, 5, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]) + ]) +def test_prune_restarts(restart_freq, + restart_history, + expected_pruned_restarts_indices): + + # Create 2 years + 1 month worth of mom restarts directories + # with 1 month runtimes - starting from 1900/02/01 to 1902/02/01 + # e.g (run_date, restart_directory) + # (1900/02/01, restart000) + # (1900/03/01, restart001) + # ... + # (1902/02/01, restart024) + restart_dts = [] + for year in [1900, 1901, 1902]: + for month in range(1, 13): + if (year == 1900 and month == 1) or (year == 1902 and month > 2): + # Ignore the first date and dates from 1902/03/01 onwards + continue + restart_dts.append([year, month, 1, 0, 0, 0]) + + make_ocean_restart_files( + init_dt_array=[1900, 1, 1, 0, 0, 0], + run_dt_arrays=restart_dts, + calendar=4, + additional_path='ocean') + + # Set up config + test_config = config + test_config['model'] = 'access-om2' + test_config['submodels'] = [ + {'name': 'atmosphere', 'model': 'yatm'}, + {'name': 'ocean', 'model': 'mom'} + ] + test_config['restart_freq'] = restart_freq + test_config['restart_history'] = restart_history + write_config(test_config) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + + # Function to test + restarts_to_prune = expt.prune_restarts() + + # Extract out index from the full paths + restarts_to_prune_indices = [ + int(re.search("[0-9]+$", restart_path).group()) + for restart_path in restarts_to_prune + ] + + assert restarts_to_prune_indices == expected_pruned_restarts_indices From 56f45dae8f33a812a09dbdf6b262414888ffdfbf Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Wed, 4 Oct 2023 15:32:57 +1100 Subject: [PATCH 3/5] Add in logic to keep intermediate restarts and checks/restrictions on pruning - Moved restart parsing code in Mom driver to Fms so it can also work with Mom6 - Add checks to setup for invalid restart pruning configuration or warn if there's changes - add --force-prune-restarts flag to run and archive cmds - add restart_history as an override to how many latest restarts to keep --- payu/calendar.py | 1 - payu/cli.py | 5 +- payu/experiment.py | 160 ++++++++++++++++++++++-------- payu/models/fms.py | 30 ++++++ payu/models/mom.py | 28 ------ payu/subcommands/archive_cmd.py | 7 +- payu/subcommands/args.py | 12 +++ payu/subcommands/run_cmd.py | 10 +- test/common.py | 31 +++--- test/models/test_mom.py | 105 +++++++++++--------- test/test_cli.py | 9 +- test/test_prune_restarts.py | 170 ++++++++++++++++++++++++-------- 12 files changed, 377 insertions(+), 191 deletions(-) diff --git a/payu/calendar.py b/payu/calendar.py index 9037f36e..7e2beab3 100644 --- a/payu/calendar.py +++ b/payu/calendar.py @@ -124,7 +124,6 @@ def add_month_start_offset_to_datetime(initial_dt, n): def add_timedelta_fn(timedelta): """Returns a function that takes initial datetime and multiplier n, and returns a datetime that is n * offset from the initial datetime""" - # cftime datetimes supports timedelta operations return lambda initial_dt, n: initial_dt + n * timedelta diff --git a/payu/cli.py b/payu/cli.py index 4887174d..a2876a14 100644 --- a/payu/cli.py +++ b/payu/cli.py @@ -89,7 +89,7 @@ def get_model_type(model_type, config): def set_env_vars(init_run=None, n_runs=None, lab_path=None, dir_path=None, - reproduce=False, force=False): + reproduce=False, force=False, force_prune_restarts=False): """Construct the environment variables used by payu for resubmissions.""" payu_env_vars = {} @@ -134,6 +134,9 @@ def set_env_vars(init_run=None, n_runs=None, lab_path=None, dir_path=None, if force: payu_env_vars['PAYU_FORCE'] = force + if force_prune_restarts: + payu_env_vars['PAYU_FORCE_PRUNE_RESTARTS'] = force_prune_restarts + # Pass through important module related environment variables module_env_vars = ['MODULESHOME', 'MODULES_CMD', 'MODULEPATH', 'MODULEV'] for var in module_env_vars: diff --git a/payu/experiment.py b/payu/experiment.py index 4cfe5281..36a16fe8 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -41,7 +41,6 @@ # Default payu parameters default_archive_url = 'dc.nci.org.au' default_restart_freq = 5 -default_restart_history = 5 class Experiment(object): @@ -89,7 +88,6 @@ def __init__(self, lab, reproduce=False, force=False): self.set_expt_pathnames() self.set_counters() - for model in self.models: model.set_input_paths() @@ -446,6 +444,11 @@ def setup(self, force_archive=False): # Testing prof.setup() + # Check restart pruning for valid configuration values and + # warns user if more restarts than expected would be pruned + if self.config.get('archive', True): + self.get_restarts_to_prune() + def run(self, *user_flags): # XXX: This was previously done in reversion @@ -712,7 +715,7 @@ def run(self, *user_flags): if run_script: self.run_userscript(run_script) - def archive(self): + def archive(self, force_prune_restarts=False): if not self.config.get('archive', True): print('payu: not archiving due to config.yaml setting.') return @@ -744,8 +747,16 @@ def archive(self): movetree(self.work_path, self.output_path) # Remove any outdated restart files - restarts_to_prune = self.prune_restarts() - for restart_path in restarts_to_prune: + try: + restarts_to_prune = self.get_restarts_to_prune( + force=force_prune_restarts) + except Exception as e: + print(e) + print("payu: error: Skipping pruning restarts") + restarts_to_prune = [] + + for restart in restarts_to_prune: + restart_path = os.path.join(self.archive_path, restart) # Only delete real directories; ignore symbolic restart links if (os.path.isdir(restart_path) and not os.path.islink(restart_path)): @@ -984,61 +995,124 @@ def sweep(self, hard_sweep=False): print('Removing symlink {0}'.format(self.work_sym_path)) os.remove(self.work_sym_path) - def prune_restarts(self, from_n_restart=0, to_n_restart=None): + def get_restarts_to_prune(self, + ignore_intermediate_restarts=False, + force=False): """Returns a list of restart directories that can be pruned""" - restart_freq = self.config.get('restart_freq', default_restart_freq) - restart_history = self.config.get('restart_history', - default_restart_history) + # Check if archive path exists + if not os.path.exists(self.archive_path): + return [] - # All restarts directories - restarts = [d for d in os.listdir(self.archive_path) - if d.startswith('restart')] - # Sort restarts based on counter - in increasing date order + # List all and sort restart directories in archive + restarts = self.list_output_dirs(output_type='restart') restarts.sort(key=lambda d: int(d.lstrip('restart'))) - if to_n_restart is None: - # Keep restart_history n restarts - to_n_restart = -restart_history - restarts = restarts[from_n_restart:to_n_restart] + # TODO: Previous logic was to prune all restarts if self.repeat_run + # Still need to figure out what should happen in this case + if self.repeat_run: + return [os.path.join(self.archive_path, restart) + for restart in restarts] + # Use restart_freq to decide what restarts to prune restarts_to_prune = [] - if self.repeat_run: - # TODO: Previous logic was to prune all restarts if - # self.repeat_run - is that still the case? - restarts_to_prune = [os.path.join(self.archive_path, restart) - for restart in restarts] - elif isinstance(restart_freq, int): + intermediate_restarts, previous_intermediate_restarts = [], [] + restart_freq = self.config.get('restart_freq', default_restart_freq) + if isinstance(restart_freq, int): # Using integer frequency to prune restarts for restart in restarts: restart_idx = int(restart.lstrip('restart')) if not restart_idx % restart_freq == 0: - restart_path = os.path.join(self.archive_path, restart) - restarts_to_prune.append(restart_path) + intermediate_restarts.append(restart) + else: + # Add any intermediate restarts to restarts to prune + restarts_to_prune.extend(intermediate_restarts) + previous_intermediate_restarts = intermediate_restarts + intermediate_restarts = [] else: # Using date-based frequency to prune restarts try: date_offset = parse_date_offset(restart_freq) + except ValueError as e: + print('payu: error: Invalid configuration for restart_freq:', + restart_freq) + raise - next_dt = None - for restart in restarts: - restart_path = os.path.join(self.archive_path, restart) - - # Use model-driver to parse restart files for a datetime + next_dt = None + for restart in restarts: + # Use model-driver to parse restart directory for a datetime + restart_path = os.path.join(self.archive_path, restart) + try: restart_dt = self.model.get_restart_datetime(restart_path) + except NotImplementedError as e: + print('payu: error: Date-based restart pruning is not ' + f'implemented for the {self.model.model_type} ' + 'model. To use integer based restart pruning, ' + 'set restart_freq to an integer value.') + raise + except Exception as e: + print('payu: error: Error parsing restart directory ', + f'{restart} for a datetime. Error: {e}') + raise - if (next_dt is not None and restart_dt < next_dt): - restarts_to_prune.append(restart_path) - else: - # Keep the earliest datetime and use last kept datetime - # as point of reference when adding the next time - # interval - next_dt = date_offset.add_to_datetime(restart_dt) - - except Exception as error: - print( - "payu: error occured during date-based restart pruning:", - error - ) + if (next_dt is not None and restart_dt < next_dt): + intermediate_restarts.append(restart) + else: + # Keep the earliest datetime and use last kept datetime + # as point of reference when adding the next time interval + next_dt = date_offset.add_to_datetime(restart_dt) + + # Add intermediate restarts to restarts to prune + restarts_to_prune.extend(intermediate_restarts) + previous_intermediate_restarts = intermediate_restarts + intermediate_restarts = [] + + if ignore_intermediate_restarts: + # Return all restarts that'll eventually be pruned + restarts_to_prune.extend(intermediate_restarts) + return restarts_to_prune + + if not force: + # check environment for --force-prune-restarts flag + force = os.environ.get('PAYU_FORCE_PRUNE_RESTARTS', False) + + # Flag to check whether more restarts than expected will be deleted + is_unexpected = restarts_to_prune != previous_intermediate_restarts + + # Restart_history override + restart_history = self.config.get('restart_history', None) + if restart_history is not None: + if not isinstance(restart_history, int): + raise ValueError("payu: error: restart_history is not an " + f"integer value: {restart_history}") + + # Keep restart_history latest restarts, in addition to the + # permanently saved restarts defined by restart_freq + restarts_to_prune.extend(intermediate_restarts) + max_index = self.max_output_index(output_type="restart") + index_bound = max_index - restart_history + restarts_to_prune = [res for res in restarts_to_prune + if int(res.lstrip('restart')) <= index_bound] + + # Only expect at most 1 restart to be pruned with restart_history + is_unexpected = len(restarts_to_prune) > 1 + + # Log out warning if more restarts than expected will be deleted + if not force and is_unexpected: + config_info = (f'restart pruning frequency of {restart_freq}') + if restart_history: + config_info += f' and restart history of {restart_history}' + + print(f'payu: warning: Current {config_info} would result in ' + 'following restarts being pruned: ' + f'{" ".join(restarts_to_prune)}\n' + 'If this is expected, use --force-prune-restarts flag at ' + 'next run or archive (if running archive manually), e.g.:\n' + ' payu run --force-prune-restarts, or\n' + ' payu archive --force-prune-restarts\n' + 'Otherwise, no restarts will be pruned') + + # Return empty list to prevent restarts being pruned + restarts_to_prune = [] return restarts_to_prune diff --git a/payu/models/fms.py b/payu/models/fms.py index f2f3e743..5da78635 100644 --- a/payu/models/fms.py +++ b/payu/models/fms.py @@ -17,6 +17,8 @@ from itertools import count import fnmatch +import cftime + from payu.models.model import Model from payu import envmod from payu.fsops import required_libs @@ -253,3 +255,31 @@ def archive(self, **kwargs): def collate(self): fms_collate(self) + + def get_restart_datetime(self, restart_path): + """Given a restart path, parse the restart files and + return a cftime datetime (for date-based restart pruning)""" + # Check for ocean_solo.res file + ocean_solo_path = os.path.join(restart_path, 'ocean_solo.res') + if not os.path.exists(ocean_solo_path): + raise NotImplementedError( + 'Cannot find ocean_solo.res file, which is required for ' + 'date-based restart pruning') + + with open(ocean_solo_path, 'r') as ocean_solo: + lines = ocean_solo.readlines() + + calendar_int = int(lines[0].split()[0]) + cftime_calendars = { + 1: "360_day", + 2: "julian", + 3: "proleptic_gregorian", + 4: "noleap" + } + calendar = cftime_calendars[calendar_int] + + last_date_line = lines[-1].split() + date_values = [int(i) for i in last_date_line[:6]] + year, month, day, hour, minute, second = date_values + return cftime.datetime(year, month, day, hour, minute, second, + calendar=calendar) diff --git a/payu/models/mom.py b/payu/models/mom.py index 0596be52..6a28f2b2 100644 --- a/payu/models/mom.py +++ b/payu/models/mom.py @@ -9,7 +9,6 @@ import subprocess import f90nml -import cftime from payu.models.fms import Fms from payu.fsops import mkdir_p, make_symlink @@ -225,30 +224,3 @@ def create_mask_table(self, input_nml): land_cells = int(fmask.readline()) return land_cells - - def get_restart_datetime(self, restart_path): - """Given a restart path, parse the restart files and - return a cftime datetime (for date-based restart pruning)""" - ocean_solo_path = os.path.join(restart_path, 'ocean_solo.res') - with open(ocean_solo_path, 'r') as ocean_solo: - lines = ocean_solo.readlines() - - calendar_int = int(lines[0].split()[0]) - cftime_calendars = { - 1: "360_day", - 2: "julian", - 3: "proleptic_gregorian", - 4: "noleap" - } - calendar = cftime_calendars[calendar_int] - - last_date_line = lines[-1].split() - date_values = [int(i) for i in last_date_line[:6]] - year, month, day, hour, minute, second = date_values - return cftime.datetime(year=year, - month=month, - day=day, - hour=hour, - minute=minute, - second=second, - calendar=calendar) diff --git a/payu/subcommands/archive_cmd.py b/payu/subcommands/archive_cmd.py index 18dfa5a3..6d5f0131 100644 --- a/payu/subcommands/archive_cmd.py +++ b/payu/subcommands/archive_cmd.py @@ -7,15 +7,16 @@ title = 'archive' parameters = {'description': 'Archive model output after run'} -arguments = [args.model, args.config, args.laboratory] +arguments = [args.model, args.config, args.laboratory, + args.force_prune_restarts] -def runcmd(model_type, config_path, lab_path): +def runcmd(model_type, config_path, lab_path, force_prune_restarts): lab = Laboratory(model_type, config_path, lab_path) expt = Experiment(lab) - expt.archive() + expt.archive(force_prune_restarts) runscript = runcmd diff --git a/payu/subcommands/args.py b/payu/subcommands/args.py index f5f38e6d..167fbec6 100644 --- a/payu/subcommands/args.py +++ b/payu/subcommands/args.py @@ -114,3 +114,15 @@ 'help': 'Force run to proceed, overwriting existing directories', } } + +# Force restarts to be pruned despite changes to configuration +force_prune_restarts = { + 'flags': ('--force-prune-restarts', '-F'), + 'parameters': { + 'action': 'store_true', + 'dest': 'force_prune_restarts', + 'default': False, + 'help': 'Force outdated restart directories to be pruned during \ + archive, ignoring changes made to configuration.', + } +} diff --git a/payu/subcommands/run_cmd.py b/payu/subcommands/run_cmd.py index 089e07d8..d4d1d7d6 100644 --- a/payu/subcommands/run_cmd.py +++ b/payu/subcommands/run_cmd.py @@ -12,11 +12,12 @@ parameters = {'description': 'Run the model experiment'} arguments = [args.model, args.config, args.initial, args.nruns, - args.laboratory, args.reproduce, args.force] + args.laboratory, args.reproduce, args.force, + args.force_prune_restarts] def runcmd(model_type, config_path, init_run, n_runs, lab_path, - reproduce=False, force=False): + reproduce=False, force=False, force_prune_restarts=False): # Get job submission configuration pbs_config = fsops.read_config(config_path) @@ -24,7 +25,8 @@ def runcmd(model_type, config_path, init_run, n_runs, lab_path, n_runs=n_runs, lab_path=lab_path, reproduce=reproduce, - force=force) + force=force, + force_prune_restarts=force_prune_restarts) # Set the queue # NOTE: Maybe force all jobs on the normal queue @@ -130,7 +132,7 @@ def runscript(): expt.setup() expt.run() - expt.archive() + expt.archive(force_prune_restarts=run_args.force_prune_restarts) # Finished runs if expt.n_runs == 0: diff --git a/test/common.py b/test/common.py index b77ed093..0001dcfa 100644 --- a/test/common.py +++ b/test/common.py @@ -165,27 +165,24 @@ def make_restarts(fnames=None): make_random_file(restartdir/fname, 5000**2 + i) -def make_expt_archive_dirs(dir_type, num_dirs=5, additional_path=None): - """Make experiment archive directories of given type (i.e. "restart" or - "output")""" - created_dirs = [] - for i in range(num_dirs): - dir_path = os.path.join(expt_archive_dir, f'{dir_type}{i:03d}') - if additional_path: - dir_path = os.path.join(dir_path, additional_path) +def make_expt_archive_dir(type='restart', index=0, additional_path=None): + """Make experiment archive directory of given type (i.e. restart or + output)""" + dir_path = os.path.join(expt_archive_dir, f'{type}{index:03d}') + if additional_path: + dir_path = os.path.join(dir_path, additional_path) - os.makedirs(dir_path) - created_dirs.append(dir_path) - return created_dirs + os.makedirs(dir_path) + return dir_path -def list_expt_archive_dirs(dir_type='restart', full_path=True): +def list_expt_archive_dirs(type='restart', full_path=True): """Return a list of output/restart paths in experiment archive path""" dirs = [] if os.path.exists(expt_archive_dir): if os.path.isdir(expt_archive_dir): - naming_pattern = re.compile(fr"^{dir_type}[0-9][0-9][0-9]$") + naming_pattern = re.compile(fr"^{type}[0-9][0-9][0-9]$") dirs = [d for d in os.listdir(expt_archive_dir) if naming_pattern.match(d)] @@ -194,10 +191,10 @@ def list_expt_archive_dirs(dir_type='restart', full_path=True): return dirs -def remove_expt_archive_dirs(dir_type='restart'): - """Remove experiment archive directories of the given type (i.e. "restart" - or "output"). Useful for cleaning up archive between tests""" - for dir_path in list_expt_archive_dirs(dir_type): +def remove_expt_archive_dirs(type='restart'): + """Remove experiment archive directories of the given type (i.e. restart + or output). Useful for cleaning up archive between tests""" + for dir_path in list_expt_archive_dirs(type): try: shutil.rmtree(dir_path) except Exception as e: diff --git a/test/models/test_mom.py b/test/models/test_mom.py index 885fd01a..2847e181 100644 --- a/test/models/test_mom.py +++ b/test/models/test_mom.py @@ -13,7 +13,7 @@ from test.common import write_config from test.common import make_all_files from test.common import list_expt_archive_dirs -from test.common import make_expt_archive_dirs, remove_expt_archive_dirs +from test.common import make_expt_archive_dir, remove_expt_archive_dirs verbose = True @@ -43,6 +43,11 @@ def setup_module(module): except Exception as e: print(e) + # Write config + test_config = config + test_config['model'] = 'mom' + write_config(test_config) + def teardown_module(module): """ @@ -64,43 +69,39 @@ def teardown(): yield # Remove any created restart files - remove_expt_archive_dirs(dir_type='restart') + remove_expt_archive_dirs(type='restart') -def make_ocean_restart_files(init_dt_array, - run_dt_arrays, - calendar, - additional_path=None): - restart_paths = make_expt_archive_dirs(dir_type='restart', - num_dirs=len(run_dt_arrays), - additional_path=additional_path) +def make_ocean_restart_dir(start_dt, + run_dt, + calendar, + restart_index=0, + additional_path=None): + """Create tests restart directory with ocean_solo.res file""" + # Create restart directory + restart_path = make_expt_archive_dir(type='restart', + index=restart_index, + additional_path=additional_path) - for index, run_dt_array in enumerate(run_dt_arrays): - # Create ocean_solo.res file - make_ocean_solo_file(restart_paths[index], - init_dt_array, - run_dt_array, - calendar) - - -def make_ocean_solo_file(restart_path, init_dt_array, run_dt_array, calendar): - "Create test ocean_solo.res files in restart directories" + # Create ocean_solo.res file lines = (f"{calendar:6d} " "(Calendar: no_calendar=0, thirty_day_months=1, julian=2, " "gregorian=3, noleap=4)\n") init_dt_desc = "Model start time: year, month, day, hour, minute, second" - lines += format_ocean_solo_datetime_line(init_dt_array, init_dt_desc) + lines += format_ocean_solo_datetime_line(start_dt, init_dt_desc) run_dt_desc = "Current model time: year, month, day, hour, minute, second" - lines += format_ocean_solo_datetime_line(run_dt_array, run_dt_desc) + lines += format_ocean_solo_datetime_line(run_dt, run_dt_desc) ocean_solo_path = os.path.join(restart_path, "ocean_solo.res") with open(ocean_solo_path, "w") as ocean_solo_file: ocean_solo_file.write(lines) -def format_ocean_solo_datetime_line(dt_array, description): +def format_ocean_solo_datetime_line(dt_string, description): + """Format datetime string to match actual output files""" + dt_array = convert_date_string_to_array(dt_string) year, month, day, hour, minute, second = dt_array return ( f"{year:6d}{month:6d}{day:6d}{hour:6d}{minute:6d}{second:6d}" @@ -108,43 +109,49 @@ def format_ocean_solo_datetime_line(dt_array, description): ) +def convert_date_string_to_array(dt_string): + """Convert string of YYYY-MM-DD hh:mm:ss to array of integers of + [year, month, day, hour, minute, second] format""" + date, time = dt_string.split(' ') + year, month, day = map(int, date.split('-')) + hour, minute, second = map(int, time.split(':')) + return [year, month, day, hour, minute, second] + + @pytest.mark.parametrize( - "run_dt_arrays, calendar, expected_cftimes", + "run_dt, calendar, expected_cftime", [ ( - [[1900, 2, 1, 0, 0, 0], [1900, 3, 1, 0, 0, 0]], + "1900-02-01 00:00:00", 4, - [ - cftime.datetime(1900, 2, 1, calendar="noleap"), - cftime.datetime(1900, 3, 1, calendar="noleap"), - ], + cftime.datetime(1900, 2, 1, calendar="noleap") ), ( - [[1900, 6, 1, 0, 0, 0], [1901, 1, 1, 0, 0, 0]], + "1900-06-01 00:00:00", 3, - [ - cftime.datetime(1900, 6, 1, calendar="proleptic_gregorian"), - cftime.datetime(1901, 1, 1, calendar="proleptic_gregorian"), - ], - ) + cftime.datetime(1900, 6, 1, calendar="proleptic_gregorian") + ), + ( + "1000-11-12 12:23:34", + 2, + cftime.datetime(1000, 11, 12, 12, 23, 34, + calendar="julian") + ), + ( + "1900-02-30 00:00:00", + 1, + cftime.datetime(1900, 2, 30, calendar="360_day") + ), ]) -def test_mom_get_restart_datetime(run_dt_arrays, calendar, expected_cftimes): - # Create mom restart files - init_dt_array = [1900, 1, 1, 0, 0, 0] - make_ocean_restart_files(init_dt_array, run_dt_arrays, calendar) - - # Write config - test_config = config - test_config['model'] = 'mom' - write_config(test_config) +def test_mom_get_restart_datetime(run_dt, calendar, expected_cftime): + # Create 1 mom restart directory + start_dt = "1900-01-01 00:00:00" + make_ocean_restart_dir(start_dt, run_dt, calendar) with cd(ctrldir): lab = payu.laboratory.Laboratory(lab_path=str(labdir)) expt = payu.experiment.Experiment(lab, reproduce=False) - restart_paths = list_expt_archive_dirs(dir_type='restart') - - for index, expected_cftime in enumerate(expected_cftimes): - restart_path = restart_paths[index] - run_dt = expt.model.get_restart_datetime(restart_path) - assert run_dt == expected_cftime + restart_path = list_expt_archive_dirs()[0] + parsed_run_dt = expt.model.get_restart_datetime(restart_path) + assert parsed_run_dt == expected_cftime diff --git a/test/test_cli.py b/test/test_cli.py index b702075a..b2b8933c 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -132,6 +132,7 @@ def test_parse_run(): assert args.pop('force') is False assert args.pop('init_run') is None assert args.pop('n_runs') is None + assert args.pop('force_prune_restarts') is False assert len(args) == 0 @@ -143,7 +144,8 @@ def test_parse_run(): '--force ' '--initial 99 ' '--nruns 999 ' - '--reproduce'.format(cmd=cmd)) + '--reproduce ' + '--force-prune-restarts'.format(cmd=cmd)) args = vars(parser.parse_args(arguments[1:])) @@ -158,6 +160,7 @@ def test_parse_run(): assert args.pop('force') is True assert args.pop('init_run') == '99' assert args.pop('n_runs') == '999' + assert args.pop('force_prune_restarts') is True assert len(args) == 0 @@ -169,7 +172,8 @@ def test_parse_run(): '-f ' '-i 99 ' '-n 999 ' - '-r'.format(cmd=cmd)) + '-r ' + '-F'.format(cmd=cmd)) args = vars(parser.parse_args(arguments[1:])) @@ -184,6 +188,7 @@ def test_parse_run(): assert args.pop('force') is True assert args.pop('init_run') == '99' assert args.pop('n_runs') == '999' + assert args.pop('force_prune_restarts') is True assert len(args) == 0 diff --git a/test/test_prune_restarts.py b/test/test_prune_restarts.py index 2fb1c79f..f299cb5f 100644 --- a/test/test_prune_restarts.py +++ b/test/test_prune_restarts.py @@ -1,6 +1,5 @@ import copy import shutil -import re import pytest @@ -12,7 +11,7 @@ from test.common import write_config from test.common import make_all_files from test.common import remove_expt_archive_dirs -from test.models.test_mom import make_ocean_restart_files +from test.models.test_mom import make_ocean_restart_dir verbose = True @@ -62,72 +61,157 @@ def teardown(): yield # Remove any created restart files - remove_expt_archive_dirs(dir_type='restart') + remove_expt_archive_dirs(type='restart') -@pytest.mark.parametrize( - "restart_freq, restart_history, expected_pruned_restarts_indices", - [ - ("1MS", 1, []), - ("2MS", 5, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]), - ("12MS", - 1, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), - ("1YS", - 1, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), - (1, 1, []), - (5, 5, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]) - ]) -def test_prune_restarts(restart_freq, - restart_history, - expected_pruned_restarts_indices): - - # Create 2 years + 1 month worth of mom restarts directories - # with 1 month runtimes - starting from 1900/02/01 to 1902/02/01 - # e.g (run_date, restart_directory) - # (1900/02/01, restart000) - # (1900/03/01, restart001) - # ... - # (1902/02/01, restart024) +def create_test_2Y_1_month_frequency_restarts(): + """Create 2 years + 1 month worth of mom restarts directories + with 1 month runtimes - starting from 1900/02/01 to 1902/02/01 + e.g (run_date, restart_directory) + (1900/02/01, restart000) + (1900/03/01, restart001) + ... + (1902/02/01, restart024)""" restart_dts = [] for year in [1900, 1901, 1902]: for month in range(1, 13): if (year == 1900 and month == 1) or (year == 1902 and month > 2): # Ignore the first date and dates from 1902/03/01 onwards continue - restart_dts.append([year, month, 1, 0, 0, 0]) + restart_dts.append(f"{year}-{month}-01 00:00:00") - make_ocean_restart_files( - init_dt_array=[1900, 1, 1, 0, 0, 0], - run_dt_arrays=restart_dts, - calendar=4, - additional_path='ocean') + for index, run_dt in enumerate(restart_dts): + make_ocean_restart_dir(start_dt="1900-01-01 00:00:00", + run_dt=run_dt, + calendar=4, + restart_index=index, + additional_path='ocean') - # Set up config - test_config = config + +def write_test_config(restart_freq, restart_history=None): + test_config = copy.deepcopy(config) test_config['model'] = 'access-om2' test_config['submodels'] = [ {'name': 'atmosphere', 'model': 'yatm'}, {'name': 'ocean', 'model': 'mom'} ] test_config['restart_freq'] = restart_freq - test_config['restart_history'] = restart_history + if restart_history: + test_config['restart_history'] = restart_history + write_config(test_config) + +@pytest.mark.parametrize( + "restart_freq, restart_history, expected_pruned_restarts_indices", + [ + ("1MS", None, []), + ("2MS", None, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23]), + ("2MS", 5, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]), + ("12MS", None, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + ("1YS", None, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + (1, 1, []), + (5, 5, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]), + (5, None, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]) + ]) +def test_force_prune_restarts(restart_freq, + restart_history, + expected_pruned_restarts_indices): + # Test --force-prune-restarts with varying restart_freq and restart_history + + # Create restart files 1900/02/01-restart000 to 1902/02/01-restart024 + create_test_2Y_1_month_frequency_restarts() + + # Set up config + write_test_config(restart_freq, restart_history) + with cd(ctrldir): lab = payu.laboratory.Laboratory(lab_path=str(labdir)) expt = payu.experiment.Experiment(lab, reproduce=False) # Function to test - restarts_to_prune = expt.prune_restarts() + restarts_to_prune = expt.get_restarts_to_prune(force=True) - # Extract out index from the full paths + # Extract out index restarts_to_prune_indices = [ - int(re.search("[0-9]+$", restart_path).group()) - for restart_path in restarts_to_prune + int(restart.lstrip('restart')) for restart in restarts_to_prune ] assert restarts_to_prune_indices == expected_pruned_restarts_indices + + +@pytest.mark.parametrize( + "restarts, restart_freq, restart_history, expected_restart_indices", + [ + ([ + (0, "1901-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00"), + (5, "1906-01-01 00:00:00"), + (6, "1907-01-01 00:00:00") + ], "3YS", None, [4, 5]), + ([ + (0, "1901-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00"), + (5, "1906-01-01 00:00:00"), + (6, "1907-01-01 00:00:00") + ], "3YS", 2, [4]), + ([ + (0, "1901-01-01 00:00:00"), + (1, "1902-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00") + ], "2YS", 1, []), + ([ + (0, "1901-01-01 00:00:00"), + (1, "1902-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1905-01-01 00:00:00") + ], "2YS", None, []), + ([ + (0, "1901-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + ], 2, None, []), + ([ + (0, "1901-01-01 00:00:00"), + (2, "1903-01-01 00:00:00"), + (3, "1904-01-01 00:00:00"), + (4, "1904-01-01 00:00:00"), + ], 2, None, [3]) + ]) +def test_prune_restarts(restarts, + restart_freq, + restart_history, + expected_restart_indices): + # Create restart files + for index, datetime in restarts: + make_ocean_restart_dir(start_dt="1900-01-01 00:00:00", + run_dt=datetime, + calendar=4, + restart_index=index, + additional_path='ocean') + + # Set up config + write_test_config(restart_freq, restart_history) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + + # Function to test - Note: with force=False which is default + restarts_to_prune = expt.get_restarts_to_prune() + + # Extract out index + restarts_to_prune_indices = [ + int(restart.lstrip('restart')) for restart in restarts_to_prune + ] + + assert restarts_to_prune_indices == expected_restart_indices From c4418f5600a63c96644bde57b1348c2512d34d6c Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 6 Oct 2023 12:34:21 +1100 Subject: [PATCH 4/5] Update documentation to re-include intermediate restarts and restart_history --- docs/source/config.rst | 25 ++++++++++++++----------- docs/source/usage.rst | 6 ++++++ test/test_prune_restarts.py | 10 ++++++++-- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index 55991481..729e37fc 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -189,25 +189,28 @@ configuration. ncpus: 0 ``restart_freq`` (*Default:* ``5``) - Specifies the rate of saved restart files. For the default rate of 5, we - keep the restart files for every fifth run (``restart004``, ``restart009``, - ``restart014``, etc.). - Using ``restart_freq: 1`` will save all restart files. - - For both integer and date-based restart frequency, the first restart and, - by default, the 5 latest restarts are saved. + Specifies the rate of saved restart files. This rate can be either an + integer or date-based. For the default rate of 5, we + keep the restart files for every fifth run (``restart000``, ``restart005``, + ``restart010``, etc.). To save all restart files, set ``restart_freq: 1``. + + If ``restart_history`` is not configured, intermediate restarts are not + deleted until a permanently archived restart has been produced. + For example, if we have just completed run ``11``, then + we keep ``restart000``, ``restart005``, ``restart010``, and ``restart011``. + Restarts 11 through 14 are not deleted until ``restart015`` has been saved. To use a date-based restart frequency, specify a number with a time unit. The supported time units are ``YS`` - year-start, ``MS`` - month-start, ``W`` - week, ``D`` - day, ``H`` - hour, ``T`` - minute and ``S`` - second. For example, ``restart_freq: 10YS`` would save earliest restart of the year, - 10 years from the last permanently saved restart's datetime. + 10 years from the last permanently archived restart's datetime. - Please note that currently, only ACCESS-OM2 and MOM models support - date-based restart frequency, as it depends the payu model driver being + Please note that currently, only ACCESS-OM2, MOM5 and MOM6 models support + date-based restart frequency, as it depends on the payu model driver being able to parse restarts files for a datetime. -``restart_history`` (*Default:* ``5``) +``restart_history`` Specifies the number of latest restart files to save *The following model-based tags are typically not configured* diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 23a8db2c..086b4ca7 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -232,6 +232,12 @@ To run from an existing model run, also called a warm start, set the ``restart`` option to point to the folder containing the restart files from a previous matching experiment. +If restart pruning configuration has changed, there may be warnings if +many restarts will be pruned as a result. If this is desired, at the next +run use ``-F/--force-prune-restarts`` flag: + + payu run --force-prune-restarts + Cleaning up =========== diff --git a/test/test_prune_restarts.py b/test/test_prune_restarts.py index f299cb5f..ad823b3d 100644 --- a/test/test_prune_restarts.py +++ b/test/test_prune_restarts.py @@ -184,8 +184,14 @@ def test_force_prune_restarts(restart_freq, (0, "1901-01-01 00:00:00"), (2, "1903-01-01 00:00:00"), (3, "1904-01-01 00:00:00"), - (4, "1904-01-01 00:00:00"), - ], 2, None, [3]) + (4, "1905-01-01 00:00:00"), + ], 2, None, [3]), + ([ + (2, "1903-01-01 00:00:00"), + (4, "1905-01-01 00:00:00"), + (6, "1907-01-01 00:00:00"), + (8, "1909-01-01 00:00:00"), + ], 4, None, []), ]) def test_prune_restarts(restarts, restart_freq, From bc0aa459665ac5ce8aa79132a5766fb25796b276 Mon Sep 17 00:00:00 2001 From: jo-basevi Date: Wed, 11 Oct 2023 09:01:13 +1100 Subject: [PATCH 5/5] Add sorting to general method for listing output/restart dirs - Allow regex to support restart/output dirs with counter > 999 - Add test cases for restart_history smaller/greater than restart_freq --- docs/source/config.rst | 2 +- payu/experiment.py | 19 ++++++++++--------- test/test_prune_restarts.py | 3 ++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index 729e37fc..4f1483a3 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -211,7 +211,7 @@ configuration. able to parse restarts files for a datetime. ``restart_history`` - Specifies the number of latest restart files to save + Specifies how many of the most recent restart files to retain regardless of `restart_freq` *The following model-based tags are typically not configured* diff --git a/payu/experiment.py b/payu/experiment.py index 36a16fe8..15f5f8cb 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -198,13 +198,15 @@ def max_output_index(self, output_type="output"): raise if output_dirs and len(output_dirs): - return max([int(d.lstrip(output_type)) for d in output_dirs]) + return int(output_dirs[-1].lstrip(output_type)) def list_output_dirs(self, output_type="output"): - """Return a list of restart or output directories in archive""" - naming_pattern = re.compile(fr"^{output_type}[0-9][0-9][0-9]$") - return [d for d in os.listdir(self.archive_path) + """Return a sorted list of restart or output directories in archive""" + naming_pattern = re.compile(fr"^{output_type}[0-9][0-9][0-9]+$") + dirs = [d for d in os.listdir(self.archive_path) if naming_pattern.match(d)] + dirs.sort(key=lambda d: int(d.lstrip(output_type))) + return dirs def set_stacksize(self, stacksize): @@ -1003,9 +1005,8 @@ def get_restarts_to_prune(self, if not os.path.exists(self.archive_path): return [] - # List all and sort restart directories in archive + # List all restart directories in archive restarts = self.list_output_dirs(output_type='restart') - restarts.sort(key=lambda d: int(d.lstrip('restart'))) # TODO: Previous logic was to prune all restarts if self.repeat_run # Still need to figure out what should happen in this case @@ -1043,15 +1044,15 @@ def get_restarts_to_prune(self, restart_path = os.path.join(self.archive_path, restart) try: restart_dt = self.model.get_restart_datetime(restart_path) - except NotImplementedError as e: + except NotImplementedError: print('payu: error: Date-based restart pruning is not ' f'implemented for the {self.model.model_type} ' 'model. To use integer based restart pruning, ' 'set restart_freq to an integer value.') raise - except Exception as e: + except Exception: print('payu: error: Error parsing restart directory ', - f'{restart} for a datetime. Error: {e}') + f'{restart} for a datetime to prune restarts.') raise if (next_dt is not None and restart_dt < next_dt): diff --git a/test/test_prune_restarts.py b/test/test_prune_restarts.py index ad823b3d..253e02b1 100644 --- a/test/test_prune_restarts.py +++ b/test/test_prune_restarts.py @@ -115,7 +115,8 @@ def write_test_config(restart_freq, restart_history=None): [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), (1, 1, []), - (5, 5, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]), + (5, 3, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19, 21]), + (5, 7, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17]), (5, None, [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19]) ]) def test_force_prune_restarts(restart_freq,