From 561324a0be9c68c10e858b6bcbec3a55b6a4c294 Mon Sep 17 00:00:00 2001 From: dougiesquire Date: Mon, 6 May 2024 16:30:39 +1000 Subject: [PATCH 01/12] add basic accessom3 model --- models/accessom3.py | 85 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 models/accessom3.py diff --git a/models/accessom3.py b/models/accessom3.py new file mode 100644 index 0000000..e2bdffc --- /dev/null +++ b/models/accessom3.py @@ -0,0 +1,85 @@ +"""Specific Access-OM3 Model setup and post-processing""" + +import hashlib +from pathlib import Path +from payu.models.cesm_cmeps import Runconfig +from typing import Dict, Any + +from models.model import Model + +BASE_SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/main/au.org.access-nri/model/access-om2/experiment/reproducibility/checksums" + +SCHEMA_VERSION_1_0_0 = "1-0-0" +DEFAULT_SCHEMA_VERSION = SCHEMA_VERSION_1_0_0 +SUPPORTED_SCHEMA_VERSIONS = [SCHEMA_VERSION_1_0_0] + +class AccessOm3(Model): + def __init__(self, experiment): + super(AccessOm3, self).__init__(experiment) + self.output_file = self.experiment.output000 / 'ocean.stats' + + self.runconfig = experiment.control_path / 'nuopc.runconfig' + self.ocean_config = experiment.control_path / 'input.nml' + self.default_schema_version = DEFAULT_SCHEMA_VERSION + + def set_model_runtime(self, + years: int = 0, + months: int = 0, + seconds: int = 10800): + """Set config files to a short time period for experiment run. + Default is 3 hours""" + runconfig = Runconfig(self.runconfig) + + if years == months == 0: + freq = "nseconds" + n = str(seconds) + elif seconds == 0: + freq = "nmonths" + n = str(12 * years + months) + else: + raise NotImplementedError( + f"Cannot specify runtime in seconds and year/months at the same time") + + runconfig.set("CLOCK_attributes", "restart_n", n) + runconfig.set("CLOCK_attributes", "restart_option", freq) + runconfig.set("CLOCK_attributes", "stop_n", n) + runconfig.set("CLOCK_attributes", "stop_option", freq) + + runconfig.write() + + def output_exists(self) -> bool: + """Check for existing output file""" + return self.output_file.exists() + + def extract_checksums(self, + output_directory: Path = None, + schema_version: str = None) -> Dict[str, Any]: + """Parse output file and create checksum using defined schema""" + if output_directory: + output_filename = output_directory / 'ocean.stats' + else: + output_filename = self.output_file + + # ocean.stats is used for regression testing in MOM6's own test suite + # See https://github.com/mom-ocean/MOM6/blob/2ab885eddfc47fc0c8c0bae46bc61531104428d5/.testing/Makefile#L495-L501 + # Here we calculate the md5 hash of ocean.stats + with open(output_filename, 'rb') as f: + contents = f.read() + md5_hash = hashlib.md5(contents).hexdigest() + + output_checksums = {"ocean.stats": [md5_hash]} + + if schema_version is None: + schema_version = DEFAULT_SCHEMA_VERSION + + if schema_version == SCHEMA_VERSION_1_0_0: + checksums = { + "schema_version": schema_version, + "output": dict(output_checksums) + } + else: + raise NotImplementedError( + f"Unsupported checksum schema version: {schema_version}") + + return checksums + From b7878084b546f670b7e63c8d5389db804c51a16c Mon Sep 17 00:00:00 2001 From: dougiesquire Date: Thu, 9 May 2024 12:57:18 +1000 Subject: [PATCH 02/12] add access-om3 output to test resources --- test/resources/access-om3-checksums-1-0-0.json | 8 ++++++++ test/resources/ocean.stats | 3 +++ 2 files changed, 11 insertions(+) create mode 100644 test/resources/access-om3-checksums-1-0-0.json create mode 100644 test/resources/ocean.stats diff --git a/test/resources/access-om3-checksums-1-0-0.json b/test/resources/access-om3-checksums-1-0-0.json new file mode 100644 index 0000000..78070af --- /dev/null +++ b/test/resources/access-om3-checksums-1-0-0.json @@ -0,0 +1,8 @@ +{ + "schema_version": "1-0-0", + "output": { + "ocean.stats": [ + "d2f2f18e0f688c1717b52acf78c0793f" + ] + } +} diff --git a/test/resources/ocean.stats b/test/resources/ocean.stats new file mode 100644 index 0000000..ee9e004 --- /dev/null +++ b/test/resources/ocean.stats @@ -0,0 +1,3 @@ + Step, Day, Truncs, Energy/Mass, Maximum CFL, Mean Sea Level, Total Mass, Mean Salin, Mean Temp, Frac Mass Err, Salin Err, Temp Err + [days] [m2 s-2] [Nondim] [m] [kg] [PSU] [degC] [Nondim] [PSU] [degC] + 0, 693135.000, 0, En 3.0745627134675957E-23, CFL 0.00000, SL 1.5112E-10, M 1.36404E+21, S 34.7263, T 3.6362, Me 0.00E+00, Se 0.00E+00, Te 0.00E+00 From 0313093d459eb9a3b91e3f6fffa52b19602f47d3 Mon Sep 17 00:00:00 2001 From: dougiesquire Date: Fri, 10 May 2024 22:26:12 +1000 Subject: [PATCH 03/12] extract fields from ocean.stats rather than checksumming whole file --- models/accessom3.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/models/accessom3.py b/models/accessom3.py index e2bdffc..0d1531f 100644 --- a/models/accessom3.py +++ b/models/accessom3.py @@ -1,6 +1,7 @@ """Specific Access-OM3 Model setup and post-processing""" -import hashlib +from collections import defaultdict +import re from pathlib import Path from payu.models.cesm_cmeps import Runconfig from typing import Dict, Any @@ -62,12 +63,23 @@ def extract_checksums(self, # ocean.stats is used for regression testing in MOM6's own test suite # See https://github.com/mom-ocean/MOM6/blob/2ab885eddfc47fc0c8c0bae46bc61531104428d5/.testing/Makefile#L495-L501 - # Here we calculate the md5 hash of ocean.stats - with open(output_filename, 'rb') as f: - contents = f.read() - md5_hash = hashlib.md5(contents).hexdigest() - - output_checksums = {"ocean.stats": [md5_hash]} + # Rows in ocean.stats look like: + # 0, 693135.000, 0, En 3.0745627134675957E-23, CFL 0.00000, ... + # where the first three columns are Step, Day, Truncs and the remaining + # columns include a label for what they are (e.g. En = Energy/Mass) + # Header info is only included for new runs so can't be relied on + output_checksums: dict[str, list[any]] = defaultdict(list) + + with open(output_filename) as f: + lines = f.readlines() + # Skip header if it exists (for new runs) + istart = 2 if "Step" in lines[0] else 0 + for line in lines[istart:]: + for col in line.split(","): + # Only keep columns with labels (ie not Step, Day, Truncs) + col = re.split(" +", col.strip().rstrip('\n')) + if len(col) > 1: + output_checksums[col[0]].append(col[-1]) if schema_version is None: schema_version = DEFAULT_SCHEMA_VERSION From 1671601668c2326961370f8e00cd6629669345bd Mon Sep 17 00:00:00 2001 From: dougiesquire Date: Mon, 13 May 2024 08:41:50 +1000 Subject: [PATCH 04/12] update accessom3 test checksums --- .../resources/access-om3-checksums-1-0-0.json | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/test/resources/access-om3-checksums-1-0-0.json b/test/resources/access-om3-checksums-1-0-0.json index 78070af..61eeb82 100644 --- a/test/resources/access-om3-checksums-1-0-0.json +++ b/test/resources/access-om3-checksums-1-0-0.json @@ -1,8 +1,32 @@ { "schema_version": "1-0-0", "output": { - "ocean.stats": [ - "d2f2f18e0f688c1717b52acf78c0793f" + "En": [ + "3.0745627134675957E-23" + ], + "CFL": [ + "0.00000" + ], + "SL": [ + "1.5112E-10" + ], + "M": [ + "1.36404E+21" + ], + "S": [ + "34.7263" + ], + "T": [ + "3.6362" + ], + "Me": [ + "0.00E+00" + ], + "Se": [ + "0.00E+00" + ], + "Te": [ + "0.00E+00" ] } } From 0149a843de5bf46a3ca031188205dd7878239e11 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Mon, 13 May 2024 11:59:33 +1000 Subject: [PATCH 05/12] Move schema and general methods to Model class so they can used for different models --- models/__init__.py | 4 +++- models/accessom2.py | 35 ++--------------------------------- models/accessom3.py | 9 +-------- models/model.py | 27 ++++++++++++++++++++++++++- 4 files changed, 32 insertions(+), 43 deletions(-) diff --git a/models/__init__.py b/models/__init__.py index c282c8f..7e66e81 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,5 +1,7 @@ from models.accessom2 import AccessOm2 +from models.accessom3 import AccessOm3 index = { - 'access-om2': AccessOm2 + 'access-om2': AccessOm2, + 'access-om3': AccessOm3 } diff --git a/models/accessom2.py b/models/accessom2.py index 1523524..c0bfd4b 100644 --- a/models/accessom2.py +++ b/models/accessom2.py @@ -8,13 +8,7 @@ from pathlib import Path from typing import Dict, Any -from models.model import Model - -BASE_SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/main/au.org.access-nri/model/access-om2/experiment/reproducibility/checksums" - -SCHEMA_VERSION_1_0_0 = "1-0-0" -DEFAULT_SCHEMA_VERSION = SCHEMA_VERSION_1_0_0 -SUPPORTED_SCHEMA_VERSIONS = [SCHEMA_VERSION_1_0_0] +from models.model import Model, SCHEMA_VERSION_1_0_0 class AccessOm2(Model): def __init__(self, experiment): @@ -23,7 +17,6 @@ def __init__(self, experiment): self.accessom2_config = experiment.control_path / 'accessom2.nml' self.ocean_config = experiment.control_path / 'ocean' / 'input.nml' - self.default_schema_version = DEFAULT_SCHEMA_VERSION def set_model_runtime(self, years: int = 0, @@ -78,7 +71,7 @@ def extract_checksums(self, output_checksums[field].append(checksum) if schema_version is None: - schema_version = DEFAULT_SCHEMA_VERSION + schema_version = self.default_schema_version if schema_version == SCHEMA_VERSION_1_0_0: checksums = { @@ -90,27 +83,3 @@ def extract_checksums(self, f"Unsupported checksum schema version: {schema_version}") return checksums - - def check_checksums_over_restarts(self, - long_run_checksum: Dict[str, Any], - short_run_checksum_0: Dict[str, Any], - short_run_checksum_1: Dict[str, Any] - ) -> bool: - """Compare a checksums from a long run (e.g. 2 days) against - checksums from 2 short runs (e.g. 1 day)""" - short_run_checksums = short_run_checksum_0['output'] - for field, checksums in short_run_checksum_1['output'].items(): - if field not in short_run_checksums: - short_run_checksums[field] = checksums - else: - short_run_checksums[field].extend(checksums) - - matching_checksums = True - for field, checksums in long_run_checksum['output'].items(): - for checksum in checksums: - if (field not in short_run_checksums or - checksum not in short_run_checksums[field]): - print(f"Unequal checksum: {field}: {checksum}") - matching_checksums = False - - return matching_checksums diff --git a/models/accessom3.py b/models/accessom3.py index 0d1531f..fd520d7 100644 --- a/models/accessom3.py +++ b/models/accessom3.py @@ -6,13 +6,7 @@ from payu.models.cesm_cmeps import Runconfig from typing import Dict, Any -from models.model import Model - -BASE_SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/main/au.org.access-nri/model/access-om2/experiment/reproducibility/checksums" - -SCHEMA_VERSION_1_0_0 = "1-0-0" -DEFAULT_SCHEMA_VERSION = SCHEMA_VERSION_1_0_0 -SUPPORTED_SCHEMA_VERSIONS = [SCHEMA_VERSION_1_0_0] +from models.model import Model, SCHEMA_VERSION_1_0_0, DEFAULT_SCHEMA_VERSION class AccessOm3(Model): def __init__(self, experiment): @@ -21,7 +15,6 @@ def __init__(self, experiment): self.runconfig = experiment.control_path / 'nuopc.runconfig' self.ocean_config = experiment.control_path / 'input.nml' - self.default_schema_version = DEFAULT_SCHEMA_VERSION def set_model_runtime(self, years: int = 0, diff --git a/models/model.py b/models/model.py index 2ae126f..0916ae2 100644 --- a/models/model.py +++ b/models/model.py @@ -2,11 +2,21 @@ from pathlib import Path from typing import Dict, Any +#TODO: Update to point to schema moved to a more general location +SCHEMA_VERSION_1_0_0 = "1-0-0" +SCHEMA_1_0_0_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/aae51cd2835abd7eb57f6690777a25359c758e98/au.org.access-nri/model/access-om2/experiment/reproducibility/checksums/1-0-0.json" +SCHEMA_VERSION_TO_URL = { + SCHEMA_VERSION_1_0_0: SCHEMA_1_0_0_URL +} +DEFAULT_SCHEMA_VERSION = "1-0-0" class Model(object): def __init__(self, experiment): self.experiment = experiment + self.default_schema_version = DEFAULT_SCHEMA_VERSION + self.schema_version_to_url = SCHEMA_VERSION_TO_URL + def extract_checksums(self, output_directory: Path, schema_version: str): @@ -30,4 +40,19 @@ def check_checksums_over_restarts(self, short_run_checksum_1) -> bool: """Compare a checksums from a long run (e.g. 2 days) against checksums from 2 short runs (e.g. 1 day)""" - raise NotImplementedError + short_run_checksums = short_run_checksum_0['output'] + for field, checksums in short_run_checksum_1['output'].items(): + if field not in short_run_checksums: + short_run_checksums[field] = checksums + else: + short_run_checksums[field].extend(checksums) + + matching_checksums = True + for field, checksums in long_run_checksum['output'].items(): + for checksum in checksums: + if (field not in short_run_checksums or + checksum not in short_run_checksums[field]): + print(f"Unequal checksum: {field}: {checksum}") + matching_checksums = False + + return matching_checksums From 55f185b3555ab1ce41e15d84bc56a8fb8cfb03c4 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Mon, 13 May 2024 12:26:14 +1000 Subject: [PATCH 06/12] Update test for testing model extract checksums methods --- .gitignore | 2 + .../checksums/1-0-0.json} | 0 .../{ => access-om2/output000}/access-om2.out | 0 .../checksums/1-0-0.json} | 0 .../{ => access-om3/output000}/ocean.stats | 0 test/test_access_om2_extract_checksums.py | 45 ------------ test/test_model_extract_checksums.py | 70 +++++++++++++++++++ 7 files changed, 72 insertions(+), 45 deletions(-) create mode 100644 .gitignore rename test/resources/{access-om2-checksums-1-0-0.json => access-om2/checksums/1-0-0.json} (100%) rename test/resources/{ => access-om2/output000}/access-om2.out (100%) rename test/resources/{access-om3-checksums-1-0-0.json => access-om3/checksums/1-0-0.json} (100%) rename test/resources/{ => access-om3/output000}/ocean.stats (100%) delete mode 100644 test/test_access_om2_extract_checksums.py create mode 100644 test/test_model_extract_checksums.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d48c759 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea +.vscode \ No newline at end of file diff --git a/test/resources/access-om2-checksums-1-0-0.json b/test/resources/access-om2/checksums/1-0-0.json similarity index 100% rename from test/resources/access-om2-checksums-1-0-0.json rename to test/resources/access-om2/checksums/1-0-0.json diff --git a/test/resources/access-om2.out b/test/resources/access-om2/output000/access-om2.out similarity index 100% rename from test/resources/access-om2.out rename to test/resources/access-om2/output000/access-om2.out diff --git a/test/resources/access-om3-checksums-1-0-0.json b/test/resources/access-om3/checksums/1-0-0.json similarity index 100% rename from test/resources/access-om3-checksums-1-0-0.json rename to test/resources/access-om3/checksums/1-0-0.json diff --git a/test/resources/ocean.stats b/test/resources/access-om3/output000/ocean.stats similarity index 100% rename from test/resources/ocean.stats rename to test/resources/access-om3/output000/ocean.stats diff --git a/test/test_access_om2_extract_checksums.py b/test/test_access_om2_extract_checksums.py deleted file mode 100644 index a08e1af..0000000 --- a/test/test_access_om2_extract_checksums.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -import requests -import json -import jsonschema -from pathlib import Path -from unittest.mock import Mock - -from models.accessom2 import AccessOm2 -from models.accessom2 import SUPPORTED_SCHEMA_VERSIONS - -@pytest.mark.parametrize("version", SUPPORTED_SCHEMA_VERSIONS) -@pytest.mark.test -def test_extract_checksums(version): - # Mock ExpTestHelper - mock_experiment = Mock() - mock_experiment.output000 = Path('test/test/resources') - mock_experiment.control_path = Path('test/tmp') - - model = AccessOm2(mock_experiment) - - checksums = model.extract_checksums( - schema_version=version - ) - - # Assert version is set as expected - assert checksums["schema_version"] == version - - # Check the entire checksum file is expected - with open(f'test/test/resources/access-om2-checksums-1-0-0.json', 'r') as file: - expected_checksums = json.load(file) - - assert checksums == expected_checksums - - # Validate checksum file with schema - schema = get_schema_from_url(expected_checksums["schema"]) - - # Validate checksums against schema - jsonschema.validate(instance=checksums, schema=schema) - - -def get_schema_from_url(url): - """Retrieve schema from github""" - response = requests.get(url) - assert response.status_code == 200 - return response.json() diff --git a/test/test_model_extract_checksums.py b/test/test_model_extract_checksums.py new file mode 100644 index 0000000..076541c --- /dev/null +++ b/test/test_model_extract_checksums.py @@ -0,0 +1,70 @@ +import pytest +import requests +import json +import jsonschema +from pathlib import Path +from unittest.mock import Mock + +from models import index as model_index + +MODEL_NAMES = model_index.keys() + +@pytest.mark.parametrize("model_name", MODEL_NAMES) +@pytest.mark.test +def test_extract_checksums(model_name): + resources_dir = Path(f'test/resources/{model_name}') + + # Mock ExpTestHelper + mock_experiment = Mock() + mock_experiment.output000 = resources_dir / 'output000' + mock_experiment.control_path = Path('test/tmp') + + # Create Model instance + ModelType = model_index[model_name] + model = ModelType(mock_experiment) + + # Test extract checksums for each schema version + for version, url in model.schema_version_to_url.items(): + checksums = model.extract_checksums(schema_version=version) + + # Assert version is set as expected + assert checksums["schema_version"] == version + + # Check the entire checksum file is expected + checksum_file = resources_dir / 'checksums' / f'{version}.json' + with open(checksum_file, 'r') as file: + expected_checksums = json.load(file) + + assert checksums == expected_checksums + + # Validate checksum file with schema + schema = get_schema_from_url(url) + + # Validate checksums against schema + jsonschema.validate(instance=checksums, schema=schema) + + +@pytest.mark.parametrize("model_name", MODEL_NAMES) +@pytest.mark.test +def test_extract_checksums_unsupported_version(model_name): + resources_dir = Path(f'test/resources/{model_name}') + + # Mock ExpTestHelper + mock_experiment = Mock() + mock_experiment.output000 = resources_dir / 'output000' + mock_experiment.control_path = Path('test/tmp') + + # Create Model instance + ModelType = model_index[model_name] + model = ModelType(mock_experiment) + + # Test NotImplementedError gets raised for unsupported versions + with pytest.raises(NotImplementedError): + model.extract_checksums(schema_version='test-version') + + +def get_schema_from_url(url): + """Retrieve schema from GitHub""" + response = requests.get(url) + assert response.status_code == 200 + return response.json() From 539ef5d380acfa287767ee6d4969a58017a9708c Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Mon, 13 May 2024 13:40:57 +1000 Subject: [PATCH 07/12] Access-om2: Add check for runtime that 2 of years, months and seconds is 0 --- models/accessom2.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/models/accessom2.py b/models/accessom2.py index c0bfd4b..3186a7e 100644 --- a/models/accessom2.py +++ b/models/accessom2.py @@ -27,6 +27,12 @@ def set_model_runtime(self, with open(self.accessom2_config) as f: nml = f90nml.read(f) + # Check that two of years, months, seconds is zero + if sum(x == 0 for x in (years, months, seconds)) != 2: + raise NotImplementedError( + "Cannot specify runtime in seconds and years and months" + + " at the same time. Two of which must be zero") + nml['date_manager_nml']['restart_period'] = [years, months, seconds] nml.write(self.accessom2_config, force=True) From 43f6b9307cc899e692f262f86b78a80714b6036a Mon Sep 17 00:00:00 2001 From: dougiesquire Date: Tue, 14 May 2024 08:58:02 +1000 Subject: [PATCH 08/12] set default schema in AccessOM3 in same way as in AccessOm2 --- models/accessom3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/accessom3.py b/models/accessom3.py index fd520d7..9416fb1 100644 --- a/models/accessom3.py +++ b/models/accessom3.py @@ -6,7 +6,7 @@ from payu.models.cesm_cmeps import Runconfig from typing import Dict, Any -from models.model import Model, SCHEMA_VERSION_1_0_0, DEFAULT_SCHEMA_VERSION +from models.model import Model, SCHEMA_VERSION_1_0_0 class AccessOm3(Model): def __init__(self, experiment): @@ -75,7 +75,7 @@ def extract_checksums(self, output_checksums[col[0]].append(col[-1]) if schema_version is None: - schema_version = DEFAULT_SCHEMA_VERSION + schema_version = self.default_schema_version if schema_version == SCHEMA_VERSION_1_0_0: checksums = { From 104a8523e3591ba0a182112a17462c443225dc7d Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Tue, 14 May 2024 14:13:46 +1000 Subject: [PATCH 09/12] Update checksum schema url and commit to point to new location --- models/model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models/model.py b/models/model.py index 0916ae2..42d8c79 100644 --- a/models/model.py +++ b/models/model.py @@ -2,9 +2,8 @@ from pathlib import Path from typing import Dict, Any -#TODO: Update to point to schema moved to a more general location SCHEMA_VERSION_1_0_0 = "1-0-0" -SCHEMA_1_0_0_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/aae51cd2835abd7eb57f6690777a25359c758e98/au.org.access-nri/model/access-om2/experiment/reproducibility/checksums/1-0-0.json" +SCHEMA_1_0_0_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/7666d95967de4dfd19b0d271f167fdcfd3f46962/au.org.access-nri/model/reproducibility/checksums/1-0-0.json" SCHEMA_VERSION_TO_URL = { SCHEMA_VERSION_1_0_0: SCHEMA_1_0_0_URL } From 1c5f1f8abb8364e32c964085172473f425dff519 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 24 May 2024 11:27:31 +1000 Subject: [PATCH 10/12] Run pre-commit lint and format over merged code --- src/model_config_tests/models/__init__.py | 5 +-- src/model_config_tests/models/accessom2.py | 15 ++++---- src/model_config_tests/models/accessom3.py | 40 +++++++++++----------- src/model_config_tests/models/model.py | 35 ++++++++----------- tests/test_model_extract_checksums.py | 26 +++++++------- 5 files changed, 59 insertions(+), 62 deletions(-) diff --git a/src/model_config_tests/models/__init__.py b/src/model_config_tests/models/__init__.py index 7f7e474..c704378 100644 --- a/src/model_config_tests/models/__init__.py +++ b/src/model_config_tests/models/__init__.py @@ -1,7 +1,4 @@ from model_config_tests.models.accessom2 import AccessOm2 from model_config_tests.models.accessom3 import AccessOm3 -index = { - "access-om2": AccessOm2, - 'access-om3': AccessOm3 -} +index = {"access-om2": AccessOm2, "access-om3": AccessOm3} diff --git a/src/model_config_tests/models/accessom2.py b/src/model_config_tests/models/accessom2.py index ed13a82..a1cc48a 100644 --- a/src/model_config_tests/models/accessom2.py +++ b/src/model_config_tests/models/accessom2.py @@ -5,7 +5,9 @@ from pathlib import Path from typing import Any -from model_config_tests.models.model import Model, SCHEMA_VERSION_1_0_0 +import f90nml + +from model_config_tests.models.model import SCHEMA_VERSION_1_0_0, Model class AccessOm2(Model): @@ -13,8 +15,8 @@ def __init__(self, experiment): super().__init__(experiment) self.output_file = self.experiment.output000 / "access-om2.out" - self.accessom2_config = experiment.control_path / 'accessom2.nml' - self.ocean_config = experiment.control_path / 'ocean' / 'input.nml' + self.accessom2_config = experiment.control_path / "accessom2.nml" + self.ocean_config = experiment.control_path / "ocean" / "input.nml" def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800): """Set config files to a short time period for experiment run. @@ -25,10 +27,11 @@ def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 1080 # Check that two of years, months, seconds is zero if sum(x == 0 for x in (years, months, seconds)) != 2: raise NotImplementedError( - "Cannot specify runtime in seconds and years and months" + - " at the same time. Two of which must be zero") + "Cannot specify runtime in seconds and years and months" + + " at the same time. Two of which must be zero" + ) - nml['date_manager_nml']['restart_period'] = [years, months, seconds] + nml["date_manager_nml"]["restart_period"] = [years, months, seconds] nml.write(self.accessom2_config, force=True) def output_exists(self) -> bool: diff --git a/src/model_config_tests/models/accessom3.py b/src/model_config_tests/models/accessom3.py index 956ccd7..9a5df35 100644 --- a/src/model_config_tests/models/accessom3.py +++ b/src/model_config_tests/models/accessom3.py @@ -1,25 +1,24 @@ """Specific Access-OM3 Model setup and post-processing""" -from collections import defaultdict import re +from collections import defaultdict from pathlib import Path +from typing import Any + from payu.models.cesm_cmeps import Runconfig -from typing import Dict, Any -from model_config_tests.models.model import Model, SCHEMA_VERSION_1_0_0 +from model_config_tests.models.model import SCHEMA_VERSION_1_0_0, Model + class AccessOm3(Model): def __init__(self, experiment): - super(AccessOm3, self).__init__(experiment) - self.output_file = self.experiment.output000 / 'ocean.stats' + super().__init__(experiment) + self.output_file = self.experiment.output000 / "ocean.stats" - self.runconfig = experiment.control_path / 'nuopc.runconfig' - self.ocean_config = experiment.control_path / 'input.nml' + self.runconfig = experiment.control_path / "nuopc.runconfig" + self.ocean_config = experiment.control_path / "input.nml" - def set_model_runtime(self, - years: int = 0, - months: int = 0, - seconds: int = 10800): + def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800): """Set config files to a short time period for experiment run. Default is 3 hours""" runconfig = Runconfig(self.runconfig) @@ -32,7 +31,8 @@ def set_model_runtime(self, n = str(12 * years + months) else: raise NotImplementedError( - f"Cannot specify runtime in seconds and year/months at the same time") + "Cannot specify runtime in seconds and year/months at the same time" + ) runconfig.set("CLOCK_attributes", "restart_n", n) runconfig.set("CLOCK_attributes", "restart_option", freq) @@ -45,12 +45,12 @@ def output_exists(self) -> bool: """Check for existing output file""" return self.output_file.exists() - def extract_checksums(self, - output_directory: Path = None, - schema_version: str = None) -> Dict[str, Any]: + def extract_checksums( + self, output_directory: Path = None, schema_version: str = None + ) -> dict[str, Any]: """Parse output file and create checksum using defined schema""" if output_directory: - output_filename = output_directory / 'ocean.stats' + output_filename = output_directory / "ocean.stats" else: output_filename = self.output_file @@ -70,7 +70,7 @@ def extract_checksums(self, for line in lines[istart:]: for col in line.split(","): # Only keep columns with labels (ie not Step, Day, Truncs) - col = re.split(" +", col.strip().rstrip('\n')) + col = re.split(" +", col.strip().rstrip("\n")) if len(col) > 1: output_checksums[col[0]].append(col[-1]) @@ -80,11 +80,11 @@ def extract_checksums(self, if schema_version == SCHEMA_VERSION_1_0_0: checksums = { "schema_version": schema_version, - "output": dict(output_checksums) + "output": dict(output_checksums), } else: raise NotImplementedError( - f"Unsupported checksum schema version: {schema_version}") + f"Unsupported checksum schema version: {schema_version}" + ) return checksums - diff --git a/src/model_config_tests/models/model.py b/src/model_config_tests/models/model.py index 198c135..05f818f 100644 --- a/src/model_config_tests/models/model.py +++ b/src/model_config_tests/models/model.py @@ -1,31 +1,25 @@ """Generic Model class""" + from pathlib import Path SCHEMA_VERSION_1_0_0 = "1-0-0" SCHEMA_1_0_0_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/7666d95967de4dfd19b0d271f167fdcfd3f46962/au.org.access-nri/model/reproducibility/checksums/1-0-0.json" -SCHEMA_VERSION_TO_URL = { - SCHEMA_VERSION_1_0_0: SCHEMA_1_0_0_URL -} +SCHEMA_VERSION_TO_URL = {SCHEMA_VERSION_1_0_0: SCHEMA_1_0_0_URL} DEFAULT_SCHEMA_VERSION = "1-0-0" -class Model(object): +class Model: def __init__(self, experiment): self.experiment = experiment self.default_schema_version = DEFAULT_SCHEMA_VERSION self.schema_version_to_url = SCHEMA_VERSION_TO_URL - def extract_checksums(self, - output_directory: Path, - schema_version: str): + def extract_checksums(self, output_directory: Path, schema_version: str): """Extract checksums from output directory""" raise NotImplementedError - def set_model_runtime(self, - years: int = 0, - months: int = 0, - seconds: int = 10800): + def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800): """Configure model runtime""" raise NotImplementedError @@ -33,24 +27,25 @@ def output_exists(self): """Check for existing output files""" raise NotImplementedError - def check_checksums_over_restarts(self, - long_run_checksum, - short_run_checksum_0, - short_run_checksum_1) -> bool: + def check_checksums_over_restarts( + self, long_run_checksum, short_run_checksum_0, short_run_checksum_1 + ) -> bool: """Compare a checksums from a long run (e.g. 2 days) against checksums from 2 short runs (e.g. 1 day)""" - short_run_checksums = short_run_checksum_0['output'] - for field, checksums in short_run_checksum_1['output'].items(): + short_run_checksums = short_run_checksum_0["output"] + for field, checksums in short_run_checksum_1["output"].items(): if field not in short_run_checksums: short_run_checksums[field] = checksums else: short_run_checksums[field].extend(checksums) matching_checksums = True - for field, checksums in long_run_checksum['output'].items(): + for field, checksums in long_run_checksum["output"].items(): for checksum in checksums: - if (field not in short_run_checksums or - checksum not in short_run_checksums[field]): + if ( + field not in short_run_checksums + or checksum not in short_run_checksums[field] + ): print(f"Unequal checksum: {field}: {checksum}") matching_checksums = False diff --git a/tests/test_model_extract_checksums.py b/tests/test_model_extract_checksums.py index 9b08a00..6c32c19 100644 --- a/tests/test_model_extract_checksums.py +++ b/tests/test_model_extract_checksums.py @@ -1,23 +1,25 @@ -import pytest -import requests import json -import jsonschema from pathlib import Path from unittest.mock import Mock +import jsonschema +import pytest +import requests + from model_config_tests.models import index as model_index MODEL_NAMES = model_index.keys() + @pytest.mark.parametrize("model_name", MODEL_NAMES) @pytest.mark.test def test_extract_checksums(model_name): - resources_dir = Path(f'test/resources/{model_name}') + resources_dir = Path(f"test/resources/{model_name}") # Mock ExpTestHelper mock_experiment = Mock() - mock_experiment.output000 = resources_dir / 'output000' - mock_experiment.control_path = Path('test/tmp') + mock_experiment.output000 = resources_dir / "output000" + mock_experiment.control_path = Path("test/tmp") # Create Model instance ModelType = model_index[model_name] @@ -31,8 +33,8 @@ def test_extract_checksums(model_name): assert checksums["schema_version"] == version # Check the entire checksum file is expected - checksum_file = resources_dir / 'checksums' / f'{version}.json' - with open(checksum_file, 'r') as file: + checksum_file = resources_dir / "checksums" / f"{version}.json" + with open(checksum_file) as file: expected_checksums = json.load(file) assert checksums == expected_checksums @@ -47,12 +49,12 @@ def test_extract_checksums(model_name): @pytest.mark.parametrize("model_name", MODEL_NAMES) @pytest.mark.test def test_extract_checksums_unsupported_version(model_name): - resources_dir = Path(f'test/resources/{model_name}') + resources_dir = Path(f"test/resources/{model_name}") # Mock ExpTestHelper mock_experiment = Mock() - mock_experiment.output000 = resources_dir / 'output000' - mock_experiment.control_path = Path('test/tmp') + mock_experiment.output000 = resources_dir / "output000" + mock_experiment.control_path = Path("test/tmp") # Create Model instance ModelType = model_index[model_name] @@ -60,7 +62,7 @@ def test_extract_checksums_unsupported_version(model_name): # Test NotImplementedError gets raised for unsupported versions with pytest.raises(NotImplementedError): - model.extract_checksums(schema_version='test-version') + model.extract_checksums(schema_version="test-version") def get_schema_from_url(url): From 3c8a259433ab0ee8af212100beb9003df359f7e6 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 24 May 2024 11:41:33 +1000 Subject: [PATCH 11/12] Add payu as a dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1063f90..0d604ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "pytest >=8.0.1", "ruamel.yaml >=0.18.5", "jsonschema >=4.21.1", + "payu >=1.1.3" ] [project.optional-dependencies] From 213dd719ebafdf0b7f47df2546946ee3c7c5461f Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 24 May 2024 11:42:17 +1000 Subject: [PATCH 12/12] Update extract checksum tests: Remove test marker and find resources dir --- tests/test_model_extract_checksums.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_model_extract_checksums.py b/tests/test_model_extract_checksums.py index 6c32c19..167e800 100644 --- a/tests/test_model_extract_checksums.py +++ b/tests/test_model_extract_checksums.py @@ -1,4 +1,5 @@ import json +import os from pathlib import Path from unittest.mock import Mock @@ -9,12 +10,13 @@ from model_config_tests.models import index as model_index MODEL_NAMES = model_index.keys() +HERE = os.path.dirname(__file__) +RESOURCES_DIR = Path(f"{HERE}/resources") @pytest.mark.parametrize("model_name", MODEL_NAMES) -@pytest.mark.test def test_extract_checksums(model_name): - resources_dir = Path(f"test/resources/{model_name}") + resources_dir = RESOURCES_DIR / model_name # Mock ExpTestHelper mock_experiment = Mock() @@ -47,9 +49,8 @@ def test_extract_checksums(model_name): @pytest.mark.parametrize("model_name", MODEL_NAMES) -@pytest.mark.test def test_extract_checksums_unsupported_version(model_name): - resources_dir = Path(f"test/resources/{model_name}") + resources_dir = RESOURCES_DIR / model_name # Mock ExpTestHelper mock_experiment = Mock()