diff --git a/pyproject.toml b/pyproject.toml
index c2344d5..5a27610 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,6 +75,8 @@ exclude = [
     "site-packages",
     "venv",
 ]
+
+[tool.ruff.lint]
 # E402: module level import not at top of file
 # E501: line too long - let black worry about that
 ignore = [
@@ -93,8 +95,8 @@ select = [
     "UP",
 ]
 
-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 max-complexity = 18
 
-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 known-first-party = ["model_config_tests"]
diff --git a/src/model_config_tests/__main__.py b/src/model_config_tests/__main__.py
index 89ebdf1..4699190 100644
--- a/src/model_config_tests/__main__.py
+++ b/src/model_config_tests/__main__.py
@@ -9,6 +9,7 @@
 
 def main():
     import pytest
+
     errcode = pytest.main([HERE] + sys.argv[1:])
     sys.exit(errcode)
diff --git a/src/model_config_tests/conftest.py b/src/model_config_tests/conftest.py
index d343e69..637a6bd 100644
--- a/src/model_config_tests/conftest.py
+++ b/src/model_config_tests/conftest.py
@@ -2,9 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import os
-import pytest
 from pathlib import Path
 
+import pytest
 import yaml
 from ruamel.yaml import YAML
 
@@ -14,19 +14,19 @@ def output_path(request):
     """Set the output path: This contains control and lab directories for each
     test and test output files - e.g. CHECKSUMS
     """
-    path = request.config.getoption('--output-path')
+    path = request.config.getoption("--output-path")
     if path is None:
         # Set default to /scratch/PROJECT/USER/test-model-repro/
-        project = os.environ.get('PROJECT')
-        user = os.environ.get('USER')
-        path = f'/scratch/{project}/{user}/test-model-repro'
+        project = os.environ.get("PROJECT")
+        user = os.environ.get("USER")
+        path = f"/scratch/{project}/{user}/test-model-repro"
     return Path(path)
 
 
 @pytest.fixture(scope="session")
 def control_path(request):
     """Set the path of the model configuration directory to test"""
-    path = request.config.getoption('--control-path')
+    path = request.config.getoption("--control-path")
     if path is None:
         # Set default to current working directory
         path = Path.cwd()
@@ -36,17 +36,17 @@ def control_path(request):
 @pytest.fixture(scope="session")
 def checksum_path(request, control_path):
     """Set the path of the model configuration directory to test"""
-    path = request.config.getoption('--checksum-path')
+    path = request.config.getoption("--checksum-path")
     if path is None:
         # Set default to checksum stored on model configuration
-        path = control_path / 'testing' / 'checksum' / 'historical-3hr-checksum.json'
+        path = control_path / "testing" / "checksum" / "historical-3hr-checksum.json"
     return Path(path)
 
 
 @pytest.fixture(scope="session")
 def metadata(control_path: Path):
     """Read the metadata file in the control directory"""
-    metadata_path = control_path / 'metadata.yaml'
+    metadata_path = control_path / "metadata.yaml"
     # Use ruamel.yaml as that is what is used to read metadata files in Payu
     # It also errors out if there are duplicate keys in metadata
     content = YAML().load(metadata_path)
@@ -56,7 +56,7 @@ def metadata(control_path: Path):
 @pytest.fixture(scope="session")
 def config(control_path: Path):
     """Read the config file in the control directory"""
-    config_path = control_path / 'config.yaml'
+    config_path = control_path / "config.yaml"
     with open(config_path) as f:
         config_content = yaml.safe_load(f)
     return config_content
@@ -67,27 +67,33 @@ def target_branch(request):
     """Set the target branch - i.e., the branch the configuration will be merged into.
     This used is to infer configuration information, if the configuration
     branches follow a common naming scheme (e.g. ACCESS-OM2)"""
-    return request.config.getoption('--target-branch')
+    return request.config.getoption("--target-branch")
 
 
 # Set up command line options and default for directory paths
 def pytest_addoption(parser):
     """Attaches optional command line arguments"""
-    parser.addoption("--output-path",
-                     action="store",
-                     help="Specify the output directory path for test output")
-
-    parser.addoption("--control-path",
-                     action="store",
-                     help="Specify the model configuration path to test")
-
-    parser.addoption("--checksum-path",
-                     action="store",
-                     help="Specify the checksum file to compare against")
-
-    parser.addoption("--target-branch",
-                     action="store",
-                     help="Specify the target branch name")
+    parser.addoption(
+        "--output-path",
+        action="store",
+        help="Specify the output directory path for test output",
+    )
+
+    parser.addoption(
+        "--control-path",
+        action="store",
+        help="Specify the model configuration path to test",
+    )
+
+    parser.addoption(
+        "--checksum-path",
+        action="store",
+        help="Specify the checksum file to compare against",
+    )
+
+    parser.addoption(
+        "--target-branch", action="store", help="Specify the target branch name"
+    )
 
 
 def pytest_configure(config):
diff --git a/src/model_config_tests/exp_test_helper.py b/src/model_config_tests/exp_test_helper.py
index d52ab67..fdc3f3f 100644
--- a/src/model_config_tests/exp_test_helper.py
+++ b/src/model_config_tests/exp_test_helper.py
@@ -1,31 +1,32 @@
 # Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
 # SPDX-License-Identifier: Apache-2.0
 
-import subprocess as sp
-import shutil
-import re
+import glob
 import os
+import re
+import shutil
+import subprocess as sp
 import sys
-import glob
-import yaml
 from pathlib import Path
 
-from model_config_tests.util import wait_for_qsub
+import yaml
+
 from model_config_tests.models import index as model_index
+from model_config_tests.util import wait_for_qsub
 
 
-class ExpTestHelper(object):
+class ExpTestHelper:
 
     def __init__(self, control_path: Path, lab_path: Path):
         self.exp_name = control_path.name
         self.control_path = control_path
         self.lab_path = lab_path
-        self.config_path = control_path / 'config.yaml'
-        self.archive_path = lab_path / 'archive' / self.exp_name
-        self.work_path = lab_path / 'work' / self.exp_name
-        self.output000 = self.archive_path / 'output000'
-        self.output001 = self.archive_path / 'output001'
+        self.config_path = control_path / "config.yaml"
+        self.archive_path = lab_path / "archive" / self.exp_name
+        self.work_path = lab_path / "work" / self.exp_name
+        self.output000 = self.archive_path / "output000"
+        self.output001 = self.archive_path / "output001"
 
         with open(self.config_path) as f:
             self.config = yaml.safe_load(f)
 
@@ -35,13 +36,13 @@ def __init__(self, control_path: Path, lab_path: Path):
 
     def set_model(self):
         """Set model based on payu config.
        Currently only setting top-level model"""
-        self.model_name = self.config.get('model')
+        self.model_name = self.config.get("model")
         ModelType = model_index[self.model_name]
         self.model = ModelType(self)
 
-    def extract_checksums(self,
-                          output_directory: Path = None,
-                          schema_version: str = None):
+    def extract_checksums(
+        self, output_directory: Path = None, schema_version: str = None
+    ):
         """Use model subclass to extract checksums from output"""
         return self.model.extract_checksums(output_directory, schema_version)
 
@@ -61,17 +62,17 @@ def setup_for_test_run(self):
             doc = yaml.safe_load(f)
 
         # Disable git runlog
-        doc['runlog'] = False
+        doc["runlog"] = False
 
         # Disable metadata and set override experiment name for work/archive
         # directories
-        doc['metadata'] = {"enable": False}
-        doc['experiment'] = self.exp_name
+        doc["metadata"] = {"enable": False}
+        doc["experiment"] = self.exp_name
 
         # Set laboratory path
-        doc['laboratory'] = str(self.lab_path)
+        doc["laboratory"] = str(self.lab_path)
 
-        with open(self.config_path, 'w') as f:
+        with open(self.config_path, "w") as f:
             yaml.dump(doc, f)
 
     def run(self):
@@ -95,48 +96,48 @@ def force_qsub_run(self):
         owd = Path.cwd()
         try:
             os.chdir(self.control_path)
-            sp.check_output(['payu', 'sweep', '--lab', self.lab_path])
-            run_id = sp.check_output(['payu', 'run', '--lab', self.lab_path])
+            sp.check_output(["payu", "sweep", "--lab", self.lab_path])
+            run_id = sp.check_output(["payu", "run", "--lab", self.lab_path])
             run_id = run_id.decode().splitlines()[0]
-        except sp.CalledProcessError as err:
-            print('Error: call to payu run failed.', file=sys.stderr)
+        except sp.CalledProcessError:
+            print("Error: call to payu run failed.", file=sys.stderr)
             return 1, None, None, None
         finally:
             os.chdir(owd)
 
         wait_for_qsub(run_id)
-        run_id = run_id.split('.')[0]
+        run_id = run_id.split(".")[0]
 
         output_files = []
 
         # Read qsub stdout file
-        stdout_filename = glob.glob(str(self.control_path / f'*.o{run_id}'))
+        stdout_filename = glob.glob(str(self.control_path / f"*.o{run_id}"))
         print(stdout_filename)
         if len(stdout_filename) != 1:
-            print('Error: there are too many stdout files.', file=sys.stderr)
+            print("Error: there are too many stdout files.", file=sys.stderr)
             return 2, None, None, None
 
         stdout_filename = stdout_filename[0]
         output_files.append(stdout_filename)
-        stdout = ''
-        with open(stdout_filename, 'r') as f:
+        stdout = ""
+        with open(stdout_filename) as f:
             stdout = f.read()
 
         # Read qsub stderr file
-        stderr_filename = glob.glob(str(self.control_path / f'*.e{run_id}'))
-        stderr = ''
+        stderr_filename = glob.glob(str(self.control_path / f"*.e{run_id}"))
+        stderr = ""
         if len(stderr_filename) == 1:
             stderr_filename = stderr_filename[0]
             output_files.append(stderr_filename)
-            with open(stderr_filename, 'r') as f:
+            with open(stderr_filename) as f:
                 stderr = f.read()
 
         # TODO: Early return if not collating
 
         # Read the qsub id of the collate job from the stdout.
         # Payu puts this here.
-        m = re.search(r'(\d+.gadi-pbs)\n', stdout)
+        m = re.search(r"(\d+.gadi-pbs)\n", stdout)
         if m is None:
-            print('Error: qsub id of collate job.', file=sys.stderr)
+            print("Error: qsub id of collate job.", file=sys.stderr)
             return 3, stdout, stderr, output_files
 
         # Wait for the collate to complete.
@@ -144,7 +145,7 @@ def force_qsub_run(self):
         wait_for_qsub(run_id)
 
         # Return files created by qsub so caller can read or delete.
-        collate_files = self.control_path / f'*.[oe]{run_id}'
+        collate_files = self.control_path / f"*.[oe]{run_id}"
         output_files += glob.glob(str(collate_files))
 
         return 0, stdout, stderr, output_files
 
@@ -159,20 +160,19 @@ def setup_exp(control_path: Path, output_path: Path, exp_name: str):
     Create a exp by copying over base config
     """
     # Set experiment control path
-    if control_path.name != 'base-experiment':
-        exp_name = f'{control_path.name}-{exp_name}'
+    if control_path.name != "base-experiment":
+        exp_name = f"{control_path.name}-{exp_name}"
 
-    exp_control_path = output_path / 'control' / exp_name
+    exp_control_path = output_path / "control" / exp_name
 
     # Copy over base control directory (e.g. model configuration)
     if exp_control_path.exists():
         shutil.rmtree(exp_control_path)
     shutil.copytree(control_path, exp_control_path, symlinks=True)
 
-    exp_lab_path = output_path / 'lab'
+    exp_lab_path = output_path / "lab"
 
-    exp = ExpTestHelper(control_path=exp_control_path,
-                        lab_path=exp_lab_path)
+    exp = ExpTestHelper(control_path=exp_control_path, lab_path=exp_lab_path)
 
     # Remove any pre-existing archive or work directories for the experiment
     try:
diff --git a/src/model_config_tests/models/__init__.py b/src/model_config_tests/models/__init__.py
index 7be28e5..efb7367 100644
--- a/src/model_config_tests/models/__init__.py
+++ b/src/model_config_tests/models/__init__.py
@@ -1,5 +1,3 @@
 from model_config_tests.models.accessom2 import AccessOm2
 
-index = {
-    'access-om2': AccessOm2
-}
+index = {"access-om2": AccessOm2}
diff --git a/src/model_config_tests/models/accessom2.py b/src/model_config_tests/models/accessom2.py
index 27fd865..0a98800 100644
--- a/src/model_config_tests/models/accessom2.py
+++ b/src/model_config_tests/models/accessom2.py
@@ -1,10 +1,11 @@
 """Specific Access-OM2 Model setup and post-processing"""
 
-from collections import defaultdict
-import f90nml
 import re
+from collections import defaultdict
 from pathlib import Path
-from typing import Dict, Any
+from typing import Any
+
+import f90nml
 
 from model_config_tests.models.model import Model
 
@@ -14,37 +15,35 @@ DEFAULT_SCHEMA_VERSION = SCHEMA_VERSION_1_0_0
 SUPPORTED_SCHEMA_VERSIONS = [SCHEMA_VERSION_1_0_0]
 
+
 class AccessOm2(Model):
     def __init__(self, experiment):
-        super(AccessOm2, self).__init__(experiment)
-        self.output_file = self.experiment.output000 / 'access-om2.out'
+        super().__init__(experiment)
+        self.output_file = self.experiment.output000 / "access-om2.out"
 
-        self.accessom2_config = experiment.control_path / 'accessom2.nml'
-        self.ocean_config = experiment.control_path / 'ocean' / 'input.nml'
+        self.accessom2_config = experiment.control_path / "accessom2.nml"
+        self.ocean_config = experiment.control_path / "ocean" / "input.nml"
 
         self.default_schema_version = DEFAULT_SCHEMA_VERSION
 
-    def set_model_runtime(self,
-                          years: int = 0,
-                          months: int = 0,
-                          seconds: int = 10800):
+    def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
        """Set config files to a short time period for experiment run.
        Default is 3 hours"""
         with open(self.accessom2_config) as f:
             nml = f90nml.read(f)
 
-        nml['date_manager_nml']['restart_period'] = [years, months, seconds]
+        nml["date_manager_nml"]["restart_period"] = [years, months, seconds]
         nml.write(self.accessom2_config, force=True)
 
     def output_exists(self) -> bool:
         """Check for existing output file"""
         return self.output_file.exists()
 
-    def extract_checksums(self,
-                          output_directory: Path = None,
-                          schema_version: str = None) -> Dict[str, Any]:
+    def extract_checksums(
+        self, output_directory: Path = None, schema_version: str = None
+    ) -> dict[str, Any]:
         """Parse output file and create checksum using defined schema"""
         if output_directory:
-            output_filename = output_directory / 'access-om2.out'
+            output_filename = output_directory / "access-om2.out"
         else:
             output_filename = self.output_file
 
@@ -53,7 +52,7 @@ def extract_checksums(self,
         # [chksum] ht -2390360641069121536
         # [chksum] hu 6389284661071183872
         # [chksum] htr 928360042410663049
-        pattern = r'\[chksum\]\s+(.+)\s+(-?\d+)'
+        pattern = r"\[chksum\]\s+(.+)\s+(-?\d+)"
 
         # checksums outputted in form:
         # {
@@ -81,33 +80,37 @@ def extract_checksums(self,
         if schema_version == SCHEMA_VERSION_1_0_0:
             checksums = {
                 "schema_version": schema_version,
-                "output": dict(output_checksums)
+                "output": dict(output_checksums),
             }
         else:
             raise NotImplementedError(
-                f"Unsupported checksum schema version: {schema_version}")
+                f"Unsupported checksum schema version: {schema_version}"
+            )
 
         return checksums
 
-    def check_checksums_over_restarts(self,
-                                      long_run_checksum: Dict[str, Any],
-                                      short_run_checksum_0: Dict[str, Any],
-                                      short_run_checksum_1: Dict[str, Any]
-                                      ) -> bool:
+    def check_checksums_over_restarts(
+        self,
+        long_run_checksum: dict[str, Any],
+        short_run_checksum_0: dict[str, Any],
+        short_run_checksum_1: dict[str, Any],
+    ) -> bool:
         """Compare a checksums from a long run (e.g. 2 days) against
         checksums from 2 short runs (e.g. 1 day)"""
-        short_run_checksums = short_run_checksum_0['output']
-        for field, checksums in short_run_checksum_1['output'].items():
+        short_run_checksums = short_run_checksum_0["output"]
+        for field, checksums in short_run_checksum_1["output"].items():
             if field not in short_run_checksums:
                 short_run_checksums[field] = checksums
             else:
                 short_run_checksums[field].extend(checksums)
 
         matching_checksums = True
-        for field, checksums in long_run_checksum['output'].items():
+        for field, checksums in long_run_checksum["output"].items():
             for checksum in checksums:
-                if (field not in short_run_checksums or
-                        checksum not in short_run_checksums[field]):
+                if (
+                    field not in short_run_checksums
+                    or checksum not in short_run_checksums[field]
+                ):
                     print(f"Unequal checksum: {field}: {checksum}")
                     matching_checksums = False
 
diff --git a/src/model_config_tests/models/model.py b/src/model_config_tests/models/model.py
index 2ae126f..ee5bedd 100644
--- a/src/model_config_tests/models/model.py
+++ b/src/model_config_tests/models/model.py
@@ -1,22 +1,17 @@
 """Generic Model class"""
+
 from pathlib import Path
-from typing import Dict, Any
 
 
-class Model(object):
+class Model:
     def __init__(self, experiment):
         self.experiment = experiment
 
-    def extract_checksums(self,
-                          output_directory: Path,
-                          schema_version: str):
+    def extract_checksums(self, output_directory: Path, schema_version: str):
         """Extract checksums from output directory"""
         raise NotImplementedError
 
-    def set_model_runtime(self,
-                          years: int = 0,
-                          months: int = 0,
-                          seconds: int = 10800):
+    def set_model_runtime(self, years: int = 0, months: int = 0, seconds: int = 10800):
         """Configure model runtime"""
         raise NotImplementedError
 
@@ -24,10 +19,9 @@ def output_exists(self):
         """Check for existing output files"""
         raise NotImplementedError
 
-    def check_checksums_over_restarts(self,
-                                      long_run_checksum,
-                                      short_run_checksum_0,
-                                      short_run_checksum_1) -> bool:
+    def check_checksums_over_restarts(
+        self, long_run_checksum, short_run_checksum_0, short_run_checksum_1
+    ) -> bool:
         """Compare a checksums from a long run (e.g. 2 days) against
         checksums from 2 short runs (e.g. 1 day)"""
        raise NotImplementedError
diff --git a/src/model_config_tests/test_access_om2_config.py b/src/model_config_tests/test_access_om2_config.py
index 0667c87..69e4ab1 100644
--- a/src/model_config_tests/test_access_om2_config.py
+++ b/src/model_config_tests/test_access_om2_config.py
@@ -4,30 +4,32 @@
 """ACCESS-OM2 specific configuration tests"""
 
 import re
+import warnings
 
-import pytest
 import f90nml
-import warnings
+import pytest
 
 from model_config_tests.util import get_git_branch_name
 
 # Mutually exclusive topic keywords
 TOPIC_KEYWORDS = {
-    'spatial extent': {'global', 'regional'},
-    'forcing product': {'JRA55', 'ERA5'},
-    'forcing mode': {'repeat-year', 'ryf', 'repeat-decade', 'rdf',
-                     'interannual', 'iaf'},
-    'model': {'access-om2', 'access-om2-025', 'access-om2-01'},
-    'model variant': {'bgc'}
+    "spatial extent": {"global", "regional"},
+    "forcing product": {"JRA55", "ERA5"},
+    "forcing mode": {
+        "repeat-year",
+        "ryf",
+        "repeat-decade",
+        "rdf",
+        "interannual",
+        "iaf",
+    },
+    "model": {"access-om2", "access-om2-025", "access-om2-01"},
+    "model variant": {"bgc"},
 }
 
 # Nominal resolutions are sourced from CMIP6 controlled vocabulary
 # https://github.com/WCRP-CMIP/CMIP6_CVs/blob/main/CMIP6_nominal_resolution.json
-NOMINAL_RESOLUTION = {
-    '025deg': '25 km',
-    '01deg': '10 km',
-    '1deg': '100 km'
-}
+NOMINAL_RESOLUTION = {"025deg": "25 km", "01deg": "10 km", "1deg": "100 km"}
 
 
 class AccessOM2Branch:
@@ -38,12 +40,12 @@ def __init__(self, branch_name):
         self.branch_name = branch_name
         self.set_resolution()
 
-        self.is_high_resolution = self.resolution in ['025deg', '01deg']
-        self.is_bgc = 'bgc' in branch_name
+        self.is_high_resolution = self.resolution in ["025deg", "01deg"]
+        self.is_bgc = "bgc" in branch_name
 
     def set_resolution(self):
         # Resolutions are ordered, so the start of the list are matched first
-        resolutions = ['025deg', '01deg', '1deg']
+        resolutions = ["025deg", "01deg", "1deg"]
         self.resolution = None
         for res in resolutions:
             if res in self.branch_name:
@@ -51,8 +53,8 @@ def set_resolution(self):
                 return
 
         pytest.fail(
-            f"Branch name {self.branch_name} has an unknown resolution. " +
-            f"Current supported resolutions: {', '.join(resolutions)}"
+            f"Branch name {self.branch_name} has an unknown resolution. "
+            + f"Current supported resolutions: {', '.join(resolutions)}"
         )
 
 
@@ -62,9 +64,9 @@ def branch(control_path, target_branch):
     if branch_name is None:
         # Default to current branch name
         branch_name = get_git_branch_name(control_path)
-        assert branch_name is not None, (
-            f"Failed getting git branch name of control path: {control_path}"
-        )
+        assert (
+            branch_name is not None
+        ), f"Failed getting git branch name of control path: {control_path}"
         warnings.warn(
             "Target branch is not specifed, defaulting to current git branch: "
             f"{branch_name}. As some ACCESS-OM2 tests infer information, "
@@ -81,54 +83,57 @@ class TestAccessOM2:
     def test_mppncombine_fast_collate_exe(self, config, branch):
         if branch.is_high_resolution:
-            pattern = r'/g/data/vk83/apps/mppnccombine-fast/.*/bin/mppnccombine-fast'
-            if 'collate' in config:
-                assert re.match(pattern, config['collate']['exe']), (
-                    "Expect collate executable set to mppnccombine-fast"
-                )
+            pattern = r"/g/data/vk83/apps/mppnccombine-fast/.*/bin/mppnccombine-fast"
+            if "collate" in config:
+                assert re.match(
+                    pattern, config["collate"]["exe"]
+                ), "Expect collate executable set to mppnccombine-fast"
 
-                assert config['collate']['mpi'], (
-                    "Expect `mpi: true` when using mppnccombine-fast"
-                )
+                assert config["collate"][
+                    "mpi"
+                ], "Expect `mpi: true` when using mppnccombine-fast"
 
     def test_sync_userscript_ice_concatenation(self, config):
         # This script runs in the sync pbs job before syncing output to a
         # remote location
-        script = '/g/data/vk83/apps/om2-scripts/concatenate_ice/concat_ice_daily.sh'
-        assert ('userscripts' in config and 'sync' in config['userscripts']
-                and config['userscripts']['sync'] == script), (
-            "Expect sync userscript set to ice-concatenation script." +
-            f"\nuserscript:\n sync: {script}"
-        )
+        script = "/g/data/vk83/apps/om2-scripts/concatenate_ice/concat_ice_daily.sh"
+        assert (
+            "userscripts" in config
+            and "sync" in config["userscripts"]
+            and config["userscripts"]["sync"] == script
+        ), (
+            "Expect sync userscript set to ice-concatenation script."
+            + f"\nuserscript:\n sync: {script}"
+        )
 
     def test_metadata_realm(self, metadata, branch):
-        expected_realms = {'ocean', 'seaIce'}
-        expected_config = 'realm:\n - ocean\n - seaIce'
+        expected_realms = {"ocean", "seaIce"}
+        expected_config = "realm:\n - ocean\n - seaIce"
         if branch.is_bgc:
-            expected_realms.add('ocnBgchem')
-            expected_config += '\n - ocnBgchem'
+            expected_realms.add("ocnBgchem")
+            expected_config += "\n - ocnBgchem"
 
-        assert ('realm' in metadata
-                and metadata['realm'] is not None
-                and set(metadata['realm']) == expected_realms), (
-            'Expected metadata realm set to:\n' + expected_config
-        )
+        assert (
+            "realm" in metadata
+            and metadata["realm"] is not None
+            and set(metadata["realm"]) == expected_realms
+        ), ("Expected metadata realm set to:\n" + expected_config)
 
     def test_restart_period(self, branch, control_path):
-        accessom2_nml_path = control_path / 'accessom2.nml'
+        accessom2_nml_path = control_path / "accessom2.nml"
         assert accessom2_nml_path.exists()
 
         accessom2_nml = f90nml.read(accessom2_nml_path)
-        restart_period = accessom2_nml['date_manager_nml']['restart_period']
+        restart_period = accessom2_nml["date_manager_nml"]["restart_period"]
 
-        if branch.resolution == '1deg':
+        if branch.resolution == "1deg":
             expected_period = [5, 0, 0]
-        elif branch.resolution == '025deg':
+        elif branch.resolution == "025deg":
             if branch.is_bgc:
                 expected_period = [1, 0, 0]
             else:
                 expected_period = [2, 0, 0]
-        elif branch.resolution == '01deg':
+        elif branch.resolution == "01deg":
             if branch.is_bgc:
                 expected_period = [0, 1, 0]
             else:
@@ -142,32 +147,33 @@ def test_restart_period(self, branch, control_path):
 
     def test_metadata_keywords(self, metadata):
-        assert 'keywords' in metadata
-        metadata_keywords = set(metadata['keywords'])
+        assert "keywords" in metadata
+        metadata_keywords = set(metadata["keywords"])
 
         expected_keywords = set()
         for topic, keywords in TOPIC_KEYWORDS.items():
             mutually_exclusive = metadata_keywords.intersection(keywords)
-            assert len(mutually_exclusive) <= 1, (
-                f"Topic {topic} has multiple mutually exlusive keywords: " +
-                str(mutually_exclusive)
-            )
+            assert (
+                len(mutually_exclusive) <= 1
+            ), f"Topic {topic} has multiple mutually exlusive keywords: " + str(
+                mutually_exclusive
+            )
             expected_keywords = expected_keywords.union(keywords)
 
         unrecognised_keywords = metadata_keywords.difference(expected_keywords)
-        assert len(unrecognised_keywords) == 0, (
-            f"Metadata has unrecognised keywords: {unrecognised_keywords}"
-        )
+        assert (
+            len(unrecognised_keywords) == 0
+        ), f"Metadata has unrecognised keywords: {unrecognised_keywords}"
 
     def test_metadata_nominal_resolution(self, metadata, branch):
         assert branch.resolution in NOMINAL_RESOLUTION, (
-            f"The expected nominal_resolution is not defined for given " +
-            f"resolution: {branch.resolution}"
+            "The expected nominal_resolution is not defined for given "
+            + f"resolution: {branch.resolution}"
         )
 
         expected = NOMINAL_RESOLUTION[branch.resolution]
-        assert ('nominal_resolution' in metadata
-                and metadata['nominal_resolution'] == expected), (
-            f"Expected nominal_resolution field set to: {expected}"
-        )
+        assert (
+            "nominal_resolution" in metadata
+            and metadata["nominal_resolution"] == expected
+        ), f"Expected nominal_resolution field set to: {expected}"
diff --git a/src/model_config_tests/test_bit_reproducibility.py b/src/model_config_tests/test_bit_reproducibility.py
index 476603b..08c03b8 100644
--- a/src/model_config_tests/test_bit_reproducibility.py
+++ b/src/model_config_tests/test_bit_reproducibility.py
@@ -4,24 +4,27 @@
 """Tests for model reproducibility"""
 
 import json
-import pytest
 from pathlib import Path
 
+import pytest
+
 from model_config_tests.exp_test_helper import setup_exp
 
-class TestBitReproducibility():
+
+class TestBitReproducibility:
 
     @pytest.mark.checksum
-    def test_bit_repro_historical(self, output_path: Path, control_path: Path,
-                                  checksum_path: Path):
+    def test_bit_repro_historical(
+        self, output_path: Path, control_path: Path, checksum_path: Path
+    ):
         """
         Test that a run reproduces historical checksums
         """
         # Setup checksum output directory
         # NOTE: The checksum output file is used as part of `repro-ci` workflows
-        output_dir = output_path / 'checksum'
+        output_dir = output_path / "checksum"
         output_dir.mkdir(parents=True, exist_ok=True)
-        checksum_output_file = output_dir / 'historical-3hr-checksum.json'
+        checksum_output_file = output_dir / "historical-3hr-checksum.json"
         if checksum_output_file.exists():
             checksum_output_file.unlink()
 
@@ -32,14 +35,16 @@ def test_bit_repro_historical(self, output_path: Path, control_path: Path,
 
         assert exp.model.output_exists()
 
-        #Check checksum against historical checksum file
+        # Check checksum against historical checksum file
         hist_checksums = None
         hist_checksums_schema_version = None
 
-        if not checksum_path.exists(): # AKA, if the config branch doesn't have a checksum, or the path is misconfigured
+        if (
+            not checksum_path.exists()
+        ):  # AKA, if the config branch doesn't have a checksum, or the path is misconfigured
             hist_checksums_schema_version = exp.model.default_schema_version
         else: # we can use the historic-3hr-checksum that is in the testing directory
-            with open(checksum_path, 'r') as file:
+            with open(checksum_path) as file:
                 hist_checksums = json.load(file)
 
                 # Parse checksums using the same version
@@ -48,20 +53,20 @@ def test_bit_repro_historical(self, output_path: Path, control_path: Path,
         checksums = exp.extract_checksums(schema_version=hist_checksums_schema_version)
 
         # Write out checksums to output file
-        with open(checksum_output_file, 'w') as file:
+        with open(checksum_output_file, "w") as file:
             json.dump(checksums, file, indent=2)
 
-        assert hist_checksums == checksums, f"Checksums were not equal. The new checksums have been written to {checksum_output_file}."
+        assert (
+            hist_checksums == checksums
+        ), f"Checksums were not equal. The new checksums have been written to {checksum_output_file}."
 
     @pytest.mark.slow
     def test_bit_repro_repeat(self, output_path: Path, control_path: Path):
         """
         Test that a run has same checksums when ran twice
         """
-        exp_bit_repo1 = setup_exp(control_path, output_path,
-                                  "test_bit_repro_repeat_1")
-        exp_bit_repo2 = setup_exp(control_path, output_path,
-                                  "test_bit_repro_repeat_2")
+        exp_bit_repo1 = setup_exp(control_path, output_path, "test_bit_repro_repeat_1")
+        exp_bit_repo2 = setup_exp(control_path, output_path, "test_bit_repro_repeat_2")
 
         # Reconfigure to a 3 hours (default) and run
         for exp in [exp_bit_repo1, exp_bit_repo2]:
@@ -83,8 +88,7 @@ def test_restart_repro(self, output_path: Path, control_path: Path):
         Test that a run reproduces across restarts.
         """
         # First do two short (1 day) runs.
-        exp_2x1day = setup_exp(control_path, output_path,
-                               'test_restart_repro_2x1day')
+        exp_2x1day = setup_exp(control_path, output_path, "test_restart_repro_2x1day")
 
         # Reconfigure to a 1 day run.
         exp_2x1day.model.set_model_runtime(seconds=86400)
@@ -94,8 +98,7 @@ def test_restart_repro(self, output_path: Path, control_path: Path):
         exp_2x1day.force_qsub_run()
 
         # Now do a single 2 day run
-        exp_2day = setup_exp(control_path, output_path,
-                             'test_restart_repro_2day')
+        exp_2day = setup_exp(control_path, output_path, "test_restart_repro_2day")
 
         # Reconfigure
         exp_2day.model.set_model_runtime(seconds=172800)
@@ -113,16 +116,16 @@ def test_restart_repro(self, output_path: Path, control_path: Path):
         matching_checksums = model.check_checksums_over_restarts(
             long_run_checksum=checksums_2d,
             short_run_checksum_0=checksums_1d_0,
-            short_run_checksum_1=checksums_1d_1
+            short_run_checksum_1=checksums_1d_1,
         )
 
         if not matching_checksums:
             # Write checksums out to file
-            with open(output_path / 'restart-1d-0-checksum.json', 'w') as file:
+            with open(output_path / "restart-1d-0-checksum.json", "w") as file:
                 json.dump(checksums_1d_0, file, indent=2)
-            with open(output_path / 'restart-1d-1-checksum.json', 'w') as file:
+            with open(output_path / "restart-1d-1-checksum.json", "w") as file:
                 json.dump(checksums_1d_1, file, indent=2)
-            with open(output_path / 'restart-2d-0-checksum.json', 'w') as file:
+            with open(output_path / "restart-2d-0-checksum.json", "w") as file:
                 json.dump(checksums_2d, file, indent=2)
 
         assert matching_checksums
diff --git a/src/model_config_tests/test_config.py b/src/model_config_tests/test_config.py
index 1f3939c..b355d9b 100644
--- a/src/model_config_tests/test_config.py
+++ b/src/model_config_tests/test_config.py
@@ -3,13 +3,12 @@
 
 """Tests for checking configs and valid metadata files"""
 
-from pathlib import Path
 import re
-import warnings
+from pathlib import Path
 
+import jsonschema
 import pytest
 import requests
-import jsonschema
 import yaml
 
 # Experiment Metadata Schema
@@ -25,16 +24,18 @@
 
 @pytest.fixture(scope="class")
 def exe_manifest_fullpaths(control_path: Path):
-    manifest_path = control_path / 'manifests' / 'exe.yaml'
+    manifest_path = control_path / "manifests" / "exe.yaml"
     with open(manifest_path) as f:
         _, data = yaml.safe_load_all(f)
-        exe_fullpaths = {item['fullpath'] for item in data.values()}
+        exe_fullpaths = {item["fullpath"] for item in data.values()}
     return exe_fullpaths
 
 
 def insist_array(str_or_array):
     if isinstance(str_or_array, str):
-        str_or_array = [str_or_array,]
+        str_or_array = [
+            str_or_array,
+        ]
     return str_or_array
 
 
@@ -42,74 +43,69 @@
 class TestConfig:
     """General configuration tests"""
 
-    @pytest.mark.parametrize(
-        "field", ["project", "shortpath"]
-    )
+    @pytest.mark.parametrize("field", ["project", "shortpath"])
     def test_field_is_not_defined(self, config, field):
-        assert field not in config, (
-            f"{field} should not be defined: '{field}: {config[field]}'"
-        )
+        assert (
+            field not in config
+        ), f"{field} should not be defined: '{field}: {config[field]}'"
 
     def test_absolute_input_paths(self, config):
-        for path in insist_array(config.get('input', [])):
-            assert Path(path).is_absolute(), (
-                f"Input path should be absolute: {path}"
-            )
+        for path in insist_array(config.get("input", [])):
+            assert Path(path).is_absolute(), f"Input path should be absolute: {path}"
 
    def test_absolute_submodel_input_paths(self, config):
-        for model in config.get('submodels', []):
-            for path in insist_array(model.get('input', [])):
+        for model in config.get("submodels", []):
+            for path in insist_array(model.get("input", [])):
                 assert Path(path).is_absolute(), (
-                    f"Input path for {model['name']} submodel should be " +
-                    f" absolute: {path}"
+                    f"Input path for {model['name']} submodel should be "
+                    + f" absolute: {path}"
                 )
 
     def test_no_storage_qsub_flags(self, config):
-        qsub_flags = config.get('qsub_flags', '')
-        assert 'storage' not in qsub_flags, (
-            "Storage flags defined in qsub_flags will be silently ignored"
-        )
+        qsub_flags = config.get("qsub_flags", "")
+        assert (
+            "storage" not in qsub_flags
+        ), "Storage flags defined in qsub_flags will be silently ignored"
 
     def test_runlog_is_on(self, config):
-        runlog_config = config.get('runlog', {})
+        runlog_config = config.get("runlog", {})
         if isinstance(runlog_config, bool):
             runlog_enabled = runlog_config
         else:
-            runlog_enabled = runlog_config.get('enable', True)
+            runlog_enabled = runlog_config.get("enable", True)
         assert runlog_enabled
 
     def test_absolute_exe_path(self, config):
-        assert 'exe' not in config or Path(config['exe']).is_absolute(), (
-            f"Executable for model should be an absolute path: {config['exe']}"
-        )
+        assert (
+            "exe" not in config or Path(config["exe"]).is_absolute()
+        ), f"Executable for model should be an absolute path: {config['exe']}"
 
     def test_absolute_submodel_exe_path(self, config):
-        for model in config.get('submodels', []):
-            if 'exe' not in model:
+        for model in config.get("submodels", []):
+            if "exe" not in model:
                 # Allow models such as couplers that have no executable
-                if 'ncpus' in model and model['ncpus'] != 0:
+                if "ncpus" in model and model["ncpus"] != 0:
                     pytest.fail(f"No executable for submodel {model['name']}")
                 continue
 
-            assert Path(model['exe']).is_absolute(), (
-                f"Executable for {model['name']} submodel should be " +
-                f"an absolute path: {config['exe']}"
+            assert Path(model["exe"]).is_absolute(), (
+                f"Executable for {model['name']} submodel should be "
+                + f"an absolute path: {config['exe']}"
             )
 
     def test_exe_paths_in_manifest(self, config, exe_manifest_fullpaths):
-        if 'exe' in config:
-            assert config['exe'] in exe_manifest_fullpaths, (
-                f"Model executable path should be in Manifest file " +
-                f"(e.g. manifests/exe.yaml): {config['exe']}"
+        if "exe" in config:
+            assert config["exe"] in exe_manifest_fullpaths, (
+                "Model executable path should be in Manifest file "
+                + f"(e.g. manifests/exe.yaml): {config['exe']}"
             )
 
-    def test_sub_model_exe_paths_in_manifest(self, config,
-                                             exe_manifest_fullpaths):
-        for model in config.get('submodels', []):
-            if 'exe' in model:
-                assert model['exe'] in exe_manifest_fullpaths, (
-                    f"Submodel {model['name']} executable path should be in " +
-                    f"Manifest file (e.g. manifests/exe.yaml): {config['exe']}"
+    def test_sub_model_exe_paths_in_manifest(self, config, exe_manifest_fullpaths):
+        for model in config.get("submodels", []):
+            if "exe" in model:
+                assert model["exe"] in exe_manifest_fullpaths, (
+                    f"Submodel {model['name']} executable path should be in "
+                    + f"Manifest file (e.g. manifests/exe.yaml): {config['exe']}"
                 )
 
     def test_restart_freq_is_date_based(self, config):
@@ -117,53 +113,51 @@ def test_restart_freq_is_date_based(self, config):
         frequency = config["restart_freq"]
         # String of an integer followed by a YS/MS/W/D/H/T/S unit,
         # e.g. 1YS for 1 year-start
-        pattern = r'^\d+(YS|MS|W|D|H|T|S)$'
+        pattern = r"^\d+(YS|MS|W|D|H|T|S)$"
         assert isinstance(frequency, str) and re.match(pattern, frequency), (
-            "Restart frequency should be date-based: " +
-            f"'restart_freq: {frequency}'"
+            "Restart frequency should be date-based: " + f"'restart_freq: {frequency}'"
         )
 
     def test_sync_is_not_enabled(self, config):
-        if 'sync' in config and 'enable' in config['sync']:
-            assert not config['sync']['enable'], (
-                "Sync to remote archive should not be enabled"
-            )
+        if "sync" in config and "enable" in config["sync"]:
+            assert not config["sync"][
+                "enable"
+            ], "Sync to remote archive should not be enabled"
 
     def test_sync_path_is_not_set(self, config):
-        if 'sync' in config:
-            assert not ('path' in config['sync']
-                        and config['sync']['path'] is not None), (
-                "Sync path to remote archive should not be set"
-            )
+        if "sync" in config:
+            assert not (
+                "path" in config["sync"] and config["sync"]["path"] is not None
+            ), "Sync path to remote archive should not be set"
 
     def test_manifest_reproduce_exe_is_on(self, config):
-        manifest_reproduce = config.get('manifest', {}).get('reproduce', {})
-        assert 'exe' in manifest_reproduce and manifest_reproduce['exe'], (
-            "Executable reproducibility should be enforced, e.g set:\n" +
-            "manifest:\n reproduce:\n exe: True"
-        )
+        manifest_reproduce = config.get("manifest", {}).get("reproduce", {})
+        assert "exe" in manifest_reproduce and manifest_reproduce["exe"], (
+            "Executable reproducibility should be enforced, e.g set:\n"
+            + "manifest:\n reproduce:\n exe: True"
+        )
 
     def test_metadata_is_enabled(self, config):
-        if 'metadata' in config and 'enable' in config['metadata']:
-            assert config['metadata']['enable'], (
-                "Metadata should be enabled, otherwise new UUIDs will not " +
-                "be generated and branching in Payu would not work - as " +
-                "branch and UUIDs are not used in the name used for archival."
+        if "metadata" in config and "enable" in config["metadata"]:
+            assert config["metadata"]["enable"], (
+                "Metadata should be enabled, otherwise new UUIDs will not "
+                + "be generated and branching in Payu would not work - as "
+                + "branch and UUIDs are not used in the name used for archival."
             )
 
     def test_experiment_name_is_not_defined(self, config):
-        assert 'experiment' not in config, (
-            f"experiment: {config['experiment']} should not set, " +
-            "as this over-rides the experiment name used for archival. " +
-            "If set, branching in payu would not work."
+        assert "experiment" not in config, (
+            f"experiment: {config['experiment']} should not set, "
+            + "as this over-rides the experiment name used for archival. "
" + + "If set, branching in payu would not work." ) def test_no_scripts_in_top_level_directory(self, control_path): exts = {".py", ".sh"} scripts = [p for p in control_path.iterdir() if p.suffix in exts] assert scripts == [], ( - "Scripts in top-level directory should be moved to a " + - "'tools' sub-directory" + "Scripts in top-level directory should be moved to a " + + "'tools' sub-directory" ) def test_validate_metadata(self, metadata): @@ -179,45 +173,53 @@ def test_validate_metadata(self, metadata): # description and long_description. As name & experiment_uuid are # generated for running experiments, the required fields are removed # from the schema validation for now - schema.pop('required') + schema.pop("required") # Validate field names and types jsonschema.validate(instance=metadata, schema=schema) @pytest.mark.parametrize( "field", - ["description", "notes", "keywords", "nominal_resolution", "version", - "reference", "url", "model", "realm"] + [ + "description", + "notes", + "keywords", + "nominal_resolution", + "version", + "reference", + "url", + "model", + "realm", + ], ) def test_metadata_contains_fields(self, field, metadata): assert field in metadata, f"{field} field shoud be defined in metadata" def test_metadata_does_contain_UUID(self, metadata): - assert 'experiment_uuid' not in metadata, ( - "`experiment_uuid` should not be defined in metadata, " + - "as this is an configuration rather than an experiment. " + assert "experiment_uuid" not in metadata, ( + "`experiment_uuid` should not be defined in metadata, " + + "as this is an configuration rather than an experiment. " ) def test_metadata_license(self, metadata): - assert 'license' in metadata and metadata['license'] == LICENSE, ( - f"The license should be set to {LICENSE}" - ) + assert ( + "license" in metadata and metadata["license"] == LICENSE + ), f"The license should be set to {LICENSE}" def test_license_file(self, control_path): - license_path = control_path / 'LICENSE' + license_path = control_path / "LICENSE" assert license_path.exists(), ( - f"LICENSE file should exist and equal to {LICENSE} found here: " + - LICENSE_URL + f"LICENSE file should exist and equal to {LICENSE} found here: " + + LICENSE_URL ) response = requests.get(LICENSE_URL) assert response.status_code == 200 license = response.text - with open(license_path, 'r') as f: + with open(license_path) as f: content = f.read() assert content == license, ( - f"LICENSE file should be equal to {LICENSE} found here: " + - LICENSE_URL + f"LICENSE file should be equal to {LICENSE} found here: " + LICENSE_URL ) diff --git a/src/model_config_tests/util.py b/src/model_config_tests/util.py index f9d80f4..d962e00 100644 --- a/src/model_config_tests/util.py +++ b/src/model_config_tests/util.py @@ -1,8 +1,8 @@ # Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
 # SPDX-License-Identifier: Apache-2.0
 
-import time
 import subprocess as sp
+import time
 
 
 def wait_for_qsub(run_id):
@@ -11,26 +11,25 @@ def wait_for_qsub(run_id):
     """
    Wait for the qsub job to terminate.
     """
     while True:
-        time.sleep(1*60)
+        time.sleep(1 * 60)
         try:
-            qsub_out = sp.check_output(['qstat', run_id], stderr=sp.STDOUT)
+            qsub_out = sp.check_output(["qstat", run_id], stderr=sp.STDOUT)
         except sp.CalledProcessError as err:
             qsub_out = err.output
 
         qsub_out = qsub_out.decode()
-        if 'Job has finished' in qsub_out:
+        if "Job has finished" in qsub_out:
             break
 
 
 def get_git_branch_name(path):
     """Get the git branch name of the given git directory"""
     try:
-        cmd = 'git rev-parse --abbrev-ref HEAD'
-        result = sp.check_output(cmd, shell=True,
-                                 cwd=path).strip()
+        cmd = "git rev-parse --abbrev-ref HEAD"
+        result = sp.check_output(cmd, shell=True, cwd=path).strip()
 
         # Decode byte string to string
-        branch_name = result.decode('utf-8')
+        branch_name = result.decode("utf-8")
         return branch_name
     except sp.CalledProcessError:
         return None
diff --git a/tests/test_access_om2_extract_checksums.py b/tests/test_access_om2_extract_checksums.py
index b7ee838..1f7088c 100644
--- a/tests/test_access_om2_extract_checksums.py
+++ b/tests/test_access_om2_extract_checksums.py
@@ -1,30 +1,30 @@
-import pytest
-import requests
 import json
 from pathlib import Path
 from unittest.mock import Mock
 
+import pytest
+import requests
+
 from model_config_tests.models import AccessOm2
 from model_config_tests.models.accessom2 import SUPPORTED_SCHEMA_VERSIONS
 
+
 @pytest.mark.parametrize("version", SUPPORTED_SCHEMA_VERSIONS)
 def test_extract_checksums(version):
     # Mock ExpTestHelper
     mock_experiment = Mock()
-    mock_experiment.output000 = Path('tests/resources')
-    mock_experiment.control_path = Path('tests/tmp')
+    mock_experiment.output000 = Path("tests/resources")
+    mock_experiment.control_path = Path("tests/tmp")
     model = AccessOm2(mock_experiment)
 
-    checksums = model.extract_checksums(
-        schema_version=version
-    )
+    checksums = model.extract_checksums(schema_version=version)
 
     # Assert version is set as expected
     assert checksums["schema_version"] == version
 
     # Check the entire checksum file is expected
-    with open(f'tests/resources/access-om2-checksums-1-0-0.json', 'r') as file:
+    with open("tests/resources/access-om2-checksums-1-0-0.json") as file:
         expected_checksums = json.load(file)
 
     assert checksums == expected_checksums