From 235563f60d1270fc7a80ad40db5185202e1dc3bd Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Thu, 8 Aug 2024 14:20:42 +1000 Subject: [PATCH 1/3] Add warnings when there's duplicate keys in config.yaml --- payu/branch.py | 16 +++++++++++++--- payu/fsops.py | 24 +++++++++++++++++++++++- test/test_payu.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/payu/branch.py b/payu/branch.py index 09f7d845..dd0c51b6 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -13,7 +13,7 @@ from typing import Optional import shutil -from ruamel.yaml import YAML, CommentedMap +from ruamel.yaml import YAML, CommentedMap, constructor import git from payu.fsops import read_config, DEFAULT_CONFIG_FNAME, list_archive_dirs @@ -67,8 +67,18 @@ def add_restart_to_config(restart_path: Path, config_path: Path) -> None: restart in archive""" # Default ruamel yaml preserves comments and multiline strings - yaml = YAML() - config = yaml.load(config_path) + try: + yaml = YAML() + config = yaml.load(config_path) + except constructor.DuplicateKeyError as e: + # PyYaml which is used to read config allows duplicate keys + # These will get removed when the configuration file is modified + warnings.warn( + "Removing any subsequent duplicate keys from config.yaml" + ) + yaml = YAML() + yaml.allow_duplicate_keys = True + config = yaml.load(config_path) # Add in restart path config['restart'] = str(restart_path) diff --git a/payu/fsops.py b/payu/fsops.py index ac91d82c..ce55c5ca 100644 --- a/payu/fsops.py +++ b/payu/fsops.py @@ -17,6 +17,7 @@ import shlex import subprocess from typing import Union, List +import warnings # Extensions import yaml @@ -62,6 +63,27 @@ def movetree(src, dst, symlinks=False): shutil.rmtree(src) +class DuplicateKeyWarnLoader(yaml.SafeLoader): + def construct_mapping(self, node, deep=False): + """Add warning for duplicate keys in yaml file, as currently + PyYAML overwrites duplicate keys even though in YAML, keys + are meant to be unique + """ + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + if key in mapping: + warnings.warn( + "Duplicate key found in config.yaml: " + f"key '{key}' with value '{value}' " + f"(original value: '{mapping[key]}')" + ) + mapping[key] = value + + return super().construct_mapping(node, deep) + + def read_config(config_fname=None): """Parse input configuration file and return a config dict.""" @@ -70,7 +92,7 @@ def read_config(config_fname=None): try: with open(config_fname, 'r') as config_file: - config = yaml.safe_load(config_file) + config = yaml.load(config_file, Loader=DuplicateKeyWarnLoader) # NOTE: A YAML file with no content returns `None` if config is None: diff --git a/test/test_payu.py b/test/test_payu.py index cf180875..6a2df5af 100644 --- a/test/test_payu.py +++ b/test/test_payu.py @@ -1,6 +1,7 @@ from io import StringIO import os from pathlib import Path +import payu.branch import pytest import shutil import stat @@ -432,3 +433,34 @@ def test_run_userscript_command(tmp_path): ]) def test_needs_shell(command, expected): assert payu.fsops.needs_subprocess_shell(command) == expected + + +def test_read_config_yaml_duplicate_key(): + """The PyYAML library is used for reading config.yaml, but use ruamel yaml + is used in when modifying config.yaml as part of payu checkout + (ruamel is used to preserve comments and multi-line strings). + This led to bug #441, where pyyaml allowed duplicate keys but + ruamel.library raises an error + """ + # Create a yaml file with a duplicate key + config_content = """ +pbs_flags: value1 +pbs_flags: value2 +""" + config_path = tmpdir / "config.yaml" + with config_path.open("w") as file: + file.write(config_content) + + # Test read config passes without an error but a warning is raised + warn_msg = r"Duplicate key found in config.yaml: key 'pbs_flags' with " + warn_msg += r"value 'value2' \(original value: 'value1'\)" + with pytest.warns(UserWarning, match=warn_msg): + payu.fsops.read_config(config_path) + + restart_path = tmpdir / "restarts" + + # Test add restart to config.yaml does not fail with an error, but + # still raises another warning that duplicates keys will be deleted + warn_msg = "Removing any subsequent duplicate keys from config.yaml" + with pytest.warns(UserWarning, match=warn_msg): + payu.branch.add_restart_to_config(restart_path, config_path) From fad94a4081579451b4176a0c03a6711d56016c5b Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Fri, 9 Aug 2024 09:20:50 +1000 Subject: [PATCH 2/3] Globally format warnings to omit line of source code in warnings messages --- payu/cli.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/payu/cli.py b/payu/cli.py index 0a6348a3..87afc4c9 100644 --- a/payu/cli.py +++ b/payu/cli.py @@ -15,6 +15,7 @@ import shlex import subprocess import sys +import warnings import payu import payu.envmod as envmod @@ -23,10 +24,17 @@ from payu.schedulers import index as scheduler_index import payu.subcommands - # Default configuration DEFAULT_CONFIG = 'config.yaml' +# Force warnings.warn() to omit the source code line in the message +formatwarning_orig = warnings.formatwarning +warnings.formatwarning = ( + lambda message, category, filename, lineno, line=None: ( + formatwarning_orig(message, category, filename, lineno, line='') + ) +) + def parse(): """Parse the command line inputs and execute the subcommand.""" From cddbef651ae7d474fcfe378ce426b47e9eca742b Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Tue, 20 Aug 2024 12:16:37 +1000 Subject: [PATCH 3/3] Add more information to duplicate key warnings in config.yaml --- payu/fsops.py | 4 ++-- test/test_payu.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/payu/fsops.py b/payu/fsops.py index ce55c5ca..6560eade 100644 --- a/payu/fsops.py +++ b/payu/fsops.py @@ -76,8 +76,8 @@ def construct_mapping(self, node, deep=False): if key in mapping: warnings.warn( "Duplicate key found in config.yaml: " - f"key '{key}' with value '{value}' " - f"(original value: '{mapping[key]}')" + f"key '{key}' with value '{value}'. " + f"This overwrites the original value: '{mapping[key]}'" ) mapping[key] = value diff --git a/test/test_payu.py b/test/test_payu.py index 6a2df5af..7ab218cf 100644 --- a/test/test_payu.py +++ b/test/test_payu.py @@ -452,8 +452,8 @@ def test_read_config_yaml_duplicate_key(): file.write(config_content) # Test read config passes without an error but a warning is raised - warn_msg = r"Duplicate key found in config.yaml: key 'pbs_flags' with " - warn_msg += r"value 'value2' \(original value: 'value1'\)" + warn_msg = "Duplicate key found in config.yaml: key 'pbs_flags' with " + warn_msg += "value 'value2'. This overwrites the original value: 'value1'" with pytest.warns(UserWarning, match=warn_msg): payu.fsops.read_config(config_path)