From b5e06a61a29bc682ec3af8d42d9f93fe54987d2e Mon Sep 17 00:00:00 2001
From: Jo Basevi
Date: Fri, 17 Nov 2023 09:39:33 +1100
Subject: [PATCH 1/7] Add branch and metadata with experiment uuid support

This includes:
- payu checkout and clone commands for interacting with branches
- added a metadata class that reads/creates/updates metadata files that
  run in experiment initialisation and in payu checkout and clone commands
- added a payu uuid command for creating new metadata files on existing
  branches
- added git utils file with wrappers around simple git functions
- added tests for added code
- added type hints to branch and metadata classes
---
 payu/branch.py                   | 153 +++++++++++
 payu/cli.py                      |   1 -
 payu/experiment.py               |  12 +-
 payu/git_utils.py                | 168 +++++++++++++
 payu/metadata.py                 | 268 ++++++++++++++++++++
 payu/models/model.py             |   1 -
 payu/subcommands/args.py         | 105 ++++++++
 payu/subcommands/checkout_cmd.py |  42 ++++
 payu/subcommands/clone_cmd.py    |  46 ++++
 payu/subcommands/uuid_cmd.py     |  27 ++
 setup.py                         |  10 +-
 test/common.py                   |  10 +-
 test/test_branch.py              | 418 +++++++++++++++++++++++++++++++
 test/test_git_utils.py           | 185 ++++++++++++++
 test/test_metadata.py            | 126 ++++++++++
 15 files changed, 1562 insertions(+), 10 deletions(-)
 create mode 100644 payu/branch.py
 create mode 100644 payu/git_utils.py
 create mode 100644 payu/metadata.py
 create mode 100644 payu/subcommands/checkout_cmd.py
 create mode 100644 payu/subcommands/clone_cmd.py
 create mode 100644 payu/subcommands/uuid_cmd.py
 create mode 100644 test/test_branch.py
 create mode 100644 test/test_git_utils.py
 create mode 100644 test/test_metadata.py

diff --git a/payu/branch.py b/payu/branch.py
new file mode 100644
index 00000000..2fe44e5d
--- /dev/null
+++ b/payu/branch.py
@@ -0,0 +1,153 @@
+"""Experiment branch support for payu's branch, clone and checkout commands
+
+This may generate a new experiment ID, update metadata, set any
+specified configuration in config.yaml and update work/archive symlinks
+
+:copyright: Copyright 2011 Marshall Ward, see AUTHORS for details.
+:license: Apache License, Version 2.0, see LICENSE for details.
+"""
+
+import os
+import warnings
+from pathlib import Path
+from typing import Optional
+
+from ruamel.yaml import YAML
+
+from payu.fsops import read_config, DEFAULT_CONFIG_FNAME
+from payu.laboratory import Laboratory
+from payu.metadata import Metadata
+from payu.git_utils import git_checkout_branch, git_clone, get_git_branch
+
+
+def add_restart_to_config(restart_path: Path,
+                          config_path: Optional[Path] = None) -> None:
+    """Add a 'restart: <restart_path>' entry to the configuration file. It
+    is used to start a run if there isn't a pre-existing restart in archive.
+    Warns and leaves the file untouched if either path is invalid"""
+    if config_path is None:
+        config_path = Path(DEFAULT_CONFIG_FNAME)
+        config_path = config_path.resolve()
+
+    # Check for valid paths - note is_file() must be called, not referenced
+    skip_msg = f"Skipping adding 'restart: {restart_path}' to config file"
+    if not config_path.exists() or not config_path.is_file():
+        warnings.warn(f"Given configuration file {config_path} does not "
+                      "exist. " + skip_msg)
+        return
+    if not restart_path.exists() or not restart_path.is_dir():
+        warnings.warn((f"Given restart directory {restart_path} does not "
+                       "exist. " + skip_msg))
+        return
+
+    # Default ruamel yaml preserves comments and multiline strings
+    yaml = YAML()
+    config = yaml.load(config_path)
+
+    # Add in restart path
+    config['restart'] = str(restart_path)
+
+    # Write modified lines back to config
+    yaml.dump(config, config_path)
+    print(f"Added 'restart: {restart_path}' to configuration file:",
+          config_path.name)
+
+
+def checkout_branch(lab: Laboratory,
+                    branch_name: str,
+                    is_new_branch: bool = False,
+                    is_new_experiment: bool = False,
+                    start_point: Optional[str] = None,
+                    restart_path: Optional[Path] = None,
+                    config_path: Optional[Path] = None) -> None:
+    """Checkout branch, set up its metadata and switch lab symlinks"""
+    # Note: Control path is set in read_config
+    config = read_config(config_path)
+    control_path = Path(config.get('control_path'))
+
+    # Checkout branch
+    git_checkout_branch(control_path, branch_name, is_new_branch, start_point)
+
+    metadata = Metadata(lab, branch=branch_name, config_path=config_path)
+    if is_new_branch or is_new_experiment:
+        # Creates new uuid, experiment name, updates and commit metadata file
+        metadata.setup_new_experiment()
+    else:
+        # Setup metadata if there is no uuid, otherwise check existing metadata
+        # and commit any changes
+        metadata.setup()
+        metadata.commit_file()
+
+    # Add restart option to config
+    if restart_path:
+        add_restart_to_config(restart_path, config_path=config_path)
+
+    # Switch/Remove/Add archive and work symlinks
+    experiment = metadata.experiment_name
+    switch_symlink(Path(lab.archive_path), control_path, experiment, 'archive')
+    switch_symlink(Path(lab.work_path), control_path, experiment, 'work')
+
+
+def switch_symlink(lab_dir_path: Path, control_path: Path,
+                   experiment_name: str, sym_dir: str) -> None:
+    """Helper function for removing and switching work and archive
+    symlinks in control directory"""
+    dir_path = lab_dir_path / experiment_name
+    sym_path = control_path / sym_dir
+
+    # Remove symlink if it already exists (this includes broken symlinks)
+    if sym_path.is_symlink():
+        previous_path = sym_path.resolve()
+        sym_path.unlink()
+        print(f"Removed {sym_dir} symlink to {previous_path}")
+
+    # Create symlink, if directory exists in laboratory
+    if dir_path.exists():
+        sym_path.symlink_to(dir_path)
+        print(f"Added {sym_dir} symlink to {dir_path}")
+
+
+def clone(repository: str,
+          directory: Path,
+          branch: Optional[str] = None,
+          new_branch_name: Optional[str] = None,
+          keep_uuid: bool = False,
+          model_type: Optional[str] = None,
+          config_path: Optional[Path] = None,
+          lab_path: Optional[Path] = None,
+          restart_path: Optional[Path] = None) -> None:
+    """Clone an experiment control repo and set up its branch and metadata"""
+    # git clone the repository
+    git_clone(repository, directory, branch)
+
+    # Resolve directory to an absolute path and cd into cloned directory
+    previous_directory = os.getcwd()
+    dir_path = directory.resolve()
+    os.chdir(dir_path)
+    try:
+        # Initial lab and metadata
+        lab = Laboratory(model_type, config_path, lab_path)
+
+        # Use checkout wrapper
+        if new_branch_name is not None:
+            # Create and checkout new branch
+            checkout_branch(lab=lab,
+                            is_new_branch=True,
+                            branch_name=new_branch_name,
+                            restart_path=restart_path,
+                            config_path=config_path)
+        else:
+            # Checkout branch
+            if branch is None:
+                branch = get_git_branch(dir_path)
+
+            # Note: is_new_experiment ensures new uuid and metadata is created
+            # Otherwise uuid is generated only if there's no pre-existing uuid
+            checkout_branch(lab=lab,
+                            branch_name=branch,
+                            config_path=config_path,
+                            is_new_experiment=not keep_uuid,
+                            restart_path=restart_path)
+    finally:
+        # Change back to previous directory, even if the checkout failed
+        os.chdir(previous_directory)
diff --git a/payu/cli.py b/payu/cli.py
index 433dc216..1a6b5bab 100644
--- a/payu/cli.py
+++ b/payu/cli.py
@@ -30,7 +30,6 @@
 
 def parse():
     """Parse the command line inputs and execute the subcommand."""
-
     parser = generate_parser()
 
     # Display help if no arguments are provided
diff --git a/payu/experiment.py b/payu/experiment.py
index f0d85218..2109c239 100644
--- a/payu/experiment.py
+++ b/payu/experiment.py
@@ -33,6 +33,7 @@
 from payu.manifest import Manifest
 from payu.calendar import parse_date_offset
 from payu.sync import SyncToRemoteArchive
+from payu.metadata import Metadata
 
 # Environment module support on vayu
 # TODO: To be removed
@@ -55,6 +56,10 @@ def __init__(self, lab, reproduce=False, force=False):
 
         self.start_time = datetime.datetime.now()
 
+        # Initialise experiment metadata - uuid and experiment name
+        self.metadata = Metadata(lab)
+        self.metadata.setup()
+
         # TODO: replace with dict, check versions via key-value pairs
         self.modules = set()
 
@@ -289,8 +294,7 @@ def set_expt_pathnames(self):
         self.control_path = self.config.get('control_path')
 
         # Experiment name
-        self.name = self.config.get('experiment',
-                                    os.path.basename(self.control_path))
+        self.name = self.metadata.experiment_name
 
         # Experiment subdirectories
         self.archive_path = os.path.join(self.lab.archive_path, self.name)
@@ -453,6 +457,10 @@ def setup(self, force_archive=False):
         if self.config.get('archive', True):
             self.get_restarts_to_prune()
 
+        # Commit any changes to metadata
+        if self.runlog.enabled:
+            self.metadata.commit_file()
+
     def run(self, *user_flags):
 
         # XXX: This was previously done in reversion
diff --git a/payu/git_utils.py b/payu/git_utils.py
new file mode 100644
index 00000000..e5df3ef2
--- /dev/null
+++ b/payu/git_utils.py
@@ -0,0 +1,168 @@
+"""Simple wrappers around git commands
+
+Using the GitPython library for interacting with Git
+"""
+
+import configparser
+import warnings
+from pathlib import Path
+from typing import Optional, Union, List, Dict
+
+import git
+
+
+class PayuBranchError(Exception):
+    """Custom exception for payu branch operations"""
+
+
+def _get_git_repository(repo_path: Union[Path, str],
+                        initialise: bool = False,
+                        catch_error: bool = False) -> Optional[git.Repo]:
+    """Return a GitPython repository object at given path. If initialise is
+    true, it will attempt to initialise a repository if it does not exist.
+    Otherwise, if catch_error is true, it will return None"""
+    try:
+        repo = git.Repo(repo_path)
+        return repo
+    except git.exc.InvalidGitRepositoryError:
+        if initialise:
+            repo = git.Repo.init(repo_path)
+            print(f"Initialised new git repository at: {repo_path}")
+            return repo
+
+        warnings.warn(
+            f"Path is not a valid git repository: {repo_path}"
+        )
+        if catch_error:
+            return None
+        raise
+
+
+def get_git_branch(repo_path: Union[Path, str]) -> Optional[str]:
+    """Return the current git branch, or None if repository path is not a
+    git repository or HEAD is detached (active_branch would raise)"""
+    repo = _get_git_repository(repo_path, catch_error=True)
+    if repo and not repo.head.is_detached:
+        return str(repo.active_branch)
+
+
+def get_git_user_info(repo_path: Union[Path, str],
+                      config_key: str,
+                      example_value: str) -> Optional[str]:
+    """Return git config user info, None otherwise. Used for retrieving
+    name and email saved in git"""
+    repo = _get_git_repository(repo_path, catch_error=True)
+    if repo is None:
+        return
+
+    try:
+        user_value = repo.config_reader().get_value('user', config_key)
+        return user_value
+    except (configparser.NoSectionError, configparser.NoOptionError):
+        print(
+            f'No git config set for user.{config_key}. '
+            'To set run the following inside the control repository:\n'
+            f'    git config user.{config_key} "{example_value}"'
+        )
+
+
+def git_commit(repo_path: Union[Path, str],
+               commit_message: str,
+               paths_to_commit: List[Union[Path, str]]) -> None:
+    """Add a git commit of changes to paths, if any of them have changed"""
+    # Get/Create git repository - initialise is true as adding a commit
+    # directly after
+    repo = _get_git_repository(repo_path, initialise=True)
+
+    # Un-stage any pre-existing changes
+    repo.index.reset()
+
+    # Stage each path that has changed or is untracked (str paths included)
+    changes = False
+    untracked_files = [Path(repo_path) / path for path in repo.untracked_files]
+    for path in paths_to_commit:
+        if repo.git.diff(None, path) or Path(path) in untracked_files:
+            repo.index.add([path])
+            changes = True
+
+    # Run commit if there's changes
+    if changes:
+        repo.index.commit(commit_message)
+        print(commit_message)
+
+
+def list_local_branches(repo: git.Repo) -> List[str]:
+    """List all local branches"""
+    return [head.name for head in repo.heads]
+
+
+def remote_branches_dict(repo: git.Repo) -> Dict[str, git.Commit]:
+    """Return a dictionary mapping remote branch names to commits"""
+    branch_to_commits = {}
+    for remote in repo.remotes:
+        remote.fetch()
+        for ref in remote.refs:
+            branch_to_commits[ref.remote_head] = ref.commit
+    return branch_to_commits
+
+
+def git_checkout_branch(repo_path: Union[Path, str],
+                        branch_name: str,
+                        new_branch: bool = False,
+                        start_point: Optional[str] = None) -> None:
+    """Checkout branch and create branch if specified"""
+    # Get git repository
+    repo = _get_git_repository(repo_path)
+
+    # Existing branches
+    local_branches = list_local_branches(repo)
+    remote_branches = remote_branches_dict(repo)
+    all_branches = local_branches + list(remote_branches.keys())
+
+    # Create new branch, if specified
+    if new_branch:
+        if branch_name in all_branches:
+            raise PayuBranchError(
+                f"A branch named {branch_name} already exists. "
+                "To checkout this branch, remove the new branch flag '-b' "
+                "from the checkout command."
+            )
+
+        if start_point is not None:
+            if (start_point not in local_branches and
+                    start_point in remote_branches):
+                # Use hash for remote start point -local branch names work fine
+                start_point = remote_branches[start_point]
+            branch = repo.create_head(branch_name, commit=start_point)
+        else:
+            branch = repo.create_head(branch_name)
+        branch.checkout()
+
+        print(f"Created and checked out new branch: {branch_name}")
+        return
+
+    # Checkout branch
+    if branch_name not in all_branches:
+        raise PayuBranchError(
+            f"There is no existing branch called {branch_name}. "
+            "To create this branch, add the new branch flag '-b' "
+            "to the checkout command."
+        )
+
+    repo.git.checkout(branch_name)
+    print(f"Checked out branch: {branch_name}")
+
+
+def git_clone(repository: str,
+              directory: Union[str, Path],
+              branch: Optional[str] = None) -> None:
+    """Clone repository to directory"""
+    # Clone the repository
+    if branch is not None:
+        git.Repo.clone_from(repository,
+                            to_path=directory,
+                            branch=branch)
+    else:
+        git.Repo.clone_from(repository, to_path=directory)
+
+    print(f"Cloned repository from {repository} to directory: {directory}")
diff --git a/payu/metadata.py b/payu/metadata.py
new file mode 100644
index 00000000..76cab6ae
--- /dev/null
+++ b/payu/metadata.py
@@ -0,0 +1,268 @@
+"""Payu experiment UUID and metadata support
+
+Generates and commits a new experiment uuid and updates/creates experiment
+metadata
+
+:copyright: Copyright 2011 Marshall Ward, see AUTHORS for details.
+:license: Apache License, Version 2.0, see LICENSE for details.
+"""
+
+import warnings
+from pathlib import Path
+from typing import Optional, List
+
+import shortuuid
+from ruamel.yaml import YAML
+from ruamel.yaml.comments import CommentedMap
+
+from payu.fsops import read_config
+from payu.laboratory import Laboratory
+from payu.git_utils import get_git_branch, get_git_user_info, git_commit
+
+# Short uuid is used for experiment names (for work and archive directories)
+SHORT_UUID_LENGTH = 7
+METADATA_FILENAME = 'metadata.yaml'
+
+USAGE_HELP = """
+If this is a new experiment, either:
+    - Create a new git branch, by running:
+        payu checkout -b NEW_BRANCH_NAME
+      where NEW_BRANCH_NAME is name of the new branch
+    - Or generate a new experiment uuid on the current git branch, by running:
+        payu uuid
+Both of the above will generate a new uuid, a branch-uuid aware experiment
+name, and update and commit changes to the metadata file.
+Note: Experiment names will be of the format:
+    {CONTROL_DIR}-{BRANCH_NAME}-{SHORTENED_UUID}
+
+If this an older experiment, or if wanting to opt out of branch-uuid aware
+experiment names, run:
+    payu uuid --legacy
+This will generate a new uuid, set the experiment name to be the name of
+the control directory (default) or the set 'experiment' value in the
+configuration file. This command will also update and commit changes to the
+metadata file.
+"""
+
+
+class ExperimentMetadataError(Exception):
+    """Class for metadata processing exceptions"""
+    def __init__(self, message="Invalid experiment name in metadata"):
+        super().__init__(message)
+        print(USAGE_HELP)
+
+
+class Metadata:
+    """
+    Class to store/update/create metadata such as experiment uuid and name
+
+    Parameters:
+        lab : Laboratory
+            The modules laboratory
+        branch : str | None = None
+            The git branch on which the experiment is run
+        control_path : Path | None = None
+            Path to where the experiment is configured and run. The default
+            is set to the current working directory. This default is set in
+            fsops.read_config
+        config_path : Path | None = None
+            Configuration Path. The default is config.yaml in the current
+            working directory. This is also set in fsops.read_config
+    """
+
+    def __init__(self,
+                 lab: Laboratory,
+                 config_path: Optional[Path] = None,
+                 branch: Optional[str] = None,
+                 control_path: Optional[Path] = None) -> None:
+        self.lab = lab
+        self.config = read_config(config_path)
+
+        if control_path is None:
+            control_path = Path(self.config.get('control_path'))
+        self.control_path = control_path
+        self.filepath = self.control_path / METADATA_FILENAME
+
+        if branch is None:
+            branch = get_git_branch(control_path)
+        self.branch = branch
+
+        self.base_experiment_name = self.config.get('experiment',
+                                                    self.control_path.name)
+
+        metadata = self.read_file()
+        self.uuid = metadata.get('uuid', None)
+        self.experiment_name = metadata.get('experiment', None)
+
+    def read_file(self) -> CommentedMap:
+        """Read metadata file - preserving original format if it exists"""
+        metadata = CommentedMap()
+        if self.filepath.exists():
+            # Use default ruamel YAML to preserve comments and multi-line
+            # strings
+            metadata = YAML().load(self.filepath)
+        return metadata
+
+    def setup(self) -> None:
+        """To be run at experiment initialisation"""
+        if self.uuid is None:
+            warnings.warn("No experiment uuid found. Generating a new uuid")
+            self.update_metadata()
+        elif self.experiment_name is None:
+            # Add an experiment name back into metadata
+            warnings.warn("No experiment name found in metadata")
+            self.update_metadata(set_only_experiment_name=True)
+
+        self.check_experiment_name()
+
+    def update_metadata(self, set_only_experiment_name: bool = False) -> None:
+        """Create/Update metadata - uses legacy existing name if there's an
+        existing local archive"""
+        lab_archive_path = Path(self.lab.archive_path)
+        archive_path = lab_archive_path / self.base_experiment_name
+
+        if archive_path.exists():
+            warnings.warn(
+                f"Pre-existing archive found at: {archive_path}. "
+                f"Experiment name will remain: {self.base_experiment_name}"
+            )
+            if set_only_experiment_name:
+                self.set_new_experiment_name(legacy=True)
+            else:
+                self.set_new_uuid(legacy=True)
+        else:
+            if set_only_experiment_name:
+                self.set_new_experiment_name()
+            else:
+                self.set_new_uuid()
+
+        # Update metadata file
+        self.update_file()
+
+    def check_experiment_name(self) -> None:
+        """Check experiment name in metadata file"""
+        truncated_uuid = self.uuid[:SHORT_UUID_LENGTH]
+        if self.experiment_name.endswith(truncated_uuid):
+            # Check whether on the same branch or control directory as
+            # using the experiment name in metadata.yaml
+            metadata_experiment = self.experiment_name
+            self.set_new_experiment_name()
+            if self.experiment_name != metadata_experiment:
+                warnings.warn(
+                    "Either the branch name, the control directory, or the "
+                    "configured 'experiment' value has changed.\n"
+                    f"Experiment name in {METADATA_FILENAME}: "
+                    f"{metadata_experiment}\nGenerated experiment name: "
+                    f"{self.experiment_name}."
+                )
+                raise ExperimentMetadataError()
+        else:
+            # Legacy experiment name: Check metadata's experiment name matches
+            # base experiment name
+            if self.experiment_name != self.base_experiment_name:
+                msg = f"Experiment name in {METADATA_FILENAME} does not match"
+                if 'experiment' in self.config:
+                    msg += " the configured 'experiment' value."
+                else:
+                    msg += " the control directory base name."
+                warnings.warn(msg + f" {self.experiment_name} does not equal "
+                              f"{self.base_experiment_name}")
+                raise ExperimentMetadataError()
+
+    def set_new_experiment_name(self, legacy: bool = False) -> None:
+        """Set a new experiment name - this is the name used for work
+        and archive directories"""
+        if legacy:
+            # Experiment remains base experiment name
+            self.experiment_name = self.base_experiment_name
+            return
+
+        # Add branch and a truncated uuid to experiment name
+        truncated_uuid = self.uuid[:SHORT_UUID_LENGTH]
+        if self.branch is None or self.branch in ('main', 'master'):
+            suffix = f'-{truncated_uuid}'
+        else:
+            suffix = f'-{self.branch}-{truncated_uuid}'
+
+        self.experiment_name = self.base_experiment_name + suffix
+
+    def set_new_uuid(self, legacy: bool = False) -> None:
+        """Create a new uuid and set experiment name"""
+        # Generate new uuid and experiment name
+        self.uuid = generate_uuid()
+        self.set_new_experiment_name(legacy=legacy)
+
+        if legacy:
+            return
+
+        # Check experiment name is unique in local archive (compare by name)
+        lab_archive_path = Path(self.lab.archive_path)
+        if lab_archive_path.exists():
+            local_experiments = {item.name for item in
+                                 lab_archive_path.iterdir() if item.is_dir()}
+            while self.experiment_name in local_experiments:
+                # Generate a new id and experiment name
+                self.uuid = generate_uuid()
+                self.set_new_experiment_name()
+
+    def update_file(self) -> None:
+        """Write any updates to metadata file"""
+        metadata = self.read_file()
+
+        previous_uuid = metadata.get('uuid', None)
+        if previous_uuid is not None and previous_uuid != self.uuid:
+            metadata['previous_uuid'] = previous_uuid
+
+        # Update uuid
+        metadata['uuid'] = self.uuid
+
+        # Add experiment name
+        metadata['experiment'] = self.experiment_name
+
+        # Update email/contact in metadata
+        self.update_user_info(metadata=metadata,
+                              metadata_key='contact',
+                              config_key='name',
+                              filler_values=['Your name',
+                                             'Add your name here'])
+
+        self.update_user_info(metadata=metadata,
+                              metadata_key='email',
+                              config_key='email',
+                              filler_values=['you@example.com',
+                                             'Add your email address here'])
+
+        # Write updated metadata to file
+        YAML().dump(metadata, self.filepath)
+
+    def update_user_info(self, metadata: CommentedMap, metadata_key: str,
+                         config_key: str, filler_values: List[str]) -> None:
+        """Add user email/name to metadata - if defined and not already set
+        in metadata"""
+        if (metadata_key not in metadata
+                or metadata[metadata_key] in filler_values):
+            # Get config value from git
+            value = get_git_user_info(repo_path=self.control_path,
+                                      config_key=config_key,
+                                      example_value=filler_values[0])
+            if value is not None:
+                metadata[metadata_key] = value
+
+    def commit_file(self) -> None:
+        """Add a git commit for changes to metadata file, if file has changed"""
+        commit_message = f"Updated metadata. Experiment uuid: {self.uuid}"
+        git_commit(repo_path=self.control_path,
+                   commit_message=commit_message,
+                   paths_to_commit=[self.filepath])
+
+    def setup_new_experiment(self, legacy: bool = False) -> None:
+        """Creates new uuid, creates/updates metadata file and
+        commits file to git"""
+        self.set_new_uuid(legacy)
+        self.update_file()
+        self.commit_file()
+
+
+def generate_uuid() -> str:
+    """Generate a new uuid"""
+    return shortuuid.uuid()
diff --git a/payu/models/model.py b/payu/models/model.py
index 80548f77..9d4365aa 100644
--- a/payu/models/model.py
+++ b/payu/models/model.py
@@ -91,7 +91,6 @@ def set_model_pathnames(self):
                                           self.exec_name)
         else:
             self.exec_path = None
-
         if self.exec_path:
             # Make exec_name consistent for models with fully qualified path.
             # In all cases it will just be the name of the executable without a
diff --git a/payu/subcommands/args.py b/payu/subcommands/args.py
index 253701a9..4ad9f7f0 100644
--- a/payu/subcommands/args.py
+++ b/payu/subcommands/args.py
@@ -149,3 +149,108 @@
                  syncing.',
     }
 }
+
+# Clone Repository
+repository = {
+    'flags': [],
+    'parameters': {
+        'dest': 'repository',
+        'help': 'The repository to clone from. This can be either a local '
+                'path or git url'
+    }
+}
+
+# Clone to directory
+local_directory = {
+    'flags': [],
+    'parameters': {
+        'dest': 'local_directory',
+        'help': 'The directory to clone into'
+    }
+}
+
+# Clone uuid flag
+keep_uuid = {
+    'flags': ('-k', '--keep-uuid'),
+    'parameters': {
+        'action': 'store_true',
+        'default': False,
+        'dest': 'keep_uuid',
+        'help': 'If the cloned experiment uuid exists, leave it '
+                'unchanged'
+    }
+}
+
+# Clone branch
+clone_branch = {
+    'flags': ('--branch', '-B'),
+    'parameters': {
+        'action': 'store',
+        'dest': 'branch',
+        'default': None,
+        'help': 'Clone and checkout this branch'
+    }
+}
+
+# Clone create branch
+new_branch_name = {
+    'flags': ('--new-branch', '-b'),
+    'parameters': {
+        'action': 'store',
+        'dest': 'new_branch_name',
+        'default': None,
+        'help': 'The name of the git branch to create and checkout'
+    }
+}
+
+# Branch name
+branch_name = {
+    'flags': [],
+    'parameters': {
+        'dest': 'branch_name',
+        'help': 'The name of the git branch to create/checkout'
+    }
+}
+
+# Branch start point
+start_point = {
+    'flags': [],
+    'parameters': {
+        'nargs': '?',
+        'dest': 'start_point',
+        'help': 'The new branch head will point to this commit'
+    }
+}
+
+
+# Branch start restart
+restart_path = {
+    'flags': ('--start-from-restart', '-s'),
+    'parameters': {
+        'dest': 'restart_path',
+        'action': 'store',
+        'help': 'The absolute restart path from which to start the model run'
+    }
+}
+
+# Checkout new branch flag
+new_branch = {
+    'flags': ['-b'],
+    'parameters': {
+        'dest': 'new_branch',
+        'action': 'store_true',
+        'default': False,
+        'help': 'Create new branch'
+    }
+}
+
+# Legacy experiment
+legacy_experiment = {
+    'flags': ['--legacy'],
+    'parameters': {
+        'dest': 'legacy_experiment',
+        'action': 'store_true',
+        'default': False,
+        'help': 'Flag to opt out of branch-uuid aware experiment names'
+    }
+}
diff --git a/payu/subcommands/checkout_cmd.py b/payu/subcommands/checkout_cmd.py
new file mode 100644
index 00000000..09ddd365
--- /dev/null
+++ b/payu/subcommands/checkout_cmd.py
@@ -0,0 +1,42 @@
+"""Run the `payu checkout` command.
+
+:copyright: Copyright 2018 Marshall Ward, see AUTHORS for details.
+:license: Apache License, Version 2.0, see LICENSE for details.
+"""
+from pathlib import Path
+
+from payu.laboratory import Laboratory
+from payu.branch import checkout_branch
+import payu.subcommands.args as args
+
+title = 'checkout'
+parameters = {'description': ('A wrapper around git checkout. '
+                              'Create a new branch (if specified), '
+                              'checkout branch, setup experiment metadata '
+                              'and create/switch archive/work symlinks')}
+
+arguments = [args.model, args.config, args.laboratory, args.new_branch,
+             args.branch_name, args.start_point, args.restart_path]
+
+
+def transform_strings_to_path(path_str=None):
+    return Path(path_str) if path_str is not None else None
+
+
+def runcmd(model_type, config_path, lab_path, new_branch,
+           branch_name, start_point, restart_path):
+    """Execute the command."""
+    lab = Laboratory(model_type, config_path, lab_path)
+
+    config_path = transform_strings_to_path(config_path)
+    restart_path = transform_strings_to_path(restart_path)
+
+    checkout_branch(lab=lab,
+                    is_new_branch=new_branch,
+                    branch_name=branch_name,
+                    start_point=start_point,
+                    restart_path=restart_path,
+                    config_path=config_path)
+
+
+runscript = runcmd
diff --git a/payu/subcommands/clone_cmd.py b/payu/subcommands/clone_cmd.py
new file mode 100644
index 00000000..2f875cc6
--- /dev/null
+++ b/payu/subcommands/clone_cmd.py
@@ -0,0 +1,46 @@
+"""Run the `payu clone` command.
+
+:copyright: Copyright 2018 Marshall Ward, see AUTHORS for details.
+:license: Apache License, Version 2.0, see LICENSE for details.
+"""
+
+from pathlib import Path
+
+from payu.branch import clone
+import payu.subcommands.args as args
+
+title = 'clone'
+parameters = {'description': ('A wrapper around git clone. Clones a '
+                              'control repository and setup new experiment '
+                              'metadata')}
+
+arguments = [args.model, args.config, args.laboratory,
+             args.keep_uuid, args.clone_branch,
+             args.repository, args.local_directory,
+             args.new_branch_name, args.restart_path]
+
+
+def transform_strings_to_path(path_str=None):
+    return Path(path_str) if path_str is not None else None
+
+
+def runcmd(model_type, config_path, lab_path, keep_uuid,
+           branch, repository, local_directory, new_branch_name, restart_path):
+    """Execute the command."""
+    config_path = transform_strings_to_path(config_path)
+    restart_path = transform_strings_to_path(restart_path)
+    lab_path = transform_strings_to_path(lab_path)
+    directory = transform_strings_to_path(local_directory)
+
+    clone(repository=repository,
+          directory=directory,
+          branch=branch,
+          keep_uuid=keep_uuid,
+          model_type=model_type,
+          config_path=config_path,
+          lab_path=lab_path,
+          new_branch_name=new_branch_name,
+          restart_path=restart_path)
+
+
+runscript = runcmd
diff --git a/payu/subcommands/uuid_cmd.py b/payu/subcommands/uuid_cmd.py
new file mode 100644
index 00000000..55cbf21b
--- /dev/null
+++ b/payu/subcommands/uuid_cmd.py
@@ -0,0 +1,27 @@
+"""Run the `payu uuid` command.
+
+:copyright: Copyright 2018 Marshall Ward, see AUTHORS for details.
+:license: Apache License, Version 2.0, see LICENSE for details.
+"""
+
+import payu.subcommands.args as args
+from payu.metadata import Metadata
+from payu.laboratory import Laboratory
+
+title = 'uuid'
+parameters = {'description': ('Generates and commits a new experiment uuid, '
+                              'update/create and commit experiment metadata')}
+arguments = [args.model, args.config, args.laboratory, args.legacy_experiment]
+
+
+def runcmd(model_type, config_path, lab_path, legacy_experiment):
+    """Execute the command."""
+    lab = Laboratory(model_type=model_type,
+                     config_path=config_path,
+                     lab_path=lab_path)
+    metadata = Metadata(lab=lab, config_path=config_path)
+
+    metadata.setup_new_experiment(legacy=legacy_experiment)
+
+
+runscript = runcmd
diff --git a/setup.py b/setup.py
index 9b7b2dbc..9b44e155 100644
--- a/setup.py
+++ b/setup.py
@@ -36,7 +36,10 @@
         'yamanifest',
         'dateutil',
         'tenacity',
-        'cftime'
+        'cftime',
+        'shortuuid',
+        'GitPython',
+        'ruamel.yaml'
     ],
     install_requires=[
         'f90nml >= 0.16',
@@ -45,7 +48,10 @@
         'requests[security]',
         'python-dateutil',
         'tenacity!=7.0.0',
-        'cftime'
+        'cftime',
+        'shortuuid >= 1.0.11',
+        'GitPython >= 3.1.40',
+        'ruamel.yaml >= 0.18.5'
     ],
     tests_require=[
         'pytest',
diff --git a/test/common.py b/test/common.py
index 0001dcfa..2abd341c 100644
--- a/test/common.py
+++ b/test/common.py
@@ -28,6 +28,8 @@
 expt_archive_dir = archive_dir / ctrldir_basename
 expt_workdir = labdir / 'work' / ctrldir_basename
 
+config_path = ctrldir / 'config.yaml'
+
 print('tmpdir: {}'.format(tmpdir))
 
 config = {
@@ -48,11 +50,11 @@
             'input': False,
             'exe': False
         }
-    }
+    },
+    'runlog': False
 }
 
 
-
 @contextmanager
 def cd(directory):
     """
@@ -122,8 +124,8 @@ def payu_setup(model_type=None,
                                       force)
 
 
-def write_config(config):
-    with (ctrldir / 'config.yaml').open('w') as file:
+def write_config(config, path=config_path):
+    with path.open('w') as file:
         file.write(yaml.dump(config, default_flow_style=False))
 
 
diff --git a/test/test_branch.py b/test/test_branch.py
new file mode 100644
index 00000000..eeaccd37
--- /dev/null
+++
b/test/test_branch.py @@ -0,0 +1,418 @@ +import copy +import shutil +from pathlib import Path + +import pytest +import git +from unittest.mock import patch + +import payu +from payu.branch import add_restart_to_config, switch_symlink +from payu.branch import checkout_branch, clone +from payu.metadata import Metadata +from payu.fsops import read_config + +from test.common import cd +from test.common import tmpdir, ctrldir, labdir +from test.common import ctrldir_basename +from test.common import config as config_orig, write_config +from test.common import config_path +from test.common import make_all_files, make_expt_archive_dir + + +# Global config +config = copy.deepcopy(config_orig) + + +@pytest.fixture(autouse=True) +def setup_and_teardown(): + # Create tmp, lab and control directories + try: + tmpdir.mkdir() + labdir.mkdir() + ctrldir.mkdir() + make_all_files() + except Exception as e: + print(e) + + yield + + # Remove tmp directory + try: + shutil.rmtree(tmpdir) + except Exception as e: + print(e) + + +def setup_control_repository(path: Path = ctrldir) -> git.Repo: + """ Return an new control repository""" + write_config(config, path=(path / 'config.yaml')) + # Initialise a control repo + repo = git.Repo.init(path) + repo.index.add("*") + # Commit the changes + repo.index.commit("First commit - initialising repository") + return repo + + +@pytest.mark.parametrize( + "config_lines, expected_lines", + [ + ( + ( + 'sync:', + ' restart: true', + '# Test comment', + 'restart: old/path/to/restart', + 'anotherField: 1\n' + ), + ( + 'sync:', + ' restart: true', + '# Test comment', + 'restart: {0}', + 'anotherField: 1\n' + ) + ), + ( + ( + '# Test comment', + '', + 'anotherField: 1', + ), + ( + '# Test comment', + '', + 'anotherField: 1', + 'restart: {0}\n', + ) + ), + ] +) +def test_add_restart_to_config(config_lines, expected_lines): + """Test adding restart: path/to/restart to configuration file""" + restart_path = labdir / 'archive' / 'tmpRestart' + 
restart_path.mkdir() + + test_config = '\n'.join(config_lines) + expected_config = '\n'.join(expected_lines).format(restart_path) + + with config_path.open('w') as file: + file.write(test_config) + + # Function to test + with cd(ctrldir): + add_restart_to_config(restart_path) + + with config_path.open('r') as file: + updated_config = file.read() + + # Test order, comments are preserved + assert updated_config == expected_config + + +def test_add_restart_to_config_invalid_restart_path(): + """Test restart path that does not exist raises a warning""" + restart_path = tmpdir / 'restartDNE' + + config_content = "# Test config content" + with config_path.open('w') as file: + file.write(config_content) + + expected_msg = f"Given restart directory {restart_path} does not exist. " + expected_msg += f"Skipping adding 'restart: {restart_path}' to config file" + + with cd(ctrldir): + with pytest.warns(UserWarning, match=expected_msg): + add_restart_to_config(restart_path) + + # Test config unchanged + with config_path.open('r') as file: + assert file.read() == config_content + + +def test_add_restart_to_config_invalid_config_path(): + """Test restart path that does not exist raises a warning""" + config_path = tmpdir / "configDNE" + + restart_path = labdir / 'archive' / 'tmpRestart' + restart_path.mkdir(exist_ok=True) + + expected_msg = f"Given configuration file {config_path} does not exist. 
" + expected_msg += f"Skipping adding 'restart: {restart_path}' to config file" + + with pytest.warns(UserWarning, match=expected_msg): + add_restart_to_config(restart_path, config_path) + + +def test_switch_symlink_when_symlink_and_archive_exists(): + # Pre-existing experiment symlink + lab_archive = labdir / 'archive' + previous_archive_dir = lab_archive / 'Experiment0' + previous_archive_dir.mkdir(parents=True) + + archive_symlink = ctrldir / 'archive' + archive_symlink.symlink_to(previous_archive_dir) + + # New Experiment - Existing archive + experiment_name = 'Experiment1' + archive_dir = lab_archive / experiment_name + archive_dir.mkdir(parents=True) + + # Test Function + switch_symlink(lab_archive, ctrldir, experiment_name, 'archive') + + # Assert new symlink is created + assert archive_symlink.exists() and archive_symlink.is_symlink() + assert archive_symlink.resolve() == archive_dir + + +def test_switch_symlink_when_symlink_exists_but_no_archive(): + # Pre-existing experiment symlink + lab_archive = labdir / 'archive' + previous_archive_dir = lab_archive / 'Experiment0' + previous_archive_dir.mkdir(parents=True) + + archive_symlink = ctrldir / 'archive' + archive_symlink.symlink_to(previous_archive_dir) + + # New Experiment + experiment_name = 'Experiment1' + + # Test Function + switch_symlink(lab_archive, ctrldir, experiment_name, 'archive') + + # Assert no symlink is created but previous one is removed + assert not archive_symlink.exists() + assert not archive_symlink.is_symlink() + + +def test_switch_symlink_when_no_symlink_exists_and_no_archive(): + # New Experiment + experiment_name = 'Experiment1' + lab_archive = labdir / 'archive' + + archive_symlink = ctrldir / 'archive' + + # Test Function + switch_symlink(lab_archive, ctrldir, experiment_name, 'archive') + + # Assert no symlink + assert not archive_symlink.exists() + assert not archive_symlink.is_symlink() + + +@patch('shortuuid.uuid') +def test_checkout_branch(mock_uuid): + repo = 
setup_control_repository() + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=labdir) + + # Mock uuid1 value + uuid1 = 'a1234567890' + mock_uuid.return_value = uuid1 + + with cd(ctrldir): + # Test checkout new branch (with no existing metadata) + checkout_branch(lab=lab, + branch_name="Branch1", + is_new_branch=True) + metadata = Metadata(lab) + + # Check metadata was created and commited + assert str(repo.active_branch) == "Branch1" + assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' + assert metadata.uuid == uuid1 + + expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid1}" + assert repo.head.commit.message == expected_commit_msg + branch_1_commit_hash = repo.active_branch.object.hexsha + + # Mock uuid2 value + uuid2 = 'b1234567890' + mock_uuid.return_value = uuid2 + + with cd(ctrldir): + # Test checkout new branch from branch with existing metadata + checkout_branch(lab=lab, + branch_name="Branch2", + is_new_branch=True, + start_point="Branch1") + metadata = Metadata(lab) + + # Check metadata has been updated and commited + assert str(repo.active_branch) == "Branch2" + assert metadata.experiment_name == f'{ctrldir_basename}-Branch2-b123456' + assert metadata.uuid == uuid2 + + expected_commit_msg = f"Updated metadata. 
Experiment uuid: {uuid2}" + assert repo.head.commit.message == expected_commit_msg + + with cd(ctrldir): + # Test checkout existing branch with existing metadata + checkout_branch(lab=lab, + branch_name="Branch1") + metadata = Metadata(lab) + + # Check metadata and commit has not changed on Branch1 + assert str(repo.active_branch) == "Branch1" + assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' + assert metadata.uuid == uuid1 + + # Assert commit hash is the same + assert repo.active_branch.object.hexsha == branch_1_commit_hash + + +@patch('shortuuid.uuid') +def test_checkout_existing_branches_with_no_metadata(mock_uuid): + repo = setup_control_repository() + main_commit = repo.active_branch.object.hexsha + + # Create new branch + repo.create_head("Branch1") + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=labdir) + + # Mock uuid1 value + uuid1 = 'a1234567890' + mock_uuid.return_value = uuid1 + + with cd(ctrldir): + # Test checkout existing branch with no existing metadata + checkout_branch(lab=lab, + branch_name="Branch1") + metadata = Metadata(lab) + + # Check metadata was created and commited + assert str(repo.active_branch) == "Branch1" + assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' + assert metadata.uuid == uuid1 + + expected_commit_msg = f"Updated metadata. 
Experiment uuid: {uuid1}" + assert repo.head.commit.message == expected_commit_msg + + # Create new branch - from main commit + repo.create_head("Branch2", commit=main_commit) + # Make experiment archive - This function creates legacy experiment archive + make_expt_archive_dir(type='restart') + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=labdir) + + # Mock uuid1 value + uuid2 = 'b1234567890' + mock_uuid.return_value = uuid2 + + with cd(ctrldir): + # Test checkout existing branch (with no existing metadata) + # But crucially with archive + checkout_branch(lab=lab, + branch_name="Branch2") + metadata = Metadata(lab) + + # Check metadata was created and commited + assert str(repo.active_branch) == "Branch2" + + # Check for legacy experiment name + assert metadata.experiment_name == f'{ctrldir_basename}' + assert metadata.uuid == uuid2 + + expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid2}" + assert repo.head.commit.message == expected_commit_msg + + # Note: new experiments branches created with payu checkout + # can work with existing repo's but using git branch to create branch + # will result in branch using the same archive (as it worked before branch + # support) + + +@patch('shortuuid.uuid') +def test_checkout_new_branch_existing_legacy_archive(mock_uuid): + # Using payu checkout new branch should generate new uuid, + # and experiment name - even if there's a legacy archive + repo = setup_control_repository() + + # Add archive under legacy name + restart_path = Path(make_expt_archive_dir(type='restart')) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=labdir) + + # Mock uuid1 value + uuid1 = 'a1234567890' + mock_uuid.return_value = uuid1 + + with cd(ctrldir): + # Test checkout new branch (with no existing metadata) + checkout_branch(lab=lab, + branch_name="Branch1", + is_new_branch=True, + restart_path=restart_path, + config_path=config_path) + metadata = Metadata(lab) + + # Check metadata was created and 
commited - with branch-uuid aware name + assert str(repo.active_branch) == "Branch1" + assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' + assert metadata.uuid == uuid1 + + expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid1}" + assert repo.head.commit.message == expected_commit_msg + + # Check restart path was added to configuration file + config = read_config(config_path) + assert config['restart'] == str(restart_path) + + +@patch('shortuuid.uuid') +def test_clone(mock_uuid): + # Create a repo to clone + source_repo_path = tmpdir / 'sourceRepo' + source_repo_path.mkdir() + source_repo = setup_control_repository(path=source_repo_path) + source_main_branch = str(source_repo.active_branch) + + # Create and checkout branch + branch1 = source_repo.create_head("Branch1") + branch1.checkout() + + # Mock uuid1 value + uuid1 = 'a1234567890' + mock_uuid.return_value = uuid1 + + # Test clone + cloned_repo_path = tmpdir / 'clonedRepo' + clone(source_repo_path, cloned_repo_path, lab_path=labdir) + + # Check new commit added + cloned_repo = git.Repo(cloned_repo_path) + expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid1}" + assert cloned_repo.head.commit.message == expected_commit_msg + assert str(cloned_repo.active_branch) == 'Branch1' + + # Check metadata + with cd(cloned_repo_path): + lab = payu.laboratory.Laboratory(lab_path=labdir) + metadata = Metadata(lab) + + assert metadata.uuid == uuid1 + assert metadata.experiment_name == 'clonedRepo-Branch1-a123456' + + cloned_repo.git.checkout(source_main_branch) + + # Test clone of a clone - adding a new branch + uuid2 = 'b1234567890' + mock_uuid.return_value = uuid2 + + # Run clone + with cd(tmpdir): + clone(cloned_repo_path, Path('clonedRepo2'), + lab_path=labdir, new_branch_name='Branch2', branch='Branch1') + + # Check new commit added + cloned_repo2 = git.Repo(tmpdir / 'clonedRepo2') + expected_commit_msg = f"Updated metadata. 
Experiment uuid: {uuid2}" + assert cloned_repo2.head.commit.message == expected_commit_msg + assert [head.name for head in cloned_repo2.heads] == ['Branch1', 'Branch2'] diff --git a/test/test_git_utils.py b/test/test_git_utils.py new file mode 100644 index 00000000..deb24d65 --- /dev/null +++ b/test/test_git_utils.py @@ -0,0 +1,185 @@ +import shutil +import subprocess + +import git +import pytest + +from payu.git_utils import _get_git_repository, get_git_user_info +from payu.git_utils import git_checkout_branch, PayuBranchError + +from test.common import tmpdir + + +@pytest.fixture(autouse=True) +def setup_and_teardown(): + # Create tmp directory + try: + tmpdir.mkdir() + except Exception as e: + print(e) + + yield + + # Remove tmp directory + try: + shutil.rmtree(tmpdir) + except Exception as e: + print(e) + + +def create_new_repo(repo_path): + """Helper function to initialise a repo and create first commit""" + repo = git.Repo.init(repo_path) + init_file = repo_path / "init.txt" + add_file_and_commit(repo, init_file) + return repo + + +def add_file_and_commit(repo, file_path, commit_no=0): + """Helper function to add a commit to repo""" + file_path.touch() + repo.index.add([file_path]) + repo.index.commit(f"Add commit {commit_no}") + return repo + + +def test_get_git_repo_invalid_repo_initialise(): + invalid_repo_path = tmpdir / "invalidRepo" + invalid_repo_path.mkdir() + repo = _get_git_repository(invalid_repo_path, initialise=True) + assert not repo.bare + + +def test_get_git_repo_invalid_repo_catch_error(): + invalid_path = tmpdir / "invalidRepo" + invalid_path.mkdir() + expected_warning_msg = "Path is not a valid git repository: " + expected_warning_msg += str(invalid_path) + with pytest.warns(UserWarning, match=expected_warning_msg): + repo = _get_git_repository(invalid_path, catch_error=True) + assert repo is None + + +def test_get_git_user_info_no_config_set(): + # Testing this is tricky as don't want to remove any global configs for + # name or email. 
Instead using assumption that key 'testKey-c9hCC' is not + # defined in the 'user' namespace. + repo_path = tmpdir / "test_repo" + create_new_repo(repo_path) + value = get_git_user_info(repo_path, 'testKey-c9hCC', 'test_value') + assert value is None + + +def test_get_git_user_info_config_set(): + repo_path = tmpdir / "test_repo" + create_new_repo(repo_path) + try: + # Set config that is local to repository only + subprocess.run('git config user.name "TestUserName"', + check=True, + shell=True, + cwd=repo_path) + print("User name set successfully.") + except subprocess.CalledProcessError as e: + print(f"Error setting user name: {e}") + + value = get_git_user_info(repo_path, 'name', 'test_value') + + assert value == 'TestUserName' + + +@pytest.mark.parametrize("ref", ["branch", "hash", None]) +def test_git_checkout_new_branch_from_remote_ref(ref): + # Setup + remote_repo_path = tmpdir / 'remoteRepo' + remote_repo = create_new_repo(remote_repo_path) + main_branch = remote_repo.active_branch + main_branch_hash = main_branch.object.hexsha + + # Create branch_1 + branch_1 = remote_repo.create_head("branch-1") + remote_repo.git.checkout(branch_1) + add_file_and_commit(remote_repo, (remote_repo_path / 'file'), commit_no=1) + branch_1_hash = branch_1.object.hexsha + + assert main_branch_hash != branch_1_hash + + # Re-checkout main branch + remote_repo.git.checkout(main_branch) + + # Clone repo + cloned_repo_path = tmpdir / 'cloned_repo' + cloned_repo = remote_repo.clone(cloned_repo_path) + + if ref == "hash": + start_point = branch_1_hash + expected_hash = branch_1_hash + elif ref == "branch": + start_point = "branch-1" + expected_hash = branch_1_hash + else: + start_point = None + expected_hash = main_branch_hash + + # Test startpoint being remote branch/hash/None + git_checkout_branch(cloned_repo_path, + 'branch-2', + new_branch=True, + start_point=start_point) + + current_branch = cloned_repo.active_branch + current_hash = current_branch.object.hexsha + assert 
str(current_branch) == 'branch-2' + assert current_hash == expected_hash + + +def test_git_checkout_new_branch_existing(): + # Setup + repo_path = tmpdir / 'remoteRepo' + repo = create_new_repo(repo_path) + existing_branch = repo.active_branch + + # Test create branch with existing branch + with pytest.raises(PayuBranchError): + git_checkout_branch(repo_path, + str(existing_branch), + new_branch=True) + + +def test_git_checkout_non_existent_branch(): + # Setup + repo_path = tmpdir / 'remoteRepo' + create_new_repo(repo_path) + + # Test create branch with existing branch + with pytest.raises(PayuBranchError): + git_checkout_branch(repo_path, "Gibberish") + + +def test_git_checkout_existing_branch(): + # Setup + remote_repo_path = tmpdir / 'remoteRepo' + remote_repo = create_new_repo(remote_repo_path) + main_branch = remote_repo.active_branch + + # Create branch_1 + branch_1 = remote_repo.create_head("branch-1") + remote_repo.git.checkout(branch_1) + add_file_and_commit(remote_repo, (remote_repo_path / 'file'), commit_no=1) + branch_1_hash = branch_1.object.hexsha + + # Re-checkout main branch + remote_repo.git.checkout(main_branch) + + # Clone repo + cloned_repo_path = tmpdir / 'cloned_repo' + cloned_repo = remote_repo.clone(cloned_repo_path) + + # Test checkout existing remote branch + git_checkout_branch(cloned_repo_path, + 'branch-1') + + current_branch = cloned_repo.active_branch + current_hash = current_branch.object.hexsha + assert str(current_branch) == 'branch-1' + assert current_hash == branch_1_hash diff --git a/test/test_metadata.py b/test/test_metadata.py new file mode 100644 index 00000000..5601da70 --- /dev/null +++ b/test/test_metadata.py @@ -0,0 +1,126 @@ +import os +import copy +import shutil + +import pytest + +import payu +from payu.metadata import Metadata + +from test.common import cd +from test.common import tmpdir, ctrldir, labdir, expt_archive_dir +from test.common import config as config_orig +from test.common import write_config +from 
test.common import make_all_files, make_random_file +from test.common import make_expt_archive_dir + +verbose = True + +# Global config +config = copy.deepcopy(config_orig) + + +def setup_module(module): + """ + Put any test-wide setup code in here, e.g. creating test files + """ + if verbose: + print("setup_module module:%s" % module.__name__) + + # Should be taken care of by teardown, in case remnants lying around + try: + shutil.rmtree(tmpdir) + except FileNotFoundError: + pass + + try: + tmpdir.mkdir() + labdir.mkdir() + ctrldir.mkdir() + make_all_files() + except Exception as e: + print(e) + + write_config(config) + + +def teardown_module(module): + """ + Put any test-wide teardown code in here, e.g. removing test outputs + """ + if verbose: + print("teardown_module module:%s" % module.__name__) + + try: + shutil.rmtree(tmpdir) + print('removing tmp') + except Exception as e: + print(e) + + +@pytest.mark.parametrize( + "uuid, experiment, previous_uuid, previous_metadata, expected_metadata", + [ + ( + "A012345678910", + "test_experiment-test_branch-A012345", + None, + ( + "contact: TestName", + "email: test@email.com", + "created: 2023-11-15", + "description: |-", + " Test description etc", + " More description", + "notes: |-", + " Test notes", + " More notes", + "keywords:", + "- test", + "- testKeyword" + ), + ( + "contact: TestName", + "email: test@email.com", + "created: 2023-11-15", + "description: |-", + " Test description etc", + " More description", + "notes: |-", + " Test notes", + " More notes", + "keywords:", + "- test", + "- testKeyword", + "uuid: A012345678910", + "experiment: test_experiment-test_branch-A012345\n" + ) + + ) + ] +) +def test_update_file(uuid, + experiment, + previous_uuid, + previous_metadata, + expected_metadata): + # Create pre-existing metadata file + metadata_path = ctrldir / 'metadata.yaml' + if previous_metadata is not None: + previous_metadata = '\n'.join(previous_metadata) + metadata_path.write_text(previous_metadata) + 
expected_metadata = '\n'.join(expected_metadata) + + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + metadata = Metadata(lab) + + metadata.uuid = uuid + metadata.previous_uuid = previous_uuid + metadata.experiment_name = experiment + + # Function to test + metadata.update_file() + + assert metadata_path.exists and metadata_path.is_file + assert metadata_path.read_text() == expected_metadata From d62b704b62f3a960b29fc1af83e73a3759082080 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Wed, 22 Nov 2023 16:52:08 +1100 Subject: [PATCH 2/7] Add payu branch command to display branch and metadata infomation - payu branch -v, or payu branch --verbose, displays all contents of metadata for each branch vs just uuid --- payu/branch.py | 47 ++++++++++++++++++++++++++++++++ payu/subcommands/args.py | 13 ++++++++- payu/subcommands/branch_cmd.py | 20 ++++++++++++++ payu/subcommands/checkout_cmd.py | 1 + 4 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 payu/subcommands/branch_cmd.py diff --git a/payu/branch.py b/payu/branch.py index 2fe44e5d..d5c3dd11 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -13,11 +13,13 @@ from typing import Optional from ruamel.yaml import YAML +import git from payu.fsops import read_config, DEFAULT_CONFIG_FNAME from payu.laboratory import Laboratory from payu.metadata import Metadata from payu.git_utils import git_checkout_branch, git_clone, get_git_branch +from payu.git_utils import _get_git_repository def add_restart_to_config(restart_path: Path, @@ -151,3 +153,48 @@ def clone(repository: str, # Change back to previous directory os.chdir(previous_directory) + + +def print_metadata_info(branch: git.Head, verbose: bool = False): + """Print uuid for each branch. 
If verbose is true, it will print all + the metadata in metadata.yaml""" + contains_metadata = False + # Note: Blobs are files + for blob in branch.commit.tree.blobs: + if blob.name == 'metadata.yaml': + contains_metadata = True + + # Read file contents + content = blob.data_stream.read().decode('utf-8') + if verbose: + for line in content.splitlines(): + print(f' {line}') + else: + # Print uuid + metadata = YAML().load(content) + uuid = metadata.get('uuid', None) + if uuid is not None: + print(f" uuid: {uuid}") + else: + print(f" No uuid in metadata file") + + if not contains_metadata: + print(" No metadata file found") + + +def list_branches(config_path, verbose: bool = False): + """Print out summary of metadata on each branch""" + # Note: Control path is set in read_config + config = read_config(config_path) + control_path = Path(config.get('control_path')) + + repo = _get_git_repository(control_path) + + current_branch = repo.active_branch + print(f"* Current Branch: {current_branch.name}") + print_metadata_info(current_branch, verbose) + + for branch in repo.heads: + if branch != current_branch: + print(f"Branch: {branch.name}") + print_metadata_info(branch, verbose) diff --git a/payu/subcommands/args.py b/payu/subcommands/args.py index 4ad9f7f0..ddd0c3a1 100644 --- a/payu/subcommands/args.py +++ b/payu/subcommands/args.py @@ -225,7 +225,7 @@ # Branch start restart restart_path = { - 'flags': ('--start-from-restart', '-s'), + 'flags': ('--restart', '-r'), 'parameters': { 'dest': 'restart_path', 'action': 'store', @@ -254,3 +254,14 @@ 'help': 'Flag to opt out of branch-uuid aware experiment names' } } + +# List branches - verbose +verbose = { + 'flags': ['--verbose', '-v'], + 'parameters': { + 'dest': 'verbose', + 'action': 'store_true', + 'default': False, + 'help': 'Flag to display all contents of metadata file' + } +} \ No newline at end of file diff --git a/payu/subcommands/branch_cmd.py b/payu/subcommands/branch_cmd.py new file mode 100644 index 
00000000..3888b623 --- /dev/null +++ b/payu/subcommands/branch_cmd.py @@ -0,0 +1,20 @@ +"""Run the `payu branch` command. + +:copyright: Copyright 2018 Marshall Ward, see AUTHORS for details. +:license: Apache License, Version 2.0, see LICENSE for details. +""" + +from payu.branch import list_branches +import payu.subcommands.args as args + +title = 'branch' +parameters = {'description': ('List git branches and corresponding metadata')} + +arguments = [args.config, args.verbose] + + +def runcmd(config_path, verbose): + """Execute the command.""" + list_branches(config_path, verbose) + +runscript = runcmd \ No newline at end of file diff --git a/payu/subcommands/checkout_cmd.py b/payu/subcommands/checkout_cmd.py index 09ddd365..0e1c0ea6 100644 --- a/payu/subcommands/checkout_cmd.py +++ b/payu/subcommands/checkout_cmd.py @@ -30,6 +30,7 @@ def runcmd(model_type, config_path, lab_path, new_branch, config_path = transform_strings_to_path(config_path) lab_path = transform_strings_to_path(lab_path) + restart_path = transform_strings_to_path(restart_path) checkout_branch(lab=lab, is_new_branch=new_branch, From 0bb5552a0185092930fa5b5592b4fb1b5b9fcf3e Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Tue, 28 Nov 2023 09:08:31 +1100 Subject: [PATCH 3/7] Added documentation and review suggestions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add test for listing branch metadata (payu branch) - Remove payu uuid subcommand - Add dependencies and entry points to conda meta.yaml - Add usage documentation - Abort adding metadata in checkout if branch does not have config.yaml, add error message and test - Extend payu branch to list if branch has a config file - Extend payu branch to list remote branches, added --remote flag to cmd, added test - Move chdir into a try-finally block when cloning. 
- Tidy test_branch.py, added functions for common tests, add tests for previous uuid --- conda/meta.yaml | 6 + docs/source/usage.rst | 134 +++++++++- payu/branch.py | 224 ++++++++++------ payu/git_utils.py | 50 ++-- payu/metadata.py | 90 ++++--- payu/subcommands/args.py | 27 +- payu/subcommands/branch_cmd.py | 14 +- payu/subcommands/checkout_cmd.py | 9 +- payu/subcommands/uuid_cmd.py | 27 -- setup.py | 3 + test/common.py | 12 + test/models/test_mom6.py | 5 +- test/test_branch.py | 433 ++++++++++++++++++++----------- test/test_git_utils.py | 23 +- test/test_metadata.py | 121 ++++++--- 15 files changed, 771 insertions(+), 407 deletions(-) delete mode 100644 payu/subcommands/uuid_cmd.py diff --git a/conda/meta.yaml b/conda/meta.yaml index fb500a1c..431b18f9 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -12,6 +12,9 @@ build: - payu-collate = payu.subcommands.collate_cmd:runscript - payu-profile = payu.subcommands.profile_cmd:runscript - payu-sync = payu.subcommands.sync_cmd:runscript + - payu-branch = payu.subcommands.branch_cmd:runscript + - payu-clone = payu.subcommands.clone_cmd:runscript + - payu-checkout = payu.subcommands.checkout_cmd:runscript source: git_url: ../ @@ -34,6 +37,9 @@ requirements: # extra for the pypi package - pyOpenSSL >=0.14 - cryptography>=1.3.4 + - shortuuid >= 1.0.11 + - GitPython >= 3.1.40 + - ruamel.yaml >= 0.18.5 test: imports: diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 773bbf55..743bddc2 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -21,6 +21,57 @@ simultaneously that can share common executables and input data. It also allows the flexibility to have the relatively small control directories in a location that is continuously backed up. +Experiment names and metadata +----------------------------- + +The *laboratory* contains the following subdirectories: + +* ``work``, which is where the model is run. 
This contains a temporary directory + for each experiment which is removed after a successful run. + +* ``archive``, which contains the output of completed runs for each + experiment. + +An experiment name is used to identify the experiment inside the ``work`` and +``archive`` sub-directories. This is stored in a metadata file, +``metadata.yaml``, in the *control directory*. +The experiment name and a UUID, to uniquely identify the experiment, +is set in ``metadata.yaml`` when: + +* Using payu to clone a pre-existing git_ repository of the *control directory* + +* Using payu to create and checkout a new git branch in the *control directory* + +* Or, it is set automatically when setting up an experiment run if there is + not a pre-existing metadata file. + +The experiment name historically would default to the name of the *control +directory* or the configured ``experiment`` value (see :ref:`config`). This is +still supported for experiments with pre-existing archived outputs. To support +git branches and ensure uniqueness in shared archives, the branch name and +a short version of the experiment UUID are added to new experiment names. +For example, given a control directory named ``my_expt`` and a UUID of +``9fAsTc4sNYsH2ZBQGYK9TG``, the experiment name would be: + +* ``my_expt-perturb-9fAsT`` - if running an experiment on a branch named + ``perturb``. + +* ``my_expt-9fAsT`` - if the control directory was not a git repository or + running experiments from the ``main`` or ``master`` branch. + +* or ``my_expt`` - if running an older experiment that has a pre-existing + archive. + +Using a git repository for the experiment +----------------------------------------- + +It is recommended to use version control using git_ for the payu +*control directory*. This allows the experiment to be easily copied via +cloning. There is inbuilt support in payu for an experiment runlog which +tracks changes to files between experiment runs. 
There are payu commands +for creating and moving between git branches so multiple related experiments +can be run from the same control directory. + Setting up the laboratory ========================= @@ -85,22 +136,45 @@ Populate laboratory directories You will want a unique name for each input directory. + Clone experiment ------------------ +---------------- -The payu control directory is maintained under version control using -git_ so existing experiments can be cloned. This is the best way to copy -an experiment as it guarantees that only the required files are copied -to a new control directory, and maintains a link to the original -experiment through the shared git history. +This is the best way to copy an experiment as it guarantees that only the +required files are copied to a new control directory, and maintains a link +to the original experiment through the shared git history. To clone the +repository, you can use ``git clone`` or ``payu clone`` which is a wrapper +around ``git clone`` which additionally creates or updates the metadata file. For example:: mkdir -p ${HOME}/${MODEL} cd ${HOME}/${MODEL} - git clone https://github.com/payu-org/mom-example.git my_expt + payu clone ${REPOSITORY} my_expt cd my_expt +Where ``${REPOSITORY}`` is the git URL or path of the repository to clone from, +for example, https://github.com/payu-org/mom-example.git. + +To clone and checkout an existing git branch, use the ``--branch`` flag and +specify the branch name:: + + payu clone --branch ${EXISTING_BRANCH} ${REPOSITORY} my_expt + +To create and checkout a new git branch use ``--new-branch`` and specify a +new branch name:: + + payu clone --new-branch ${NEW_BRANCH} ${REPOSITORY} my_expt + +To see more configuration options for ``payu clone``, +run:: + + payu clone --help + +As an alternative to creating and checking out branches in ``payu clone``, +``payu checkout`` can be used instead (see :ref:`Switching between +related experiments`). 
+ Create experiment ----------------- @@ -305,3 +379,49 @@ at a later date. To sync all restarts including the latest restarts, use the ``--sync-restarts`` flag:: payu sync --sync-restarts + + +Switching between related experiments +===================================== + +To be able to run related experiments from the same control directory +using git branches, you can use ``payu checkout`` which is a wrapper around +``git checkout``. Creating new branches will generate a new UUID and +branch-UUID-aware experiment name in the metadata file. +Switching branches will change ``work`` and ``archive`` symlinks in the control +directory to point to directories in *laboratory* if they exist. + +To create a git branch for a new experiment, use the ``-b`` flag. +For example, to create and checkout a new branch called ``perturb1``, run:: + + payu checkout -b perturb1 + +To branch a new experiment from an existing branch, specify the branch name +or a commit hash after the new branch name. For example, +the following creates a new experiment branch called ``perturb2`` +that starts from ``perturb1``:: + + payu checkout -b perturb2 perturb1 + +To specify a restart path to start from, use the ``--restart``/``-r`` flag. +For example:: + + payu checkout -b perturb --restart path/to/restart + +Note: This can also be achieved by configuring ``restart`` (see :ref:`config`). + +To checkout an existing branch and experiment, specify the branch name. 
For example, +the following checks out the ``perturb1`` branch:: + + payu checkout perturb1 + +To see more ``payu checkout`` options, run:: + + payu checkout --help + +For more information on git branches that exist in the control directory +repository, run:: + + payu branch # Display local branches UUIDs + payu branch --verbose # Display local branches metadata + payu branch --remote # Display remote branches UUIDs diff --git a/payu/branch.py b/payu/branch.py index d5c3dd11..53c2b885 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -19,27 +19,34 @@ from payu.laboratory import Laboratory from payu.metadata import Metadata from payu.git_utils import git_checkout_branch, git_clone, get_git_branch -from payu.git_utils import _get_git_repository +from payu.git_utils import get_git_repository +from payu.git_utils import remote_branches_dict, local_branches_dict + +NO_CONFIG_FOUND_MESSAGE = """No configuration file found on this branch. +Skipping adding new metadata file and creating archive/work symlinks. + +To find a branch that has config file, you can: + - Display local branches by running: + payu branch + - Or display remote branches by running: + payu branch --remote + +To checkout an existing branch, run: + payu checkout BRANCH_NAME +Where BRANCH_NAME is the name of the branch""" def add_restart_to_config(restart_path: Path, - config_path: Optional[Path] = None) -> None: + config_path: Path) -> None: """Takes restart path and config path, and add 'restart' flag to the config file - which is used to start a run if there isn't a pre-existing restart in archive""" - if config_path is None: - config_path = Path(DEFAULT_CONFIG_FNAME) - config_path.resolve() # Check for valid paths - skip_msg = f"Skipping adding 'restart: {restart_path}' to config file" - if not config_path.exists() or not config_path.is_file: - warnings.warn(f"Given configuration file {config_path} does not " - "exist. 
" + skip_msg) - return if not restart_path.exists() or not restart_path.is_dir(): warnings.warn((f"Given restart directory {restart_path} does not " - "exist. " + skip_msg)) + f"exist. Skipping adding 'restart: {restart_path}' " + "to config file")) return # Default ruamel yaml preserves comments and multiline strings @@ -55,28 +62,55 @@ def add_restart_to_config(restart_path: Path, config_path.name) -def checkout_branch(lab: Laboratory, - branch_name: str, +def get_control_path(config_path: Path) -> Path: + """Given the config path, return the control path""" + # Note: Control path is set in read_config + config = read_config(config_path) + return Path(config.get('control_path')) + + +def check_config_path(config_path: Optional[Path] = None) -> Optional[Path]: + """Checks if configuration file exists""" + if config_path is None: + config_path = Path(DEFAULT_CONFIG_FNAME) + config_path.resolve() + + if not config_path.exists() or not config_path.is_file: + print(NO_CONFIG_FOUND_MESSAGE) + raise FileNotFoundError(f"Configuration file {config_path} not found") + + return config_path + + +def checkout_branch(branch_name: str, is_new_branch: bool = False, is_new_experiment: bool = False, start_point: Optional[str] = None, restart_path: Optional[Path] = None, - config_path: Optional[Path] = None) -> None: + config_path: Optional[Path] = None, + control_path: Optional[Path] = None, + model_type: Optional[str] = None, + lab_path: Optional[Path] = None,) -> None: """Checkout branch""" - # Note: Control path is set in read_config - config = read_config(config_path) - control_path = Path(config.get('control_path')) + if control_path is None: + control_path = get_control_path(config_path) # Checkout branch git_checkout_branch(control_path, branch_name, is_new_branch, start_point) + # Check config file exists on checked out branch + config_path = check_config_path(config_path) + + # Initialise Lab and Metadata + lab = Laboratory(model_type, config_path, lab_path) metadata = 
Metadata(lab, branch=branch_name, config_path=config_path) + if is_new_branch or is_new_experiment: - # Creates new uuid, experiment name, updates and commit metadata file + # Create new uuid, experiment name, update and commit metadata file metadata.setup_new_experiment() else: - # Setup metadata if there is no uuid, otherwise check existing metadata - # and commit any changes + # Create/update metadata if no uuid, otherwise run checks on existing + # metadata and commit any changes metadata.setup() metadata.commit_file() @@ -103,7 +137,7 @@ def switch_symlink(lab_dir_path: Path, control_path: Path, sym_path.unlink() print(f"Removed {sym_dir} symlink to {previous_path}") - # Create symlink, if directory exists in laboratory + # Create symlink, if experiment directory exists in laboratory if dir_path.exists(): sym_path.symlink_to(dir_path) print(f"Added {sym_dir} symlink to {dir_path}") @@ -122,79 +156,97 @@ def clone(repository: str, # git clone the repository git_clone(repository, directory, branch) - # Resolve directory to an absolute path and cd into cloned directory - previous_directory = os.getcwd() - dir_path = directory.resolve() - os.chdir(directory) - - # Initial lab and metadata - lab = Laboratory(model_type, config_path, lab_path) - - # Use checkout wrapper - if new_branch_name is not None: - # Create and checkout new branch - checkout_branch(lab=lab, - is_new_branch=True, - branch_name=new_branch_name, - restart_path=restart_path, - config_path=config_path) - else: - # Checkout branch - if branch is None: - branch = get_git_branch(dir_path) - - checkout_branch(lab=lab, - branch_name=branch, - config_path=config_path, - is_new_experiment=not keep_uuid, - restart_path=restart_path) - # Note: is_new_experiment ensures new uuid and metadata is created - # Otherwise uuid is generated only if there's no pre-existing uuid - - # Change back to previous directory - os.chdir(previous_directory) - - -def print_metadata_info(branch: git.Head, verbose: bool = 
False): - """Print uuid for each branch. If verbose is true, it will print all - the metadata in metadata.yaml""" - contains_metadata = False - # Note: Blobs are files + # Resolve directory to an absolute path + control_path = directory.resolve() + + owd = os.getcwd() + try: + # cd into cloned directory + os.chdir(control_path) + + # Use checkout wrapper + if new_branch_name is not None: + # Create and checkout new branch + checkout_branch(is_new_branch=True, + branch_name=new_branch_name, + restart_path=restart_path, + config_path=config_path, + control_path=control_path, + model_type=model_type, + lab_path=lab_path) + else: + # Checkout branch + if branch is None: + branch = get_git_branch(control_path) + + checkout_branch(branch_name=branch, + config_path=config_path, + is_new_experiment=not keep_uuid, + restart_path=restart_path, + control_path=control_path, + model_type=model_type, + lab_path=lab_path) + # Note: is_new_experiment ensures new uuid and metadata is created + # Otherwise uuid is generated only if there's no pre-existing uuid + finally: + # Change back to original working directory + os.chdir(owd) + + print(f"To change directory to control directory run:\n cd {directory}") + + +def print_branch_metadata(branch: git.Head, verbose: bool = False): + """Print uuid for each branch. 
If verbose is true, it will print all lines + of the metadata file""" + contains_config = False + metadata_content = None + # Note: Blobs are files in the commit tree for blob in branch.commit.tree.blobs: + if blob.name == 'config.yaml': + contains_config = True if blob.name == 'metadata.yaml': - contains_metadata = True - # Read file contents - content = blob.data_stream.read().decode('utf-8') - if verbose: - for line in content.splitlines(): - print(f' {line}') - else: - # Print uuid - metadata = YAML().load(content) - uuid = metadata.get('uuid', None) - if uuid is not None: - print(f" uuid: {uuid}") - else: - print(f" No uuid in metadata file") - - if not contains_metadata: - print(" No metadata file found") + metadata_content = blob.data_stream.read().decode('utf-8') + # Print branch info + if not contains_config: + print(f" No config file found") + elif metadata_content is None: + print(" No metadata file found") + else: + if verbose: + # Print all metadata + for line in metadata_content.splitlines(): + print(f' {line}') + else: + # Print uuid + metadata = YAML().load(metadata_content) + uuid = metadata.get('uuid', None) + if uuid is not None: + print(f" uuid: {uuid}") + else: + print(f" No uuid in metadata file") -def list_branches(config_path, verbose: bool = False): - """Print out summary of metadata on each branch""" - # Note: Control path is set in read_config - config = read_config(config_path) - control_path = Path(config.get('control_path')) - repo = _get_git_repository(control_path) +def list_branches(config_path: Optional[Path] = None, + verbose: bool = False, + remote: bool = False): + """Print uuid, or metadata if verbose, for each branch in control repo""" + control_path = get_control_path(config_path) + repo = get_git_repository(control_path) current_branch = repo.active_branch print(f"* Current Branch: {current_branch.name}") - print_metadata_info(current_branch, verbose) + print_branch_metadata(current_branch, verbose) + + if remote: + branches 
= remote_branches_dict(repo) + label = "Remote Branch" + else: + branches = local_branches_dict(repo) + label = "Branch" - for branch in repo.heads: + for branch_name, branch in branches.items(): if branch != current_branch: - print(f"Branch: {branch.name}") - print_metadata_info(branch, verbose) + print(f"{label}: {branch_name}") + print_branch_metadata(branch, verbose) diff --git a/payu/git_utils.py b/payu/git_utils.py index e5df3ef2..e1afeee4 100644 --- a/payu/git_utils.py +++ b/payu/git_utils.py @@ -5,7 +5,7 @@ import warnings from pathlib import Path -from typing import Optional, Union, List, Dict +from typing import Optional, Union, List, Dict, Set import git import configparser @@ -15,9 +15,13 @@ class PayuBranchError(Exception): """Custom exception for payu branch operations""" -def _get_git_repository(repo_path: Union[Path, str], - initialise: bool = False, - catch_error: bool = False) -> Optional[git.Repo]: +class PayuGitWarning(Warning): + """Custom warning class - useful for testing""" + + +def get_git_repository(repo_path: Union[Path, str], + initialise: bool = False, + catch_error: bool = False) -> Optional[git.Repo]: """Return a PythonGit repository object at given path. If initialise is true, it will attempt to initialise a repository if it does not exist. 
Otherwise, if catch_error is true, it will return None""" @@ -31,7 +35,8 @@ def _get_git_repository(repo_path: Union[Path, str], return repo warnings.warn( - f"Path is not a valid git repository: {repo_path}" + f"Path is not a valid git repository: {repo_path}", + PayuGitWarning ) if catch_error: return None @@ -41,7 +46,7 @@ def _get_git_repository(repo_path: Union[Path, str], def get_git_branch(repo_path: Union[Path, str]) -> Optional[str]: """Return the current git branch or None if repository path is not a git repository""" - repo = _get_git_repository(repo_path, catch_error=True) + repo = get_git_repository(repo_path, catch_error=True) if repo: return str(repo.active_branch) @@ -51,7 +56,7 @@ def get_git_user_info(repo_path: Union[Path, str], example_value: str) -> Optional[str]: """Return git config user info, None otherwise. Used for retrieving name and email saved in git""" - repo = _get_git_repository(repo_path, catch_error=True) + repo = get_git_repository(repo_path, catch_error=True) if repo is None: return @@ -72,7 +77,7 @@ def git_commit(repo_path: Union[Path, str], """Add a git commit of changes to paths""" # Get/Create git repository - initialise is true as adding a commit # directly after - repo = _get_git_repository(repo_path, initialise=True) + repo = get_git_repository(repo_path, initialise=True) # Un-stage any pre-existing changes repo.index.reset() @@ -82,7 +87,7 @@ def git_commit(repo_path: Union[Path, str], untracked_files = [Path(repo_path) / path for path in repo.untracked_files] for path in paths_to_commit: if repo.git.diff(None, path) or path in untracked_files: - repo.index.add(paths_to_commit) + repo.index.add([path]) changes = True # Run commit if there's changes @@ -91,19 +96,22 @@ def git_commit(repo_path: Union[Path, str], print(commit_message) -def list_local_branches(repo: git.Repo) -> List[str]: - """List all local branches""" - return [head.name for head in repo.heads] +def local_branches_dict(repo: git.Repo) -> Dict[str, 
git.Head]: + """Return a dictionary mapping local branch names to git.Head objects""" + branch_names_dict = {} + for head in repo.heads: + branch_names_dict[head.name] = head + return branch_names_dict -def remote_branches_dict(repo: git.Repo) -> Dict[str, git.Commit]: - """Return a dictionary mapping remote branch names to commits""" - branch_to_commits = {} +def remote_branches_dict(repo: git.Repo) -> Dict[str, git.Head]: + """Return a dictionary mapping remote branch names to git.Head objects""" + branch_names_dict = {} for remote in repo.remotes: remote.fetch() for ref in remote.refs: - branch_to_commits[ref.remote_head] = ref.commit - return branch_to_commits + branch_names_dict[ref.remote_head] = ref + return branch_names_dict def git_checkout_branch(repo_path: Union[Path, str], @@ -112,12 +120,12 @@ def git_checkout_branch(repo_path: Union[Path, str], start_point: Optional[str] = None) -> None: """Checkout branch and create branch if specified""" # Get git repository - repo = _get_git_repository(repo_path) + repo = get_git_repository(repo_path) # Existing branches - local_branches = list_local_branches(repo) + local_branches = local_branches_dict(repo).keys() remote_branches = remote_branches_dict(repo) - all_branches = local_branches + list(remote_branches.keys()) + all_branches = local_branches | set(remote_branches.keys()) # Create new branch, if specified if new_branch: @@ -132,7 +140,7 @@ def git_checkout_branch(repo_path: Union[Path, str], if (start_point not in local_branches and start_point in remote_branches): # Use hash for remote start point -local branch names work fine - start_point = remote_branches[start_point] + start_point = remote_branches[start_point].commit branch = repo.create_head(branch_name, commit=start_point) else: branch = repo.create_head(branch_name) diff --git a/payu/metadata.py b/payu/metadata.py index 76cab6ae..e1a716ca 100644 --- a/payu/metadata.py +++ b/payu/metadata.py @@ -19,39 +19,37 @@ from payu.laboratory import 
Laboratory from payu.git_utils import get_git_branch, get_git_user_info, git_commit -# Short uuid is used for experiment names (for work and archive directories) -SHORT_UUID_LENGTH = 7 +# A truncated uuid is used for branch-uuid aware experiment names +TRUNCATED_UUID_LENGTH = 5 METADATA_FILENAME = 'metadata.yaml' USAGE_HELP = """ -If this is a new experiment, either: - - Create a new git branch, by running: - payu checkout -b NEW_BRANCH_NAME - where NEW_BRANCH_NAME is name of the new branch - - Or generate a new experiment uuid on the current git branch, by running: - payu uuid -Both of the above will generate a new uuid, a branch-uuid aware experiment -name, and update and commit changes to the metadata file. -Note: Experiment names will be of the format: - {CONTROL_DIR}-{BRANCH_NAME}-{SHORTENED_UUID} - -If this an older experiment, or if wanting to opt out of branch-uuid aware -experiment names, run: - payu uuid --legacy -This will generate a new uuid, set the experiment name to be the name of -the control directory (default) or the set 'experiment' value in the -configuration file. This command will also update and commit changes to the +If this is a new experiment, create a new git branch by running: + payu checkout -b NEW_BRANCH_NAME +where NEW_BRANCH_NAME is name of the new branch. This will generate a new +uuid, a branch-uuid aware experiment name and commit changes to the metadata file. + +Alternatively to generate a new uuid or experiment name on the current git +branch at the next payu setup or run command, remove the pre-existing 'uuid' or +'experiment' fields from the metadata file. + +Note: Experiment names are the name used for work and archive directories +in the laboratory directory. 
""" class ExperimentMetadataError(Exception): - """Class for metadata processing exceptions""" + """Class for experiment name exceptions""" def __init__(self, message="Invalid experiment name in metadata"): super().__init__(message) print(USAGE_HELP) +class MetadataWarning(Warning): + pass + + class Metadata: """ Class to store/update/create metadata such as experiment uuid and name @@ -83,8 +81,6 @@ def __init__(self, self.control_path = control_path self.filepath = self.control_path / METADATA_FILENAME - if branch is None: - branch = get_git_branch(control_path) self.branch = branch self.base_experiment_name = self.config.get('experiment', @@ -104,13 +100,16 @@ def read_file(self) -> CommentedMap: return metadata def setup(self) -> None: - """To be run at experiment initialisation""" + """Create/update metadata if no uuid or experiment name, otherwise run + checks on existing metadata""" if self.uuid is None: - warnings.warn("No experiment uuid found. Generating a new uuid") + warnings.warn("No experiment uuid found in metadata. " + "Generating a new uuid", MetadataWarning) self.update_metadata() elif self.experiment_name is None: # Add an experiment name back into metadata - warnings.warn("No experiment name found in metadata") + warnings.warn("No experiment name found in metadata. " + "Generating a new experiment name.", MetadataWarning) self.update_metadata(set_only_experiment_name=True) self.check_experiment_name() @@ -123,11 +122,12 @@ def update_metadata(self, set_only_experiment_name: bool = False) -> None: if archive_path.exists(): warnings.warn( - f"Pre-existing archive found at: {archive_path}" - f"Experiment name will remain: {self.base_experiment_name}" + f"Pre-existing archive found at: {archive_path}. 
" + f"Experiment name will remain: {self.base_experiment_name}", + MetadataWarning ) if set_only_experiment_name: - self.base_experiment_name = self.base_experiment_name + self.set_new_experiment_name(legacy=True) else: self.set_new_uuid(legacy=True) else: @@ -141,10 +141,9 @@ def update_metadata(self, set_only_experiment_name: bool = False) -> None: def check_experiment_name(self) -> None: """Check experiment name in metadata file""" - truncated_uuid = self.uuid[:SHORT_UUID_LENGTH] + truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH] if self.experiment_name.endswith(truncated_uuid): - # Check whether on the same branch or control directory as - # using the experiment name in metadata.yaml + # Branch-uuid aware experiment name metadata_experiment = self.experiment_name self.set_new_experiment_name() if self.experiment_name != metadata_experiment: @@ -153,12 +152,12 @@ def check_experiment_name(self) -> None: "configured 'experiment' value has changed.\n" f"Experiment name in {METADATA_FILENAME}: " f"{metadata_experiment}\nGenerated experiment name: " - f"{self.experiment_name}." + f"{self.experiment_name}.", + MetadataWarning ) raise ExperimentMetadataError() else: - # Legacy experiment name: Check metadata's experiment name matches - # base experiment name + # Legacy experiment name if self.experiment_name != self.base_experiment_name: msg = f"Experiment name in {METADATA_FILENAME} does not match" if 'experiment' in self.config: @@ -166,19 +165,23 @@ def check_experiment_name(self) -> None: else: msg += " the control directory base name." 
warnings.warn(msg + f"{self.experiment_name} does not equal " - "{self.base_experiment_name}") + f"{self.base_experiment_name}", + MetadataWarning) raise ExperimentMetadataError() - def set_new_experiment_name(self, legacy=False) -> None: - """Set a new experiment name - this the name used work - and archive directories""" + def set_new_experiment_name(self, legacy: bool = False) -> None: + """Set a new experiment name - this is used for work and archive + directories""" if legacy: # Experiment remains base experiment name self.experiment_name = self.base_experiment_name return + if self.branch is None: + self.branch = get_git_branch(self.control_path) + # Add branch and a truncated uuid to experiment name - truncated_uuid = self.uuid[:SHORT_UUID_LENGTH] + truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH] if self.branch is None or self.branch in ('main', 'master'): suffix = f'-{truncated_uuid}' else: @@ -186,7 +189,7 @@ def set_new_experiment_name(self, legacy=False) -> None: self.experiment_name = self.base_experiment_name + suffix - def set_new_uuid(self, legacy=False) -> None: + def set_new_uuid(self, legacy: bool = False) -> None: """Create a new uuid and set experiment name""" # Generate new uuid and experiment name self.uuid = generate_uuid() @@ -239,12 +242,15 @@ def update_user_info(self, metadata: CommentedMap, metadata_key: str, config_key: str, filler_values=List[str]): """Add user email/name to metadata - if defined and not already set in metadata""" + example_value = filler_values[0] + filler_values = {value.casefold() for value in filler_values} if (metadata_key not in metadata - or metadata[metadata_key] in filler_values): + or metadata[metadata_key] is None + or metadata[metadata_key].casefold() in filler_values): # Get config value from git value = get_git_user_info(repo_path=self.control_path, config_key=config_key, - example_value=filler_values[0]) + example_value=example_value) if value is not None: metadata[metadata_key] = value diff --git 
a/payu/subcommands/args.py b/payu/subcommands/args.py index ddd0c3a1..6867fcfc 100644 --- a/payu/subcommands/args.py +++ b/payu/subcommands/args.py @@ -176,8 +176,7 @@ 'action': 'store_true', 'default': False, 'dest': 'keep_uuid', - 'help': 'If the cloned experiment uuid exists, leave it \ - unchanged' + 'help': 'If an experiment uuid exists, leave it unchanged' } } @@ -223,7 +222,7 @@ } -# Branch start restart +# Branch starting restart restart_path = { 'flags': ('--restart', '-r'), 'parameters': { @@ -244,24 +243,24 @@ } } -# Legacy experiment -legacy_experiment = { - 'flags': ['--legacy'], +# List branches verbose flag +verbose = { + 'flags': ['--verbose', '-v'], 'parameters': { - 'dest': 'legacy_experiment', + 'dest': 'verbose', 'action': 'store_true', 'default': False, - 'help': 'Flag to opt out of branch-uuid aware experiment names' + 'help': 'Display all contents of metadata file' } } -# List branches - verbose -verbose = { - 'flags': ['--verbose', '-v'], +# List remote branches flag +remote = { + 'flags': ['--remote', '-r'], 'parameters': { - 'dest': 'verbose', + 'dest': 'remote', 'action': 'store_true', 'default': False, - 'help': 'Flag to display all contents of metadata file' + 'help': 'Display metadata of branches in remote directory' } -} \ No newline at end of file +} diff --git a/payu/subcommands/branch_cmd.py b/payu/subcommands/branch_cmd.py index 3888b623..4a3df853 100644 --- a/payu/subcommands/branch_cmd.py +++ b/payu/subcommands/branch_cmd.py @@ -1,20 +1,24 @@ -"""Run the `payu checkout` command. +"""Run the `payu branch` command. :copyright: Copyright 2018 Marshall Ward, see AUTHORS for details. :license: Apache License, Version 2.0, see LICENSE for details. 
""" +from pathlib import Path + from payu.branch import list_branches import payu.subcommands.args as args title = 'branch' parameters = {'description': ('List git branches and corresponding metadata')} -arguments = [args.config, args.verbose] +arguments = [args.config, args.verbose, args.remote] -def runcmd(config_path, verbose): +def runcmd(config_path, verbose, remote): """Execute the command.""" - list_branches(config_path, verbose) + config_path = Path(config_path) if config_path is not None else None + list_branches(config_path, verbose, remote) + -runscript = runcmd \ No newline at end of file +runscript = runcmd diff --git a/payu/subcommands/checkout_cmd.py b/payu/subcommands/checkout_cmd.py index 0e1c0ea6..c433026a 100644 --- a/payu/subcommands/checkout_cmd.py +++ b/payu/subcommands/checkout_cmd.py @@ -26,18 +26,17 @@ def transform_strings_to_path(path_str=None): def runcmd(model_type, config_path, lab_path, new_branch, branch_name, start_point, restart_path): """Execute the command.""" - lab = Laboratory(model_type, config_path, lab_path) - config_path = transform_strings_to_path(config_path) lab_path = transform_strings_to_path(lab_path) restart_path = transform_strings_to_path(restart_path) - checkout_branch(lab=lab, - is_new_branch=new_branch, + checkout_branch(is_new_branch=new_branch, branch_name=branch_name, start_point=start_point, restart_path=restart_path, - config_path=config_path) + config_path=config_path, + lab_path=lab_path, + model_type=model_type) runscript = runcmd diff --git a/payu/subcommands/uuid_cmd.py b/payu/subcommands/uuid_cmd.py deleted file mode 100644 index 55cbf21b..00000000 --- a/payu/subcommands/uuid_cmd.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Run the `payu uuid` command. - -:copyright: Copyright 2018 Marshall Ward, see AUTHORS for details. -:license: Apache License, Version 2.0, see LICENSE for details. 
-""" - -import payu.subcommands.args as args -from payu.metadata import Metadata -from payu.laboratory import Laboratory - -title = 'uuid' -parameters = {'description': ('Generates and commits a new experiment uuid, ' - 'update/create and commit experiment metadata')} -arguments = [args.model, args.config, args.laboratory, args.legacy_experiment] - - -def runcmd(model_type, config_path, lab_path, legacy_experiment): - """Execute the command.""" - lab = Laboratory(model_type=model_type, - config_path=config_path, - lab_path=lab_path) - metadata = Metadata(lab=lab, config_path=config_path) - - metadata.setup_new_experiment(legacy=legacy_experiment) - - -runscript = runcmd diff --git a/setup.py b/setup.py index 9b44e155..14245fbb 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,9 @@ 'payu-collate = payu.subcommands.collate_cmd:runscript', 'payu-profile = payu.subcommands.profile_cmd:runscript', 'payu-sync = payu.subcommands.sync_cmd:runscript', + 'payu-branch = payu.subcommands.branch_cmd:runscript', + 'payu-clone = payu.subcommands.clone_cmd:runscript', + 'payu-checkout = payu.subcommands.checkout_cmd:runscript' ] }, classifiers=[ diff --git a/test/common.py b/test/common.py index 2abd341c..f510d235 100644 --- a/test/common.py +++ b/test/common.py @@ -29,6 +29,7 @@ expt_workdir = labdir / 'work' / ctrldir_basename config_path = ctrldir / 'config.yaml' +metadata_path = ctrldir / 'metadata.yaml' print('tmpdir: {}'.format(tmpdir)) @@ -54,6 +55,11 @@ 'runlog': False } +metadata = { + "uuid": "testUuid", + "experiment": ctrldir_basename +} + @contextmanager def cd(directory): @@ -203,7 +209,13 @@ def remove_expt_archive_dirs(type='restart'): print(e) +def write_metadata(metadata=metadata, path=metadata_path): + with path.open('w') as file: + file.write(yaml.dump(metadata, default_flow_style=False)) + + def make_all_files(): make_inputs() make_exe() make_restarts() + write_metadata() diff --git a/test/models/test_mom6.py b/test/models/test_mom6.py index 1faedb1c..71713d3b 
100644 --- a/test/models/test_mom6.py +++ b/test/models/test_mom6.py @@ -9,7 +9,7 @@ from test.common import cd from test.common import tmpdir, ctrldir, labdir, expt_workdir -from test.common import write_config +from test.common import write_config, write_metadata from test.common import make_random_file, make_inputs, make_exe verbose = True @@ -34,6 +34,8 @@ def setup_module(module): ctrldir.mkdir() expt_workdir.mkdir(parents=True) make_inputs() + make_exe() + write_metadata() except Exception as e: print(e) @@ -44,7 +46,6 @@ def setup_module(module): 'exe': 'test.exe' } write_config(config) - make_exe() def teardown_module(module): diff --git a/test/test_branch.py b/test/test_branch.py index eeaccd37..dc8d1365 100644 --- a/test/test_branch.py +++ b/test/test_branch.py @@ -4,20 +4,21 @@ import pytest import git +from ruamel.yaml import YAML from unittest.mock import patch import payu from payu.branch import add_restart_to_config, switch_symlink -from payu.branch import checkout_branch, clone -from payu.metadata import Metadata +from payu.branch import checkout_branch, clone, list_branches +from payu.metadata import MetadataWarning from payu.fsops import read_config from test.common import cd from test.common import tmpdir, ctrldir, labdir from test.common import ctrldir_basename from test.common import config as config_orig, write_config -from test.common import config_path -from test.common import make_all_files, make_expt_archive_dir +from test.common import config_path, metadata_path +from test.common import make_expt_archive_dir, expt_archive_dir # Global config @@ -31,7 +32,6 @@ def setup_and_teardown(): tmpdir.mkdir() labdir.mkdir() ctrldir.mkdir() - make_all_files() except Exception as e: print(e) @@ -44,9 +44,12 @@ def setup_and_teardown(): print(e) -def setup_control_repository(path: Path = ctrldir) -> git.Repo: +def setup_control_repository(path=ctrldir, set_config=True): """ Return an new control repository""" - write_config(config, path=(path / 
'config.yaml')) + if set_config: + write_config(config, path=(path / 'config.yaml')) + else: + (path / 'newFile').touch() # Initialise a control repo repo = git.Repo.init(path) repo.index.add("*") @@ -56,53 +59,47 @@ def setup_control_repository(path: Path = ctrldir) -> git.Repo: @pytest.mark.parametrize( - "config_lines, expected_lines", + "test_config, expected_config", [ ( - ( - 'sync:', - ' restart: true', - '# Test comment', - 'restart: old/path/to/restart', - 'anotherField: 1\n' - ), - ( - 'sync:', - ' restart: true', - '# Test comment', - 'restart: {0}', - 'anotherField: 1\n' - ) + """sync: + restart: true +# Test comment +restart: old/path/to/restart +anotherField: 1 +""", + """sync: + restart: true +# Test comment +restart: {0} +anotherField: 1 +""" ), ( - ( - '# Test comment', - '', - 'anotherField: 1', - ), - ( - '# Test comment', - '', - 'anotherField: 1', - 'restart: {0}\n', - ) + """# Test comment + +anotherField: 1""", + """# Test comment + +anotherField: 1 +restart: {0} +""" ), ] ) -def test_add_restart_to_config(config_lines, expected_lines): +def test_add_restart_to_config(test_config, expected_config): """Test adding restart: path/to/restart to configuration file""" restart_path = labdir / 'archive' / 'tmpRestart' - restart_path.mkdir() + restart_path.mkdir(parents=True) - test_config = '\n'.join(config_lines) - expected_config = '\n'.join(expected_lines).format(restart_path) + expected_config = expected_config.format(restart_path) with config_path.open('w') as file: file.write(test_config) # Function to test with cd(ctrldir): - add_restart_to_config(restart_path) + add_restart_to_config(restart_path, config_path) with config_path.open('r') as file: updated_config = file.read() @@ -124,27 +121,13 @@ def test_add_restart_to_config_invalid_restart_path(): with cd(ctrldir): with pytest.warns(UserWarning, match=expected_msg): - add_restart_to_config(restart_path) + add_restart_to_config(restart_path, config_path) # Test config unchanged with 
config_path.open('r') as file: assert file.read() == config_content -def test_add_restart_to_config_invalid_config_path(): - """Test restart path that does not exist raises a warning""" - config_path = tmpdir / "configDNE" - - restart_path = labdir / 'archive' / 'tmpRestart' - restart_path.mkdir(exist_ok=True) - - expected_msg = f"Given configuration file {config_path} does not exist. " - expected_msg += f"Skipping adding 'restart: {restart_path}' to config file" - - with pytest.warns(UserWarning, match=expected_msg): - add_restart_to_config(restart_path, config_path) - - def test_switch_symlink_when_symlink_and_archive_exists(): # Pre-existing experiment symlink lab_archive = labdir / 'archive' @@ -202,31 +185,61 @@ def test_switch_symlink_when_no_symlink_exists_and_no_archive(): assert not archive_symlink.is_symlink() +def check_metadata(expected_uuid, + expected_experiment, + expected_previous_uuid=None, + metadata_file=metadata_path): + """Helper function to read metadata file and assert changed as expected""" + assert metadata_file.exists() + metadata = YAML().load(metadata_file) + assert metadata.get('uuid', None) == expected_uuid + assert metadata.get('experiment', None) == expected_experiment + assert metadata.get('previous_uuid', None) == expected_previous_uuid + + +def check_branch_metadata(repo, + expected_current_branch, + expected_uuid, + expected_experiment, + expected_previous_uuid=None, + metadata_file=metadata_path): + """Helper function for checking expected branch and metadata""" + # Check metadata + check_metadata(expected_uuid, + expected_experiment, + expected_previous_uuid, + metadata_file=metadata_file) + + # Check cuurent branch + assert str(repo.active_branch) == expected_current_branch + + # Check last commit message + expected_commit_msg = f"Updated metadata. 
Experiment uuid: {expected_uuid}" + assert repo.head.commit.message == expected_commit_msg + + @patch('shortuuid.uuid') def test_checkout_branch(mock_uuid): repo = setup_control_repository() - with cd(ctrldir): - lab = payu.laboratory.Laboratory(lab_path=labdir) - # Mock uuid1 value uuid1 = 'a1234567890' mock_uuid.return_value = uuid1 with cd(ctrldir): # Test checkout new branch (with no existing metadata) - checkout_branch(lab=lab, - branch_name="Branch1", - is_new_branch=True) - metadata = Metadata(lab) + checkout_branch(branch_name="Branch1", + is_new_branch=True, + lab_path=labdir) - # Check metadata was created and commited - assert str(repo.active_branch) == "Branch1" - assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' - assert metadata.uuid == uuid1 + # Check current branch, new commit was added, and metadata created + branch1_experiment_name = f'{ctrldir_basename}-Branch1-a1234' + check_branch_metadata(repo, + expected_uuid=uuid1, + expected_current_branch='Branch1', + expected_experiment=branch1_experiment_name) - expected_commit_msg = f"Updated metadata. 
Experiment uuid: {uuid1}" - assert repo.head.commit.message == expected_commit_msg + # Save commit hash to check later on branch_1_commit_hash = repo.active_branch.object.hexsha # Mock uuid2 value @@ -235,97 +248,119 @@ def test_checkout_branch(mock_uuid): with cd(ctrldir): # Test checkout new branch from branch with existing metadata - checkout_branch(lab=lab, - branch_name="Branch2", + checkout_branch(branch_name="Branch2", is_new_branch=True, - start_point="Branch1") - metadata = Metadata(lab) + lab_path=labdir) - # Check metadata has been updated and commited - assert str(repo.active_branch) == "Branch2" - assert metadata.experiment_name == f'{ctrldir_basename}-Branch2-b123456' - assert metadata.uuid == uuid2 + # Check current branch, new commit was added, and metadata created + branch2_experiment_name = f'{ctrldir_basename}-Branch2-b1234' + check_branch_metadata(repo, + expected_uuid=uuid2, + expected_current_branch='Branch2', + expected_experiment=branch2_experiment_name, + expected_previous_uuid=uuid1) - expected_commit_msg = f"Updated metadata. 
Experiment uuid: {uuid2}" - assert repo.head.commit.message == expected_commit_msg + # Mock uuid3 value + uuid3 = 'c1234567890' + mock_uuid.return_value = uuid3 + + with cd(ctrldir): + # Test checkout new branch from starting branch with existing metadata + checkout_branch(branch_name="Branch3", + is_new_branch=True, + start_point="Branch1", + lab_path=labdir) + + # Check current branch, new commit was added, and metadata created + branch3_experiment_name = f'{ctrldir_basename}-Branch3-c1234' + check_branch_metadata(repo, + expected_uuid=uuid3, + expected_current_branch='Branch3', + expected_experiment=branch3_experiment_name, + expected_previous_uuid=uuid1) with cd(ctrldir): # Test checkout existing branch with existing metadata - checkout_branch(lab=lab, - branch_name="Branch1") - metadata = Metadata(lab) + checkout_branch(branch_name="Branch1", + lab_path=labdir) # Check metadata and commit has not changed on Branch1 assert str(repo.active_branch) == "Branch1" - assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' - assert metadata.uuid == uuid1 + check_metadata(expected_experiment=branch1_experiment_name, + expected_uuid=uuid1) # Assert commit hash is the same assert repo.active_branch.object.hexsha == branch_1_commit_hash @patch('shortuuid.uuid') -def test_checkout_existing_branches_with_no_metadata(mock_uuid): +def test_checkout_existing_branch_with_no_metadata(mock_uuid): repo = setup_control_repository() - main_commit = repo.active_branch.object.hexsha # Create new branch repo.create_head("Branch1") - with cd(ctrldir): - lab = payu.laboratory.Laboratory(lab_path=labdir) - # Mock uuid1 value uuid1 = 'a1234567890' mock_uuid.return_value = uuid1 + expected_no_uuid_msg = ( + "No experiment uuid found in metadata. 
Generating a new uuid" + ) with cd(ctrldir): # Test checkout existing branch with no existing metadata - checkout_branch(lab=lab, - branch_name="Branch1") - metadata = Metadata(lab) + with pytest.warns(MetadataWarning, match=expected_no_uuid_msg): + checkout_branch(branch_name="Branch1", + lab_path=labdir) # Check metadata was created and commited - assert str(repo.active_branch) == "Branch1" - assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' - assert metadata.uuid == uuid1 + branch1_experiment_name = f'{ctrldir_basename}-Branch1-a1234' + check_branch_metadata(repo, + expected_uuid=uuid1, + expected_current_branch='Branch1', + expected_experiment=branch1_experiment_name) - expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid1}" - assert repo.head.commit.message == expected_commit_msg - # Create new branch - from main commit - repo.create_head("Branch2", commit=main_commit) +@patch('shortuuid.uuid') +def test_checkout_branch_with_no_metadata_and_with_legacy_archive(mock_uuid): # Make experiment archive - This function creates legacy experiment archive - make_expt_archive_dir(type='restart') + make_expt_archive_dir(type='restart', index=0) - with cd(ctrldir): - lab = payu.laboratory.Laboratory(lab_path=labdir) + # Setup repo + repo = setup_control_repository() + + # Create new branch using git + repo.create_head("Branch1") # Mock uuid1 value - uuid2 = 'b1234567890' - mock_uuid.return_value = uuid2 + uuid1 = 'a1234567890' + mock_uuid.return_value = uuid1 + expected_no_uuid_msg = ( + "No experiment uuid found in metadata. Generating a new uuid" + ) + + archive_warning_msg = ( + f"Pre-existing archive found at: {expt_archive_dir}. 
" + f"Experiment name will remain: ctrl" + ) with cd(ctrldir): # Test checkout existing branch (with no existing metadata) - # But crucially with archive - checkout_branch(lab=lab, - branch_name="Branch2") - metadata = Metadata(lab) - - # Check metadata was created and commited - assert str(repo.active_branch) == "Branch2" - - # Check for legacy experiment name - assert metadata.experiment_name == f'{ctrldir_basename}' - assert metadata.uuid == uuid2 + # and with pre-existing archive + with pytest.warns(MetadataWarning) as metadata_warnings: + checkout_branch(branch_name="Branch1", + lab_path=labdir) - expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid2}" - assert repo.head.commit.message == expected_commit_msg + # Check metadata was created and commited - with legacy experiment name + branch1_experiment_name = ctrldir_basename + check_branch_metadata(repo, + expected_uuid=uuid1, + expected_current_branch='Branch1', + expected_experiment=branch1_experiment_name) - # Note: new experiments branches created with payu checkout - # can work with existing repo's but using git branch to create branch - # will result in branch using the same archive (as it worked before branch - # support) + # Check warnings were raised + warnings_msgs = [warning.message.args[0] for warning in metadata_warnings] + assert warnings_msgs == [expected_no_uuid_msg, archive_warning_msg] @patch('shortuuid.uuid') @@ -337,35 +372,45 @@ def test_checkout_new_branch_existing_legacy_archive(mock_uuid): # Add archive under legacy name restart_path = Path(make_expt_archive_dir(type='restart')) - with cd(ctrldir): - lab = payu.laboratory.Laboratory(lab_path=labdir) - # Mock uuid1 value uuid1 = 'a1234567890' mock_uuid.return_value = uuid1 with cd(ctrldir): # Test checkout new branch (with no existing metadata) - checkout_branch(lab=lab, - branch_name="Branch1", + checkout_branch(branch_name="Branch1", is_new_branch=True, restart_path=restart_path, - config_path=config_path) - metadata = 
Metadata(lab) + config_path=config_path, + lab_path=labdir) # Check metadata was created and commited - with branch-uuid aware name - assert str(repo.active_branch) == "Branch1" - assert metadata.experiment_name == f'{ctrldir_basename}-Branch1-a123456' - assert metadata.uuid == uuid1 - - expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid1}" - assert repo.head.commit.message == expected_commit_msg + branch1_experiment_name = f'{ctrldir_basename}-Branch1-a1234' + check_branch_metadata(repo, + expected_uuid=uuid1, + expected_current_branch='Branch1', + expected_experiment=branch1_experiment_name) # Check restart path was added to configuration file config = read_config(config_path) assert config['restart'] == str(restart_path) +def test_checkout_branch_with_no_config(): + # Initialise a control repo with no config + repo = setup_control_repository(set_config=False) + + repo.create_head("Branch1") + + with cd(ctrldir): + # Test checkout branch that has no config raise error + with pytest.raises(FileNotFoundError): + checkout_branch(branch_name="Branch1", + lab_path=labdir) + + assert not metadata_path.exists() + + @patch('shortuuid.uuid') def test_clone(mock_uuid): # Create a repo to clone @@ -386,19 +431,14 @@ def test_clone(mock_uuid): cloned_repo_path = tmpdir / 'clonedRepo' clone(source_repo_path, cloned_repo_path, lab_path=labdir) - # Check new commit added + # Check new commit added and expected metadata cloned_repo = git.Repo(cloned_repo_path) - expected_commit_msg = f"Updated metadata. 
Experiment uuid: {uuid1}" - assert cloned_repo.head.commit.message == expected_commit_msg - assert str(cloned_repo.active_branch) == 'Branch1' - - # Check metadata - with cd(cloned_repo_path): - lab = payu.laboratory.Laboratory(lab_path=labdir) - metadata = Metadata(lab) - - assert metadata.uuid == uuid1 - assert metadata.experiment_name == 'clonedRepo-Branch1-a123456' + metadata_file = cloned_repo_path / 'metadata.yaml' + check_branch_metadata(repo=cloned_repo, + expected_current_branch="Branch1", + expected_uuid=uuid1, + expected_experiment="clonedRepo-Branch1-a1234", + metadata_file=metadata_file) cloned_repo.git.checkout(source_main_branch) @@ -411,8 +451,109 @@ def test_clone(mock_uuid): clone(cloned_repo_path, Path('clonedRepo2'), lab_path=labdir, new_branch_name='Branch2', branch='Branch1') - # Check new commit added + # Check new commit added and expected metadata cloned_repo2 = git.Repo(tmpdir / 'clonedRepo2') - expected_commit_msg = f"Updated metadata. Experiment uuid: {uuid2}" - assert cloned_repo2.head.commit.message == expected_commit_msg + metadata_file = tmpdir / 'clonedRepo2' / 'metadata.yaml' + check_branch_metadata(repo=cloned_repo2, + expected_current_branch="Branch2", + expected_uuid=uuid2, + expected_experiment="clonedRepo2-Branch2-b1234", + expected_previous_uuid=uuid1, + metadata_file=metadata_file) + + # Check local branches assert [head.name for head in cloned_repo2.heads] == ['Branch1', 'Branch2'] + + +def add_and_commit_metadata(repo, metadata): + """Helper function to create/update metadata file and commit""" + metadata_path = ctrldir / 'metadata.yaml' + YAML().dump(metadata, metadata_path) + repo.index.add('*') + repo.index.commit("Updated metadata.yaml") + + +def test_list_branches(capsys): + # Create repo and a few branches with and without metadata files + repo = setup_control_repository(set_config=False) + # Leave main branch with no metadata file + main_branch_name = str(repo.active_branch) + + # Branch 1 - has config but no 
metadata + branch1 = repo.create_head("Branch1") + branch1.checkout() + write_config(config) + repo.index.add('*') + repo.index.commit("Added config.yaml") + + # Checkout and add metadata to new branch + branch2 = repo.create_head("Branch2") + branch2.checkout() + write_config(config) + branch_2_metadata = { + "uuid": "b12345678", + "experiment": "testExperimentName2" + } + add_and_commit_metadata(repo, branch_2_metadata) + + # New branch with no uuid in metadata + branch3 = repo.create_head("Branch3") + branch3.checkout() + branch_3_metadata = { + "experiment": "testExperimentName3", + "contact": "TestUser" + } + add_and_commit_metadata(repo, branch_3_metadata) + + # Test list branches + with cd(ctrldir): + list_branches() + + expected_printed_output = f"""* Current Branch: Branch3 + No uuid in metadata file +Branch: Branch1 + No metadata file found +Branch: Branch2 + uuid: b12345678 +Branch: {main_branch_name} + No config file found""" + captured = capsys.readouterr() + assert captured.out.strip() == expected_printed_output + + # Test list branches with verbose set + with cd(ctrldir): + list_branches(verbose=True) + + expected_verbose_output = f"""* Current Branch: Branch3 + experiment: testExperimentName3 + contact: TestUser +Branch: Branch1 + No metadata file found +Branch: Branch2 + uuid: b12345678 + experiment: testExperimentName2 +Branch: {main_branch_name} + No config file found""" + captured = capsys.readouterr() + assert captured.out.strip() == expected_verbose_output + + # Test remote branches + cloned_repo_path = tmpdir / 'cloned_repo' + repo.clone(cloned_repo_path) + + with cd(cloned_repo_path): + list_branches(remote=True) + expected_remote_output = f"""* Current Branch: Branch3 + No uuid in metadata file +Remote Branch: Branch1 + No metadata file found +Remote Branch: Branch2 + uuid: b12345678 +Remote Branch: Branch3 + No uuid in metadata file +Remote Branch: HEAD + No uuid in metadata file +Remote Branch: {main_branch_name} + No config file found""" 
+ captured = capsys.readouterr() + assert captured.out.strip() == expected_remote_output diff --git a/test/test_git_utils.py b/test/test_git_utils.py index deb24d65..3c9b467b 100644 --- a/test/test_git_utils.py +++ b/test/test_git_utils.py @@ -4,8 +4,9 @@ import git import pytest -from payu.git_utils import _get_git_repository, get_git_user_info -from payu.git_utils import git_checkout_branch, PayuBranchError +from payu.git_utils import get_git_repository, get_git_user_info +from payu.git_utils import git_checkout_branch +from payu.git_utils import PayuBranchError, PayuGitWarning from test.common import tmpdir @@ -46,7 +47,7 @@ def add_file_and_commit(repo, file_path, commit_no=0): def test_get_git_repo_invalid_repo_initialise(): invalid_repo_path = tmpdir / "invalidRepo" invalid_repo_path.mkdir() - repo = _get_git_repository(invalid_repo_path, initialise=True) + repo = get_git_repository(invalid_repo_path, initialise=True) assert not repo.bare @@ -55,18 +56,18 @@ def test_get_git_repo_invalid_repo_catch_error(): invalid_path.mkdir() expected_warning_msg = "Path is not a valid git repository: " expected_warning_msg += str(invalid_path) - with pytest.warns(UserWarning, match=expected_warning_msg): - repo = _get_git_repository(invalid_path, catch_error=True) + with pytest.warns(PayuGitWarning, match=expected_warning_msg): + repo = get_git_repository(invalid_path, catch_error=True) assert repo is None def test_get_git_user_info_no_config_set(): - # Testing this is tricky as don't want to remove any global configs for - # name or email. Instead using assumption that key 'testKey-c9hCC' is not - # defined in the 'user' namespace. + # Testing this is tricky as don't want to remove any global configs for + # name or email. Instead using assumption that key 'testKey-54321' is not + # defined in the 'user' namespace. 
repo_path = tmpdir / "test_repo" create_new_repo(repo_path) - value = get_git_user_info(repo_path, 'testKey-c9hCC', 'test_value') + value = get_git_user_info(repo_path, 'testKey-54321', 'test_value') assert value is None @@ -74,7 +75,7 @@ def test_get_git_user_info_config_set(): repo_path = tmpdir / "test_repo" create_new_repo(repo_path) try: - # Set config that is local to repository only + # Set config that is local to temporary test repository only subprocess.run('git config user.name "TestUserName"', check=True, shell=True, @@ -151,7 +152,7 @@ def test_git_checkout_non_existent_branch(): repo_path = tmpdir / 'remoteRepo' create_new_repo(repo_path) - # Test create branch with existing branch + # Test create branch with non-existent branch with pytest.raises(PayuBranchError): git_checkout_branch(repo_path, "Gibberish") diff --git a/test/test_metadata.py b/test/test_metadata.py index 5601da70..d940c2f5 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -1,18 +1,17 @@ -import os import copy import shutil import pytest +from unittest.mock import patch import payu from payu.metadata import Metadata from test.common import cd -from test.common import tmpdir, ctrldir, labdir, expt_archive_dir +from test.common import tmpdir, ctrldir, labdir from test.common import config as config_orig from test.common import write_config -from test.common import make_all_files, make_random_file -from test.common import make_expt_archive_dir +from test.common import make_all_files verbose = True @@ -58,69 +57,109 @@ def teardown_module(module): print(e) +def mocked_get_git_user_info(repo_path, config_key, example_value): + if config_key == 'name': + return 'mockUser' + elif config_key == 'email': + return 'mock@email.com' + else: + return None + + @pytest.mark.parametrize( - "uuid, experiment, previous_uuid, previous_metadata, expected_metadata", + "uuid, experiment, previous_metadata, expected_metadata", [ + ( + "A012345678910", + "test_experiment-test_branch-A012345", 
+ """contact: TestUser +email: Test@email.com +description: |- + Test description etc + More description +keywords: +- test +- testKeyword +# Test Comment +uuid: A012345678910 +experiment: test_experiment-test_branch-A012345 +""", + """contact: TestUser +email: Test@email.com +description: |- + Test description etc + More description +keywords: +- test +- testKeyword +# Test Comment +uuid: A012345678910 +experiment: test_experiment-test_branch-A012345 +""" + ), ( "A012345678910", "test_experiment-test_branch-A012345", None, - ( - "contact: TestName", - "email: test@email.com", - "created: 2023-11-15", - "description: |-", - " Test description etc", - " More description", - "notes: |-", - " Test notes", - " More notes", - "keywords:", - "- test", - "- testKeyword" - ), - ( - "contact: TestName", - "email: test@email.com", - "created: 2023-11-15", - "description: |-", - " Test description etc", - " More description", - "notes: |-", - " Test notes", - " More notes", - "keywords:", - "- test", - "- testKeyword", - "uuid: A012345678910", - "experiment: test_experiment-test_branch-A012345\n" - ) - + """uuid: A012345678910 +experiment: test_experiment-test_branch-A012345 +contact: mockUser +email: mock@email.com +""" + ), + ( + "NewUuid", + "NewExperimentName", + """uuid: PreviousUuid +experiment: PreviousExperimentName +contact: Add your name here +email: Add your email address here +""", + """uuid: NewUuid +experiment: NewExperimentName +contact: mockUser +email: mock@email.com +previous_uuid: PreviousUuid +""" + ), + ( + "NewUuid", + "NewExperimentName", + """ +contact: AdD Your nAme hEre +email: # +""", + """contact: mockUser +email: mock@email.com # +uuid: NewUuid +experiment: NewExperimentName +""" ) ] ) def test_update_file(uuid, experiment, - previous_uuid, previous_metadata, expected_metadata): # Create pre-existing metadata file metadata_path = ctrldir / 'metadata.yaml' if previous_metadata is not None: - previous_metadata = '\n'.join(previous_metadata) 
metadata_path.write_text(previous_metadata) - expected_metadata = '\n'.join(expected_metadata) with cd(ctrldir): lab = payu.laboratory.Laboratory(lab_path=str(labdir)) metadata = Metadata(lab) metadata.uuid = uuid - metadata.previous_uuid = previous_uuid metadata.experiment_name = experiment # Function to test - metadata.update_file() + with patch('payu.metadata.get_git_user_info', + side_effect=mocked_get_git_user_info): + metadata.update_file() assert metadata_path.exists and metadata_path.is_file assert metadata_path.read_text() == expected_metadata + + # Remove metadata file + metadata_path.unlink() From 939808fbb08a9c2f6c2251d8b872e0004c66bcef Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Mon, 4 Dec 2023 12:22:29 +1100 Subject: [PATCH 4/7] Change logic to generate experiment name (used for archive/work dirs) on metadata setup. - Change metadata field names to experiment_uuid, parent_experiment - Change UUID to use built-in uuid.uuid4 - Copy metadata file over to archive direcotry - Commit metadata automatically if it's a git repository --- conda/meta.yaml | 1 - payu/branch.py | 119 +++++++++++--- payu/experiment.py | 7 +- payu/git_utils.py | 12 +- payu/metadata.py | 274 +++++++++++++++---------------- payu/subcommands/checkout_cmd.py | 9 +- setup.py | 2 - test/common.py | 8 +- test/models/test_mom6.py | 5 +- test/pytest.ini | 3 + test/test_branch.py | 196 +++++++++++----------- test/test_metadata.py | 207 ++++++++++++++++++----- 12 files changed, 509 insertions(+), 334 deletions(-) create mode 100644 test/pytest.ini diff --git a/conda/meta.yaml b/conda/meta.yaml index 431b18f9..9a51d328 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -37,7 +37,6 @@ requirements: # extra for the pypi package - pyOpenSSL >=0.14 - cryptography>=1.3.4 - - shortuuid >= 1.0.11 - GitPython >= 3.1.40 - ruamel.yaml >= 0.18.5 diff --git a/payu/branch.py b/payu/branch.py index 53c2b885..21c93be1 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -17,7 +17,7 @@ from payu.fsops 
import read_config, DEFAULT_CONFIG_FNAME from payu.laboratory import Laboratory -from payu.metadata import Metadata +from payu.metadata import Metadata, UUID_FIELD from payu.git_utils import git_checkout_branch, git_clone, get_git_branch from payu.git_utils import get_git_repository from payu.git_utils import remote_branches_dict, local_branches_dict @@ -25,7 +25,7 @@ NO_CONFIG_FOUND_MESSAGE = """No configuration file found on this branch. Skipping adding new metadata file and creating archive/work symlinks. -To find a branch that has config file, you can: +To try find a branch that has config file, you can: - Display local branches by running: payu branch - Or display remote branches by running: @@ -85,13 +85,39 @@ def check_config_path(config_path: Optional[Path] = None) -> Optional[Path]: def checkout_branch(branch_name: str, is_new_branch: bool = False, is_new_experiment: bool = False, + keep_uuid: bool = False, start_point: Optional[str] = None, restart_path: Optional[Path] = None, config_path: Optional[Path] = None, control_path: Optional[Path] = None, model_type: Optional[str] = None, - lab_path: Optional[Path] = None,) -> None: - """Checkout branch""" + lab_path: Optional[Path] = None) -> None: + """Checkout branch, setup metadata and add symlinks + + Parameters + ---------- + branch_name : str + Name of branch to checkout/create + is_new_branch: bool, default False + Create new branch and mark as new experiment + is_new_experiment: bool, default False + Create new uuid for this experiment + keep_uuid: bool, default False + Keep UUID unchanged, if it exists - this overrides is_new_experiment + if there is a pre-existing UUID + start_point: Optional[str], default None + Branch name or commit hash to start new branch from + restart_path: Optional[Path], default None + Absolute restart path to start experiment from + config_path: Optional[Path], default None + Path to configuration file - config.yaml + control_path: Optional[Path], default None + Path to 
control directory - defaults to current working directory + model_type: Optional[str], default None + Type of model - used for creating a Laboratory + lab_path: Optional[Path], default None + Path to laboratory directory + """ if control_path is None: control_path = get_control_path(config_path) @@ -103,16 +129,13 @@ def checkout_branch(branch_name: str, # Initialise Lab and Metadata lab = Laboratory(model_type, config_path, lab_path) - metadata = Metadata(lab, branch=branch_name, config_path=config_path) + metadata = Metadata(Path(lab.archive_path), + branch=branch_name, + config_path=config_path) - if is_new_branch or is_new_experiment: - # Create new uuid, experiment name, update and commit metadata file - metadata.setup_new_experiment() - else: - # Create/update metadata if no uuid, otherwise run checks on existing - # metadata and commit any changes - metadata.setup() - metadata.commit_file() + # Setup Metadata + is_new_experiment = is_new_experiment or is_new_branch + metadata.setup(keep_uuid=keep_uuid, is_new_experiment=is_new_experiment) # Add restart option to config if restart_path: @@ -145,14 +168,41 @@ def switch_symlink(lab_dir_path: Path, control_path: Path, def clone(repository: str, directory: Path, - branch: Optional[Path] = None, + branch: Optional[str] = None, new_branch_name: Optional[str] = None, keep_uuid: bool = False, model_type: Optional[str] = None, config_path: Optional[Path] = None, lab_path: Optional[Path] = None, restart_path: Optional[Path] = None) -> None: - """Clone an experiment control repo""" + """Clone an experiment control repository. + + Parameters: + repository: str + Git URL or path to Git repository to clone + directory: Path + The control directory where the repository will be cloned + branch: Optional[str] + Name of branch to clone and checkout + new_branch_name: Optional[str] + Name of new branch to create and checkout. + If branch is also defined, the new branch will start from the + latest commit of the branch. 
+ keep_uuid: bool, default False + Keep UUID unchanged, if it exists + config_path: Optional[Path] + Path to configuration file - config.yaml + control_path: Optional[Path] + Path to control directory - defaults to current working directory + model_type: Optional[str] + Type of model - used for creating a Laboratory + lab_path: Optional[Path] + Path to laboratory directory + restart_path: Optional[Path] + Absolute restart path to start experiment from + + Returns: None + """ # git clone the repository git_clone(repository, directory, branch) @@ -168,6 +218,7 @@ def clone(repository: str, if new_branch_name is not None: # Create and checkout new branch checkout_branch(is_new_branch=True, + keep_uuid=keep_uuid, branch_name=new_branch_name, restart_path=restart_path, config_path=config_path, @@ -181,13 +232,12 @@ def clone(repository: str, checkout_branch(branch_name=branch, config_path=config_path, - is_new_experiment=not keep_uuid, + keep_uuid=keep_uuid, restart_path=restart_path, control_path=control_path, model_type=model_type, - lab_path=lab_path) - # Note: is_new_experiment ensures new uuid and metadata is created - # Otherwise uuid is generated only if there's no pre-existing uuid + lab_path=lab_path, + is_new_experiment=True) finally: # Change back to original working directory os.chdir(owd) @@ -196,8 +246,19 @@ def clone(repository: str, def print_branch_metadata(branch: git.Head, verbose: bool = False): - """Print uuid for each branch. If verbose is true, it will print all lines - of the metadata file""" + """Display given Git branch UUID, or if config.yaml or metadata.yaml does + not exist. + + Parameters: + branch: git.Head + Branch object to parse commit tree. 
+ verbose: bool, default False + Display entire metadata files + remote: bool, default False + Display remote Git branches + + Returns: None + """ contains_config = False metadata_content = None # Note: Blobs are files in the commit tree @@ -221,17 +282,25 @@ def print_branch_metadata(branch: git.Head, verbose: bool = False): else: # Print uuid metadata = YAML().load(metadata_content) - uuid = metadata.get('uuid', None) + uuid = metadata.get(UUID_FIELD, None) if uuid is not None: - print(f" uuid: {uuid}") + print(f" {UUID_FIELD}: {uuid}") else: - print(f" No uuid in metadata file") + print(f" No UUID in metadata file") def list_branches(config_path: Optional[Path] = None, verbose: bool = False, remote: bool = False): - """Print uuid, or metadata if verbose, for each branch in control repo""" + """Display local Git branches UUIDs. + + Parameters: + verbose: bool, default False + Display entire metadata files + remote: bool, default False + Display remote Git branches + + Returns: None""" control_path = get_control_path(config_path) repo = get_git_repository(control_path) diff --git a/payu/experiment.py b/payu/experiment.py index 2109c239..ca146843 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -19,6 +19,7 @@ import shutil import subprocess as sp import sysconfig +from pathlib import Path # Extensions import yaml @@ -57,7 +58,7 @@ def __init__(self, lab, reproduce=False, force=False): self.start_time = datetime.datetime.now() # Initialise experiment metadata - uuid and experiment name - self.metadata = Metadata(lab) + self.metadata = Metadata(Path(lab.archive_path)) self.metadata.setup() # TODO: replace with dict, check versions via key-value pairs @@ -457,10 +458,6 @@ def setup(self, force_archive=False): if self.config.get('archive', True): self.get_restarts_to_prune() - # Commit any changes to metadata - if self.runlog.enabled: - self.metadata.commit_file() - def run(self, *user_flags): # XXX: This was previously done in reversion diff --git 
a/payu/git_utils.py b/payu/git_utils.py index e1afeee4..2b142cfe 100644 --- a/payu/git_utils.py +++ b/payu/git_utils.py @@ -73,11 +73,15 @@ def get_git_user_info(repo_path: Union[Path, str], def git_commit(repo_path: Union[Path, str], commit_message: str, - paths_to_commit: List[Union[Path, str]]) -> None: + paths_to_commit: List[Union[Path, str]], + initialise_repo: bool = True) -> None: """Add a git commit of changes to paths""" - # Get/Create git repository - initialise is true as adding a commit - # directly after - repo = get_git_repository(repo_path, initialise=True) + # Get/Create git repository + repo = get_git_repository(repo_path, + catch_error=True, + initialise=initialise_repo) + if repo is None: + return # Un-stage any pre-existing changes repo.index.reset() diff --git a/payu/metadata.py b/payu/metadata.py index e1a716ca..b67ac9ef 100644 --- a/payu/metadata.py +++ b/payu/metadata.py @@ -8,42 +8,26 @@ """ import warnings +import shutil +import uuid from pathlib import Path from typing import Optional, List -import shortuuid from ruamel.yaml import YAML from ruamel.yaml.comments import CommentedMap -from payu.fsops import read_config -from payu.laboratory import Laboratory +from payu.fsops import read_config, mkdir_p from payu.git_utils import get_git_branch, get_git_user_info, git_commit # A truncated uuid is used for branch-uuid aware experiment names -TRUNCATED_UUID_LENGTH = 5 -METADATA_FILENAME = 'metadata.yaml' - -USAGE_HELP = """ -If this is a new experiment, create a new git branch by running: - payu checkout -b NEW_BRANCH_NAME -where NEW_BRANCH_NAME is name of the new branch. This will generate a new -uuid, a branch-uuid aware experiment name and commit changes to the -metadata file. - -Alternatively to generate a new uuid or experiment name on the current git -branch at the next payu setup or run command, remove the pre-existing 'uuid' or -'experiment' fields from the metadata file. 
- -Note: Experiment names are the name used for work and archive directories -in the laboratory directory. -""" - +TRUNCATED_UUID_LENGTH = 8 -class ExperimentMetadataError(Exception): - """Class for experiment name exceptions""" - def __init__(self, message="Invalid experiment name in metadata"): - super().__init__(message) - print(USAGE_HELP) +# Metadata file field names +UUID_FIELD = "experiment_uuid" +PARENT_UUID_FIELD = "parent_experiment" +CONTACT_FIELD = "contact" +EMAIL_FIELD = "email" +METADATA_FILENAME = "metadata.yaml" class MetadataWarning(Warning): @@ -55,40 +39,41 @@ class Metadata: Class to store/update/create metadata such as experiment uuid and name Parameters: - lab : Laboratory - The modules laboratory - branch : str | None = None + laboratory_archive_path : Path + The archive sub-directory in Laboratory + branch : Optional[str] The git branch on which the experiment is run - control_path : Path | None = None + control_path : Optional[Path] Path to where the experiment is configured and run. The default is set to the current working directory. This default is set in in fsops.read_config - config_path : Path | None = None + config_path : Optional[Path] Configuration Path. The default is config.yaml in the current working directory. 
This is also set in fsop.read_config """ def __init__(self, - lab: Laboratory, + laboratory_archive_path: Path, config_path: Optional[Path] = None, branch: Optional[str] = None, control_path: Optional[Path] = None) -> None: - self.lab = lab + self.lab_archive_path = laboratory_archive_path self.config = read_config(config_path) if control_path is None: - control_path = Path(self.config.get('control_path')) + control_path = Path(self.config.get("control_path")) self.control_path = control_path self.filepath = self.control_path / METADATA_FILENAME self.branch = branch + self.branch_uuid_experiment = True - self.base_experiment_name = self.config.get('experiment', - self.control_path.name) - + # Set uuid if in metadata file metadata = self.read_file() - self.uuid = metadata.get('uuid', None) - self.experiment_name = metadata.get('experiment', None) + self.uuid = metadata.get(UUID_FIELD, None) + + # Experiment name configuration - this overrides experiment name + self.config_experiment_name = self.config.get("experiment", None) def read_file(self) -> CommentedMap: """Read metadata file - preserving orginal format if it exists""" @@ -99,138 +84,134 @@ def read_file(self) -> CommentedMap: metadata = YAML().load(self.filepath) return metadata - def setup(self) -> None: - """Create/update metadata if no uuid or experiment name, otherwise run - checks on existing metadata""" - if self.uuid is None: - warnings.warn("No experiment uuid found in metadata. " - "Generating a new uuid", MetadataWarning) - self.update_metadata() - elif self.experiment_name is None: - # Add an experiment name back into metadata - warnings.warn("No experiment name found in metadata. 
" - "Generating a new experiment name.", MetadataWarning) - self.update_metadata(set_only_experiment_name=True) - - self.check_experiment_name() - - def update_metadata(self, set_only_experiment_name: bool = False) -> None: - """Create/Update metadata - uses legacy existing name if there's an - existing local archive""" - lab_archive_path = Path(self.lab.archive_path) - archive_path = lab_archive_path / self.base_experiment_name - - if archive_path.exists(): - warnings.warn( - f"Pre-existing archive found at: {archive_path}. " - f"Experiment name will remain: {self.base_experiment_name}", - MetadataWarning - ) - if set_only_experiment_name: - self.set_new_experiment_name(legacy=True) - else: - self.set_new_uuid(legacy=True) - else: - if set_only_experiment_name: - self.set_new_experiment_name() - else: - self.set_new_uuid() - - # Update metadata file + def setup(self, keep_uuid: bool = False, + is_new_experiment: bool = False) -> None: + """Set UUID and experiment name, create/update metadata file, + commit any changes and copy metadata file to the experiment archive. + + Parameters: + keep_uuid: bool, default False + Keep pre-existing UUID, if it exists. + is_new_experiment: bool, default False + If not keep_uuid, generate a new_uuid and a branch-uuid aware + experiment name. + Return: None + + Note: Experiment name is the name used for the work and archive + directories in the Laboratory. 
+ """ + self.set_uuid_and_experiment_name(keep_uuid=keep_uuid, + is_new_experiment=is_new_experiment) self.update_file() - - def check_experiment_name(self) -> None: - """Check experiment name in metadata file""" - truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH] - if self.experiment_name.endswith(truncated_uuid): - # Branch-uuid aware experiment name - metadata_experiment = self.experiment_name - self.set_new_experiment_name() - if self.experiment_name != metadata_experiment: - warnings.warn( - "Either the branch name, the control directory, or the " - "configured 'experiment' value has changed.\n" - f"Experiment name in {METADATA_FILENAME}: " - f"{metadata_experiment}\nGenerated experiment name: " - f"{self.experiment_name}.", - MetadataWarning - ) - raise ExperimentMetadataError() + self.commit_file() + self.copy_to_archive() + + def set_uuid_and_experiment_name(self, + is_new_experiment: bool = False, + keep_uuid: bool = False) -> None: + """Set experiment name and UUID""" + if self.uuid is not None and (keep_uuid or not is_new_experiment): + self.set_experiment_name(keep_uuid=keep_uuid, + is_new_experiment=is_new_experiment) else: - # Legacy experiment name - if self.experiment_name != self.base_experiment_name: - msg = f"Experiment name in {METADATA_FILENAME} does not match" - if 'experiment' in self.config: - msg += " the configured 'experiment' value." - else: - msg += " the control directory base name." - warnings.warn(msg + f"{self.experiment_name} does not equal " - f"{self.base_experiment_name}", - MetadataWarning) - raise ExperimentMetadataError() - - def set_new_experiment_name(self, legacy: bool = False) -> None: - """Set a new experiment name - this is used for work and archive - directories""" - if legacy: - # Experiment remains base experiment name - self.experiment_name = self.base_experiment_name - return + if self.uuid is None and not is_new_experiment: + warnings.warn("No experiment uuid found in metadata. 
" + "Generating a new uuid", MetadataWarning) + self.set_new_uuid(is_new_experiment=is_new_experiment) + def get_branch_uuid_experiment_name(self) -> Path: + """Return a Branch-UUID aware experiment name""" if self.branch is None: self.branch = get_git_branch(self.control_path) - # Add branch and a truncated uuid to experiment name + # Add branch and a truncated uuid to control directory name truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH] if self.branch is None or self.branch in ('main', 'master'): suffix = f'-{truncated_uuid}' else: suffix = f'-{self.branch}-{truncated_uuid}' - self.experiment_name = self.base_experiment_name + suffix + return self.control_path.name + suffix + + def set_experiment_name(self, + is_new_experiment: bool = False, + keep_uuid: bool = False) -> None: + """Set experiment name - this is used for work and archive + sub-directories in the Laboratory""" + if self.config_experiment_name is not None: + # The configured value over-rides the experiment name + self.experiment_name = self.config_experiment_name + self.branch_uuid_experiment = False + print(f"Experiment name is configured in config.yaml: ", + self.experiment_name) + return - def set_new_uuid(self, legacy: bool = False) -> None: - """Create a new uuid and set experiment name""" - # Generate new uuid and experiment name + # Branch-UUID experiment name and archive path + branch_uuid_experiment_name = self.get_branch_uuid_experiment_name() + archive_path = self.lab_archive_path / branch_uuid_experiment_name + + # Legacy experiment name and archive path + legacy_name = self.control_path.name + legacy_archive_path = self.lab_archive_path / legacy_name + + if is_new_experiment or archive_path.exists(): + # Use branch-UUID aware experiment name + self.experiment_name = branch_uuid_experiment_name + elif legacy_archive_path.exists(): + # Use legacy CONTROL-DIR experiment name + self.experiment_name = legacy_name + print(f"Pre-existing archive found at: {legacy_archive_path}. 
" + f"Experiment name will remain: {legacy_name}") + self.branch_uuid_experiment = False + elif keep_uuid: + # Use same experiment UUID and use branch-UUID name for archive + self.experiment_name = branch_uuid_experiment_name + else: + # No archive exists - Detecting new experiment + warnings.warn( + "No pre-existing archive found. Generating a new uuid", + MetadataWarning + ) + self.set_new_uuid(is_new_experiment=True) + + def set_new_uuid(self, is_new_experiment: bool = False) -> None: + """Generate a new uuid and set experiment name""" self.uuid = generate_uuid() - self.set_new_experiment_name(legacy=legacy) + self.set_experiment_name(is_new_experiment=is_new_experiment) - if legacy: + # If experiment name does not include UUID, leave it unchanged + if not self.branch_uuid_experiment: return # Check experiment name is unique in local archive - lab_archive_path = Path(self.lab.archive_path) + lab_archive_path = self.lab_archive_path if lab_archive_path.exists(): local_experiments = [item for item in lab_archive_path.iterdir() if item.is_dir()] while self.experiment_name in local_experiments: # Generate a new id and experiment name self.uuid = generate_uuid() - self.set_new_experiment_name() + self.set_experiment_name(is_new_experiment=is_new_experiment) def update_file(self) -> None: """Write any updates to metadata file""" metadata = self.read_file() - previous_uuid = metadata.get('uuid', None) - if previous_uuid is not None and previous_uuid != self.uuid: - metadata['previous_uuid'] = previous_uuid - - # Update uuid - metadata['uuid'] = self.uuid - - # Add experiment name - metadata['experiment'] = self.experiment_name + # Update UUID and parent UUID + parent_uuid = metadata.get(UUID_FIELD, None) + if parent_uuid is not None and parent_uuid != self.uuid: + metadata[PARENT_UUID_FIELD] = parent_uuid + metadata[UUID_FIELD] = self.uuid # Update email/contact in metadata self.update_user_info(metadata=metadata, - metadata_key='contact', + metadata_key=CONTACT_FIELD, 
config_key='name', filler_values=['Your name', 'Add your name here']) self.update_user_info(metadata=metadata, - metadata_key='email', + metadata_key=EMAIL_FIELD, config_key='email', filler_values=['you@example.com', 'Add your email address here']) @@ -255,20 +236,23 @@ def update_user_info(self, metadata: CommentedMap, metadata_key: str, metadata[metadata_key] = value def commit_file(self) -> None: - "Add a git commit for changes to metadata file, if file has changed" - commit_message = f"Updated metadata. Experiment uuid: {self.uuid}" + """Add a git commit for changes to metadata file, if file has changed + and if control path is a git repository""" + commit_message = f"Updated metadata. Experiment UUID: {self.uuid}" git_commit(repo_path=self.control_path, commit_message=commit_message, - paths_to_commit=[self.filepath]) + paths_to_commit=[self.filepath], + initialise_repo=False) - def setup_new_experiment(self, legacy: bool = False) -> None: - """Creates new uuid, creates/updates metadata file and - commits file to git""" - self.set_new_uuid(legacy) - self.update_file() - self.commit_file() + def copy_to_archive(self) -> None: + """Copy metadata file to archive""" + archive_path = self.lab_archive_path / self.experiment_name + mkdir_p(archive_path) + shutil.copy(self.filepath, archive_path / METADATA_FILENAME) + # Note: The existence of archive path is also used for determining + # experiment names and whether to generate a new UUID -def generate_uuid() -> shortuuid.uuid: +def generate_uuid() -> str: """Generate a new uuid""" - return shortuuid.uuid() + return str(uuid.uuid4()) diff --git a/payu/subcommands/checkout_cmd.py b/payu/subcommands/checkout_cmd.py index c433026a..70f8b93b 100644 --- a/payu/subcommands/checkout_cmd.py +++ b/payu/subcommands/checkout_cmd.py @@ -5,7 +5,6 @@ """ from pathlib import Path -from payu.laboratory import Laboratory from payu.branch import checkout_branch import payu.subcommands.args as args @@ -16,7 +15,8 @@ 'and create/switch 
archive/work symlinks')} arguments = [args.model, args.config, args.laboratory, args.new_branch, - args.branch_name, args.start_point, args.restart_path] + args.branch_name, args.start_point, args.restart_path, + args.keep_uuid] def transform_strings_to_path(path_str=None): @@ -24,7 +24,7 @@ def transform_strings_to_path(path_str=None): def runcmd(model_type, config_path, lab_path, new_branch, - branch_name, start_point, restart_path): + branch_name, start_point, restart_path, keep_uuid): """Execute the command.""" config_path = transform_strings_to_path(config_path) lab_path = transform_strings_to_path(lab_path) @@ -36,7 +36,8 @@ def runcmd(model_type, config_path, lab_path, new_branch, restart_path=restart_path, config_path=config_path, lab_path=lab_path, - model_type=model_type) + model_type=model_type, + keep_uuid=keep_uuid) runscript = runcmd diff --git a/setup.py b/setup.py index 14245fbb..ebdff68c 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ 'dateutil', 'tenacity', 'cftime', - 'shortuuid', 'GitPython', 'ruamel.yaml' ], @@ -49,7 +48,6 @@ 'python-dateutil', 'tenacity!=7.0.0', 'cftime', - 'shortuuid >= 1.0.11', 'GitPython >= 3.1.40', 'ruamel.yaml >= 0.18.5' ], diff --git a/test/common.py b/test/common.py index f510d235..e5dc235c 100644 --- a/test/common.py +++ b/test/common.py @@ -25,6 +25,8 @@ payudir = tmpdir / 'payu' archive_dir = labdir / 'archive' + +# Note: These are using a fixed archive name which is set in config.yaml expt_archive_dir = archive_dir / ctrldir_basename expt_workdir = labdir / 'work' / ctrldir_basename @@ -52,12 +54,12 @@ 'exe': False } }, - 'runlog': False + 'runlog': False, + "experiment": ctrldir_basename } metadata = { - "uuid": "testUuid", - "experiment": ctrldir_basename + "experiment_uuid": "testUuid", } diff --git a/test/models/test_mom6.py b/test/models/test_mom6.py index 71713d3b..3b066eb0 100644 --- a/test/models/test_mom6.py +++ b/test/models/test_mom6.py @@ -8,7 +8,7 @@ import payu from test.common import cd -from 
test.common import tmpdir, ctrldir, labdir, expt_workdir +from test.common import tmpdir, ctrldir, labdir, expt_workdir, ctrldir_basename from test.common import write_config, write_metadata from test.common import make_random_file, make_inputs, make_exe @@ -43,7 +43,8 @@ def setup_module(module): 'laboratory': 'lab', 'jobname': 'testrun', 'model': 'mom6', - 'exe': 'test.exe' + 'exe': 'test.exe', + 'experiment': ctrldir_basename } write_config(config) diff --git a/test/pytest.ini b/test/pytest.ini new file mode 100644 index 00000000..dfcd5794 --- /dev/null +++ b/test/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +filterwarnings = + ignore::payu.git_utils.PayuGitWarning \ No newline at end of file diff --git a/test/test_branch.py b/test/test_branch.py index dc8d1365..2f533894 100644 --- a/test/test_branch.py +++ b/test/test_branch.py @@ -7,22 +7,23 @@ from ruamel.yaml import YAML from unittest.mock import patch -import payu from payu.branch import add_restart_to_config, switch_symlink from payu.branch import checkout_branch, clone, list_branches from payu.metadata import MetadataWarning from payu.fsops import read_config from test.common import cd -from test.common import tmpdir, ctrldir, labdir +from test.common import tmpdir, ctrldir, labdir, archive_dir from test.common import ctrldir_basename from test.common import config as config_orig, write_config from test.common import config_path, metadata_path -from test.common import make_expt_archive_dir, expt_archive_dir +from test.common import make_expt_archive_dir # Global config config = copy.deepcopy(config_orig) +# Remove Experiment override name +config.pop("experiment") @pytest.fixture(autouse=True) @@ -45,11 +46,11 @@ def setup_and_teardown(): def setup_control_repository(path=ctrldir, set_config=True): - """ Return an new control repository""" + """Return an new control repository""" if set_config: - write_config(config, path=(path / 'config.yaml')) + write_config(config, path=(path / "config.yaml")) else: - (path / 
'newFile').touch() + (path / "newFile").touch() # Initialise a control repo repo = git.Repo.init(path) repo.index.add("*") @@ -89,19 +90,19 @@ def setup_control_repository(path=ctrldir, set_config=True): ) def test_add_restart_to_config(test_config, expected_config): """Test adding restart: path/to/restart to configuration file""" - restart_path = labdir / 'archive' / 'tmpRestart' + restart_path = labdir / "archive" / "tmpRestart" restart_path.mkdir(parents=True) expected_config = expected_config.format(restart_path) - with config_path.open('w') as file: + with config_path.open("w") as file: file.write(test_config) # Function to test with cd(ctrldir): add_restart_to_config(restart_path, config_path) - with config_path.open('r') as file: + with config_path.open("r") as file: updated_config = file.read() # Test order, comments are preserved @@ -110,10 +111,10 @@ def test_add_restart_to_config(test_config, expected_config): def test_add_restart_to_config_invalid_restart_path(): """Test restart path that does not exist raises a warning""" - restart_path = tmpdir / 'restartDNE' + restart_path = tmpdir / "restartDNE" config_content = "# Test config content" - with config_path.open('w') as file: + with config_path.open("w") as file: file.write(config_content) expected_msg = f"Given restart directory {restart_path} does not exist. 
" @@ -124,26 +125,26 @@ def test_add_restart_to_config_invalid_restart_path(): add_restart_to_config(restart_path, config_path) # Test config unchanged - with config_path.open('r') as file: + with config_path.open("r") as file: assert file.read() == config_content def test_switch_symlink_when_symlink_and_archive_exists(): # Pre-existing experiment symlink - lab_archive = labdir / 'archive' - previous_archive_dir = lab_archive / 'Experiment0' + lab_archive = labdir / "archive" + previous_archive_dir = lab_archive / "Experiment0" previous_archive_dir.mkdir(parents=True) - archive_symlink = ctrldir / 'archive' + archive_symlink = ctrldir / "archive" archive_symlink.symlink_to(previous_archive_dir) # New Experiment - Existing archive - experiment_name = 'Experiment1' + experiment_name = "Experiment1" archive_dir = lab_archive / experiment_name archive_dir.mkdir(parents=True) # Test Function - switch_symlink(lab_archive, ctrldir, experiment_name, 'archive') + switch_symlink(lab_archive, ctrldir, experiment_name, "archive") # Assert new symlink is created assert archive_symlink.exists() and archive_symlink.is_symlink() @@ -152,18 +153,18 @@ def test_switch_symlink_when_symlink_and_archive_exists(): def test_switch_symlink_when_symlink_exists_but_no_archive(): # Pre-existing experiment symlink - lab_archive = labdir / 'archive' - previous_archive_dir = lab_archive / 'Experiment0' + lab_archive = labdir / "archive" + previous_archive_dir = lab_archive / "Experiment0" previous_archive_dir.mkdir(parents=True) - archive_symlink = ctrldir / 'archive' + archive_symlink = ctrldir / "archive" archive_symlink.symlink_to(previous_archive_dir) # New Experiment - experiment_name = 'Experiment1' + experiment_name = "Experiment1" # Test Function - switch_symlink(lab_archive, ctrldir, experiment_name, 'archive') + switch_symlink(lab_archive, ctrldir, experiment_name, "archive") # Assert no symlink is created but previous one is removed assert not archive_symlink.exists() @@ -172,13 
+173,13 @@ def test_switch_symlink_when_symlink_exists_but_no_archive(): def test_switch_symlink_when_no_symlink_exists_and_no_archive(): # New Experiment - experiment_name = 'Experiment1' - lab_archive = labdir / 'archive' + experiment_name = "Experiment1" + lab_archive = labdir / "archive" - archive_symlink = ctrldir / 'archive' + archive_symlink = ctrldir / "archive" # Test Function - switch_symlink(lab_archive, ctrldir, experiment_name, 'archive') + switch_symlink(lab_archive, ctrldir, experiment_name, "archive") # Assert no symlink assert not archive_symlink.exists() @@ -187,43 +188,49 @@ def test_switch_symlink_when_no_symlink_exists_and_no_archive(): def check_metadata(expected_uuid, expected_experiment, - expected_previous_uuid=None, + expected_parent_uuid=None, metadata_file=metadata_path): """Helper function to read metadata file and assert changed as expected""" assert metadata_file.exists() metadata = YAML().load(metadata_file) - assert metadata.get('uuid', None) == expected_uuid - assert metadata.get('experiment', None) == expected_experiment - assert metadata.get('previous_uuid', None) == expected_previous_uuid + assert metadata.get("experiment_uuid", None) == expected_uuid + assert metadata.get("parent_experiment", None) == expected_parent_uuid + + # Assert archive exists for experiment name + assert (archive_dir / expected_experiment / "metadata.yaml").exists() + copied_metadata = YAML().load(metadata_file) + assert copied_metadata.get("experiment_uuid", None) == expected_uuid + parent_uuid = copied_metadata.get("parent_experiment", None) + assert parent_uuid == expected_parent_uuid def check_branch_metadata(repo, expected_current_branch, expected_uuid, expected_experiment, - expected_previous_uuid=None, + expected_parent_uuid=None, metadata_file=metadata_path): """Helper function for checking expected branch and metadata""" # Check metadata check_metadata(expected_uuid, expected_experiment, - expected_previous_uuid, + expected_parent_uuid, 
metadata_file=metadata_file) # Check cuurent branch assert str(repo.active_branch) == expected_current_branch # Check last commit message - expected_commit_msg = f"Updated metadata. Experiment uuid: {expected_uuid}" + expected_commit_msg = f"Updated metadata. Experiment UUID: {expected_uuid}" assert repo.head.commit.message == expected_commit_msg -@patch('shortuuid.uuid') +@patch("uuid.uuid4") def test_checkout_branch(mock_uuid): repo = setup_control_repository() # Mock uuid1 value - uuid1 = 'a1234567890' + uuid1 = "8ddc1985-b7d0-4d4d-884f-061ecd90d478" mock_uuid.return_value = uuid1 with cd(ctrldir): @@ -233,17 +240,17 @@ def test_checkout_branch(mock_uuid): lab_path=labdir) # Check current branch, new commit was added, and metadata created - branch1_experiment_name = f'{ctrldir_basename}-Branch1-a1234' + branch1_experiment_name = f"{ctrldir_basename}-Branch1-8ddc1985" check_branch_metadata(repo, expected_uuid=uuid1, - expected_current_branch='Branch1', + expected_current_branch="Branch1", expected_experiment=branch1_experiment_name) # Save commit hash to check later on branch_1_commit_hash = repo.active_branch.object.hexsha # Mock uuid2 value - uuid2 = 'b1234567890' + uuid2 = "2de5b001-df08-4c0b-ab15-f47f8ad72929" mock_uuid.return_value = uuid2 with cd(ctrldir): @@ -253,15 +260,15 @@ def test_checkout_branch(mock_uuid): lab_path=labdir) # Check current branch, new commit was added, and metadata created - branch2_experiment_name = f'{ctrldir_basename}-Branch2-b1234' + branch2_experiment_name = f"{ctrldir_basename}-Branch2-2de5b001" check_branch_metadata(repo, expected_uuid=uuid2, - expected_current_branch='Branch2', + expected_current_branch="Branch2", expected_experiment=branch2_experiment_name, - expected_previous_uuid=uuid1) + expected_parent_uuid=uuid1) # Mock uuid3 value - uuid3 = 'c1234567890' + uuid3 = "98c99f06-260e-42cc-a23f-f113fae825e5" mock_uuid.return_value = uuid3 with cd(ctrldir): @@ -272,12 +279,12 @@ def test_checkout_branch(mock_uuid): 
lab_path=labdir) # Check current branch, new commit was added, and metadata created - branch3_experiment_name = f'{ctrldir_basename}-Branch3-c1234' + branch3_experiment_name = f"{ctrldir_basename}-Branch3-98c99f06" check_branch_metadata(repo, expected_uuid=uuid3, - expected_current_branch='Branch3', + expected_current_branch="Branch3", expected_experiment=branch3_experiment_name, - expected_previous_uuid=uuid1) + expected_parent_uuid=uuid1) with cd(ctrldir): # Test checkout existing branch with existing metadata @@ -293,7 +300,7 @@ def test_checkout_branch(mock_uuid): assert repo.active_branch.object.hexsha == branch_1_commit_hash -@patch('shortuuid.uuid') +@patch("uuid.uuid4") def test_checkout_existing_branch_with_no_metadata(mock_uuid): repo = setup_control_repository() @@ -301,7 +308,7 @@ def test_checkout_existing_branch_with_no_metadata(mock_uuid): repo.create_head("Branch1") # Mock uuid1 value - uuid1 = 'a1234567890' + uuid1 = "574ea2c9-2379-4484-86b4-1d1a0f820773" mock_uuid.return_value = uuid1 expected_no_uuid_msg = ( "No experiment uuid found in metadata. 
Generating a new uuid" @@ -314,17 +321,17 @@ def test_checkout_existing_branch_with_no_metadata(mock_uuid): lab_path=labdir) # Check metadata was created and commited - branch1_experiment_name = f'{ctrldir_basename}-Branch1-a1234' + branch1_experiment_name = f"{ctrldir_basename}-Branch1-574ea2c9" check_branch_metadata(repo, expected_uuid=uuid1, - expected_current_branch='Branch1', + expected_current_branch="Branch1", expected_experiment=branch1_experiment_name) -@patch('shortuuid.uuid') +@patch("uuid.uuid4") def test_checkout_branch_with_no_metadata_and_with_legacy_archive(mock_uuid): # Make experiment archive - This function creates legacy experiment archive - make_expt_archive_dir(type='restart', index=0) + make_expt_archive_dir(type="restart", index=0) # Setup repo repo = setup_control_repository() @@ -333,21 +340,16 @@ def test_checkout_branch_with_no_metadata_and_with_legacy_archive(mock_uuid): repo.create_head("Branch1") # Mock uuid1 value - uuid1 = 'a1234567890' + uuid1 = "df050eaf-c8bb-4b10-9998-e0202a1eabd2" mock_uuid.return_value = uuid1 expected_no_uuid_msg = ( "No experiment uuid found in metadata. Generating a new uuid" ) - archive_warning_msg = ( - f"Pre-existing archive found at: {expt_archive_dir}. 
" - f"Experiment name will remain: ctrl" - ) - with cd(ctrldir): # Test checkout existing branch (with no existing metadata) # and with pre-existing archive - with pytest.warns(MetadataWarning) as metadata_warnings: + with pytest.warns(MetadataWarning, match=expected_no_uuid_msg): checkout_branch(branch_name="Branch1", lab_path=labdir) @@ -355,25 +357,21 @@ def test_checkout_branch_with_no_metadata_and_with_legacy_archive(mock_uuid): branch1_experiment_name = ctrldir_basename check_branch_metadata(repo, expected_uuid=uuid1, - expected_current_branch='Branch1', + expected_current_branch="Branch1", expected_experiment=branch1_experiment_name) - # Check warnings were raised - warnings_msgs = [warning.message.args[0] for warning in metadata_warnings] - assert warnings_msgs == [expected_no_uuid_msg, archive_warning_msg] - -@patch('shortuuid.uuid') +@patch("uuid.uuid4") def test_checkout_new_branch_existing_legacy_archive(mock_uuid): # Using payu checkout new branch should generate new uuid, - # and experiment name - even if there's a legacy archive + # and experiment name - even if there"s a legacy archive repo = setup_control_repository() # Add archive under legacy name - restart_path = Path(make_expt_archive_dir(type='restart')) + restart_path = Path(make_expt_archive_dir(type="restart")) # Mock uuid1 value - uuid1 = 'a1234567890' + uuid1 = "d4437aae-8370-4567-a698-94d00ba87cdc" mock_uuid.return_value = uuid1 with cd(ctrldir): @@ -385,15 +383,15 @@ def test_checkout_new_branch_existing_legacy_archive(mock_uuid): lab_path=labdir) # Check metadata was created and commited - with branch-uuid aware name - branch1_experiment_name = f'{ctrldir_basename}-Branch1-a1234' + branch1_experiment_name = f"{ctrldir_basename}-Branch1-d4437aae" check_branch_metadata(repo, expected_uuid=uuid1, - expected_current_branch='Branch1', + expected_current_branch="Branch1", expected_experiment=branch1_experiment_name) # Check restart path was added to configuration file config = 
read_config(config_path) - assert config['restart'] == str(restart_path) + assert config["restart"] == str(restart_path) def test_checkout_branch_with_no_config(): @@ -411,10 +409,10 @@ def test_checkout_branch_with_no_config(): assert not metadata_path.exists() -@patch('shortuuid.uuid') +@patch("uuid.uuid4") def test_clone(mock_uuid): # Create a repo to clone - source_repo_path = tmpdir / 'sourceRepo' + source_repo_path = tmpdir / "sourceRepo" source_repo_path.mkdir() source_repo = setup_control_repository(path=source_repo_path) source_main_branch = str(source_repo.active_branch) @@ -424,52 +422,52 @@ def test_clone(mock_uuid): branch1.checkout() # Mock uuid1 value - uuid1 = 'a1234567890' + uuid1 = "9cc04c9b-f13d-4f1d-8a35-87146a4381ef" mock_uuid.return_value = uuid1 # Test clone - cloned_repo_path = tmpdir / 'clonedRepo' + cloned_repo_path = tmpdir / "clonedRepo" clone(source_repo_path, cloned_repo_path, lab_path=labdir) # Check new commit added and expected metadata cloned_repo = git.Repo(cloned_repo_path) - metadata_file = cloned_repo_path / 'metadata.yaml' + metadata_file = cloned_repo_path / "metadata.yaml" check_branch_metadata(repo=cloned_repo, expected_current_branch="Branch1", expected_uuid=uuid1, - expected_experiment="clonedRepo-Branch1-a1234", + expected_experiment="clonedRepo-Branch1-9cc04c9b", metadata_file=metadata_file) cloned_repo.git.checkout(source_main_branch) # Test clone of a clone - adding a new branch - uuid2 = 'b1234567890' + uuid2 = "fd7b4804-d306-4a18-9d95-a8f565abfc9a" mock_uuid.return_value = uuid2 # Run clone with cd(tmpdir): - clone(cloned_repo_path, Path('clonedRepo2'), - lab_path=labdir, new_branch_name='Branch2', branch='Branch1') + clone(cloned_repo_path, Path("clonedRepo2"), + lab_path=labdir, new_branch_name="Branch2", branch="Branch1") # Check new commit added and expected metadata - cloned_repo2 = git.Repo(tmpdir / 'clonedRepo2') - metadata_file = tmpdir / 'clonedRepo2' / 'metadata.yaml' + cloned_repo2 = git.Repo(tmpdir / 
"clonedRepo2") + metadata_file = tmpdir / "clonedRepo2" / "metadata.yaml" check_branch_metadata(repo=cloned_repo2, expected_current_branch="Branch2", expected_uuid=uuid2, - expected_experiment="clonedRepo2-Branch2-b1234", - expected_previous_uuid=uuid1, + expected_experiment="clonedRepo2-Branch2-fd7b4804", + expected_parent_uuid=uuid1, metadata_file=metadata_file) # Check local branches - assert [head.name for head in cloned_repo2.heads] == ['Branch1', 'Branch2'] + assert [head.name for head in cloned_repo2.heads] == ["Branch1", "Branch2"] def add_and_commit_metadata(repo, metadata): """Helper function to create/update metadata file and commit""" - metadata_path = ctrldir / 'metadata.yaml' + metadata_path = ctrldir / "metadata.yaml" YAML().dump(metadata, metadata_path) - repo.index.add('*') + repo.index.add("*") repo.index.commit("Updated metadata.yaml") @@ -483,7 +481,7 @@ def test_list_branches(capsys): branch1 = repo.create_head("Branch1") branch1.checkout() write_config(config) - repo.index.add('*') + repo.index.add("*") repo.index.commit("Added config.yaml") # Checkout and add metadata to new branch @@ -491,8 +489,7 @@ def test_list_branches(capsys): branch2.checkout() write_config(config) branch_2_metadata = { - "uuid": "b12345678", - "experiment": "testExperimentName2" + "experiment_uuid": "b12345678", } add_and_commit_metadata(repo, branch_2_metadata) @@ -500,7 +497,7 @@ def test_list_branches(capsys): branch3 = repo.create_head("Branch3") branch3.checkout() branch_3_metadata = { - "experiment": "testExperimentName3", + "email": "test@email.com", "contact": "TestUser" } add_and_commit_metadata(repo, branch_3_metadata) @@ -510,11 +507,11 @@ def test_list_branches(capsys): list_branches() expected_printed_output = f"""* Current Branch: Branch3 - No uuid in metadata file + No UUID in metadata file Branch: Branch1 No metadata file found Branch: Branch2 - uuid: b12345678 + experiment_uuid: b12345678 Branch: {main_branch_name} No config file found""" captured = 
capsys.readouterr() @@ -525,34 +522,33 @@ def test_list_branches(capsys): list_branches(verbose=True) expected_verbose_output = f"""* Current Branch: Branch3 - experiment: testExperimentName3 + email: test@email.com contact: TestUser Branch: Branch1 No metadata file found Branch: Branch2 - uuid: b12345678 - experiment: testExperimentName2 + experiment_uuid: b12345678 Branch: {main_branch_name} No config file found""" captured = capsys.readouterr() assert captured.out.strip() == expected_verbose_output # Test remote branches - cloned_repo_path = tmpdir / 'cloned_repo' + cloned_repo_path = tmpdir / "cloned_repo" repo.clone(cloned_repo_path) with cd(cloned_repo_path): list_branches(remote=True) expected_remote_output = f"""* Current Branch: Branch3 - No uuid in metadata file + No UUID in metadata file Remote Branch: Branch1 No metadata file found Remote Branch: Branch2 - uuid: b12345678 + experiment_uuid: b12345678 Remote Branch: Branch3 - No uuid in metadata file + No UUID in metadata file Remote Branch: HEAD - No uuid in metadata file + No UUID in metadata file Remote Branch: {main_branch_name} No config file found""" captured = capsys.readouterr() diff --git a/test/test_metadata.py b/test/test_metadata.py index d940c2f5..de1a4fde 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -4,11 +4,10 @@ import pytest from unittest.mock import patch -import payu -from payu.metadata import Metadata +from payu.metadata import Metadata, MetadataWarning from test.common import cd -from test.common import tmpdir, ctrldir, labdir +from test.common import tmpdir, ctrldir, labdir, archive_dir from test.common import config as config_orig from test.common import write_config from test.common import make_all_files @@ -17,6 +16,7 @@ # Global config config = copy.deepcopy(config_orig) +config.pop("experiment") def setup_module(module): @@ -26,22 +26,11 @@ def setup_module(module): if verbose: print("setup_module module:%s" % module.__name__) - # Should be taken care of by 
teardown, in case remnants lying around - try: - shutil.rmtree(tmpdir) - except FileNotFoundError: - pass - try: tmpdir.mkdir() - labdir.mkdir() - ctrldir.mkdir() - make_all_files() except Exception as e: print(e) - write_config(config) - def teardown_module(module): """ @@ -66,12 +55,28 @@ def mocked_get_git_user_info(repo_path, config_key, example_value): return None +@pytest.fixture(autouse=True) +def setup_and_teardown(): + try: + ctrldir.mkdir() + labdir.mkdir() + except Exception as e: + print(e) + + yield + + try: + shutil.rmtree(ctrldir) + shutil.rmtree(labdir) + except Exception as e: + print(e) + + @pytest.mark.parametrize( - "uuid, experiment, previous_metadata, expected_metadata", + "uuid, previous_metadata, expected_metadata", [ ( - "A012345678910", - "test_experiment-test_branch-A012345", + "0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75", """contact: TestUser email: Test@email.com description: |- @@ -81,8 +86,8 @@ def mocked_get_git_user_info(repo_path, config_key, example_value): - test - testKeyword # Test Comment -uuid: A012345678910 -experiment: test_experiment-test_branch-A012345 +experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 +parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 """, """contact: TestUser email: Test@email.com @@ -93,65 +98,54 @@ def mocked_get_git_user_info(repo_path, config_key, example_value): - test - testKeyword # Test Comment -uuid: A012345678910 -experiment: test_experiment-test_branch-A012345 +experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 +parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 """ ), ( - "A012345678910", - "test_experiment-test_branch-A012345", + "b1f3ce3d-99da-40e4-849a-c8b352948a31", None, - """uuid: A012345678910 -experiment: test_experiment-test_branch-A012345 + """experiment_uuid: b1f3ce3d-99da-40e4-849a-c8b352948a31 contact: mockUser email: mock@email.com """ ), ( - "NewUuid", - "NewExperimentName", - """uuid: PreviousUuid -experiment: PreviousExperimentName + 
"7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + """experiment_uuid: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 contact: Add your name here email: Add your email address here """, - """uuid: NewUuid -experiment: NewExperimentName + """experiment_uuid: 7b90f37c-4619-44f9-a439-f76fdf6ae2bd contact: mockUser email: mock@email.com -previous_uuid: PreviousUuid +parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 """ ), ( - "NewUuid", - "NewExperimentName", + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", """ contact: AdD Your nAme hEre email: # """, """contact: mockUser email: mock@email.com # -uuid: NewUuid -experiment: NewExperimentName +experiment_uuid: 3d18b3b6-dd19-49a9-8d9e-c7fa8582f136 """ ) ] ) -def test_update_file(uuid, - experiment, - previous_metadata, - expected_metadata): +def test_update_file(uuid, previous_metadata, expected_metadata): # Create pre-existing metadata file metadata_path = ctrldir / 'metadata.yaml' if previous_metadata is not None: metadata_path.write_text(previous_metadata) + write_config(config) with cd(ctrldir): - lab = payu.laboratory.Laboratory(lab_path=str(labdir)) - metadata = Metadata(lab) + metadata = Metadata(archive_dir) metadata.uuid = uuid - metadata.experiment_name = experiment # Function to test with patch('payu.metadata.get_git_user_info', @@ -163,3 +157,130 @@ def test_update_file(uuid, # Remove metadata file metadata_path.unlink() + + +@pytest.mark.parametrize( + "uuid_exists, keep_uuid, is_new_experiment, " + "branch_uuid_archive_exists, legacy_archive_exists, catch_warning," + "expected_uuid, expected_name", + [ + # Keep UUID on new experiment - UUID Exists - no archives exist + ( + True, True, True, False, False, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl-mock_branch-3d18b3b6" + ), + # Keep UUID on new experiment - UUID Exists - legacy archive exists + ( + True, True, True, False, True, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl-mock_branch-3d18b3b6" + ), + # Keep UUID on not new experiement - UUID Exists 
-legacy archive exists + ( + True, True, False, False, True, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl" + ), + # Keep UUID on not new experiment - No UUID - no archives exist + ( + False, True, True, False, False, False, + "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", "ctrl-mock_branch-cb793e91" + ), + # Experiment setup - No UUID - legacy archive exists + ( + False, False, False, False, True, True, + "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", "ctrl" + ), + # Experiment setup - No UUID - no archive exists + ( + False, False, False, False, False, True, + "cb793e91-6168-4ed2-a70c-f6f9ccf1659b", "ctrl-mock_branch-cb793e91" + ), + # Experiment setup - Existing UUID - legacy archive exists + ( + True, False, False, False, True, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl" + ), + # Experiment setup - Existing UUID - new archive exists + ( + True, False, False, True, True, False, + "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", "ctrl-mock_branch-3d18b3b6" + ), + ] +) +def test_set_experiment_and_uuid(uuid_exists, keep_uuid, is_new_experiment, + branch_uuid_archive_exists, + legacy_archive_exists, catch_warning, + expected_uuid, expected_name): + + write_config(config) + with cd(ctrldir): + metadata = Metadata(archive_dir) + + if uuid_exists: + metadata.uuid = "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136" + + if branch_uuid_archive_exists: + archive_path = archive_dir / "ctrl-mock_branch-3d18b3b6" + archive_path.mkdir(parents=True) + + if legacy_archive_exists: + archive_path = archive_dir / "ctrl" + archive_path.mkdir(parents=True) + + # Test set UUID and experiment name + with patch('payu.metadata.get_git_branch') as mock_branch, \ + patch('uuid.uuid4') as mock_uuid: + mock_branch.return_value = "mock_branch" + mock_uuid.return_value = "cb793e91-6168-4ed2-a70c-f6f9ccf1659b" + + if catch_warning: + with pytest.warns(MetadataWarning): + metadata.set_uuid_and_experiment_name( + is_new_experiment=is_new_experiment, + keep_uuid=keep_uuid + ) + else: + 
metadata.set_uuid_and_experiment_name( + is_new_experiment=is_new_experiment, + keep_uuid=keep_uuid + ) + + assert metadata.experiment_name == expected_name + assert metadata.uuid == expected_uuid + + +def test_set_configured_experiment_name(): + # Test configured experiment name is the set experiment name + test_config = copy.deepcopy(config) + test_config['experiment'] = "configuredExperiment" + write_config(test_config) + with cd(ctrldir): + metadata = Metadata(archive_dir) + + metadata.set_experiment_name() + + assert metadata.experiment_name == "configuredExperiment" + + metadata.set_experiment_name(is_new_experiment=True) + + assert metadata.experiment_name == "configuredExperiment" + + +@pytest.mark.parametrize( + "branch, expected_name", + [(None, "ctrl-cb793e91"), + ("main", "ctrl-cb793e91"), + ("master", "ctrl-cb793e91"), + ("branch", "ctrl-branch-cb793e91")] +) +def test_get_branch_uuid_aware_experiment_name(branch, expected_name): + # Test configured experiment name is the set experiment name + with cd(ctrldir): + metadata = Metadata(archive_dir) + + metadata.uuid = "cb793e91-6168-4ed2-a70c-f6f9ccf1659b" + + with patch('payu.metadata.get_git_branch') as mock_branch: + mock_branch.return_value = branch + experiment = metadata.get_branch_uuid_experiment_name() + + assert experiment == expected_name From 44f8ea635bf212706838857bd21f443371667943 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Thu, 7 Dec 2023 12:06:48 +1100 Subject: [PATCH 5/7] Automatically add extra metadata fields to metadata - Add git utility methods to class so they share the same repository objects (to avoid creating multiple git repo objects) - Add created date, url and name to new experiments (unless they are pre-existing legacy experiments) - Add template values of metadata fields to new experiments created by payu checkout/clone - Update metadata and payu clone/branch usage documentation - Add parent_experiment metadata logic to use UUID of prior restart file (if defined) - If 
restart is passed to clone/checkout, also check if there are pre-existing restarts in archive - Add model name field to metadata - Add user and contact to metadata only for new experiments --- docs/source/config.rst | 8 +- docs/source/usage.rst | 115 ++++++++++-------- payu/branch.py | 101 +++++++++++----- payu/experiment.py | 4 +- payu/git_utils.py | 248 ++++++++++++++++++++------------------- payu/metadata.py | 192 +++++++++++++++++++++--------- payu/subcommands/args.py | 2 +- test/test_branch.py | 115 ++++++++++++++---- test/test_git_utils.py | 30 ++--- test/test_metadata.py | 177 ++++++++++++++++++++++------ 10 files changed, 659 insertions(+), 333 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index a701fab5..ccf31586 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -229,10 +229,10 @@ configuration. The control path for the experiment. The default setting is the path of the current working directory. -``experiment`` (*Default: current directory*) - The experiment name used for archival. The default setting uses the - ``control`` directory name. - +``experiment`` + The experiment name used for archival. This will override the experiment + name generated using metadata and existing archives + (see :ref:`usage-metadata`). Manifests --------- diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 743bddc2..186d3562 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -21,47 +21,6 @@ simultaneously that can share common executables and input data. It also allows the flexibility to have the relatively small control directories in a location that is continuously backed up. -Experiment names and metadata ------------------------------ - -The *laboratory* contains the following subdirectories: - -* ``work``, which is where the model is run. This contains a temporary directory - for each experiment which is removed after a successful run. 
- -* ``archive``, which contains the output of completed runs for each - experiment. - -An experiment name is used identify the experiment inside the ``work`` and -``archive`` sub-directories. This is stored in a metadata file, -``metadata.yaml``, in the *control directory*. -The experiment name and a UUID, to uniquely identify the experiment, -is set in ``metadata.yaml`` when: - -* Using payu to clone a pre-existing git_ repository of the *control directory* - -* Using payu to create and checkout a new git branch in the *control directory* - -* Or, it is set automatically when setting up an experiment run if there is - not a pre-existing metadata file. - -The experiment name historically would default to the name of the *control -directory* or the configured ``experiment`` value (see :ref:`config`). This is -still supported for experiments with a pre-existing archived outputs. To support -git branches and ensure uniqueness in shared archives, the branch name and -a short version of the experiment UUID are added to new experiment names. -For example, given a control directory named ``my_expt`` and a UUID of -``9fAsTc4sNYsH2ZBQGYK9TG``, the experiment name would be: - -* ``my_expt-perturb-9fAsT`` - if running an experiment on a branch named - ``perturb``. - -* ``my_expt-9fAsT`` - if the control directory was not a git repository or - running experiments from the ``main`` or ``master`` branch. - -* or ``my_expt`` - if running an older experiment that has a pre-existing - archive. - Using a git repository for the experiment ----------------------------------------- @@ -144,7 +103,8 @@ This is the best way to copy an experiment as it guarantees that only the required files are copied to a new control directory, and maintains a link to the original experiment through the shared git history. To clone the repository, you can use ``git clone`` or ``payu clone`` which is a wrapper -around ``git clone`` which additionally creates or updates the metadata file. 
+around ``git clone`` which additionally creates or updates the metadata file +which gets copied to the experiment archive directory (see :ref:`usage-metadata`). For example:: @@ -172,8 +132,7 @@ run:: payu clone --help Alternatively to creating and checking out branches in ``payu clone``, -``payu checkout`` can be used instead (see :ref:`Switching between -related experiments`). +``payu checkout`` can be used instead (see :ref:`usage-metadata`). Create experiment @@ -380,14 +339,68 @@ at a later date. To sync all restarts including the latest restarts, use the payu sync --sync-restarts +.. _usage-metadata: + +Metadata and Related Experiments +================================ + +Metadata files +-------------- + +Each experiment has a metadata file, called ``metadata.yaml`` in the *control +directory*. This contains high-level metadata about the experiment and uses +the ACCESS-NRI experiment schema_. An important field is the ``experiment_uuid`` +which uniquely identifies the experiment. Payu generates a new UUID when: + +* Using payu to clone a pre-existing git_ repository of the *control directory* + +* Using payu to create and checkout a new git branch in the *control directory* + +* Or, when setting up an experiment run if there is not a pre-existing metadata + file, UUID, or experiment ``archive`` directory. + +For new experiments, payu may generate some additional metadata fields. This +includes an experiment name, creation date, contact, and email if defined in +the git configuration. This also includes parent experiment UUID and git commit hashes +so the history of the experiment can be tracked. These metadata files are also +copied to the directory that stores the archived experiment outputs. + +.. _schema: https://github.com/ACCESS-NRI/schema/blob/main/experiment_asset.json + +Experiment names +---------------- + +An experiment name is used to identify the experiment inside the ``work`` and +``archive`` sub-directories inside the *laboratory*. 
+ +The experiment name historically would default to the name of the *control +directory*. This is still supported for experiments with pre-existing +archived outputs. To support git branches and ensure uniqueness in shared +archives, the branch name and a short version of the experiment UUID are +added to new experiment names. For example, given a control directory named +``my_expt`` and a UUID of ``416af8c6-d299-4ee6-9d77-4aefa8a9ebcb``, +the experiment name would be: + +* ``my_expt-perturb-416af8c6`` - if running an experiment on a branch named + ``perturb``. + +* ``my_expt-416af8c6`` - if the control directory was not a git repository or + the experiment was run from the ``main`` or ``master`` branch. + +* ``my_expt`` - if running an older experiment that has a pre-existing + archive. + +* ``set_expt_name`` - if the ``experiment`` value is configured to + ``set_expt_name`` (see :ref:`config`). Note that to use branches in one control + repository, this would need each configured experiment value to be unique. Switching between related experiments -===================================== +------------------------------------- To be able to run related experiments from the same control directory using git branches, you can use ``payu checkout`` which is a wrapper around -``git checkout``. Creating new branches will generate a new UUID and -branch-UUID-aware experiment name in the metadata file. +``git checkout``. Creating new branches will generate a new UUID, update metadata +files, and create a branch-UUID-aware experiment name in ``archive``. Switching branches will change ``work`` and ``archive`` symlinks in the control directory to point to directories in *laboratory* if they exist. 
@@ -396,22 +409,22 @@ For example, to create and checkout a new branch called ``perturb1``, run:: payu checkout -b perturb1 -To branch a new experiment from an existing branch, specify the branch name +To create a new experiment from an existing branch, specify the branch name or a commit hash after the new branch name. For example, the following creates a new experiment branch called ``perturb2`` that starts from ``perturb1``:: payu checkout -b perturb2 perturb1 -To specify a restart path to start from using the ``--restart``/ ``-r`` flag, +To specify a restart path to start from, use the ``--restart``/ ``-r`` flag, for example:: payu checkout -b perturb --restart path/to/restart Note: This can also be achieved by configuring ``restart`` (see :ref:`config`). -To checkout an existing branch and experiment. For example, -the following checks out the ``perturb1`` branch:: +To checkout and switch to an existing branch and experiment, omit the ``-b`` flag. +For example, the following checks out the ``perturb1`` branch:: payu checkout perturb1 diff --git a/payu/branch.py b/payu/branch.py index 21c93be1..bccfd1e7 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -10,6 +10,7 @@ import os import warnings from pathlib import Path +import re from typing import Optional from ruamel.yaml import YAML @@ -18,9 +19,8 @@ from payu.fsops import read_config, DEFAULT_CONFIG_FNAME from payu.laboratory import Laboratory from payu.metadata import Metadata, UUID_FIELD -from payu.git_utils import git_checkout_branch, git_clone, get_git_branch -from payu.git_utils import get_git_repository -from payu.git_utils import remote_branches_dict, local_branches_dict +from payu.git_utils import GitRepository, git_clone + NO_CONFIG_FOUND_MESSAGE = """No configuration file found on this branch. Skipping adding new metadata file and creating archive/work symlinks. 
@@ -36,19 +36,50 @@ Where BRANCH_NAME is the name of the branch""" -def add_restart_to_config(restart_path: Path, - config_path: Path) -> None: +def archive_contains_restarts(archive_path: Path) -> bool: + """Return True if there's pre-existing restarts in archive""" + pattern = re.compile(r"^restart[0-9][0-9][0-9]+$") + if not archive_path.exists(): + return False + + for path in archive_path.iterdir(): + real_path = path.resolve() + if real_path.is_dir() and pattern.match(path.name): + return True + return False + + +def check_restart(restart_path: Optional[Path], + archive_path: Path) -> Optional[Path]: + """Checks for valid prior restart path. Returns resolved restart path + if valid, otherwise returns None""" + if restart_path is None: + return + + # Check for valid path + if not restart_path.exists(): + warnings.warn((f"Given restart path {restart_path} does not " + f"exist. Skipping setting 'restart' in config file")) + return + + # Resolve to absolute path + restart_path = restart_path.resolve() + + # Check for pre-existing restarts in archive + if archive_contains_restarts(archive_path): + warnings.warn(( + f"Pre-existing restarts found in archive: {archive_path}." + f"Skipping adding 'restart: {restart_path}' to config file")) + return + + return restart_path + + +def add_restart_to_config(restart_path: Path, config_path: Path) -> None: """Takes restart path and config path, and add 'restart' flag to the config file - which is used to start a run if there isn't a pre-existing restart in archive""" - # Check for valid paths - if not restart_path.exists() or not restart_path.is_dir(): - warnings.warn((f"Given restart directory {restart_path} does not " - f"exist. 
Skipping adding 'restart: {restart_path}' " - "to config file")) - return - # Default ruamel yaml preserves comments and multiline strings yaml = YAML() config = yaml.load(config_path) @@ -105,24 +136,25 @@ def checkout_branch(branch_name: str, keep_uuid: bool, default False Keep UUID unchanged, if it exists - this overrides is_new_experiment if there is a pre-existing UUID - start_point: Optional[str], default None + start_point: Optional[str] Branch name or commit hash to start new branch from - restart_path: Optional[Path], default None + restart_path: Optional[Path] Absolute restart path to start experiment from - config_path: Optional[Path], default None + config_path: Optional[Path] Path to configuration file - config.yaml - control_path: Optional[Path], default None + control_path: Optional[Path] Path to control directory - defaults to current working directory - model_type: Optional[str], default None + model_type: Optional[str] Type of model - used for creating a Laboratory - lab_path: Optional[Path], default None + lab_path: Optional[Path] Path to laboratory directory """ if control_path is None: control_path = get_control_path(config_path) # Checkout branch - git_checkout_branch(control_path, branch_name, is_new_branch, start_point) + repo = GitRepository(control_path) + repo.checkout_branch(branch_name, is_new_branch, start_point) # Check config file exists on checked out branch config_path = check_config_path(config_path) @@ -135,11 +167,20 @@ def checkout_branch(branch_name: str, # Setup Metadata is_new_experiment = is_new_experiment or is_new_branch - metadata.setup(keep_uuid=keep_uuid, is_new_experiment=is_new_experiment) + metadata.setup(keep_uuid=keep_uuid, + is_new_experiment=is_new_experiment) + + # Gets valid prior restart path + prior_restart_path = check_restart(restart_path=restart_path, + archive_path=metadata.archive_path) + + # Create/update and commit metadata file + metadata.write_metadata(set_template_values=True, + 
restart_path=prior_restart_path) # Add restart option to config - if restart_path: - add_restart_to_config(restart_path, config_path=config_path) + if prior_restart_path: + add_restart_to_config(prior_restart_path, config_path=config_path) # Switch/Remove/Add archive and work symlinks experiment = metadata.experiment_name @@ -203,12 +244,12 @@ def clone(repository: str, Returns: None """ - # git clone the repository - git_clone(repository, directory, branch) - # Resolve directory to an absolute path control_path = directory.resolve() + # git clone the repository + repo = git_clone(repository, control_path, branch) + owd = os.getcwd() try: # cd into cloned directory @@ -228,7 +269,7 @@ def clone(repository: str, else: # Checkout branch if branch is None: - branch = get_git_branch(control_path) + branch = repo.get_branch_name() checkout_branch(branch_name=branch, config_path=config_path, @@ -302,17 +343,17 @@ def list_branches(config_path: Optional[Path] = None, Returns: None""" control_path = get_control_path(config_path) - repo = get_git_repository(control_path) + git_repo = GitRepository(control_path) - current_branch = repo.active_branch + current_branch = git_repo.repo.active_branch print(f"* Current Branch: {current_branch.name}") print_branch_metadata(current_branch, verbose) if remote: - branches = remote_branches_dict(repo) + branches = git_repo.remote_branches_dict() label = "Remote Branch" else: - branches = local_branches_dict(repo) + branches = git_repo.local_branches_dict() label = "Branch" for branch_name, branch in branches.items(): diff --git a/payu/experiment.py b/payu/experiment.py index ca146843..d8298f19 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -98,6 +98,9 @@ def __init__(self, lab, reproduce=False, force=False): self.set_output_paths() + # Create metadata file and move to archive + self.metadata.write_metadata(restart_path=self.prior_restart_path) + if not reproduce: # check environment for reproduce flag under PBS reproduce = 
os.environ.get('PAYU_REPRODUCE', False) @@ -459,7 +462,6 @@ def setup(self, force_archive=False): self.get_restarts_to_prune() def run(self, *user_flags): - # XXX: This was previously done in reversion envmod.setup() diff --git a/payu/git_utils.py b/payu/git_utils.py index 2b142cfe..311fd78d 100644 --- a/payu/git_utils.py +++ b/payu/git_utils.py @@ -5,7 +5,7 @@ import warnings from pathlib import Path -from typing import Optional, Union, List, Dict, Set +from typing import Optional, Union, List, Dict import git import configparser @@ -43,138 +43,148 @@ def get_git_repository(repo_path: Union[Path, str], raise -def get_git_branch(repo_path: Union[Path, str]) -> Optional[str]: - """Return the current git branch or None if repository path is not a git - repository""" - repo = get_git_repository(repo_path, catch_error=True) - if repo: - return str(repo.active_branch) - - -def get_git_user_info(repo_path: Union[Path, str], - config_key: str, - example_value: str) -> Optional[str]: - """Return git config user info, None otherwise. Used for retrieving - name and email saved in git""" - repo = get_git_repository(repo_path, catch_error=True) - if repo is None: - return - - try: - user_value = repo.config_reader().get_value('user', config_key) - return user_value - except (configparser.NoSectionError, configparser.NoOptionError): - print( - f'No git config set for user.{config_key}. 
' - 'To set run the following inside the control repository:\n' - f' git config user.{config_key} "{example_value}"' - ) - - -def git_commit(repo_path: Union[Path, str], +class GitRepository: + """Simple wrapper around git python's repo and methods""" + + def __init__(self, + repo_path: Union[Path, str], + repo: Optional[git.Repo] = None, + catch_error: bool = False): + self.repo_path = repo_path + + # Initialise git repository object + if repo is None: + repo = get_git_repository(repo_path, catch_error=catch_error) + self.repo = repo + + def get_branch_name(self) -> Optional[str]: + """Return the current git branch or None if repository path is + not a git repository""" + if self.repo: + return str(self.repo.active_branch) + + def get_hash(self) -> Optional[str]: + """Return the current git commit hash or None if repository path is + not a git repository""" + if self.repo: + return self.repo.active_branch.object.hexsha + + def get_origin_url(self) -> Optional[str]: + """Return url of remote origin if it exists""" + if self.repo and self.repo.remotes and self.repo.remotes.origin: + return self.repo.remotes.origin.url + + def get_user_info(self, config_key: str) -> Optional[str]: + """Return git config user info, None otherwise. 
Used for retrieving + name and email saved in git""" + if self.repo is None: + return + + try: + config_reader = self.repo.config_reader() + return config_reader.get_value('user', config_key) + except (configparser.NoSectionError, configparser.NoOptionError): + # No git config set for user.$config_key + return + + def commit(self, commit_message: str, - paths_to_commit: List[Union[Path, str]], - initialise_repo: bool = True) -> None: - """Add a git commit of changes to paths""" - # Get/Create git repository - repo = get_git_repository(repo_path, - catch_error=True, - initialise=initialise_repo) - if repo is None: - return - - # Un-stage any pre-existing changes - repo.index.reset() - - # Check if paths to commit have changed, or it is an untracked file - changes = False - untracked_files = [Path(repo_path) / path for path in repo.untracked_files] - for path in paths_to_commit: - if repo.git.diff(None, path) or path in untracked_files: - repo.index.add([path]) - changes = True - - # Run commit if there's changes - if changes: - repo.index.commit(commit_message) - print(commit_message) - - -def local_branches_dict(repo: git.Repo) -> Dict[str, git.Head]: - """Return a dictionary mapping local branch names to git.Head objects""" - branch_names_dict = {} - for head in repo.heads: - branch_names_dict[head.name] = head - return branch_names_dict - - -def remote_branches_dict(repo: git.Repo) -> Dict[str, git.Head]: - """Return a dictionary mapping remote branch names to git.Head objects""" - branch_names_dict = {} - for remote in repo.remotes: - remote.fetch() - for ref in remote.refs: - branch_names_dict[ref.remote_head] = ref - return branch_names_dict - - -def git_checkout_branch(repo_path: Union[Path, str], + paths_to_commit: List[Union[Path, str]]) -> None: + """Add a git commit of changes to paths""" + if self.repo is None: + return + + # Un-stage any pre-existing changes + self.repo.index.reset() + + # Check if paths to commit have changed, or it is an untracked 
file + changes = False + untracked_files = [Path(self.repo_path) / path + for path in self.repo.untracked_files] + for path in paths_to_commit: + if self.repo.git.diff(None, path) or path in untracked_files: + self.repo.index.add([path]) + changes = True + + # Run commit if there's changes + if changes: + self.repo.index.commit(commit_message) + print(commit_message) + + def local_branches_dict(self) -> Dict[str, git.Head]: + """Return a dictionary mapping local branch names to git.Head + objects""" + branch_names_dict = {} + for head in self.repo.heads: + branch_names_dict[head.name] = head + return branch_names_dict + + def remote_branches_dict(self) -> Dict[str, git.Head]: + """Return a dictionary mapping remote branch names to git.Head + objects""" + branch_names_dict = {} + for remote in self.repo.remotes: + remote.fetch() + for ref in remote.refs: + branch_names_dict[ref.remote_head] = ref + return branch_names_dict + + def checkout_branch(self, branch_name: str, new_branch: bool = False, start_point: Optional[str] = None) -> None: - """Checkout branch and create branch if specified""" - # Get git repository - repo = get_git_repository(repo_path) - - # Existing branches - local_branches = local_branches_dict(repo).keys() - remote_branches = remote_branches_dict(repo) - all_branches = local_branches | set(remote_branches.keys()) - - # Create new branch, if specified - if new_branch: - if branch_name in all_branches: + """Checkout branch and create branch if specified""" + # Existing branches + local_branches = self.local_branches_dict().keys() + remote_branches = self.remote_branches_dict() + all_branches = local_branches | set(remote_branches.keys()) + + # Create new branch, if specified + if new_branch: + if branch_name in all_branches: + raise PayuBranchError( + f"A branch named {branch_name} already exists. " + "To checkout this branch, remove the new branch flag '-b' " + "from the checkout command." 
+ ) + + if start_point is not None: + if (start_point not in local_branches and + start_point in remote_branches): + # Use hash for remote start point + start_point = remote_branches[start_point].commit + branch = self.repo.create_head(branch_name, commit=start_point) + else: + branch = self.repo.create_head(branch_name) + branch.checkout() + + print(f"Created and checked out new branch: {branch_name}") + return + + # Checkout branch + if branch_name not in all_branches: raise PayuBranchError( - f"A branch named {branch_name} already exists. " - "To checkout this branch, remove the new branch flag '-b' " - "from the checkout command." + f"There is no existing branch called {branch_name}. " + "To create this branch, add the new branch flag '-b' " + "to the checkout command." ) - if start_point is not None: - if (start_point not in local_branches and - start_point in remote_branches): - # Use hash for remote start point -local branch names work fine - start_point = remote_branches[start_point].commit - branch = repo.create_head(branch_name, commit=start_point) - else: - branch = repo.create_head(branch_name) - branch.checkout() - - print(f"Created and checked out new branch: {branch_name}") - return - - # Checkout branch - if branch_name not in all_branches: - raise PayuBranchError( - f"There is no existing branch called {branch_name}. " - "To create this branch, add the new branch flag '-b' " - "to the checkout command." 
- ) - - repo.git.checkout(branch_name) - print(f"Checked out branch: {branch_name}") + self.repo.git.checkout(branch_name) + print(f"Checked out branch: {branch_name}") def git_clone(repository: str, directory: Union[str, Path], - branch: Optional[str] = None) -> None: + branch: Optional[str] = None) -> GitRepository: """Clone repository to directory""" # Clone the repository if branch is not None: - git.Repo.clone_from(repository, - to_path=directory, - branch=branch) + repo = git.Repo.clone_from(repository, + to_path=directory, + branch=branch) else: - git.Repo.clone_from(repository, to_path=directory) + repo = git.Repo.clone_from(repository, to_path=directory) print(f"Cloned repository from {repository} to directory: {directory}") + + return GitRepository(repo_path=directory, repo=repo) diff --git a/payu/metadata.py b/payu/metadata.py index b67ac9ef..b7155c47 100644 --- a/payu/metadata.py +++ b/payu/metadata.py @@ -7,17 +7,20 @@ :license: Apache License, Version 2.0, see LICENSE for details. 
""" -import warnings +import re +import requests import shutil import uuid +import warnings +from datetime import datetime from pathlib import Path -from typing import Optional, List +from typing import Optional, List, Union from ruamel.yaml import YAML from ruamel.yaml.comments import CommentedMap from payu.fsops import read_config, mkdir_p -from payu.git_utils import get_git_branch, get_git_user_info, git_commit +from payu.git_utils import GitRepository # A truncated uuid is used for branch-uuid aware experiment names TRUNCATED_UUID_LENGTH = 8 @@ -27,8 +30,15 @@ PARENT_UUID_FIELD = "parent_experiment" CONTACT_FIELD = "contact" EMAIL_FIELD = "email" +NAME_FIELD = "name" +GIT_URL_FIELD = "url" +CREATED_FIELD = "created" +MODEL_FIELD = "model" METADATA_FILENAME = "metadata.yaml" +# Metadata Schema +SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/main/experiment_asset.json" + class MetadataWarning(Warning): pass @@ -65,6 +75,8 @@ def __init__(self, self.control_path = control_path self.filepath = self.control_path / METADATA_FILENAME + self.repo = GitRepository(self.control_path, catch_error=True) + self.branch = branch self.branch_uuid_experiment = True @@ -84,32 +96,22 @@ def read_file(self) -> CommentedMap: metadata = YAML().load(self.filepath) return metadata - def setup(self, keep_uuid: bool = False, - is_new_experiment: bool = False) -> None: - """Set UUID and experiment name, create/update metadata file, - commit any changes and copy metadata file to the experiment archive. + def setup(self, + is_new_experiment: bool = False, + keep_uuid: bool = False) -> None: + """Set UUID and experiment name. Parameters: keep_uuid: bool, default False Keep pre-existing UUID, if it exists. is_new_experiment: bool, default False - If not keep_uuid, generate a new_uuid and a branch-uuid aware - experiment name. + If not keep_uuid, generate a new UUID and a branch-uuid aware + experiment name. This is set in payu.branch.checkout_branch. 
Return: None Note: Experiment name is the name used for the work and archive directories in the Laboratory. """ - self.set_uuid_and_experiment_name(keep_uuid=keep_uuid, - is_new_experiment=is_new_experiment) - self.update_file() - self.commit_file() - self.copy_to_archive() - - def set_uuid_and_experiment_name(self, - is_new_experiment: bool = False, - keep_uuid: bool = False) -> None: - """Set experiment name and UUID""" if self.uuid is not None and (keep_uuid or not is_new_experiment): self.set_experiment_name(keep_uuid=keep_uuid, is_new_experiment=is_new_experiment) @@ -119,10 +121,12 @@ def set_uuid_and_experiment_name(self, "Generating a new uuid", MetadataWarning) self.set_new_uuid(is_new_experiment=is_new_experiment) + self.archive_path = self.lab_archive_path / self.experiment_name + def get_branch_uuid_experiment_name(self) -> Path: """Return a Branch-UUID aware experiment name""" if self.branch is None: - self.branch = get_git_branch(self.control_path) + self.branch = self.repo.get_branch_name() # Add branch and a truncated uuid to control directory name truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH] @@ -193,56 +197,110 @@ def set_new_uuid(self, is_new_experiment: bool = False) -> None: self.uuid = generate_uuid() self.set_experiment_name(is_new_experiment=is_new_experiment) - def update_file(self) -> None: + def write_metadata(self, + restart_path: Optional[Union[Path, str]] = None, + set_template_values: bool = False) -> None: + """Create/update metadata file, commit any changes and + copy metadata file to the experiment archive. 
+ + Parameters: + restart_path: Optional[Path] + Prior restart path - used for finding parent metadata + set_template_values: bool, default False + Read schema and set metadata template values for new + experiments + + Return: None + """ + # Assumes uuid and experiment name has been set + restart_path = Path(restart_path) if restart_path else None + self.update_file(restart_path=restart_path, + set_template_values=set_template_values) + self.commit_file() + self.copy_to_archive() + + def update_file(self, + restart_path: Optional[Path] = None, + set_template_values: bool = False) -> None: """Write any updates to metadata file""" metadata = self.read_file() - # Update UUID and parent UUID - parent_uuid = metadata.get(UUID_FIELD, None) - if parent_uuid is not None and parent_uuid != self.uuid: - metadata[PARENT_UUID_FIELD] = parent_uuid + # Check if UUID has changed + uuid_updated = metadata.get(UUID_FIELD, None) != self.uuid + if not uuid_updated: + # Leave metadata file unchanged + return + + # Add UUID field metadata[UUID_FIELD] = self.uuid - # Update email/contact in metadata - self.update_user_info(metadata=metadata, - metadata_key=CONTACT_FIELD, - config_key='name', - filler_values=['Your name', - 'Add your name here']) + # Update parent UUID field + parent_uuid = self.get_parent_experiment(restart_path) + if parent_uuid and parent_uuid != self.uuid: + metadata[PARENT_UUID_FIELD] = parent_uuid + + # Add extra fields if new branch-uuid experiment + # so to not over-write fields if it's a pre-existing legacy experiment + if self.branch_uuid_experiment: + metadata[CREATED_FIELD] = datetime.now().strftime('%Y-%m-%d') + metadata[NAME_FIELD] = self.experiment_name + metadata[MODEL_FIELD] = self.get_model_name() + + # Add origin git URL, if defined + url = self.repo.get_origin_url() + if url: + metadata[GIT_URL_FIELD] = url + + # Add email + contact if defined in git config + contact = self.repo.get_user_info(config_key='name') + if contact: + metadata[CONTACT_FIELD] 
= contact - self.update_user_info(metadata=metadata, - metadata_key=EMAIL_FIELD, - config_key='email', - filler_values=['you@example.com', - 'Add your email address here']) + email = self.repo.get_user_info(config_key="email") + if email: + metadata[EMAIL_FIELD] = email + + if set_template_values: + # Note that retrieving schema requires internet access + add_template_metadata_values(metadata) # Write updated metadata to file YAML().dump(metadata, self.filepath) - def update_user_info(self, metadata: CommentedMap, metadata_key: str, - config_key: str, filler_values=List[str]): - """Add user email/name to metadata - if defined and not already set - in metadata""" - example_value = filler_values[0] - filler_values = {value.casefold() for value in filler_values} - if (metadata_key not in metadata - or metadata[metadata_key] is None - or metadata[metadata_key].casefold() in filler_values): - # Get config value from git - value = get_git_user_info(repo_path=self.control_path, - config_key=config_key, - example_value=example_value) - if value is not None: - metadata[metadata_key] = value + def get_model_name(self) -> str: + """Get model name from config file""" + model_name = self.config.get('model') + if model_name == 'access': + # TODO: Is access used for anything other than ACCESS-ESM1-5? + # If so, won't set anything here. 
+ model_name = 'ACCESS-ESM1-5' + return model_name.upper() + + def get_parent_experiment(self, prior_restart_path: Path) -> None: + """Searches UUID in the metadata in the parent directory that + contains the restart""" + if prior_restart_path is None: + return + + # Resolve to absolute path + prior_restart_path = prior_restart_path.resolve() + + # Check for pre-existing metadata file + base_output_directory = Path(prior_restart_path).parent + metadata_filepath = base_output_directory / METADATA_FILENAME + if not metadata_filepath.exists(): + return + + # Read metadata file + parent_metadata = YAML().load(metadata_filepath) + return parent_metadata.get(UUID_FIELD, None) def commit_file(self) -> None: """Add a git commit for changes to metadata file, if file has changed and if control path is a git repository""" commit_message = f"Updated metadata. Experiment UUID: {self.uuid}" - git_commit(repo_path=self.control_path, - commit_message=commit_message, - paths_to_commit=[self.filepath], - initialise_repo=False) + self.repo.commit(commit_message=commit_message, + paths_to_commit=[self.filepath]) def copy_to_archive(self) -> None: """Copy metadata file to archive""" @@ -253,6 +311,30 @@ def copy_to_archive(self) -> None: # experiment names and whether to generate a new UUID +def get_schema_from_github(): + """Retrieve metadata schema from github""" + response = requests.get(SCHEMA_URL) + + if response.status_code == 200: + return response.json() + else: + print(f"Failed to fetch schema from {SCHEMA_URL}") + return response.json() if response.status_code == 200 else {} + + +def add_template_metadata_values(metadata: CommentedMap) -> None: + """Add in templates for un-set metadata values""" + schema = get_schema_from_github() + + for key, value in schema.get('properties', {}).items(): + if key not in metadata: + # Add field with commented description of value + description = value.get('description', None) + if description is not None: + metadata[key] = None + 
metadata.yaml_add_eol_comment(description, key) + + def generate_uuid() -> str: """Generate a new uuid""" return str(uuid.uuid4()) diff --git a/payu/subcommands/args.py b/payu/subcommands/args.py index 6867fcfc..ac0efb6f 100644 --- a/payu/subcommands/args.py +++ b/payu/subcommands/args.py @@ -228,7 +228,7 @@ 'parameters': { 'dest': 'restart_path', 'action': 'store', - 'help': 'The absolute restart path from which to start the model run' + 'help': 'The restart path from which to start the model run' } } diff --git a/test/test_branch.py b/test/test_branch.py index 2f533894..aa9ff973 100644 --- a/test/test_branch.py +++ b/test/test_branch.py @@ -7,7 +7,7 @@ from ruamel.yaml import YAML from unittest.mock import patch -from payu.branch import add_restart_to_config, switch_symlink +from payu.branch import add_restart_to_config, check_restart, switch_symlink from payu.branch import checkout_branch, clone, list_branches from payu.metadata import MetadataWarning from payu.fsops import read_config @@ -90,7 +90,7 @@ def setup_control_repository(path=ctrldir, set_config=True): ) def test_add_restart_to_config(test_config, expected_config): """Test adding restart: path/to/restart to configuration file""" - restart_path = labdir / "archive" / "tmpRestart" + restart_path = tmpdir / "archive" / "tmpRestart" restart_path.mkdir(parents=True) expected_config = expected_config.format(restart_path) @@ -109,24 +109,40 @@ def test_add_restart_to_config(test_config, expected_config): assert updated_config == expected_config -def test_add_restart_to_config_invalid_restart_path(): +def test_check_restart_with_non_existent_restart(): """Test restart path that does not exist raises a warning""" restart_path = tmpdir / "restartDNE" - config_content = "# Test config content" - with config_path.open("w") as file: - file.write(config_content) + expected_msg = (f"Given restart path {restart_path} does not exist. 
" + f"Skipping setting 'restart' in config file") + + with cd(ctrldir): + with pytest.warns(UserWarning, match=expected_msg): + restart_path = check_restart(restart_path, labdir / "archive") + + assert restart_path is None + - expected_msg = f"Given restart directory {restart_path} does not exist. " - expected_msg += f"Skipping adding 'restart: {restart_path}' to config file" +def test_check_restart_with_pre_existing_restarts_in_archive(): + """Test pre-existing restarts in archive raises a warning""" + # Create pre-existing restart in archive + archive_path = labdir / "archive" + (archive_path / "restart000").mkdir(parents=True) + + # Create restart path in different archive + restart_path = labdir / "different_archive" / "restart000" + restart_path.mkdir(parents=True) + + expected_msg = ( + f"Pre-existing restarts found in archive: {archive_path}." + f"Skipping adding 'restart: {restart_path}' to config file" + ) with cd(ctrldir): with pytest.warns(UserWarning, match=expected_msg): - add_restart_to_config(restart_path, config_path) + restart_path = check_restart(restart_path, archive_path) - # Test config unchanged - with config_path.open("r") as file: - assert file.read() == config_content + assert restart_path is None def test_switch_symlink_when_symlink_and_archive_exists(): @@ -199,9 +215,7 @@ def check_metadata(expected_uuid, # Assert archive exists for experiment name assert (archive_dir / expected_experiment / "metadata.yaml").exists() copied_metadata = YAML().load(metadata_file) - assert copied_metadata.get("experiment_uuid", None) == expected_uuid - parent_uuid = copied_metadata.get("parent_experiment", None) - assert parent_uuid == expected_parent_uuid + assert copied_metadata == metadata def check_branch_metadata(repo, @@ -264,8 +278,7 @@ def test_checkout_branch(mock_uuid): check_branch_metadata(repo, expected_uuid=uuid2, expected_current_branch="Branch2", - expected_experiment=branch2_experiment_name, - expected_parent_uuid=uuid1) + 
expected_experiment=branch2_experiment_name) # Mock uuid3 value uuid3 = "98c99f06-260e-42cc-a23f-f113fae825e5" @@ -283,8 +296,11 @@ def test_checkout_branch(mock_uuid): check_branch_metadata(repo, expected_uuid=uuid3, expected_current_branch="Branch3", - expected_experiment=branch3_experiment_name, - expected_parent_uuid=uuid1) + expected_experiment=branch3_experiment_name) + + # Check second to last commit was last commit on branch 1 + second_latest_commit = list(repo.iter_commits(max_count=2))[1] + assert second_latest_commit.hexsha == branch_1_commit_hash with cd(ctrldir): # Test checkout existing branch with existing metadata @@ -409,6 +425,57 @@ def test_checkout_branch_with_no_config(): assert not metadata_path.exists() +@patch("uuid.uuid4") +def test_checkout_branch_with_restart_path(mock_uuid): + # Make experiment archive restart - starting with no metadata + restart_path = tmpdir / "remote_archive" / "restart0123" + restart_path.mkdir(parents=True) + + # Setup repo + repo = setup_control_repository() + + # Mock uuid1 value + uuid1 = "df050eaf-c8bb-4b10-9998-e0202a1eabd2" + mock_uuid.return_value = uuid1 + + with cd(ctrldir): + # Test checkout with restart path with no metadata + checkout_branch(is_new_branch=True, + branch_name="Branch1", + lab_path=labdir, + restart_path=restart_path) + + # Check metadata + experiment1_name = f"{ctrldir_basename}-Branch1-df050eaf" + check_branch_metadata(repo, + expected_current_branch='Branch1', + expected_uuid=uuid1, + expected_experiment=experiment1_name) + + # Create restart directory in Branch1 archive + restart_path = archive_dir / experiment1_name / 'restart0123' + restart_path.mkdir() + + # Mock uuid2 value + uuid2 = "9cc04c9b-f13d-4f1d-8a35-87146a4381ef" + mock_uuid.return_value = uuid2 + + with cd(ctrldir): + # Test checkout with restart path with metadata + checkout_branch(is_new_branch=True, + branch_name="Branch2", + lab_path=labdir, + restart_path=restart_path) + + # Check metadata - Check parent experiment
is experiment 1 UUID + experiment2_name = f"{ctrldir_basename}-Branch2-9cc04c9b" + check_branch_metadata(repo, + expected_current_branch='Branch2', + expected_uuid=uuid2, + expected_experiment=experiment2_name, + expected_parent_uuid=uuid1) + + @patch("uuid.uuid4") def test_clone(mock_uuid): # Create a repo to clone @@ -427,7 +494,7 @@ def test_clone(mock_uuid): # Test clone cloned_repo_path = tmpdir / "clonedRepo" - clone(source_repo_path, cloned_repo_path, lab_path=labdir) + clone(str(source_repo_path), cloned_repo_path, lab_path=labdir) # Check new commit added and expected metadata cloned_repo = git.Repo(cloned_repo_path) @@ -437,6 +504,7 @@ def test_clone(mock_uuid): expected_uuid=uuid1, expected_experiment="clonedRepo-Branch1-9cc04c9b", metadata_file=metadata_file) + branch_1_commit_hash = cloned_repo.active_branch.object.hexsha cloned_repo.git.checkout(source_main_branch) @@ -446,7 +514,7 @@ def test_clone(mock_uuid): # Run clone with cd(tmpdir): - clone(cloned_repo_path, Path("clonedRepo2"), + clone(str(cloned_repo_path), Path("clonedRepo2"), lab_path=labdir, new_branch_name="Branch2", branch="Branch1") # Check new commit added and expected metadata @@ -456,9 +524,12 @@ def test_clone(mock_uuid): expected_current_branch="Branch2", expected_uuid=uuid2, expected_experiment="clonedRepo2-Branch2-fd7b4804", - expected_parent_uuid=uuid1, metadata_file=metadata_file) + # Check branched from Branch1 + second_latest_commit = list(cloned_repo2.iter_commits(max_count=2))[1] + assert second_latest_commit.hexsha == branch_1_commit_hash + # Check local branches assert [head.name for head in cloned_repo2.heads] == ["Branch1", "Branch2"] diff --git a/test/test_git_utils.py b/test/test_git_utils.py index 3c9b467b..c6711315 100644 --- a/test/test_git_utils.py +++ b/test/test_git_utils.py @@ -4,8 +4,7 @@ import git import pytest -from payu.git_utils import get_git_repository, get_git_user_info -from payu.git_utils import git_checkout_branch +from payu.git_utils import
get_git_repository, GitRepository from payu.git_utils import PayuBranchError, PayuGitWarning from test.common import tmpdir @@ -67,7 +66,8 @@ def test_get_git_user_info_no_config_set(): # defined in the 'user' namespace. repo_path = tmpdir / "test_repo" create_new_repo(repo_path) - value = get_git_user_info(repo_path, 'testKey-54321', 'test_value') + repo = GitRepository(repo_path) + value = repo.get_user_info('testKey-54321') assert value is None @@ -84,7 +84,8 @@ def test_get_git_user_info_config_set(): except subprocess.CalledProcessError as e: print(f"Error setting user name: {e}") - value = get_git_user_info(repo_path, 'name', 'test_value') + repo = GitRepository(repo_path) + value = repo.get_user_info('name') assert value == 'TestUserName' @@ -123,10 +124,10 @@ def test_git_checkout_new_branch_from_remote_ref(ref): expected_hash = main_branch_hash # Test startpoint being remote branch/hash/None - git_checkout_branch(cloned_repo_path, - 'branch-2', - new_branch=True, - start_point=start_point) + repo = GitRepository(cloned_repo_path) + repo.checkout_branch('branch-2', + new_branch=True, + start_point=start_point) current_branch = cloned_repo.active_branch current_hash = current_branch.object.hexsha @@ -141,10 +142,10 @@ def test_git_checkout_new_branch_existing(): existing_branch = repo.active_branch # Test create branch with existing branch + repo = GitRepository(repo_path) with pytest.raises(PayuBranchError): - git_checkout_branch(repo_path, - str(existing_branch), - new_branch=True) + repo.checkout_branch(str(existing_branch), + new_branch=True) def test_git_checkout_non_existent_branch(): @@ -153,8 +154,9 @@ def test_git_checkout_non_existent_branch(): create_new_repo(repo_path) # Test create branch with non-existent branch + repo = GitRepository(repo_path) with pytest.raises(PayuBranchError): - git_checkout_branch(repo_path, "Gibberish") + repo.checkout_branch("Gibberish") def test_git_checkout_existing_branch(): @@ -177,8 +179,8 @@ def 
test_git_checkout_existing_branch(): cloned_repo = remote_repo.clone(cloned_repo_path) # Test checkout existing remote branch - git_checkout_branch(cloned_repo_path, - 'branch-1') + repo = GitRepository(cloned_repo_path) + repo.checkout_branch('branch-1') current_branch = cloned_repo.active_branch current_hash = current_branch.object.hexsha diff --git a/test/test_metadata.py b/test/test_metadata.py index de1a4fde..f7c681f1 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -1,8 +1,9 @@ import copy import shutil +from datetime import datetime import pytest -from unittest.mock import patch +from unittest.mock import patch, Mock from payu.metadata import Metadata, MetadataWarning @@ -10,7 +11,6 @@ from test.common import tmpdir, ctrldir, labdir, archive_dir from test.common import config as config_orig from test.common import write_config -from test.common import make_all_files verbose = True @@ -46,7 +46,7 @@ def teardown_module(module): print(e) -def mocked_get_git_user_info(repo_path, config_key, example_value): +def mocked_get_git_user_info(config_key): if config_key == 'name': return 'mockUser' elif config_key == 'email': @@ -72,11 +72,14 @@ def setup_and_teardown(): print(e) +@patch("payu.metadata.GitRepository") @pytest.mark.parametrize( - "uuid, previous_metadata, expected_metadata", + "uuid, legacy_archive_exists, previous_metadata, expected_metadata", [ + # Test metadata file format stays the same when no UUID changed ( "0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75", + False, """contact: TestUser email: Test@email.com description: |- @@ -88,6 +91,7 @@ def setup_and_teardown(): # Test Comment experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 +name: UserDefinedExperimentName """, """contact: TestUser email: Test@email.com @@ -100,18 +104,27 @@ def setup_and_teardown(): # Test Comment experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 
+name: UserDefinedExperimentName """ ), + # Test new metadata file created ( "b1f3ce3d-99da-40e4-849a-c8b352948a31", + False, None, """experiment_uuid: b1f3ce3d-99da-40e4-849a-c8b352948a31 +created: '2000-01-01' +name: DefaultExperimentName +model: TEST-MODEL +url: mockUrl contact: mockUser email: mock@email.com """ ), + # Test metadata file updated when new UUID ( "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + False, """experiment_uuid: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 contact: Add your name here email: Add your email address here @@ -119,45 +132,62 @@ def setup_and_teardown(): """experiment_uuid: 7b90f37c-4619-44f9-a439-f76fdf6ae2bd contact: mockUser email: mock@email.com -parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 +created: '2000-01-01' +name: DefaultExperimentName +model: TEST-MODEL +url: mockUrl """ ), + # Test extra fields not added with legacy experiments ( - "3d18b3b6-dd19-49a9-8d9e-c7fa8582f136", - """ -contact: AdD Your nAme hEre -email: # + "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + True, + """contact: TestUser +email: Test@email.com +experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 +name: UserDefinedExperimentName """, - """contact: mockUser -email: mock@email.com # -experiment_uuid: 3d18b3b6-dd19-49a9-8d9e-c7fa8582f136 + """contact: TestUser +email: Test@email.com +experiment_uuid: 7b90f37c-4619-44f9-a439-f76fdf6ae2bd +name: UserDefinedExperimentName """ - ) + ), ] ) -def test_update_file(uuid, previous_metadata, expected_metadata): +def test_update_file(mock_repo, uuid, legacy_archive_exists, + previous_metadata, expected_metadata): # Create pre-existing metadata file metadata_path = ctrldir / 'metadata.yaml' if previous_metadata is not None: metadata_path.write_text(previous_metadata) - write_config(config) + # Add mock git values + mock_repo.return_value.get_origin_url.return_value = "mockUrl" + mock_repo.return_value.get_user_info.side_effect = mocked_get_git_user_info + + # Setup config + test_config = config.copy() + 
test_config['model'] = "test-model" + write_config(test_config) + + # Initialise Metadata with cd(ctrldir): metadata = Metadata(archive_dir) - metadata.uuid = uuid + metadata.experiment_name = "DefaultExperimentName" + metadata.branch_uuid_experiment = not legacy_archive_exists + + # Mock datetime (for created date) + with patch('payu.metadata.datetime') as mock_date: + mock_date.now.return_value = datetime(2000, 1, 1) - # Function to test - with patch('payu.metadata.get_git_user_info', - side_effect=mocked_get_git_user_info): + # Function to test metadata.update_file() assert metadata_path.exists and metadata_path.is_file assert metadata_path.read_text() == expected_metadata - # Remove metadata file - metadata_path.unlink() - @pytest.mark.parametrize( "uuid_exists, keep_uuid, is_new_experiment, " @@ -210,7 +240,7 @@ def test_set_experiment_and_uuid(uuid_exists, keep_uuid, is_new_experiment, branch_uuid_archive_exists, legacy_archive_exists, catch_warning, expected_uuid, expected_name): - + # Setup config and metadata write_config(config) with cd(ctrldir): metadata = Metadata(archive_dir) @@ -227,41 +257,38 @@ def test_set_experiment_and_uuid(uuid_exists, keep_uuid, is_new_experiment, archive_path.mkdir(parents=True) # Test set UUID and experiment name - with patch('payu.metadata.get_git_branch') as mock_branch, \ + with patch('payu.metadata.GitRepository.get_branch_name') as mock_branch, \ patch('uuid.uuid4') as mock_uuid: mock_branch.return_value = "mock_branch" mock_uuid.return_value = "cb793e91-6168-4ed2-a70c-f6f9ccf1659b" if catch_warning: + # Test warning raised with pytest.warns(MetadataWarning): - metadata.set_uuid_and_experiment_name( - is_new_experiment=is_new_experiment, - keep_uuid=keep_uuid - ) + metadata.setup(is_new_experiment=is_new_experiment, + keep_uuid=keep_uuid) else: - metadata.set_uuid_and_experiment_name( - is_new_experiment=is_new_experiment, - keep_uuid=keep_uuid - ) + metadata.setup(is_new_experiment=is_new_experiment, + 
keep_uuid=keep_uuid) assert metadata.experiment_name == expected_name assert metadata.uuid == expected_uuid def test_set_configured_experiment_name(): - # Test configured experiment name is the set experiment name + # Set experiment in config file test_config = copy.deepcopy(config) test_config['experiment'] = "configuredExperiment" write_config(test_config) + with cd(ctrldir): metadata = Metadata(archive_dir) + # Test configured experiment name is always the set experiment name metadata.set_experiment_name() - assert metadata.experiment_name == "configuredExperiment" metadata.set_experiment_name(is_new_experiment=True) - assert metadata.experiment_name == "configuredExperiment" @@ -279,8 +306,86 @@ def test_get_branch_uuid_aware_experiment_name(branch, expected_name): metadata.uuid = "cb793e91-6168-4ed2-a70c-f6f9ccf1659b" - with patch('payu.metadata.get_git_branch') as mock_branch: + with patch('payu.metadata.GitRepository.get_branch_name') as mock_branch: mock_branch.return_value = branch experiment = metadata.get_branch_uuid_experiment_name() assert experiment == expected_name + + +@patch("payu.metadata.GitRepository") +def test_update_file_with_template_metadata_values(mock_repo): + # Leave out origin URL and git user info + mock_repo.return_value.get_origin_url.return_value = None + mock_repo.return_value.get_user_info.return_value = None + + # Setup config + test_config = config.copy() + test_config['model'] = "test-model" + write_config(test_config) + + # Initialise Metadata and UUID and experiment name + with cd(ctrldir): + metadata = Metadata(archive_dir) + metadata.experiment_name = "ctrldir-branch-cb793e91" + metadata.uuid = "cb793e91-6168-4ed2-a70c-f6f9ccf1659" + + with patch('requests.get') as mock_get: + # Mock request for json schema + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "name": { + "type": 
"string", + "description": "The name of the experiment (string)" + }, + "experiment_uuid": { + "type": "string", + "format": "uuid", + "description": "Unique uuid for the experiment (string)" + }, + "description": { + "type": "string", + "description": ("Short description of the experiment " + "(string, < 150 char)") + }, + "long_description": { + "type": "string", + "description": ("Long description of the experiment " + "(string)") + }, + "model": { + "type": "array", + "items": {"type": ["string", "null"]}, + "description": ("The name(s) of the model(s) used in the" + " experiment (string)") + }, + }, + "required": [ + "name", + "experiment_uuid", + "description", + "long_description" + ] + } + mock_get.return_value = mock_response + + # Mock datetime (for created date) + with patch('payu.metadata.datetime') as mock_date: + mock_date.now.return_value = datetime(2000, 1, 1) + + # Test function + metadata.update_file(set_template_values=True) + + # Expect commented template values for non-null fields + expected_metadata = """experiment_uuid: cb793e91-6168-4ed2-a70c-f6f9ccf1659 +created: '2000-01-01' +name: ctrldir-branch-cb793e91 +model: TEST-MODEL +description: # Short description of the experiment (string, < 150 char) +long_description: # Long description of the experiment (string) +""" + assert (ctrldir / 'metadata.yaml').read_text() == expected_metadata From 97e66aa8f5e0e6c7adf826b74c2488df570b9b74 Mon Sep 17 00:00:00 2001 From: Jo Basevi Date: Wed, 20 Dec 2023 08:47:51 +1100 Subject: [PATCH 6/7] Update metadata and documentation - sync metadata.yaml when syncing to remote archive - only update metadata when UUID has changed - add config option to disable generating UUID and metadata files - add check for staged git changes before running checkout in `payu checkout` - add metadata config for model name (if it is different from model driver name) - add documentation for metadata and runlog configuration - refactor payu branch code to return metadata dictionary
when parsing commit tree --- docs/source/config.rst | 22 +++++++ docs/source/usage.rst | 17 +++-- payu/branch.py | 63 +++++++++++------- payu/git_utils.py | 8 +++ payu/metadata.py | 84 ++++++++++++++---------- payu/sync.py | 7 ++ test/common.py | 6 +- test/models/test_mom6.py | 5 +- test/pytest.ini | 3 - test/test_branch.py | 4 +- test/test_git_utils.py | 22 ++++++- test/test_metadata.py | 137 ++++++++++++++++++++------------------- test/test_sync.py | 11 +++- 13 files changed, 246 insertions(+), 143 deletions(-) delete mode 100644 test/pytest.ini diff --git a/docs/source/config.rst b/docs/source/config.rst index ccf31586..6391cc94 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -465,6 +465,28 @@ Postprocessing Create or update a bare git repository clone of the run history, called ``git-runlog``, in the remote archive directory. +Experiment Tracking +------------------- + +``runlog`` + Automatically commits changes to configuration files in the + *control directory* when the model runs. This creates a git runlog of the + history of the experiment. + + ``enable`` (*Default:* ``True``) + Flag to enable/disable runlog. + +``metadata`` + Generates and updates metadata files and UUIDs. For more details, see + :ref:`usage-metadata`. + + ``enable`` (*Default:* ``True``) + Flag to enable/disable creating/updating metadata files and UUIDs. + If set to False, the UUID is left out of the experiment name used + for archival. + + ``model`` (*Default: The configured model value*) + Model name used when generating metadata for new experiments. Miscellaneous ============= diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 186d3562..a014adb9 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -27,7 +27,7 @@ Using a git repository for the experiment It is recommended to use version control using git_ for the payu *control directory*. This allows the experiment to be easily copied via cloning. 
There is inbuilt support in payu for an experiment runlog which -tracks changes to files between experiment runs. There are payu commands +tracks changes to configuration files between experiment runs. There are payu commands for creating and moving between git branches so multiple related experiments can be run from the same control directory. @@ -361,9 +361,12 @@ which uniquely identifies the experiment. Payu generates a new UUID when: For new experiments, payu may generate some additional metadata fields. This includes an experiment name, creation date, contact, and email if defined in -the git configuration. This also includes parent experiment UUID and git commit hashes -so the history of the experiment can be tracked. These metadata files are also -copied to the directory that stores the archived experiment outputs. +the git configuration. This also includes parent experiment UUID if starting +from restarts, if it is defined in metadata of the parent directory +containing the restart. + +Once a metadata file is created or updated, it is copied to the directory +that stores the archived experiment outputs. .. _schema: https://github.com/ACCESS-NRI/schema/blob/main/experiment_asset.json @@ -390,9 +393,9 @@ the experiment name would be: * ``my_expt`` - if running an older experiment that has a pre-existing archive. -* ``set_expt_name`` - if the ``experiment`` value is configured to - ``set_expt_name``(see :ref:`config`). Note that to use branches in one control - repository, this would need each configured experiment value to be unique. +* ``set_expt_name`` - if the ``experiment`` value is configured to ``set_expt_name`` + (see :ref:`config`). Note that to use branches in one control + repository, this would need each configured ``experiment`` value to be unique. 
Switching between related experiments ------------------------------------- diff --git a/payu/branch.py b/payu/branch.py index bccfd1e7..5f963a87 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -13,12 +13,12 @@ import re from typing import Optional -from ruamel.yaml import YAML +from ruamel.yaml import YAML, CommentedMap import git from payu.fsops import read_config, DEFAULT_CONFIG_FNAME from payu.laboratory import Laboratory -from payu.metadata import Metadata, UUID_FIELD +from payu.metadata import Metadata, UUID_FIELD, METADATA_FILENAME from payu.git_utils import GitRepository, git_clone @@ -286,6 +286,25 @@ def clone(repository: str, print(f"To change directory to control directory run:\n cd {directory}") +def get_branch_metadata(branch: git.Head) -> Optional[CommentedMap]: + """Return dictionary of branch metadata if it exists, None otherwise""" + # Note: Blobs are files in the commit tree + for blob in branch.commit.tree.blobs: + if blob.name == METADATA_FILENAME: + # Read file contents + metadata_content = blob.data_stream.read().decode('utf-8') + return YAML().load(metadata_content) + + +def contains_config(branch: git.Head) -> bool: + """Checks if config file is defined in given branch""" + contains_config = False + for blob in branch.commit.tree.blobs: + if blob.name == DEFAULT_CONFIG_FNAME: + contains_config = True + return contains_config + + def print_branch_metadata(branch: git.Head, verbose: bool = False): """Display given Git branch UUID, or if config.yaml or metadata.yaml does not exist.
@@ -300,34 +319,28 @@ def print_branch_metadata(branch: git.Head, verbose: bool = False): Returns: None """ - contains_config = False - metadata_content = None - # Note: Blobs are files in the commit tree - for blob in branch.commit.tree.blobs: - if blob.name == 'config.yaml': - contains_config = True - if blob.name == 'metadata.yaml': - # Read file contents - metadata_content = blob.data_stream.read().decode('utf-8') - # Print branch info - if not contains_config: + if not contains_config(branch): print(f" No config file found") - elif metadata_content is None: + return + + metadata = get_branch_metadata(branch) + + if metadata is None: print(" No metadata file found") + return + + if verbose: + # Print all metadata + for key, value in metadata.items(): + print(f' {key}: {value}') else: - if verbose: - # Print all metadata - for line in metadata_content.splitlines(): - print(f' {line}') + # Print uuid + uuid = metadata.get(UUID_FIELD, None) + if uuid is not None: + print(f" {UUID_FIELD}: {uuid}") else: - # Print uuid - metadata = YAML().load(metadata_content) - uuid = metadata.get(UUID_FIELD, None) - if uuid is not None: - print(f" {UUID_FIELD}: {uuid}") - else: - print(f" No UUID in metadata file") + print(f" No UUID in metadata file") def list_branches(config_path: Optional[Path] = None, diff --git a/payu/git_utils.py b/payu/git_utils.py index 311fd78d..3f83362e 100644 --- a/payu/git_utils.py +++ b/payu/git_utils.py @@ -134,6 +134,14 @@ def checkout_branch(self, new_branch: bool = False, start_point: Optional[str] = None) -> None: """Checkout branch and create branch if specified""" + # First check for staged changes + if self.repo.is_dirty(index=True, working_tree=False): + raise PayuBranchError( + "There are staged git changes. 
Please stash or commit them " + "before running the checkout command again.\n" + "To see what files are staged, run: git status" + ) + # Existing branches local_branches = self.local_branches_dict().keys() remote_branches = self.remote_branches_dict() diff --git a/payu/metadata.py b/payu/metadata.py index b7155c47..68f1a10e 100644 --- a/payu/metadata.py +++ b/payu/metadata.py @@ -14,7 +14,7 @@ import warnings from datetime import datetime from pathlib import Path -from typing import Optional, List, Union +from typing import Optional, Union from ruamel.yaml import YAML from ruamel.yaml.comments import CommentedMap @@ -67,15 +67,20 @@ def __init__(self, config_path: Optional[Path] = None, branch: Optional[str] = None, control_path: Optional[Path] = None) -> None: - self.lab_archive_path = laboratory_archive_path self.config = read_config(config_path) + self.metadata_config = self.config.get('metadata', {}) if control_path is None: control_path = Path(self.config.get("control_path")) self.control_path = control_path self.filepath = self.control_path / METADATA_FILENAME + self.lab_archive_path = laboratory_archive_path - self.repo = GitRepository(self.control_path, catch_error=True) + # Config flag to disable creating metadata files and UUIDs + self.enabled = self.metadata_config.get('enable', True) + + if self.enabled: + self.repo = GitRepository(self.control_path, catch_error=True) self.branch = branch self.branch_uuid_experiment = True @@ -83,6 +88,7 @@ def __init__(self, # Set uuid if in metadata file metadata = self.read_file() self.uuid = metadata.get(UUID_FIELD, None) + self.uuid_updated = False # Experiment name configuration - this overrides experiment name self.config_experiment_name = self.config.get("experiment", None) @@ -112,10 +118,15 @@ def setup(self, Note: Experiment name is the name used for the work and archive directories in the Laboratory. 
""" - if self.uuid is not None and (keep_uuid or not is_new_experiment): + if not self.enabled: + # Set experiment name only - either configured or includes branch + self.set_experiment_name(ignore_uuid=True) + + elif self.uuid is not None and (keep_uuid or not is_new_experiment): self.set_experiment_name(keep_uuid=keep_uuid, is_new_experiment=is_new_experiment) else: + # Generate new UUID if self.uuid is None and not is_new_experiment: warnings.warn("No experiment uuid found in metadata. " "Generating a new uuid", MetadataWarning) @@ -123,23 +134,25 @@ def setup(self, self.archive_path = self.lab_archive_path / self.experiment_name - def get_branch_uuid_experiment_name(self) -> Path: - """Return a Branch-UUID aware experiment name""" + def new_experiment_name(self, ignore_uuid: bool = False) -> str: + """Generate a new experiment name""" if self.branch is None: self.branch = self.repo.get_branch_name() # Add branch and a truncated uuid to control directory name - truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH] - if self.branch is None or self.branch in ('main', 'master'): - suffix = f'-{truncated_uuid}' - else: - suffix = f'-{self.branch}-{truncated_uuid}' + adding_branch = self.branch not in (None, 'main', 'master') + suffix = f'-{self.branch}' if adding_branch else '' + + if not ignore_uuid: + truncated_uuid = self.uuid[:TRUNCATED_UUID_LENGTH] + suffix += f'-{truncated_uuid}' return self.control_path.name + suffix def set_experiment_name(self, is_new_experiment: bool = False, - keep_uuid: bool = False) -> None: + keep_uuid: bool = False, + ignore_uuid: bool = False) -> None: """Set experiment name - this is used for work and archive sub-directories in the Laboratory""" if self.config_experiment_name is not None: @@ -150,8 +163,13 @@ def set_experiment_name(self, self.experiment_name) return + if ignore_uuid: + # Leave experiment UUID out of experiment name + self.experiment_name = self.new_experiment_name(ignore_uuid=True) + return + # Branch-UUID experiment 
name and archive path - branch_uuid_experiment_name = self.get_branch_uuid_experiment_name() + branch_uuid_experiment_name = self.new_experiment_name() archive_path = self.lab_archive_path / branch_uuid_experiment_name # Legacy experiment name and archive path @@ -180,6 +198,7 @@ def set_experiment_name(self, def set_new_uuid(self, is_new_experiment: bool = False) -> None: """Generate a new uuid and set experiment name""" + self.uuid_updated = True self.uuid = generate_uuid() self.set_experiment_name(is_new_experiment=is_new_experiment) @@ -211,12 +230,20 @@ def write_metadata(self, experiments Return: None + + Note: This assumes setup() has been run to set UUID and experiment name """ - # Assumes uuid and experiment name has been set - restart_path = Path(restart_path) if restart_path else None - self.update_file(restart_path=restart_path, - set_template_values=set_template_values) - self.commit_file() + if not self.enabled: + # Skip creating/updating/commiting metadata + return + + if self.uuid_updated: + # Update metadata if UUID has changed + restart_path = Path(restart_path) if restart_path else None + self.update_file(restart_path=restart_path, + set_template_values=set_template_values) + self.commit_file() + self.copy_to_archive() def update_file(self, @@ -225,12 +252,6 @@ def update_file(self, """Write any updates to metadata file""" metadata = self.read_file() - # Check if UUID has changed - uuid_updated = metadata.get(UUID_FIELD, None) != self.uuid - if not uuid_updated: - # Leave metadata file unchanged - return - # Add UUID field metadata[UUID_FIELD] = self.uuid @@ -269,11 +290,9 @@ def update_file(self, def get_model_name(self) -> str: """Get model name from config file""" - model_name = self.config.get('model') - if model_name == 'access': - # TODO: Is access used for anything other than ACCESS-ESM1-5? - # If so, won't set anything here. 
- model_name = 'ACCESS-ESM1-5' + # Use model name unless specific model is specified in metadata config + default_model_name = self.config.get('model') + model_name = self.metadata_config.get('model', default_model_name) return model_name.upper() def get_parent_experiment(self, prior_restart_path: Path) -> None: @@ -304,10 +323,9 @@ def commit_file(self) -> None: def copy_to_archive(self) -> None: """Copy metadata file to archive""" - archive_path = self.lab_archive_path / self.experiment_name - mkdir_p(archive_path) - shutil.copy(self.filepath, archive_path / METADATA_FILENAME) - # Note: The existence of archive path is also used for determining + mkdir_p(self.archive_path) + shutil.copy(self.filepath, self.archive_path / METADATA_FILENAME) + # Note: The existence of an archive is used for determining # experiment names and whether to generate a new UUID diff --git a/payu/sync.py b/payu/sync.py index 88026074..01aa9b78 100644 --- a/payu/sync.py +++ b/payu/sync.py @@ -14,6 +14,7 @@ # Local from payu.fsops import mkdir_p +from payu.metadata import METADATA_FILENAME class SourcePath(): @@ -250,6 +251,12 @@ def run(self): self.source_paths.append(SourcePath(path=log_path, is_log_file=True)) + # Add metadata path to protected paths, if it exists + metadata_path = os.path.join(self.expt.archive_path, METADATA_FILENAME) + if os.path.isfile(metadata_path): + self.source_paths.append(SourcePath(path=metadata_path, + protected=True)) + # Add any additional paths to protected paths self.add_extra_source_paths() diff --git a/test/common.py b/test/common.py index e5dc235c..cbb6b9c9 100644 --- a/test/common.py +++ b/test/common.py @@ -55,7 +55,10 @@ } }, 'runlog': False, - "experiment": ctrldir_basename + "experiment": ctrldir_basename, + "metadata": { + "enable": False + } } metadata = { @@ -220,4 +223,3 @@ def make_all_files(): make_inputs() make_exe() make_restarts() - write_metadata() diff --git a/test/models/test_mom6.py b/test/models/test_mom6.py index 3b066eb0..941fe3f1 
100644 --- a/test/models/test_mom6.py +++ b/test/models/test_mom6.py @@ -44,7 +44,10 @@ def setup_module(module): 'jobname': 'testrun', 'model': 'mom6', 'exe': 'test.exe', - 'experiment': ctrldir_basename + 'experiment': ctrldir_basename, + 'metadata': { + 'enable': False + } } write_config(config) diff --git a/test/pytest.ini b/test/pytest.ini deleted file mode 100644 index dfcd5794..00000000 --- a/test/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -filterwarnings = - ignore::payu.git_utils.PayuGitWarning \ No newline at end of file diff --git a/test/test_branch.py b/test/test_branch.py index aa9ff973..243b9a3f 100644 --- a/test/test_branch.py +++ b/test/test_branch.py @@ -20,10 +20,10 @@ from test.common import make_expt_archive_dir -# Global config +# Global config - Remove set experiment and metadata config config = copy.deepcopy(config_orig) -# Remove Experiment override name config.pop("experiment") +config.pop("metadata") @pytest.fixture(autouse=True) diff --git a/test/test_git_utils.py b/test/test_git_utils.py index c6711315..fc8ae927 100644 --- a/test/test_git_utils.py +++ b/test/test_git_utils.py @@ -141,7 +141,7 @@ def test_git_checkout_new_branch_existing(): repo = create_new_repo(repo_path) existing_branch = repo.active_branch - # Test create branch with existing branch + # Test checkout branch with existing branch repo = GitRepository(repo_path) with pytest.raises(PayuBranchError): repo.checkout_branch(str(existing_branch), @@ -153,12 +153,30 @@ def test_git_checkout_non_existent_branch(): repo_path = tmpdir / 'remoteRepo' create_new_repo(repo_path) - # Test create branch with non-existent branch + # Test checkout branch with non-existent branch repo = GitRepository(repo_path) with pytest.raises(PayuBranchError): repo.checkout_branch("Gibberish") +def test_git_checkout_staged_changes(): + # Setup + repo_path = tmpdir / 'remoteRepo' + create_new_repo(repo_path) + + repo = GitRepository(repo_path) + file_path = repo_path / 'newTestFile.txt' + 
file_path.touch() + + # Test checkout branch works with untracked files + repo.checkout_branch(new_branch=True, branch_name="NewBranch") + + # Test checkout raises error with staged changes + repo.repo.index.add([file_path]) + with pytest.raises(PayuBranchError): + repo.checkout_branch(new_branch=True, branch_name="NewBranch2") + + def test_git_checkout_existing_branch(): # Setup remote_repo_path = tmpdir / 'remoteRepo' diff --git a/test/test_metadata.py b/test/test_metadata.py index f7c681f1..b30adb9c 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -4,6 +4,7 @@ import pytest from unittest.mock import patch, Mock +from ruamel.yaml import YAML from payu.metadata import Metadata, MetadataWarning @@ -14,9 +15,13 @@ verbose = True -# Global config +# Global config - Remove set experiment and metadata config config = copy.deepcopy(config_orig) config.pop("experiment") +config.pop("metadata") + +pytestmark = pytest.mark.filterwarnings( + "ignore::payu.git_utils.PayuGitWarning") def setup_module(module): @@ -76,82 +81,58 @@ def setup_and_teardown(): @pytest.mark.parametrize( "uuid, legacy_archive_exists, previous_metadata, expected_metadata", [ - # Test metadata file format stays the same when no UUID changed - ( - "0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75", - False, - """contact: TestUser -email: Test@email.com -description: |- - Test description etc - More description -keywords: -- test -- testKeyword -# Test Comment -experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 -parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 -name: UserDefinedExperimentName -""", - """contact: TestUser -email: Test@email.com -description: |- - Test description etc - More description -keywords: -- test -- testKeyword -# Test Comment -experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 -parent_experiment: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 -name: UserDefinedExperimentName -""" - ), # Test new metadata file created ( "b1f3ce3d-99da-40e4-849a-c8b352948a31", False, None, 
- """experiment_uuid: b1f3ce3d-99da-40e4-849a-c8b352948a31 -created: '2000-01-01' -name: DefaultExperimentName -model: TEST-MODEL -url: mockUrl -contact: mockUser -email: mock@email.com -""" + { + "experiment_uuid": "b1f3ce3d-99da-40e4-849a-c8b352948a31", + "created": '2000-01-01', + "name": "DefaultExperimentName", + "model": "TEST-MODEL", + "url": "mockUrl", + "contact": "mockUser", + "email": "mock@email.com" + } ), # Test metadata file updated when new UUID ( "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", False, - """experiment_uuid: b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04 -contact: Add your name here -email: Add your email address here -""", - """experiment_uuid: 7b90f37c-4619-44f9-a439-f76fdf6ae2bd -contact: mockUser -email: mock@email.com -created: '2000-01-01' -name: DefaultExperimentName -model: TEST-MODEL -url: mockUrl -""" + { + "experiment_uuid": "b3298c7f-01f6-4f0a-be32-ce5d2cfb9a04", + "contact": "Add your name here", + "email": "Add your email address here", + "description": "Add description here", + }, + { + "experiment_uuid": "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + "description": "Add description here", + "created": '2000-01-01', + "name": "DefaultExperimentName", + "model": "TEST-MODEL", + "url": "mockUrl", + "contact": "mockUser", + "email": "mock@email.com" + } ), # Test extra fields not added with legacy experiments ( "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", True, - """contact: TestUser -email: Test@email.com -experiment_uuid: 0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75 -name: UserDefinedExperimentName -""", - """contact: TestUser -email: Test@email.com -experiment_uuid: 7b90f37c-4619-44f9-a439-f76fdf6ae2bd -name: UserDefinedExperimentName -""" + { + "experiment_uuid": "0f49f2a0-f45e-4c0b-a3b6-4b0bf21f2b75", + "name": "UserDefinedExperimentName", + "contact": "TestUser", + "email": "Test@email.com" + }, + { + "experiment_uuid": "7b90f37c-4619-44f9-a439-f76fdf6ae2bd", + "name": "UserDefinedExperimentName", + "contact": "TestUser", + "email": "Test@email.com" + 
} ), ] ) @@ -159,8 +140,10 @@ def test_update_file(mock_repo, uuid, legacy_archive_exists, previous_metadata, expected_metadata): # Create pre-existing metadata file metadata_path = ctrldir / 'metadata.yaml' + yaml = YAML() if previous_metadata is not None: - metadata_path.write_text(previous_metadata) + with open(metadata_path, 'w') as file: + yaml.dump(previous_metadata, file) # Add mock git values mock_repo.return_value.get_origin_url.return_value = "mockUrl" @@ -186,7 +169,11 @@ def test_update_file(mock_repo, uuid, legacy_archive_exists, metadata.update_file() assert metadata_path.exists and metadata_path.is_file - assert metadata_path.read_text() == expected_metadata + + with open(metadata_path, 'r') as file: + metadata = yaml.load(metadata_path) + + assert metadata == expected_metadata @pytest.mark.parametrize( @@ -299,7 +286,7 @@ def test_set_configured_experiment_name(): ("master", "ctrl-cb793e91"), ("branch", "ctrl-branch-cb793e91")] ) -def test_get_branch_uuid_aware_experiment_name(branch, expected_name): +def test_new_experiment_name(branch, expected_name): # Test configured experiment name is the set experiment name with cd(ctrldir): metadata = Metadata(archive_dir) @@ -308,7 +295,25 @@ def test_get_branch_uuid_aware_experiment_name(branch, expected_name): with patch('payu.metadata.GitRepository.get_branch_name') as mock_branch: mock_branch.return_value = branch - experiment = metadata.get_branch_uuid_experiment_name() + experiment = metadata.new_experiment_name() + + assert experiment == expected_name + + +@pytest.mark.parametrize( + "branch, expected_name", + [(None, "ctrl"), + ("main", "ctrl"), + ("branch", "ctrl-branch")] +) +def test_new_experiment_name_ignore_uuid(branch, expected_name): + # Test configured experiment name is the set experiment name + with cd(ctrldir): + metadata = Metadata(archive_dir) + + with patch('payu.metadata.GitRepository.get_branch_name') as mock_branch: + mock_branch.return_value = branch + experiment = 
metadata.new_experiment_name(ignore_uuid=True) assert experiment == expected_name diff --git a/test/test_sync.py b/test/test_sync.py index e1d27a31..766480ec 100644 --- a/test/test_sync.py +++ b/test/test_sync.py @@ -10,7 +10,7 @@ from test.common import tmpdir, ctrldir, labdir, expt_archive_dir from test.common import config as config_orig from test.common import write_config -from test.common import make_all_files, make_random_file +from test.common import make_all_files, make_random_file, write_metadata from test.common import make_expt_archive_dir verbose = True @@ -18,6 +18,11 @@ # Global config config = copy.deepcopy(config_orig) +# Enable metadata +config.pop('metadata') +pytestmark = pytest.mark.filterwarnings( + "ignore::payu.git_utils.PayuGitWarning") + def setup_module(module): """ @@ -37,6 +42,7 @@ def setup_module(module): labdir.mkdir() ctrldir.mkdir() make_all_files() + write_metadata() except Exception as e: print(e) @@ -285,7 +291,8 @@ def test_sync(): sync.run() expected_dirs_synced = {'output000', 'output001', 'output002', - 'output003', 'output004', 'pbs_logs'} + 'output003', 'output004', + 'pbs_logs', 'metadata.yaml'} # Test output is moved to remote dir assert set(os.listdir(remote_archive)) == expected_dirs_synced From db2d0f1d42c9ec0f2531d766e0defb07a3668978 Mon Sep 17 00:00:00 2001 From: jo-basevi Date: Thu, 21 Dec 2023 11:05:20 +1100 Subject: [PATCH 7/7] Apply suggestions from code review - add suggested edits to usage and config documentation - refactor list_archive_dirs function out of experiment to fsops - add parent experiment UUID as argument to payu clone - add test for fsops.list_archive_dirs - Add parent_experiment flag as an option to payu checkout --- docs/source/config.rst | 4 +-- docs/source/usage.rst | 38 ++++++++++---------- payu/branch.py | 62 +++++++++++++++----------------- payu/experiment.py | 18 +++------- payu/fsops.py | 23 +++++++++++- payu/metadata.py | 25 +++++++------ payu/subcommands/args.py | 11 ++++++ 
payu/subcommands/checkout_cmd.py | 8 +++-- payu/subcommands/clone_cmd.py | 9 +++-- payu/sync.py | 14 +++++--- test/test_branch.py | 4 ++- test/test_payu.py | 38 +++++++++++++++++++- 12 files changed, 163 insertions(+), 91 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index 6391cc94..9d8fc95a 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -469,7 +469,7 @@ Experiment Tracking ------------------- ``runlog`` - Automatically commits changes to configuration files in the + Automatically commits changes to configuration files and manifests in the *control directory* when the model runs. This creates a git runlog of the history of the experiment. @@ -477,7 +477,7 @@ Experiment Tracking Flag to enable/disable runlog. ``metadata`` - Generates and updates metadata files and UUIDs. For more details, see + Generates and updates metadata files and unique experiment IDs (UUIDs). For more details, see :ref:`usage-metadata`. ``enable`` (*Default:* ``True``) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index a014adb9..881ab4bc 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -24,10 +24,10 @@ in a location that is continuously backed up. Using a git repository for the experiment ----------------------------------------- -It is recommended to use version control using git_ for the payu +It is recommended to use the git_ version control system for the payu *control directory*. This allows the experiment to be easily copied via cloning. There is inbuilt support in payu for an experiment runlog which -tracks changes to configuration files between experiment runs. There are payu commands +uses git to track changes to configuration files between experiment runs. There are payu commands for creating and moving between git branches so multiple related experiments can be run from the same control directory. 
@@ -99,13 +99,12 @@ Populate laboratory directories Clone experiment ---------------- -This is the best way to copy an experiment as it guarantees that only the +Cloning is the best way to copy an experiment as it guarantees that only the required files are copied to a new control directory, and maintains a link to the original experiment through the shared git history. To clone the -repository, you can use ``git clone`` or ``payu clone`` which is a wrapper -around ``git clone`` which additionally creates or updates the metadata file -which gets copied to the experiment archive directory (see :ref:`usage-metadata`). - +repository, you can use ``payu clone``. This is a wrapper around ``git clone`` +which additionally creates or updates the metadata file which gets copied to +the experiment archive directory (see :ref:`usage-metadata`). For example:: mkdir -p ${HOME}/${MODEL} @@ -131,7 +130,7 @@ run:: payu clone --help -Alternatively to creating and checking out branches in ``payu clone``, +As an alternative to creating and checking out branches with ``payu clone``, ``payu checkout`` can be used instead (see :ref:`usage-metadata`). @@ -362,7 +361,7 @@ which uniquely identifies the experiment. Payu generates a new UUID when: For new experiments, payu may generate some additional metadata fields. This includes an experiment name, creation date, contact, and email if defined in the git configuration. This also includes parent experiment UUID if starting -from restarts, if it is defined in metadata of the parent directory +from restarts and the experiment UUID is defined in metadata of the parent directory containing the restart. Once a metadata file is created or updated, it is copied to the directory @@ -377,10 +376,13 @@ An experiment name is used to identify the experiment inside the ``work`` and ``archive`` sub-directories inside the *laboratory*. The experiment name historically would default to the name of the *control -directory*. 
This is still supported for experiments with a pre-existing +directory*. This is still supported for experiments with pre-existing archived outputs. To support git branches and ensure uniqueness in shared -archives, the branch name and a short version of the experiment UUID are -added to new experiment names. For example, given a control directory named +archives, the new default behaviour is to add the branch name and a short +version of the experiment UUID to the name of the *control directory* when +creating experiment names. + +For example, given a control directory named ``my_expt`` and a UUID of ``416af8c6-d299-4ee6-9d77-4aefa8a9ebcb``, the experiment name would be: @@ -388,14 +390,12 @@ the experiment name would be: ``perturb``. * ``my_expt-416af8c6`` - if the control directory was not a git repository or - experiments was run from the ``main`` or ``master`` branch. - -* ``my_expt`` - if running an older experiment that has a pre-existing - archive. + experiment was run from the ``main`` or ``master`` git branch. -* ``set_expt_name`` - if the ``experiment`` value is configured to ``set_expt_name`` - (see :ref:`config`). Note that to use branches in one control - repository, this would need each configured ``experiment`` value to be unique. +To preserve backwards compatibility, if there's a pre-existing archive under +the *control directory* name, this will remain the experiment name (e.g. +``my_expt`` in the above example). Similarly, if the ``experiment`` value is +configured (see :ref:`config`), this will be used for the experiment name. 
Switching between related experiments ------------------------------------- diff --git a/payu/branch.py b/payu/branch.py index 5f963a87..adaed3a0 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -10,13 +10,12 @@ import os import warnings from pathlib import Path -import re from typing import Optional from ruamel.yaml import YAML, CommentedMap import git -from payu.fsops import read_config, DEFAULT_CONFIG_FNAME +from payu.fsops import read_config, DEFAULT_CONFIG_FNAME, list_archive_dirs from payu.laboratory import Laboratory from payu.metadata import Metadata, UUID_FIELD, METADATA_FILENAME from payu.git_utils import GitRepository, git_clone @@ -25,7 +24,7 @@ NO_CONFIG_FOUND_MESSAGE = """No configuration file found on this branch. Skipping adding new metadata file and creating archive/work symlinks. -To try find a branch that has config file, you can: +To find a branch that has a config file, you can: - Display local branches by running: payu branch - Or display remote branches by running: @@ -36,25 +35,10 @@ Where BRANCH_NAME is the name of the branch""" -def archive_contains_restarts(archive_path: Path) -> bool: - """Return True if there's pre-existing restarts in archive""" - pattern = re.compile(r"^restart[0-9][0-9][0-9]+$") - if not archive_path.exists(): - return False - - for path in archive_path.iterdir(): - real_path = path.resolve() - if real_path.is_dir() and pattern.match(path.name): - return True - return False - - def check_restart(restart_path: Optional[Path], archive_path: Path) -> Optional[Path]: """Checks for valid prior restart path. 
Returns resolved restart path if valid, otherwise returns None""" - if restart_path is None: - return # Check for valid path if not restart_path.exists(): @@ -66,11 +50,12 @@ def check_restart(restart_path: Optional[Path], restart_path = restart_path.resolve() # Check for pre-existing restarts in archive - if archive_contains_restarts(archive_path): - warnings.warn(( - f"Pre-existing restarts found in archive: {archive_path}." - f"Skipping adding 'restart: {restart_path}' to config file")) - return + if archive_path.exists(): + if len(list_archive_dirs(archive_path, dir_type="restart")) > 0: + warnings.warn(( + f"Pre-existing restarts found in archive: {archive_path}." + f"Skipping adding 'restart: {restart_path}' to config file")) + return return restart_path @@ -122,7 +107,8 @@ def checkout_branch(branch_name: str, config_path: Optional[Path] = None, control_path: Optional[Path] = None, model_type: Optional[str] = None, - lab_path: Optional[Path] = None) -> None: + lab_path: Optional[Path] = None, + parent_experiment: Optional[str] = None) -> None: """Checkout branch, setup metadata and add symlinks Parameters @@ -148,6 +134,8 @@ def checkout_branch(branch_name: str, Type of model - used for creating a Laboratory lab_path: Optional[Path] Path to laboratory directory + parent_experiment: Optional[str] + Parent experiment UUID to add to generated metadata """ if control_path is None: control_path = get_control_path(config_path) @@ -171,12 +159,15 @@ def checkout_branch(branch_name: str, is_new_experiment=is_new_experiment) # Gets valid prior restart path - prior_restart_path = check_restart(restart_path=restart_path, - archive_path=metadata.archive_path) + prior_restart_path = None + if restart_path: + prior_restart_path = check_restart(restart_path=restart_path, + archive_path=metadata.archive_path) # Create/update and commit metadata file metadata.write_metadata(set_template_values=True, - restart_path=prior_restart_path) + restart_path=prior_restart_path, + 
parent_experiment=parent_experiment) # Add restart option to config if prior_restart_path: @@ -215,7 +206,8 @@ def clone(repository: str, model_type: Optional[str] = None, config_path: Optional[Path] = None, lab_path: Optional[Path] = None, - restart_path: Optional[Path] = None) -> None: + restart_path: Optional[Path] = None, + parent_experiment: Optional[str] = None) -> None: """Clone an experiment control repository. Parameters: @@ -241,6 +233,8 @@ def clone(repository: str, Path to laboratory directory restart_path: Optional[Path] Absolute restart path to start experiment from + parent_experiment: Optional[str] + Parent experiment UUID to add to generated metadata Returns: None """ @@ -265,7 +259,8 @@ def clone(repository: str, config_path=config_path, control_path=control_path, model_type=model_type, - lab_path=lab_path) + lab_path=lab_path, + parent_experiment=parent_experiment) else: # Checkout branch if branch is None: @@ -278,7 +273,8 @@ def clone(repository: str, control_path=control_path, model_type=model_type, lab_path=lab_path, - is_new_experiment=True) + is_new_experiment=True, + parent_experiment=parent_experiment) finally: # Change back to original working directory os.chdir(owd) @@ -288,7 +284,6 @@ def clone(repository: str, def get_branch_metadata(branch: git.Head) -> Optional[CommentedMap]: """Return dictionary of branch metadata if it exists, None otherwise""" - # Note: Blobs are files in the commit tree for blob in branch.commit.tree.blobs: if blob.name == METADATA_FILENAME: # Read file contents @@ -331,9 +326,10 @@ def print_branch_metadata(branch: git.Head, verbose: bool = False): return if verbose: - # Print all metadata + # Print all non-null metadata values for key, value in metadata.items(): - print(f' {key}: {value}') + if value: + print(f' {key}: {value}') else: # Print uuid uuid = metadata.get(UUID_FIELD, None) diff --git a/payu/experiment.py b/payu/experiment.py index d8298f19..40572fbb 100644 --- a/payu/experiment.py +++ 
b/payu/experiment.py @@ -27,6 +27,7 @@ # Local from payu import envmod from payu.fsops import mkdir_p, make_symlink, read_config, movetree +from payu.fsops import list_archive_dirs from payu.schedulers.pbs import get_job_info, pbs_env_init, get_job_id from payu.models import index as model_index import payu.profilers @@ -198,7 +199,8 @@ def max_output_index(self, output_type="output"): """Given a output directory type (output or restart), return the maximum index of output directories found""" try: - output_dirs = self.list_output_dirs(output_type) + output_dirs = list_archive_dirs(archive_path=self.archive_path, + dir_type=output_type) except EnvironmentError as exc: if exc.errno == errno.ENOENT: output_dirs = None @@ -208,17 +210,6 @@ def max_output_index(self, output_type="output"): if output_dirs and len(output_dirs): return int(output_dirs[-1].lstrip(output_type)) - def list_output_dirs(self, output_type="output", full_path=False): - """Return a sorted list of restart or output directories in archive""" - naming_pattern = re.compile(fr"^{output_type}[0-9][0-9][0-9]+$") - dirs = [d for d in os.listdir(self.archive_path) - if naming_pattern.match(d)] - dirs.sort(key=lambda d: int(d.lstrip(output_type))) - - if full_path: - dirs = [os.path.join(self.archive_path, d) for d in dirs] - return dirs - def set_stacksize(self, stacksize): if stacksize == 'unlimited': @@ -972,7 +963,8 @@ def get_restarts_to_prune(self, return [] # List all restart directories in archive - restarts = self.list_output_dirs(output_type='restart') + restarts = list_archive_dirs(archive_path=self.archive_path, + dir_type='restart') # TODO: Previous logic was to prune all restarts if self.repeat_run # Still need to figure out what should happen in this case diff --git a/payu/fsops.py b/payu/fsops.py index 5a58a82c..cc1c09f7 100644 --- a/payu/fsops.py +++ b/payu/fsops.py @@ -10,10 +10,13 @@ # Standard library import errno import os +from pathlib import Path +import re import shutil import sys 
import shlex import subprocess +from typing import Union, List # Extensions import yaml @@ -207,4 +210,22 @@ def required_libs(bin_path): except: print("payu: error running ldd command on exe path: ", bin_path) return {} - return parse_ldd_output(ldd_out) \ No newline at end of file + return parse_ldd_output(ldd_out) + + +def list_archive_dirs(archive_path: Union[Path, str], + dir_type: str = "output") -> List[str]: + """Return a sorted list of restart or output directories in archive""" + naming_pattern = re.compile(fr"^{dir_type}[0-9][0-9][0-9]+$") + + if isinstance(archive_path, str): + archive_path = Path(archive_path) + + dirs = [] + for path in archive_path.iterdir(): + real_path = path.resolve() + if real_path.is_dir() and naming_pattern.match(path.name): + dirs.append(path.name) + + dirs.sort(key=lambda d: int(d.lstrip(dir_type))) + return dirs diff --git a/payu/metadata.py b/payu/metadata.py index 68f1a10e..84a798f8 100644 --- a/payu/metadata.py +++ b/payu/metadata.py @@ -7,7 +7,6 @@ :license: Apache License, Version 2.0, see LICENSE for details. """ -import re import requests import shutil import uuid @@ -218,7 +217,8 @@ def set_new_uuid(self, is_new_experiment: bool = False) -> None: def write_metadata(self, restart_path: Optional[Union[Path, str]] = None, - set_template_values: bool = False) -> None: + set_template_values: bool = False, + parent_experiment: Optional[str] = None) -> None: """Create/update metadata file, commit any changes and copy metadata file to the experiment archive. 
@@ -228,6 +228,8 @@ def write_metadata(self, set_template_values: bool, default False Read schema and set metadata template values for new experiments + parent_experiment: Optional[str] + Parent experiment UUID to add to generated metadata Return: None @@ -241,14 +243,16 @@ def write_metadata(self, # Update metadata if UUID has changed restart_path = Path(restart_path) if restart_path else None self.update_file(restart_path=restart_path, - set_template_values=set_template_values) + set_template_values=set_template_values, + parent_experiment=parent_experiment) self.commit_file() self.copy_to_archive() def update_file(self, restart_path: Optional[Path] = None, - set_template_values: bool = False) -> None: + set_template_values: bool = False, + parent_experiment: Optional[str] = None) -> None: """Write any updates to metadata file""" metadata = self.read_file() @@ -256,9 +260,10 @@ def update_file(self, metadata[UUID_FIELD] = self.uuid # Update parent UUID field - parent_uuid = self.get_parent_experiment(restart_path) - if parent_uuid and parent_uuid != self.uuid: - metadata[PARENT_UUID_FIELD] = parent_uuid + if parent_experiment is None: + parent_experiment = self.get_parent_experiment(restart_path) + if parent_experiment and parent_experiment != self.uuid: + metadata[PARENT_UUID_FIELD] = parent_experiment # Add extra fields if new branch-uuid experiment # so to not over-write fields if it's a pre-existing legacy experiment @@ -290,10 +295,10 @@ def update_file(self, def get_model_name(self) -> str: """Get model name from config file""" - # Use model name unless specific model is specified in metadata config - default_model_name = self.config.get('model') + # Use capitalised model name unless a specific model name is defined + default_model_name = self.config.get('model').upper() model_name = self.metadata_config.get('model', default_model_name) - return model_name.upper() + return model_name def get_parent_experiment(self, prior_restart_path: Path) -> None:
"""Searches UUID in the metadata in the parent directory that diff --git a/payu/subcommands/args.py b/payu/subcommands/args.py index ac0efb6f..cf3da7b3 100644 --- a/payu/subcommands/args.py +++ b/payu/subcommands/args.py @@ -202,6 +202,17 @@ } } +# Parent experiment UUID +parent_experiment = { + 'flags': ('--parent-experiment', '-p'), + 'parameters': { + 'action': 'store', + 'dest': 'parent_experiment', + 'default': None, + 'help': 'The parent experiment UUID to add to generated metadata' + } +} + # Branch name branch_name = { 'flags': [], diff --git a/payu/subcommands/checkout_cmd.py b/payu/subcommands/checkout_cmd.py index 70f8b93b..8ec26109 100644 --- a/payu/subcommands/checkout_cmd.py +++ b/payu/subcommands/checkout_cmd.py @@ -16,7 +16,7 @@ arguments = [args.model, args.config, args.laboratory, args.new_branch, args.branch_name, args.start_point, args.restart_path, - args.keep_uuid] + args.keep_uuid, args.parent_experiment] def transform_strings_to_path(path_str=None): @@ -24,7 +24,8 @@ def transform_strings_to_path(path_str=None): def runcmd(model_type, config_path, lab_path, new_branch, - branch_name, start_point, restart_path, keep_uuid): + branch_name, start_point, + restart_path, keep_uuid, parent_experiment): """Execute the command.""" config_path = transform_strings_to_path(config_path) lab_path = transform_strings_to_path(lab_path) @@ -37,7 +38,8 @@ def runcmd(model_type, config_path, lab_path, new_branch, config_path=config_path, lab_path=lab_path, model_type=model_type, - keep_uuid=keep_uuid) + keep_uuid=keep_uuid, + parent_experiment=parent_experiment) runscript = runcmd diff --git a/payu/subcommands/clone_cmd.py b/payu/subcommands/clone_cmd.py index 2f875cc6..c15c60a0 100644 --- a/payu/subcommands/clone_cmd.py +++ b/payu/subcommands/clone_cmd.py @@ -17,7 +17,8 @@ arguments = [args.model, args.config, args.laboratory, args.keep_uuid, args.clone_branch, args.repository, args.local_directory, - args.new_branch_name, args.restart_path] + 
args.new_branch_name, args.restart_path, + args.parent_experiment] def transform_strings_to_path(path_str=None): @@ -25,7 +26,8 @@ def transform_strings_to_path(path_str=None): def runcmd(model_type, config_path, lab_path, keep_uuid, - branch, repository, local_directory, new_branch_name, restart_path): + branch, repository, local_directory, new_branch_name, restart_path, + parent_experiment): """Execute the command.""" config_path = transform_strings_to_path(config_path) restart_path = transform_strings_to_path(restart_path) @@ -40,7 +42,8 @@ def runcmd(model_type, config_path, lab_path, keep_uuid, config_path=config_path, lab_path=lab_path, new_branch_name=new_branch_name, - restart_path=restart_path) + restart_path=restart_path, + parent_experiment=parent_experiment) runscript = runcmd diff --git a/payu/sync.py b/payu/sync.py index 01aa9b78..f658f215 100644 --- a/payu/sync.py +++ b/payu/sync.py @@ -13,7 +13,7 @@ # Local -from payu.fsops import mkdir_p +from payu.fsops import mkdir_p, list_archive_dirs from payu.metadata import METADATA_FILENAME @@ -49,8 +49,10 @@ def __init__(self, expt): def add_outputs_to_sync(self): """Add paths of outputs in archive to sync. 
The last output is protected""" - outputs = self.expt.list_output_dirs(output_type='output', - full_path=True) + outputs = list_archive_dirs(archive_path=self.expt.archive_path, + dir_type='output') + outputs = [os.path.join(self.expt.archive_path, output) + for output in outputs] if len(outputs) > 0: last_output = outputs.pop() if not self.ignore_last: @@ -70,8 +72,10 @@ def add_restarts_to_sync(self): return # Get sorted list of restarts in archive - restarts = self.expt.list_output_dirs(output_type='restart', - full_path=True) + restarts = list_archive_dirs(archive_path=self.expt.archive_path, + dir_type='restart') + restarts = [os.path.join(self.expt.archive_path, restart) + for restart in restarts] if restarts == []: return diff --git a/test/test_branch.py b/test/test_branch.py index 243b9a3f..f8fc9ae7 100644 --- a/test/test_branch.py +++ b/test/test_branch.py @@ -515,7 +515,8 @@ def test_clone(mock_uuid): # Run clone with cd(tmpdir): clone(str(cloned_repo_path), Path("clonedRepo2"), - lab_path=labdir, new_branch_name="Branch2", branch="Branch1") + lab_path=labdir, new_branch_name="Branch2", branch="Branch1", + parent_experiment=uuid1) # Check new commit added and expected metadata cloned_repo2 = git.Repo(tmpdir / "clonedRepo2") @@ -524,6 +525,7 @@ def test_clone(mock_uuid): expected_current_branch="Branch2", expected_uuid=uuid2, expected_experiment="clonedRepo2-Branch2-fd7b4804", + expected_parent_uuid=uuid1, metadata_file=metadata_file) # Check branched from Branch1 diff --git a/test/test_payu.py b/test/test_payu.py index 8a6d4065..b1184fc5 100644 --- a/test/test_payu.py +++ b/test/test_payu.py @@ -264,4 +264,40 @@ def test_lib_update_if_nci_module_not_required(): 'libmpi_usempif08.so.40': '/$HOME/exe/spack-microarchitectures.git/opt/spack/linux-rocky8-cascadelake/intel-2019.5.281/openmpi-4.1.5-ooyg5wc7sa3tvmcpazqqb44pzip3wbyo/lib/libmpi_usempif08.so.40', } result = payu.envmod.lib_update(required_libs_dict, 'libmpi.so') - assert(result == '') \ No newline 
at end of file + assert (result == '') + + +def test_list_archive_dirs(): + # Create archive directories - mix of valid/invalid names + archive_dirs = [ + 'output000', 'output1001', 'output023', + 'output', 'Output001', 'output1', + 'Restart', 'restart2', 'restart', + 'restart102932', 'restart021', 'restart001', + ] + tmp_archive = tmpdir / 'test_archive' + for dir in archive_dirs: + (tmp_archive / dir).mkdir(parents=True) + + # Add some files + (tmp_archive / 'restart005').touch() + (tmp_archive / 'output005').touch() + + # Add a restart symlink + tmp_archive_2 = tmpdir / 'test_archive_2' + source_path = tmp_archive_2 / 'restart999' + source_path.mkdir(parents=True) + (tmp_archive / 'restart23042').symlink_to(source_path) + + # Test listing output dirs, with the archive path given as a string + outputs = payu.fsops.list_archive_dirs(str(tmp_archive), dir_type="output") + assert outputs == ['output000', 'output023', 'output1001'] + + # Test list restarts + restarts = payu.fsops.list_archive_dirs(tmp_archive, dir_type="restart") + assert restarts == ['restart001', 'restart021', + 'restart23042', 'restart102932'] + + # Clean up test archive + shutil.rmtree(tmp_archive) + shutil.rmtree(tmp_archive_2)