From ac43f26360bb689519e145c2cdd130e355720471 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 9 Jan 2024 15:31:34 -0500 Subject: [PATCH 1/6] Add support for partial clones --- CHANGELOG.md | 9 ++++++++ etc/mepoconfig-example | 43 ++++++++++++++++++++++++++++++++++- mepo.d/cmdline/parser.py | 7 ++++++ mepo.d/command/clone/clone.py | 30 ++++++++++++++++++++---- mepo.d/repository/git.py | 9 +++++++- 5 files changed, 91 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5737e3e8..ffd113c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed +## [1.52.0] - 2024-01-10 + +### Added + +- Added new `--partial` option to `mepo clone` with two settings: `blobless` and `treeless`. If you set, `--partial=blobless` then + the clone will not download blobs by using `--filter=blob:none`. If you set `--partial=treeless` then the clone will not download + trees by using `--filter=tree:0`. The `blobless` option is useful for large repos that have a lot of binary files that you don't + need. The `treeless` option is even more aggressive and *SHOULD NOT* be used unless you know what you are doing. + ## [1.51.1] - 2023-08-25 ### Fixed diff --git a/etc/mepoconfig-example b/etc/mepoconfig-example index 6123d00f..49c0f12a 100644 --- a/etc/mepoconfig-example +++ b/etc/mepoconfig-example @@ -2,7 +2,7 @@ # # .mepoconfig is a config file a la gitconfig with sections and options. # -# Currently, .mepoconfig files recognize two sections: [init] and [alias] +# Currently, .mepoconfig files recognize three sections: [init], [alias], and [clone]. 
# # ======================================================================= # @@ -24,6 +24,10 @@ # # mepo clone --style postfix # +# You set these options by running: +# +# mepo config set init.style +# # ======================================================================= # # [alias] Section @@ -40,3 +44,40 @@ # you can only alias mepo primary commands and not "subcommands" or # "options". So you can have an alias for "commit" and for "branch", # but you can't do an option for "commit -m" or "branch create". +# +# You can set an alias by running: +# +# mepo config set alias. +# +# ======================================================================= +# +# [clone] Section +# +# The clone section currently recognizes one option, partial. +# This has two allowed values: blobless and treeless +# +# So if you have: +# +# [clone] +# partial = blobless +# +# This is equivalent to doing: +# +# mepo clone --partial=blobless +# +# which corresponds to the git clone option --filter=blob:none +# +# and similarly for treeless: +# +# [clone] +# partial = treeless +# +# is equivalent to doing: +# +# mepo clone --partial=treeless +# +# which corresponds to the git clone option --filter=tree:0 +# +# You set these options by running: +# +# mepo config set clone.partial diff --git a/mepo.d/cmdline/parser.py b/mepo.d/cmdline/parser.py index 5407b469..21233a9c 100644 --- a/mepo.d/cmdline/parser.py +++ b/mepo.d/cmdline/parser.py @@ -104,6 +104,13 @@ def __clone(self): '--allrepos', action = 'store_true', help = 'Must be passed with -b/--branch. When set, it not only checkouts out the branch/tag for the fixture, but for all the subrepositories as well.') + clone.add_argument( + '--partial', + metavar = 'partial-type', + nargs = '?', + default = None, + choices = ['blobless','treeless'], + help = 'Style of partial clone, default: None, allowed options: %(choices)s. Note that blobless means cloning with --filter=blob:none and treeless means cloning with --filter=tree:0. 
NOTE: We do *not* recommend using "treeless" as it is very aggressive and will cause problems with many git commands.') def __list(self): listcomps = self.subparsers.add_parser( diff --git a/mepo.d/command/clone/clone.py b/mepo.d/command/clone/clone.py index 7295d627..18381bf5 100644 --- a/mepo.d/command/clone/clone.py +++ b/mepo.d/command/clone/clone.py @@ -1,7 +1,7 @@ from state.state import MepoState, StateDoesNotExistError from repository.git import GitRepository from command.init import init as mepo_init -from utilities import shellcmd, colors +from utilities import shellcmd, colors, mepoconfig from urllib.parse import urlparse import os @@ -18,6 +18,22 @@ def run(args): if args.allrepos and not args.branch: raise RuntimeError("The allrepos option must be used with a branch/tag.") + # We can get the blobless and treeless options from the config or the args + if args.partial: + partial = args.partial + elif mepoconfig.has_option('clone','partial'): + allowed = ['blobless','treeless'] + partial = mepoconfig.get('clone','partial') + if partial not in allowed: + raise Exception(f'Detected partial clone type [{partial}] from .mepoconfig is not an allowed partial clone type: {allowed}') + else: + print(f'Found partial clone type [{partial}] in .mepoconfig') + else: + partial = None + + blobless = partial == 'blobless' + treeless = partial == 'treeless' + # If you pass in a config, with clone, it could be outside the repo. 
# So use the full path passed_in_config = False @@ -34,7 +50,7 @@ def run(args): last_url_node = p.path.rsplit('/')[-1] url_suffix = pathlib.Path(last_url_node).suffix if args.directory: - local_clone(args.repo_url,args.branch,args.directory) + local_clone(args.repo_url,args.branch,args.directory,blobless,treeless) os.chdir(args.directory) else: if url_suffix == '.git': @@ -42,7 +58,7 @@ def run(args): else: git_url_directory = last_url_node - local_clone(args.repo_url,args.branch,git_url_directory) + local_clone(args.repo_url,args.branch,git_url_directory,blobless,treeless) os.chdir(git_url_directory) # Copy the new file into the repo only if we pass it in @@ -71,7 +87,7 @@ def run(args): recurse = comp.recurse_submodules # We need the type to handle hashes in components.yaml type = comp.version.type - git.clone(version,recurse,type,comp.name) + git.clone(version,recurse,type,comp.name,blobless,treeless) if comp.sparse: git.sparsify(comp.sparse) print_clone_info(comp, max_namelen) @@ -89,8 +105,12 @@ def print_clone_info(comp, name_width): ver_name_type = '({}) {}'.format(comp.version.type, comp.version.name) print('{:<{width}} | {: Date: Tue, 9 Jan 2024 15:42:24 -0500 Subject: [PATCH 2/6] Add mepoconfig entry to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffd113c3..d2a5eaf1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 the clone will not download blobs by using `--filter=blob:none`. If you set `--partial=treeless` then the clone will not download trees by using `--filter=tree:0`. The `blobless` option is useful for large repos that have a lot of binary files that you don't need. The `treeless` option is even more aggressive and *SHOULD NOT* be used unless you know what you are doing. +- Add a new section for `.mepoconfig` to allow users to set `--partial` as a default for `mepo clone`. 
## [1.51.1] - 2023-08-25 From fbf924ddd8c68761ee9d6e0150f022e84e4ff997 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 9 Jan 2024 16:02:24 -0500 Subject: [PATCH 3/6] Update unit tests and github CI --- .github/workflows/mepo.yaml | 6 +++--- mepo.d/utest/test_mepo_commands.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mepo.yaml b/.github/workflows/mepo.yaml index 4d9a4662..4e7710c2 100644 --- a/.github/workflows/mepo.yaml +++ b/.github/workflows/mepo.yaml @@ -8,14 +8,14 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] - python-version: ['3.9', '3.10', '3.11', 'pypy-3.9'] + python-version: ['3.9', '3.10', '3.11', '3.12', 'pypy-3.9', 'pypy-3.10'] name: Python ${{ matrix.python-version }} on ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: 'pip' diff --git a/mepo.d/utest/test_mepo_commands.py b/mepo.d/utest/test_mepo_commands.py index 87d12afa..c1fc5a64 100644 --- a/mepo.d/utest/test_mepo_commands.py +++ b/mepo.d/utest/test_mepo_commands.py @@ -53,6 +53,7 @@ def setUpClass(cls): args.repo_url = None args.branch = None args.directory = None + args.partial = 'blobless' mepo_clone.run(args) # In order to better test compare, we need to do *something* args.comp_name = ['env','cmake','fvdycore'] From dc705f0a6f5524c42f1f42f0b37e071c26b33ca7 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 9 Jan 2024 16:09:24 -0500 Subject: [PATCH 4/6] Clean up the code --- mepo.d/command/clone/clone.py | 15 ++++++--------- mepo.d/repository/git.py | 7 +++---- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/mepo.d/command/clone/clone.py b/mepo.d/command/clone/clone.py index 18381bf5..c153c2f5 100644 --- a/mepo.d/command/clone/clone.py +++ b/mepo.d/command/clone/clone.py @@ -31,9 +31,6 @@ def 
run(args): else: partial = None - blobless = partial == 'blobless' - treeless = partial == 'treeless' - # If you pass in a config, with clone, it could be outside the repo. # So use the full path passed_in_config = False @@ -50,7 +47,7 @@ def run(args): last_url_node = p.path.rsplit('/')[-1] url_suffix = pathlib.Path(last_url_node).suffix if args.directory: - local_clone(args.repo_url,args.branch,args.directory,blobless,treeless) + local_clone(args.repo_url,args.branch,args.directory,partial) os.chdir(args.directory) else: if url_suffix == '.git': @@ -58,7 +55,7 @@ def run(args): else: git_url_directory = last_url_node - local_clone(args.repo_url,args.branch,git_url_directory,blobless,treeless) + local_clone(args.repo_url,args.branch,git_url_directory,partial) os.chdir(git_url_directory) # Copy the new file into the repo only if we pass it in @@ -87,7 +84,7 @@ def run(args): recurse = comp.recurse_submodules # We need the type to handle hashes in components.yaml type = comp.version.type - git.clone(version,recurse,type,comp.name,blobless,treeless) + git.clone(version,recurse,type,comp.name,partial) if comp.sparse: git.sparsify(comp.sparse) print_clone_info(comp, max_namelen) @@ -105,11 +102,11 @@ def print_clone_info(comp, name_width): ver_name_type = '({}) {}'.format(comp.version.type, comp.version.name) print('{:<{width}} | {: Date: Wed, 10 Jan 2024 08:20:22 -0500 Subject: [PATCH 5/6] Add protection for treeless clones with submodules --- CHANGELOG.md | 6 ++++-- mepo.d/cmdline/parser.py | 4 ++-- mepo.d/command/clone/clone.py | 21 +++++++++++++++++++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2a5eaf1..7b26be00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,10 +19,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Added new `--partial` option to `mepo clone` with two settings: `blobless` and `treeless`. 
If you set, `--partial=blobless` then +- Added new `--partial` option to `mepo clone` with three settings: `off`, `blobless`, and `treeless`. If you set `--partial=blobless` then the clone will not download blobs by using `--filter=blob:none`. If you set `--partial=treeless` then the clone will not download trees by using `--filter=tree:0`. The `blobless` option is useful for large repos that have a lot of binary files that you don't - need. The `treeless` option is even more aggressive and *SHOULD NOT* be used unless you know what you are doing. + need. The `treeless` option is even more aggressive and *SHOULD NOT* be used unless you know what you are doing. The + `--partial=off` option allows a user to override the default behavior of `--partial` in `.mepoconfig` and turn it off for a + run of `mepo clone`. - Add a new section for `.mepoconfig` to allow users to set `--partial` as a default for `mepo clone`. ## [1.51.1] - 2023-08-25 diff --git a/mepo.d/cmdline/parser.py b/mepo.d/cmdline/parser.py index 21233a9c..34f9045e 100644 --- a/mepo.d/cmdline/parser.py +++ b/mepo.d/cmdline/parser.py @@ -109,8 +109,8 @@ def __clone(self): metavar = 'partial-type', nargs = '?', default = None, - choices = ['blobless','treeless'], - help = 'Style of partial clone, default: None, allowed options: %(choices)s. Note that blobless means cloning with --filter=blob:none and treeless means cloning with --filter=tree:0. NOTE: We do *not* recommend using "treeless" as it is very aggressive and will cause problems with many git commands.') + choices = ['off','blobless','treeless'], + help = 'Style of partial clone, default: None, allowed options: %(choices)s. Off means a "normal" full git clone, blobless means cloning with "--filter=blob:none" and treeless means cloning with "--filter=tree:0". 
NOTE: We do *not* recommend using "treeless" as it is very aggressive and will cause problems with many git commands.') def __list(self): listcomps = self.subparsers.add_parser( diff --git a/mepo.d/command/clone/clone.py b/mepo.d/command/clone/clone.py index c153c2f5..b339177d 100644 --- a/mepo.d/command/clone/clone.py +++ b/mepo.d/command/clone/clone.py @@ -20,7 +20,10 @@ def run(args): # We can get the blobless and treeless options from the config or the args if args.partial: - partial = args.partial + # We need to set partial to None if it's off, otherwise we use the + # string. This is safe because argparse only allows for 'off', + # 'blobless', or 'treeless' + partial = None if args.partial == 'off' else args.partial elif mepoconfig.has_option('clone','partial'): allowed = ['blobless','treeless'] partial = mepoconfig.get('clone','partial') @@ -31,6 +34,7 @@ def run(args): else: partial = None + # If you pass in a config, with clone, it could be outside the repo. # So use the full path passed_in_config = False @@ -82,9 +86,18 @@ def run(args): version = comp.version.name version = version.replace('origin/','') recurse = comp.recurse_submodules + + # According to Git, treeless clones do not interact well with + # submodules. So we need to see if any comp has the recurse + # option set to True. If so, we need to clone that comp "normally" + + _partial = None if partial == 'treeless' and recurse else partial + + print(f'Cloning {comp.name} {version}. Recurse: {recurse}. 
Partial: {_partial}') + # We need the type to handle hashes in components.yaml type = comp.version.type - git.clone(version,recurse,type,comp.name,partial) + git.clone(version,recurse,type,comp.name,_partial) if comp.sparse: git.sparsify(comp.sparse) print_clone_info(comp, max_namelen) @@ -104,10 +117,14 @@ def print_clone_info(comp, name_width): def local_clone(url,branch=None,directory=None,partial=None): cmd1 = 'git clone ' + if partial == 'blobless': cmd1 += '--filter=blob:none ' elif partial == 'treeless': cmd1 += '--filter=tree:0 ' + else: + partial = None + if branch: cmd1 += '--branch {} '.format(branch) cmd1 += '--quiet {}'.format(url) From 1db99b10ceef47c3ab9e6efa8ec3496c03ff632c Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Wed, 10 Jan 2024 09:44:13 -0500 Subject: [PATCH 6/6] Remove debug print --- mepo.d/command/clone/clone.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mepo.d/command/clone/clone.py b/mepo.d/command/clone/clone.py index b339177d..448e2849 100644 --- a/mepo.d/command/clone/clone.py +++ b/mepo.d/command/clone/clone.py @@ -93,8 +93,6 @@ def run(args): _partial = None if partial == 'treeless' and recurse else partial - print(f'Cloning {comp.name} {version}. Recurse: {recurse}. Partial: {_partial}') - # We need the type to handle hashes in components.yaml type = comp.version.type git.clone(version,recurse,type,comp.name,_partial)