From 9793d67d3d032eb62f24b00ca835fdaf69244ea9 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Thu, 4 Aug 2022 09:10:58 -0700 Subject: [PATCH 01/34] Issues/91 (#92) * added citation creation tests and functionality to subscriber and downloader * added verbose option to create_citation_file command, previously hard coded * updated changelog (whoops) and fixed regression test: 1. Issue where the citation file now downloaded affected the counts 2. Issue where the logic for determining if a file modified time was changing or not was picking up the new citation file which _always_ gets rewritten to update the 'last accessed' date. * updated request to include exc_info in warning; fixed issue with params not being a dictionary caused errors * changed a warning to debug for citation file. fixed test issues * Enable debug logging during regression tests and set max parallel workflows to 2 * added output to pytest * fixed test to only look for downloaded data files not citation file due to 'random' cmr errors when creating a citation. 
* added mock testing and retry on 503 * added 503 fixes Co-authored-by: Frank Greguska --- .github/workflows/python-app.yml | 5 +- CHANGELOG.md | 7 +++ poetry.lock | 24 ++++++++-- pyproject.toml | 1 + subscriber/podaac_access.py | 71 ++++++++++++++++++++++++++++ subscriber/podaac_data_downloader.py | 18 ++++++- subscriber/podaac_data_subscriber.py | 18 ++++++- tests/test_downloader_regression.py | 15 ++++-- tests/test_subscriber.py | 64 +++++++++++++++++++++++++ tests/test_subscriber_regression.py | 2 +- 10 files changed, 211 insertions(+), 14 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index dbde039..d212c98 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -13,6 +13,7 @@ jobs: build: strategy: fail-fast: false + max-parallel: 2 matrix: python-version: [ "3.7", "3.8", "3.9", "3.10" ] poetry-version: [ "1.1" ] @@ -47,5 +48,7 @@ jobs: username: ${{ secrets.EDL_OPS_USERNAME }} password: ${{ secrets.EDL_OPS_PASSWORD }} - name: Regression Test with pytest + env: + PODAAC_LOGLEVEL: "DEBUG" run: | - poetry run pytest -m "regression" + poetry run pytest -o log_cli=true --log-cli-level=DEBUG -m "regression" diff --git a/CHANGELOG.md b/CHANGELOG.md index f06ebe6..e4995cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +## [unreleased] +### Fixed +- Fixed an issue where token-refresh was expecting a dictionary, not a list of tuples +- Fixed an issue with 503 errors on data download not being re-tried. [97](https://github.com/podaac/data-subscriber/issues/97) +### Added +- Added citation file creation when data are downloaded [91](https://github.com/podaac/data-subscriber/issues/91). Required some updates to the regression testing. 
+ ## [1.10.2] ### Fixed - Fixed an issue where using a default global bounding box prevented download of data that didn't use the horizontal spatial domain [87](https://github.com/podaac/data-subscriber/issues/87) diff --git a/poetry.lock b/poetry.lock index 0a666cb..94f8c70 100644 --- a/poetry.lock +++ b/poetry.lock @@ -124,8 +124,8 @@ python-versions = ">=3.6" importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} [package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] +testing = ["pytest-benchmark", "pytest"] +dev = ["tox", "pre-commit"] [[package]] name = "py" @@ -184,6 +184,20 @@ tomli = ">=1.0.0" [package.extras] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +[[package]] +name = "pytest-mock" +version = "3.8.2" +description = "Thin-wrapper around the mock package for easier use with pytest" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +pytest = ">=5.0" + +[package.extras] +dev = ["pre-commit", "tox", "pytest-asyncio"] + [[package]] name = "requests" version = "2.27.1" @@ -257,7 +271,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "8ff84c55fbd8a74bae903e6216e256cea47f571d88fcfa54f0492125746244ed" +content-hash = "c5ece7741408cb266fe803842b66f646317dc3a384e9c54ecbe66a14ce895fed" [metadata.files] atomicwrites = [ @@ -328,6 +342,10 @@ pytest = [ {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, ] +pytest-mock = [ + {file = "pytest-mock-3.8.2.tar.gz", hash = "sha256:77f03f4554392558700295e05aed0b1096a20d4a60a4f3ddcde58b0c31c8fca2"}, + {file = "pytest_mock-3.8.2-py3-none-any.whl", hash = 
"sha256:8a9e226d6c0ef09fcf20c94eb3405c388af438a90f3e39687f84166da82d5948"}, +] requests = [ {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, diff --git a/pyproject.toml b/pyproject.toml index 44ae341..13d676c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ tenacity = "^8.0.1" [tool.poetry.dev-dependencies] pytest = "^7.1.2" flake8 = "^4.0.1" +pytest-mock = "^3.8.2" [tool.poetry.scripts] podaac-data-subscriber = 'subscriber.podaac_data_subscriber:main' diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 327155a..81ba9e8 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -10,10 +10,14 @@ from typing import Dict from urllib import request from urllib.error import HTTPError +from urllib.request import urlretrieve import subprocess from urllib.parse import urlencode from urllib.request import Request, urlopen import hashlib +from datetime import datetime +import time + import requests @@ -286,6 +290,26 @@ def get_temporal_range(start, end, now): raise ValueError("One of start-date or end-date must be specified.") +def download_file(remote_file, output_path, retries=3): + failed = False + for r in range(retries): + try: + urlretrieve(remote_file, output_path) + except HTTPError as e: + if e.code == 503: + logging.warning(f'Error downloading {remote_file}. Retrying download.') + # back off on sleep time each error... + time.sleep(r) + if r >= retries: + failed = True + else: + #downlaoded fie without 503 + break + + if failed: + raise Exception("Could not download file.") + + # Retry using random exponential backoff if a 500 error is raised. Maximum 10 attempts. 
@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60), stop=tenacity.stop_after_attempt(10), @@ -436,3 +460,50 @@ def make_checksum(file_path, algorithm): for chunk in iter(lambda: f.read(4096), b""): hash_alg.update(chunk) return hash_alg.hexdigest() + +def get_cmr_collections(params, verbose=False): + query = urlencode(params) + url = "https://" + cmr + "/search/collections.umm_json?" + query + if verbose: + logging.info(url) + + # Build the request, add the search after header to it if it's not None (e.g. after the first iteration) + req = Request(url) + response = urlopen(req) + result = json.loads(response.read().decode()) + return result + + +def create_citation(collection_json, access_date): + citation_template = "{creator}. {year}. {title}. Ver. {version}. PO.DAAC, CA, USA. Dataset accessed {access_date} at {doi_authority}/{doi}" + + # Better error handling here may be needed... + doi = collection_json['DOI']["DOI"] + doi_authority = collection_json['DOI']["Authority"] + citation = collection_json["CollectionCitations"][0] + creator = citation["Creator"] + release_date = citation["ReleaseDate"] + title = citation["Title"] + version = citation["Version"] + year = datetime.strptime(release_date, "%Y-%m-%dT%H:%M:%S.000Z").year + return citation_template.format(creator=creator, year=year, title=title, version=version, doi_authority=doi_authority, doi=doi, access_date=access_date) + +def create_citation_file(short_name, provider, data_path, token=None, verbose=False): + # get collection umm-c METADATA + params = [ + ('provider', provider), + ('ShortName', short_name) + ] + if token is not None: + params.append(('token', token)) + + collection = get_cmr_collections(params, verbose)['items'][0] + + access_date = datetime.now().strftime("%Y-%m-%d") + + # create citation from umm-c metadata + citation = create_citation(collection['umm'], access_date) + # write file + + with open(data_path + "/" + short_name + ".citation.txt", "w") as text_file: 
+ text_file.write(citation) diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 158c541..38c97d8 100644 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -193,7 +193,11 @@ def run(args=None): except HTTPError as e: if e.code == 401: token = pa.refresh_token(token, 'podaac-subscriber') - params['token'] = token + # Updated: This is not always a dictionary... + # in fact, here it's always a list of tuples + for i, p in enumerate(params) : + if p[1] == "token": + params[i] = ("token", token) results = pa.get_search_results(params, args.verbose) else: raise e @@ -268,7 +272,9 @@ def run(args=None): skip_cnt += 1 continue - urlretrieve(f, output_path) + pa.download_file(f,output_path) + #urlretrieve(f, output_path) + pa.process_file(process_cmd, output_path, args) logging.info(str(datetime.now()) + " SUCCESS: " + f) success_cnt = success_cnt + 1 @@ -284,6 +290,14 @@ def run(args=None): logging.info("Downloaded Files: " + str(success_cnt)) logging.info("Failed Files: " + str(failure_cnt)) logging.info("Skipped Files: " + str(skip_cnt)) + + #create citation file if success > 0 + if success_cnt > 0: + try: + pa.create_citation_file(short_name, provider, data_path, token, args.verbose) + except: + logging.debug("Error generating citation",exc_info=True) + pa.delete_token(token_url, token) logging.info("END\n\n") diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index 04df68d..23cdc16 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -218,7 +218,12 @@ def run(args=None): except HTTPError as e: if e.code == 401: token = pa.refresh_token(token, 'podaac-subscriber') - params['token'] = token + # Updated: This is not always a dictionary... 
+ # in fact, here it's always a list of tuples + for i, p in enumerate(params) : + if p[1] == "token": + params[i] = ("token", token) + #params['token'] = token results = pa.get_search_results(params, args.verbose) else: raise e @@ -294,7 +299,9 @@ def run(args=None): skip_cnt += 1 continue - urlretrieve(f, output_path) + #urlretrieve(f, output_path) + pa.download_file(f,output_path) + pa.process_file(process_cmd, output_path, args) logging.info(str(datetime.now()) + " SUCCESS: " + f) success_cnt = success_cnt + 1 @@ -314,6 +321,13 @@ def run(args=None): logging.info("Downloaded Files: " + str(success_cnt)) logging.info("Failed Files: " + str(failure_cnt)) logging.info("Skipped Files: " + str(skip_cnt)) + + if success_cnt > 0: + try: + pa.create_citation_file(short_name, provider, data_path, token, args.verbose) + except: + logging.debug("Error generating citation", exc_info=True) + pa.delete_token(token_url, token) logging.info("END\n\n") #exit(0) diff --git a/tests/test_downloader_regression.py b/tests/test_downloader_regression.py index 875a517..cea7df0 100644 --- a/tests/test_downloader_regression.py +++ b/tests/test_downloader_regression.py @@ -19,10 +19,11 @@ def create_downloader_args(args): @pytest.mark.regression def test_downloader_limit_MUR(): shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True) - args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-30T00:00:00Z --limit 1'.split()) + args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-30T00:00:00Z --limit 1 --verbose'.split()) pdd.run(args2) - # count number of files downloaded... - assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name)])==1 + # So running the test in parallel, sometimes we get a 401 on the token... 
+ # Let's ensure we're only looking for data files here + assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name ])==1 shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2') #Test the downlaoder on MUR25 data for start/stop/, yyyy/mmm/dd dir structure, @@ -31,7 +32,7 @@ def test_downloader_limit_MUR(): @pytest.mark.regression def test_downloader_MUR(): shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True) - args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4'.split()) + args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 --verbose'.split()) pdd.run(args2) assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/02/20200102090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') @@ -54,7 +55,7 @@ def test_downloader_MUR(): t1 = os.path.getmtime('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') # Set the args to --force to re-download those data - args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 -f'.split()) + args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 -f --verbose'.split()) pdd.run(args2) assert t1 != os.path.getmtime('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') assert t2 != os.path.getmtime('./MUR25-JPL-L4-GLOB-v04.2/2020/01/02/20200102090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') @@ -73,6 +74,10 @@ def 
test_downloader_GRACE_with_SHA_512(tmpdir): pdd.run(args) assert len( os.listdir(directory_str) ) > 0 filename = directory_str + "/" + os.listdir(directory_str)[0] + #if the citation file was chosen above, get the next file since citation file is updated on successful run + if "citation.txt" in filename: + filename = directory_str + "/" + os.listdir(directory_str)[1] + modified_time_1 = os.path.getmtime(filename) print( modified_time_1 ) diff --git a/tests/test_subscriber.py b/tests/test_subscriber.py index 2e9d4cb..983cdce 100644 --- a/tests/test_subscriber.py +++ b/tests/test_subscriber.py @@ -1,9 +1,14 @@ from subscriber import podaac_data_subscriber as pds from subscriber import podaac_access as pa + +from urllib.error import HTTPError import pytest import os from pathlib import Path import shutil +import json +import tempfile +from os.path import exists def test_temporal_range(): @@ -23,6 +28,46 @@ def cleanup_update_test(): print("Cleanup...") shutil.rmtree(data_dir_with_updates) +def test_create_citation_file(): + with tempfile.TemporaryDirectory() as tmpdirname: + pa.create_citation_file("SWOT_SIMULATED_L2_KARIN_SSH_GLORYS_CALVAL_V1", "POCLOUD", tmpdirname) + assert exists(tmpdirname+"/SWOT_SIMULATED_L2_KARIN_SSH_GLORYS_CALVAL_V1.citation.txt") + +def test_citation_creation(): + collection_umm = '''{ + "DOI": { + "DOI": "10.5067/KARIN-2GLC1", + "Authority": "https://doi.org" + }, + "CollectionCitations": [ + { + "Creator": "SWOT", + "ReleasePlace": "PO.DAAC", + "Title": "SWOT Level-2 Simulated SSH from MITgcm LLC4320 Science Quality Version 1.0", + "Publisher": "PO.DAAC", + "ReleaseDate": "2022-01-31T00:00:00.000Z", + "Version": "1.0" + }, + { + "Creator": "CNES/CLS", + "ReleasePlace": "CNES/AVISO", + "Title": "Simulated SWOT products", + "OnlineResource": { + "Linkage": "http://doi.org/10.24400/527896/a01-2021.006", + "Name": " Simulated SWOT Sea Surface Height products", + "Description": "Simulated SWOT Sea Surface Height products KaRIn and Nadir.", + 
"MimeType": "text/html" + }, + "Publisher": "PODAAC", + "ReleaseDate": "2021-11-01T00:00:00.000Z", + "Version": "1.0" + } + ] + } + ''' + collection_umm_json = json.loads(collection_umm) + citation = pa.create_citation(collection_umm_json, "2022-07-21") + assert citation == "SWOT. 2022. SWOT Level-2 Simulated SSH from MITgcm LLC4320 Science Quality Version 1.0. Ver. 1.0. PO.DAAC, CA, USA. Dataset accessed 2022-07-21 at https://doi.org/10.5067/KARIN-2GLC1" def test_search_after(): # cmr query: https://cmr.earthdata.nasa.gov/search/granules.umm_json?page_size=2000&sort_key=-start_date&provider=POCLOUD&ShortName=JASON_CS_S6A_L2_ALT_LR_STD_OST_NRT_F&temporal=2000-01-01T10%3A00%3A00Z%2C2022-04-15T00%3A00%3A00Z&bounding_box=-180%2C-90%2C180%2C90 @@ -136,6 +181,25 @@ def test_validate(): # with pytest.raises(SystemExit): # a = validate(["-c", "viirs", "-d", "/data", "-m","60b"]) +def test_param_update(): + params = [ + ('sort_key', "-start_date"), + ('provider', "'POCLOUD'"), + ('token', "123"), + ] + + for i, p in enumerate(params) : + if p[1] == "token": + params[i] = ("token", "newToken") + + for i,p in enumerate(params) : + if p[1] == "token": + assert p[2] == "newToken" + +def test_downloader_retry(mocker): + mck = mocker.patch('subscriber.podaac_access.urlretrieve', side_effect=HTTPError("url", 503, "msg", None, None)) + pa.download_file("myUrl", "outputPath") + assert mck.call_count == 3 def validate(args): parser = pds.create_parser() diff --git a/tests/test_subscriber_regression.py b/tests/test_subscriber_regression.py index 07dc2f8..ffa3685 100644 --- a/tests/test_subscriber_regression.py +++ b/tests/test_subscriber_regression.py @@ -53,7 +53,7 @@ def test_subscriber_MUR_update_file_no_redownload(): except OSError as e: print("Expecting this...") - args2 = create_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4'.split()) + args2 = create_args('-c MUR25-JPL-L4-GLOB-v04.2 -d 
./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 --verbose'.split()) pds.run(args2) assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/02/20200102090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') From c26bafbe0cd7939c837abd976d1d69aa6fc8def1 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Fri, 5 Aug 2022 09:58:11 -0700 Subject: [PATCH 02/34] fixed issues where token was not proagated to CMR queries (#95) --- CHANGELOG.md | 1 + subscriber/podaac_data_downloader.py | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4995cc..bbc6ed3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [unreleased] ### Fixed - Fixed an issue where token-refresh was expecting a dictionary, not a list of tuples +- Fixed issues where token was not propagated to downloader CMR query [94](https://github.com/podaac/data-subscriber/issues/94) - Fixed an issue with 503 errors on data download not being re-tried. [97](https://github.com/podaac/data-subscriber/issues/97) ### Added - Added citation file creation when data are downloaded [91](https://github.com/podaac/data-subscriber/issues/91). Required some updates to the regression testing. 
diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 38c97d8..3ee5347 100644 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -178,6 +178,7 @@ def run(args=None): ('provider', provider), ('ShortName', short_name), ('temporal', temporal_range), + ('token', token), ] if args.verbose: logging.info("Temporal Range: " + temporal_range) From f8783bdf69dd5d8fbf1aee4c152358af83f4b727 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Tue, 23 Aug 2022 15:12:45 -0700 Subject: [PATCH 03/34] Misc fixes (#101) * added ".tiff" to default extensions to address #100 * removed 'warning' message on not downloading all data to close #99 * updated help documentation for start/end times to close #79 --- subscriber/podaac_access.py | 2 +- subscriber/podaac_data_downloader.py | 8 ++------ subscriber/podaac_data_subscriber.py | 8 ++------ 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 81ba9e8..241d757 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -26,7 +26,7 @@ from datetime import datetime __version__ = "1.10.2" -extensions = [".nc", ".h5", ".zip", ".tar.gz"] +extensions = [".nc", ".h5", ".zip", ".tar.gz", ".tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" token_url = "https://" + cmr + "/legacy-services/rest/tokens" diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 3ee5347..44fdb45 100644 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -60,9 +60,9 @@ def create_parser(): help="Cycle number for determining downloads. can be repeated for multiple cycles", action='append', type=int) parser.add_argument("-sd", "--start-date", required=False, dest="startDate", - help="The ISO date time before which data should be retrieved. 
For Example, --start-date 2021-01-14T00:00:00Z") # noqa E501 + help="The ISO date time after which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z") # noqa E501 parser.add_argument("-ed", "--end-date", required=False, dest="endDate", - help="The ISO date time after which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z") # noqa E501 + help="The ISO date time before which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z") # noqa E501 # Adding optional arguments parser.add_argument("-f", "--force", dest="force", action="store_true", help = "Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches") # noqa E501 @@ -226,10 +226,6 @@ def run(args=None): downloads = [item for sublist in downloads_all for item in sublist] - if len(downloads) >= page_size: - logging.warning("Only the most recent " + str( - page_size) + " granules will be downloaded; try adjusting your search criteria (suggestion: reduce time period or spatial region of search) to ensure you retrieve all granules.") - # filter list based on extension if not extensions: extensions = pa.extensions diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index 23cdc16..e843a5f 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -66,10 +66,10 @@ def create_parser(): # spatiotemporal arguments parser.add_argument("-sd", "--start-date", dest="startDate", - help="The ISO date time before which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z", + help="The ISO date time after which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z", default=False) # noqa E501 parser.add_argument("-ed", "--end-date", dest="endDate", - help="The ISO date time after which data should be retrieved. 
For Example, --end-date 2021-01-14T00:00:00Z", + help="The ISO date time before which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z", default=False) # noqa E501 parser.add_argument("-b", "--bounds", dest="bbox", help="The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b=\"-180,-90,180,90\" syntax when calling from the command line. Default: \"-180,-90,180,90\".", @@ -254,10 +254,6 @@ def run(args=None): downloads = [item for sublist in downloads_all for item in sublist] - if len(downloads) >= page_size: - logging.warning("Only the most recent " + str( - page_size) + " granules will be downloaded; try adjusting your search criteria (suggestion: reduce time period or spatial region of search) to ensure you retrieve all granules.") - # filter list based on extension if not extensions: extensions = pa.extensions From 5694b9de1dcd06dc1a9b80d9a4f57581fd7fc932 Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Tue, 23 Aug 2022 15:15:36 -0700 Subject: [PATCH 04/34] added version update, updates to CHANGELOG --- CHANGELOG.md | 7 +++++-- pyproject.toml | 2 +- subscriber/podaac_access.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbc6ed3..1f5b15f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -## [unreleased] +## 1.11.0 ### Fixed - Fixed an issue where token-refresh was expecting a dictionary, not a list of tuples - Fixed issues where token was not propagated to downloader CMR query [94](https://github.com/podaac/data-subscriber/issues/94) -- Fixed an issue with 503 errors on data download not being re-tried. 
[97](https://github.com/podaac/data-subscriber/issues/97) +- Fixed an issue with 503 errors on data download not being re-tried. [97](https://github.com/podaac/data-subscriber/issues/9797) +- added ".tiff" to default extensions to address #[100](https://github.com/podaac/data-subscriber/issues/100) +- removed erroneous 'warning' message on not downloading all data to close [99](https://github.com/podaac/data-subscriber/issues/99) +- updated help documentation for start/end times to close [79](https://github.com/podaac/data-subscriber/issues/79) ### Added - Added citation file creation when data are downloaded [91](https://github.com/podaac/data-subscriber/issues/91). Required some updates to the regression testing. diff --git a/pyproject.toml b/pyproject.toml index 13d676c..2ad16d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.10.2" +version = "1.11.0" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 81ba9e8..0b914bb 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -25,7 +25,7 @@ import tenacity from datetime import datetime -__version__ = "1.10.2" +__version__ = "1.11.0" extensions = [".nc", ".h5", ".zip", ".tar.gz"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" From 15aba90201b79bbf01f4787900b3b41160441390 Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Wed, 24 Aug 2022 12:17:42 -0700 Subject: [PATCH 05/34] added token get,delete, refresh and list operations --- subscriber/podaac_access.py | 72 +++++++++++++++++++++++++--------- tests/test_token_regression.py | 49 +++++++++++++++++++++++ 2 files changed, 102 insertions(+), 19 deletions(-) create mode 100644 tests/test_token_regression.py diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 5c0e3bb..0f1edf0 100644 --- 
a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -17,6 +17,8 @@ import hashlib from datetime import datetime import time +from requests.auth import HTTPBasicAuth + import requests @@ -29,7 +31,8 @@ extensions = [".nc", ".h5", ".zip", ".tar.gz", ".tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" -token_url = "https://" + cmr + "/legacy-services/rest/tokens" +token_url = "https://" + edl + "/api/users" + IPAddr = "127.0.0.1" # socket.gethostbyname(hostname) @@ -92,17 +95,26 @@ def setup_earthdata_login_auth(endpoint): ############################################################################### # GET TOKEN FROM CMR ############################################################################### -def get_token(url: str, client_id: str, endpoint: str) -> str: +@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60), + stop=tenacity.stop_after_attempt(3), + reraise=True, + retry=(tenacity.retry_if_result(lambda x: x == '')) + ) +def get_token(url: str) -> str: try: token: str = '' - username, _, password = netrc.netrc().authenticators(endpoint) - xml: str = """ - {}{}{} - {}""".format(username, password, client_id, IPAddr) # noqa E501 - headers: Dict = {'Content-Type': 'application/xml', 'Accept': 'application/json'} # noqa E501 - resp = requests.post(url, headers=headers, data=xml) + username, _, password = netrc.netrc().authenticators(edl) + headers: Dict = {'Accept': 'application/json'} # noqa E501 + resp = requests.post(url+"/token", headers=headers, auth=HTTPBasicAuth(username, password)) response_content: Dict = json.loads(resp.content) - token = response_content['token']['id'] + if "error" in response_content: + if response_content["error"] == "max_token_limit": + logging.error("Max tokens acquired from URS. 
Deleting existing tokens") + for t in list_tokens(url): + delete_token(token_url,t) + return '' + #logging.debug("Status: {}".format(resp.status_code)) + token = response_content['access_token'] # What error is thrown here? Value Error? Request Errors? except: # noqa E722 @@ -113,23 +125,45 @@ def get_token(url: str, client_id: str, endpoint: str) -> str: ############################################################################### # DELETE TOKEN FROM CMR ############################################################################### -def delete_token(url: str, token: str) -> None: +def delete_token(url: str, token: str) -> bool: try: - headers: Dict = {'Content-Type': 'application/xml', 'Accept': 'application/json'} # noqa E501 - url = '{}/{}'.format(url, token) - resp = requests.request('DELETE', url, headers=headers) - if resp.status_code == 204: - logging.info("CMR token successfully deleted") + username, _, password = netrc.netrc().authenticators(edl) + headers: Dict = {'Accept': 'application/json'} + resp = requests.post(url+"/revoke_token",params={"token":token}, headers=headers, auth=HTTPBasicAuth(username, password)) + + if resp.status_code == 200: + logging.info("EDL token successfully deleted") + return True else: - logging.info("CMR token deleting failed.") + logging.info("EDL token deleting failed.") + except: # noqa E722 logging.warning("Error deleting the token") + return False + +def list_tokens(url: str): + try: + tokens = [] + username, _, password = netrc.netrc().authenticators(edl) + headers: Dict = {'Accept': 'application/json'} # noqa E501 + resp = requests.get(url+"/tokens", headers=headers, auth=HTTPBasicAuth(username, password)) + response_content = json.loads(resp.content) + + for x in response_content: + tokens.append(x['access_token']) + + except: # noqa E722 + logging.warning("Error getting the token - check user name and password") + return tokens + + + -def refresh_token(old_token: str, client_id: str): +def refresh_token(old_token: 
str): setup_earthdata_login_auth(edl) - delete_token(token_url, old_token) - return get_token(token_url, client_id, edl) + delete_token(token_url,old_token) + return get_token(token_url) def validate(args): diff --git a/tests/test_token_regression.py b/tests/test_token_regression.py new file mode 100644 index 0000000..c732bf2 --- /dev/null +++ b/tests/test_token_regression.py @@ -0,0 +1,49 @@ +import pytest +import os +from os.path import exists + +from subscriber import podaac_access as pa +import shutil +from pathlib import Path + + +def setup_module(module): + print('*****SETUP*****') + tokens = pa.list_tokens(pa.token_url) + for x in tokens: + pa.delete_token(pa.token_url, x) + +def teardown_module(module): + print('*****TEARDOWN*****') + tokens = pa.list_tokens(pa.token_url) + for x in tokens: + pa.delete_token(pa.token_url, x) + +# REGRESSION TEST CURRENTLY REQUIRES A .NETRC file for CMR/Data Download +# token API can be found here: https://wiki.earthdata.nasa.gov/display/EL/API+Documentation +def test_list_tokens(): + tokens = pa.list_tokens(pa.token_url) + for x in tokens: + pa.delete_token(pa.token_url, x) + +def test_edl_getToken(): + token = pa.get_token(pa.token_url) + assert token != "" + token = pa.refresh_token(token) + assert token != "" + tokens = pa.list_tokens(pa.token_url) + + assert len(tokens) == 1 + for x in tokens: + assert x != "" + + assert True == pa.delete_token(pa.token_url, token) + +def test_edl_max_token(): + #call this 3 times since we're capped out at 2 total... + token = pa.get_token(pa.token_url) + assert token != "" + token = pa.get_token(pa.token_url) + assert token != "" + token = pa.get_token(pa.token_url) + assert token != "" From 80f6b11ff0ddccb398e7c091d1f4e88fd3e6929c Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Wed, 24 Aug 2022 12:25:13 -0700 Subject: [PATCH 06/34] Revert "added token get,delete, refresh and list operations" This reverts commit 15aba90201b79bbf01f4787900b3b41160441390. 
--- subscriber/podaac_access.py | 72 +++++++++------------------------- tests/test_token_regression.py | 49 ----------------------- 2 files changed, 19 insertions(+), 102 deletions(-) delete mode 100644 tests/test_token_regression.py diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 0f1edf0..5c0e3bb 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -17,8 +17,6 @@ import hashlib from datetime import datetime import time -from requests.auth import HTTPBasicAuth - import requests @@ -31,8 +29,7 @@ extensions = [".nc", ".h5", ".zip", ".tar.gz", ".tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" -token_url = "https://" + edl + "/api/users" - +token_url = "https://" + cmr + "/legacy-services/rest/tokens" IPAddr = "127.0.0.1" # socket.gethostbyname(hostname) @@ -95,26 +92,17 @@ def setup_earthdata_login_auth(endpoint): ############################################################################### # GET TOKEN FROM CMR ############################################################################### -@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60), - stop=tenacity.stop_after_attempt(3), - reraise=True, - retry=(tenacity.retry_if_result(lambda x: x == '')) - ) -def get_token(url: str) -> str: +def get_token(url: str, client_id: str, endpoint: str) -> str: try: token: str = '' - username, _, password = netrc.netrc().authenticators(edl) - headers: Dict = {'Accept': 'application/json'} # noqa E501 - resp = requests.post(url+"/token", headers=headers, auth=HTTPBasicAuth(username, password)) + username, _, password = netrc.netrc().authenticators(endpoint) + xml: str = """ + {}{}{} + {}""".format(username, password, client_id, IPAddr) # noqa E501 + headers: Dict = {'Content-Type': 'application/xml', 'Accept': 'application/json'} # noqa E501 + resp = requests.post(url, headers=headers, data=xml) response_content: Dict = json.loads(resp.content) - if "error" in response_content: - 
if response_content["error"] == "max_token_limit": - logging.error("Max tokens acquired from URS. Deleting existing tokens") - for t in list_tokens(url): - delete_token(token_url,t) - return '' - #logging.debug("Status: {}".format(resp.status_code)) - token = response_content['access_token'] + token = response_content['token']['id'] # What error is thrown here? Value Error? Request Errors? except: # noqa E722 @@ -125,45 +113,23 @@ def get_token(url: str) -> str: ############################################################################### # DELETE TOKEN FROM CMR ############################################################################### -def delete_token(url: str, token: str) -> bool: +def delete_token(url: str, token: str) -> None: try: - username, _, password = netrc.netrc().authenticators(edl) - headers: Dict = {'Accept': 'application/json'} - resp = requests.post(url+"/revoke_token",params={"token":token}, headers=headers, auth=HTTPBasicAuth(username, password)) - - if resp.status_code == 200: - logging.info("EDL token successfully deleted") - return True + headers: Dict = {'Content-Type': 'application/xml', 'Accept': 'application/json'} # noqa E501 + url = '{}/{}'.format(url, token) + resp = requests.request('DELETE', url, headers=headers) + if resp.status_code == 204: + logging.info("CMR token successfully deleted") else: - logging.info("EDL token deleting failed.") - + logging.info("CMR token deleting failed.") except: # noqa E722 logging.warning("Error deleting the token") - return False - -def list_tokens(url: str): - try: - tokens = [] - username, _, password = netrc.netrc().authenticators(edl) - headers: Dict = {'Accept': 'application/json'} # noqa E501 - resp = requests.get(url+"/tokens", headers=headers, auth=HTTPBasicAuth(username, password)) - response_content = json.loads(resp.content) - - for x in response_content: - tokens.append(x['access_token']) - - except: # noqa E722 - logging.warning("Error getting the token - check user name and 
password") - return tokens - - - -def refresh_token(old_token: str): +def refresh_token(old_token: str, client_id: str): setup_earthdata_login_auth(edl) - delete_token(token_url,old_token) - return get_token(token_url) + delete_token(token_url, old_token) + return get_token(token_url, client_id, edl) def validate(args): diff --git a/tests/test_token_regression.py b/tests/test_token_regression.py deleted file mode 100644 index c732bf2..0000000 --- a/tests/test_token_regression.py +++ /dev/null @@ -1,49 +0,0 @@ -import pytest -import os -from os.path import exists - -from subscriber import podaac_access as pa -import shutil -from pathlib import Path - - -def setup_module(module): - print('*****SETUP*****') - tokens = pa.list_tokens(pa.token_url) - for x in tokens: - pa.delete_token(pa.token_url, x) - -def teardown_module(module): - print('*****TEARDOWN*****') - tokens = pa.list_tokens(pa.token_url) - for x in tokens: - pa.delete_token(pa.token_url, x) - -# REGRESSION TEST CURRENTLY REQUIRES A .NETRC file for CMR/Data Download -# token API can be found here: https://wiki.earthdata.nasa.gov/display/EL/API+Documentation -def test_list_tokens(): - tokens = pa.list_tokens(pa.token_url) - for x in tokens: - pa.delete_token(pa.token_url, x) - -def test_edl_getToken(): - token = pa.get_token(pa.token_url) - assert token != "" - token = pa.refresh_token(token) - assert token != "" - tokens = pa.list_tokens(pa.token_url) - - assert len(tokens) == 1 - for x in tokens: - assert x != "" - - assert True == pa.delete_token(pa.token_url, token) - -def test_edl_max_token(): - #call this 3 times since we're capped out at 2 total... 
- token = pa.get_token(pa.token_url) - assert token != "" - token = pa.get_token(pa.token_url) - assert token != "" - token = pa.get_token(pa.token_url) - assert token != "" From 73339481022476fd7ad1d0e7c8fe5e779b9a7ca8 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Fri, 2 Sep 2022 09:18:08 -0700 Subject: [PATCH 07/34] Update python-app.yml --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index d212c98..62275e3 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -16,7 +16,7 @@ jobs: max-parallel: 2 matrix: python-version: [ "3.7", "3.8", "3.9", "3.10" ] - poetry-version: [ "1.1" ] + poetry-version: [ "1.1.14" ] os: [ ubuntu-18.04, macos-latest, windows-latest ] runs-on: ${{ matrix.os }} steps: From 583121cff49bbec183cd6abfaa5ff017cddb729e Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Fri, 2 Sep 2022 12:15:41 -0700 Subject: [PATCH 08/34] updated poetry version Version matches build/test versions. 
--- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c255fa6..eba32fa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,7 @@ jobs: - name: Install Poetry uses: abatilo/actions-poetry@v2.0.0 with: - poetry-version: 1.1 + poetry-version: 1.1.14 - name: Install dependencies run: | poetry install From 3f9d050c1ff1491741040f8976b31526c0c5171b Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Wed, 19 Oct 2022 11:20:18 -0700 Subject: [PATCH 09/34] Issues/98 (#107) * added token get,delete, refresh and list operations * Revert "added token get,delete, refresh and list operations" This reverts commit 15aba90201b79bbf01f4787900b3b41160441390. * added EDL (not cmr-token) based get, list,delete, refresh token * updated token regression tests * updates and tests for subscriber moving to EDL. * marked tests as regression test * Update subscriber/podaac_data_downloader.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_data_subscriber.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_access.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_access.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_access.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * added exec info to errors, cleaned up some log statements Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> --- poetry.lock | 113 ++++++++++++--------------- subscriber/podaac_access.py | 89 +++++++++++++++------ subscriber/podaac_data_downloader.py | 6 +- subscriber/podaac_data_subscriber.py | 6 +- tests/test_token_regression.py | 46 
+++++++++++ 5 files changed, 166 insertions(+), 94 deletions(-) create mode 100644 tests/test_token_regression.py diff --git a/poetry.lock b/poetry.lock index 94f8c70..72a359c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,28 +1,20 @@ -[[package]] -name = "atomicwrites" -version = "1.4.0" -description = "Atomic file writes." -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - [[package]] name = "attrs" -version = "21.4.0" +version = "22.1.0" description = "Classes Without Boilerplate" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.5" [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"] [[package]] name = "certifi" -version = "2022.5.18.1" +version = "2022.9.24" description = "Python package for providing Mozilla's CA Bundle." 
category = "main" optional = false @@ -30,18 +22,18 @@ python-versions = ">=3.6" [[package]] name = "charset-normalizer" -version = "2.0.12" +version = "2.1.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." category = "main" optional = false -python-versions = ">=3.5.0" +python-versions = ">=3.6.0" [package.extras] unicode_backport = ["unicodedata2"] [[package]] name = "colorama" -version = "0.4.4" +version = "0.4.5" description = "Cross-platform colored terminal text." category = "dev" optional = false @@ -63,7 +55,7 @@ pyflakes = ">=2.4.0,<2.5.0" [[package]] name = "idna" -version = "3.3" +version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" category = "main" optional = false @@ -164,14 +156,13 @@ diagrams = ["railroad-diagrams", "jinja2"] [[package]] name = "pytest" -version = "7.1.2" +version = "7.1.3" description = "pytest: simple powerful testing with Python" category = "dev" optional = false python-versions = ">=3.7" [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} @@ -186,7 +177,7 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2. [[package]] name = "pytest-mock" -version = "3.8.2" +version = "3.9.0" description = "Thin-wrapper around the mock package for easier use with pytest" category = "dev" optional = false @@ -200,25 +191,25 @@ dev = ["pre-commit", "tox", "pytest-asyncio"] [[package]] name = "requests" -version = "2.27.1" +version = "2.28.1" description = "Python HTTP for Humans." 
category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.7, <4" [package.dependencies] certifi = ">=2017.4.17" -charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} -idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" urllib3 = ">=1.21.1,<1.27" [package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "tenacity" -version = "8.0.1" +version = "8.1.0" description = "Retry code until it succeeds" category = "main" optional = false @@ -237,7 +228,7 @@ python-versions = ">=3.7" [[package]] name = "typing-extensions" -version = "4.2.0" +version = "4.3.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "dev" optional = false @@ -245,28 +236,28 @@ python-versions = ">=3.7" [[package]] name = "urllib3" -version = "1.26.9" +version = "1.26.12" description = "HTTP library with thread-safe connection pooling, file post, and more." 
category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" [package.extras] brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "zipp" -version = "3.8.0" +version = "3.8.1" description = "Backport of pathlib-compatible object wrapper for zip files" category = "dev" optional = false python-versions = ">=3.7" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] +docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] [metadata] lock-version = "1.1" @@ -274,33 +265,29 @@ python-versions = "^3.7" content-hash = "c5ece7741408cb266fe803842b66f646317dc3a384e9c54ecbe66a14ce895fed" [metadata.files] -atomicwrites = [ - {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, - {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, -] attrs = [ - {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, - {file = "attrs-21.4.0.tar.gz", hash = 
"sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, + {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, + {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, ] certifi = [ - {file = "certifi-2022.5.18.1-py3-none-any.whl", hash = "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a"}, - {file = "certifi-2022.5.18.1.tar.gz", hash = "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7"}, + {file = "certifi-2022.9.24-py3-none-any.whl", hash = "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"}, + {file = "certifi-2022.9.24.tar.gz", hash = "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14"}, ] charset-normalizer = [ - {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"}, - {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"}, + {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, + {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, ] colorama = [ - {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, - {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, + {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, + {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, ] flake8 = [ {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = 
"sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"}, ] idna = [ - {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, - {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] importlib-metadata = [ {file = "importlib_metadata-4.2.0-py3-none-any.whl", hash = "sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b"}, @@ -339,34 +326,34 @@ pyparsing = [ {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, ] pytest = [ - {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, - {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, + {file = "pytest-7.1.3-py3-none-any.whl", hash = "sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"}, + {file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"}, ] pytest-mock = [ - {file = "pytest-mock-3.8.2.tar.gz", hash = "sha256:77f03f4554392558700295e05aed0b1096a20d4a60a4f3ddcde58b0c31c8fca2"}, - {file = "pytest_mock-3.8.2-py3-none-any.whl", hash = "sha256:8a9e226d6c0ef09fcf20c94eb3405c388af438a90f3e39687f84166da82d5948"}, + {file = "pytest-mock-3.9.0.tar.gz", hash = "sha256:c899a0dcc8a5f22930acd020b500abd5f956911f326864a3b979e4866e14da82"}, + {file = "pytest_mock-3.9.0-py3-none-any.whl", hash = 
"sha256:1a1b9264224d026932d6685a0f9cef3b61d91563c3e74af9fe5afb2767e13812"}, ] requests = [ - {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, - {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, + {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, + {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, ] tenacity = [ - {file = "tenacity-8.0.1-py3-none-any.whl", hash = "sha256:f78f4ea81b0fabc06728c11dc2a8c01277bfc5181b321a4770471902e3eb844a"}, - {file = "tenacity-8.0.1.tar.gz", hash = "sha256:43242a20e3e73291a28bcbcacfd6e000b02d3857a9a9fff56b297a27afdc932f"}, + {file = "tenacity-8.1.0-py3-none-any.whl", hash = "sha256:35525cd47f82830069f0d6b73f7eb83bc5b73ee2fff0437952cedf98b27653ac"}, + {file = "tenacity-8.1.0.tar.gz", hash = "sha256:e48c437fdf9340f5666b92cd7990e96bc5fc955e1298baf4a907e3972067a445"}, ] tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] typing-extensions = [ - {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, - {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, + {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, + {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, ] urllib3 = [ - {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = 
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, - {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, + {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"}, + {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"}, ] zipp = [ - {file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"}, - {file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"}, + {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, + {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, ] diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 5c0e3bb..8ba7fad 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -17,6 +17,8 @@ import hashlib from datetime import datetime import time +from requests.auth import HTTPBasicAuth + import requests @@ -29,7 +31,8 @@ extensions = [".nc", ".h5", ".zip", ".tar.gz", ".tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" -token_url = "https://" + cmr + "/legacy-services/rest/tokens" +token_url = "https://" + edl + "/api/users" + IPAddr = "127.0.0.1" # socket.gethostbyname(hostname) @@ -89,47 +92,87 @@ def setup_earthdata_login_auth(endpoint): request.install_opener(opener) + +def get_token(url: str) -> str: + tokens = list_tokens(url) + if len(tokens) == 0 : + return create_token(url) + else: + return tokens[0] + ############################################################################### # GET TOKEN FROM CMR ############################################################################### -def get_token(url: str, client_id: str, 
endpoint: str) -> str: +@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60), + stop=tenacity.stop_after_attempt(3), + reraise=True, + retry=(tenacity.retry_if_result(lambda x: x == '')) + ) +def create_token(url: str) -> str: try: token: str = '' - username, _, password = netrc.netrc().authenticators(endpoint) - xml: str = """ - {}{}{} - {}""".format(username, password, client_id, IPAddr) # noqa E501 - headers: Dict = {'Content-Type': 'application/xml', 'Accept': 'application/json'} # noqa E501 - resp = requests.post(url, headers=headers, data=xml) - response_content: Dict = json.loads(resp.content) - token = response_content['token']['id'] + username, _, password = netrc.netrc().authenticators(edl) + headers: Dict = {'Accept': 'application/json'} # noqa E501 - # What error is thrown here? Value Error? Request Errors? + + resp = requests.post(url+"/token", headers=headers, auth=HTTPBasicAuth(username, password)) + response_content: Dict = json.loads(resp.content) + if "error" in response_content: + if response_content["error"] == "max_token_limit": + logging.error("Max tokens acquired from URS. 
Using existing token") + tokens=list_tokens(url) + return tokens[0] + token = response_content['access_token'] + + # Add better error handling there + # Max tokens + # Wrong Username/Passsword + # Other except: # noqa E722 - logging.warning("Error getting the token - check user name and password") + logging.warning("Error getting the token - check user name and password", exc_info=True) return token ############################################################################### # DELETE TOKEN FROM CMR ############################################################################### -def delete_token(url: str, token: str) -> None: +def delete_token(url: str, token: str) -> bool: try: - headers: Dict = {'Content-Type': 'application/xml', 'Accept': 'application/json'} # noqa E501 - url = '{}/{}'.format(url, token) - resp = requests.request('DELETE', url, headers=headers) - if resp.status_code == 204: - logging.info("CMR token successfully deleted") + username, _, password = netrc.netrc().authenticators(edl) + headers: Dict = {'Accept': 'application/json'} + resp = requests.post(url+"/revoke_token",params={"token":token}, headers=headers, auth=HTTPBasicAuth(username, password)) + + if resp.status_code == 200: + logging.info("EDL token successfully deleted") + return True else: - logging.info("CMR token deleting failed.") + logging.info("EDL token deleting failed.") + + except: # noqa E722 + logging.warning("Error deleting the token", exc_info=True) + + return False + +def list_tokens(url: str): + try: + tokens = [] + username, _, password = netrc.netrc().authenticators(edl) + headers: Dict = {'Accept': 'application/json'} # noqa E501 + resp = requests.get(url+"/tokens", headers=headers, auth=HTTPBasicAuth(username, password)) + response_content = json.loads(resp.content) + + for x in response_content: + tokens.append(x['access_token']) + except: # noqa E722 - logging.warning("Error deleting the token") + logging.warning("Error getting the token - check user name and 
password", exc_info=True) + return tokens -def refresh_token(old_token: str, client_id: str): +def refresh_token(old_token: str): setup_earthdata_login_auth(edl) - delete_token(token_url, old_token) - return get_token(token_url, client_id, edl) + delete_token(token_url,old_token) + return get_token(token_url) def validate(args): diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 44fdb45..00b78a3 100644 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -123,7 +123,7 @@ def run(args=None): exit(1) pa.setup_earthdata_login_auth(edl) - token = pa.get_token(token_url, 'podaac-subscriber', edl) + token = pa.get_token(token_url) provider = args.provider start_date_time = args.startDate @@ -193,7 +193,7 @@ def run(args=None): results = pa.get_search_results(params, args.verbose) except HTTPError as e: if e.code == 401: - token = pa.refresh_token(token, 'podaac-subscriber') + token = pa.refresh_token(token) # Updated: This is not always a dictionary... 
# in fact, here it's always a list of tuples for i, p in enumerate(params) : @@ -294,8 +294,6 @@ def run(args=None): pa.create_citation_file(short_name, provider, data_path, token, args.verbose) except: logging.debug("Error generating citation",exc_info=True) - - pa.delete_token(token_url, token) logging.info("END\n\n") diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index e843a5f..eb103ac 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -122,7 +122,7 @@ def run(args=None): exit(1) pa.setup_earthdata_login_auth(edl) - token = pa.get_token(token_url, 'podaac-subscriber', edl) + token = pa.get_token(token_url) mins = args.minutes # In this case download files ingested in the last 60 minutes -- change this to whatever setting is needed provider = args.provider @@ -217,7 +217,7 @@ def run(args=None): results = pa.get_search_results(params, args.verbose) except HTTPError as e: if e.code == 401: - token = pa.refresh_token(token, 'podaac-subscriber') + token = pa.refresh_token(token) # Updated: This is not always a dictionary... 
# in fact, here it's always a list of tuples for i, p in enumerate(params) : @@ -324,9 +324,7 @@ def run(args=None): except: logging.debug("Error generating citation", exc_info=True) - pa.delete_token(token_url, token) logging.info("END\n\n") - #exit(0) def main(): diff --git a/tests/test_token_regression.py b/tests/test_token_regression.py new file mode 100644 index 0000000..8eb0641 --- /dev/null +++ b/tests/test_token_regression.py @@ -0,0 +1,46 @@ +import pytest +import os +from os.path import exists + +from subscriber import podaac_access as pa +import shutil +from pathlib import Path + +@pytest.mark.regression +def setup_function(method): + # Deletes all known tokens + tokens = pa.list_tokens(pa.token_url) + for x in tokens: + pa.delete_token(pa.token_url, x) + +@pytest.mark.regression +def teardown_function(method): + # Deletes all known tokens + tokens = pa.list_tokens(pa.token_url) + for x in tokens: + pa.delete_token(pa.token_url, x) + +# REGRESSION TEST CURRENTLY REQUIRES A .NETRC file for CMR/Data Download +# token API can be found here: https://wiki.earthdata.nasa.gov/display/EL/API+Documentation +# explore https://urs.earthdata.nasa.gov/documentation/for_integrators/api_documentation#/oauth/token +@pytest.mark.regression +def test_list_tokens(): + tokens = pa.list_tokens(pa.token_url) + assert len(tokens) == 0 + pa.get_token(pa.token_url) + tokens = pa.list_tokens(pa.token_url) + assert len(tokens) == 1 + +@pytest.mark.regression +def test_edl_getToken(): + token = pa.get_token(pa.token_url) + assert token != "" + token = pa.refresh_token(token) + assert token != "" + tokens = pa.list_tokens(pa.token_url) + + assert len(tokens) == 1 + for x in tokens: + assert x != "" + + assert True == pa.delete_token(pa.token_url, token) From 27b475aad403562bda86eaa2a4627284c9e4f84e Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Mon, 12 Dec 2022 10:25:04 -0800 Subject: [PATCH 10/34] Issues/109 (#111) * Develop (#103) * 
Issues/91 (#92) * added citation creation tests and functionality to subscriber and downloader * added verbose option to create_citation_file command, previously hard coded * updated changelog (whoops) and fixed regression test: 1. Issue where the citation file now downloaded affected the counts 2. Issue where the logic for determining if a file modified time was changing or not was picking up the new citation file which _always_ gets rewritten to update the 'last accessed' date. * updated request to include exec_info in warning; fixed issue with params not being a dictionary caused errors * changed a warning to debug for citation file. fixed test issues * Enable debug logging during regression tests and set max parallel workflows to 2 * added output to pytest * fixed test to only look for downlaoded data files not citation file due to 'random' cmr errors when creating a citation. * added mock testing and retry on 503 * added 503 fixes Co-authored-by: Frank Greguska * fixed issues where token was not proagated to CMR queries (#95) * Misc fixes (#101) * added ".tiff" to default extensions to address #100 * removed 'warning' message on not downloading all data to close #99 * updated help documentation for start/end times to close #79 * added version update, updates to CHANGELOG * added token get,delete, refresh and list operations * Revert "added token get,delete, refresh and list operations" This reverts commit 15aba90201b79bbf01f4787900b3b41160441390. * Update python-app.yml Co-authored-by: Frank Greguska * updated poetry version Version matches build/test versions. 
* Update README.md * Update podaac_data_downloader.py Fixing for issues 109 - adding capability to download by granule-name * Update Downloader.md Fixed the help file * added changelog entries, regressiont ests * added poetry lock cleanup Co-authored-by: Frank Greguska Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Co-authored-by: sureshshsv <45676320+sureshshsv@users.noreply.github.com> Co-authored-by: sureshshsv --- CHANGELOG.md | 6 ++++++ Downloader.md | 4 +++- README.md | 2 +- subscriber/podaac_data_downloader.py | 29 ++++++++++++++++++++++++---- tests/test_downloader_regression.py | 12 ++++++++++++ 5 files changed, 47 insertions(+), 6 deletions(-) mode change 100644 => 100755 subscriber/podaac_data_downloader.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f5b15f..9a0ae73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +## Unreleased +### Fixed +- Added EDL based token downloading, removing CMR tokens [98](https://github.com/podaac/data-subscriber/issues/98), +### Added +- Added ability to download by filename [109](https://github.com/podaac/data-subscriber/issues/109) and additional regression testing + ## 1.11.0 ### Fixed - Fixed an issue where token-refresh was expecting a dictionary, not a list of tuples diff --git a/Downloader.md b/Downloader.md index 78b3af6..470d519 100644 --- a/Downloader.md +++ b/Downloader.md @@ -6,7 +6,7 @@ For installation and dependency information, please see the [top-level README](R ``` $> podaac-data-downloader -h -usage: PO.DAAC bulk-data downloader [-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-f] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-e EXTENSIONS] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] [--limit LIMIT] +usage: PO.DAAC bulk-data downloader 
[-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-f] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-e EXTENSIONS] [--granule-name GRANULE] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] [--limit LIMIT] optional arguments: -h, --help show this help message and exit @@ -34,6 +34,8 @@ optional arguments: --offset OFFSET Flag used to shift timestamp. Units are in hours, e.g. 10 or -10. -e EXTENSIONS, --extensions EXTENSIONS The extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz] + -gr GRANULE, --granule-name GRANULE + The name of the granule to download. Only one granule name can be specified. Script will download all files matching similar granule name sans extension. --process PROCESS_CMD Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times. --version Display script version information and exit. diff --git a/README.md b/README.md index 07c7695..31c4591 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ There are 2 tools in this repository, the data subscriber and the data downloade **Downloader** - [Documentation](Downloader.md) -The Downloader is useful if you need to download PO.DAAC data once in a while or prefer to do it "on-demand". The subscriber makes no assumptions about the last time run or what is new in the archive, it simply uses the provided requests and downloads all matching data. +The Downloader is useful if you need to download PO.DAAC data once in a while or prefer to do it "on-demand". The Downloader makes no assumptions about the last time run or what is new in the archive, it simply uses the provided requests and downloads all matching data. 
**Subscriber** - [Documentation](Subscriber.md) diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py old mode 100644 new mode 100755 index 00b78a3..6f51bca --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -31,16 +31,16 @@ def parse_cycles(cycle_input): def validate(args): - if args.search_cycles is None and args.startDate is None and args.endDate is None: + if args.search_cycles is None and args.startDate is None and args.endDate is None and args.granulename is None: raise ValueError( - "Error parsing command line arguments: one of [--start-date and --end-date] or [--cycles] are required") # noqa E501 + "Error parsing command line arguments: one of [--start-date and --end-date] or [--cycles] or [--granule-name] are required ") # noqa E501 if args.search_cycles is not None and args.startDate is not None: raise ValueError( "Error parsing command line arguments: only one of -sd/--start-date and --cycles are allowed") # noqa E501 if args.search_cycles is not None and args.endDate is not None: raise ValueError( "Error parsing command line arguments: only one of -ed/--end-date and --cycles are allowed") # noqa E50 - if None in [args.endDate, args.startDate] and args.search_cycles is None: + if None in [args.endDate, args.startDate] and args.search_cycles is None and args.granulename is None: raise ValueError( "Error parsing command line arguments: Both --start-date and --end-date must be specified") # noqa E50 @@ -88,6 +88,13 @@ def create_parser(): parser.add_argument("-e", "--extensions", dest="extensions", help="The extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz]", default=None, action='append') # noqa E501 + + # Get specific granule from the search + # https://github.com/podaac/data-subscriber/issues/109 + parser.add_argument("-gr", "--granule-name", dest="granulename", + help="Flag to download specific granule from a collection. 
This parameter can only be used if you know the granule name. Only one granule name can be supplied", + default=None) + parser.add_argument("--process", dest="process_cmd", help="Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times.", action='append') @@ -132,6 +139,7 @@ def run(args=None): short_name = args.collection extensions = args.extensions process_cmd = args.process_cmd + granule=args.granulename data_path = args.outputDirectory download_limit = None @@ -159,7 +167,6 @@ def run(args=None): cmr_cycles = search_cycles params = [ ('page_size', page_size), - ('sort_key', "-start_date"), ('provider', provider), ('ShortName', short_name), ('token', token), @@ -169,6 +176,20 @@ def run(args=None): if args.verbose: logging.info("cycles: " + str(cmr_cycles)) + elif granule is not None: + #This line is added to strip out the extensions. Not sure if this works across the board for all collections but it seem to work on few collections that were tested. + cmr_granule = granule.rsplit( ".", 1 )[ 0 ] + params = [ + ('page_size', page_size), + ('sort_key', "-start_date"), + ('provider', provider), + ('ShortName', short_name), + ('GranuleUR[]', cmr_granule), + ('token', token), + ] + if args.verbose: + logging.info("Granule: " + str(cmr_granule)) + else: temporal_range = pa.get_temporal_range(start_date_time, end_date_time, datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) # noqa E501 diff --git a/tests/test_downloader_regression.py b/tests/test_downloader_regression.py index cea7df0..a41c67b 100644 --- a/tests/test_downloader_regression.py +++ b/tests/test_downloader_regression.py @@ -17,6 +17,17 @@ def create_downloader_args(args): # and offset. Running it a second time to ensure it downlaods the files again- # the downloader doesn't care about updates. 
@pytest.mark.regression +def test_downloader_MUR_by_name(): + shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True) + args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -gr 20221206090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc --verbose'.split()) + pdd.run(args2) + # So running the test in parallel, sometimes we get a 401 on the token... + # Let's ensure we're only looking for data files here + assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name ])==1 + shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2') + +# Test the downlaoder on SWOT Simulated single file download +@pytest.mark.regression def test_downloader_limit_MUR(): shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True) args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-30T00:00:00Z --limit 1 --verbose'.split()) @@ -26,6 +37,7 @@ def test_downloader_limit_MUR(): assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name ])==1 shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2') + #Test the downlaoder on MUR25 data for start/stop/, yyyy/mmm/dd dir structure, # and offset. Running it a second time to ensure it downlaods the files again- # the downloader doesn't care about updates. 
From 2a402d763df0885bd825eeca1908858d31cf03b0 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Mon, 12 Dec 2022 13:24:12 -0800 Subject: [PATCH 11/34] added README information and updates (#113) --- CHANGELOG.md | 2 +- Downloader.md | 14 ++++++++++++++ pyproject.toml | 2 +- subscriber/podaac_access.py | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a0ae73..9431f5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -## Unreleased +## 1.12.0 ### Fixed - Added EDL based token downloading, removing CMR tokens [98](https://github.com/podaac/data-subscriber/issues/98), ### Added diff --git a/Downloader.md b/Downloader.md index 470d519..f83d737 100644 --- a/Downloader.md +++ b/Downloader.md @@ -123,6 +123,20 @@ machine urs.earthdata.nasa.gov ## Advanced Usage +### Download data by filename + +If you're aware of a file you want to download, you can use the `-gr` option to download by a filename. The `-c` (COLLECTION) and `-d` (directory) options are still required. + +The `-gr` option works by taking the file name, removing the suffix and searching for a CMR entry called the granuleUR. Some examples of this include: + +| Collection | Filename | CMR GranuleUR | +| --- | ----------- | ----------- | +| MUR25-JPL-L4-GLOB-v04.2 | 20221206090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc | 20221206090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2 | +| JASON_CS_S6A_L2_ALT_HR_STD_OST_NRT_F | S6A_P4_2__HR_STD__NR_077_039_20221212T181728_20221212T182728_F07.nc | S6A_P4_2__HR_STD__NR_077_039_20221212T181728_20221212T182728_F07 | + +Because of this behavior, granules without data suffixes and granules where the the UR does not directly follow this convention may not work as anticipated. 
We will be adding the ability to download by granuleUR in a future enhancement. + + ### Download data by cycle Some PO.DAAC datasets are better suited for cycles based search instead of start and end times. To enabled this, we've added 'cycle' based downloading to the data-downloader. The following example will download data from cycle 42: diff --git a/pyproject.toml b/pyproject.toml index 2ad16d3..f1bbb6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.11.0" +version = "1.12.0" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 8ba7fad..22d4e6e 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -27,7 +27,7 @@ import tenacity from datetime import datetime -__version__ = "1.11.0" +__version__ = "1.12.0" extensions = [".nc", ".h5", ".zip", ".tar.gz", ".tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" From 35c8fc2d2e6a9026c0d160a79fe7e8b1e339a9fb Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Mon, 12 Dec 2022 13:41:57 -0800 Subject: [PATCH 12/34] fixed pymock issues... 
again --- poetry.lock | 4 ---- 1 file changed, 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 08fd07c..72a359c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -333,10 +333,6 @@ pytest-mock = [ {file = "pytest-mock-3.9.0.tar.gz", hash = "sha256:c899a0dcc8a5f22930acd020b500abd5f956911f326864a3b979e4866e14da82"}, {file = "pytest_mock-3.9.0-py3-none-any.whl", hash = "sha256:1a1b9264224d026932d6685a0f9cef3b61d91563c3e74af9fe5afb2767e13812"}, ] -pytest-mock = [ - {file = "pytest-mock-3.8.2.tar.gz", hash = "sha256:77f03f4554392558700295e05aed0b1096a20d4a60a4f3ddcde58b0c31c8fca2"}, - {file = "pytest_mock-3.8.2-py3-none-any.whl", hash = "sha256:8a9e226d6c0ef09fcf20c94eb3405c388af438a90f3e39687f84166da82d5948"}, -] requests = [ {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, From ad501782fb7bc9da9b1643f25fcf3cf88ae1e03e Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Fri, 3 Feb 2023 10:00:33 -0800 Subject: [PATCH 13/34] Extension regex (#121) * extend -e option to handle regular expressions (#115) * Develop into Main (1.12.0) (#114) * Issues/91 (#92) * added citation creation tests and functionality to subscriber and downloader * added verbose option to create_citation_file command, previously hard coded * updated changelog (whoops) and fixed regression test: 1. Issue where the citation file now downloaded affected the counts 2. Issue where the logic for determining if a file modified time was changing or not was picking up the new citation file which _always_ gets rewritten to update the 'last accessed' date. * updated request to include exec_info in warning; fixed issue with params not being a dictionary caused errors * changed a warning to debug for citation file. 
fixed test issues * Enable debug logging during regression tests and set max parallel workflows to 2 * added output to pytest * fixed test to only look for downlaoded data files not citation file due to 'random' cmr errors when creating a citation. * added mock testing and retry on 503 * added 503 fixes Co-authored-by: Frank Greguska * fixed issues where token was not proagated to CMR queries (#95) * Misc fixes (#101) * added ".tiff" to default extensions to address #100 * removed 'warning' message on not downloading all data to close #99 * updated help documentation for start/end times to close #79 * added version update, updates to CHANGELOG * added token get,delete, refresh and list operations * Revert "added token get,delete, refresh and list operations" This reverts commit 15aba90201b79bbf01f4787900b3b41160441390. * Update python-app.yml * updated poetry version Version matches build/test versions. * Issues/98 (#107) * added token get,delete, refresh and list operations * Revert "added token get,delete, refresh and list operations" This reverts commit 15aba90201b79bbf01f4787900b3b41160441390. * added EDL (not cmr-token) based get, list,delete, refresh token * updated token regression tests * updates and tests for subscriber moving to EDL. 
* marked tests as regression test * Update subscriber/podaac_data_downloader.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_data_subscriber.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_access.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_access.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Update subscriber/podaac_access.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * added exec info to errors, cleaned up some log statements Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> * Issues/109 (#111) * Develop (#103) * Issues/91 (#92) * added citation creation tests and functionality to subscriber and downloader * added verbose option to create_citation_file command, previously hard coded * updated changelog (whoops) and fixed regression test: 1. Issue where the citation file now downloaded affected the counts 2. Issue where the logic for determining if a file modified time was changing or not was picking up the new citation file which _always_ gets rewritten to update the 'last accessed' date. * updated request to include exec_info in warning; fixed issue with params not being a dictionary caused errors * changed a warning to debug for citation file. fixed test issues * Enable debug logging during regression tests and set max parallel workflows to 2 * added output to pytest * fixed test to only look for downlaoded data files not citation file due to 'random' cmr errors when creating a citation. 
* added mock testing and retry on 503 * added 503 fixes Co-authored-by: Frank Greguska * fixed issues where token was not proagated to CMR queries (#95) * Misc fixes (#101) * added ".tiff" to default extensions to address #100 * removed 'warning' message on not downloading all data to close #99 * updated help documentation for start/end times to close #79 * added version update, updates to CHANGELOG * added token get,delete, refresh and list operations * Revert "added token get,delete, refresh and list operations" This reverts commit 15aba90201b79bbf01f4787900b3b41160441390. * Update python-app.yml Co-authored-by: Frank Greguska * updated poetry version Version matches build/test versions. * Update README.md * Update podaac_data_downloader.py Fixing for issues 109 - adding capability to download by granule-name * Update Downloader.md Fixed the help file * added changelog entries, regressiont ests * added poetry lock cleanup Co-authored-by: Frank Greguska Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Co-authored-by: sureshshsv <45676320+sureshshsv@users.noreply.github.com> Co-authored-by: sureshshsv * added README information and updates (#113) * fixed pymock issues... again Co-authored-by: Frank Greguska Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Co-authored-by: sureshshsv <45676320+sureshshsv@users.noreply.github.com> Co-authored-by: sureshshsv * extend -e option to handle regular expressions formerly, -e could not handle PTM_\d+ extensions without the user explicitly calling all of them. 
--------- Co-authored-by: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Co-authored-by: Frank Greguska Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Co-authored-by: sureshshsv <45676320+sureshshsv@users.noreply.github.com> Co-authored-by: sureshshsv * added dcoumentation and tests for regex * converted defaults to regexes, added gtiff test --------- Co-authored-by: Peter Mao Co-authored-by: Frank Greguska Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Co-authored-by: sureshshsv <45676320+sureshshsv@users.noreply.github.com> Co-authored-by: sureshshsv --- CHANGELOG.md | 4 ++++ Downloader.md | 14 ++++++++++++-- Subscriber.md | 15 ++++++++++++--- subscriber/podaac_access.py | 8 +++++++- subscriber/podaac_data_downloader.py | 6 +++--- subscriber/podaac_data_subscriber.py | 6 +++--- tests/test_subscriber.py | 9 +++++++++ 7 files changed, 50 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9431f5c..e63c368 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +## Unreleased +### Added +- Added new feature allowing regex to be used in `--extension` `-e` options. For example using -e `PTM_\\d+` would match data files like `filename.PTM_1`, `filename.PTM_2` and `filename.PTM_10`, instead of specifying all possible combinations (``-e PTM_1, -e PTM_2, ..., -e PMT_10`) + ## 1.12.0 ### Fixed - Added EDL based token downloading, removing CMR tokens [98](https://github.com/podaac/data-subscriber/issues/98), diff --git a/Downloader.md b/Downloader.md index f83d737..e0af08f 100644 --- a/Downloader.md +++ b/Downloader.md @@ -33,7 +33,7 @@ optional arguments: -dy Flag to use start time (Year) of downloaded data for directory where data products will be downloaded. --offset OFFSET Flag used to shift timestamp. 
Units are in hours, e.g. 10 or -10. -e EXTENSIONS, --extensions EXTENSIONS - The extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz] + Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] -gr GRANULE, --granule-name GRANULE The name of the granule to download. Only one granule name can be specified. Script will download all files matching similar granule name sans extension. --process PROCESS_CMD @@ -219,13 +219,23 @@ Some collections have many files. To download a specific set of files, you can s ``` -e EXTENSIONS, --extensions EXTENSIONS - The extensions of products to download. Default is [.nc, .h5, .zip] + Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] ``` An example of the -e usage- note the -e option is additive: ``` podaac-data-subscriber -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -e .nc -e .h5 -sd 2020-06-01T00:46:02Z -ed 2020-07-01T00:46:02Z ``` + +One may also specify a regular expression to select files. For example, the following are equivalent: + +`podaac-data-subscriber -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -e PTM_1, -e PTM_2, ..., -e PMT_10 -sd 2020-06-01T00:46:02Z -ed 2020-07-01T00:46:02Z` + +and + +`podaac-data-subscriber -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -e PTM_\\d+ -sd 2020-06-01T00:46:02Z -ed 2020-07-01T00:46:02Z` + + ### run a post download process Using the `--process` option, you can run a simple command agaisnt the "just" downloaded file. This will take the format of " ". This means you can run a command like `--process gzip` to gzip all downloaded files. We do not support more advanced processes at this time (piping, running a process on a directory, etc). diff --git a/Subscriber.md b/Subscriber.md index 0cd5e3f..3df3344 100644 --- a/Subscriber.md +++ b/Subscriber.md @@ -28,8 +28,8 @@ optional arguments: --offset OFFSET Flag used to shift timestamp. Units are in hours, e.g. 10 or -10. 
-m MINUTES, --minutes MINUTES How far back in time, in minutes, should the script look for data. If running this script as a cron, this value should be equal to or greater than how often your cron runs (default: 60 minutes). - -e EXTENSIONS, --extensions EXTENSIONS - The extensions of products to download. Default is [.nc, .h5, .zip] +-e EXTENSIONS, --extensions EXTENSIONS + Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] --process PROCESS_CMD Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times. --version Display script version information and exit. @@ -193,13 +193,22 @@ Some collections have many files. To download a specific set of files, you can s ``` -e EXTENSIONS, --extensions EXTENSIONS - The extensions of products to download. Default is [.nc, .h5, .zip] + Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] ``` An example of the -e usage- note the -e option is additive: ``` podaac-data-subscriber -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -e .nc -e .h5 ``` + +One may also specify a regular expression to select files. For example, the following are equivalent: + +`podaac-data-subscriber -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -e PTM_1, -e PTM_2, ..., -e PMT_10 -sd 2020-06-01T00:46:02Z -ed 2020-07-01T00:46:02Z` + +and + +`podaac-data-subscriber -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -e PTM_\\d+ -sd 2020-06-01T00:46:02Z -ed 2020-07-01T00:46:02Z` + ### run a post download process Using the `--process` option, you can run a simple command agaisnt the "just" downloaded file. This will take the format of " ". This means you can run a command like `--process gzip` to gzip all downloaded files. We do not support more advanced processes at this time (piping, running a process on a directory, etc). 
diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 22d4e6e..5b2a331 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -2,6 +2,7 @@ import logging import netrc import subprocess +import re from datetime import datetime from http.cookiejar import CookieJar from os import makedirs @@ -28,7 +29,7 @@ from datetime import datetime __version__ = "1.12.0" -extensions = [".nc", ".h5", ".zip", ".tar.gz", ".tiff"] +extensions = ["\\.nc", "\\.h5", "\\.zip", "\\.tar.gz", "\\.tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" token_url = "https://" + edl + "/api/users" @@ -531,6 +532,11 @@ def create_citation(collection_json, access_date): year = datetime.strptime(release_date, "%Y-%m-%dT%H:%M:%S.000Z").year return citation_template.format(creator=creator, year=year, title=title, version=version, doi_authority=doi_authority, doi=doi, access_date=access_date) +def search_extension(extension, filename): + if re.search(extension + "$", filename) is not None: + return True + return False + def create_citation_file(short_name, provider, data_path, token=None, verbose=False): # get collection umm-c METADATA params = [ diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 6f51bca..7a41f30 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import argparse import logging -import os +import os, re import sys from datetime import datetime, timedelta from os import makedirs @@ -86,7 +86,7 @@ def create_parser(): help="Flag used to shift timestamp. Units are in hours, e.g. 10 or -10.") # noqa E501 parser.add_argument("-e", "--extensions", dest="extensions", - help="The extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz]", + help="Regexps of extensions of products to download. 
Default is [.nc, .h5, .zip, .tar.gz, .tiff]", default=None, action='append') # noqa E501 # Get specific granule from the search @@ -253,7 +253,7 @@ def run(args=None): filtered_downloads = [] for f in downloads: for extension in extensions: - if f.lower().endswith(extension): + if pa.search_extension(extension, f): filtered_downloads.append(f) downloads = filtered_downloads diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index eb103ac..2a0a9f3 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -14,7 +14,7 @@ # Accounts are free to create and take just a moment to set up. import argparse import logging -import os +import os, re import sys from datetime import datetime, timedelta from os import makedirs @@ -92,7 +92,7 @@ def create_parser(): help="How far back in time, in minutes, should the script look for data. If running this script as a cron, this value should be equal to or greater than how often your cron runs.", type=int, default=None) # noqa E501 parser.add_argument("-e", "--extensions", dest="extensions", - help="The extensions of products to download. Default is [.nc, .h5, .zip]", default=None, + help="Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff]", default=None, action='append') # noqa E501 parser.add_argument("--process", dest="process_cmd", help="Processing command to run on each downloaded file (e.g., compression). 
Can be specified multiple times.", @@ -260,7 +260,7 @@ def run(args=None): filtered_downloads = [] for f in downloads: for extension in extensions: - if f.lower().endswith(extension): + if pa.search_extension(extension, f): filtered_downloads.append(f) downloads = filtered_downloads diff --git a/tests/test_subscriber.py b/tests/test_subscriber.py index 983cdce..23f8e46 100644 --- a/tests/test_subscriber.py +++ b/tests/test_subscriber.py @@ -206,3 +206,12 @@ def validate(args): args2 = parser.parse_args(args) pa.validate(args2) return args2 + +def test_extensions(): + assert pa.search_extension('\\.tiff', "myfile.tiff") == True + assert pa.search_extension('\\.tiff', "myfile.tif") == False + assert pa.search_extension('\\.tiff', "myfile.gtiff") == False + assert pa.search_extension('PTM_\\d+', "myfile.PTM_1") == True + assert pa.search_extension('PTM_\\d+', "myfile.PTM_10") == True + assert pa.search_extension('PTM_\\d+', "myfile.PTM_09") == True + assert pa.search_extension('PTM_\\d+', "myfile.PTM_9") == True From 0e37c14270fb38204d2b2eb8d9edf1fa1a547280 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Wed, 15 Mar 2023 10:29:56 -0700 Subject: [PATCH 14/34] closes 118. retries was never hit because range is not end inclusive. (#119) * closes 118. retries was never hit ebcause range is not end inclusive. * updated test to catch now-thrown exception --- subscriber/podaac_access.py | 4 +++- tests/test_subscriber.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 5b2a331..12c9e44 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -344,7 +344,9 @@ def download_file(remote_file, output_path, retries=3): logging.warning(f'Error downloading {remote_file}. Retrying download.') # back off on sleep time each error... 
time.sleep(r) - if r >= retries: + # range is exclusive, so range(3): 0,1,2 so retries will + # never be >= 3; need to subtract 1 (doh) + if r >= retries-1: failed = True else: #downlaoded fie without 503 diff --git a/tests/test_subscriber.py b/tests/test_subscriber.py index 23f8e46..c38c9be 100644 --- a/tests/test_subscriber.py +++ b/tests/test_subscriber.py @@ -198,7 +198,10 @@ def test_param_update(): def test_downloader_retry(mocker): mck = mocker.patch('subscriber.podaac_access.urlretrieve', side_effect=HTTPError("url", 503, "msg", None, None)) - pa.download_file("myUrl", "outputPath") + try: + pa.download_file("myUrl", "outputPath") + except Exception: + pass assert mck.call_count == 3 def validate(args): From 0b9dd56afbce621250d9aca0ed55fa30ff18b937 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Thu, 16 Mar 2023 05:19:06 -0700 Subject: [PATCH 15/34] added --dry-run option, docs, and test cases (#124) * added --dry-run option, docs, and test cases * Update subscriber/podaac_data_downloader.py Added more elegant way of download limit application Co-authored-by: Stepheny Perez --------- Co-authored-by: Stepheny Perez --- CHANGELOG.md | 3 +- Downloader.md | 32 +++++++-------- Subscriber.md | 59 ++++++++++++++-------------- subscriber/podaac_data_downloader.py | 12 ++++++ subscriber/podaac_data_subscriber.py | 10 +++++ tests/test_downloader_regression.py | 11 ++++++ tests/test_subscriber_regression.py | 8 ++++ 7 files changed, 87 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e63c368..08d2971 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## Unreleased ### Added -- Added new feature allowing regex to be used in `--extension` `-e` options. 
For example using -e `PTM_\\d+` would match data files like `filename.PTM_1`, `filename.PTM_2` and `filename.PTM_10`, instead of specifying all possible combinations (``-e PTM_1, -e PTM_2, ..., -e PMT_10`) +- Added --dry-run option to subscriber and downloader to view the files that _would_ be downloaded without actuall downloading them. [102](https://github.com/podaac/data-subscriber/issues/102) +- Added new feature allowing regex to be used in `--extension` `-e` options. For example using -e `PTM_\\d+` would match data files like `filename.PTM_1`, `filename.PTM_2` and `filename.PTM_10`, instead of specifying all possible combinations (``-e PTM_1, -e PTM_2, ..., -e PMT_10`) [115](https://github.com/podaac/data-subscriber/issues/115) ## 1.12.0 ### Fixed diff --git a/Downloader.md b/Downloader.md index e0af08f..7195fd5 100644 --- a/Downloader.md +++ b/Downloader.md @@ -6,7 +6,8 @@ For installation and dependency information, please see the [top-level README](R ``` $> podaac-data-downloader -h -usage: PO.DAAC bulk-data downloader [-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-f] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-e EXTENSIONS] [--granule-name GRANULE] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] [--limit LIMIT] +usage: PO.DAAC bulk-data downloader [-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-f] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-e EXTENSIONS] [-gr GRANULENAME] [--process PROCESS_CMD] [--version] [--verbose] + [-p PROVIDER] [--limit LIMIT] [--dry-run] optional arguments: -h, --help show this help message and exit @@ -17,32 +18,30 @@ optional arguments: --cycle SEARCH_CYCLES Cycle number for determining downloads. can be repeated for multiple cycles -sd STARTDATE, --start-date STARTDATE - The ISO date time before which data should be retrieved. 
For Example, --start-date 2021-01-14T00:00:00Z + The ISO date time after which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z -ed ENDDATE, --end-date ENDDATE - The ISO date time after which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z - -f, --force - Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches + The ISO date time before which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z + -f, --force Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches -b BBOX, --bounds BBOX - The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without - spaces. Due to an issue with parsing arguments, to use this command, please use the -b="-180,-90,180,90" syntax - when calling from the command line. Default: "-180,-90,180,90". + The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b="-180,-90,180,90" syntax when calling from the command line. + Default: "-180,-90,180,90". -dc Flag to use cycle number for directory where data products will be downloaded. -dydoy Flag to use start time (Year/DOY) of downloaded data for directory where data products will be downloaded. - -dymd Flag to use start time (Year/Month/Day) of downloaded data for directory where data products will be - downloaded. + -dymd Flag to use start time (Year/Month/Day) of downloaded data for directory where data products will be downloaded. -dy Flag to use start time (Year) of downloaded data for directory where data products will be downloaded. --offset OFFSET Flag used to shift timestamp. Units are in hours, e.g. 10 or -10. -e EXTENSIONS, --extensions EXTENSIONS - Regexps of extensions of products to download. 
Default is [.nc, .h5, .zip, .tar.gz, .tiff] - -gr GRANULE, --granule-name GRANULE - The name of the granule to download. Only one granule name can be specified. Script will download all files matching similar granule name sans extension. + Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] + -gr GRANULENAME, --granule-name GRANULENAME + Flag to download specific granule from a collection. This parameter can only be used if you know the granule name. Only one granule name can be supplied --process PROCESS_CMD Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times. --version Display script version information and exit. --verbose Verbose mode. -p PROVIDER, --provider PROVIDER Specify a provider for collection search. Default is POCLOUD. - --limit LIMIT Integer limit for number of granules to download. Useful in testing. Defaults to 2000 + --limit LIMIT Integer limit for number of granules to download. Useful in testing. Defaults to no limit. 
+ --dry-run Search and identify files to download, but do not actually download them ``` @@ -52,9 +51,8 @@ optional arguments: Usage: ``` -usage: PO.DAAC bulk-data downloader [-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-f] - [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-e EXTENSIONS] [--process PROCESS_CMD] - [--version] [--verbose] [-p PROVIDER] [--limit LIMIT] +usage: PO.DAAC bulk-data downloader [-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-f] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-e EXTENSIONS] [-gr GRANULENAME] [--process PROCESS_CMD] [--version] [--verbose] + [-p PROVIDER] [--limit LIMIT] [--dry-run] ``` To run the script, the following parameters are required: diff --git a/Subscriber.md b/Subscriber.md index 3df3344..db96c93 100644 --- a/Subscriber.md +++ b/Subscriber.md @@ -6,43 +6,42 @@ For installation and dependency information, please see the [top-level README](R ``` $> podaac-data-subscriber -h -usage: PO.DAAC data subscriber [-h] -c COLLECTION -d OUTPUTDIRECTORY [-f] [-sd STARTDATE] [-ed ENDDATE] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-m MINUTES] [-e EXTENSIONS] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] - -optional arguments: - -h, --help show this help message and exit - -c COLLECTION, --collection-shortname COLLECTION - The collection shortname for which you want to retrieve data. - -d OUTPUTDIRECTORY, --data-dir OUTPUTDIRECTORY - The directory where data products will be downloaded. - -f, --force Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches - -sd STARTDATE, --start-date STARTDATE - The ISO date time before which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z - -ed ENDDATE, --end-date ENDDATE - The ISO date time after which data should be retrieved. 
For Example, --end-date 2021-01-14T00:00:00Z - -b BBOX, --bounds BBOX - The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b="-180,-90,180,90" syntax when calling from the command line. Default: "-180,-90,180,90". - -dc Flag to use cycle number for directory where data products will be downloaded. - -dydoy Flag to use start time (Year/DOY) of downloaded data for directory where data products will be downloaded. - -dymd Flag to use start time (Year/Month/Day) of downloaded data for directory where data products will be downloaded. - -dy Flag to use start time (Year) of downloaded data for directory where data products will be downloaded. - --offset OFFSET Flag used to shift timestamp. Units are in hours, e.g. 10 or -10. - -m MINUTES, --minutes MINUTES - How far back in time, in minutes, should the script look for data. If running this script as a cron, this value should be equal to or greater than how often your cron runs (default: 60 minutes). +-h, --help show this help message and exit +-c COLLECTION, --collection-shortname COLLECTION + The collection shortname for which you want to retrieve data. +-d OUTPUTDIRECTORY, --data-dir OUTPUTDIRECTORY + The directory where data products will be downloaded. +-f, --force Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches +-sd STARTDATE, --start-date STARTDATE + The ISO date time after which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z +-ed ENDDATE, --end-date ENDDATE + The ISO date time before which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z +-b BBOX, --bounds BBOX + The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. 
Due to an issue with parsing arguments, to use this command, please use the -b="-180,-90,180,90" syntax when calling from the command line. + Default: "-180,-90,180,90". +-dc Flag to use cycle number for directory where data products will be downloaded. +-dydoy Flag to use start time (Year/DOY) of downloaded data for directory where data products will be downloaded. +-dymd Flag to use start time (Year/Month/Day) of downloaded data for directory where data products will be downloaded. +-dy Flag to use start time (Year) of downloaded data for directory where data products will be downloaded. +--offset OFFSET Flag used to shift timestamp. Units are in hours, e.g. 10 or -10. +-m MINUTES, --minutes MINUTES + How far back in time, in minutes, should the script look for data. If running this script as a cron, this value should be equal to or greater than how often your cron runs. -e EXTENSIONS, --extensions EXTENSIONS - Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] - --process PROCESS_CMD - Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times. - --version Display script version information and exit. - --verbose Verbose mode. - -p PROVIDER, --provider PROVIDER - Specify a provider for collection search. Default is POCLOUD. + Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] +--process PROCESS_CMD + Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times. +--version Display script version information and exit. +--verbose Verbose mode. +-p PROVIDER, --provider PROVIDER + Specify a provider for collection search. Default is POCLOUD. 
+--dry-run Search and identify files to download, but do not actually download them ``` ## Run the Script Usage: ``` -usage: podaac_data_subscriber.py [-h] -c COLLECTION -d OUTPUTDIRECTORY [-f] [-sd STARTDATE] [-ed ENDDATE] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-m MINUTES] [-e EXTENSIONS] [--version] [--verbose] [-p PROVIDER] +usage: PO.DAAC data subscriber [-h] -c COLLECTION -d OUTPUTDIRECTORY [-f] [-sd STARTDATE] [-ed ENDDATE] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-m MINUTES] [-e EXTENSIONS] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] [--dry-run] ``` To run the script, the following parameters are required: diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 7a41f30..11ab835 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -107,6 +107,8 @@ def create_parser(): parser.add_argument("--limit", dest="limit", default=None, type=int, help="Integer limit for number of granules to download. Useful in testing. Defaults to no limit.") # noqa E501 + parser.add_argument("--dry-run", dest="dry_run", action="store_true", help="Search and identify files to download, but do not actually download them") # noqa E501 + return parser @@ -264,9 +266,19 @@ def run(args=None): logging.info("Found " + str(len(downloads)) + " total files to download") if download_limit: logging.info("Limiting downloads to " + str(args.limit) + " total files") + if args.verbose: logging.info("Downloading files with extensions: " + str(extensions)) + if args.dry_run: + logging.info("Dry-run option specified. Listing Downloads.") + for download in downloads[:download_limit]: + logging.info(download) + logging.info("Dry-run option specific. Exiting.") + return + + + # NEED TO REFACTOR THIS, A LOT OF STUFF in here # Finish by downloading the files to the data directory in a loop. # Overwrite `.update` with a new timestamp on success. 
diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index 2a0a9f3..5f7fd09 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -104,6 +104,8 @@ def create_parser(): parser.add_argument("-p", "--provider", dest="provider", default='POCLOUD', help="Specify a provider for collection search. Default is POCLOUD.") # noqa E501 + parser.add_argument("--dry-run", dest="dry_run", action="store_true", help="Search and identify files to download, but do not actually download them") # noqa E501 + return parser @@ -272,6 +274,14 @@ def run(args=None): if args.verbose: logging.info("Downloading files with extensions: " + str(extensions)) + if args.dry_run: + logging.info("Dry-run option specified. Listing Downloads.") + for download in downloads: + logging.info(download) + logging.info("Dry-run option specified. Exiting.") + return + + # NEED TO REFACTOR THIS, A LOT OF STUFF in here # Finish by downloading the files to the data directory in a loop. # Overwrite `.update` with a new timestamp on success.
diff --git a/tests/test_downloader_regression.py b/tests/test_downloader_regression.py index a41c67b..027ea50 100644 --- a/tests/test_downloader_regression.py +++ b/tests/test_downloader_regression.py @@ -37,6 +37,17 @@ def test_downloader_limit_MUR(): assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name ])==1 shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2') +# Test the downloader dry-run option on MUR25; no files should be downloaded +@pytest.mark.regression +def test_downloader_limit_dry_run(): + shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True) + args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 --dry-run -sd 2020-01-01T00:00:00Z -ed 2020-01-30T00:00:00Z --limit 1 --verbose'.split()) + pdd.run(args2) + # So running the test in parallel, sometimes we get a 401 on the token... + # Let's ensure we're only looking for data files here + assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name ])==0 + shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2') + #Test the downlaoder on MUR25 data for start/stop/, yyyy/mmm/dd dir structure, # and offset.
Running it a second time to ensure it downlaods the files again- diff --git a/tests/test_subscriber_regression.py b/tests/test_subscriber_regression.py index ffa3685..d94e0ac 100644 --- a/tests/test_subscriber_regression.py +++ b/tests/test_subscriber_regression.py @@ -23,6 +23,14 @@ def test_subscriber_ecco_only_enddate(): assert exists('./ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4/1992/003/ATM_SURFACE_TEMP_HUM_WIND_PRES_day_mean_1992-01-03_ECCO_V4r4_latlon_0p50deg.nc') shutil.rmtree('./ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4') +# Test to ensure nothing is downloaded via dry-run +@pytest.mark.regression +def test_subscriber_ecco_dry_run(): + args2 = create_args('-c ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4 -ed 1992-01-03T00:00:00Z -d ./ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4 -dydoy --dry-run'.split()) + pds.run(args2) + assert len(os.listdir('./ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4')) == 0 + shutil.rmtree('./ECCO_L4_ATM_STATE_05DEG_DAILY_V4R4') + # test to download S6 data by start/stop time, and bbox, and put it in the # cycle based directory structure @pytest.mark.regression From cc2e97bf2e307d784f76a13fe3eb081d9563aeaa Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Wed, 29 Mar 2023 08:21:23 -0700 Subject: [PATCH 16/34] Issues/70 (#117) * added code for updating version * added chagnelog * moved version check into __main__ instead of on import of the module * added sorting of releases from github to find latest release. 
* added authenticated (optional) access to github API to prevent rate limiting * separate out auth/token regression tests --- .github/workflows/python-app.yml | 40 ++- CHANGELOG.md | 2 + README.md | 8 + poetry.lock | 240 +++++++++------ pyproject.toml | 3 +- subscriber/podaac_access.py | 32 ++ subscriber/podaac_data_downloader.py | 1 + subscriber/podaac_data_subscriber.py | 1 + tests/releases.json | 431 +++++++++++++++++++++++++++ tests/test_subscriber.py | 23 ++ tests/test_token_regression.py | 8 +- 11 files changed, 698 insertions(+), 91 deletions(-) create mode 100644 tests/releases.json diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 62275e3..cb18515 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -39,8 +39,10 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest
install + - name: netrc-gen + uses: extractions/netrc@v1 + with: + machine: urs.earthdata.nasa.gov + username: ${{ secrets.EDL_OPS_USERNAME }} + password: ${{ secrets.EDL_OPS_PASSWORD }} + - name: Regression Test with pytest + env: + PODAAC_LOGLEVEL: "DEBUG" + run: | + poetry run pytest -o log_cli=true --log-cli-level=DEBUG -m "token" diff --git a/CHANGELOG.md b/CHANGELOG.md index 08d2971..9b69c7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ### Added - Added --dry-run option to subscriber and downloader to view the files that _would_ be downloaded without actuall downloading them. [102](https://github.com/podaac/data-subscriber/issues/102) - Added new feature allowing regex to be used in `--extension` `-e` options. For example using -e `PTM_\\d+` would match data files like `filename.PTM_1`, `filename.PTM_2` and `filename.PTM_10`, instead of specifying all possible combinations (``-e PTM_1, -e PTM_2, ..., -e PMT_10`) [115](https://github.com/podaac/data-subscriber/issues/115) +- Added check for updated version [70](https://github.com/podaac/data-subscriber/issues/70) + ## 1.12.0 ### Fixed diff --git a/README.md b/README.md index 31c4591..0fcd55d 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,14 @@ export PODAAC_LOGLEVEL=DEBUG And then run the script. This should give you more verbose output on URL requests to CMR, tokens, etc. +### OTHER OPTIONS + +The podaac downloader and subscriber make calls to github for checking recent releases. Unauthenticated requests are limited to 60 per hour. If you start seeing errors like: +``` +releases_json = {'documentation_url': 'https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting', 'message': "API... here's the good news: Authenticated requests get a higher rate limit. 
Check out the documentation for more details.)"} +``` +You'll want to set the environment variable GITHUB_TOKEN to a github personal access token- this allows for up to 5000 calls per hour. This requires a free github account. Most users will not run in to this issue. + ### In need of Help? The PO.DAAC User Services Office is the primary point of contact for answering your questions concerning data and information held by the PO.DAAC. User Services staff members are knowledgeable about both the data ordering system and the data products themselves. We answer questions about data, route requests to other DAACs, and direct questions we cannot answer to the appropriate information source. diff --git a/poetry.lock b/poetry.lock index 72a359c..6f6b375 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,20 +1,22 @@ [[package]] name = "attrs" -version = "22.1.0" +version = "22.2.0" description = "Classes Without Boilerplate" category = "dev" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] -docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"] +cov = ["attrs", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] +dev = ["attrs"] +docs = ["furo", "sphinx", "myst-parser", "zope.interface", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] +tests = ["attrs", "zope.interface"] +tests-no-zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy 
(>=0.971,<0.990)", "pytest-mypy-plugins"] +tests_no_zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"] [[package]] name = "certifi" -version = "2022.9.24" +version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." category = "main" optional = false @@ -22,22 +24,30 @@ python-versions = ">=3.6" [[package]] name = "charset-normalizer" -version = "2.1.1" +version = "3.0.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." category = "main" optional = false -python-versions = ">=3.6.0" - -[package.extras] -unicode_backport = ["unicodedata2"] +python-versions = "*" [[package]] name = "colorama" -version = "0.4.5" +version = "0.4.6" description = "Cross-platform colored terminal text." category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "exceptiongroup" +version = "1.1.0" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] [[package]] name = "flake8" @@ -79,11 +89,11 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [[package]] name = "iniconfig" -version = "1.1.1" -description = "iniconfig: brain-dead simple config-ini parsing" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" category = "dev" optional = false -python-versions = "*" +python-versions = ">=3.7" [[package]] name = "mccabe" @@ -95,14 +105,11 @@ python-versions = "*" [[package]] name = "packaging" -version = "21.3" +version = "23.0" description = "Core utilities for Python packages" category = "dev" optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" 
+python-versions = ">=3.7" [[package]] name = "pluggy" @@ -119,14 +126,6 @@ importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} testing = ["pytest-benchmark", "pytest"] dev = ["tox", "pre-commit"] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - [[package]] name = "pycodestyle" version = "2.8.0" @@ -143,20 +142,9 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -[[package]] -name = "pyparsing" -version = "3.0.9" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "dev" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["railroad-diagrams", "jinja2"] - [[package]] name = "pytest" -version = "7.1.3" +version = "7.2.1" description = "pytest: simple powerful testing with Python" category = "dev" optional = false @@ -165,19 +153,19 @@ python-versions = ">=3.7" [package.dependencies] attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" -py = ">=1.8.2" -tomli = ">=1.0.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] [[package]] name = "pytest-mock" -version = "3.9.0" +version = "3.10.0" description = "Thin-wrapper around the mock package for easier use with pytest" category = "dev" optional = false @@ -191,7 +179,7 @@ dev = ["pre-commit", "tox", "pytest-asyncio"] [[package]] name = "requests" -version = "2.28.1" +version = 
"2.28.2" description = "Python HTTP for Humans." category = "main" optional = false @@ -199,7 +187,7 @@ python-versions = ">=3.7, <4" [package.dependencies] certifi = ">=2017.4.17" -charset-normalizer = ">=2,<3" +charset-normalizer = ">=2,<4" idna = ">=2.5,<4" urllib3 = ">=1.21.1,<1.27" @@ -228,7 +216,7 @@ python-versions = ">=3.7" [[package]] name = "typing-extensions" -version = "4.3.0" +version = "4.4.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "dev" optional = false @@ -236,11 +224,11 @@ python-versions = ">=3.7" [[package]] name = "urllib3" -version = "1.26.12" +version = "1.26.14" description = "HTTP library with thread-safe connection pooling, file post, and more." category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" [package.extras] brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] @@ -249,15 +237,15 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "zipp" -version = "3.8.1" +version = "3.12.0" description = "Backport of pathlib-compatible object wrapper for zip files" category = "dev" optional = false python-versions = ">=3.7" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] +docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "sphinx-lint", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "jaraco.functools", "more-itertools", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"] [metadata] lock-version = 
"1.1" @@ -266,20 +254,110 @@ content-hash = "c5ece7741408cb266fe803842b66f646317dc3a384e9c54ecbe66a14ce895fed [metadata.files] attrs = [ - {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, - {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, + {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, + {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, ] certifi = [ - {file = "certifi-2022.9.24-py3-none-any.whl", hash = "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"}, - {file = "certifi-2022.9.24.tar.gz", hash = "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14"}, + {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, + {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, ] charset-normalizer = [ - {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, - {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, + {file = "charset-normalizer-3.0.1.tar.gz", hash = "sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-win32.whl", hash = "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b"}, + {file = 
"charset_normalizer-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = 
"sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-win32.whl", hash = "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash 
= "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-win32.whl", hash = "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash 
= "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753"}, + 
{file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-win32.whl", hash = "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be"}, + {file = 
"charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-win32.whl", hash = "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59"}, + {file = 
"charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"}, ] colorama = [ - {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, - {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"}, + {file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"}, ] flake8 = [ {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, @@ -294,25 +372,21 @@ importlib-metadata = [ {file = "importlib_metadata-4.2.0.tar.gz", hash = "sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31"}, ] iniconfig = [ - {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, - {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] mccabe = [ {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, {file = "mccabe-0.6.1.tar.gz", hash = 
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, ] packaging = [ - {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, - {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, + {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, + {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, ] pluggy = [ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, ] -py = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] pycodestyle = [ {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"}, {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"}, @@ -321,21 +395,17 @@ pyflakes = [ {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"}, {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"}, ] -pyparsing = [ - {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, - {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, -] pytest = [ - {file = "pytest-7.1.3-py3-none-any.whl", hash = 
"sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"}, - {file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"}, + {file = "pytest-7.2.1-py3-none-any.whl", hash = "sha256:c7c6ca206e93355074ae32f7403e8ea12163b1163c976fee7d4d84027c162be5"}, + {file = "pytest-7.2.1.tar.gz", hash = "sha256:d45e0952f3727241918b8fd0f376f5ff6b301cc0777c6f9a556935c92d8a7d42"}, ] pytest-mock = [ - {file = "pytest-mock-3.9.0.tar.gz", hash = "sha256:c899a0dcc8a5f22930acd020b500abd5f956911f326864a3b979e4866e14da82"}, - {file = "pytest_mock-3.9.0-py3-none-any.whl", hash = "sha256:1a1b9264224d026932d6685a0f9cef3b61d91563c3e74af9fe5afb2767e13812"}, + {file = "pytest-mock-3.10.0.tar.gz", hash = "sha256:fbbdb085ef7c252a326fd8cdcac0aa3b1333d8811f131bdcc701002e1be7ed4f"}, + {file = "pytest_mock-3.10.0-py3-none-any.whl", hash = "sha256:f4c973eeae0282963eb293eb173ce91b091a79c1334455acfac9ddee8a1c784b"}, ] requests = [ - {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, - {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, + {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, + {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, ] tenacity = [ {file = "tenacity-8.1.0-py3-none-any.whl", hash = "sha256:35525cd47f82830069f0d6b73f7eb83bc5b73ee2fff0437952cedf98b27653ac"}, @@ -346,14 +416,14 @@ tomli = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] typing-extensions = [ - {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"}, - {file = "typing_extensions-4.3.0.tar.gz", hash = 
"sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"}, + {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, + {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, ] urllib3 = [ - {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"}, - {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"}, + {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, + {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, ] zipp = [ - {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, - {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, + {file = "zipp-3.12.0-py3-none-any.whl", hash = "sha256:9eb0a4c5feab9b08871db0d672745b53450d7f26992fd1e4653aa43345e97b86"}, + {file = "zipp-3.12.0.tar.gz", hash = "sha256:73efd63936398aac78fd92b6f4865190119d6c91b531532e798977ea8dd402eb"}, ] diff --git a/pyproject.toml b/pyproject.toml index f1bbb6b..dcd1aba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,8 @@ podaac-data-downloader = 'subscriber.podaac_data_downloader:main' [tool.pytest.ini_options] markers = [ - "regression: marks a test as a regression, requires netrc file (deselect with '-m \"not regresion\"')" + "regression: marks a test as a regression, requires netrc file (deselect with '-m \"not regresion\"')", + "token: marks a test as a token regression, requires netrc file and relies on enterprise URS (deselect with '-m \"not token\"')" ] [build-system] diff --git a/subscriber/podaac_access.py 
b/subscriber/podaac_access.py index 12c9e44..2bbba6a 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -5,6 +5,7 @@ import re from datetime import datetime from http.cookiejar import CookieJar +import os from os import makedirs from os.path import isdir, basename, join, splitext from urllib import request @@ -19,6 +20,7 @@ from datetime import datetime import time from requests.auth import HTTPBasicAuth +from packaging import version @@ -558,3 +560,33 @@ def create_citation_file(short_name, provider, data_path, token=None, verbose=Fa with open(data_path + "/" + short_name + ".citation.txt", "w") as text_file: text_file.write(citation) + +def get_latest_release(): + github_url = "https://api.github.com/repos/podaac/data-subscriber/releases" + headers = {} + ghtoken = os.environ.get('GITHUB_TOKEN', None) + if ghtoken is not None: + headers = {"Authorization": "Bearer " + ghtoken} + + releases_json = requests.get(github_url, headers=headers).json() + latest_release = get_latest_release_from_json(releases_json) + return latest_release + +def release_is_current(latest_release, this_version): + return not (version.parse(this_version) < version.parse(latest_release)) + +def get_latest_release_from_json(releases_json): + releases = [] + for x in releases_json: + releases.append(x['tag_name']) + sorted(releases, key=lambda x: version.Version(x)).reverse() + return releases[0] + + +def check_for_latest(): + try: + latest_version = get_latest_release() + if not release_is_current(latest_version,__version__): + print(f'You are currently using version {__version__} of the PO.DAAC Data Subscriber/Downloader. Please run:\n\n pip install podaac-data-subscriber --upgrade \n\n to upgrade to the latest version.') + except: + print("Error checking for new version of the po.daac data subscriber. 
Continuing") diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 11ab835..de9e597 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -347,4 +347,5 @@ def main(): if __name__ == '__main__': + pa.check_for_latest() main() diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index 5f7fd09..ca7262d 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -352,4 +352,5 @@ def main(): if __name__ == '__main__': + pa.check_for_latest() main() diff --git a/tests/releases.json b/tests/releases.json new file mode 100644 index 0000000..76840cd --- /dev/null +++ b/tests/releases.json @@ -0,0 +1,431 @@ +[ + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/85798710", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/85798710/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/85798710/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.12.0", + "id": 85798710, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": 
"https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4FHS82", + "tag_name": "1.12.0", + "target_commitish": "main", + "name": "Subscriber 1.12.0", + "draft": false, + "prerelease": false, + "created_at": "2022-12-12T22:14:24Z", + "published_at": "2022-12-12T23:04:11Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.12.0", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.12.0", + "body": "## 1.12.0\r\n### Fixed\r\n- Added EDL based token downloading, removing CMR tokens [98](https://github.com/podaac/data-subscriber/issues/98),\r\n### Added\r\n- Added ability to download by filename [109](https://github.com/podaac/data-subscriber/issues/109) and additional regression testing" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/76177645", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/76177645/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/76177645/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.11.0", + "id": 76177645, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": 
"https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4EimDt", + "tag_name": "1.12.0-rc2", + "target_commitish": "main", + "name": "Subscriber 1.11.0", + "draft": false, + "prerelease": false, + "created_at": "2022-09-02T17:47:28Z", + "published_at": "2022-09-02T19:27:17Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.11.0", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.11.0", + "body": "## 1.11.0\r\n### Fixed\r\n- Fixed an issue where token-refresh was expecting a dictionary, not a list of tuples\r\n- Fixed issues where token was not propagated to downloader CMR query [94](https://github.com/podaac/data-subscriber/issues/94)\r\n- Fixed an issue with 503 errors on data download not being re-tried. [97](https://github.com/podaac/data-subscriber/issues/9797)\r\n- added \".tiff\" to default extensions to address #[100](https://github.com/podaac/data-subscriber/issues/100)\r\n- removed erroneous 'warning' message on not downloading all data to close [99](https://github.com/podaac/data-subscriber/issues/99)\r\n- updated help documentation for start/end times to close [79](https://github.com/podaac/data-subscriber/issues/79)\r\n### Added\r\n- Added citation file creation when data are downloaded [91](https://github.com/podaac/data-subscriber/issues/91). Required some updates to the regression testing." 
+ }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/70187121", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/70187121/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/70187121/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.10.2", + "id": 70187121, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4ELvhx", + "tag_name": "1.10.2", + "target_commitish": "main", + "name": "Subscriber 1.10.2", + "draft": false, + "prerelease": false, + "created_at": "2022-06-22T23:40:00Z", + "published_at": "2022-06-22T23:41:15Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.10.2", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.10.2", + "body": "## [1.10.2]\r\n### Fixed\r\n- Fixed an issue where using a default global bounding box prevented download of data that didn't use the 
horizontal spatial domain [87](https://github.com/podaac/data-subscriber/issues/87)\r\n- Fixed limit option not being respected. [86](https://github.com/podaac/data-subscriber/issues/86)" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/69558165", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/69558165/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/69558165/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.10.1", + "id": 69558165, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4EJV-V", + "tag_name": "1.10.1", + "target_commitish": "main", + "name": "Subscriber 1.10.1", + "draft": false, + "prerelease": false, + "created_at": "2022-06-15T19:50:57Z", + "published_at": "2022-06-15T20:09:30Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.10.1", + "zipball_url": 
"https://api.github.com/repos/podaac/data-subscriber/zipball/1.10.1", + "body": "# Changelog\r\nAll notable changes to this project will be documented in this file.\r\n\r\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)\r\n\r\n## [1.10.1]\r\n### Fixed\r\n- Support for SHA-256 and SHA-512 checksums" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/67586638", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/67586638/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/67586638/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.10.0", + "id": 67586638, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4EB0pO", + "tag_name": "1.10.0", + "target_commitish": "main", + "name": "Subscriber 1.10.0", + "draft": false, + "prerelease": false, + "created_at": "2022-05-23T18:52:49Z", + "published_at": "2022-05-23T18:56:07Z", + 
"assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.10.0", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.10.0", + "body": "## [1.10.0]\r\n### Changed\r\n- Changed minimum supported python version to 3.7, down from 3.8.\r\n" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/67304549", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/67304549/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/67304549/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.9.1-r2", + "id": 67304549, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4EAvxl", + "tag_name": "1.9.1-r2", + "target_commitish": "main", + "name": "Subscriber 1.9.1", + "draft": false, + "prerelease": false, + "created_at": "2022-05-19T17:23:04Z", + "published_at": "2022-05-19T17:27:11Z", + "assets": [], + "tarball_url": 
"https://api.github.com/repos/podaac/data-subscriber/tarball/1.9.1-r2", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.9.1-r2", + "body": "# Changelog\r\nAll notable changes to this project will be documented in this file.\r\n\r\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)\r\n\r\n## [1.9.1]\r\n### Changed\r\n- Switched to [poetry](https://python-poetry.org/) as the build tool for the project\r\n" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/65602913", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/65602913/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/65602913/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.9.0", + "id": 65602913, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4D6QVh", + "tag_name": "1.9.0", + "target_commitish": "main", + "name": "Subscriber 
1.9.0", + "draft": false, + "prerelease": false, + "created_at": "2022-04-28T16:40:35Z", + "published_at": "2022-04-28T16:49:59Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.9.0", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.9.0", + "body": "# Changelog\r\nAll notable changes to this project will be documented in this file.\r\n\r\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)\r\n\r\n## [1.9.0]\r\n### Added\r\n- check if file exists before downloading a file. [17](https://github.com/podaac/data-subscriber/issues/17)\r\n- added automated regression testing\r\n### Changed\r\n- Implemented Search After CMR interface to allow granule listings > 2000 [15](https://github.com/podaac/data-subscriber/issues/15)\r\n- Retry CMR queries on server error using random exponential backoff max 60 seconds and 10 retries\r\n- Refresh token if CMR returns 401 error\r\n- Converted print statements to log statements\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/60633077", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/60633077/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/60633077/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.8.0", + "id": 60633077, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": 
"https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4DnS_1", + "tag_name": "1.8.0", + "target_commitish": "main", + "name": "Subscriber 1.8.0", + "draft": false, + "prerelease": false, + "created_at": "2022-02-28T18:22:48Z", + "published_at": "2022-02-28T18:25:14Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.8.0", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.8.0", + "body": "### Added\r\n- limit to set limit of downloads- useful for testing\r\n- cycle based downloads to the podaac-data-downloader. [41](https://github.com/podaac/data-subscriber/issues/41)\r\n- conftest.py added to force module inclusion for pytest\r\n- podaac-data-downloader script for bulk data downloading\r\n### Changed\r\n- created library of common access mechanisms to split between subscriber and downloader capabilities\r\n- added .tar.gz to list of default extensions. [40](https://github.com/podaac/data-subscriber/issues/40)\r\n- Ignore error if destination directory already exists. [46](https://github.com/podaac/data-subscriber/issues/46)\r\n- Updated the naming convention of .update file. [44](https://github.com/podaac/data-subscriber/issues/44)\r\n- one of -m, -sd, or -ed must be given to subscriber. Previously -m 60 was the default if nothing was specified.\r\n### Deprecated\r\n- use of \".update\" file naming convention. 
This will still work, but will be renamed to .update__COLLECTIONNAME after a successful run. the \".update\" file will need to be manually cleaned up. See [issue 44](https://github.com/podaac/data-subscriber/issues/44)\r\n### Removed\r\n### Fixed\r\n- issue where only specifying an end data cause issues in subscriber. [39](https://github.com/podaac/data-subscriber/issues/39)\r\n### Security" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/58477196", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/58477196/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/58477196/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.7.2", + "id": 58477196, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4DfEqM", + "tag_name": "1.7.2", + "target_commitish": "main", + "name": "Subscriber 1.7.2", + "draft": false, + "prerelease": false, + "created_at": 
"2022-02-02T00:48:22Z", + "published_at": "2022-02-02T00:49:57Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.7.2", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.7.2", + "body": "## [1.7.2]\r\n### Added\r\n### Changed\r\n- Made number of files to download a non-verbose default printout. [33](https://github.com/podaac/data-subscriber/issues/33)\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/58470446", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/58470446/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/58470446/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.7.1", + "id": 58470446, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4DfDAu", + "tag_name": "1.7.1", + "target_commitish": 
"main", + "name": "Subscriber 1.7.1", + "draft": false, + "prerelease": false, + "created_at": "2022-02-01T22:09:40Z", + "published_at": "2022-02-01T22:11:41Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.7.1", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.7.1", + "body": "# Changelog\r\nAll notable changes to this project will be documented in this file.\r\n\r\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)\r\n\r\n## [1.7.1]\r\n### Added\r\n- Auto build and deploy to pypi on tag/release.\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n\r\n## [1.7.0]\r\n### Added\r\n- Added ability to call a process on downlaoded files. [Thank to Joe Sapp](https://github.com/sappjw).\r\n### Changed\r\n- Turned -e option into 'additive' mode (multiple -e options allowed.) [Thanks to Joe Sapp](https://github.com/sappjw)\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n- issue not being able to find granuleUR [#28](https://github.com/podaac/data-subscriber/issues/28)\r\n### Security\r\n\r\n## [1.6.1]\r\n### Added\r\n- added warning for more than 2k granules\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n- strip newline characters from .update to fix https://github.com/podaac/data-subscriber/issues/25\r\n### Security\r\n\r\n## [1.6.0]\r\n### Added\r\n- added --offset flag for timestamp shift when creating DOY folder - (resolves https://github.com/podaac/data-subscriber/issues/23)\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n\r\n## [1.5.0] - 2021-10-12\r\n### Added\r\n- added ability to change the provider using the -p/--provider flag. 
Default is 'POCLOUD'\r\n- added pyproject info and setup.py fixes to enable pypi pushes\r\n### Changed\r\n- added pytest and flake8 fixes for automated builds\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n\r\n## [1.4.0] - 2021-10-05\r\n### Added\r\n### Changed\r\n- changed changing created_at to updated_since to allow for re-download of updated granules based on collection redeliveries - (resolves https://github.com/podaac/data-subscriber/issues/18)\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n\r\n\r\n## [1.3.0] - 2021-08-26\r\n### Added\r\n- added additional non-flat output directory option of -dy - (resolves https://github.com/podaac/data-subscriber/issues/13)\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n\r\n## [1.2.0] - 2021-08-15\r\n### Added\r\n- Added logging capability using the SUBSCRIBER_LOGLEVEL environment variable\r\n- Added -st and -ed flags and respect the .update flag\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n- removed the -ds flag as it caused confusion.\r\n### Fixed\r\n### Security\r\n\r\n## [1.1.2] - 2021-06-20\r\n### Added\r\n- added default layouts for non-flat output directories - (resolves https://github.com/podaac/data-subscriber/issues/6)\r\n- Added logging capability using the SUBSCRIBER_LOGLEVEL environment variable\r\n- added additional non-flat output directory option of -dy - (resolves https://github.com/podaac/data-subscriber/issues/13)\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n\r\n## [1.1.1] - 2021-06-06\r\n### Added\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n- updated urllib3>=1.26.5 (fixes https://github.com/advisories/GHSA-q2q7-5pp4-w6pg)\r\n\r\n## [1.1.0] - 2021-05-28\r\n### Added\r\n- User Agent to request so we can determine better support posture based on metrics\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security\r\n\r\n\r\n## [1.0.0] - 
2021-05-13\r\n### Added\r\n- data subscriber functionality\r\n### Changed\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n### Security" + }, + { + "url": "https://api.github.com/repos/podaac/data-subscriber/releases/58470099", + "assets_url": "https://api.github.com/repos/podaac/data-subscriber/releases/58470099/assets", + "upload_url": "https://uploads.github.com/repos/podaac/data-subscriber/releases/58470099/assets{?name,label}", + "html_url": "https://github.com/podaac/data-subscriber/releases/tag/1.7.0", + "id": 58470099, + "author": { + "login": "mike-gangl", + "id": 59702631, + "node_id": "MDQ6VXNlcjU5NzAyNjMx", + "avatar_url": "https://avatars.githubusercontent.com/u/59702631?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/mike-gangl", + "html_url": "https://github.com/mike-gangl", + "followers_url": "https://api.github.com/users/mike-gangl/followers", + "following_url": "https://api.github.com/users/mike-gangl/following{/other_user}", + "gists_url": "https://api.github.com/users/mike-gangl/gists{/gist_id}", + "starred_url": "https://api.github.com/users/mike-gangl/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/mike-gangl/subscriptions", + "organizations_url": "https://api.github.com/users/mike-gangl/orgs", + "repos_url": "https://api.github.com/users/mike-gangl/repos", + "events_url": "https://api.github.com/users/mike-gangl/events{/privacy}", + "received_events_url": "https://api.github.com/users/mike-gangl/received_events", + "type": "User", + "site_admin": false + }, + "node_id": "RE_kwDOFdnRzs4DfC7T", + "tag_name": "1.7.0", + "target_commitish": "main", + "name": "Subscriber 1.7.0", + "draft": false, + "prerelease": false, + "created_at": "2022-02-01T22:01:53Z", + "published_at": "2022-02-01T22:05:17Z", + "assets": [], + "tarball_url": "https://api.github.com/repos/podaac/data-subscriber/tarball/1.7.0", + "zipball_url": "https://api.github.com/repos/podaac/data-subscriber/zipball/1.7.0", + "body": "# 
Changelog\r\nAll notable changes to this project will be documented in this file.\r\n\r\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)\r\n\r\n## [1.7.0]\r\n### Added\r\n- Added ability to call a process on downlaoded files. [Thank to Joe Sapp](https://github.com/sappjw).\r\n### Changed\r\n- Turned -e option into 'additive' mode (multiple -e options allowed.) [Thanks to Joe Sapp](https://github.com/sappjw)\r\n### Deprecated\r\n### Removed\r\n### Fixed\r\n- issue not being able to find granuleUR [#28](https://github.com/podaac/data-subscriber/issues/28)\r\n### Security" + } +] diff --git a/tests/test_subscriber.py b/tests/test_subscriber.py index c38c9be..125b864 100644 --- a/tests/test_subscriber.py +++ b/tests/test_subscriber.py @@ -9,6 +9,9 @@ import json import tempfile from os.path import exists +from packaging import version + + def test_temporal_range(): @@ -210,6 +213,19 @@ def validate(args): pa.validate(args2) return args2 +def test_check_updates(): + version.parse(pa.get_latest_release()) + +def test_compare_release(): + tag="1.11.0" + assert pa.release_is_current(tag,"1.11.0") + assert pa.release_is_current(tag,"2.10.0") + assert pa.release_is_current(tag,"1.11.1") + + assert not pa.release_is_current(tag,"1.10.0") + assert not pa.release_is_current(tag,"1.10.5") + assert not pa.release_is_current(tag,"0.9000.5") + def test_extensions(): assert pa.search_extension('\\.tiff', "myfile.tiff") == True assert pa.search_extension('\\.tiff', "myfile.tif") == False @@ -218,3 +234,10 @@ def test_extensions(): assert pa.search_extension('PTM_\\d+', "myfile.PTM_10") == True assert pa.search_extension('PTM_\\d+', "myfile.PTM_09") == True assert pa.search_extension('PTM_\\d+', "myfile.PTM_9") == True + + +def test_get_latest_release_from_json(): + f = open('tests/releases.json') + release_json = json.load(f) + latest_release = pa.get_latest_release_from_json(release_json) + assert latest_release == "1.12.0" diff --git 
a/tests/test_token_regression.py b/tests/test_token_regression.py index 8eb0641..85e76a7 100644 --- a/tests/test_token_regression.py +++ b/tests/test_token_regression.py @@ -6,14 +6,14 @@ import shutil from pathlib import Path -@pytest.mark.regression +@pytest.mark.token def setup_function(method): # Deletes all known tokens tokens = pa.list_tokens(pa.token_url) for x in tokens: pa.delete_token(pa.token_url, x) -@pytest.mark.regression +@pytest.mark.token def teardown_function(method): # Deletes all known tokens tokens = pa.list_tokens(pa.token_url) @@ -23,7 +23,7 @@ def teardown_function(method): # REGRESSION TEST CURRENTLY REQUIRES A .NETRC file for CMR/Data Download # token API can be found here: https://wiki.earthdata.nasa.gov/display/EL/API+Documentation # explore https://urs.earthdata.nasa.gov/documentation/for_integrators/api_documentation#/oauth/token -@pytest.mark.regression +@pytest.mark.token def test_list_tokens(): tokens = pa.list_tokens(pa.token_url) assert len(tokens) == 0 @@ -31,7 +31,7 @@ def test_list_tokens(): tokens = pa.list_tokens(pa.token_url) assert len(tokens) == 1 -@pytest.mark.regression +@pytest.mark.token def test_edl_getToken(): token = pa.get_token(pa.token_url) assert token != "" From c8ed86e2532bbd85d67366f2e8b58801d7d54ce1 Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Tue, 11 Apr 2023 09:43:02 -0700 Subject: [PATCH 17/34] Issues/127 (#128) * added token sensitivity filter to remove tokens from CMR queries * added changelog updates --- CHANGELOG.md | 1 + subscriber/podaac_data_downloader.py | 7 ++++++- subscriber/podaac_data_subscriber.py | 7 ++++++- subscriber/token_formatter.py | 14 ++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 subscriber/token_formatter.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b69c7f..2f6ca01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/) - Added --dry-run option to subscriber and downloader to view the files that _would_ be downloaded without actuall downloading them. [102](https://github.com/podaac/data-subscriber/issues/102) - Added new feature allowing regex to be used in `--extension` `-e` options. For example using -e `PTM_\\d+` would match data files like `filename.PTM_1`, `filename.PTM_2` and `filename.PTM_10`, instead of specifying all possible combinations (``-e PTM_1, -e PTM_2, ..., -e PMT_10`) [115](https://github.com/podaac/data-subscriber/issues/115) - Added check for updated version [70](https://github.com/podaac/data-subscriber/issues/70) +- Removed CMR Token from log messages [127](https://github.com/podaac/data-subscriber/issues/127) ## 1.12.0 diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index de9e597..6334f68 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -10,6 +10,7 @@ from urllib.request import urlretrieve from subscriber import podaac_access as pa +from subscriber import token_formatter __version__ = pa.__version__ @@ -333,10 +334,14 @@ def run(args=None): def main(): + log_format = '[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s' log_level = os.environ.get('PODAAC_LOGLEVEL', 'INFO').upper() logging.basicConfig(stream=sys.stdout, - format='[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', + format=log_format, level=log_level) + + for handler in logging.root.handlers: + handler.setFormatter(token_formatter.TokenFormatter(log_format)) logging.debug("Log level set to " + log_level) try: diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index ca7262d..52ec5d4 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -23,6 +23,8 @@ from urllib.request import urlretrieve from subscriber import podaac_access as pa +from 
subscriber import token_formatter + __version__ = pa.__version__ @@ -338,10 +340,13 @@ def run(args=None): def main(): + log_format = '[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s' log_level = os.environ.get('PODAAC_LOGLEVEL', 'INFO').upper() logging.basicConfig(stream=sys.stdout, - format='[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', + format=log_format, level=log_level) + for handler in logging.root.handlers: + handler.setFormatter(token_formatter.TokenFormatter(log_format)) logging.debug("Log level set to " + log_level) try: diff --git a/subscriber/token_formatter.py b/subscriber/token_formatter.py new file mode 100644 index 0000000..3bb4128 --- /dev/null +++ b/subscriber/token_formatter.py @@ -0,0 +1,14 @@ +import logging +import re + + +class TokenFormatter(logging.Formatter): + """Formatter that removes sensitive information in urls.""" + @staticmethod + def _filter(s): + + return re.sub(r'token=(.*)\??', r'token=****', s) + + def format(self, record): + original = logging.Formatter.format(self, record) + return self._filter(original) From 133f217464eac9b243c2107a3c4c2bd238e57959 Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Thu, 20 Apr 2023 06:50:56 -0700 Subject: [PATCH 18/34] updated some lingering merge issues (huh?) 
--- Downloader.md | 2 -- tests/test_downloader_regression.py | 3 --- tests/test_token_regression.py | 17 +---------------- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/Downloader.md b/Downloader.md index e5d9a2c..65f7754 100644 --- a/Downloader.md +++ b/Downloader.md @@ -44,8 +44,6 @@ optional arguments: ``` -##Run the Script - ## Step 2: Run the Script Usage: diff --git a/tests/test_downloader_regression.py b/tests/test_downloader_regression.py index 5b0e840..027ea50 100644 --- a/tests/test_downloader_regression.py +++ b/tests/test_downloader_regression.py @@ -37,7 +37,6 @@ def test_downloader_limit_MUR(): assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name ])==1 shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2') -<<<<<<< HEAD # Test the downlaoder on SWOT Simulated single file download @pytest.mark.regression def test_downloader_limit_dry_run(): @@ -49,8 +48,6 @@ def test_downloader_limit_dry_run(): assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name ])==0 shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2') -======= ->>>>>>> main #Test the downlaoder on MUR25 data for start/stop/, yyyy/mmm/dd dir structure, # and offset. 
Running it a second time to ensure it downlaods the files again- diff --git a/tests/test_token_regression.py b/tests/test_token_regression.py index 59b9523..b4242b6 100644 --- a/tests/test_token_regression.py +++ b/tests/test_token_regression.py @@ -6,22 +6,15 @@ import shutil from pathlib import Path -<<<<<<< HEAD @pytest.mark.token -======= -@pytest.mark.regression ->>>>>>> main def setup_function(method): # Deletes all known tokens tokens = pa.list_tokens(pa.token_url) for x in tokens: pa.delete_token(pa.token_url, x) -<<<<<<< HEAD + @pytest.mark.token -======= -@pytest.mark.regression ->>>>>>> main def teardown_function(method): # Deletes all known tokens tokens = pa.list_tokens(pa.token_url) @@ -31,11 +24,7 @@ def teardown_function(method): # REGRESSION TEST CURRENTLY REQUIRES A .NETRC file for CMR/Data Download # token API can be found here: https://wiki.earthdata.nasa.gov/display/EL/API+Documentation # explore https://urs.earthdata.nasa.gov/documentation/for_integrators/api_documentation#/oauth/token -<<<<<<< HEAD @pytest.mark.token -======= -@pytest.mark.regression ->>>>>>> main def test_list_tokens(): tokens = pa.list_tokens(pa.token_url) assert len(tokens) == 0 @@ -43,11 +32,7 @@ def test_list_tokens(): tokens = pa.list_tokens(pa.token_url) assert len(tokens) == 1 -<<<<<<< HEAD @pytest.mark.token -======= -@pytest.mark.regression ->>>>>>> main def test_edl_getToken(): token = pa.get_token(pa.token_url) assert token != "" From 5d93610373b09903cfab2a2db21364b15ff3023f Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Thu, 20 Apr 2023 07:36:25 -0700 Subject: [PATCH 19/34] updated regression test --- tests/test_subscriber.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_subscriber.py b/tests/test_subscriber.py index 125b864..f40994c 100644 --- a/tests/test_subscriber.py +++ b/tests/test_subscriber.py @@ -85,8 +85,8 @@ def test_search_after(): 'bounding_box': "-180,-90,180,90", } results = pa.get_search_results(params, True) - 
assert results['hits'] == 3748 - assert len(results['items']) == 3748 + assert results['hits'] == 3751 + assert len(results['items']) == 3751 def test_update_format_change(cleanup_update_test): print("Running Test") From fb91a77de894d342deffdd9e443c061fe5e023d8 Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Thu, 20 Apr 2023 10:08:03 -0700 Subject: [PATCH 20/34] updated ubuntu versions --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index cb18515..649782f 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -17,7 +17,7 @@ jobs: matrix: python-version: [ "3.7", "3.8", "3.9", "3.10" ] poetry-version: [ "1.1.14" ] - os: [ ubuntu-18.04, macos-latest, windows-latest ] + os: [ ubuntu-22.04, macos-latest, windows-latest ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 From 628fa0d18d6be365236d35e74101b0ad41d5151e Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Thu, 20 Apr 2023 10:43:00 -0700 Subject: [PATCH 21/34] removed 18.04 ubuntu from workflows/actions --- .github/workflows/python-app.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 649782f..d713f51 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -64,7 +64,7 @@ jobs: matrix: python-version: [ "3.7", "3.8", "3.9", "3.10" ] poetry-version: [ "1.1.14" ] - os: [ ubuntu-18.04, macos-latest, windows-latest ] + os: [ ubuntu-22.04, macos-latest, windows-latest ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index eba32fa..2697eb1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -44,7 +44,7 @@ jobs: fail-fast: false matrix: python-version: [ "3.7", "3.8", "3.9", 
"3.10" ] - os: [ ubuntu-18.04, macos-latest, windows-latest ] + os: [ ubuntu-22.04, macos-latest, windows-latest ] runs-on: ${{ matrix.os }} steps: - name: Set up Python From 3204beea48c091383943958ce1cbc3a90fe7d87c Mon Sep 17 00:00:00 2001 From: mike-gangl <59702631+mike-gangl@users.noreply.github.com> Date: Mon, 24 Apr 2023 08:32:53 -0700 Subject: [PATCH 22/34] version and documentation updates (#130) --- CHANGELOG.md | 4 ++-- Downloader.md | 2 +- README.md | 10 +++++----- Subscriber.md | 2 +- pyproject.toml | 2 +- subscriber/podaac_access.py | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f6ca01..7e29718 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -## Unreleased +## 1.13.0 ### Added - Added --dry-run option to subscriber and downloader to view the files that _would_ be downloaded without actuall downloading them. [102](https://github.com/podaac/data-subscriber/issues/102) -- Added new feature allowing regex to be used in `--extension` `-e` options. For example using -e `PTM_\\d+` would match data files like `filename.PTM_1`, `filename.PTM_2` and `filename.PTM_10`, instead of specifying all possible combinations (``-e PTM_1, -e PTM_2, ..., -e PMT_10`) [115](https://github.com/podaac/data-subscriber/issues/115) +- Added new feature allowing regex to be used in `--extension` `-e` options. 
For example using -e `PTM_\\d+` would match data files like `filename.PTM_1`, `filename.PTM_2` and `filename.PTM_10`, instead of specifying all possible combinations (`-e PTM_1, -e PTM_2, ..., -e PMT_10`) [115](https://github.com/podaac/data-subscriber/issues/115) - Added check for updated version [70](https://github.com/podaac/data-subscriber/issues/70) - Removed CMR Token from log messages [127](https://github.com/podaac/data-subscriber/issues/127) diff --git a/Downloader.md b/Downloader.md index 65f7754..ebe0dd2 100644 --- a/Downloader.md +++ b/Downloader.md @@ -210,7 +210,7 @@ podaac-data-downloader -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -b="-180,-90,180,90 ### Setting extensions -Some collections have many files. To download a specific set of files, you can set the extensions on which downloads are filtered. By default, ".nc", ".h5", and ".zip" files are downloaded by default. +Some collections have many files. To download a specific set of files, you can set the extensions on which downloads are filtered. By default, ".nc", ".h5", and ".zip" files are downloaded by default. 
The `-e` option is a regular expression check so you can do advanced things like `-e PTM_\\d+` to match `PTM_` followed by one or more digits- useful when the ending of a file has no suffix and has a number (1-12 for PTM, in this example) ``` -e EXTENSIONS, --extensions EXTENSIONS diff --git a/README.md b/README.md index 0fcd55d..e32c6be 100644 --- a/README.md +++ b/README.md @@ -37,16 +37,16 @@ pip install podaac-data-subscriber you should now have access to the downloader and subscriber Command line interfaces: ``` -$> podaac-data-subscriber -h -usage: podaac_data_subscriber.py [-h] -c COLLECTION -d OUTPUTDIRECTORY [-sd STARTDATE] [-ed ENDDATE] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-m MINUTES] - [-e EXTENSIONS] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] +$> usage: PO.DAAC data subscriber [-h] -c COLLECTION -d OUTPUTDIRECTORY [-f] [-sd STARTDATE] [-ed ENDDATE] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-m MINUTES] + [-e EXTENSIONS] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] [--dry-run] ... ``` ``` -$> podaac-data-downloader -h -usage: PO.DAAC bulk-data downloader [-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] [--offset OFFSET] [-e EXTENSIONS] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] [--limit LIMIT] +$> usage: PO.DAAC bulk-data downloader [-h] -c COLLECTION -d OUTPUTDIRECTORY [--cycle SEARCH_CYCLES] [-sd STARTDATE] [-ed ENDDATE] [-f] [-b BBOX] [-dc] [-dydoy] [-dymd] [-dy] + [--offset OFFSET] [-e EXTENSIONS] [-gr GRANULENAME] [--process PROCESS_CMD] [--version] [--verbose] [-p PROVIDER] [--limit LIMIT] [--dry-run] + ... ``` diff --git a/Subscriber.md b/Subscriber.md index db96c93..f3afc1b 100644 --- a/Subscriber.md +++ b/Subscriber.md @@ -188,7 +188,7 @@ podaac-data-subscriber -c VIIRS_N20-OSPO-L2P-v2.61 -d ./data -b="-180,-90,180,90 ### Setting extensions -Some collections have many files. 
To download a specific set of files, you can set the extensions on which downloads are filtered. By default, ".nc", ".h5", and ".zip" files are downloaded by default. +Some collections have many files. To download a specific set of files, you can set the extensions on which downloads are filtered. By default, ".nc", ".h5", and ".zip" files are downloaded by default. The `-e` option is a regular expression check so you can do advanced things like `-e PTM_\\d+` to match `PTM_` followed by one or more digits- useful when the ending of a file has no suffix and has a number (1-12 for PTM, in this example) ``` -e EXTENSIONS, --extensions EXTENSIONS diff --git a/pyproject.toml b/pyproject.toml index dcd1aba..0cfffb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.12.0" +version = "1.13.0" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 2bbba6a..3c19d7a 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -30,7 +30,7 @@ import tenacity from datetime import datetime -__version__ = "1.12.0" +__version__ = "1.13.0" extensions = ["\\.nc", "\\.h5", "\\.zip", "\\.tar.gz", "\\.tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" From 1622cd8668558c313a4575c885ab50f8458fc5d6 Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Thu, 27 Apr 2023 08:46:51 -0700 Subject: [PATCH 23/34] 1.13.1 changelog and dependecny updates --- CHANGELOG.md | 4 ++++ pyproject.toml | 3 ++- subscriber/podaac_access.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e29718..6918aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +## 1.13.1 +### Fixed +- Fixed an issue where a required library wasn't being included in the installation. + ## 1.13.0 ### Added - Added --dry-run option to subscriber and downloader to view the files that _would_ be downloaded without actuall downloading them. [102](https://github.com/podaac/data-subscriber/issues/102) diff --git a/pyproject.toml b/pyproject.toml index 0cfffb3..78dcce4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.13.0" +version = "1.13.1" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" @@ -15,6 +15,7 @@ packages = [ python = "^3.7" requests = "^2.27.1" tenacity = "^8.0.1" +packaging = "^23.0" [tool.poetry.dev-dependencies] pytest = "^7.1.2" diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 3c19d7a..8742141 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -30,7 +30,7 @@ import tenacity from datetime import datetime -__version__ = "1.13.0" +__version__ = "1.13.1" extensions = ["\\.nc", "\\.h5", "\\.zip", "\\.tar.gz", "\\.tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" From c73516fd2e955e29160d7bf79f00ec8107c8fc77 Mon Sep 17 00:00:00 2001 From: mike-gangl Date: Thu, 27 Apr 2023 08:54:32 -0700 Subject: [PATCH 24/34] fixed formatting from unsaved merges --- CHANGELOG.md | 3 --- pyproject.toml | 4 ---- 2 files changed, 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9157ffa..6918aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -<<<<<<< HEAD ## 1.13.1 ### Fixed - Fixed an issue where a required library wasn't being included in the installation. 
-======= ->>>>>>> main ## 1.13.0 ### Added - Added --dry-run option to subscriber and downloader to view the files that _would_ be downloaded without actuall downloading them. [102](https://github.com/podaac/data-subscriber/issues/102) diff --git a/pyproject.toml b/pyproject.toml index 60ec60c..78dcce4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -<<<<<<< HEAD version = "1.13.1" -======= -version = "1.13.0" ->>>>>>> main description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" From 8313e1ab17e0529c275557e75d5e71048f124174 Mon Sep 17 00:00:00 2001 From: Jack McNelis Date: Wed, 14 Jun 2023 14:07:34 -0400 Subject: [PATCH 25/34] provide support for granule wildcard patterns in data downloader --- subscriber/podaac_data_downloader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 6334f68..8ce2f30 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -190,6 +190,9 @@ def run(args=None): ('GranuleUR[]', cmr_granule), ('token', token), ] + #jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search + if '*' or '?' 
in cmr_granule: + params.append(('options[GranuleUR][pattern]', 'true')) if args.verbose: logging.info("Granule: " + str(cmr_granule)) From 41b89043896aebfe1c590272ac9e00e399a6e32e Mon Sep 17 00:00:00 2001 From: Jack McNelis Date: Wed, 14 Jun 2023 14:49:07 -0400 Subject: [PATCH 26/34] Update subscriber/podaac_data_downloader.py Co-authored-by: Stepheny Perez --- subscriber/podaac_data_downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 8ce2f30..83b08aa 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -191,7 +191,7 @@ def run(args=None): ('token', token), ] #jmcnelis, 2023/06/14 - provide for wildcards in granuleur-based search - if '*' or '?' in cmr_granule: + if '*' in cmr_granule or '?' in cmr_granule: params.append(('options[GranuleUR][pattern]', 'true')) if args.verbose: logging.info("Granule: " + str(cmr_granule)) From 7639b4df5d698facecc3f873b4aa7eb28d4b8a77 Mon Sep 17 00:00:00 2001 From: Jack McNelis Date: Wed, 14 Jun 2023 16:14:45 -0400 Subject: [PATCH 27/34] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6918aef..455edb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +## [unreleased] +### Added +- Added support for wildcard search patterns in podaac-data-downloader when executed with the -gr option (i.e. search/download by CMR Granule Ur/Id). Also, added usage details to Downloader.md to describe this new feature [138](https://github.com/podaac/data-subscriber/pull/138). + ## 1.13.1 ### Fixed - Fixed an issue where a required library wasn't being included in the installation. 
From 3220c55c712bab979b8aa8bce859e0293ca41cb9 Mon Sep 17 00:00:00 2001 From: Jack McNelis Date: Wed, 14 Jun 2023 16:32:06 -0400 Subject: [PATCH 28/34] Update Downloader.md --- Downloader.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Downloader.md b/Downloader.md index ebe0dd2..3b15e0d 100644 --- a/Downloader.md +++ b/Downloader.md @@ -32,7 +32,7 @@ optional arguments: -e EXTENSIONS, --extensions EXTENSIONS Regexps of extensions of products to download. Default is [.nc, .h5, .zip, .tar.gz, .tiff] -gr GRANULENAME, --granule-name GRANULENAME - Flag to download specific granule from a collection. This parameter can only be used if you know the granule name. Only one granule name can be supplied + Flag to download specific granule from a collection. This parameter can only be used if you know the granule name. Only one granule name can be supplied. Supports wildcard search patterns allowing the user to identify multiple granules for download by using `?` for single- and `*` for multi-character expansion. --process PROCESS_CMD Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times. --version Display script version information and exit. @@ -131,6 +131,7 @@ The `-gr` option works by taking the file name, removing the suffix and searchin Because of this behavior, granules without data suffixes and granules where the the UR does not directly follow this convention may not work as anticipated. We will be adding the ability to download by granuleUR in a future enhancement. +The -gr option supports wildcard search patterns (using `?` for single- and `*` for multi-character expansion) to select and download multiple granules based on the filename pattern. This feature is supported through wildcard search functionality provided through CMR, which is described in the [CMR Search API documentation](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#parameter-options). 
### Download data by cycle From 2948bb76d4f1248cedb592b70bc6582a804fd929 Mon Sep 17 00:00:00 2001 From: Jack McNelis Date: Wed, 14 Jun 2023 16:34:18 -0400 Subject: [PATCH 29/34] Update podaac_data_downloader.py gr option help text to indicate wildcard support --- subscriber/podaac_data_downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index 83b08aa..4651f44 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -93,7 +93,7 @@ def create_parser(): # Get specific granule from the search # https://github.com/podaac/data-subscriber/issues/109 parser.add_argument("-gr", "--granule-name", dest="granulename", - help="Flag to download specific granule from a collection. This parameter can only be used if you know the granule name. Only one granule name can be supplied", + help="Flag to download specific granule from a collection. This parameter can only be used if you know the granule name. Only one granule name can be supplied. Supports wildcard search patterns allowing the user to identify multiple granules for download by using `?` for single- and `*` for multi-character expansion.", default=None) parser.add_argument("--process", dest="process_cmd", From 7991df2814deb3b43374a432c202201e7c18c5d1 Mon Sep 17 00:00:00 2001 From: skperez Date: Tue, 20 Jun 2023 17:35:59 -0700 Subject: [PATCH 30/34] bump version --- CHANGELOG.md | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 455edb3..d8ca5d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -## [unreleased] +## [1.14.0] ### Added - Added support for wildcard search patterns in podaac-data-downloader when executed with the -gr option (i.e. 
search/download by CMR Granule Ur/Id). Also, added usage details to Downloader.md to describe this new feature [138](https://github.com/podaac/data-subscriber/pull/138). diff --git a/pyproject.toml b/pyproject.toml index 78dcce4..8c68790 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.13.1" +version = "1.14.0" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" From 2ebe5aa62eafad67b71a3aebc9820de7cb65824c Mon Sep 17 00:00:00 2001 From: skperez Date: Thu, 22 Jun 2023 13:22:53 -0700 Subject: [PATCH 31/34] /version 1.14.0-alpha.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8c68790..f47e4cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.14.0" +version = "1.14.0-alpha.0" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" From 9d5ec2c8b3fe58502d2496c5a89913c7c25b491b Mon Sep 17 00:00:00 2001 From: skperez Date: Thu, 13 Jul 2023 15:34:18 -0700 Subject: [PATCH 32/34] Updated hardcoded version to correct value --- subscriber/podaac_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index 8742141..827a6ac 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -30,7 +30,7 @@ import tenacity from datetime import datetime -__version__ = "1.13.1" +__version__ = "1.14.0" extensions = ["\\.nc", "\\.h5", "\\.zip", "\\.tar.gz", "\\.tiff"] edl = "urs.earthdata.nasa.gov" cmr = "cmr.earthdata.nasa.gov" From f2df79d436019055a07fc7831e3693c2dc086370 Mon Sep 17 00:00:00 2001 From: skorper Date: Mon, 17 Jul 2023 11:15:04 -0700 Subject: [PATCH 33/34] Update toml with version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/pyproject.toml b/pyproject.toml index f47e4cd..a4acb7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.14.0-alpha.0" +version = "1.14.0-alpha.1" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md" From 5daf6e7b722e1d84ce1922129503e1dca52f9793 Mon Sep 17 00:00:00 2001 From: skperez Date: Tue, 18 Jul 2023 08:37:16 -0700 Subject: [PATCH 34/34] bump version to 1.14.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a4acb7d..8c68790 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "podaac-data-subscriber" -version = "1.14.0-alpha.1" +version = "1.14.0" description = "PO.DAAC Data Subscriber Command Line Tool" authors = ["PO.DAAC "] readme = "README.md"