From 665c9c5e3e412b087472dbb946aa63de4dfdbef2 Mon Sep 17 00:00:00 2001
From: mike-gangl <59702631+mike-gangl@users.noreply.github.com>
Date: Fri, 2 Sep 2022 10:47:28 -0700
Subject: [PATCH] Develop (#103)

* Issues/91 (#92)

* added citation creation tests and functionality to subscriber and downloader

* added verbose option to create_citation_file command, previously hard-coded

* updated changelog (whoops) and fixed regression test:
1. Issue where the citation file, now downloaded, affected the counts
2. Issue where the logic for determining whether a file's modified time was changing was picking up the new citation file, which _always_ gets rewritten to update the 'last accessed' date.

* updated request to include exc_info in warning; fixed an issue where params not being a dictionary caused errors

* changed a warning to debug for citation file. fixed test issues

* Enable debug logging during regression tests and set max parallel workflows to 2

* added output to pytest

* fixed test to only look for downloaded data files, not the citation file, due to 'random' CMR errors when creating a citation.

* added mock testing and retry on 503

* added 503 fixes

Co-authored-by: Frank Greguska 

* fixed issues where token was not propagated to CMR queries (#95)

* Misc fixes (#101)

* added ".tiff" to default extensions to address #100

* removed 'warning' message on not downloading all data to close #99

* updated help documentation for start/end times to close #79

* added version update, updates to CHANGELOG

* added token get, delete, refresh and list operations

* Revert "added token get,delete, refresh and list operations"

This reverts commit 15aba90201b79bbf01f4787900b3b41160441390.

* Update python-app.yml

Co-authored-by: Frank Greguska 
---
 .github/workflows/python-app.yml     |  7 ++-
 CHANGELOG.md                         | 11 ++++
 poetry.lock                          | 24 +++++++--
 pyproject.toml                       |  3 +-
 subscriber/podaac_access.py          | 75 +++++++++++++++++++++++++++-
 subscriber/podaac_data_downloader.py | 27 +++++++---
 subscriber/podaac_data_subscriber.py | 26 +++++++---
 tests/test_downloader_regression.py  | 15 ++++--
 tests/test_subscriber.py             | 64 ++++++++++++++++++++++++
 tests/test_subscriber_regression.py  |  2 +-
 10 files changed, 224 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index dbde039..62275e3 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -13,9 +13,10 @@ jobs:
   build:
     strategy:
       fail-fast: false
+      max-parallel: 2
       matrix:
         python-version: [ "3.7", "3.8", "3.9", "3.10" ]
-        poetry-version: [ "1.1" ]
+        poetry-version: [ "1.1.14" ]
         os: [ ubuntu-18.04, macos-latest, windows-latest ]
     runs-on: ${{ matrix.os }}
     steps:
@@ -47,5 +48,7 @@ jobs:
           username: ${{ secrets.EDL_OPS_USERNAME }}
           password: ${{ secrets.EDL_OPS_PASSWORD }}
       - name: Regression Test with pytest
+        env:
+          PODAAC_LOGLEVEL: "DEBUG"
         run: |
-          poetry run pytest -m "regression"
+          poetry run pytest -o log_cli=true --log-cli-level=DEBUG -m "regression"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f06ebe6..1f5b15f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 
+## [1.11.0]
+### Fixed
+- Fixed an issue where token refresh expected params to be a dictionary rather than a list of tuples
+- Fixed an issue where the token was not propagated to the downloader CMR query [94](https://github.com/podaac/data-subscriber/issues/94)
+- Fixed an issue where 503 errors on data download were not retried [97](https://github.com/podaac/data-subscriber/issues/97)
+- Added ".tiff" to default extensions to address [100](https://github.com/podaac/data-subscriber/issues/100)
+- Removed erroneous 'warning' message on not downloading all data to close [99](https://github.com/podaac/data-subscriber/issues/99)
+- Updated help documentation for start/end times to close [79](https://github.com/podaac/data-subscriber/issues/79)
+### Added
+- Added citation file creation when data are downloaded [91](https://github.com/podaac/data-subscriber/issues/91). Required some updates to the regression testing.
+
 ## [1.10.2]
 ### Fixed
 - Fixed an issue where using a default global bounding box prevented download of data that didn't use the horizontal spatial domain [87](https://github.com/podaac/data-subscriber/issues/87)
diff --git a/poetry.lock b/poetry.lock
index 0a666cb..94f8c70 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -124,8 +124,8 @@ python-versions = ">=3.6"
 importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
 
 [package.extras]
-dev = ["pre-commit", "tox"]
-testing = ["pytest", "pytest-benchmark"]
+testing = ["pytest-benchmark", "pytest"]
+dev = ["tox", "pre-commit"]
 
 [[package]]
 name = "py"
@@ -184,6 +184,20 @@ tomli = ">=1.0.0"
 [package.extras]
 testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
 
+[[package]]
+name = "pytest-mock"
+version = "3.8.2"
+description = "Thin-wrapper around the mock package for easier use with pytest"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+pytest = ">=5.0"
+
+[package.extras]
+dev = ["pre-commit", "tox", "pytest-asyncio"]
+
 [[package]]
 name = "requests"
 version = "2.27.1"
@@ -257,7 +271,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7"
-content-hash = "8ff84c55fbd8a74bae903e6216e256cea47f571d88fcfa54f0492125746244ed"
+content-hash = "c5ece7741408cb266fe803842b66f646317dc3a384e9c54ecbe66a14ce895fed"
 
 [metadata.files]
 atomicwrites = [
@@ -328,6 +342,10 @@ pytest = [
     {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"},
     {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"},
 ]
+pytest-mock = [
+    {file = "pytest-mock-3.8.2.tar.gz", hash = "sha256:77f03f4554392558700295e05aed0b1096a20d4a60a4f3ddcde58b0c31c8fca2"},
+    {file = "pytest_mock-3.8.2-py3-none-any.whl", hash = "sha256:8a9e226d6c0ef09fcf20c94eb3405c388af438a90f3e39687f84166da82d5948"},
+]
 requests = [
     {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
     {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
diff --git a/pyproject.toml b/pyproject.toml
index 44ae341..2ad16d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "podaac-data-subscriber"
-version = "1.10.2"
+version = "1.11.0"
 description = "PO.DAAC Data Subscriber Command Line Tool"
 authors = ["PO.DAAC "]
 readme = "README.md"
@@ -19,6 +19,7 @@ tenacity = "^8.0.1"
 [tool.poetry.dev-dependencies]
 pytest = "^7.1.2"
 flake8 = "^4.0.1"
+pytest-mock = "^3.8.2"
 
 [tool.poetry.scripts]
 podaac-data-subscriber = 'subscriber.podaac_data_subscriber:main'
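One functional note before the code diffs: the ".tiff" addition below means GeoTIFF granules now pass the default extension filter. A minimal sketch of the idea behind that filter (the filenames are invented, and the real filtering happens in the subscriber/downloader run loops):

    from subscriber import podaac_access as pa

    files = ["granule.nc", "browse.tiff", "readme.txt"]
    kept = [f for f in files if any(f.endswith(ext) for ext in pa.extensions)]
    # kept == ["granule.nc", "browse.tiff"]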
diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py
index 327155a..5c0e3bb 100644
--- a/subscriber/podaac_access.py
+++ b/subscriber/podaac_access.py
@@ -10,10 +10,14 @@ from typing import Dict
 from urllib import request
 from urllib.error import HTTPError
+from urllib.request import urlretrieve
 import subprocess
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
 import hashlib
+from datetime import datetime
+import time
+
 
 import requests
@@ -21,8 +25,8 @@ import tenacity
 from datetime import datetime
 
-__version__ = "1.10.2"
-extensions = [".nc", ".h5", ".zip", ".tar.gz"]
+__version__ = "1.11.0"
+extensions = [".nc", ".h5", ".zip", ".tar.gz", ".tiff"]
 edl = "urs.earthdata.nasa.gov"
 cmr = "cmr.earthdata.nasa.gov"
 token_url = "https://" + cmr + "/legacy-services/rest/tokens"
@@ -286,6 +290,26 @@ def get_temporal_range(start, end, now):
     raise ValueError("One of start-date or end-date must be specified.")
 
 
+def download_file(remote_file, output_path, retries=3):
+    failed = False
+    for r in range(retries):
+        try:
+            urlretrieve(remote_file, output_path)
+        except HTTPError as e:
+            if e.code == 503:
+                logging.warning(f'Error downloading {remote_file}. Retrying download.')
+                # back off on sleep time after each error...
+                time.sleep(r)
+                if r >= retries - 1:
+                    failed = True
+            else:
+                raise
+        else:
+            # downloaded the file without a 503, so stop retrying
+            break
+
+    if failed:
+        raise Exception("Could not download file.")
+
+
 # Retry using random exponential backoff if a 500 error is raised. Maximum 10 attempts.
 @tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60),
                 stop=tenacity.stop_after_attempt(10),
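A minimal sketch of how the download_file helper added above is expected to be called (the URL and output path are illustrative, not real values): non-503 HTTP errors propagate immediately, while 503s are retried with a growing sleep before the helper gives up.

    from urllib.error import HTTPError
    from subscriber import podaac_access as pa

    remote = "https://archive.podaac.earthdata.nasa.gov/example/granule.nc"  # illustrative URL
    try:
        pa.download_file(remote, "./granule.nc", retries=3)
    except HTTPError:
        pass  # a non-503 error was raised immediately
    except Exception:
        pass  # all three 503 attempts were exhausted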
@@ -436,3 +460,50 @@ def make_checksum(file_path, algorithm):
         for chunk in iter(lambda: f.read(4096), b""):
             hash_alg.update(chunk)
     return hash_alg.hexdigest()
+
+
+def get_cmr_collections(params, verbose=False):
+    query = urlencode(params)
+    url = "https://" + cmr + "/search/collections.umm_json?" + query
+    if verbose:
+        logging.info(url)
+
+    # Build and send the collection search request
+    req = Request(url)
+    response = urlopen(req)
+    result = json.loads(response.read().decode())
+    return result
+
+
+def create_citation(collection_json, access_date):
+    citation_template = "{creator}. {year}. {title}. Ver. {version}. PO.DAAC, CA, USA. Dataset accessed {access_date} at {doi_authority}/{doi}"
+
+    # Better error handling here may be needed...
+    doi = collection_json['DOI']["DOI"]
+    doi_authority = collection_json['DOI']["Authority"]
+    citation = collection_json["CollectionCitations"][0]
+    creator = citation["Creator"]
+    release_date = citation["ReleaseDate"]
+    title = citation["Title"]
+    version = citation["Version"]
+    year = datetime.strptime(release_date, "%Y-%m-%dT%H:%M:%S.000Z").year
+    return citation_template.format(creator=creator, year=year, title=title, version=version, doi_authority=doi_authority, doi=doi, access_date=access_date)
+
+
+def create_citation_file(short_name, provider, data_path, token=None, verbose=False):
+    # get collection umm-c metadata
+    params = [
+        ('provider', provider),
+        ('ShortName', short_name)
+    ]
+    if token is not None:
+        params.append(('token', token))
+
+    collection = get_cmr_collections(params, verbose)['items'][0]
+
+    access_date = datetime.now().strftime("%Y-%m-%d")
+
+    # create citation from umm-c metadata
+    citation = create_citation(collection['umm'], access_date)
+
+    # write the citation file, named <ShortName>.citation.txt, into the data directory
+    with open(data_path + "/" + short_name + ".citation.txt", "w") as text_file:
+        text_file.write(citation)
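The citation helpers above can be exercised in isolation; a sketch under the assumption that CMR is reachable (the collection and provider mirror the regression tests):

    import tempfile
    from subscriber import podaac_access as pa

    with tempfile.TemporaryDirectory() as tmpdir:
        # writes <tmpdir>/MUR25-JPL-L4-GLOB-v04.2.citation.txt from the collection's UMM-C metadata
        pa.create_citation_file("MUR25-JPL-L4-GLOB-v04.2", "POCLOUD", tmpdir)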
diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py
index 158c541..44fdb45 100644
--- a/subscriber/podaac_data_downloader.py
+++ b/subscriber/podaac_data_downloader.py
@@ -60,9 +60,9 @@ def create_parser():
                         help="Cycle number for determining downloads. can be repeated for multiple cycles",
                         action='append', type=int)
     parser.add_argument("-sd", "--start-date", required=False, dest="startDate",
-                        help="The ISO date time before which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z")  # noqa E501
+                        help="The ISO date time after which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z")  # noqa E501
     parser.add_argument("-ed", "--end-date", required=False, dest="endDate",
-                        help="The ISO date time after which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z")  # noqa E501
+                        help="The ISO date time before which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z")  # noqa E501
 
     # Adding optional arguments
     parser.add_argument("-f", "--force", dest="force", action="store_true", help = "Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches")  # noqa E501
@@ -178,6 +178,7 @@ def run(args=None):
         ('provider', provider),
         ('ShortName', short_name),
         ('temporal', temporal_range),
+        ('token', token),
     ]
     if args.verbose:
         logging.info("Temporal Range: " + temporal_range)
@@ -193,7 +194,11 @@ def run(args=None):
     except HTTPError as e:
         if e.code == 401:
             token = pa.refresh_token(token, 'podaac-subscriber')
-            params['token'] = token
+            # params is not a dictionary here; it's a list of tuples,
+            # so replace the token entry by matching on each tuple's key
+            for i, p in enumerate(params):
+                if p[0] == "token":
+                    params[i] = ("token", token)
             results = pa.get_search_results(params, args.verbose)
         else:
             raise e
@@ -221,10 +226,6 @@ def run(args=None):
 
     downloads = [item for sublist in downloads_all for item in sublist]
 
-    if len(downloads) >= page_size:
-        logging.warning("Only the most recent " + str(
-            page_size) + " granules will be downloaded; try adjusting your search criteria (suggestion: reduce time period or spatial region of search) to ensure you retrieve all granules.")
-
     # filter list based on extension
     if not extensions:
         extensions = pa.extensions
@@ -268,7 +269,9 @@ def run(args=None):
                     skip_cnt += 1
                     continue
 
-            urlretrieve(f, output_path)
+            pa.download_file(f, output_path)
+
             pa.process_file(process_cmd, output_path, args)
             logging.info(str(datetime.now()) + " SUCCESS: " + f)
             success_cnt = success_cnt + 1
@@ -284,6 +287,14 @@ def run(args=None):
     logging.info("Downloaded Files: " + str(success_cnt))
     logging.info("Failed Files: " + str(failure_cnt))
     logging.info("Skipped Files: " + str(skip_cnt))
+
+    # create a citation file if anything was downloaded
+    if success_cnt > 0:
+        try:
+            pa.create_citation_file(short_name, provider, data_path, token, args.verbose)
+        except Exception:
+            logging.debug("Error generating citation", exc_info=True)
+
     pa.delete_token(token_url, token)
     logging.info("END\n\n")
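The token-refresh fix above hinges on params being a list of (key, value) tuples rather than a dictionary, so the refreshed token has to be swapped in by matching each tuple's key. A standalone sketch with invented values:

    params = [('provider', 'POCLOUD'), ('ShortName', 'MUR25-JPL-L4-GLOB-v04.2'), ('token', 'expired')]
    token = 'refreshed'
    for i, p in enumerate(params):
        if p[0] == "token":
            params[i] = ("token", token)
    assert ('token', 'refreshed') in params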
diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py
index 04df68d..e843a5f 100755
--- a/subscriber/podaac_data_subscriber.py
+++ b/subscriber/podaac_data_subscriber.py
@@ -66,10 +66,10 @@ def create_parser():
 
     # spatiotemporal arguments
     parser.add_argument("-sd", "--start-date", dest="startDate",
-                        help="The ISO date time before which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z",
+                        help="The ISO date time after which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z",
                         default=False)  # noqa E501
     parser.add_argument("-ed", "--end-date", dest="endDate",
-                        help="The ISO date time after which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z",
+                        help="The ISO date time before which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z",
                         default=False)  # noqa E501
     parser.add_argument("-b", "--bounds", dest="bbox",
                         help="The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b=\"-180,-90,180,90\" syntax when calling from the command line. Default: \"-180,-90,180,90\".",
@@ -218,7 +218,12 @@ def run(args=None):
     except HTTPError as e:
         if e.code == 401:
             token = pa.refresh_token(token, 'podaac-subscriber')
-            params['token'] = token
+            # params is not a dictionary here; it's a list of tuples,
+            # so replace the token entry by matching on each tuple's key
+            for i, p in enumerate(params):
+                if p[0] == "token":
+                    params[i] = ("token", token)
             results = pa.get_search_results(params, args.verbose)
         else:
             raise e
@@ -249,10 +254,6 @@ def run(args=None):
 
     downloads = [item for sublist in downloads_all for item in sublist]
 
-    if len(downloads) >= page_size:
-        logging.warning("Only the most recent " + str(
-            page_size) + " granules will be downloaded; try adjusting your search criteria (suggestion: reduce time period or spatial region of search) to ensure you retrieve all granules.")
-
     # filter list based on extension
     if not extensions:
         extensions = pa.extensions
@@ -294,7 +295,9 @@ def run(args=None):
                     skip_cnt += 1
                     continue
 
-            urlretrieve(f, output_path)
+            pa.download_file(f, output_path)
+
             pa.process_file(process_cmd, output_path, args)
             logging.info(str(datetime.now()) + " SUCCESS: " + f)
             success_cnt = success_cnt + 1
@@ -314,6 +317,13 @@ def run(args=None):
     logging.info("Downloaded Files: " + str(success_cnt))
     logging.info("Failed Files: " + str(failure_cnt))
     logging.info("Skipped Files: " + str(skip_cnt))
+
+    # create a citation file if anything was downloaded
+    if success_cnt > 0:
+        try:
+            pa.create_citation_file(short_name, provider, data_path, token, args.verbose)
+        except Exception:
+            logging.debug("Error generating citation", exc_info=True)
+
     pa.delete_token(token_url, token)
     logging.info("END\n\n")
 
     # exit(0)
diff --git a/tests/test_downloader_regression.py b/tests/test_downloader_regression.py
index 875a517..cea7df0 100644
--- a/tests/test_downloader_regression.py
+++ b/tests/test_downloader_regression.py
@@ -19,10 +19,11 @@ def create_downloader_args(args):
 @pytest.mark.regression
 def test_downloader_limit_MUR():
     shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True)
-    args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-30T00:00:00Z --limit 1'.split())
+    args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-30T00:00:00Z --limit 1 --verbose'.split())
     pdd.run(args2)
-    # count number of files downloaded...
-    assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name)])==1
+    # When the tests run in parallel we sometimes get a 401 on the token while
+    # creating the citation, so only count downloaded data files here, not the citation file.
+    assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name) and "citation.txt" not in name]) == 1
     shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2')
 
 # Test the downloader on MUR25 data for start/stop, yyyy/mm/dd dir structure
@@ -31,7 +32,7 @@ def test_downloader_limit_MUR():
 @pytest.mark.regression
 def test_downloader_MUR():
     shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True)
-    args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4'.split())
+    args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 --verbose'.split())
     pdd.run(args2)
     assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc')
     assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/02/20200102090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc')
@@ -54,7 +55,7 @@ def test_downloader_MUR():
     t1 = os.path.getmtime('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc')
 
     # Set the args to --force to re-download those data
-    args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 -f'.split())
+    args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 -f --verbose'.split())
     pdd.run(args2)
     assert t1 != os.path.getmtime('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc')
     assert t2 != os.path.getmtime('./MUR25-JPL-L4-GLOB-v04.2/2020/01/02/20200102090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc')
@@ -73,6 +74,10 @@ def test_downloader_GRACE_with_SHA_512(tmpdir):
     pdd.run(args)
     assert len( os.listdir(directory_str) ) > 0
     filename = directory_str + "/" + os.listdir(directory_str)[0]
+    # if the citation file was picked above, take the next file instead, since the
+    # citation file is rewritten on every successful run and its mtime always changes
+    if "citation.txt" in filename:
+        filename = directory_str + "/" + os.listdir(directory_str)[1]
+
     modified_time_1 = os.path.getmtime(filename)
     print( modified_time_1 )
1.0", + "Publisher": "PO.DAAC", + "ReleaseDate": "2022-01-31T00:00:00.000Z", + "Version": "1.0" + }, + { + "Creator": "CNES/CLS", + "ReleasePlace": "CNES/AVISO", + "Title": "Simulated SWOT products", + "OnlineResource": { + "Linkage": "http://doi.org/10.24400/527896/a01-2021.006", + "Name": " Simulated SWOT Sea Surface Height products", + "Description": "Simulated SWOT Sea Surface Height products KaRIn and Nadir.", + "MimeType": "text/html" + }, + "Publisher": "PODAAC", + "ReleaseDate": "2021-11-01T00:00:00.000Z", + "Version": "1.0" + } + ] + } + ''' + collection_umm_json = json.loads(collection_umm) + citation = pa.create_citation(collection_umm_json, "2022-07-21") + assert citation == "SWOT. 2022. SWOT Level-2 Simulated SSH from MITgcm LLC4320 Science Quality Version 1.0. Ver. 1.0. PO.DAAC, CA, USA. Dataset accessed 2022-07-21 at https://doi.org/10.5067/KARIN-2GLC1" def test_search_after(): # cmr query: https://cmr.earthdata.nasa.gov/search/granules.umm_json?page_size=2000&sort_key=-start_date&provider=POCLOUD&ShortName=JASON_CS_S6A_L2_ALT_LR_STD_OST_NRT_F&temporal=2000-01-01T10%3A00%3A00Z%2C2022-04-15T00%3A00%3A00Z&bounding_box=-180%2C-90%2C180%2C90 @@ -136,6 +181,25 @@ def test_validate(): # with pytest.raises(SystemExit): # a = validate(["-c", "viirs", "-d", "/data", "-m","60b"]) +def test_param_update(): + params = [ + ('sort_key', "-start_date"), + ('provider', "'POCLOUD'"), + ('token', "123"), + ] + + for i, p in enumerate(params) : + if p[1] == "token": + params[i] = ("token", "newToken") + + for i,p in enumerate(params) : + if p[1] == "token": + assert p[2] == "newToken" + +def test_downloader_retry(mocker): + mck = mocker.patch('subscriber.podaac_access.urlretrieve', side_effect=HTTPError("url", 503, "msg", None, None)) + pa.download_file("myUrl", "outputPath") + assert mck.call_count == 3 def validate(args): parser = pds.create_parser() diff --git a/tests/test_subscriber_regression.py b/tests/test_subscriber_regression.py index 07dc2f8..ffa3685 100644 --- a/tests/test_subscriber_regression.py +++ b/tests/test_subscriber_regression.py @@ -53,7 +53,7 @@ def test_subscriber_MUR_update_file_no_redownload(): except OSError as e: print("Expecting this...") - args2 = create_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4'.split()) + args2 = create_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-02T00:00:00Z -dymd --offset 4 --verbose'.split()) pds.run(args2) assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/01/20200101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc') assert exists('./MUR25-JPL-L4-GLOB-v04.2/2020/01/02/20200102090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc')