Merge pull request #257 from podaac/release/2.10.0

release 2.10.0
podaac · May 16, 2024 · 5c85c57 · 5c85c57
2 parents a1a1f6a + ed32ad6
commit 5c85c57
Show file tree

Hide file tree

Showing 9 changed files with 709 additions and 646 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Security
 
 
+## [2.10.0]
+### Added
+- [issue/260](https://github.com/podaac/l2ss-py/pull/261): Add a GPM cleanup function that creates a timeMidScan variable when one isn't already present. The function takes the ScanTime component variables (Year, Month, DayOfMonth, etc.) and builds a single time variable using the datetime.datetime library.
+### Changed
+- Update code to determine lat, lon, and time variables
+- Update xarray version
+- [pull/248](https://github.com/podaac/l2ss-py/pull/248): add Harmony extra_args.cut parameter to subset_params in service adapter 
+### Deprecated 
+### Removed
+### Fixed
+- [issue/258](https://github.com/podaac/l2ss-py/issues/258): fix GPM detection so that the presence of a ScanTime group is what identifies a GPM file
+### Security
+
+
 ## [2.9.0]
 ### Added
 - [issue/240](https://github.com/podaac/l2ss-py/issues/240): for time vars that need datetime conversion, allow numpy arrays of n dimension to be able to convert. Create the same shape array in start time, then add the seconds since the start date to get our datetime format for temporal subsetting.

diff --git a/podaac/subsetter/gpm_cleanup.py b/podaac/subsetter/gpm_cleanup.py
@@ -3,10 +3,34 @@
 to nscan, nbin, nfreq by using the DimensionNames variable attribute
 """
 
+import datetime
+from netCDF4 import date2num  # pylint: disable=no-name-in-module
+
 dim_dict = {}
 
 
-def change_var_dims(nc_dataset, variables=None):
+def compute_new_time_data(time_group, nc_dataset):
+    """
+    Create the data for a time variable, timeMidScan, that is present in
+    other GPM collections but not the ENV collections.
+
+    Parameters
+    ----------
+    time_group : str
+        Name prefix of the ScanTime group. The component variables are
+        looked up as time_group + '__Year', '__Month', '__DayOfMonth',
+        '__Hour', '__Minute', '__Second' and '__MilliSecond'.
+    nc_dataset
+        Dataset object, indexable by variable name, that holds the
+        component variables (presumably a netCDF4 Dataset -- confirm
+        against the caller).
+
+    Returns
+    -------
+    tuple
+        (new_time_list, time_unit_out): a list with one date2num value
+        per scan, and the unit string those values are expressed in.
+    """
+    # set the time unit for GPM
+    time_unit_out = "seconds since 1980-01-06 00:00:00"
+
+    # read each ScanTime component once, up front, rather than slicing the
+    # whole variable again for every scan inside the loop
+    components = [nc_dataset[time_group + suffix][:] for suffix in (
+        '__Year', '__Month', '__DayOfMonth', '__Hour', '__Minute',
+        '__Second', '__MilliSecond')]
+
+    # convert each scan's components to a float, seconds variable
+    new_time_list = [date2num(datetime.datetime(
+        year, month, day,
+        hour=hour, minute=minute, second=second,
+        # datetime takes microseconds; the MilliSecond value is scaled up
+        microsecond=millisecond*1000),
+        time_unit_out)
+        for year, month, day, hour, minute, second, millisecond in zip(*components)]
+
+    return new_time_list, time_unit_out
+
+
+def change_var_dims(nc_dataset, variables=None, time_name="_timeMidScan"):
     """
     Go through each variable and get the dimension names from attribute "DimensionNames".
     If the name is unique, add it as a dimension to the netCDF4 dataset. Then change the
@@ -62,4 +86,20 @@ def change_var_dims(nc_dataset, variables=None):
                 # copy the data to the new variable with dimension names
                 new_mapped_var[var_name][:] = var[:]
 
+    if not any(time_name in var for var in var_list):
+        # if there aren't any timeMidScan variables, create one
+        scan_time_groups = ["__".join(i.split('__')[:-1]) for i in var_list if 'ScanTime' in i]
+        for time_group in list(set(scan_time_groups)):
+            # get the seconds since Jan 6, 1980
+            time_data, time_unit = compute_new_time_data(time_group, nc_dataset)
+            # make a new variable for each ScanTime group
+            new_time_var_name = time_group+time_name
+            # copy dimensions from the Year variable
+            var_dims = nc_dataset.variables[time_group+'__Year'].dimensions
+            comp_args = {"zlib": True, "complevel": 1}
+            nc_dataset.createVariable(new_time_var_name, 'f8', var_dims, **comp_args)
+            nc_dataset.variables[new_time_var_name].setncattr('unit', time_unit)
+            # copy the data in
+            nc_dataset.variables[new_time_var_name][:] = time_data
+
     return nc_dataset
diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py
@@ -370,6 +370,13 @@ def var_is_coord(var_name, possible_coord_names):
             lat_coord_names = dataset.cf.coordinates.get('latitude', [])
             lon_coord_names = dataset.cf.coordinates.get('longitude', [])
 
+        if len(lat_coord_names) < 1 or len(lon_coord_names) < 1:
+            try:
+                lat_coord_names = [dataset.cf["latitude"].name]
+                lon_coord_names = [dataset.cf["longitude"].name]
+            except KeyError:
+                pass
+
     if len(lat_coord_names) < 1 or len(lon_coord_names) < 1:
         raise ValueError('Could not determine coordinate variables')
 
@@ -513,6 +520,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
     ValueError
         If the time variable could not be determined
     """
+
     time_vars = find_matching_coords(dataset, ['time'])
     if time_vars:
         # There should only be one time var match (this is called once
@@ -525,9 +533,6 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
     for var_name in time_vars:
         if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
             return var_name
-    for var_name in list(dataset.data_vars.keys()):
-        if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
-            return var_name
 
     # first check if any variables are named 'time'
     for var_name in list(dataset.data_vars.keys()):
@@ -1193,8 +1198,8 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
         time_var_names = [var.replace('/', GROUP_DELIM) for var in time_var_names]
 
     if '.HDF5' == file_extension:
-        # GPM files will have a timeMidScan time variable present
-        if '__FS__navigation__timeMidScan' in list(nc_dataset.variables.keys()):
+        # GPM files will have a ScanTime group
+        if 'ScanTime' in [var.split('__')[-2] for var in list(nc_dataset.variables.keys())]:
             gc.change_var_dims(nc_dataset, variables)
             hdf_type = 'GPM'
 
@@ -1221,6 +1226,9 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
             except AttributeError:
                 pass
 
+    if hdf_type == 'GPM':
+        args['decode_times'] = False
+
     with xr.open_dataset(
             xr.backends.NetCDF4DataStore(nc_dataset),
             **args

diff --git a/podaac/subsetter/subset_harmony.py b/podaac/subsetter/subset_harmony.py
@@ -150,6 +150,11 @@ def process_item(self, item: pystac.Item, source: harmony.message.Source) -> pys
 
             subset_params['bbox'] = harmony_to_podaac_bbox(harmony_bbox)
 
+            try:
+                subset_params['cut'] = message.extraArgs['cut']
+            except (KeyError, AttributeError, TypeError):
+                pass
+
             if source.variables:
                 subset_params['variables'] = [variable.name for variable in source.process('variables')]