Skip to content

Commit

Permalink
Merge pull request #257 from podaac/release/2.10.0
Browse files Browse the repository at this point in the history
release 2.10.0
  • Loading branch information
jamesfwood committed May 16, 2024
2 parents a1a1f6a + ed32ad6 commit 5c85c57
Show file tree
Hide file tree
Showing 9 changed files with 709 additions and 646 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Security


## [2.10.0]
### Added
- [issue/260](https://github.com/podaac/l2ss-py/pull/261): Add a GPM cleanup function that creates a timeMidScan variable when one isn't already present. The function takes the ScanTime variables (Year, Month, DayOfMonth, etc.) and builds a single time variable using the datetime library.
### Changed
- Update code to determine lat, lon, and time variables
- Update xarray version
- [pull/248](https://github.com/podaac/l2ss-py/pull/248): add Harmony extra_args.cut parameter to subset_params in service adapter
### Deprecated
### Removed
### Fixed
- [issue/258](https://github.com/podaac/l2ss-py/issues/258): fix so that ScanTime is what determines a GPM file
### Security


## [2.9.0]
### Added
- [issue/240](https://github.com/podaac/l2ss-py/issues/240): for time vars that need datetime conversion, allow numpy arrays of n dimension to be able to convert. Create the same shape array in start time, then add the seconds since the start date to get our datetime format for temporal subsetting.
Expand Down
42 changes: 41 additions & 1 deletion podaac/subsetter/gpm_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,34 @@
to nscan, nbin, nfreq by using the DimensionNames variable attribute
"""

import datetime
from netCDF4 import date2num # pylint: disable=no-name-in-module

dim_dict = {}


def change_var_dims(nc_dataset, variables=None):
def compute_new_time_data(time_group, nc_dataset):
    """
    Create the data for a time variable, timeMidScan, that is present in
    other GPM collections but not the ENV collections.

    The per-scan calendar components stored under ``time_group`` (Year,
    Month, DayOfMonth, Hour, Minute, Second, MilliSecond) are combined
    into one numeric time value per scan.

    Parameters
    ----------
    time_group : str
        Flattened name prefix of the ScanTime group; the component
        variables are looked up as ``time_group + '__Year'`` and so on.
    nc_dataset
        Dataset object indexable by variable name; each component
        variable is read in full with ``[:]``.

    Returns
    -------
    tuple
        ``(new_time_list, time_unit_out)``: a list holding one
        ``date2num`` result per scan, and the unit string those values
        are expressed in.
    """
    # set the time unit for GPM
    time_unit_out = "seconds since 1980-01-06 00:00:00"

    component_names = ('Year', 'Month', 'DayOfMonth', 'Hour',
                       'Minute', 'Second', 'MilliSecond')
    # Read every component exactly once; indexing the dataset inside the
    # per-scan loop would re-read each whole variable for every scan.
    columns = [nc_dataset[time_group + '__' + name][:] for name in component_names]

    new_time_list = []
    # The scan count comes from Year; the other components are indexed by
    # position so a shorter component still raises instead of truncating.
    for i in range(len(columns[0])):
        # Convert to Python ints up front so the millisecond -> microsecond
        # multiplication cannot wrap around in a narrow fixed-width
        # integer dtype.
        year, month, day, hour, minute, second, millisecond = (
            int(column[i]) for column in columns
        )
        scan_time = datetime.datetime(
            year, month, day,
            hour=hour, minute=minute, second=second,
            microsecond=millisecond * 1000,
        )
        # convert to a float, seconds variable
        new_time_list.append(date2num(scan_time, time_unit_out))

    return new_time_list, time_unit_out


def change_var_dims(nc_dataset, variables=None, time_name="_timeMidScan"):
"""
Go through each variable and get the dimension names from attribute "DimensionNames
If the name is unique, add it as a dimension to the netCDF4 dataset. Then change the
Expand Down Expand Up @@ -62,4 +86,20 @@ def change_var_dims(nc_dataset, variables=None):
# copy the data to the new variable with dimension names
new_mapped_var[var_name][:] = var[:]

if not any(time_name in var for var in var_list):
# if there isn't any timeMidScan variables, create one
scan_time_groups = ["__".join(i.split('__')[:-1]) for i in var_list if 'ScanTime' in i]
for time_group in list(set(scan_time_groups)):
# get the seconds since Jan 6, 1980
time_data, time_unit = compute_new_time_data(time_group, nc_dataset)
# make a new variable for each ScanTime group
new_time_var_name = time_group+time_name
# copy dimensions from the Year variable
var_dims = nc_dataset.variables[time_group+'__Year'].dimensions
comp_args = {"zlib": True, "complevel": 1}
nc_dataset.createVariable(new_time_var_name, 'f8', var_dims, **comp_args)
nc_dataset.variables[new_time_var_name].setncattr('unit', time_unit)
# copy the data in
nc_dataset.variables[new_time_var_name][:] = time_data

return nc_dataset
18 changes: 13 additions & 5 deletions podaac/subsetter/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,13 @@ def var_is_coord(var_name, possible_coord_names):
lat_coord_names = dataset.cf.coordinates.get('latitude', [])
lon_coord_names = dataset.cf.coordinates.get('longitude', [])

if len(lat_coord_names) < 1 or len(lon_coord_names) < 1:
try:
lat_coord_names = [dataset.cf["latitude"].name]
lon_coord_names = [dataset.cf["longitude"].name]
except KeyError:
pass

if len(lat_coord_names) < 1 or len(lon_coord_names) < 1:
raise ValueError('Could not determine coordinate variables')

Expand Down Expand Up @@ -513,6 +520,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
ValueError
If the time variable could not be determined
"""

time_vars = find_matching_coords(dataset, ['time'])
if time_vars:
# There should only be one time var match (this is called once
Expand All @@ -525,9 +533,6 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
for var_name in time_vars:
if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
return var_name
for var_name in list(dataset.data_vars.keys()):
if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
return var_name

# first check if any variables are named 'time'
for var_name in list(dataset.data_vars.keys()):
Expand Down Expand Up @@ -1193,8 +1198,8 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
time_var_names = [var.replace('/', GROUP_DELIM) for var in time_var_names]

if '.HDF5' == file_extension:
# GPM files will have a timeMidScan time variable present
if '__FS__navigation__timeMidScan' in list(nc_dataset.variables.keys()):
# GPM files will have a ScanTime group
if 'ScanTime' in [var.split('__')[-2] for var in list(nc_dataset.variables.keys())]:
gc.change_var_dims(nc_dataset, variables)
hdf_type = 'GPM'

Expand All @@ -1221,6 +1226,9 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
except AttributeError:
pass

if hdf_type == 'GPM':
args['decode_times'] = False

with xr.open_dataset(
xr.backends.NetCDF4DataStore(nc_dataset),
**args
Expand Down
5 changes: 5 additions & 0 deletions podaac/subsetter/subset_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ def process_item(self, item: pystac.Item, source: harmony.message.Source) -> pys

subset_params['bbox'] = harmony_to_podaac_bbox(harmony_bbox)

try:
subset_params['cut'] = message.extraArgs['cut']
except (KeyError, AttributeError, TypeError):
pass

if source.variables:
subset_params['variables'] = [variable.name for variable in source.process('variables')]

Expand Down
Loading

0 comments on commit 5c85c57

Please sign in to comment.