Skip to content

Commit

Permalink
update testing so that there's only one gpm test file
Browse files Browse the repository at this point in the history
  • Loading branch information
nlensse1 committed May 1, 2024
1 parent 4cc29cb commit 2f3302a
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 29 deletions.
6 changes: 3 additions & 3 deletions podaac/subsetter/gpm_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def compute_new_time_data(time_group, nc_dataset):
return new_time_list, time_unit_out


def change_var_dims(nc_dataset, variables=None):
def change_var_dims(nc_dataset, variables=None, time_name="_timeMidScan"):
"""
Go through each variable and get the dimension names from attribute "DimensionNames".
If the name is unique, add it as a dimension to the netCDF4 dataset. Then change the
Expand Down Expand Up @@ -86,14 +86,14 @@ def change_var_dims(nc_dataset, variables=None):
# copy the data to the new variable with dimension names
new_mapped_var[var_name][:] = var[:]

if not any("timeMidScan" in var for var in var_list):
if not any(time_name in var for var in var_list):
# if there aren't any timeMidScan variables, create one
scan_time_groups = ["__".join(i.split('__')[:-1]) for i in var_list if 'ScanTime' in i]
for time_group in list(set(scan_time_groups)):
# get the seconds since Jan 6, 1980
time_data, time_unit = compute_new_time_data(time_group, nc_dataset)
# make a new variable for each ScanTime group
new_time_var_name = time_group+'__timeMidScan'
new_time_var_name = time_group+time_name
# copy dimensions from the Year variable
var_dims = nc_dataset.variables[time_group+'__Year'].dimensions
comp_args = {"zlib": True, "complevel": 1}
Expand Down
Binary file removed tests/data/GPM/GPM_test_file.HDF5
Binary file not shown.
33 changes: 7 additions & 26 deletions tests/test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2287,46 +2287,27 @@ def test_get_unique_groups():

assert expected_groups_single == unique_groups_single
assert expected_diff_counts_single == diff_counts_single

def test_gpm_dimension_map(data_dir, subset_output_dir, request):
"""Test that dimension mapping of GPM files returns the expected netCDF
dataset without the phony dimensions"""

gpm_dir = join(data_dir, 'GPM')
gpm_file = 'GPM_test_file.HDF5'
bbox = np.array(((-180, 180), (-90, 90)))
shutil.copyfile(
os.path.join(gpm_dir, gpm_file),
os.path.join(subset_output_dir, gpm_file)
)

nc_dataset, has_groups, file_extension = subset.open_as_nc_dataset(join(subset_output_dir, gpm_file))

nc_dataset = gc.change_var_dims(nc_dataset)

for var_name, var in nc_dataset.variables.items():
dims = list(var.dimensions)

for dim in dims:
assert 'phony' not in dim

def test_gpm_compute_new_var_data(data_dir, subset_output_dir, request):
"""Test that, for GPM files that have ScanTime variables, the time is computed
as seconds since 1980-01-06"""

gpm_dir = join(data_dir, 'GPM')
gpm_file = 'GPM_test_file_2.HDF5'
bbox = np.array(((-180, 180), (-90, 90)))
shutil.copyfile(
os.path.join(gpm_dir, gpm_file),
os.path.join(subset_output_dir, gpm_file)
)

nc_dataset, has_groups, file_extension = subset.open_as_nc_dataset(join(subset_output_dir, gpm_file))

del nc_dataset.variables["__FS__ScanTime__timeMidScan"]
del nc_dataset.variables["__HS__ScanTime__timeMidScan"]

nc_dataset = gc.change_var_dims(nc_dataset, variables=None)
nc_dataset_new = gc.change_var_dims(nc_dataset, variables=None, time_name='__test_time')
assert int(nc_dataset_new.variables["__FS__ScanTime__test_time"][:][0]) == 1306403820

assert int(nc_dataset["__FS__ScanTime__timeMidScan"][:][0]) == 1306403820
for var_name, var in nc_dataset.variables.items():
dims = list(var.dimensions)

for dim in dims:
assert 'phony' not in dim

0 comments on commit 2f3302a

Please sign in to comment.