From b254b5e860e84b6bbfea2c431e7d6231f491496d Mon Sep 17 00:00:00 2001 From: dkaufma3 Date: Mon, 28 Nov 2022 17:51:07 -0500 Subject: [PATCH 01/16] move methods for flattening netcdf and hdf group structures to separate module --- podaac/subsetter/group_handling.py | 231 +++++++++++++++++++++++++++++ podaac/subsetter/subset.py | 228 +--------------------------- tests/test_subset.py | 32 ++-- 3 files changed, 250 insertions(+), 241 deletions(-) create mode 100644 podaac/subsetter/group_handling.py diff --git a/podaac/subsetter/group_handling.py b/podaac/subsetter/group_handling.py new file mode 100644 index 00000000..cf9a648b --- /dev/null +++ b/podaac/subsetter/group_handling.py @@ -0,0 +1,231 @@ +from shutil import copy + +import h5py +import netCDF4 as nc +import numpy as np +import xarray as xr + +GROUP_DELIM = '__' + + +def transform_grouped_dataset(nc_dataset, file_to_subset): + """ + Transform a netCDF4 Dataset that has groups to an xarray compatible + dataset. xarray does not work with groups, so this transformation + will flatten the variables in the dataset and use the group path as + the new variable name. For example, data_01 > km > sst would become + 'data_01__km__sst', where GROUP_DELIM is __. + + This same pattern is applied to dimensions, which are located under + the appropriate group. They are renamed and placed in the root + group. + + Parameters + ---------- + nc_dataset : nc.Dataset + netCDF4 Dataset that contains groups + + Returns + ------- + nc.Dataset + netCDF4 Dataset that does not contain groups and that has been + flattened. + """ + + # Close the existing read-only dataset and reopen in append mode + nc_dataset.close() + nc_dataset = nc.Dataset(file_to_subset, 'r+') + + dimensions = {} + + def walk(group_node, path): + for key, item in group_node.items(): + group_path = f'{path}{GROUP_DELIM}{key}' + + # If there are variables in this group, copy to root group + # and then delete from current group + if item.variables: + # Copy variables to root group with new name + for var_name, var in item.variables.items(): + var_group_name = f'{group_path}{GROUP_DELIM}{var_name}' + nc_dataset.variables[var_group_name] = var + # Delete variables + var_names = list(item.variables.keys()) + for var_name in var_names: + del item.variables[var_name] + + if item.dimensions: + dims = list(item.dimensions.keys()) + for dim_name in dims: + new_dim_name = f'{group_path.replace("/", GROUP_DELIM)}{GROUP_DELIM}{dim_name}' + item.dimensions[new_dim_name] = item.dimensions[dim_name] + dimensions[new_dim_name] = item.dimensions[dim_name] + item.renameDimension(dim_name, new_dim_name) + + # If there are subgroups in this group, call this function + # again on that group. + if item.groups: + walk(item.groups, group_path) + + # Delete non-root groups + group_names = list(group_node.keys()) + for group_name in group_names: + del group_node[group_name] + + for var_name in list(nc_dataset.variables.keys()): + new_var_name = f'{GROUP_DELIM}{var_name}' + nc_dataset.variables[new_var_name] = nc_dataset.variables[var_name] + del nc_dataset.variables[var_name] + + walk(nc_dataset.groups, '') + + # Update the dimensions of the dataset in the root group + nc_dataset.dimensions.update(dimensions) + + return nc_dataset + + +def recombine_grouped_datasets(datasets, output_file, start_date): # pylint: disable=too-many-branches + """ + Given a list of xarray datasets, combine those datasets into a + single netCDF4 Dataset and write to the disk. 
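+    For example, a flattened variable named 'data_01__km__sst' is written back out as
+    variable 'sst' inside group '/data_01/km', reversing the flattening performed by
+    transform_grouped_dataset (names here mirror the illustrative example used in that
+    function's docstring).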
Each dataset has been + transformed using its group path and needs to be un-transformed and + placed in the appropriate group. + + Parameters + ---------- + datasets : list (xr.Dataset) + List of xarray datasets to be combined + output_file : str + Name of the output file to write the resulting NetCDF file to. + """ + + base_dataset = nc.Dataset(output_file, mode='w') + + for dataset in datasets: + group_lst = [] + for var_name in dataset.variables.keys(): # need logic if there is data in the top level not in a group + group_lst.append('/'.join(var_name.split(GROUP_DELIM)[:-1])) + group_lst = ['/' if group == '' else group for group in group_lst] + groups = set(group_lst) + for group in groups: + base_dataset.createGroup(group) + + for dim_name in list(dataset.dims.keys()): + new_dim_name = dim_name.split(GROUP_DELIM)[-1] + dim_group = _get_nested_group(base_dataset, dim_name) + dim_group.createDimension(new_dim_name, dataset.dims[dim_name]) + + # Rename variables + _rename_variables(dataset, base_dataset, start_date) + + # Remove group vars from base dataset + for var_name in list(base_dataset.variables.keys()): + if GROUP_DELIM in var_name: + del base_dataset.variables[var_name] + + # Remove group dims from base dataset + for dim_name in list(base_dataset.dimensions.keys()): + if GROUP_DELIM in dim_name: + del base_dataset.dimensions[dim_name] + + # Copy global attributes + base_dataset.setncatts(datasets[0].attrs) + # Write and close + base_dataset.close() + + +def _get_nested_group(dataset, group_path): + nested_group = dataset + for group in group_path.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1]: + nested_group = nested_group.groups[group] + return nested_group + + +def _rename_variables(dataset, base_dataset, start_date): + for var_name in list(dataset.variables.keys()): + new_var_name = var_name.split(GROUP_DELIM)[-1] + var_group = _get_nested_group(base_dataset, var_name) + variable = dataset.variables[var_name] + var_dims = [x.split(GROUP_DELIM)[-1] for x in dataset.variables[var_name].dims] + if np.issubdtype( + dataset.variables[var_name].dtype, np.dtype(np.datetime64) + ) or np.issubdtype( + dataset.variables[var_name].dtype, np.dtype(np.timedelta64) + ): + if start_date: + dataset.variables[var_name].values = (dataset.variables[var_name].values - np.datetime64(start_date))/np.timedelta64(1, 's') + variable = dataset.variables[var_name] + else: + cf_dt_coder = xr.coding.times.CFDatetimeCoder() + encoded_var = cf_dt_coder.encode(dataset.variables[var_name]) + variable = encoded_var + + var_attrs = variable.attrs + fill_value = var_attrs.get('_FillValue') + var_attrs.pop('_FillValue', None) + comp_args = {"zlib": True, "complevel": 1} + + if variable.dtype == object: + var_group.createVariable(new_var_name, 'S1', var_dims, fill_value=fill_value, **comp_args) + elif variable.dtype == 'timedelta64[ns]': + var_group.createVariable(new_var_name, 'i4', var_dims, fill_value=fill_value, **comp_args) + else: + var_group.createVariable(new_var_name, variable.dtype, var_dims, fill_value=fill_value, **comp_args) + + # Copy attributes + var_group.variables[new_var_name].setncatts(var_attrs) + + # Copy data + var_group.variables[new_var_name].set_auto_maskandscale(False) + var_group.variables[new_var_name][:] = variable.data + + +def h5file_transform(finput): + """ + Transform a h5py Dataset that has groups to an xarray compatible + dataset. xarray does not work with groups, so this transformation + will flatten the variables in the dataset and use the group path as + the new variable name. 
For example, data_01 > km > sst would become + 'data_01__km__sst', where GROUP_DELIM is __. + + Returns + ------- + nc.Dataset + netCDF4 Dataset that does not contain groups and that has been + flattened. + """ + data_new = h5py.File(finput, 'r+') + del_group_list = list(data_new.keys()) + has_groups = bool(data_new['/']) + + def walk_h5py(data_new, group): + # flattens h5py file + for key, item in data_new[group].items(): + group_path = f'{group}{key}' + if isinstance(item, h5py.Dataset): + new_var_name = group_path.replace('/', '__') + + data_new[new_var_name] = data_new[group_path] + del data_new[group_path] + + elif isinstance(item, h5py.Group): + if len(list(item.keys())) == 0: + new_group_name = group_path.replace('/', '__') + data_new[new_group_name] = data_new[group_path] + + walk_h5py(data_new, data_new[group_path].name + '/') + + walk_h5py(data_new, data_new.name) + + for del_group in del_group_list: + del data_new[del_group] + + finputnc = '.'.join(finput.split('.')[:-1]) + '.nc' + + data_new.close() # close the h5py dataset + copy(finput, finputnc) # copy to a nc file + + nc_dataset = nc.Dataset(finputnc, mode='r') + + return nc_dataset, has_groups diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index 4d9d7cbe..e3db8de2 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -23,11 +23,9 @@ import json import operator import os -from shutil import copy import cf_xarray as cfxr import geopandas as gpd -import h5py import importlib_metadata import julian import netCDF4 as nc @@ -39,8 +37,9 @@ from podaac.subsetter import dimension_cleanup as dc from podaac.subsetter import xarray_enhancements as xre +from podaac.subsetter.group_handling import GROUP_DELIM, transform_grouped_dataset, recombine_grouped_datasets, \ + h5file_transform -GROUP_DELIM = '__' SERVICE_NAME = 'l2ss-py' @@ -871,229 +870,6 @@ def in_shape(lon, lat): return xre.where(dataset, boolean_mask, cut) -def transform_grouped_dataset(nc_dataset, file_to_subset): - """ - Transform a netCDF4 Dataset that has groups to an xarray compatible - dataset. xarray does not work with groups, so this transformation - will flatten the variables in the dataset and use the group path as - the new variable name. For example, data_01 > km > sst would become - 'data_01__km__sst', where GROUP_DELIM is __. - - This same pattern is applied to dimensions, which are located under - the appropriate group. They are renamed and placed in the root - group. - - Parameters - ---------- - nc_dataset : nc.Dataset - netCDF4 Dataset that contains groups - - Returns - ------- - nc.Dataset - netCDF4 Dataset that does not contain groups and that has been - flattened. 
- """ - - # Close the existing read-only dataset and reopen in append mode - nc_dataset.close() - nc_dataset = nc.Dataset(file_to_subset, 'r+') - - dimensions = {} - - def walk(group_node, path): - for key, item in group_node.items(): - group_path = f'{path}{GROUP_DELIM}{key}' - - # If there are variables in this group, copy to root group - # and then delete from current group - if item.variables: - # Copy variables to root group with new name - for var_name, var in item.variables.items(): - var_group_name = f'{group_path}{GROUP_DELIM}{var_name}' - nc_dataset.variables[var_group_name] = var - # Delete variables - var_names = list(item.variables.keys()) - for var_name in var_names: - del item.variables[var_name] - - if item.dimensions: - dims = list(item.dimensions.keys()) - for dim_name in dims: - new_dim_name = f'{group_path.replace("/", GROUP_DELIM)}{GROUP_DELIM}{dim_name}' - item.dimensions[new_dim_name] = item.dimensions[dim_name] - dimensions[new_dim_name] = item.dimensions[dim_name] - item.renameDimension(dim_name, new_dim_name) - - # If there are subgroups in this group, call this function - # again on that group. - if item.groups: - walk(item.groups, group_path) - - # Delete non-root groups - group_names = list(group_node.keys()) - for group_name in group_names: - del group_node[group_name] - - for var_name in list(nc_dataset.variables.keys()): - new_var_name = f'{GROUP_DELIM}{var_name}' - nc_dataset.variables[new_var_name] = nc_dataset.variables[var_name] - del nc_dataset.variables[var_name] - - walk(nc_dataset.groups, '') - - # Update the dimensions of the dataset in the root group - nc_dataset.dimensions.update(dimensions) - - return nc_dataset - - -def recombine_grouped_datasets(datasets, output_file, start_date): # pylint: disable=too-many-branches - """ - Given a list of xarray datasets, combine those datasets into a - single netCDF4 Dataset and write to the disk. Each dataset has been - transformed using its group path and needs to be un-transformed and - placed in the appropriate group. - - Parameters - ---------- - datasets : list (xr.Dataset) - List of xarray datasets to be combined - output_file : str - Name of the output file to write the resulting NetCDF file to. 
- """ - - base_dataset = nc.Dataset(output_file, mode='w') - - for dataset in datasets: - group_lst = [] - for var_name in dataset.variables.keys(): # need logic if there is data in the top level not in a group - group_lst.append('/'.join(var_name.split(GROUP_DELIM)[:-1])) - group_lst = ['/' if group == '' else group for group in group_lst] - groups = set(group_lst) - for group in groups: - base_dataset.createGroup(group) - - for dim_name in list(dataset.dims.keys()): - new_dim_name = dim_name.split(GROUP_DELIM)[-1] - dim_group = _get_nested_group(base_dataset, dim_name) - dim_group.createDimension(new_dim_name, dataset.dims[dim_name]) - - # Rename variables - _rename_variables(dataset, base_dataset, start_date) - - # Remove group vars from base dataset - for var_name in list(base_dataset.variables.keys()): - if GROUP_DELIM in var_name: - del base_dataset.variables[var_name] - - # Remove group dims from base dataset - for dim_name in list(base_dataset.dimensions.keys()): - if GROUP_DELIM in dim_name: - del base_dataset.dimensions[dim_name] - - # Copy global attributes - base_dataset.setncatts(datasets[0].attrs) - # Write and close - base_dataset.close() - - -def _get_nested_group(dataset, group_path): - nested_group = dataset - for group in group_path.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1]: - nested_group = nested_group.groups[group] - return nested_group - - -def _rename_variables(dataset, base_dataset, start_date): - for var_name in list(dataset.variables.keys()): - new_var_name = var_name.split(GROUP_DELIM)[-1] - var_group = _get_nested_group(base_dataset, var_name) - variable = dataset.variables[var_name] - var_dims = [x.split(GROUP_DELIM)[-1] for x in dataset.variables[var_name].dims] - if np.issubdtype( - dataset.variables[var_name].dtype, np.dtype(np.datetime64) - ) or np.issubdtype( - dataset.variables[var_name].dtype, np.dtype(np.timedelta64) - ): - if start_date: - dataset.variables[var_name].values = (dataset.variables[var_name].values - np.datetime64(start_date))/np.timedelta64(1, 's') - variable = dataset.variables[var_name] - else: - cf_dt_coder = xr.coding.times.CFDatetimeCoder() - encoded_var = cf_dt_coder.encode(dataset.variables[var_name]) - variable = encoded_var - - var_attrs = variable.attrs - fill_value = var_attrs.get('_FillValue') - var_attrs.pop('_FillValue', None) - comp_args = {"zlib": True, "complevel": 1} - - if variable.dtype == object: - var_group.createVariable(new_var_name, 'S1', var_dims, fill_value=fill_value, **comp_args) - elif variable.dtype == 'timedelta64[ns]': - var_group.createVariable(new_var_name, 'i4', var_dims, fill_value=fill_value, **comp_args) - else: - var_group.createVariable(new_var_name, variable.dtype, var_dims, fill_value=fill_value, **comp_args) - - # Copy attributes - var_group.variables[new_var_name].setncatts(var_attrs) - - # Copy data - var_group.variables[new_var_name].set_auto_maskandscale(False) - var_group.variables[new_var_name][:] = variable.data - - -def h5file_transform(finput): - """ - Transform a h5py Dataset that has groups to an xarray compatible - dataset. xarray does not work with groups, so this transformation - will flatten the variables in the dataset and use the group path as - the new variable name. For example, data_01 > km > sst would become - 'data_01__km__sst', where GROUP_DELIM is __. - - Returns - ------- - nc.Dataset - netCDF4 Dataset that does not contain groups and that has been - flattened. 
- """ - data_new = h5py.File(finput, 'r+') - del_group_list = list(data_new.keys()) - has_groups = bool(data_new['/']) - - def walk_h5py(data_new, group): - # flattens h5py file - for key, item in data_new[group].items(): - group_path = f'{group}{key}' - if isinstance(item, h5py.Dataset): - new_var_name = group_path.replace('/', '__') - - data_new[new_var_name] = data_new[group_path] - del data_new[group_path] - - elif isinstance(item, h5py.Group): - if len(list(item.keys())) == 0: - new_group_name = group_path.replace('/', '__') - data_new[new_group_name] = data_new[group_path] - - walk_h5py(data_new, data_new[group_path].name + '/') - - walk_h5py(data_new, data_new.name) - - for del_group in del_group_list: - del data_new[del_group] - - finputnc = '.'.join(finput.split('.')[:-1]) + '.nc' - - data_new.close() # close the h5py dataset - copy(finput, finputnc) # copy to a nc file - - nc_dataset = nc.Dataset(finputnc, mode='r') - - return nc_dataset, has_groups - - def get_coordinate_variable_names(dataset, lat_var_names=None, lon_var_names=None, time_var_names=None): """ Retrieve coordinate variables for this dataset. If coordinate diff --git a/tests/test_subset.py b/tests/test_subset.py index 763345ec..5c683540 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -36,6 +36,7 @@ from jsonschema import validate from shapely.geometry import Point +from podaac.subsetter import group_handling as gh from podaac.subsetter import subset from podaac.subsetter.subset import SERVICE_NAME from podaac.subsetter import xarray_enhancements as xre @@ -886,7 +887,7 @@ def test_transform_grouped_dataset(self): os.path.join(self.subset_output_dir, s6_file_name)) nc_ds = nc.Dataset(os.path.join(self.test_data_dir, 'sentinel_6', s6_file_name)) - nc_ds_transformed = subset.transform_grouped_dataset( + nc_ds_transformed = gh.transform_grouped_dataset( nc.Dataset(os.path.join(self.subset_output_dir, s6_file_name), 'r'), os.path.join(self.subset_output_dir, s6_file_name) ) @@ -1361,16 +1362,16 @@ def test_root_group(self): 'mask_and_scale': False, 'decode_times': False } - nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, sndr_file_name)) + nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, sndr_file_name)) with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), **args ) as dataset: var_list = list(dataset.variables) - assert (var_list[0][0:2] == subset.GROUP_DELIM) + assert (var_list[0][0:2] == gh.GROUP_DELIM) group_lst = [] for var_name in dataset.variables.keys(): #need logic if there is data in the top level not in a group - group_lst.append('/'.join(var_name.split(subset.GROUP_DELIM)[:-1])) + group_lst.append('/'.join(var_name.split(gh.GROUP_DELIM)[:-1])) group_lst = ['/' if group=='' else group for group in group_lst] groups = set(group_lst) expected_group = {'/mw', '/ave_kern', '/', '/mol_lay', '/aux'} @@ -1392,7 +1393,7 @@ def test_get_time_squeeze(self): 'mask_and_scale': False, 'decode_times': False } - nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file_name)) + nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file_name)) with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), **args @@ -1417,7 +1418,7 @@ def test_get_indexers_nd(self): 'mask_and_scale': False, 'decode_times': False } - nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, 
tropomi_file_name)) + nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file_name)) with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), **args @@ -1484,7 +1485,7 @@ def test_transform_h5py_dataset(self): entry_lst.append(entry_str + "/" + group_keys) key_lst.append(entry_str + "/" + group_keys) - nc_dataset, has_groups = subset.h5file_transform(os.path.join(self.subset_output_dir, OMI_file_name)) + nc_dataset, has_groups = gh.h5file_transform(os.path.join(self.subset_output_dir, OMI_file_name)) nc_vars_flattened = list(nc_dataset.variables.keys()) for i in range(len(entry_lst)): # go through all the datasets in h5py file @@ -1511,32 +1512,33 @@ def test_variable_dims_matched_tropomi(self): # Get variable dimensions from input dataset in_var_dims = { - var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + var_name: [dim.split(gh.GROUP_DELIM)[-1] for dim in var.dimensions] for var_name, var in in_nc.groups['PRODUCT'].variables.items() } # Get variables from METADATA group in_var_dims.update( { - var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + var_name: [dim.split(gh.GROUP_DELIM)[-1] for dim in var.dimensions] for var_name, var in in_nc.groups['METADATA'].groups['QA_STATISTICS'].variables.items() } ) # Include PRODUCT>SUPPORT_DATA>GEOLOCATIONS location in_var_dims.update( { - var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + var_name: [dim.split(gh.GROUP_DELIM)[-1] for dim in var.dimensions] for var_name, var in in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['GEOLOCATIONS'].variables.items() } ) - out_nc = subset.transform_grouped_dataset( + out_nc = gh.transform_grouped_dataset( in_nc, os.path.join(self.subset_output_dir, tropomi_file_name) ) # Get variable dimensions from output dataset out_var_dims = { - var_name.split(subset.GROUP_DELIM)[-1]: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + var_name.split(gh.GROUP_DELIM)[-1]: [dim.split( + gh.GROUP_DELIM)[-1] for dim in var.dimensions] for var_name, var in out_nc.variables.items() } @@ -1602,7 +1604,7 @@ def test_get_time_epoch_var(self): nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file), mode='r') - nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file)) + nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file)) args = { 'decode_coords': False, @@ -1696,7 +1698,7 @@ def test_temporal_he5file_subset(self): min_time='2020-01-16T12:30:00Z' max_time='2020-01-16T12:40:00Z' bbox = np.array(((-180, 180), (-90, 90))) - nc_dataset, has_groups = subset.h5file_transform(os.path.join(self.subset_output_dir, OMI_copy_file)) + nc_dataset, has_groups = gh.h5file_transform(os.path.join(self.subset_output_dir, OMI_copy_file)) args = { 'decode_coords': False, @@ -1824,7 +1826,7 @@ def test_get_time_OMI(self): shutil.copyfile(os.path.join(self.test_data_dir, 'OMI', omi_file), os.path.join(self.subset_output_dir, omi_file)) - nc_dataset, has_groups = subset.h5file_transform(os.path.join(self.subset_output_dir, omi_file)) + nc_dataset, has_groups = gh.h5file_transform(os.path.join(self.subset_output_dir, omi_file)) args = { 'decode_coords': False, From 8c2d9e8c545df1d1a48ae5656713be7fa9c8ca14 Mon Sep 17 00:00:00 2001 From: sliu008 <69875423+sliu008@users.noreply.github.com> Date: Thu, 8 Dec 2022 10:13:39 -0800 Subject: [PATCH 02/16] feature/PODAAC-5065 (#129) * fix way xarray open 
granules that have as a time unit * fix pylint * change function to use original function if can parse only change units if we can not parse * make xarray override into its own function * add test for override_decode_cf_datetime function * disable pyline one line instead of global * Update podaac/subsetter/subset.py Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> --- CHANGELOG.md | 1 + podaac/subsetter/subset.py | 29 +++++++++++++++++++++++++++++ tests/test_subset.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dcd2a23..69ecce10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Deprecated ### Removed ### Fixed +- PODAAC-5065: integration with SMAP_RSS_L2_SSS_V5, fix way xarray open granules that have `seconds since 2000-1-1 0:0:0 0` as a time unit. ### Security ## [2.2.0] diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index 4d9d7cbe..e5ed21b1 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -24,8 +24,11 @@ import operator import os from shutil import copy +import dateutil +from dateutil import parser import cf_xarray as cfxr +import cftime import geopandas as gpd import h5py import importlib_metadata @@ -34,6 +37,7 @@ import numpy as np import pandas as pd import xarray as xr +import xarray.coding.times from shapely.geometry import Point from shapely.ops import transform @@ -1154,6 +1158,29 @@ def convert_to_datetime(dataset, time_vars): return dataset, start_date +def override_decode_cf_datetime(): + """ + WARNING !!! REMOVE AT EARLIEST XARRAY FIX, this is a override to xarray override_decode_cf_datetime function. + xarray has problems decoding time units with format `seconds since 2000-1-1 0:0:0 0`, this solves by testing + the unit to see if its parsable, if it is use original function, if not format unit into a parsable format. 
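+    For example, a unit string like 'seconds since 2000-1-1 0:0:0 0' (which dateutil fails
+    to parse) is rewritten using the reference time computed by cftime, yielding a parsable
+    form such as 'seconds since 2000-01-01 00:00:00'.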
+ + https://github.com/pydata/xarray/issues/7210 + """ + + orig_decode_cf_datetime = xarray.coding.times.decode_cf_datetime + + def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): + try: + parser.parse(units.split('since')[-1]) + return orig_decode_cf_datetime(num_dates, units, calendar, use_cftime) + except dateutil.parser.ParserError: + reference_time = cftime.num2date(0, units, calendar) + units = f"{units.split('since')[0]} since {reference_time}" + return orig_decode_cf_datetime(num_dates, units, calendar, use_cftime) + + xarray.coding.times.decode_cf_datetime = decode_cf_datetime + + def subset(file_to_subset, bbox, output_file, variables=None, # pylint: disable=too-many-branches, disable=too-many-statements cut=True, shapefile=None, min_time=None, max_time=None, origin_source=None, @@ -1221,6 +1248,8 @@ def subset(file_to_subset, bbox, output_file, variables=None, nc_dataset, rename_vars = dc.remove_duplicate_dims(nc_dataset) + override_decode_cf_datetime() + if variables: variables = [x.replace('/', GROUP_DELIM) for x in variables] diff --git a/tests/test_subset.py b/tests/test_subset.py index 763345ec..c8587c6f 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -1945,3 +1945,38 @@ def test_passed_coords(self): assert lats == dummy_lats assert lons == dummy_lons assert times == dummy_times + + def test_bad_time_unit(self): + + fill_val = -99999.0 + time_vals = np.random.rand(10) + time_vals[0] = fill_val + time_vals[-1] = fill_val + + data_vars = { + 'foo': (['x'], np.random.rand(10)), + 'time': ( + ['x'], + time_vals, + { + 'units': 'seconds since 2000-1-1 0:0:0 0', + '_FillValue': fill_val, + 'standard_name': 'time', + 'calendar': 'standard' + } + ), + } + + ds = xr.Dataset( + data_vars=data_vars, + coords={'x': (['x'], np.arange(10))} + ) + + nc_out_location = join(self.subset_output_dir, "bad_time.nc") + ds.to_netcdf(nc_out_location) + + subset.override_decode_cf_datetime() + + ds_test = xr.open_dataset(nc_out_location) + ds_test.close() + From 91297b6d543b214aa0befa3328ed26160165dced Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Thu, 8 Dec 2022 13:19:01 -0500 Subject: [PATCH 03/16] add missing parameter to docstring --- podaac/subsetter/group_handling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/podaac/subsetter/group_handling.py b/podaac/subsetter/group_handling.py index cf9a648b..a8e6b0a5 100644 --- a/podaac/subsetter/group_handling.py +++ b/podaac/subsetter/group_handling.py @@ -24,6 +24,7 @@ def transform_grouped_dataset(nc_dataset, file_to_subset): ---------- nc_dataset : nc.Dataset netCDF4 Dataset that contains groups + file_to_subset : str Returns ------- From 1311835d9d10e536c6283c6b65f03a2c8c367c7a Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Thu, 8 Dec 2022 13:19:39 -0500 Subject: [PATCH 04/16] typo in docstring --- podaac/subsetter/subset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index e3db8de2..b1673352 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -489,7 +489,7 @@ def compute_time_variable_name(dataset, lat_var): Parameters ---------- - dataset : xr.Dataset: + dataset : xr.Dataset xarray dataset to find time variable from lat_var : xr.Variable Lat variable for this dataset From e41dd08030fb077665f8ee7f6c726a4bfe93b220 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Thu, 8 Dec 2022 13:21:03 -0500 Subject: [PATCH 05/16] extract netcdf opening procedure from beginning of `subset() into a new 
function --- podaac/subsetter/subset.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index b1673352..92607a88 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -930,6 +930,26 @@ def convert_to_datetime(dataset, time_vars): return dataset, start_date +def open_as_nc_dataset(filepath: str) -> tuple[nc.Dataset, list, bool]: + """Open netcdf file, and flatten groups if they exist.""" + file_extension = filepath.split('.')[-1] + + if file_extension == 'he5': + nc_dataset, has_groups = h5file_transform(filepath) + else: + # Open dataset with netCDF4 first, so we can get group info + nc_dataset = nc.Dataset(filepath, mode='r') + has_groups = bool(nc_dataset.groups) + + # If dataset has groups, transform to work with xarray + if has_groups: + nc_dataset = transform_grouped_dataset(nc_dataset, filepath) + + nc_dataset, rename_vars = dc.remove_duplicate_dims(nc_dataset) + + return nc_dataset, rename_vars, has_groups + + def subset(file_to_subset, bbox, output_file, variables=None, # pylint: disable=too-many-branches, disable=too-many-statements cut=True, shapefile=None, min_time=None, max_time=None, origin_source=None, @@ -982,20 +1002,7 @@ def subset(file_to_subset, bbox, output_file, variables=None, than one value in the case where there are multiple groups and different coordinate variables for each group. """ - file_extension = file_to_subset.split('.')[-1] - - if file_extension == 'he5': - nc_dataset, has_groups = h5file_transform(file_to_subset) - else: - # Open dataset with netCDF4 first, so we can get group info - nc_dataset = nc.Dataset(file_to_subset, mode='r') - has_groups = bool(nc_dataset.groups) - - # If dataset has groups, transform to work with xarray - if has_groups: - nc_dataset = transform_grouped_dataset(nc_dataset, file_to_subset) - - nc_dataset, rename_vars = dc.remove_duplicate_dims(nc_dataset) + nc_dataset, rename_vars, has_groups = open_as_nc_dataset(file_to_subset) if variables: variables = [x.replace('/', GROUP_DELIM) for x in variables] From b4d51a2753e81b25fdda2d7785bb59c265ddad02 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Thu, 8 Dec 2022 13:22:04 -0500 Subject: [PATCH 06/16] update tests to use netcdf opening wrapper function, to prevent errors with tempo data --- tests/test_subset.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/test_subset.py b/tests/test_subset.py index 5c683540..982d900f 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -170,11 +170,19 @@ def test_subset_bbox(self): output_file=join(self.subset_output_dir, output_file) ) - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), + file_to_subset = join(self.subset_output_dir, output_file) + + out_ds, rename_vars, _ = subset.open_as_nc_dataset(file_to_subset) + out_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(out_ds), decode_times=False, decode_coords=False, mask_and_scale=False) + # out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), + # decode_times=False, + # decode_coords=False, + # mask_and_scale=False) + lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(out_ds) lat_var_name = lat_var_name[0] @@ -555,6 +563,7 @@ def test_specified_variables(self): excluded_variables = list(set(variable[0] for variable in in_ds.data_vars.items()) - set(included_variables)) + in_ds.close() subset.subset( file_to_subset=join(self.test_data_dir, 
file), @@ -563,6 +572,14 @@ def test_specified_variables(self): variables=included_variables ) + in_ds, rename_vars, _ = subset.open_as_nc_dataset(join(self.test_data_dir, file)) + in_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(in_ds), + decode_times=False, + decode_coords=False) + + # in_ds = xr.open_dataset(join(self.test_data_dir, file), + # decode_times=False, + # decode_coords=False) # Get coord variables time_var_name = [] lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(in_ds) @@ -1221,7 +1238,12 @@ def test_get_time_variable_name(self): 'decode_times': True } time_var_names = [] - ds = xr.open_dataset(os.path.join(self.test_data_dir, test_file), **args) + ds, rename_vars, _ = subset.open_as_nc_dataset(os.path.join(self.test_data_dir, test_file)) + ds = xr.open_dataset(xr.backends.NetCDF4DataStore(ds), + decode_times=False, + decode_coords=False, + mask_and_scale=False) + # ds = xr.open_dataset(os.path.join(self.test_data_dir, test_file), **args) lat_var_name = subset.compute_coordinate_variable_names(ds)[0][0] time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name]) From 8fe0f41e6972fade0ca180b81c33f82bae20c493 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Thu, 8 Dec 2022 18:31:04 +0000 Subject: [PATCH 07/16] /version 2.3.0-alpha.5 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index abd88b9f..b84fe59f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.3.0-alpha.4" +version = "2.3.0-alpha.5" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From bc5889905b3190186e2e9e75049ecfed3045f013 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Thu, 8 Dec 2022 13:57:25 -0500 Subject: [PATCH 08/16] update `test_specified_variables()` to use netcdf opening wrapper function in multiple places to prevent errors with tempo data --- tests/test_subset.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_subset.py b/tests/test_subset.py index 982d900f..60421275 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -554,9 +554,13 @@ def test_specified_variables(self): for file in self.test_files: output_file = "{}_{}".format(self._testMethodName, file) - in_ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) + in_ds, rename_vars, _ = subset.open_as_nc_dataset(join(self.test_data_dir, file)) + in_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(in_ds), + decode_times=False, + decode_coords=False) + # in_ds = xr.open_dataset(join(self.test_data_dir, file), + # decode_times=False, + # decode_coords=False) included_variables = set([variable[0] for variable in in_ds.data_vars.items()][::2]) included_variables = list(included_variables) @@ -599,9 +603,13 @@ def test_specified_variables(self): if time_var_name in excluded_variables: excluded_variables.remove(time_var_name) - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), + out_ds, rename_vars, _ = subset.open_as_nc_dataset(join(self.subset_output_dir, output_file)) + out_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(out_ds), decode_times=False, decode_coords=False) + # out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), + # decode_times=False, + # decode_coords=False) out_vars = [out_var for out_var in out_ds.data_vars.keys()] out_vars.extend(out_ds.coords.keys()) From 
0728f9783fdcf6d4f2ace587c8092265d7483135 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Thu, 8 Dec 2022 14:21:38 -0500 Subject: [PATCH 09/16] cosmetic --- tests/test_subset.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/tests/test_subset.py b/tests/test_subset.py index 60421275..93cc5a1d 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -178,11 +178,6 @@ def test_subset_bbox(self): decode_coords=False, mask_and_scale=False) - # out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - # decode_times=False, - # decode_coords=False, - # mask_and_scale=False) - lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(out_ds) lat_var_name = lat_var_name[0] @@ -556,11 +551,8 @@ def test_specified_variables(self): in_ds, rename_vars, _ = subset.open_as_nc_dataset(join(self.test_data_dir, file)) in_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(in_ds), - decode_times=False, - decode_coords=False) - # in_ds = xr.open_dataset(join(self.test_data_dir, file), - # decode_times=False, - # decode_coords=False) + decode_times=False, + decode_coords=False) included_variables = set([variable[0] for variable in in_ds.data_vars.items()][::2]) included_variables = list(included_variables) @@ -578,12 +570,9 @@ def test_specified_variables(self): in_ds, rename_vars, _ = subset.open_as_nc_dataset(join(self.test_data_dir, file)) in_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(in_ds), - decode_times=False, - decode_coords=False) + decode_times=False, + decode_coords=False) - # in_ds = xr.open_dataset(join(self.test_data_dir, file), - # decode_times=False, - # decode_coords=False) # Get coord variables time_var_name = [] lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(in_ds) @@ -607,9 +596,6 @@ def test_specified_variables(self): out_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(out_ds), decode_times=False, decode_coords=False) - # out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - # decode_times=False, - # decode_coords=False) out_vars = [out_var for out_var in out_ds.data_vars.keys()] out_vars.extend(out_ds.coords.keys()) From 6bf7888818075852f0b799fab847c6068ece08e9 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Thu, 8 Dec 2022 14:29:00 -0500 Subject: [PATCH 10/16] clean up comment and use 'decode_times'=True for test --- tests/test_subset.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_subset.py b/tests/test_subset.py index 93cc5a1d..23327826 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -1233,11 +1233,8 @@ def test_get_time_variable_name(self): } time_var_names = [] ds, rename_vars, _ = subset.open_as_nc_dataset(os.path.join(self.test_data_dir, test_file)) - ds = xr.open_dataset(xr.backends.NetCDF4DataStore(ds), - decode_times=False, - decode_coords=False, - mask_and_scale=False) - # ds = xr.open_dataset(os.path.join(self.test_data_dir, test_file), **args) + ds = xr.open_dataset(xr.backends.NetCDF4DataStore(ds), **args) + lat_var_name = subset.compute_coordinate_variable_names(ds)[0][0] time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name]) From ef5c63632a2847d675f7d45af03a7891ee92ec3f Mon Sep 17 00:00:00 2001 From: Nick Lenssen Date: Mon, 12 Dec 2022 12:24:41 -0500 Subject: [PATCH 11/16] feature/issue 126 (#131) * Add variable leading slash flexibility * Add tests back to test file * changelog added and updated * Update podaac/subsetter/subset.py Co-authored-by: Frank Greguska 
<89428916+frankinspace@users.noreply.github.com> * update Syntax * resolve conflict Co-authored-by: nlensse1 Co-authored-by: Frank Greguska <89428916+frankinspace@users.noreply.github.com> --- CHANGELOG.md | 2 ++ podaac/subsetter/subset.py | 2 ++ tests/test_subset.py | 37 ++++++++++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69ecce10..1ffbad7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- [issue/126](https://github.com/podaac/l2ss-py/issues/126): Added flexibility to variable subsetting +for variables to not have leading slash in the front ### Changed ### Deprecated ### Removed diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index e5ed21b1..2e1b4627 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -1252,6 +1252,8 @@ def subset(file_to_subset, bbox, output_file, variables=None, if variables: variables = [x.replace('/', GROUP_DELIM) for x in variables] + if has_groups: + variables = [GROUP_DELIM + x if not x.startswith(GROUP_DELIM) else x for x in variables] args = { 'decode_coords': False, diff --git a/tests/test_subset.py b/tests/test_subset.py index c8587c6f..161bbf36 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -850,7 +850,6 @@ def test_variable_subset_oco2(self): var_listout = list(out_nc.groups['Retrieval'].variables.keys()) assert ('water_height' in var_listout) - def test_variable_subset_s6(self): """ multiple variable subset of variables in different groups in oco3 @@ -1946,6 +1945,42 @@ def test_passed_coords(self): assert lons == dummy_lons assert times == dummy_times + def test_var_subsetting_tropomi(self): + """ + Check that variable subsetting is the same if a leading slash is included + """ + TROP_dir = join(self.test_data_dir, 'tropomi') + trop_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' + variable_slash = ['/PRODUCT/methane_mixing_ratio'] + variable_noslash = ['PRODUCT/methane_mixing_ratio'] + bbox = np.array(((-180, 180), (-90, 90))) + output_file_slash = "{}_{}".format(self._testMethodName, trop_file) + output_file_noslash = "{}_noslash_{}".format(self._testMethodName, trop_file) + shutil.copyfile( + os.path.join(TROP_dir, trop_file), + os.path.join(self.subset_output_dir, trop_file) + ) + shutil.copyfile( + os.path.join(TROP_dir, trop_file), + os.path.join(self.subset_output_dir,'slashtest'+trop_file) + ) + slash_test = subset.subset( + file_to_subset=join(self.subset_output_dir, trop_file), + bbox=bbox, + output_file=join(self.subset_output_dir, output_file_slash), + variables = variable_slash + ) + noslash_test = subset.subset( + file_to_subset=join(self.subset_output_dir, 'slashtest'+trop_file), + bbox=bbox, + output_file=join(self.subset_output_dir, output_file_noslash), + variables = variable_noslash + ) + + slash_dataset = nc.Dataset(join(self.subset_output_dir, output_file_slash)) + noslash_dataset = nc.Dataset(join(self.subset_output_dir, output_file_noslash)) + + assert list(slash_dataset.groups['PRODUCT'].variables) == list(noslash_dataset.groups['PRODUCT'].variables) def test_bad_time_unit(self): fill_val = -99999.0 From ddac85c6f80a5550b44cdc95e0ed5bdf324aca34 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Mon, 12 Dec 2022 17:38:45 +0000 Subject: [PATCH 12/16] /version 2.3.0-alpha.6 --- pyproject.toml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b84fe59f..da9c5726 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.3.0-alpha.5" +version = "2.3.0-alpha.6" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 98b131a080279f8780aec34de9f5ae7826a494b2 Mon Sep 17 00:00:00 2001 From: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Date: Mon, 12 Dec 2022 17:14:00 -0800 Subject: [PATCH 13/16] Update build-pipeline.yml --- .github/workflows/build-pipeline.yml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index 62348d55..8c964bad 100644 --- a/.github/workflows/build-pipeline.yml +++ b/.github/workflows/build-pipeline.yml @@ -184,7 +184,7 @@ jobs: - name: Extract metadata (tags, labels) for Docker if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} id: meta - uses: docker/metadata-action@v3 + uses: docker/metadata-action@v4 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | @@ -197,7 +197,7 @@ jobs: ${GITHUB_WORKSPACE}/.github/workflows/wait-for-pypi.py ${{env.pyproject_name}}[harmony]==${{ env.software_version }} - name: Build and push Docker image if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: context: . file: docker/Dockerfile @@ -216,13 +216,6 @@ jobs: env: SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} with: - image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.software_version }} + image: ${{ steps.meta.outputs.tags[0] }} args: > --severity-threshold=high - --file=./docker/Dockerfile - --sarif-file-output=docker.sarif - - name: Upload result to GitHub Code Scanning - if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} - uses: github/codeql-action/upload-sarif@v2 - with: - sarif_file: ./ From 13c5eebd931f5f1b98a9c201f621de3cd77030f0 Mon Sep 17 00:00:00 2001 From: l2ss-py bot Date: Tue, 13 Dec 2022 01:27:39 +0000 Subject: [PATCH 14/16] /version 2.3.0-alpha.7 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index da9c5726..d1307f10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ [tool.poetry] name = "l2ss-py" -version = "2.3.0-alpha.6" +version = "2.3.0-alpha.7" description = "L2 Subsetter Service" authors = ["podaac-tva "] license = "Apache-2.0" From 1da8f5f94515ca868c269ec2f4edc001acbe6505 Mon Sep 17 00:00:00 2001 From: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Date: Mon, 12 Dec 2022 18:15:47 -0800 Subject: [PATCH 15/16] Merge changes from origin/develop --- podaac/subsetter/subset.py | 22 ++- poetry.lock | 320 +++++++++++++++++-------------------- pyproject.toml | 6 +- tests/test_subset.py | 164 +++++++++---------- 4 files changed, 249 insertions(+), 263 deletions(-) diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index b927d7f1..d5f6d91c 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -978,10 +978,10 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): xarray.coding.times.decode_cf_datetime = decode_cf_datetime -def subset(file_to_subset, bbox, output_file, variables=None, +def subset(file_to_subset, bbox, output_file, variables=(), # pylint: disable=too-many-branches, disable=too-many-statements cut=True, shapefile=None, min_time=None, max_time=None, 
origin_source=None, - lat_var_names=None, lon_var_names=None, time_var_names=None): + lat_var_names=(), lon_var_names=(), time_var_names=()): """ Subset a given NetCDF file given a bounding box @@ -1014,6 +1014,9 @@ def subset(file_to_subset, bbox, output_file, variables=None, ISO timestamp representing the upper bound of the temporal subset to be performed. If this value is not provided, the granule will not be subset temporally on the upper bound. + origin_source : str + The original granule source prior to this subset operation to + be used for provenance information. lat_var_names : list List of variables that represent the latitude coordinate variables for this granule. This list will only contain more @@ -1034,10 +1037,17 @@ def subset(file_to_subset, bbox, output_file, variables=None, override_decode_cf_datetime() - if variables: - variables = [x.replace('/', GROUP_DELIM) for x in variables] - if has_groups: - variables = [GROUP_DELIM + x if not x.startswith(GROUP_DELIM) else x for x in variables] + if has_groups: + # Make sure all variables start with '/' + variables = ['/' + var if not var.startswith('/') else var for var in variables] + lat_var_names = ['/' + var if not var.startswith('/') else var for var in lat_var_names] + lon_var_names = ['/' + var if not var.startswith('/') else var for var in lon_var_names] + time_var_names = ['/' + var if not var.startswith('/') else var for var in time_var_names] + # Replace all '/' with GROUP_DELIM + variables = [var.replace('/', GROUP_DELIM) for var in variables] + lat_var_names = [var.replace('/', GROUP_DELIM) for var in lat_var_names] + lon_var_names = [var.replace('/', GROUP_DELIM) for var in lon_var_names] + time_var_names = [var.replace('/', GROUP_DELIM) for var in time_var_names] args = { 'decode_coords': False, diff --git a/poetry.lock b/poetry.lock index 2d142e97..22d4ddb2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8,7 +8,7 @@ python-versions = "*" [[package]] name = "astroid" -version = "2.12.9" +version = "2.12.13" description = "An abstract syntax tree for Python with inference support." category = "dev" optional = false @@ -22,14 +22,6 @@ wrapt = [ {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, ] -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." 
-category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - [[package]] name = "attrs" version = "22.1.0" @@ -46,7 +38,7 @@ tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> [[package]] name = "aws-sam-translator" -version = "1.50.0" +version = "1.55.0" description = "AWS SAM Translator is a library that transform SAM templates into AWS CloudFormation templates" category = "dev" optional = false @@ -57,11 +49,11 @@ boto3 = ">=1.19.5,<2.0.0" jsonschema = ">=3.2,<4.0" [package.extras] -dev = ["coverage (>=5.3,<6.0)", "flake8 (>=3.8.4,<3.9.0)", "tox (>=3.24,<4.0)", "pytest-cov (>=2.10.1,<2.11.0)", "pytest-xdist (>=2.5,<3.0)", "pytest-env (>=0.6.2,<0.7.0)", "pylint (>=2.9.0,<2.10.0)", "pyyaml (>=5.4,<6.0)", "pytest (>=6.2.5,<6.3.0)", "parameterized (>=0.7.4,<0.8.0)", "click (>=7.1,<8.0)", "dateparser (>=0.7,<1.0)", "boto3 (>=1.23,<2)", "tenacity (>=7.0.0,<7.1.0)", "requests (>=2.24.0,<2.25.0)", "docopt (>=0.6.2,<0.7.0)", "black (==20.8b1)"] +dev = ["coverage (>=5.3,<6.0)", "flake8 (>=3.8.4,<3.9.0)", "tox (>=3.24,<4.0)", "pytest-cov (>=2.10.1,<2.11.0)", "pytest-xdist (>=2.5,<3.0)", "pytest-env (>=0.6.2,<0.7.0)", "pytest-rerunfailures (>=9.1.1,<9.2.0)", "pylint (>=2.15.0,<2.16.0)", "pyyaml (>=5.4,<6.0)", "pytest (>=6.2.5,<6.3.0)", "parameterized (>=0.7.4,<0.8.0)", "click (>=7.1,<8.0)", "dateparser (>=0.7,<1.0)", "boto3 (>=1.23,<2)", "tenacity (>=7.0.0,<7.1.0)", "requests (>=2.24.0,<2.25.0)", "docopt (>=0.6.2,<0.7.0)", "black (==20.8b1)", "ruamel.yaml (==0.17.21)", "mypy (==0.971)", "boto3-stubs[serverlessrepo,appconfig] (>=1.19.5,<2.0.0)", "types-PyYAML (>=5.4,<6.0)", "types-jsonschema (>=3.2,<4.0)"] [[package]] name = "aws-xray-sdk" -version = "2.10.0" +version = "2.11.0" description = "The AWS X-Ray SDK for Python (the SDK) enables Python developers to record and emit information from within their applications to the AWS X-Ray service." category = "dev" optional = false @@ -73,7 +65,7 @@ wrapt = "*" [[package]] name = "babel" -version = "2.10.3" +version = "2.11.0" description = "Internationalization utilities" category = "dev" optional = false @@ -109,14 +101,14 @@ python-versions = "*" [[package]] name = "boto3" -version = "1.24.67" +version = "1.26.28" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" [package.dependencies] -botocore = ">=1.27.67,<1.28.0" +botocore = ">=1.29.28,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -125,7 +117,7 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.27.67" +version = "1.29.28" description = "Low-level, data-driven core of boto 3." category = "main" optional = false @@ -137,11 +129,11 @@ python-dateutil = ">=2.1,<3.0.0" urllib3 = ">=1.25.4,<1.27" [package.extras] -crt = ["awscrt (==0.14.0)"] +crt = ["awscrt (==0.15.3)"] [[package]] name = "certifi" -version = "2022.6.15" +version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." 
category = "main" optional = false @@ -149,7 +141,7 @@ python-versions = ">=3.6" [[package]] name = "cf-xarray" -version = "0.7.4" +version = "0.7.6" description = "A lightweight convenience wrapper for using CF attributes on xarray objects" category = "main" optional = false @@ -171,14 +163,14 @@ pycparser = "*" [[package]] name = "cfn-lint" -version = "0.64.1" +version = "0.72.2" description = "Checks CloudFormation templates for practices and behaviour that could potentially be improved" category = "dev" optional = false python-versions = ">=3.7, <=4.0, !=4.0" [package.dependencies] -aws-sam-translator = ">=1.50.0" +aws-sam-translator = ">=1.55.0" jschema-to-python = ">=1.2.3,<1.3.0" jsonpatch = "*" jsonschema = ">=3.0,<5" @@ -189,7 +181,7 @@ sarif-om = ">=1.0.4,<1.1.0" [[package]] name = "cftime" -version = "1.6.1" +version = "1.6.2" description = "Time-handling functionality from netcdf4-python" category = "main" optional = false @@ -258,26 +250,29 @@ python-versions = ">=3.6" [[package]] name = "colorama" -version = "0.4.5" +version = "0.4.6" description = "Cross-platform colored terminal text." category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" [[package]] name = "coverage" -version = "6.4.4" +version = "6.5.0" description = "Code coverage measurement for Python" category = "dev" optional = false python-versions = ">=3.7" +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + [package.extras] toml = ["tomli"] [[package]] name = "cryptography" -version = "38.0.1" +version = "38.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
category = "dev" optional = false @@ -296,16 +291,17 @@ test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", [[package]] name = "dask" -version = "2022.9.0" +version = "2022.12.0" description = "Parallel PyData with Task Scheduling" category = "main" optional = false python-versions = ">=3.8" [package.dependencies] -bokeh = {version = ">=2.4.2", optional = true, markers = "extra == \"complete\""} +bokeh = {version = ">=2.4.2,<3", optional = true, markers = "extra == \"complete\""} +click = ">=7.0" cloudpickle = ">=1.1.1" -distributed = {version = "2022.9.0", optional = true, markers = "extra == \"complete\""} +distributed = {version = "2022.12.0", optional = true, markers = "extra == \"complete\""} fsspec = ">=0.6.0" jinja2 = {version = "*", optional = true, markers = "extra == \"complete\""} numpy = {version = ">=1.18", optional = true, markers = "extra == \"complete\""} @@ -317,10 +313,10 @@ toolz = ">=0.8.2" [package.extras] array = ["numpy (>=1.18)"] -complete = ["bokeh (>=2.4.2)", "distributed (==2022.9.0)", "jinja2", "numpy (>=1.18)", "pandas (>=1.0)"] +complete = ["bokeh (>=2.4.2,<3)", "distributed (==2022.12.0)", "jinja2", "numpy (>=1.18)", "pandas (>=1.0)"] dataframe = ["numpy (>=1.18)", "pandas (>=1.0)"] -diagnostics = ["bokeh (>=2.4.2)", "jinja2"] -distributed = ["distributed (==2022.9.0)"] +diagnostics = ["bokeh (>=2.4.2,<3)", "jinja2"] +distributed = ["distributed (==2022.12.0)"] test = ["pandas", "pytest", "pytest-rerunfailures", "pytest-xdist", "pre-commit"] [[package]] @@ -336,27 +332,27 @@ packaging = "*" [[package]] name = "dill" -version = "0.3.5.1" +version = "0.3.6" description = "serialize all of python" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +python-versions = ">=3.7" [package.extras] graph = ["objgraph (>=1.7.2)"] [[package]] name = "distributed" -version = "2022.9.0" +version = "2022.12.0" description = "Distributed scheduler for Dask" category = "main" optional = false python-versions = ">=3.8" [package.dependencies] -click = ">=6.6" +click = ">=7.0" cloudpickle = ">=1.5.0" -dask = "2022.9.0" +dask = "2022.12.0" jinja2 = "*" locket = ">=1.0.0" msgpack = ">=0.6.0" @@ -365,14 +361,14 @@ psutil = ">=5.0" pyyaml = "*" sortedcontainers = "<2.0.0 || >2.0.0,<2.0.1 || >2.0.1" tblib = ">=1.6.0" -toolz = ">=0.8.2" -tornado = ">=6.0.3,<6.2" +toolz = ">=0.10.0" +tornado = ">=6.0.3" urllib3 = "*" zict = ">=0.1.3" [[package]] name = "docker" -version = "6.0.0" +version = "6.0.1" description = "A Python library for the Docker Engine API." 
category = "dev" optional = false @@ -411,9 +407,20 @@ six = ">=1.9.0" gmpy = ["gmpy"] gmpy2 = ["gmpy2"] +[[package]] +name = "exceptiongroup" +version = "1.0.4" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "fiona" -version = "1.8.21" +version = "1.8.22" description = "Fiona reads and writes spatial data files" category = "main" optional = false @@ -429,7 +436,7 @@ munch = "*" six = ">=1.7" [package.extras] -all = ["boto3 (>=1.2.4)", "pytest-cov", "shapely", "pytest (>=3)", "mock"] +all = ["pytest-cov", "shapely", "boto3 (>=1.2.4)", "pytest (>=3)", "mock"] calc = ["shapely"] s3 = ["boto3 (>=1.2.4)"] test = ["pytest (>=3)", "pytest-cov", "boto3 (>=1.2.4)", "mock"] @@ -449,7 +456,7 @@ pyflakes = ">=2.3.0,<2.4.0" [[package]] name = "fsspec" -version = "2022.8.2" +version = "2022.11.0" description = "File-system specification" category = "main" optional = false @@ -505,7 +512,7 @@ numpy = ">=1.14.5" [[package]] name = "harmony-service-lib" -version = "1.0.21" +version = "1.0.22" description = "A library for Python-based Harmony services to parse incoming messages, fetch data, stage data, and call back to Harmony" category = "main" optional = true @@ -546,7 +553,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "importlib-metadata" -version = "4.12.0" +version = "4.13.0" description = "Read metadata from Python packages" category = "main" optional = false @@ -556,21 +563,29 @@ python-versions = ">=3.7" zipp = ">=0.5" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] +docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] perf = ["ipython"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" [[package]] name = "isort" -version = "5.10.1" +version = "5.11.1" description = "A Python utility / library to sort Python imports." 
category = "dev" optional = false -python-versions = ">=3.6.1,<4.0" +python-versions = ">=3.7.0" [package.extras] -pipfile_deprecated_finder = ["pipreqs", "requirementslib"] -requirements_deprecated_finder = ["pipreqs", "pip-api"] +pipfile-deprecated-finder = ["pipreqs", "requirementslib"] +requirements-deprecated-finder = ["pipreqs", "pip-api"] colors = ["colorama (>=0.4.3,<0.5.0)"] plugins = ["setuptools"] @@ -630,16 +645,16 @@ jsonpointer = ">=1.9" [[package]] name = "jsonpickle" -version = "2.2.0" +version = "3.0.0" description = "Python library for serializing any arbitrary object graph into JSON" category = "dev" optional = false -python-versions = ">=2.7" +python-versions = ">=3.7" [package.extras] docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] -testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "numpy", "pandas", "pymongo", "scikit-learn", "sqlalchemy", "pytest-flake8 (<1.1.0)", "enum34", "jsonlib", "pytest-flake8 (>=1.1.1)"] -"testing.libs" = ["simplejson", "ujson", "yajl"] +testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8 (>=1.1.1)", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "gmpy2", "numpy", "pandas", "pymongo", "scikit-learn", "sqlalchemy"] +"testing.libs" = ["simplejson", "ujson"] [[package]] name = "jsonpointer" @@ -687,11 +702,11 @@ six = "*" [[package]] name = "lazy-object-proxy" -version = "1.7.1" +version = "1.8.0" description = "A fast and thorough lazy object proxy." category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [[package]] name = "locket" @@ -750,14 +765,6 @@ build = ["twine", "wheel", "blurb"] docs = ["sphinx"] test = ["pytest (<5.4)", "pytest-cov"] -[[package]] -name = "more-itertools" -version = "8.14.0" -description = "More routines for operating on iterables, beyond itertools" -category = "dev" -optional = false -python-versions = ">=3.5" - [[package]] name = "moto" version = "1.3.14" @@ -817,11 +824,11 @@ yaml = ["PyYAML (>=5.1.0)"] [[package]] name = "netcdf4" -version = "1.6.0" +version = "1.6.2" description = "Provides an object-oriented python interface to the netCDF version 4 library." category = "main" optional = false -python-versions = "*" +python-versions = ">=3.6" [package.dependencies] cftime = "*" @@ -829,7 +836,7 @@ numpy = ">=1.9" [[package]] name = "networkx" -version = "2.8.6" +version = "2.8.8" description = "Python package for creating and manipulating graphs and networks" category = "dev" optional = false @@ -837,14 +844,14 @@ python-versions = ">=3.8" [package.extras] default = ["numpy (>=1.19)", "scipy (>=1.8)", "matplotlib (>=3.4)", "pandas (>=1.3)"] -developer = ["pre-commit (>=2.20)", "mypy (>=0.961)"] -doc = ["sphinx (>=5)", "pydata-sphinx-theme (>=0.9)", "sphinx-gallery (>=0.10)", "numpydoc (>=1.4)", "pillow (>=9.1)", "nb2plots (>=0.6)", "texext (>=0.6.6)"] +developer = ["pre-commit (>=2.20)", "mypy (>=0.982)"] +doc = ["sphinx (>=5.2)", "pydata-sphinx-theme (>=0.11)", "sphinx-gallery (>=0.11)", "numpydoc (>=1.5)", "pillow (>=9.2)", "nb2plots (>=0.6)", "texext (>=0.6.6)"] extra = ["lxml (>=4.6)", "pygraphviz (>=1.9)", "pydot (>=1.4.2)", "sympy (>=1.10)"] -test = ["pytest (>=7.1)", "pytest-cov (>=3.0)", "codecov (>=2.1)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)", "codecov (>=2.1)"] [[package]] name = "numpy" -version = "1.23.2" +version = "1.23.5" description = "NumPy is the fundamental package for array computing with Python." 
category = "main" optional = false @@ -852,18 +859,15 @@ python-versions = ">=3.8" [[package]] name = "packaging" -version = "21.3" +version = "22.0" description = "Core utilities for Python packages" category = "main" optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" +python-versions = ">=3.7" [[package]] name = "pandas" -version = "1.4.4" +version = "1.5.2" description = "Powerful data structures for data analysis, time series, and statistics" category = "main" optional = false @@ -871,10 +875,9 @@ python-versions = ">=3.8" [package.dependencies] numpy = [ - {version = ">=1.18.5", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, - {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.1" pytz = ">=2020.1" @@ -899,7 +902,7 @@ complete = ["blosc", "pyzmq", "pandas (>=0.19.0)", "numpy (>=1.9.0)"] [[package]] name = "pbr" -version = "5.10.0" +version = "5.11.0" description = "Python Build Reasonableness" category = "dev" optional = false @@ -907,7 +910,7 @@ python-versions = ">=2.6" [[package]] name = "pillow" -version = "9.2.0" +version = "9.3.0" description = "Python Imaging Library (Fork)" category = "main" optional = false @@ -919,30 +922,31 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa [[package]] name = "platformdirs" -version = "2.5.2" -description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +version = "2.6.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." category = "dev" optional = false python-versions = ">=3.7" [package.extras] -docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] -test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] +docs = ["furo (>=2022.9.29)", "proselint (>=0.13)", "sphinx-autodoc-typehints (>=1.19.4)", "sphinx (>=5.3)"] +test = ["appdirs (==1.4.4)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest (>=7.2)"] [[package]] name = "pluggy" -version = "0.13.1" +version = "1.0.0" description = "plugin and hook calling mechanisms for python" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.6" [package.extras] dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "psutil" -version = "5.9.2" +version = "5.9.4" description = "Cross-platform lib for process and system monitoring in Python." 
category = "main" optional = false @@ -951,18 +955,10 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [package.extras] test = ["ipaddress", "mock", "enum34", "pywin32", "wmi"] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - [[package]] name = "py-cpuinfo" -version = "8.0.0" -description = "Get CPU info with pure Python 2 & 3" +version = "9.0.0" +description = "Get CPU info with pure Python" category = "dev" optional = false python-versions = "*" @@ -1012,14 +1008,14 @@ plugins = ["importlib-metadata"] [[package]] name = "pylint" -version = "2.15.2" +version = "2.15.8" description = "python code static checker" category = "dev" optional = false python-versions = ">=3.7.2" [package.dependencies] -astroid = ">=2.12.9,<=2.14.0-dev0" +astroid = ">=2.12.13,<=2.14.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = ">=0.2" isort = ">=4.2.5,<6" @@ -1048,20 +1044,9 @@ cffi = ">=1.4.1" docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] tests = ["pytest (>=3.2.1,!=3.3.0)", "hypothesis (>=3.27.0)"] -[[package]] -name = "pyparsing" -version = "3.0.9" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["railroad-diagrams", "jinja2"] - [[package]] name = "pyproj" -version = "3.3.1" +version = "3.4.0" description = "Python interface to PROJ (cartographic projections and coordinate transformations library)" category = "main" optional = false @@ -1072,7 +1057,7 @@ certifi = "*" [[package]] name = "pyrsistent" -version = "0.18.1" +version = "0.19.2" description = "Persistent/Functional/Immutable data structures" category = "dev" optional = false @@ -1094,33 +1079,31 @@ validation = ["jsonschema (==3.2.0)"] [[package]] name = "pytest" -version = "5.4.3" +version = "7.2.0" description = "pytest: simple powerful testing with Python" category = "dev" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7" [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=17.4.0" +attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} -more-itertools = ">=4.0.0" +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<1.0" -py = ">=1.5.0" -wcwidth = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] -checkqa-mypy = ["mypy (==v0.761)"] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] [[package]] name = "pytest-benchmark" -version = "3.4.1" +version = "4.0.0" description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.7" [package.dependencies] py-cpuinfo = "*" @@ -1133,16 +1116,15 @@ histogram = ["pygal", "pygaljs"] [[package]] name = "pytest-cov" -version = "2.12.1" +version = "4.0.0" description = "Pytest plugin for measuring coverage." 
category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.6" [package.dependencies] -coverage = ">=5.2.1" +coverage = {version = ">=5.2.1", extras = ["toml"]} pytest = ">=4.6" -toml = "*" [package.extras] testing = ["fields", "hunter", "process-tests", "six", "pytest-xdist", "virtualenv"] @@ -1186,7 +1168,7 @@ python-versions = ">=3.5" [[package]] name = "pytz" -version = "2022.2.1" +version = "2022.6" description = "World timezone definitions, modern and historical" category = "main" optional = false @@ -1194,7 +1176,7 @@ python-versions = "*" [[package]] name = "pywin32" -version = "304" +version = "305" description = "Python for Window Extensions" category = "dev" optional = false @@ -1228,18 +1210,20 @@ use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "responses" -version = "0.21.0" +version = "0.22.0" description = "A utility library for mocking out the `requests` Python library." category = "dev" optional = false python-versions = ">=3.7" [package.dependencies] -requests = ">=2.0,<3.0" +requests = ">=2.22.0,<3.0" +toml = "*" +types-toml = "*" urllib3 = ">=1.25.10" [package.extras] -tests = ["pytest (>=7.0.0)", "coverage (>=6.0.0)", "pytest-cov", "pytest-asyncio", "pytest-localserver", "flake8", "types-mock", "types-requests", "mypy"] +tests = ["pytest (>=7.0.0)", "coverage (>=6.0.0)", "pytest-cov", "pytest-asyncio", "pytest-httpserver", "flake8", "types-requests", "mypy"] [[package]] name = "rsa" @@ -1280,7 +1264,7 @@ pbr = "*" [[package]] name = "shapely" -version = "1.8.4" +version = "1.8.5.post1" description = "Geometric objects, predicates, and operations" category = "main" optional = false @@ -1474,11 +1458,11 @@ python-versions = ">=3.7" [[package]] name = "tomlkit" -version = "0.11.4" +version = "0.11.6" description = "Style preserving TOML library" category = "dev" optional = false -python-versions = ">=3.6,<4.0" +python-versions = ">=3.6" [[package]] name = "toolz" @@ -1490,15 +1474,23 @@ python-versions = ">=3.5" [[package]] name = "tornado" -version = "6.1" +version = "6.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." category = "main" optional = false -python-versions = ">= 3.5" +python-versions = ">= 3.7" + +[[package]] +name = "types-toml" +version = "0.10.8.1" +description = "Typing stubs for toml" +category = "dev" +optional = false +python-versions = "*" [[package]] name = "typing-extensions" -version = "4.3.0" +version = "4.4.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false @@ -1506,28 +1498,20 @@ python-versions = ">=3.7" [[package]] name = "urllib3" -version = "1.26.12" +version = "1.26.13" description = "HTTP library with thread-safe connection pooling, file post, and more." 
category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" [package.extras] brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "wcwidth" -version = "0.2.5" -description = "Measures the displayed width of unicode strings in a terminal" -category = "dev" -optional = false -python-versions = "*" - [[package]] name = "websocket-client" -version = "1.4.1" +version = "1.4.2" description = "WebSocket client for Python with low level API options" category = "dev" optional = false @@ -1562,7 +1546,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" [[package]] name = "xarray" -version = "2022.6.0" +version = "2022.12.0" description = "N-D labeled arrays and datasets in Python" category = "main" optional = false @@ -1570,15 +1554,15 @@ python-versions = ">=3.8" [package.dependencies] dask = {version = "*", extras = ["complete"], optional = true, markers = "extra == \"parallel\""} -numpy = ">=1.19" -packaging = ">=20.0" -pandas = ">=1.2" +numpy = ">=1.20" +packaging = ">=21.3" +pandas = ">=1.3" [package.extras] accel = ["scipy", "bottleneck", "numbagg", "flox"] -complete = ["netcdf4", "h5netcdf", "scipy", "pydap", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis"] -docs = ["netcdf4", "h5netcdf", "scipy", "pydap", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis", "sphinx-autosummary-accessors", "sphinx-rtd-theme", "ipython", "ipykernel", "jupyter-client", "nbsphinx", "scanpydoc"] -io = ["netcdf4", "h5netcdf", "scipy", "pydap", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch"] +complete = ["netcdf4", "h5netcdf", "scipy", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis", "pydap"] +docs = ["netcdf4", "h5netcdf", "scipy", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis", "sphinx-autosummary-accessors", "sphinx-rtd-theme", "ipython", "ipykernel", "jupyter-client", "nbsphinx", "scanpydoc", "pydap"] +io = ["netcdf4", "h5netcdf", "scipy", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "pydap"] parallel = ["dask"] viz = ["matplotlib", "seaborn", "nc-time-axis"] @@ -1603,15 +1587,15 @@ heapdict = "*" [[package]] name = "zipp" -version = "3.8.1" +version = "3.11.0" description = "Backport of pathlib-compatible object wrapper for zip files" category = "main" optional = false python-versions = ">=3.7" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] +docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", 
"jaraco.functools", "more-itertools", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"] [extras] harmony = ["harmony-service-lib", "pystac"] @@ -1619,12 +1603,11 @@ harmony = ["harmony-service-lib", "pystac"] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "a812b9c24f128e06197e201439794f7a28bb95055a72f928390326c92111bca9" +content-hash = "ae9d1d8198b4c7d46344bde0f75fa5028fa0f0e31aa97603648636a30e45bed5" [metadata.files] alabaster = [] astroid = [] -atomicwrites = [] attrs = [] aws-sam-translator = [] aws-xray-sdk = [] @@ -1653,6 +1636,7 @@ distributed = [] docker = [] docutils = [] ecdsa = [] +exceptiongroup = [] fiona = [] flake8 = [] fsspec = [] @@ -1663,6 +1647,7 @@ heapdict = [] idna = [] imagesize = [] importlib-metadata = [] +iniconfig = [] isort = [] jinja2 = [] jmespath = [] @@ -1681,7 +1666,6 @@ markupsafe = [] mccabe = [] mistune = [] mock = [] -more-itertools = [] moto = [] msgpack = [] munch = [] @@ -1696,7 +1680,6 @@ pillow = [] platformdirs = [] pluggy = [] psutil = [] -py = [] py-cpuinfo = [] pyasn1 = [] pycodestyle = [] @@ -1705,7 +1688,6 @@ pyflakes = [] pygments = [] pylint = [] pynacl = [] -pyparsing = [] pyproj = [] pyrsistent = [] pystac = [] @@ -1742,9 +1724,9 @@ tomli = [] tomlkit = [] toolz = [] tornado = [] +types-toml = [] typing-extensions = [] urllib3 = [] -wcwidth = [] websocket-client = [] werkzeug = [] wrapt = [] diff --git a/pyproject.toml b/pyproject.toml index d1307f10..f401b8b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,12 +40,12 @@ h5py = "^3.6.0" cf-xarray = "*" [tool.poetry.dev-dependencies] -pytest = "^5.2" +pytest = "~7" flake8 = "^3.7" -pytest-cov = "^2.8" +pytest-cov = "~4" pylint = "^2.4" sphinx = "^4.4" -pytest-benchmark = "^3.2.3" +pytest-benchmark = "~4" moto = "1.3.14" jsonschema = "^3.2.0" m2r2 = "^0.3.1" diff --git a/tests/test_subset.py b/tests/test_subset.py index e17aa06a..75cb0e5f 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -151,7 +151,6 @@ def test_subset_variables(self): in_ds.close() out_ds.close() - def test_subset_bbox(self): """ @@ -340,7 +339,6 @@ def test_subset_empty_bbox(self): assert test_input_dataset.dims.keys() == empty_dataset.dims.keys() - def test_bbox_conversion(self): """ Test that the bounding box conversion returns expected @@ -849,15 +847,15 @@ def test_variable_subset_oco2(self): output_file_name = 'oco2_test_out.nc' shutil.copyfile(os.path.join(self.test_data_dir, 'OCO2', oco2_file_name), os.path.join(self.subset_output_dir, oco2_file_name)) - bbox = np.array(((-180,180),(-90.0,90))) - variables = ['/xco2','/xco2_quality_flag','/Retrieval/water_height','/sounding_id'] + bbox = np.array(((-180, 180), (-90.0, 90))) + variables = ['/xco2', '/xco2_quality_flag', '/Retrieval/water_height', '/sounding_id'] subset.subset( - file_to_subset=join(self.test_data_dir, 'OCO2',oco2_file_name), + file_to_subset=join(self.test_data_dir, 'OCO2', oco2_file_name), bbox=bbox, variables=variables, output_file=join(self.subset_output_dir, output_file_name), ) - + out_nc = nc.Dataset(join(self.subset_output_dir, output_file_name)) var_listout = list(out_nc.groups['Retrieval'].variables.keys()) assert ('water_height' in var_listout) @@ -871,7 +869,7 @@ def test_variable_subset_s6(self): output_file_name = 's6_test_out.nc' shutil.copyfile(os.path.join(self.test_data_dir, 'sentinel_6', s6_file_name), os.path.join(self.subset_output_dir, s6_file_name)) - bbox = np.array(((-180,180),(-90.0,90))) + bbox = np.array(((-180, 180), (-90.0, 90))) variables = 
['/data_01/ku/range_ocean_mle3_rms', '/data_20/ku/range_ocean'] subset.subset( file_to_subset=join(self.subset_output_dir, s6_file_name), @@ -879,14 +877,13 @@ def test_variable_subset_s6(self): variables=variables, output_file=join(self.subset_output_dir, output_file_name), ) - + out_nc = nc.Dataset(join(self.subset_output_dir, output_file_name)) - var_listout =list(out_nc.groups['data_01'].groups['ku'].variables.keys()) + var_listout = list(out_nc.groups['data_01'].groups['ku'].variables.keys()) var_listout.extend(list(out_nc.groups['data_20'].groups['ku'].variables.keys())) assert ('range_ocean_mle3_rms' in var_listout) assert ('range_ocean' in var_listout) - def test_transform_grouped_dataset(self): """ Test that the transformation function results in a correctly @@ -923,7 +920,6 @@ def test_transform_grouped_dataset(self): group = group[g] assert var_name.strip('__').split('__')[-1] in group.variables.keys() - def test_group_subset(self): """ Ensure a subset function can be run on a granule that contains @@ -1327,9 +1323,10 @@ def test_duplicate_dims_tropomi(self): in_nc = nc.Dataset(join(TROP_dir, trop_file)) out_nc = nc.Dataset(join(self.subset_output_dir, output_file)) - for var_name, variable in in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables.items(): - assert variable.shape == out_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[var_name].shape - + for var_name, variable in in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups[ + 'DETAILED_RESULTS'].variables.items(): + assert variable.shape == \ + out_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[var_name].shape def test_omi_novars_subset(self): """ @@ -1355,10 +1352,12 @@ def test_omi_novars_subset(self): in_nc = nc.Dataset(join(omi_dir, omi_file)) out_nc = nc.Dataset(join(self.subset_output_dir, output_file)) - for var_name, variable in in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups['Geolocation Fields'].variables.items(): - assert in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups['Geolocation Fields'].variables[var_name].shape == \ - out_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups['Geolocation Fields'].variables[var_name].shape - + for var_name, variable in in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups[ + 'Geolocation Fields'].variables.items(): + assert in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups[ + 'Geolocation Fields'].variables[var_name].shape == \ + out_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups[ + 'Geolocation Fields'].variables[var_name].shape def test_root_group(self): """test that the GROUP_DELIM string, '__', is added to variables in the root group""" @@ -1370,21 +1369,21 @@ def test_root_group(self): nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, sndr_file_name)) args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False - } + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': False + } nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, sndr_file_name)) with xr.open_dataset( - xr.backends.NetCDF4DataStore(nc_dataset), - **args + xr.backends.NetCDF4DataStore(nc_dataset), + **args ) as dataset: var_list = list(dataset.variables) assert (var_list[0][0:2] == gh.GROUP_DELIM) group_lst = [] - for 
var_name in dataset.variables.keys(): #need logic if there is data in the top level not in a group + for var_name in dataset.variables.keys(): # need logic if there is data in the top level not in a group group_lst.append('/'.join(var_name.split(gh.GROUP_DELIM)[:-1])) - group_lst = ['/' if group=='' else group for group in group_lst] + group_lst = ['/' if group == '' else group for group in group_lst] groups = set(group_lst) expected_group = {'/mw', '/ave_kern', '/', '/mol_lay', '/aux'} assert (groups == expected_group) @@ -1401,14 +1400,14 @@ def test_get_time_squeeze(self): nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file_name)) args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False - } + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': False + } nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file_name)) with xr.open_dataset( - xr.backends.NetCDF4DataStore(nc_dataset), - **args + xr.backends.NetCDF4DataStore(nc_dataset), + **args ) as dataset: lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name]) @@ -1426,14 +1425,14 @@ def test_get_indexers_nd(self): nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file_name)) args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False - } + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': False + } nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file_name)) with xr.open_dataset( - xr.backends.NetCDF4DataStore(nc_dataset), - **args + xr.backends.NetCDF4DataStore(nc_dataset), + **args ) as dataset: time_var_names = [] lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] @@ -1444,14 +1443,14 @@ def test_get_indexers_nd(self): cond = oper( (dataset[lon_var_name] >= -180), (dataset[lon_var_name] <= 180) - ) & (dataset[lat_var_name] >= -90) & (dataset[lat_var_name] <= 90) & True + ) & (dataset[lat_var_name] >= -90) & (dataset[lat_var_name] <= 90) & True indexers = xre.get_indexers_from_nd(cond, True) indexed_cond = cond.isel(**indexers) indexed_ds = dataset.isel(**indexers) new_dataset = indexed_ds.where(indexed_cond) - - assert ((time_var_name not in indexers.keys()) == True) #time can't be in the index + + assert ((time_var_name not in indexers.keys()) == True) # time can't be in the index assert (new_dataset.dims == dataset.dims) def test_variable_type_string_oco2(self): @@ -1461,15 +1460,15 @@ def test_variable_type_string_oco2(self): output_file_name = 'oco2_test_out.nc' shutil.copyfile(os.path.join(self.test_data_dir, 'OCO2', oco2_file_name), os.path.join(self.subset_output_dir, oco2_file_name)) - bbox = np.array(((-180,180),(-90.0,90))) + bbox = np.array(((-180, 180), (-90.0, 90))) subset.subset( - file_to_subset=join(self.test_data_dir, 'OCO2',oco2_file_name), + file_to_subset=join(self.test_data_dir, 'OCO2', oco2_file_name), bbox=bbox, output_file=join(self.subset_output_dir, output_file_name), ) - in_nc = xr.open_dataset(join(self.test_data_dir, 'OCO2',oco2_file_name)) + in_nc = xr.open_dataset(join(self.test_data_dir, 'OCO2', oco2_file_name)) out_nc = xr.open_dataset(join(self.subset_output_dir, output_file_name)) assert (in_nc.variables['source_files'].dtype == out_nc.variables['source_files'].dtype) @@ -1487,11 +1486,11 @@ def test_transform_h5py_dataset(self): entry_lst = [] # Get root 
level objects key_lst = list(h5_ds.keys()) - + # Go through every level of the file to fill out the remaining objects for entry_str in key_lst: # If object is a group, add it to the loop list - if (isinstance(h5_ds[entry_str],h5py.Group)): + if (isinstance(h5_ds[entry_str], h5py.Group)): for group_keys in list(h5_ds[entry_str].keys()): if (isinstance(h5_ds[entry_str + "/" + group_keys], h5py.Dataset)): entry_lst.append(entry_str + "/" + group_keys) @@ -1500,15 +1499,14 @@ def test_transform_h5py_dataset(self): nc_dataset, has_groups = gh.h5file_transform(os.path.join(self.subset_output_dir, OMI_file_name)) nc_vars_flattened = list(nc_dataset.variables.keys()) - for i in range(len(entry_lst)): # go through all the datasets in h5py file - input_variable = '__'+entry_lst[i].replace('/', '__') + for i in range(len(entry_lst)): # go through all the datasets in h5py file + input_variable = '__' + entry_lst[i].replace('/', '__') output_variable = nc_vars_flattened[i] assert (input_variable == output_variable) nc_dataset.close() h5_ds.close() - def test_variable_dims_matched_tropomi(self): """ Code must match the dimensions for each variable rather than @@ -1527,7 +1525,7 @@ def test_variable_dims_matched_tropomi(self): var_name: [dim.split(gh.GROUP_DELIM)[-1] for dim in var.dimensions] for var_name, var in in_nc.groups['PRODUCT'].variables.items() } - + # Get variables from METADATA group in_var_dims.update( { @@ -1539,7 +1537,8 @@ def test_variable_dims_matched_tropomi(self): in_var_dims.update( { var_name: [dim.split(gh.GROUP_DELIM)[-1] for dim in var.dimensions] - for var_name, var in in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['GEOLOCATIONS'].variables.items() + for var_name, var in + in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['GEOLOCATIONS'].variables.items() } ) @@ -1556,7 +1555,6 @@ def test_variable_dims_matched_tropomi(self): self.assertDictEqual(in_var_dims, out_var_dims) - def test_temporal_merged_topex(self): """ Test that a temporal subset results in a granule that only @@ -1613,7 +1611,6 @@ def test_get_time_epoch_var(self): shutil.copyfile(os.path.join(self.test_data_dir, 'tropomi', tropomi_file), os.path.join(self.subset_output_dir, tropomi_file)) - nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file), mode='r') nc_dataset = gh.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file)) @@ -1628,7 +1625,6 @@ def test_get_time_epoch_var(self): xr.backends.NetCDF4DataStore(nc_dataset), **args ) as dataset: - lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(dataset) time_var_names = [ subset.compute_time_variable_name( @@ -1636,7 +1632,7 @@ def test_get_time_epoch_var(self): ) for lat_var_name in lat_var_names ] epoch_time_var = subset.get_time_epoch_var(dataset, time_var_names[0]) - + assert epoch_time_var.split('__')[-1] == 'time' def test_temporal_variable_subset(self): @@ -1694,21 +1690,20 @@ def test_temporal_variable_subset(self): # Only coordinate variables and variables requested in variable # subset should be present. 
assert set(np.append(['lat', 'lon', 'time'], variables)) == set(out_ds.data_vars.keys()) - def test_temporal_he5file_subset(self): """ Test that the time type changes to datetime for subsetting """ - + OMI_file_names = ['OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5', 'OMI-Aura_L2-OMBRO_2020m0116t1207-o82471_v003-2020m0116t182003.he5'] OMI_copy_file = 'OMI_copy_testing_2.he5' for i in OMI_file_names: shutil.copyfile(os.path.join(self.test_data_dir, 'OMI', i), os.path.join(self.subset_output_dir, OMI_copy_file)) - min_time='2020-01-16T12:30:00Z' - max_time='2020-01-16T12:40:00Z' + min_time = '2020-01-16T12:30:00Z' + max_time = '2020-01-16T12:40:00Z' bbox = np.array(((-180, 180), (-90, 90))) nc_dataset, has_groups = gh.h5file_transform(os.path.join(self.subset_output_dir, OMI_copy_file)) @@ -1719,7 +1714,7 @@ def test_temporal_he5file_subset(self): } if min_time or max_time: - args['decode_times'] = True + args['decode_times'] = True with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), @@ -1737,13 +1732,12 @@ def test_temporal_he5file_subset(self): dataset, start_date = subset.convert_to_datetime(dataset, time_var_names) assert dataset[time_var_names[0]].dtype == 'datetime64[ns]' - def test_he5_timeattrs_output(self): """Test that the time attributes in the output match the attributes of the input for OMI test files""" omi_dir = join(self.test_data_dir, 'OMI') omi_file = 'OMI-Aura_L2-OMBRO_2020m0116t1207-o82471_v003-2020m0116t182003.he5' - omi_file_input = 'input'+omi_file + omi_file_input = 'input' + omi_file bbox = np.array(((-180, 90), (-90, 90))) output_file = "{}_{}".format(self._testMethodName, omi_file) shutil.copyfile( @@ -1754,15 +1748,16 @@ def test_he5_timeattrs_output(self): os.path.join(omi_dir, omi_file), os.path.join(self.subset_output_dir, omi_file_input) ) - - min_time='2020-01-16T12:30:00Z' - max_time='2020-01-16T12:40:00Z' + + min_time = '2020-01-16T12:30:00Z' + max_time = '2020-01-16T12:40:00Z' bbox = np.array(((-180, 180), (-90, 90))) nc_dataset_input = nc.Dataset(os.path.join(self.subset_output_dir, omi_file_input)) - incut_set = nc_dataset_input.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups['Geolocation Fields'] + incut_set = nc_dataset_input.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups[ + 'Geolocation Fields'] xr_dataset_input = xr.open_dataset(xr.backends.NetCDF4DataStore(incut_set)) - inattrs = xr_dataset_input['Time'].attrs - + inattrs = xr_dataset_input['Time'].attrs + subset.subset( file_to_subset=os.path.join(self.subset_output_dir, omi_file), bbox=bbox, @@ -1772,20 +1767,20 @@ def test_he5_timeattrs_output(self): ) output_ncdataset = nc.Dataset(os.path.join(self.subset_output_dir, output_file)) - outcut_set = output_ncdataset.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups['Geolocation Fields'] + outcut_set = output_ncdataset.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups[ + 'Geolocation Fields'] xrout_dataset = xr.open_dataset(xr.backends.NetCDF4DataStore(outcut_set)) outattrs = xrout_dataset['Time'].attrs for key in inattrs.keys(): if isinstance(inattrs[key], np.ndarray): - if np.array_equal(inattrs[key],outattrs[key]): + if np.array_equal(inattrs[key], outattrs[key]): pass else: raise AssertionError('Attributes for {} do not equal each other'.format(key)) else: assert inattrs[key] == outattrs[key] - - + def test_temporal_subset_lines(self): bbox = np.array(((-180, 180), (-90, 90))) file = 
'SWOT_L2_LR_SSH_Expert_368_012_20121111T235910_20121112T005015_DG10_01.nc' @@ -1860,7 +1855,6 @@ def test_get_time_OMI(self): assert "Time" in time_var_names[0] assert "Latitude" in lat_var_names[0] - def test_empty_temporal_subset(self): """ Test the edge case where a subsetted empty granule @@ -1900,8 +1894,8 @@ def test_passed_coords(self): file = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' dataset = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) + decode_times=False, + decode_coords=False) dummy_lats = ['dummy_lat'] dummy_lons = ['dummy_lon'] @@ -1964,7 +1958,7 @@ def test_var_subsetting_tropomi(self): """ Check that variable subsetting is the same if a leading slash is included """ - TROP_dir = join(self.test_data_dir, 'tropomi') + trop_dir = join(self.test_data_dir, 'tropomi') trop_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' variable_slash = ['/PRODUCT/methane_mixing_ratio'] variable_noslash = ['PRODUCT/methane_mixing_ratio'] @@ -1972,30 +1966,31 @@ def test_var_subsetting_tropomi(self): output_file_slash = "{}_{}".format(self._testMethodName, trop_file) output_file_noslash = "{}_noslash_{}".format(self._testMethodName, trop_file) shutil.copyfile( - os.path.join(TROP_dir, trop_file), + os.path.join(trop_dir, trop_file), os.path.join(self.subset_output_dir, trop_file) ) shutil.copyfile( - os.path.join(TROP_dir, trop_file), - os.path.join(self.subset_output_dir,'slashtest'+trop_file) + os.path.join(trop_dir, trop_file), + os.path.join(self.subset_output_dir, 'slashtest' + trop_file) ) - slash_test = subset.subset( + subset.subset( file_to_subset=join(self.subset_output_dir, trop_file), bbox=bbox, output_file=join(self.subset_output_dir, output_file_slash), - variables = variable_slash + variables=variable_slash ) - noslash_test = subset.subset( - file_to_subset=join(self.subset_output_dir, 'slashtest'+trop_file), + subset.subset( + file_to_subset=join(self.subset_output_dir, 'slashtest' + trop_file), bbox=bbox, output_file=join(self.subset_output_dir, output_file_noslash), - variables = variable_noslash + variables=variable_noslash ) slash_dataset = nc.Dataset(join(self.subset_output_dir, output_file_slash)) noslash_dataset = nc.Dataset(join(self.subset_output_dir, output_file_noslash)) assert list(slash_dataset.groups['PRODUCT'].variables) == list(noslash_dataset.groups['PRODUCT'].variables) + def test_bad_time_unit(self): fill_val = -99999.0 @@ -2029,4 +2024,3 @@ def test_bad_time_unit(self): ds_test = xr.open_dataset(nc_out_location) ds_test.close() - From 164b6c69768726646724587599002e409983d704 Mon Sep 17 00:00:00 2001 From: Frank Greguska <89428916+frankinspace@users.noreply.github.com> Date: Mon, 12 Dec 2022 18:26:31 -0800 Subject: [PATCH 16/16] Merge changes from issues/127 --- tests/test_subset.py | 73 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tests/test_subset.py b/tests/test_subset.py index 74b9d643..9c938213 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -1979,3 +1979,76 @@ def test_passed_coords(data_dir, subset_output_dir): assert lats == dummy_lats assert lons == dummy_lons assert times == dummy_times + + +def test_var_subsetting_tropomi(data_dir, subset_output_dir, request): + """ + Check that variable subsetting is the same if a leading slash is included + """ + trop_dir = join(data_dir, 'tropomi') + trop_file = 
'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' + variable_slash = ['/PRODUCT/methane_mixing_ratio'] + variable_noslash = ['PRODUCT/methane_mixing_ratio'] + bbox = np.array(((-180, 180), (-90, 90))) + output_file_slash = "{}_{}".format(request.node.name, trop_file) + output_file_noslash = "{}_noslash_{}".format(request.node.name, trop_file) + shutil.copyfile( + os.path.join(trop_dir, trop_file), + os.path.join(subset_output_dir, trop_file) + ) + shutil.copyfile( + os.path.join(trop_dir, trop_file), + os.path.join(subset_output_dir, 'slashtest' + trop_file) + ) + subset.subset( + file_to_subset=join(subset_output_dir, trop_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file_slash), + variables=variable_slash + ) + subset.subset( + file_to_subset=join(subset_output_dir, 'slashtest' + trop_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file_noslash), + variables=variable_noslash + ) + + slash_dataset = nc.Dataset(join(subset_output_dir, output_file_slash)) + noslash_dataset = nc.Dataset(join(subset_output_dir, output_file_noslash)) + + assert list(slash_dataset.groups['PRODUCT'].variables) == list(noslash_dataset.groups['PRODUCT'].variables) + + +def test_bad_time_unit(subset_output_dir): + + fill_val = -99999.0 + time_vals = np.random.rand(10) + time_vals[0] = fill_val + time_vals[-1] = fill_val + + data_vars = { + 'foo': (['x'], np.random.rand(10)), + 'time': ( + ['x'], + time_vals, + { + 'units': 'seconds since 2000-1-1 0:0:0 0', + '_FillValue': fill_val, + 'standard_name': 'time', + 'calendar': 'standard' + } + ), + } + + ds = xr.Dataset( + data_vars=data_vars, + coords={'x': (['x'], np.arange(10))} + ) + + nc_out_location = join(subset_output_dir, "bad_time.nc") + ds.to_netcdf(nc_out_location) + + subset.override_decode_cf_datetime() + + ds_test = xr.open_dataset(nc_out_location) + ds_test.close() \ No newline at end of file