diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml index 8c964bad..c031a4a4 100644 --- a/.github/workflows/build-pipeline.yml +++ b/.github/workflows/build-pipeline.yml @@ -4,7 +4,7 @@ name: Build on: # Triggers the workflow on push events push: - branches: [ develop, release/**, main, feature/** ] + branches: [ develop, release/**, main, feature/**, issue/**, issues/**, dependabot/** ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -36,7 +36,10 @@ jobs: echo "pyproject_name=$(poetry version | awk '{print $1}')" >> $GITHUB_ENV - name: Bump pre-alpha version # If triggered by push to a feature branch - if: ${{ startsWith(github.ref, 'refs/heads/feature/') }} + if: | + ${{ startsWith(github.ref, 'refs/heads/issue') }} || + ${{ startsWith(github.ref, 'refs/heads/dependabot/') }} || + ${{ startsWith(github.ref, 'refs/heads/feature/') }} run: | new_ver="${{ steps.get-version.outputs.current_version }}+$(git rev-parse --short ${GITHUB_SHA})" poetry version $new_ver @@ -160,6 +163,7 @@ jobs: name: python-artifact path: dist/* - name: Publish to test.pypi.org + id: pypi-test-publish if: | github.ref == 'refs/heads/develop' || startsWith(github.ref, 'refs/heads/release') @@ -170,19 +174,24 @@ jobs: poetry publish -r testpypi - name: Publish to pypi.org if: ${{ github.ref == 'refs/heads/main' }} + id: pypi-publish env: POETRY_PYPI_TOKEN_PYPI: ${{secrets.POETRY_PYPI_TOKEN_PYPI}} run: | poetry publish - name: Log in to the Container registry - if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} + if: | + steps.pypi-test-publish.conclusion == 'success' || + steps.pypi-publish.conclusion == 'success' uses: docker/login-action@v1 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata (tags, labels) for Docker - if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} + if: | + steps.pypi-test-publish.conclusion == 'success' || + steps.pypi-publish.conclusion == 'success' id: meta uses: docker/metadata-action@v4 with: @@ -191,12 +200,16 @@ jobs: type=semver,pattern={{version}},value=${{ env.software_version }} type=raw,value=${{ env.venue }} - name: Wait for package - if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} + if: | + steps.pypi-test-publish.conclusion == 'success' || + steps.pypi-publish.conclusion == 'success' run: | pip install tenacity ${GITHUB_WORKSPACE}/.github/workflows/wait-for-pypi.py ${{env.pyproject_name}}[harmony]==${{ env.software_version }} - name: Build and push Docker image - if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} + if: | + steps.pypi-test-publish.conclusion == 'success' || + steps.pypi-publish.conclusion == 'success' uses: docker/build-push-action@v3 with: context: . @@ -208,7 +221,9 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - name: Run Snyk on Docker Image - if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} + if: | + steps.pypi-test-publish.conclusion == 'success' || + steps.pypi-publish.conclusion == 'success' # Snyk can be used to break the build when it detects vulnerabilities. 
# In this case we want to upload the issues to GitHub Code Scanning continue-on-error: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ffbad7c..afe92a44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ for variables to not have leading slash in the front ### Removed ### Fixed - PODAAC-5065: integration with SMAP_RSS_L2_SSS_V5, fix way xarray open granules that have `seconds since 2000-1-1 0:0:0 0` as a time unit. +- [issue/127](https://github.com/podaac/l2ss-py/issues/127): Fixed bug when subsetting variables in grouped datasets. Variable names passed to `subset` will now have `/` replaced by `GROUP_DELIM` so they can be located in flattened datasets ### Security ## [2.2.0] diff --git a/podaac/subsetter/group_handling.py b/podaac/subsetter/group_handling.py new file mode 100644 index 00000000..052a3a00 --- /dev/null +++ b/podaac/subsetter/group_handling.py @@ -0,0 +1,238 @@ +""" +group_handling.py + +Functions for converting multidimensional data structures + between a group hierarchy and a flat structure +""" +from shutil import copy + +import h5py +import netCDF4 as nc +import numpy as np +import xarray as xr + +GROUP_DELIM = '__' + + +def transform_grouped_dataset(nc_dataset, file_to_subset): + """ + Transform a netCDF4 Dataset that has groups to an xarray compatible + dataset. xarray does not work with groups, so this transformation + will flatten the variables in the dataset and use the group path as + the new variable name. For example, data_01 > km > sst would become + 'data_01__km__sst', where GROUP_DELIM is __. + + This same pattern is applied to dimensions, which are located under + the appropriate group. They are renamed and placed in the root + group. + + Parameters + ---------- + nc_dataset : nc.Dataset + netCDF4 Dataset that contains groups + file_to_subset : str + + Returns + ------- + nc.Dataset + netCDF4 Dataset that does not contain groups and that has been + flattened. + """ + + # Close the existing read-only dataset and reopen in append mode + nc_dataset.close() + nc_dataset = nc.Dataset(file_to_subset, 'r+') + + dimensions = {} + + def walk(group_node, path): + for key, item in group_node.items(): + group_path = f'{path}{GROUP_DELIM}{key}' + + # If there are variables in this group, copy to root group + # and then delete from current group + if item.variables: + # Copy variables to root group with new name + for var_name, var in item.variables.items(): + var_group_name = f'{group_path}{GROUP_DELIM}{var_name}' + nc_dataset.variables[var_group_name] = var + # Delete variables + var_names = list(item.variables.keys()) + for var_name in var_names: + del item.variables[var_name] + + if item.dimensions: + dims = list(item.dimensions.keys()) + for dim_name in dims: + new_dim_name = f'{group_path.replace("/", GROUP_DELIM)}{GROUP_DELIM}{dim_name}' + item.dimensions[new_dim_name] = item.dimensions[dim_name] + dimensions[new_dim_name] = item.dimensions[dim_name] + item.renameDimension(dim_name, new_dim_name) + + # If there are subgroups in this group, call this function + # again on that group. 
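A minimal usage sketch of the flattening implemented here, assuming a hypothetical grouped granule `granule.nc` that contains a `data_01/km/sst` variable (the import path and `GROUP_DELIM` come from this new module; the xarray hand-off is one way a flattened `netCDF4.Dataset` can be consumed):

```python
import netCDF4 as nc
import xarray as xr

from podaac.subsetter.group_handling import GROUP_DELIM, transform_grouped_dataset

# Hypothetical grouped granule; transform_grouped_dataset reopens it in
# append mode and moves group variables/dimensions to the root group.
nc_dataset = nc.Dataset('granule.nc', mode='r')
flat_dataset = transform_grouped_dataset(nc_dataset, 'granule.nc')

# The flattened dataset can now be opened with xarray, which has no group
# support: data_01/km/sst is visible as __data_01__km__sst.
with xr.open_dataset(xr.backends.NetCDF4DataStore(flat_dataset),
                     decode_coords=False, decode_times=False) as dataset:
    print([name for name in dataset.variables
           if name.startswith(GROUP_DELIM + 'data_01')])
```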
+ if item.groups: + walk(item.groups, group_path) + + # Delete non-root groups + group_names = list(group_node.keys()) + for group_name in group_names: + del group_node[group_name] + + for var_name in list(nc_dataset.variables.keys()): + new_var_name = f'{GROUP_DELIM}{var_name}' + nc_dataset.variables[new_var_name] = nc_dataset.variables[var_name] + del nc_dataset.variables[var_name] + + walk(nc_dataset.groups, '') + + # Update the dimensions of the dataset in the root group + nc_dataset.dimensions.update(dimensions) + + return nc_dataset + + +def recombine_grouped_datasets(datasets, output_file, start_date): # pylint: disable=too-many-branches + """ + Given a list of xarray datasets, combine those datasets into a + single netCDF4 Dataset and write to the disk. Each dataset has been + transformed using its group path and needs to be un-transformed and + placed in the appropriate group. + + Parameters + ---------- + datasets : list (xr.Dataset) + List of xarray datasets to be combined + output_file : str + Name of the output file to write the resulting NetCDF file to. + """ + + base_dataset = nc.Dataset(output_file, mode='w') + + for dataset in datasets: + group_lst = [] + for var_name in dataset.variables.keys(): # need logic if there is data in the top level not in a group + group_lst.append('/'.join(var_name.split(GROUP_DELIM)[:-1])) + group_lst = ['/' if group == '' else group for group in group_lst] + groups = set(group_lst) + for group in groups: + base_dataset.createGroup(group) + + for dim_name in list(dataset.dims.keys()): + new_dim_name = dim_name.split(GROUP_DELIM)[-1] + dim_group = _get_nested_group(base_dataset, dim_name) + dim_group.createDimension(new_dim_name, dataset.dims[dim_name]) + + # Rename variables + _rename_variables(dataset, base_dataset, start_date) + + # Remove group vars from base dataset + for var_name in list(base_dataset.variables.keys()): + if GROUP_DELIM in var_name: + del base_dataset.variables[var_name] + + # Remove group dims from base dataset + for dim_name in list(base_dataset.dimensions.keys()): + if GROUP_DELIM in dim_name: + del base_dataset.dimensions[dim_name] + + # Copy global attributes + base_dataset.setncatts(datasets[0].attrs) + # Write and close + base_dataset.close() + + +def _get_nested_group(dataset, group_path): + nested_group = dataset + for group in group_path.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1]: + nested_group = nested_group.groups[group] + return nested_group + + +def _rename_variables(dataset, base_dataset, start_date): + for var_name in list(dataset.variables.keys()): + new_var_name = var_name.split(GROUP_DELIM)[-1] + var_group = _get_nested_group(base_dataset, var_name) + variable = dataset.variables[var_name] + var_dims = [x.split(GROUP_DELIM)[-1] for x in dataset.variables[var_name].dims] + if np.issubdtype( + dataset.variables[var_name].dtype, np.dtype(np.datetime64) + ) or np.issubdtype( + dataset.variables[var_name].dtype, np.dtype(np.timedelta64) + ): + if start_date: + dataset.variables[var_name].values = (dataset.variables[var_name].values - np.datetime64(start_date))/np.timedelta64(1, 's') + variable = dataset.variables[var_name] + else: + cf_dt_coder = xr.coding.times.CFDatetimeCoder() + encoded_var = cf_dt_coder.encode(dataset.variables[var_name]) + variable = encoded_var + + var_attrs = variable.attrs + fill_value = var_attrs.get('_FillValue') + var_attrs.pop('_FillValue', None) + comp_args = {"zlib": True, "complevel": 1} + + if variable.dtype == object: + var_group.createVariable(new_var_name, 'S1', var_dims, 
fill_value=fill_value, **comp_args) + elif variable.dtype == 'timedelta64[ns]': + var_group.createVariable(new_var_name, 'i4', var_dims, fill_value=fill_value, **comp_args) + else: + var_group.createVariable(new_var_name, variable.dtype, var_dims, fill_value=fill_value, **comp_args) + + # Copy attributes + var_group.variables[new_var_name].setncatts(var_attrs) + + # Copy data + var_group.variables[new_var_name].set_auto_maskandscale(False) + var_group.variables[new_var_name][:] = variable.data + + +def h5file_transform(finput): + """ + Transform a h5py Dataset that has groups to an xarray compatible + dataset. xarray does not work with groups, so this transformation + will flatten the variables in the dataset and use the group path as + the new variable name. For example, data_01 > km > sst would become + 'data_01__km__sst', where GROUP_DELIM is __. + + Returns + ------- + nc.Dataset + netCDF4 Dataset that does not contain groups and that has been + flattened. + """ + data_new = h5py.File(finput, 'r+') + del_group_list = list(data_new.keys()) + has_groups = bool(data_new['/']) + + def walk_h5py(data_new, group): + # flattens h5py file + for key, item in data_new[group].items(): + group_path = f'{group}{key}' + if isinstance(item, h5py.Dataset): + new_var_name = group_path.replace('/', '__') + + data_new[new_var_name] = data_new[group_path] + del data_new[group_path] + + elif isinstance(item, h5py.Group): + if len(list(item.keys())) == 0: + new_group_name = group_path.replace('/', '__') + data_new[new_group_name] = data_new[group_path] + + walk_h5py(data_new, data_new[group_path].name + '/') + + walk_h5py(data_new, data_new.name) + + for del_group in del_group_list: + del data_new[del_group] + + finputnc = '.'.join(finput.split('.')[:-1]) + '.nc' + + data_new.close() # close the h5py dataset + copy(finput, finputnc) # copy to a nc file + + nc_dataset = nc.Dataset(finputnc, mode='r') + + return nc_dataset, has_groups diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py index 2e1b4627..78a19281 100644 --- a/podaac/subsetter/subset.py +++ b/podaac/subsetter/subset.py @@ -23,14 +23,13 @@ import json import operator import os -from shutil import copy +from typing import Tuple import dateutil from dateutil import parser import cf_xarray as cfxr import cftime import geopandas as gpd -import h5py import importlib_metadata import julian import netCDF4 as nc @@ -43,8 +42,9 @@ from podaac.subsetter import dimension_cleanup as dc from podaac.subsetter import xarray_enhancements as xre +from podaac.subsetter.group_handling import GROUP_DELIM, transform_grouped_dataset, recombine_grouped_datasets, \ + h5file_transform -GROUP_DELIM = '__' SERVICE_NAME = 'l2ss-py' @@ -494,7 +494,7 @@ def compute_time_variable_name(dataset, lat_var): Parameters ---------- - dataset : xr.Dataset: + dataset : xr.Dataset xarray dataset to find time variable from lat_var : xr.Variable Lat variable for this dataset @@ -875,229 +875,6 @@ def in_shape(lon, lat): return xre.where(dataset, boolean_mask, cut) -def transform_grouped_dataset(nc_dataset, file_to_subset): - """ - Transform a netCDF4 Dataset that has groups to an xarray compatible - dataset. xarray does not work with groups, so this transformation - will flatten the variables in the dataset and use the group path as - the new variable name. For example, data_01 > km > sst would become - 'data_01__km__sst', where GROUP_DELIM is __. - - This same pattern is applied to dimensions, which are located under - the appropriate group. 
They are renamed and placed in the root - group. - - Parameters - ---------- - nc_dataset : nc.Dataset - netCDF4 Dataset that contains groups - - Returns - ------- - nc.Dataset - netCDF4 Dataset that does not contain groups and that has been - flattened. - """ - - # Close the existing read-only dataset and reopen in append mode - nc_dataset.close() - nc_dataset = nc.Dataset(file_to_subset, 'r+') - - dimensions = {} - - def walk(group_node, path): - for key, item in group_node.items(): - group_path = f'{path}{GROUP_DELIM}{key}' - - # If there are variables in this group, copy to root group - # and then delete from current group - if item.variables: - # Copy variables to root group with new name - for var_name, var in item.variables.items(): - var_group_name = f'{group_path}{GROUP_DELIM}{var_name}' - nc_dataset.variables[var_group_name] = var - # Delete variables - var_names = list(item.variables.keys()) - for var_name in var_names: - del item.variables[var_name] - - if item.dimensions: - dims = list(item.dimensions.keys()) - for dim_name in dims: - new_dim_name = f'{group_path.replace("/", GROUP_DELIM)}{GROUP_DELIM}{dim_name}' - item.dimensions[new_dim_name] = item.dimensions[dim_name] - dimensions[new_dim_name] = item.dimensions[dim_name] - item.renameDimension(dim_name, new_dim_name) - - # If there are subgroups in this group, call this function - # again on that group. - if item.groups: - walk(item.groups, group_path) - - # Delete non-root groups - group_names = list(group_node.keys()) - for group_name in group_names: - del group_node[group_name] - - for var_name in list(nc_dataset.variables.keys()): - new_var_name = f'{GROUP_DELIM}{var_name}' - nc_dataset.variables[new_var_name] = nc_dataset.variables[var_name] - del nc_dataset.variables[var_name] - - walk(nc_dataset.groups, '') - - # Update the dimensions of the dataset in the root group - nc_dataset.dimensions.update(dimensions) - - return nc_dataset - - -def recombine_grouped_datasets(datasets, output_file, start_date): # pylint: disable=too-many-branches - """ - Given a list of xarray datasets, combine those datasets into a - single netCDF4 Dataset and write to the disk. Each dataset has been - transformed using its group path and needs to be un-transformed and - placed in the appropriate group. - - Parameters - ---------- - datasets : list (xr.Dataset) - List of xarray datasets to be combined - output_file : str - Name of the output file to write the resulting NetCDF file to. 
- """ - - base_dataset = nc.Dataset(output_file, mode='w') - - for dataset in datasets: - group_lst = [] - for var_name in dataset.variables.keys(): # need logic if there is data in the top level not in a group - group_lst.append('/'.join(var_name.split(GROUP_DELIM)[:-1])) - group_lst = ['/' if group == '' else group for group in group_lst] - groups = set(group_lst) - for group in groups: - base_dataset.createGroup(group) - - for dim_name in list(dataset.dims.keys()): - new_dim_name = dim_name.split(GROUP_DELIM)[-1] - dim_group = _get_nested_group(base_dataset, dim_name) - dim_group.createDimension(new_dim_name, dataset.dims[dim_name]) - - # Rename variables - _rename_variables(dataset, base_dataset, start_date) - - # Remove group vars from base dataset - for var_name in list(base_dataset.variables.keys()): - if GROUP_DELIM in var_name: - del base_dataset.variables[var_name] - - # Remove group dims from base dataset - for dim_name in list(base_dataset.dimensions.keys()): - if GROUP_DELIM in dim_name: - del base_dataset.dimensions[dim_name] - - # Copy global attributes - base_dataset.setncatts(datasets[0].attrs) - # Write and close - base_dataset.close() - - -def _get_nested_group(dataset, group_path): - nested_group = dataset - for group in group_path.strip(GROUP_DELIM).split(GROUP_DELIM)[:-1]: - nested_group = nested_group.groups[group] - return nested_group - - -def _rename_variables(dataset, base_dataset, start_date): - for var_name in list(dataset.variables.keys()): - new_var_name = var_name.split(GROUP_DELIM)[-1] - var_group = _get_nested_group(base_dataset, var_name) - variable = dataset.variables[var_name] - var_dims = [x.split(GROUP_DELIM)[-1] for x in dataset.variables[var_name].dims] - if np.issubdtype( - dataset.variables[var_name].dtype, np.dtype(np.datetime64) - ) or np.issubdtype( - dataset.variables[var_name].dtype, np.dtype(np.timedelta64) - ): - if start_date: - dataset.variables[var_name].values = (dataset.variables[var_name].values - np.datetime64(start_date))/np.timedelta64(1, 's') - variable = dataset.variables[var_name] - else: - cf_dt_coder = xr.coding.times.CFDatetimeCoder() - encoded_var = cf_dt_coder.encode(dataset.variables[var_name]) - variable = encoded_var - - var_attrs = variable.attrs - fill_value = var_attrs.get('_FillValue') - var_attrs.pop('_FillValue', None) - comp_args = {"zlib": True, "complevel": 1} - - if variable.dtype == object: - var_group.createVariable(new_var_name, 'S1', var_dims, fill_value=fill_value, **comp_args) - elif variable.dtype == 'timedelta64[ns]': - var_group.createVariable(new_var_name, 'i4', var_dims, fill_value=fill_value, **comp_args) - else: - var_group.createVariable(new_var_name, variable.dtype, var_dims, fill_value=fill_value, **comp_args) - - # Copy attributes - var_group.variables[new_var_name].setncatts(var_attrs) - - # Copy data - var_group.variables[new_var_name].set_auto_maskandscale(False) - var_group.variables[new_var_name][:] = variable.data - - -def h5file_transform(finput): - """ - Transform a h5py Dataset that has groups to an xarray compatible - dataset. xarray does not work with groups, so this transformation - will flatten the variables in the dataset and use the group path as - the new variable name. For example, data_01 > km > sst would become - 'data_01__km__sst', where GROUP_DELIM is __. - - Returns - ------- - nc.Dataset - netCDF4 Dataset that does not contain groups and that has been - flattened. 
- """ - data_new = h5py.File(finput, 'r+') - del_group_list = list(data_new.keys()) - has_groups = bool(data_new['/']) - - def walk_h5py(data_new, group): - # flattens h5py file - for key, item in data_new[group].items(): - group_path = f'{group}{key}' - if isinstance(item, h5py.Dataset): - new_var_name = group_path.replace('/', '__') - - data_new[new_var_name] = data_new[group_path] - del data_new[group_path] - - elif isinstance(item, h5py.Group): - if len(list(item.keys())) == 0: - new_group_name = group_path.replace('/', '__') - data_new[new_group_name] = data_new[group_path] - - walk_h5py(data_new, data_new[group_path].name + '/') - - walk_h5py(data_new, data_new.name) - - for del_group in del_group_list: - del data_new[del_group] - - finputnc = '.'.join(finput.split('.')[:-1]) + '.nc' - - data_new.close() # close the h5py dataset - copy(finput, finputnc) # copy to a nc file - - nc_dataset = nc.Dataset(finputnc, mode='r') - - return nc_dataset, has_groups - - def get_coordinate_variable_names(dataset, lat_var_names=None, lon_var_names=None, time_var_names=None): """ Retrieve coordinate variables for this dataset. If coordinate @@ -1158,6 +935,26 @@ def convert_to_datetime(dataset, time_vars): return dataset, start_date +def open_as_nc_dataset(filepath: str) -> Tuple[nc.Dataset, list, bool]: + """Open netcdf file, and flatten groups if they exist.""" + file_extension = filepath.split('.')[-1] + + if file_extension == 'he5': + nc_dataset, has_groups = h5file_transform(filepath) + else: + # Open dataset with netCDF4 first, so we can get group info + nc_dataset = nc.Dataset(filepath, mode='r') + has_groups = bool(nc_dataset.groups) + + # If dataset has groups, transform to work with xarray + if has_groups: + nc_dataset = transform_grouped_dataset(nc_dataset, filepath) + + nc_dataset, rename_vars = dc.remove_duplicate_dims(nc_dataset) + + return nc_dataset, rename_vars, has_groups + + def override_decode_cf_datetime(): """ WARNING !!! REMOVE AT EARLIEST XARRAY FIX, this is a override to xarray override_decode_cf_datetime function. @@ -1181,10 +978,10 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): xarray.coding.times.decode_cf_datetime = decode_cf_datetime -def subset(file_to_subset, bbox, output_file, variables=None, +def subset(file_to_subset, bbox, output_file, variables=(), # pylint: disable=too-many-branches, disable=too-many-statements cut=True, shapefile=None, min_time=None, max_time=None, origin_source=None, - lat_var_names=None, lon_var_names=None, time_var_names=None): + lat_var_names=(), lon_var_names=(), time_var_names=()): """ Subset a given NetCDF file given a bounding box @@ -1217,6 +1014,9 @@ def subset(file_to_subset, bbox, output_file, variables=None, ISO timestamp representing the upper bound of the temporal subset to be performed. If this value is not provided, the granule will not be subset temporally on the upper bound. + origin_source : str + Original location or filename of data to be used in "derived from" + history element. lat_var_names : list List of variables that represent the latitude coordinate variables for this granule. This list will only contain more @@ -1233,27 +1033,21 @@ def subset(file_to_subset, bbox, output_file, variables=None, than one value in the case where there are multiple groups and different coordinate variables for each group. 
""" - file_extension = file_to_subset.split('.')[-1] - - if file_extension == 'he5': - nc_dataset, has_groups = h5file_transform(file_to_subset) - else: - # Open dataset with netCDF4 first, so we can get group info - nc_dataset = nc.Dataset(file_to_subset, mode='r') - has_groups = bool(nc_dataset.groups) - - # If dataset has groups, transform to work with xarray - if has_groups: - nc_dataset = transform_grouped_dataset(nc_dataset, file_to_subset) - - nc_dataset, rename_vars = dc.remove_duplicate_dims(nc_dataset) + nc_dataset, rename_vars, has_groups = open_as_nc_dataset(file_to_subset) override_decode_cf_datetime() - if variables: - variables = [x.replace('/', GROUP_DELIM) for x in variables] - if has_groups: - variables = [GROUP_DELIM + x if not x.startswith(GROUP_DELIM) else x for x in variables] + if has_groups: + # Make sure all variables start with '/' + variables = ['/' + var if not var.startswith('/') else var for var in variables] + lat_var_names = ['/' + var if not var.startswith('/') else var for var in lat_var_names] + lon_var_names = ['/' + var if not var.startswith('/') else var for var in lon_var_names] + time_var_names = ['/' + var if not var.startswith('/') else var for var in time_var_names] + # Replace all '/' with GROUP_DELIM + variables = [var.replace('/', GROUP_DELIM) for var in variables] + lat_var_names = [var.replace('/', GROUP_DELIM) for var in lat_var_names] + lon_var_names = [var.replace('/', GROUP_DELIM) for var in lon_var_names] + time_var_names = [var.replace('/', GROUP_DELIM) for var in time_var_names] args = { 'decode_coords': False, diff --git a/poetry.lock b/poetry.lock index 2d142e97..22d4ddb2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8,7 +8,7 @@ python-versions = "*" [[package]] name = "astroid" -version = "2.12.9" +version = "2.12.13" description = "An abstract syntax tree for Python with inference support." category = "dev" optional = false @@ -22,14 +22,6 @@ wrapt = [ {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, ] -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." 
-category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - [[package]] name = "attrs" version = "22.1.0" @@ -46,7 +38,7 @@ tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> [[package]] name = "aws-sam-translator" -version = "1.50.0" +version = "1.55.0" description = "AWS SAM Translator is a library that transform SAM templates into AWS CloudFormation templates" category = "dev" optional = false @@ -57,11 +49,11 @@ boto3 = ">=1.19.5,<2.0.0" jsonschema = ">=3.2,<4.0" [package.extras] -dev = ["coverage (>=5.3,<6.0)", "flake8 (>=3.8.4,<3.9.0)", "tox (>=3.24,<4.0)", "pytest-cov (>=2.10.1,<2.11.0)", "pytest-xdist (>=2.5,<3.0)", "pytest-env (>=0.6.2,<0.7.0)", "pylint (>=2.9.0,<2.10.0)", "pyyaml (>=5.4,<6.0)", "pytest (>=6.2.5,<6.3.0)", "parameterized (>=0.7.4,<0.8.0)", "click (>=7.1,<8.0)", "dateparser (>=0.7,<1.0)", "boto3 (>=1.23,<2)", "tenacity (>=7.0.0,<7.1.0)", "requests (>=2.24.0,<2.25.0)", "docopt (>=0.6.2,<0.7.0)", "black (==20.8b1)"] +dev = ["coverage (>=5.3,<6.0)", "flake8 (>=3.8.4,<3.9.0)", "tox (>=3.24,<4.0)", "pytest-cov (>=2.10.1,<2.11.0)", "pytest-xdist (>=2.5,<3.0)", "pytest-env (>=0.6.2,<0.7.0)", "pytest-rerunfailures (>=9.1.1,<9.2.0)", "pylint (>=2.15.0,<2.16.0)", "pyyaml (>=5.4,<6.0)", "pytest (>=6.2.5,<6.3.0)", "parameterized (>=0.7.4,<0.8.0)", "click (>=7.1,<8.0)", "dateparser (>=0.7,<1.0)", "boto3 (>=1.23,<2)", "tenacity (>=7.0.0,<7.1.0)", "requests (>=2.24.0,<2.25.0)", "docopt (>=0.6.2,<0.7.0)", "black (==20.8b1)", "ruamel.yaml (==0.17.21)", "mypy (==0.971)", "boto3-stubs[serverlessrepo,appconfig] (>=1.19.5,<2.0.0)", "types-PyYAML (>=5.4,<6.0)", "types-jsonschema (>=3.2,<4.0)"] [[package]] name = "aws-xray-sdk" -version = "2.10.0" +version = "2.11.0" description = "The AWS X-Ray SDK for Python (the SDK) enables Python developers to record and emit information from within their applications to the AWS X-Ray service." category = "dev" optional = false @@ -73,7 +65,7 @@ wrapt = "*" [[package]] name = "babel" -version = "2.10.3" +version = "2.11.0" description = "Internationalization utilities" category = "dev" optional = false @@ -109,14 +101,14 @@ python-versions = "*" [[package]] name = "boto3" -version = "1.24.67" +version = "1.26.28" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" [package.dependencies] -botocore = ">=1.27.67,<1.28.0" +botocore = ">=1.29.28,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -125,7 +117,7 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.27.67" +version = "1.29.28" description = "Low-level, data-driven core of boto 3." category = "main" optional = false @@ -137,11 +129,11 @@ python-dateutil = ">=2.1,<3.0.0" urllib3 = ">=1.25.4,<1.27" [package.extras] -crt = ["awscrt (==0.14.0)"] +crt = ["awscrt (==0.15.3)"] [[package]] name = "certifi" -version = "2022.6.15" +version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." 
category = "main" optional = false @@ -149,7 +141,7 @@ python-versions = ">=3.6" [[package]] name = "cf-xarray" -version = "0.7.4" +version = "0.7.6" description = "A lightweight convenience wrapper for using CF attributes on xarray objects" category = "main" optional = false @@ -171,14 +163,14 @@ pycparser = "*" [[package]] name = "cfn-lint" -version = "0.64.1" +version = "0.72.2" description = "Checks CloudFormation templates for practices and behaviour that could potentially be improved" category = "dev" optional = false python-versions = ">=3.7, <=4.0, !=4.0" [package.dependencies] -aws-sam-translator = ">=1.50.0" +aws-sam-translator = ">=1.55.0" jschema-to-python = ">=1.2.3,<1.3.0" jsonpatch = "*" jsonschema = ">=3.0,<5" @@ -189,7 +181,7 @@ sarif-om = ">=1.0.4,<1.1.0" [[package]] name = "cftime" -version = "1.6.1" +version = "1.6.2" description = "Time-handling functionality from netcdf4-python" category = "main" optional = false @@ -258,26 +250,29 @@ python-versions = ">=3.6" [[package]] name = "colorama" -version = "0.4.5" +version = "0.4.6" description = "Cross-platform colored terminal text." category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" [[package]] name = "coverage" -version = "6.4.4" +version = "6.5.0" description = "Code coverage measurement for Python" category = "dev" optional = false python-versions = ">=3.7" +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + [package.extras] toml = ["tomli"] [[package]] name = "cryptography" -version = "38.0.1" +version = "38.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
category = "dev" optional = false @@ -296,16 +291,17 @@ test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", [[package]] name = "dask" -version = "2022.9.0" +version = "2022.12.0" description = "Parallel PyData with Task Scheduling" category = "main" optional = false python-versions = ">=3.8" [package.dependencies] -bokeh = {version = ">=2.4.2", optional = true, markers = "extra == \"complete\""} +bokeh = {version = ">=2.4.2,<3", optional = true, markers = "extra == \"complete\""} +click = ">=7.0" cloudpickle = ">=1.1.1" -distributed = {version = "2022.9.0", optional = true, markers = "extra == \"complete\""} +distributed = {version = "2022.12.0", optional = true, markers = "extra == \"complete\""} fsspec = ">=0.6.0" jinja2 = {version = "*", optional = true, markers = "extra == \"complete\""} numpy = {version = ">=1.18", optional = true, markers = "extra == \"complete\""} @@ -317,10 +313,10 @@ toolz = ">=0.8.2" [package.extras] array = ["numpy (>=1.18)"] -complete = ["bokeh (>=2.4.2)", "distributed (==2022.9.0)", "jinja2", "numpy (>=1.18)", "pandas (>=1.0)"] +complete = ["bokeh (>=2.4.2,<3)", "distributed (==2022.12.0)", "jinja2", "numpy (>=1.18)", "pandas (>=1.0)"] dataframe = ["numpy (>=1.18)", "pandas (>=1.0)"] -diagnostics = ["bokeh (>=2.4.2)", "jinja2"] -distributed = ["distributed (==2022.9.0)"] +diagnostics = ["bokeh (>=2.4.2,<3)", "jinja2"] +distributed = ["distributed (==2022.12.0)"] test = ["pandas", "pytest", "pytest-rerunfailures", "pytest-xdist", "pre-commit"] [[package]] @@ -336,27 +332,27 @@ packaging = "*" [[package]] name = "dill" -version = "0.3.5.1" +version = "0.3.6" description = "serialize all of python" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +python-versions = ">=3.7" [package.extras] graph = ["objgraph (>=1.7.2)"] [[package]] name = "distributed" -version = "2022.9.0" +version = "2022.12.0" description = "Distributed scheduler for Dask" category = "main" optional = false python-versions = ">=3.8" [package.dependencies] -click = ">=6.6" +click = ">=7.0" cloudpickle = ">=1.5.0" -dask = "2022.9.0" +dask = "2022.12.0" jinja2 = "*" locket = ">=1.0.0" msgpack = ">=0.6.0" @@ -365,14 +361,14 @@ psutil = ">=5.0" pyyaml = "*" sortedcontainers = "<2.0.0 || >2.0.0,<2.0.1 || >2.0.1" tblib = ">=1.6.0" -toolz = ">=0.8.2" -tornado = ">=6.0.3,<6.2" +toolz = ">=0.10.0" +tornado = ">=6.0.3" urllib3 = "*" zict = ">=0.1.3" [[package]] name = "docker" -version = "6.0.0" +version = "6.0.1" description = "A Python library for the Docker Engine API." 
category = "dev" optional = false @@ -411,9 +407,20 @@ six = ">=1.9.0" gmpy = ["gmpy"] gmpy2 = ["gmpy2"] +[[package]] +name = "exceptiongroup" +version = "1.0.4" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "fiona" -version = "1.8.21" +version = "1.8.22" description = "Fiona reads and writes spatial data files" category = "main" optional = false @@ -429,7 +436,7 @@ munch = "*" six = ">=1.7" [package.extras] -all = ["boto3 (>=1.2.4)", "pytest-cov", "shapely", "pytest (>=3)", "mock"] +all = ["pytest-cov", "shapely", "boto3 (>=1.2.4)", "pytest (>=3)", "mock"] calc = ["shapely"] s3 = ["boto3 (>=1.2.4)"] test = ["pytest (>=3)", "pytest-cov", "boto3 (>=1.2.4)", "mock"] @@ -449,7 +456,7 @@ pyflakes = ">=2.3.0,<2.4.0" [[package]] name = "fsspec" -version = "2022.8.2" +version = "2022.11.0" description = "File-system specification" category = "main" optional = false @@ -505,7 +512,7 @@ numpy = ">=1.14.5" [[package]] name = "harmony-service-lib" -version = "1.0.21" +version = "1.0.22" description = "A library for Python-based Harmony services to parse incoming messages, fetch data, stage data, and call back to Harmony" category = "main" optional = true @@ -546,7 +553,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "importlib-metadata" -version = "4.12.0" +version = "4.13.0" description = "Read metadata from Python packages" category = "main" optional = false @@ -556,21 +563,29 @@ python-versions = ">=3.7" zipp = ">=0.5" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] +docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] perf = ["ipython"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" [[package]] name = "isort" -version = "5.10.1" +version = "5.11.1" description = "A Python utility / library to sort Python imports." 
category = "dev" optional = false -python-versions = ">=3.6.1,<4.0" +python-versions = ">=3.7.0" [package.extras] -pipfile_deprecated_finder = ["pipreqs", "requirementslib"] -requirements_deprecated_finder = ["pipreqs", "pip-api"] +pipfile-deprecated-finder = ["pipreqs", "requirementslib"] +requirements-deprecated-finder = ["pipreqs", "pip-api"] colors = ["colorama (>=0.4.3,<0.5.0)"] plugins = ["setuptools"] @@ -630,16 +645,16 @@ jsonpointer = ">=1.9" [[package]] name = "jsonpickle" -version = "2.2.0" +version = "3.0.0" description = "Python library for serializing any arbitrary object graph into JSON" category = "dev" optional = false -python-versions = ">=2.7" +python-versions = ">=3.7" [package.extras] docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] -testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "numpy", "pandas", "pymongo", "scikit-learn", "sqlalchemy", "pytest-flake8 (<1.1.0)", "enum34", "jsonlib", "pytest-flake8 (>=1.1.1)"] -"testing.libs" = ["simplejson", "ujson", "yajl"] +testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8 (>=1.1.1)", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "gmpy2", "numpy", "pandas", "pymongo", "scikit-learn", "sqlalchemy"] +"testing.libs" = ["simplejson", "ujson"] [[package]] name = "jsonpointer" @@ -687,11 +702,11 @@ six = "*" [[package]] name = "lazy-object-proxy" -version = "1.7.1" +version = "1.8.0" description = "A fast and thorough lazy object proxy." category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [[package]] name = "locket" @@ -750,14 +765,6 @@ build = ["twine", "wheel", "blurb"] docs = ["sphinx"] test = ["pytest (<5.4)", "pytest-cov"] -[[package]] -name = "more-itertools" -version = "8.14.0" -description = "More routines for operating on iterables, beyond itertools" -category = "dev" -optional = false -python-versions = ">=3.5" - [[package]] name = "moto" version = "1.3.14" @@ -817,11 +824,11 @@ yaml = ["PyYAML (>=5.1.0)"] [[package]] name = "netcdf4" -version = "1.6.0" +version = "1.6.2" description = "Provides an object-oriented python interface to the netCDF version 4 library." category = "main" optional = false -python-versions = "*" +python-versions = ">=3.6" [package.dependencies] cftime = "*" @@ -829,7 +836,7 @@ numpy = ">=1.9" [[package]] name = "networkx" -version = "2.8.6" +version = "2.8.8" description = "Python package for creating and manipulating graphs and networks" category = "dev" optional = false @@ -837,14 +844,14 @@ python-versions = ">=3.8" [package.extras] default = ["numpy (>=1.19)", "scipy (>=1.8)", "matplotlib (>=3.4)", "pandas (>=1.3)"] -developer = ["pre-commit (>=2.20)", "mypy (>=0.961)"] -doc = ["sphinx (>=5)", "pydata-sphinx-theme (>=0.9)", "sphinx-gallery (>=0.10)", "numpydoc (>=1.4)", "pillow (>=9.1)", "nb2plots (>=0.6)", "texext (>=0.6.6)"] +developer = ["pre-commit (>=2.20)", "mypy (>=0.982)"] +doc = ["sphinx (>=5.2)", "pydata-sphinx-theme (>=0.11)", "sphinx-gallery (>=0.11)", "numpydoc (>=1.5)", "pillow (>=9.2)", "nb2plots (>=0.6)", "texext (>=0.6.6)"] extra = ["lxml (>=4.6)", "pygraphviz (>=1.9)", "pydot (>=1.4.2)", "sympy (>=1.10)"] -test = ["pytest (>=7.1)", "pytest-cov (>=3.0)", "codecov (>=2.1)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)", "codecov (>=2.1)"] [[package]] name = "numpy" -version = "1.23.2" +version = "1.23.5" description = "NumPy is the fundamental package for array computing with Python." 
category = "main" optional = false @@ -852,18 +859,15 @@ python-versions = ">=3.8" [[package]] name = "packaging" -version = "21.3" +version = "22.0" description = "Core utilities for Python packages" category = "main" optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" +python-versions = ">=3.7" [[package]] name = "pandas" -version = "1.4.4" +version = "1.5.2" description = "Powerful data structures for data analysis, time series, and statistics" category = "main" optional = false @@ -871,10 +875,9 @@ python-versions = ">=3.8" [package.dependencies] numpy = [ - {version = ">=1.18.5", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, - {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.1" pytz = ">=2020.1" @@ -899,7 +902,7 @@ complete = ["blosc", "pyzmq", "pandas (>=0.19.0)", "numpy (>=1.9.0)"] [[package]] name = "pbr" -version = "5.10.0" +version = "5.11.0" description = "Python Build Reasonableness" category = "dev" optional = false @@ -907,7 +910,7 @@ python-versions = ">=2.6" [[package]] name = "pillow" -version = "9.2.0" +version = "9.3.0" description = "Python Imaging Library (Fork)" category = "main" optional = false @@ -919,30 +922,31 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa [[package]] name = "platformdirs" -version = "2.5.2" -description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +version = "2.6.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." category = "dev" optional = false python-versions = ">=3.7" [package.extras] -docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] -test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] +docs = ["furo (>=2022.9.29)", "proselint (>=0.13)", "sphinx-autodoc-typehints (>=1.19.4)", "sphinx (>=5.3)"] +test = ["appdirs (==1.4.4)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest (>=7.2)"] [[package]] name = "pluggy" -version = "0.13.1" +version = "1.0.0" description = "plugin and hook calling mechanisms for python" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.6" [package.extras] dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "psutil" -version = "5.9.2" +version = "5.9.4" description = "Cross-platform lib for process and system monitoring in Python." 
category = "main" optional = false @@ -951,18 +955,10 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [package.extras] test = ["ipaddress", "mock", "enum34", "pywin32", "wmi"] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - [[package]] name = "py-cpuinfo" -version = "8.0.0" -description = "Get CPU info with pure Python 2 & 3" +version = "9.0.0" +description = "Get CPU info with pure Python" category = "dev" optional = false python-versions = "*" @@ -1012,14 +1008,14 @@ plugins = ["importlib-metadata"] [[package]] name = "pylint" -version = "2.15.2" +version = "2.15.8" description = "python code static checker" category = "dev" optional = false python-versions = ">=3.7.2" [package.dependencies] -astroid = ">=2.12.9,<=2.14.0-dev0" +astroid = ">=2.12.13,<=2.14.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = ">=0.2" isort = ">=4.2.5,<6" @@ -1048,20 +1044,9 @@ cffi = ">=1.4.1" docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] tests = ["pytest (>=3.2.1,!=3.3.0)", "hypothesis (>=3.27.0)"] -[[package]] -name = "pyparsing" -version = "3.0.9" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["railroad-diagrams", "jinja2"] - [[package]] name = "pyproj" -version = "3.3.1" +version = "3.4.0" description = "Python interface to PROJ (cartographic projections and coordinate transformations library)" category = "main" optional = false @@ -1072,7 +1057,7 @@ certifi = "*" [[package]] name = "pyrsistent" -version = "0.18.1" +version = "0.19.2" description = "Persistent/Functional/Immutable data structures" category = "dev" optional = false @@ -1094,33 +1079,31 @@ validation = ["jsonschema (==3.2.0)"] [[package]] name = "pytest" -version = "5.4.3" +version = "7.2.0" description = "pytest: simple powerful testing with Python" category = "dev" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7" [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=17.4.0" +attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} -more-itertools = ">=4.0.0" +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<1.0" -py = ">=1.5.0" -wcwidth = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] -checkqa-mypy = ["mypy (==v0.761)"] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] [[package]] name = "pytest-benchmark" -version = "3.4.1" +version = "4.0.0" description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.7" [package.dependencies] py-cpuinfo = "*" @@ -1133,16 +1116,15 @@ histogram = ["pygal", "pygaljs"] [[package]] name = "pytest-cov" -version = "2.12.1" +version = "4.0.0" description = "Pytest plugin for measuring coverage." 
category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.6" [package.dependencies] -coverage = ">=5.2.1" +coverage = {version = ">=5.2.1", extras = ["toml"]} pytest = ">=4.6" -toml = "*" [package.extras] testing = ["fields", "hunter", "process-tests", "six", "pytest-xdist", "virtualenv"] @@ -1186,7 +1168,7 @@ python-versions = ">=3.5" [[package]] name = "pytz" -version = "2022.2.1" +version = "2022.6" description = "World timezone definitions, modern and historical" category = "main" optional = false @@ -1194,7 +1176,7 @@ python-versions = "*" [[package]] name = "pywin32" -version = "304" +version = "305" description = "Python for Window Extensions" category = "dev" optional = false @@ -1228,18 +1210,20 @@ use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "responses" -version = "0.21.0" +version = "0.22.0" description = "A utility library for mocking out the `requests` Python library." category = "dev" optional = false python-versions = ">=3.7" [package.dependencies] -requests = ">=2.0,<3.0" +requests = ">=2.22.0,<3.0" +toml = "*" +types-toml = "*" urllib3 = ">=1.25.10" [package.extras] -tests = ["pytest (>=7.0.0)", "coverage (>=6.0.0)", "pytest-cov", "pytest-asyncio", "pytest-localserver", "flake8", "types-mock", "types-requests", "mypy"] +tests = ["pytest (>=7.0.0)", "coverage (>=6.0.0)", "pytest-cov", "pytest-asyncio", "pytest-httpserver", "flake8", "types-requests", "mypy"] [[package]] name = "rsa" @@ -1280,7 +1264,7 @@ pbr = "*" [[package]] name = "shapely" -version = "1.8.4" +version = "1.8.5.post1" description = "Geometric objects, predicates, and operations" category = "main" optional = false @@ -1474,11 +1458,11 @@ python-versions = ">=3.7" [[package]] name = "tomlkit" -version = "0.11.4" +version = "0.11.6" description = "Style preserving TOML library" category = "dev" optional = false -python-versions = ">=3.6,<4.0" +python-versions = ">=3.6" [[package]] name = "toolz" @@ -1490,15 +1474,23 @@ python-versions = ">=3.5" [[package]] name = "tornado" -version = "6.1" +version = "6.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." category = "main" optional = false -python-versions = ">= 3.5" +python-versions = ">= 3.7" + +[[package]] +name = "types-toml" +version = "0.10.8.1" +description = "Typing stubs for toml" +category = "dev" +optional = false +python-versions = "*" [[package]] name = "typing-extensions" -version = "4.3.0" +version = "4.4.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false @@ -1506,28 +1498,20 @@ python-versions = ">=3.7" [[package]] name = "urllib3" -version = "1.26.12" +version = "1.26.13" description = "HTTP library with thread-safe connection pooling, file post, and more." 
category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" [package.extras] brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "wcwidth" -version = "0.2.5" -description = "Measures the displayed width of unicode strings in a terminal" -category = "dev" -optional = false -python-versions = "*" - [[package]] name = "websocket-client" -version = "1.4.1" +version = "1.4.2" description = "WebSocket client for Python with low level API options" category = "dev" optional = false @@ -1562,7 +1546,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" [[package]] name = "xarray" -version = "2022.6.0" +version = "2022.12.0" description = "N-D labeled arrays and datasets in Python" category = "main" optional = false @@ -1570,15 +1554,15 @@ python-versions = ">=3.8" [package.dependencies] dask = {version = "*", extras = ["complete"], optional = true, markers = "extra == \"parallel\""} -numpy = ">=1.19" -packaging = ">=20.0" -pandas = ">=1.2" +numpy = ">=1.20" +packaging = ">=21.3" +pandas = ">=1.3" [package.extras] accel = ["scipy", "bottleneck", "numbagg", "flox"] -complete = ["netcdf4", "h5netcdf", "scipy", "pydap", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis"] -docs = ["netcdf4", "h5netcdf", "scipy", "pydap", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis", "sphinx-autosummary-accessors", "sphinx-rtd-theme", "ipython", "ipykernel", "jupyter-client", "nbsphinx", "scanpydoc"] -io = ["netcdf4", "h5netcdf", "scipy", "pydap", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch"] +complete = ["netcdf4", "h5netcdf", "scipy", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis", "pydap"] +docs = ["netcdf4", "h5netcdf", "scipy", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "bottleneck", "numbagg", "flox", "dask", "matplotlib", "seaborn", "nc-time-axis", "sphinx-autosummary-accessors", "sphinx-rtd-theme", "ipython", "ipykernel", "jupyter-client", "nbsphinx", "scanpydoc", "pydap"] +io = ["netcdf4", "h5netcdf", "scipy", "zarr", "fsspec", "cftime", "rasterio", "cfgrib", "pooch", "pydap"] parallel = ["dask"] viz = ["matplotlib", "seaborn", "nc-time-axis"] @@ -1603,15 +1587,15 @@ heapdict = "*" [[package]] name = "zipp" -version = "3.8.1" +version = "3.11.0" description = "Backport of pathlib-compatible object wrapper for zip files" category = "main" optional = false python-versions = ">=3.7" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] +docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", 
"jaraco.functools", "more-itertools", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"] [extras] harmony = ["harmony-service-lib", "pystac"] @@ -1619,12 +1603,11 @@ harmony = ["harmony-service-lib", "pystac"] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "a812b9c24f128e06197e201439794f7a28bb95055a72f928390326c92111bca9" +content-hash = "ae9d1d8198b4c7d46344bde0f75fa5028fa0f0e31aa97603648636a30e45bed5" [metadata.files] alabaster = [] astroid = [] -atomicwrites = [] attrs = [] aws-sam-translator = [] aws-xray-sdk = [] @@ -1653,6 +1636,7 @@ distributed = [] docker = [] docutils = [] ecdsa = [] +exceptiongroup = [] fiona = [] flake8 = [] fsspec = [] @@ -1663,6 +1647,7 @@ heapdict = [] idna = [] imagesize = [] importlib-metadata = [] +iniconfig = [] isort = [] jinja2 = [] jmespath = [] @@ -1681,7 +1666,6 @@ markupsafe = [] mccabe = [] mistune = [] mock = [] -more-itertools = [] moto = [] msgpack = [] munch = [] @@ -1696,7 +1680,6 @@ pillow = [] platformdirs = [] pluggy = [] psutil = [] -py = [] py-cpuinfo = [] pyasn1 = [] pycodestyle = [] @@ -1705,7 +1688,6 @@ pyflakes = [] pygments = [] pylint = [] pynacl = [] -pyparsing = [] pyproj = [] pyrsistent = [] pystac = [] @@ -1742,9 +1724,9 @@ tomli = [] tomlkit = [] toolz = [] tornado = [] +types-toml = [] typing-extensions = [] urllib3 = [] -wcwidth = [] websocket-client = [] werkzeug = [] wrapt = [] diff --git a/pyproject.toml b/pyproject.toml index d1307f10..f401b8b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,12 +40,12 @@ h5py = "^3.6.0" cf-xarray = "*" [tool.poetry.dev-dependencies] -pytest = "^5.2" +pytest = "~7" flake8 = "^3.7" -pytest-cov = "^2.8" +pytest-cov = "~4" pylint = "^2.4" sphinx = "^4.4" -pytest-benchmark = "^3.2.3" +pytest-benchmark = "~4" moto = "1.3.14" jsonschema = "^3.2.0" m2r2 = "^0.3.1" diff --git a/tests/data/TEMPO_NO2-PROXY_L2_V01_20130731T232959Z_S015G06_partial.nc b/tests/data/TEMPO_NO2-PROXY_L2_V01_20130731T232959Z_S015G06_partial.nc new file mode 100644 index 00000000..99c20829 Binary files /dev/null and b/tests/data/TEMPO_NO2-PROXY_L2_V01_20130731T232959Z_S015G06_partial.nc differ diff --git a/tests/test_subset.py b/tests/test_subset.py index 161bbf36..6b1e726f 100644 --- a/tests/test_subset.py +++ b/tests/test_subset.py @@ -15,6 +15,12 @@ test_subset.py ============== Test the subsetter functionality. + +Unit tests for the L2 subsetter. These tests are all related to the +subsetting functionality itself, and should provide coverage on the +following files: + - podaac.subsetter.subset.py + - podaac.subsetter.xarray_enhancements.py """ import json import operator @@ -27,1803 +33,1693 @@ import geopandas as gpd import importlib_metadata +import netCDF4 import netCDF4 as nc import h5py import numpy as np import pandas as pd import pytest import xarray as xr +import urllib.parse from jsonschema import validate from shapely.geometry import Point +from unittest import TestCase from podaac.subsetter import subset +from podaac.subsetter.group_handling import GROUP_DELIM from podaac.subsetter.subset import SERVICE_NAME from podaac.subsetter import xarray_enhancements as xre from podaac.subsetter import dimension_cleanup as dc -class TestSubsetter(unittest.TestCase): - """ - Unit tests for the L2 subsetter. 
These tests are all related to the - subsetting functionality itself, and should provide coverage on the - following files: - - podaac.subsetter.subset.py - - podaac.subsetter.xarray_enhancements.py - """ - - @classmethod - def setUpClass(cls): - cls.test_dir = dirname(realpath(__file__)) - cls.test_data_dir = join(cls.test_dir, 'data') - cls.subset_output_dir = tempfile.mkdtemp(dir=cls.test_data_dir) - cls.test_files = [f for f in listdir(cls.test_data_dir) - if isfile(join(cls.test_data_dir, f)) and f.endswith(".nc")] - - cls.history_json_schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://harmony.earthdata.nasa.gov/history.schema.json", - "title": "Data Processing History", - "description": "A history record of processing that produced a given data file. For more information, see: https://wiki.earthdata.nasa.gov/display/TRT/In-File+Provenance+Metadata+-+TRT-42", - "type": ["array", "object"], - "items": {"$ref": "#/definitions/history_record"}, - - "definitions": { - "history_record": { - "type": "object", - "properties": { - "date_time": { - "description": "A Date/Time stamp in ISO-8601 format, including time-zone, GMT (or Z) preferred", - "type": "string", - "format": "date-time" - }, - "derived_from": { - "description": "List of source data files used in the creation of this data file", - "type": ["array", "string"], - "items": {"type": "string"} - }, - "program": { - "description": "The name of the program which generated this data file", - "type": "string" - }, - "version": { - "description": "The version identification of the program which generated this data file", - "type": "string" - }, - "parameters": { - "description": "The list of parameters to the program when generating this data file", - "type": ["array", "string"], - "items": {"type": "string"} - }, - "program_ref": { - "description": "A URL reference that defines the program, e.g., a UMM-S reference URL", - "type": "string" - }, - "$schema": { - "description": "The URL to this schema", - "type": "string" - } +@pytest.fixture(scope='class') +def data_dir(): + test_dir = dirname(realpath(__file__)) + return join(test_dir, 'data') + + +@pytest.fixture(scope='class') +def subset_output_dir(data_dir): + subset_output_dir = tempfile.mkdtemp(dir=data_dir) + yield subset_output_dir + shutil.rmtree(subset_output_dir) + + +@pytest.fixture(scope='class') +def history_json_schema(): + return { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://harmony.earthdata.nasa.gov/history.schema.json", + "title": "Data Processing History", + "description": "A history record of processing that produced a given data file. 
For more information, see: https://wiki.earthdata.nasa.gov/display/TRT/In-File+Provenance+Metadata+-+TRT-42", + "type": ["array", "object"], + "items": {"$ref": "#/definitions/history_record"}, + + "definitions": { + "history_record": { + "type": "object", + "properties": { + "date_time": { + "description": "A Date/Time stamp in ISO-8601 format, including time-zone, GMT (or Z) preferred", + "type": "string", + "format": "date-time" + }, + "derived_from": { + "description": "List of source data files used in the creation of this data file", + "type": ["array", "string"], + "items": {"type": "string"} + }, + "program": { + "description": "The name of the program which generated this data file", + "type": "string" }, - "required": ["date_time", "program"], - "additionalProperties": False - } + "version": { + "description": "The version identification of the program which generated this data file", + "type": "string" + }, + "parameters": { + "description": "The list of parameters to the program when generating this data file", + "type": ["array", "string"], + "items": {"type": "string"} + }, + "program_ref": { + "description": "A URL reference that defines the program, e.g., a UMM-S reference URL", + "type": "string" + }, + "$schema": { + "description": "The URL to this schema", + "type": "string" + } + }, + "required": ["date_time", "program"], + "additionalProperties": False } } + } - @classmethod - def tearDownClass(cls): - # Remove the temporary directories used to house subset data - shutil.rmtree(cls.subset_output_dir) - - def test_subset_variables(self): - """ - Test that all variables present in the original NetCDF file - are present after the subset takes place, and with the same - attributes. - """ - - bbox = np.array(((-180, 90), (-90, 90))) - for file in self.test_files: - output_file = "{}_{}".format(self._testMethodName, file) - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file) - ) - - in_ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False) - - for in_var, out_var in zip(in_ds.data_vars.items(), out_ds.data_vars.items()): - # compare names - assert in_var[0] == out_var[0] - - # compare attributes - np.testing.assert_equal(in_var[1].attrs, out_var[1].attrs) - - # compare type and dimension names - assert in_var[1].dtype == out_var[1].dtype - assert in_var[1].dims == out_var[1].dims - - in_ds.close() - out_ds.close() - - - def test_subset_bbox(self): - """ - Test that all data present is within the bounding box given, - and that the correct bounding box is used. This test assumed - that the scanline *is* being cut. 
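For reference, the pattern used throughout the rewritten tests is the standard pytest one: scoped fixtures replace setUpClass/tearDownClass, and @pytest.mark.parametrize replaces the old "for file in self.test_files" loops by generating one test case per granule. A minimal, self-contained sketch of that idiom (all names here are hypothetical, not part of this suite):

import shutil
import tempfile

import pytest

INPUT_FILES = ["granule_a.nc", "granule_b.nc"]  # stand-in for TEST_DATA_FILES


@pytest.fixture(scope="module")
def output_dir():
    # Everything before `yield` is setup, everything after is teardown,
    # playing the role of the removed setUpClass/tearDownClass pair.
    out_dir = tempfile.mkdtemp()
    yield out_dir
    shutil.rmtree(out_dir)


@pytest.mark.parametrize("input_file", INPUT_FILES)
def test_per_file(input_file, output_dir, request):
    # request.node.name is unique per parametrized case, so per-test output
    # filenames built from it cannot collide.
    assert input_file in request.node.name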
- """ - - # pylint: disable=too-many-locals - bbox = np.array(((-180, 90), (-90, 90))) - for file in self.test_files: - output_file = "{}_{}".format(self._testMethodName, file) - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file) - ) - - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False, - mask_and_scale=False) - - lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(out_ds) - - lat_var_name = lat_var_name[0] - lon_var_name = lon_var_name[0] - lon_bounds, lat_bounds = subset.convert_bbox(bbox, out_ds, lat_var_name, lon_var_name) +def data_files(): + test_dir = dirname(realpath(__file__)) + test_data_dir = join(test_dir, 'data') + return [f for f in listdir(test_data_dir) if isfile(join(test_data_dir, f)) and f.endswith(".nc")] - lats = out_ds[lat_var_name].values - lons = out_ds[lon_var_name].values - np.warnings.filterwarnings('ignore') +TEST_DATA_FILES = data_files() - # Step 1: Get mask of values which aren't in the bounds. - # For lon spatial condition, need to consider the - # lon_min > lon_max case. If that's the case, should do - # an 'or' instead. - oper = operator.and_ if lon_bounds[0] < lon_bounds[1] else operator.or_ +@pytest.mark.parametrize("test_file", TEST_DATA_FILES) +def test_subset_variables(test_file, data_dir, subset_output_dir, request): + """ + Test that all variables present in the original NetCDF file + are present after the subset takes place, and with the same + attributes. + """ - # In these two masks, True == valid and False == invalid - lat_truth = np.ma.masked_where((lats >= lat_bounds[0]) - & (lats <= lat_bounds[1]), lats).mask - lon_truth = np.ma.masked_where(oper((lons >= lon_bounds[0]), - (lons <= lon_bounds[1])), lons).mask + bbox = np.array(((-180, 90), (-90, 90))) + output_file = "{}_{}".format(request.node.name, test_file) + subset.subset( + file_to_subset=join(data_dir, test_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file) + ) + + in_ds = xr.open_dataset(join(data_dir, test_file), + decode_times=False, + decode_coords=False) + out_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_times=False, + decode_coords=False) - # combine masks - spatial_mask = np.bitwise_and(lat_truth, lon_truth) + for in_var, out_var in zip(in_ds.data_vars.items(), out_ds.data_vars.items()): + # compare names + assert in_var[0] == out_var[0] - # Create a mask which represents the valid matrix bounds of - # the spatial mask. This is used in the case where a var - # has no _FillValue. - if lon_truth.ndim == 1: - bound_mask = spatial_mask - else: - rows = np.any(spatial_mask, axis=1) - cols = np.any(spatial_mask, axis=0) - bound_mask = np.array([[r & c for c in cols] for r in rows]) - - # If all the lat/lon values are valid, the file is valid and - # there is no need to check individual variables. - if np.all(spatial_mask): - continue - - # Step 2: Get mask of values which are NaN or "_FillValue in - # each variable. 
- for var_name, var in out_ds.data_vars.items(): - # remove dimension of '1' if necessary - vals = np.squeeze(var.values) - - # Get the Fill Value - fill_value = var.attrs.get('_FillValue') - - # If _FillValue isn't provided, check that all values - # are in the valid matrix bounds go to the next variable - if fill_value is None: - combined_mask = np.ma.mask_or(spatial_mask, bound_mask) - np.testing.assert_equal(bound_mask, combined_mask) - continue - - # If the shapes of this var doesn't match the mask, - # reshape the var so the comparison can be made. Take - # the first index of the unknown dims. This makes - # assumptions about the ordering of the dimensions. - if vals.shape != out_ds[lat_var_name].shape and vals.shape: - slice_list = [] - for dim in var.dims: - if dim in out_ds[lat_var_name].dims: - slice_list.append(slice(None)) - else: - slice_list.append(slice(0, 1)) - vals = np.squeeze(vals[tuple(slice_list)]) - - # Skip for byte type. - if vals.dtype == 'S1': - continue - - # In this mask, False == NaN and True = valid - var_mask = np.invert(np.ma.masked_invalid(vals).mask) - fill_mask = np.invert(np.ma.masked_values(vals, fill_value).mask) - - var_mask = np.bitwise_and(var_mask, fill_mask) - - if var_mask.shape != spatial_mask.shape: - # This may be a case where the time represents lines, - # or some other case where the variable doesn't share - # a shape with the coordinate variables. - continue - - # Step 3: Combine the spatial and var mask with 'or' - combined_mask = np.ma.mask_or(var_mask, spatial_mask) - - # Step 4: compare the newly combined mask and the - # spatial mask created from the lat/lon masks. They - # should be equal, because the 'or' of the two masks - # where out-of-bounds values are 'False' will leave - # those values assuming there are only NaN values - # in the data at those locations. - np.testing.assert_equal(spatial_mask, combined_mask) - - out_ds.close() - - @pytest.mark.skip(reason="This is being tested currently. Temporarily skipped.") - def test_subset_no_bbox(self): - """ - Test that the subsetted file is identical to the given file - when a 'full' bounding box is given. - """ + # compare attributes + np.testing.assert_equal(in_var[1].attrs, out_var[1].attrs) - bbox = np.array(((-180, 180), (-90, 90))) - for file in self.test_files: - output_file = "{}_{}".format(self._testMethodName, file) - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file) - ) + # compare type and dimension names + assert in_var[1].dtype == out_var[1].dtype + assert in_var[1].dims == out_var[1].dims - # pylint: disable=no-member - in_nc = nc.Dataset(join(self.test_data_dir, file), 'r') - out_nc = nc.Dataset(join(self.subset_output_dir, output_file), 'r') - - # Make sure the output dimensions match the input - # dimensions, which means the full file was returned. - for name, dimension in in_nc.dimensions.items(): - assert dimension.size == out_nc.dimensions[name].size - - in_nc.close() - out_nc.close() - - def test_subset_empty_bbox(self): - """ - Test that an empty file is returned when the bounding box - contains no data. 
- """ - - bbox = np.array(((120, 125), (-90, -85))) - for file in self.test_files: - output_file = "{}_{}".format(self._testMethodName, file) - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file) - ) - test_input_dataset = xr.open_dataset( - join(self.test_data_dir, file), - decode_times=False, - decode_coords=False, - mask_and_scale=False - ) - empty_dataset = xr.open_dataset( - join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False, - mask_and_scale=False - ) + in_ds.close() + out_ds.close() - # Ensure all variables are present but empty. - for variable_name, variable in empty_dataset.data_vars.items(): - assert np.all(variable.data == variable.attrs.get('_FillValue', np.nan) or np.isnan(variable.data)) - - assert test_input_dataset.dims.keys() == empty_dataset.dims.keys() - - - def test_bbox_conversion(self): - """ - Test that the bounding box conversion returns expected - results. Expected results are hand-calculated. - """ - - ds_180 = xr.open_dataset(join(self.test_data_dir, - "MODIS_A-JPL-L2P-v2014.0.nc"), - decode_times=False, - decode_coords=False) - - ds_360 = xr.open_dataset(join( - self.test_data_dir, - "ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc"), - decode_times=False, - decode_coords=False) - - # Elements in each tuple are: - # ds type, lon_range, expected_result - test_bboxes = [ - (ds_180, (-180, 180), (-180, 180)), - (ds_360, (-180, 180), (0, 360)), - (ds_180, (-180, 0), (-180, 0)), - (ds_360, (-180, 0), (180, 360)), - (ds_180, (-80, 80), (-80, 80)), - (ds_360, (-80, 80), (280, 80)), - (ds_180, (0, 180), (0, 180)), - (ds_360, (0, 180), (0, 180)), - (ds_180, (80, -80), (80, -80)), - (ds_360, (80, -80), (80, 280)), - (ds_180, (-80, -80), (-180, 180)), - (ds_360, (-80, -80), (0, 360)) - ] - lat_var = 'lat' - lon_var = 'lon' - - for test_bbox in test_bboxes: - dataset = test_bbox[0] - lon_range = test_bbox[1] - expected_result = test_bbox[2] - actual_result, _ = subset.convert_bbox(np.array([lon_range, [0, 0]]), - dataset, lat_var, lon_var) - - np.testing.assert_equal(actual_result, expected_result) - - def compare_java(self, java_files, cut): - """ - Run the L2 subsetter and compare the result to the equivelant - legacy (Java) subsetter result. - Parameters - ---------- - java_files : list of strings - List of paths to each subsetted Java file. - cut : boolean - True if the subsetter should return compact. - """ - bbox_map = [("ascat_20150702_084200", ((-180, 0), (-90, 0))), - ("ascat_20150702_102400", ((-180, 0), (-90, 0))), - ("MODIS_A-JPL", ((65.8, 86.35), (40.1, 50.15))), - ("MODIS_T-JPL", ((-78.7, -60.7), (-54.8, -44))), - ("VIIRS", ((-172.3, -126.95), (62.3, 70.65))), - ("AMSR2-L2B_v08_r38622", ((-180, 0), (-90, 0)))] - - for file_str, bbox in bbox_map: - java_file = [file for file in java_files if file_str in file][0] - test_file = [file for file in self.test_files if file_str in file][0] - output_file = "{}_{}".format(self._testMethodName, test_file) - subset.subset( - file_to_subset=join(self.test_data_dir, test_file), - bbox=np.array(bbox), - output_file=join(self.subset_output_dir, output_file), - cut=cut - ) +@pytest.mark.parametrize("test_file", TEST_DATA_FILES) +def test_subset_bbox(test_file, data_dir, subset_output_dir, request): + """ + Test that all data present is within the bounding box given, + and that the correct bounding box is used. This test assumed + that the scanline *is* being cut. 
+ """ - j_ds = xr.open_dataset(join(self.test_data_dir, java_file), - decode_times=False, - decode_coords=False, - mask_and_scale=False) - - py_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False, - mask_and_scale=False) - - for var_name, var in j_ds.data_vars.items(): - # Compare shape - np.testing.assert_equal(var.shape, py_ds[var_name].shape) - - # Compare meta - np.testing.assert_equal(var.attrs, py_ds[var_name].attrs) - - # Compare data - np.testing.assert_equal(var.values, py_ds[var_name].values) - - # Compare meta. History will always be different, so remove - # from the headers for comparison. - del j_ds.attrs['history'] - del py_ds.attrs['history'] - del py_ds.attrs['history_json'] - np.testing.assert_equal(j_ds.attrs, py_ds.attrs) - - def test_compare_java_compact(self): - """ - Tests that the results of the subsetting operation is - equivalent to the Java subsetting result on the same bounding - box. For simplicity the subsetted Java granules have been - manually run and copied into this project. This test DOES - cut the scanline. - """ - - java_result_files = [join("java_results", "cut", f) for f in - listdir(join(self.test_data_dir, "java_results", "cut")) if - isfile(join(self.test_data_dir, "java_results", "cut", f)) - and f.endswith(".nc")] - - self.compare_java(java_result_files, cut=True) - - def test_compare_java(self): - """ - Tests that the results of the subsetting operation is - equivalent to the Java subsetting result on the same bounding - box. For simplicity the subsetted Java granules have been - manually run and copied into this project. This runs does NOT - cut the scanline. - """ - - java_result_files = [join("java_results", "uncut", f) for f in - listdir(join(self.test_data_dir, "java_results", "uncut")) if - isfile(join(self.test_data_dir, "java_results", "uncut", f)) - and f.endswith(".nc")] - - self.compare_java(java_result_files, cut=False) - - def test_history_metadata_append(self): - """ - Tests that the history metadata header is appended to when it - already exists. 
- """ - test_file = next(filter( - lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f - , self.test_files)) - output_file = "{}_{}".format(self._testMethodName, test_file) - subset.subset( - file_to_subset=join(self.test_data_dir, test_file), - bbox=np.array(((-180, 180), (-90.0, 90))), - output_file=join(self.subset_output_dir, output_file) - ) + # pylint: disable=too-many-locals + bbox = np.array(((-180, 90), (-90, 90))) + output_file = "{}_{}".format(request.node.name, test_file) + subset_output_file = join(subset_output_dir, output_file) + subset.subset( + file_to_subset=join(data_dir, test_file), + bbox=bbox, + output_file=subset_output_file + ) + + out_ds, rename_vars, _ = subset.open_as_nc_dataset(subset_output_file) + out_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(out_ds), + decode_times=False, + decode_coords=False, + mask_and_scale=False) - in_nc = xr.open_dataset(join(self.test_data_dir, test_file)) - out_nc = xr.open_dataset(join(self.subset_output_dir, output_file)) + lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(out_ds) + + lat_var_name = lat_var_name[0] + lon_var_name = lon_var_name[0] + + lon_bounds, lat_bounds = subset.convert_bbox(bbox, out_ds, lat_var_name, lon_var_name) + + lats = out_ds[lat_var_name].values + lons = out_ds[lon_var_name].values + + np.warnings.filterwarnings('ignore') + + # Step 1: Get mask of values which aren't in the bounds. + + # For lon spatial condition, need to consider the + # lon_min > lon_max case. If that's the case, should do + # an 'or' instead. + oper = operator.and_ if lon_bounds[0] < lon_bounds[1] else operator.or_ + + # In these two masks, True == valid and False == invalid + lat_truth = np.ma.masked_where((lats >= lat_bounds[0]) + & (lats <= lat_bounds[1]), lats).mask + lon_truth = np.ma.masked_where(oper((lons >= lon_bounds[0]), + (lons <= lon_bounds[1])), lons).mask + + # combine masks + spatial_mask = np.bitwise_and(lat_truth, lon_truth) + + # Create a mask which represents the valid matrix bounds of + # the spatial mask. This is used in the case where a var + # has no _FillValue. + if lon_truth.ndim == 1: + bound_mask = spatial_mask + else: + rows = np.any(spatial_mask, axis=1) + cols = np.any(spatial_mask, axis=0) + bound_mask = np.array([[r & c for c in cols] for r in rows]) + + # If all the lat/lon values are valid, the file is valid and + # there is no need to check individual variables. + if np.all(spatial_mask): + return + + # Step 2: Get mask of values which are NaN or "_FillValue in + # each variable. + for var_name, var in out_ds.data_vars.items(): + # remove dimension of '1' if necessary + vals = np.squeeze(var.values) + + # Get the Fill Value + fill_value = var.attrs.get('_FillValue') + + # If _FillValue isn't provided, check that all values + # are in the valid matrix bounds go to the next variable + if fill_value is None: + combined_mask = np.ma.mask_or(spatial_mask, bound_mask) + np.testing.assert_equal(bound_mask, combined_mask) + continue + + # If the shapes of this var doesn't match the mask, + # reshape the var so the comparison can be made. Take + # the first index of the unknown dims. This makes + # assumptions about the ordering of the dimensions. 
+ if vals.shape != out_ds[lat_var_name].shape and vals.shape: + slice_list = [] + for dim in var.dims: + if dim in out_ds[lat_var_name].dims: + slice_list.append(slice(None)) + else: + slice_list.append(slice(0, 1)) + vals = np.squeeze(vals[tuple(slice_list)]) - # Assert that the original granule contains history - assert in_nc.attrs.get('history') is not None + # Skip for byte type. + if vals.dtype == 'S1': + continue - # Assert that input and output files have different history - self.assertNotEqual(in_nc.attrs['history'], out_nc.attrs['history']) + # In this mask, False == NaN and True = valid + var_mask = np.invert(np.ma.masked_invalid(vals).mask) + fill_mask = np.invert(np.ma.masked_values(vals, fill_value).mask) - # Assert that last line of history was created by this service - assert SERVICE_NAME in out_nc.attrs['history'].split('\n')[-1] + var_mask = np.bitwise_and(var_mask, fill_mask) - # Assert that the old history is still in the subsetted granule - assert in_nc.attrs['history'] in out_nc.attrs['history'] + if var_mask.shape != spatial_mask.shape: + # This may be a case where the time represents lines, + # or some other case where the variable doesn't share + # a shape with the coordinate variables. + continue - def test_history_metadata_create(self): - """ - Tests that the history metadata header is created when it does - not exist. All test granules contain this header already, so - for this test the header will be removed manually from a granule. - """ - test_file = next(filter( - lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f - , self.test_files)) - output_file = "{}_{}".format(self._testMethodName, test_file) + # Step 3: Combine the spatial and var mask with 'or' + combined_mask = np.ma.mask_or(var_mask, spatial_mask) - # Remove the 'history' metadata from the granule - in_nc = xr.open_dataset(join(self.test_data_dir, test_file)) - del in_nc.attrs['history'] - in_nc.to_netcdf(join(self.subset_output_dir, 'int_{}'.format(output_file)), 'w') + # Step 4: compare the newly combined mask and the + # spatial mask created from the lat/lon masks. They + # should be equal, because the 'or' of the two masks + # where out-of-bounds values are 'False' will leave + # those values assuming there are only NaN values + # in the data at those locations. + np.testing.assert_equal(spatial_mask, combined_mask) - subset.subset( - file_to_subset=join(self.subset_output_dir, "int_{}".format(output_file)), - bbox=np.array(((-180, 180), (-90.0, 90))), - output_file=join(self.subset_output_dir, output_file) - ) + out_ds.close() - out_nc = xr.open_dataset(join(self.subset_output_dir, output_file)) - # Assert that the input granule contains no history - assert in_nc.attrs.get('history') is None +@pytest.mark.parametrize("test_file", TEST_DATA_FILES) +def test_subset_empty_bbox(test_file, data_dir, subset_output_dir, request): + """ + Test that an empty file is returned when the bounding box + contains no data. 
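The mask bookkeeping in test_subset_bbox above boils down to one invariant: valid (non-fill, non-NaN) data may only appear where the lat/lon mask says the point is inside the bounding box, so OR-ing the variable mask into the spatial mask must not add any True cells. A tiny stand-alone illustration of that check with toy numpy arrays (values made up, not taken from any granule):

import numpy as np

# True == inside the requested bounding box, for four toy points.
spatial_mask = np.array([True, True, False, False])

# True == valid data in some variable, subset the same way. Valid data
# only occurs inside the bbox; fill values may occur anywhere.
var_mask = np.array([True, False, False, False])

# If the subset worked, OR-ing the two masks changes nothing.
combined_mask = np.ma.mask_or(var_mask, spatial_mask)
np.testing.assert_equal(spatial_mask, combined_mask)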
+ """ - # Assert that the history was created by this service - assert SERVICE_NAME in out_nc.attrs['history'] + bbox = np.array(((120, 125), (-90, -85))) + output_file = "{}_{}".format(request.node.name, test_file) + subset.subset( + file_to_subset=join(data_dir, test_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file) + ) + test_input_dataset = xr.open_dataset( + join(data_dir, test_file), + decode_times=False, + decode_coords=False, + mask_and_scale=False + ) + empty_dataset = xr.open_dataset( + join(subset_output_dir, output_file), + decode_times=False, + decode_coords=False, + mask_and_scale=False + ) + + # Ensure all variables are present but empty. + for variable_name, variable in empty_dataset.data_vars.items(): + assert np.all(variable.data == variable.attrs.get('_FillValue', np.nan) or np.isnan(variable.data)) + + assert test_input_dataset.dims.keys() == empty_dataset.dims.keys() + + +def test_bbox_conversion(data_dir): + """ + Test that the bounding box conversion returns expected + results. Expected results are hand-calculated. + """ - # Assert that the history created by this service is the only - # line present in the history. - assert '\n' not in out_nc.attrs['history'] + ds_180 = xr.open_dataset(join(data_dir, "MODIS_A-JPL-L2P-v2014.0.nc"), + decode_times=False, + decode_coords=False) + + ds_360 = xr.open_dataset(join( + data_dir, + "ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc"), + decode_times=False, + decode_coords=False) + + # Elements in each tuple are: + # ds type, lon_range, expected_result + test_bboxes = [ + (ds_180, (-180, 180), (-180, 180)), + (ds_360, (-180, 180), (0, 360)), + (ds_180, (-180, 0), (-180, 0)), + (ds_360, (-180, 0), (180, 360)), + (ds_180, (-80, 80), (-80, 80)), + (ds_360, (-80, 80), (280, 80)), + (ds_180, (0, 180), (0, 180)), + (ds_360, (0, 180), (0, 180)), + (ds_180, (80, -80), (80, -80)), + (ds_360, (80, -80), (80, 280)), + (ds_180, (-80, -80), (-180, 180)), + (ds_360, (-80, -80), (0, 360)) + ] + + lat_var = 'lat' + lon_var = 'lon' + + for test_bbox in test_bboxes: + dataset = test_bbox[0] + lon_range = test_bbox[1] + expected_result = test_bbox[2] + actual_result, _ = subset.convert_bbox(np.array([lon_range, [0, 0]]), + dataset, lat_var, lon_var) + + np.testing.assert_equal(actual_result, expected_result) + + +def compare_java(test_file, cut, data_dir, subset_output_dir, request): + """ + Run the L2 subsetter and compare the result to the equivelant + legacy (Java) subsetter result. + Parameters + ---------- + test_file : str + path to test file. + cut : boolean + True if the subsetter should return compact. 
+ """ + bbox_map = [("ascat_20150702_084200", ((-180, 0), (-90, 0))), + ("ascat_20150702_102400", ((-180, 0), (-90, 0))), + ("MODIS_A-JPL", ((65.8, 86.35), (40.1, 50.15))), + ("MODIS_T-JPL", ((-78.7, -60.7), (-54.8, -44))), + ("VIIRS", ((-172.3, -126.95), (62.3, 70.65))), + ("AMSR2-L2B_v08_r38622", ((-180, 0), (-90, 0)))] + + java_files_dir = join(data_dir, "java_results", "cut" if cut else "uncut") + + java_files = [join(java_files_dir, f) for f in listdir(java_files_dir) if + isfile(join(java_files_dir, f)) and f.endswith(".nc")] + + file, bbox = next(iter([b for b in bbox_map if b[0] in test_file])) + java_file = next(iter([f for f in java_files if file in f])) + + output_file = "{}_{}".format(urllib.parse.quote_plus(request.node.name), test_file) + subset.subset( + file_to_subset=join(data_dir, test_file), + bbox=np.array(bbox), + output_file=join(subset_output_dir, output_file), + cut=cut + ) + + j_ds = xr.open_dataset(join(data_dir, java_file), + decode_times=False, + decode_coords=False, + mask_and_scale=False) + + py_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_times=False, + decode_coords=False, + mask_and_scale=False) + + for var_name, var in j_ds.data_vars.items(): + # Compare shape + np.testing.assert_equal(var.shape, py_ds[var_name].shape) + + # Compare meta + np.testing.assert_equal(var.attrs, py_ds[var_name].attrs) + + # Compare data + np.testing.assert_equal(var.values, py_ds[var_name].values) + + # Compare meta. History will always be different, so remove + # from the headers for comparison. + del j_ds.attrs['history'] + del py_ds.attrs['history'] + del py_ds.attrs['history_json'] + np.testing.assert_equal(j_ds.attrs, py_ds.attrs) + + +@pytest.mark.parametrize("test_file", [ + "ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc", + "ascat_20150702_102400_metopa_45146_eps_o_250_2300_ovw.l2.nc", + "MODIS_A-JPL-L2P-v2014.0.nc", + "MODIS_T-JPL-L2P-v2014.0.nc", + "VIIRS_NPP-NAVO-L2P-v3.0.nc", + "AMSR2-L2B_v08_r38622-v02.0-fv01.0.nc" +]) +def test_compare_java_compact(test_file, data_dir, subset_output_dir, request): + """ + Tests that the results of the subsetting operation is + equivalent to the Java subsetting result on the same bounding + box. For simplicity the subsetted Java granules have been + manually run and copied into this project. This test DOES + cut the scanline. + """ - def test_specified_variables(self): - """ - Test that the variables which are specified when calling the subset - operation are present in the resulting subsetted data file, - and that the variables which are specified are not present. 
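Note that the output filename in compare_java above is built from request.node.name, which for a parametrized test contains characters such as '[' and ']'; urllib.parse.quote_plus percent-encodes them so the name stays filesystem-friendly. A quick illustration (the node name below is only an example):

import urllib.parse

node_name = "test_compare_java_compact[MODIS_A-JPL-L2P-v2014.0.nc]"
print(urllib.parse.quote_plus(node_name))
# test_compare_java_compact%5BMODIS_A-JPL-L2P-v2014.0.nc%5D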
- """ - bbox = np.array(((-180, 180), (-90, 90))) - for file in self.test_files: - output_file = "{}_{}".format(self._testMethodName, file) + compare_java(test_file, True, data_dir, subset_output_dir, request) - in_ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) - included_variables = set([variable[0] for variable in in_ds.data_vars.items()][::2]) - included_variables = list(included_variables) +@pytest.mark.parametrize("test_file", [ + "ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc", + "ascat_20150702_102400_metopa_45146_eps_o_250_2300_ovw.l2.nc", + "MODIS_A-JPL-L2P-v2014.0.nc", + "MODIS_T-JPL-L2P-v2014.0.nc", + "VIIRS_NPP-NAVO-L2P-v3.0.nc", + "AMSR2-L2B_v08_r38622-v02.0-fv01.0.nc" +]) +def test_compare_java(test_file, data_dir, subset_output_dir, request): + """ + Tests that the results of the subsetting operation is + equivalent to the Java subsetting result on the same bounding + box. For simplicity the subsetted Java granules have been + manually run and copied into this project. This runs does NOT + cut the scanline. + """ - excluded_variables = list(set(variable[0] for variable in in_ds.data_vars.items()) - - set(included_variables)) + compare_java(test_file, False, data_dir, subset_output_dir, request) - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - variables=included_variables - ) - # Get coord variables - time_var_name = [] - lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(in_ds) - lat_var_name = lat_var_names[0] - lon_var_name = lon_var_names[0] - time_var_name = subset.compute_time_variable_name(in_ds, in_ds[lat_var_name]) - - included_variables.append(lat_var_name) - included_variables.append(lon_var_name) - included_variables.append(time_var_name) - included_variables.extend(in_ds.coords.keys()) - - if lat_var_name in excluded_variables: - excluded_variables.remove(lat_var_name) - if lon_var_name in excluded_variables: - excluded_variables.remove(lon_var_name) - if time_var_name in excluded_variables: - excluded_variables.remove(time_var_name) - - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False) - - out_vars = [out_var for out_var in out_ds.data_vars.keys()] - out_vars.extend(out_ds.coords.keys()) - - assert set(out_vars) == set(included_variables) - assert set(out_vars).isdisjoint(excluded_variables) - - in_ds.close() - out_ds.close() - - def test_calculate_chunks(self): - """ - Test that the calculate chunks function in the subset module - correctly calculates and returns the chunks dims dictionary. - """ - rs = np.random.RandomState(0) - dataset = xr.DataArray( - rs.randn(2, 4000, 4001), - dims=['x', 'y', 'z'] - ).to_dataset(name='foo') - - chunk_dict = subset.calculate_chunks(dataset) - - assert chunk_dict.get('x') is None - assert chunk_dict.get('y') is None - assert chunk_dict.get('z') == 4000 - - def test_missing_coord_vars(self): - """ - As of right now, the subsetter expects the data to contain lat - and lon variables. If not present, an error is thrown. - """ - file = 'MODIS_T-JPL-L2P-v2014.0.nc' - ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False, - mask_and_scale=False) +def test_history_metadata_append(data_dir, subset_output_dir, request): + """ + Tests that the history metadata header is appended to when it + already exists. 
+ """ + test_file = next(filter( + lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f + , TEST_DATA_FILES)) + output_file = "{}_{}".format(request.node.name, test_file) + subset.subset( + file_to_subset=join(data_dir, test_file), + bbox=np.array(((-180, 180), (-90.0, 90))), + output_file=join(subset_output_dir, output_file) + ) - # Manually remove var which will cause error when attempting - # to subset. - ds = ds.drop_vars(['lat']) + in_nc = xr.open_dataset(join(data_dir, test_file)) + out_nc = xr.open_dataset(join(subset_output_dir, output_file)) - output_file = '{}_{}'.format('missing_coords', file) - ds.to_netcdf(join(self.subset_output_dir, output_file)) + # Assert that the original granule contains history + assert in_nc.attrs.get('history') is not None - bbox = np.array(((-180, 180), (-90, 90))) + # Assert that input and output files have different history + assert in_nc.attrs['history'] != out_nc.attrs['history'] - with pytest.raises(ValueError): - subset.subset( - file_to_subset=join(self.subset_output_dir, output_file), - bbox=bbox, - output_file='' - ) + # Assert that last line of history was created by this service + assert SERVICE_NAME in out_nc.attrs['history'].split('\n')[-1] - def test_data_1D(self): - """ - Test that subsetting a 1-D granule does not result in failure. - """ - merged_jason_filename = 'JA1_GPN_2PeP001_002_20020115_060706_20020115_070316.nc' - output_file = "{}_{}".format(self._testMethodName, merged_jason_filename) + # Assert that the old history is still in the subsetted granule + assert in_nc.attrs['history'] in out_nc.attrs['history'] - subset.subset( - file_to_subset=join(self.test_data_dir, merged_jason_filename), - bbox=np.array(((-180, 0), (-90, 0))), - output_file=join(self.subset_output_dir, output_file) - ) - xr.open_dataset(join(self.subset_output_dir, output_file)) +def test_history_metadata_create(data_dir, subset_output_dir, request): + """ + Tests that the history metadata header is created when it does + not exist. All test granules contain this header already, so + for this test the header will be removed manually from a granule. 
+ """ + test_file = next(filter( + lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f + , TEST_DATA_FILES)) + output_file = "{}_{}".format(request.node.name, test_file) - def test_get_coord_variable_names(self): - """ - Test that the expected coord variable names are returned - """ - file = 'MODIS_T-JPL-L2P-v2014.0.nc' - ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False, - mask_and_scale=False) + # Remove the 'history' metadata from the granule + in_nc = xr.open_dataset(join(data_dir, test_file)) + del in_nc.attrs['history'] + in_nc.to_netcdf(join(subset_output_dir, 'int_{}'.format(output_file)), 'w') - old_lat_var_name = 'lat' - old_lon_var_name = 'lon' + subset.subset( + file_to_subset=join(subset_output_dir, "int_{}".format(output_file)), + bbox=np.array(((-180, 180), (-90.0, 90))), + output_file=join(subset_output_dir, output_file) + ) - lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(ds) + out_nc = xr.open_dataset(join(subset_output_dir, output_file)) - assert lat_var_name[0] == old_lat_var_name - assert lon_var_name[0] == old_lon_var_name + # Assert that the input granule contains no history + assert in_nc.attrs.get('history') is None - new_lat_var_name = 'latitude' - new_lon_var_name = 'x' - ds = ds.rename({old_lat_var_name: new_lat_var_name, - old_lon_var_name: new_lon_var_name}) + # Assert that the history was created by this service + assert SERVICE_NAME in out_nc.attrs['history'] - lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(ds) + # Assert that the history created by this service is the only + # line present in the history. + assert '\n' not in out_nc.attrs['history'] - assert lat_var_name[0] == new_lat_var_name - assert lon_var_name[0] == new_lon_var_name - def test_cannot_get_coord_variable_names(self): - """ - Test that, when given a dataset with coord vars which are not - expected, a ValueError is raised. - """ - file = 'MODIS_T-JPL-L2P-v2014.0.nc' - ds = xr.open_dataset(join(self.test_data_dir, file), +@pytest.mark.parametrize("test_file", TEST_DATA_FILES) +def test_specified_variables(test_file, data_dir, subset_output_dir, request): + """ + Test that the variables which are specified when calling the subset + operation are present in the resulting subsetted data file, + and that the variables which are specified are not present. 
+ """ + bbox = np.array(((-180, 180), (-90, 90))) + output_file = "{}_{}".format(request.node.name, test_file) + + in_ds, rename_vars, _ = subset.open_as_nc_dataset(join(data_dir, test_file)) + in_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(in_ds), + decode_times=False, + decode_coords=False) + # Non-data vars are by default included in the result + non_data_vars = set(in_ds.variables.keys()) - set(in_ds.data_vars.keys()) + + # Coordinate variables are always included in the result + lat_var_names, lon_var_names, time_var_names = subset.get_coordinate_variable_names(in_ds) + coordinate_variables = lat_var_names + lon_var_names + time_var_names + + # Pick some variable to include in the result + included_variables = set([variable[0] for variable in in_ds.data_vars.items()][::2]) + included_variables = list(included_variables) + + # All other data variables should be dropped + expected_excluded_variables = list(set(variable[0] for variable in in_ds.data_vars.items()) + - set(included_variables) - set(coordinate_variables)) + + subset.subset( + file_to_subset=join(data_dir, test_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + variables=[var.replace(GROUP_DELIM, '/') for var in included_variables] + ) + + out_ds, rename_vars, _ = subset.open_as_nc_dataset(join(subset_output_dir, output_file)) + out_ds = xr.open_dataset(xr.backends.NetCDF4DataStore(out_ds), decode_times=False, - decode_coords=False, - mask_and_scale=False) + decode_coords=False) - old_lat_var_name = 'lat' - new_lat_var_name = 'foo' - - ds = ds.rename({old_lat_var_name: new_lat_var_name}) - # Remove 'coordinates' attribute - for var_name, var in ds.items(): - if 'coordinates' in var.attrs: - del var.attrs['coordinates'] - - self.assertRaises(ValueError, subset.compute_coordinate_variable_names, ds) - - def test_get_spatial_bounds(self): - """ - Test that the get_spatial_bounds function works as expected. - The get_spatial_bounds function should return lat/lon min/max - which is masked and scaled for both variables. The values - should also be adjusted for -180,180/-90,90 coordinate types - """ - ascat_filename = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' - ghrsst_filename = '20190927000500-JPL-L2P_GHRSST-SSTskin-MODIS_A-D-v02.0-fv01.0.nc' - - ascat_dataset = xr.open_dataset( - join(self.test_data_dir, ascat_filename), - decode_times=False, - decode_coords=False, - mask_and_scale=False - ) - ghrsst_dataset = xr.open_dataset( - join(self.test_data_dir, ghrsst_filename), - decode_times=False, - decode_coords=False, - mask_and_scale=False - ) + out_vars = [out_var for out_var in out_ds.variables.keys()] - # ascat1 longitude is -0 360, ghrsst modis A is -180 180 - # Both have metadata for valid_min - - # Manually calculated spatial bounds - ascat_expected_lat_min = -89.4 - ascat_expected_lat_max = 89.2 - ascat_expected_lon_min = -180.0 - ascat_expected_lon_max = 180.0 - - ghrsst_expected_lat_min = -77.2 - ghrsst_expected_lat_max = -53.6 - ghrsst_expected_lon_min = -170.5 - ghrsst_expected_lon_max = -101.7 - - min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( - dataset=ascat_dataset, - lat_var_names=['lat'], - lon_var_names=['lon'] - ).flatten() - - assert np.isclose(min_lat, ascat_expected_lat_min) - assert np.isclose(max_lat, ascat_expected_lat_max) - assert np.isclose(min_lon, ascat_expected_lon_min) - assert np.isclose(max_lon, ascat_expected_lon_max) - - # Remove the label from the dataset coordinate variables indicating the valid_min. 
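In test_specified_variables above, the variable names come from the flattened view of the granule, so group members carry the double-underscore GROUP_DELIM instead of '/', and the test swaps the delimiter back before handing the list to subset.subset. A rough illustration of that mapping (the example name is made up, and whether a leading delimiter appears depends on how the flattening walk seeds the root path):

GROUP_DELIM = '__'

flattened_name = '__data_01__ku__range_ocean'  # name as seen in the flattened dataset
group_path = flattened_name.replace(GROUP_DELIM, '/')

print(group_path)  # /data_01/ku/range_ocean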
- del ascat_dataset['lat'].attrs['valid_min'] - del ascat_dataset['lon'].attrs['valid_min'] - - min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( - dataset=ascat_dataset, - lat_var_names=['lat'], - lon_var_names=['lon'] - ).flatten() - - assert np.isclose(min_lat, ascat_expected_lat_min) - assert np.isclose(max_lat, ascat_expected_lat_max) - assert np.isclose(min_lon, ascat_expected_lon_min) - assert np.isclose(max_lon, ascat_expected_lon_max) - - # Repeat test, but with GHRSST granule - - min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( - dataset=ghrsst_dataset, - lat_var_names=['lat'], - lon_var_names=['lon'] - ).flatten() - - assert np.isclose(min_lat, ghrsst_expected_lat_min) - assert np.isclose(max_lat, ghrsst_expected_lat_max) - assert np.isclose(min_lon, ghrsst_expected_lon_min) - assert np.isclose(max_lon, ghrsst_expected_lon_max) - - # Remove the label from the dataset coordinate variables indicating the valid_min. - - del ghrsst_dataset['lat'].attrs['valid_min'] - del ghrsst_dataset['lon'].attrs['valid_min'] - - min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( - dataset=ghrsst_dataset, - lat_var_names=['lat'], - lon_var_names=['lon'] - ).flatten() - - assert np.isclose(min_lat, ghrsst_expected_lat_min) - assert np.isclose(max_lat, ghrsst_expected_lat_max) - assert np.isclose(min_lon, ghrsst_expected_lon_min) - assert np.isclose(max_lon, ghrsst_expected_lon_max) - - def test_shapefile_subset(self): - """ - Test that using a shapefile to subset data instead of a bbox - works as expected - """ - shapefile = 'test.shp' - ascat_filename = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' - output_filename = f'{self._testMethodName}_{ascat_filename}' - - shapefile_file_path = join(self.test_data_dir, 'test_shapefile_subset', shapefile) - ascat_file_path = join(self.test_data_dir, ascat_filename) - output_file_path = join(self.subset_output_dir, output_filename) + assert set(out_vars) == set(included_variables + coordinate_variables).union(non_data_vars) + assert set(out_vars).isdisjoint(expected_excluded_variables) - subset.subset( - file_to_subset=ascat_file_path, - bbox=None, - output_file=output_file_path, - shapefile=shapefile_file_path - ) + in_ds.close() + out_ds.close() - # Check that each point of data is within the shapefile - shapefile_df = gpd.read_file(shapefile_file_path) - with xr.open_dataset(output_file_path) as result_dataset: - def in_shape(lon, lat): - if np.isnan(lon) or np.isnan(lat): - return - point = Point(lon, lat) - point_in_shapefile = shapefile_df.contains(point) - assert point_in_shapefile[0] - - in_shape_vec = np.vectorize(in_shape) - in_shape_vec(result_dataset.lon, result_dataset.lat) - - def test_variable_subset_oco2(self): - """ - variable subsets for groups and root group using a '/' - """ - - oco2_file_name = 'oco2_LtCO2_190201_B10206Ar_200729175909s.nc4' - output_file_name = 'oco2_test_out.nc' - shutil.copyfile(os.path.join(self.test_data_dir, 'OCO2', oco2_file_name), - os.path.join(self.subset_output_dir, oco2_file_name)) - bbox = np.array(((-180,180),(-90.0,90))) - variables = ['/xco2','/xco2_quality_flag','/Retrieval/water_height','/sounding_id'] - subset.subset( - file_to_subset=join(self.test_data_dir, 'OCO2',oco2_file_name), - bbox=bbox, - variables=variables, - output_file=join(self.subset_output_dir, output_file_name), - ) - - out_nc = nc.Dataset(join(self.subset_output_dir, output_file_name)) - var_listout = list(out_nc.groups['Retrieval'].variables.keys()) - assert ('water_height' in 
var_listout) - - def test_variable_subset_s6(self): - """ - multiple variable subset of variables in different groups in oco3 - """ - - s6_file_name = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' - output_file_name = 's6_test_out.nc' - shutil.copyfile(os.path.join(self.test_data_dir, 'sentinel_6', s6_file_name), - os.path.join(self.subset_output_dir, s6_file_name)) - bbox = np.array(((-180,180),(-90.0,90))) - variables = ['/data_01/ku/range_ocean_mle3_rms', '/data_20/ku/range_ocean'] - subset.subset( - file_to_subset=join(self.subset_output_dir, s6_file_name), - bbox=bbox, - variables=variables, - output_file=join(self.subset_output_dir, output_file_name), - ) - - out_nc = nc.Dataset(join(self.subset_output_dir, output_file_name)) - var_listout =list(out_nc.groups['data_01'].groups['ku'].variables.keys()) - var_listout.extend(list(out_nc.groups['data_20'].groups['ku'].variables.keys())) - assert ('range_ocean_mle3_rms' in var_listout) - assert ('range_ocean' in var_listout) - - - def test_transform_grouped_dataset(self): - """ - Test that the transformation function results in a correctly - formatted dataset. - """ - s6_file_name = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' - shutil.copyfile(os.path.join(self.test_data_dir, 'sentinel_6', s6_file_name), - os.path.join(self.subset_output_dir, s6_file_name)) - - nc_ds = nc.Dataset(os.path.join(self.test_data_dir, 'sentinel_6', s6_file_name)) - nc_ds_transformed = subset.transform_grouped_dataset( - nc.Dataset(os.path.join(self.subset_output_dir, s6_file_name), 'r'), - os.path.join(self.subset_output_dir, s6_file_name) - ) - # The original ds has groups - assert nc_ds.groups +def test_calculate_chunks(): + """ + Test that the calculate chunks function in the subset module + correctly calculates and returns the chunks dims dictionary. + """ + rs = np.random.RandomState(0) + dataset = xr.DataArray( + rs.randn(2, 4000, 4001), + dims=['x', 'y', 'z'] + ).to_dataset(name='foo') - # There should be no groups in the new ds - assert not nc_ds_transformed.groups + chunk_dict = subset.calculate_chunks(dataset) - # The original ds has no variables in the root group - assert not nc_ds.variables + assert chunk_dict.get('x') is None + assert chunk_dict.get('y') is None + assert chunk_dict.get('z') == 4000 - # The new ds has variables in the root group - assert nc_ds_transformed.variables - # Each var in the new ds should map to a variable in the old ds - for var_name, var in nc_ds_transformed.variables.items(): - path = var_name.strip('__').split('__') +def test_missing_coord_vars(data_dir, subset_output_dir): + """ + As of right now, the subsetter expects the data to contain lat + and lon variables. If not present, an error is thrown. + """ + file = 'MODIS_T-JPL-L2P-v2014.0.nc' + ds = xr.open_dataset(join(data_dir, file), + decode_times=False, + decode_coords=False, + mask_and_scale=False) - group = nc_ds[path[0]] - for g in path[1:-1]: - group = group[g] - assert var_name.strip('__').split('__')[-1] in group.variables.keys() + # Manually remove var which will cause error when attempting + # to subset. + ds = ds.drop_vars(['lat']) + output_file = '{}_{}'.format('missing_coords', file) + ds.to_netcdf(join(subset_output_dir, output_file)) - def test_group_subset(self): - """ - Ensure a subset function can be run on a granule that contains - groups without errors, and that the subsetted data is within - the given spatial bounds. 
- """ - s6_file_name = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' - s6_output_file_name = 'SS_S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' - # Copy S6 file to temp dir - shutil.copyfile( - os.path.join(self.test_data_dir, 'sentinel_6', s6_file_name), - os.path.join(self.subset_output_dir, s6_file_name) - ) + bbox = np.array(((-180, 180), (-90, 90))) - # Make sure it runs without errors - bbox = np.array(((150, 180), (-90, -50))) - bounds = subset.subset( - file_to_subset=os.path.join(self.subset_output_dir, s6_file_name), + with pytest.raises(ValueError): + subset.subset( + file_to_subset=join(subset_output_dir, output_file), bbox=bbox, - output_file=os.path.join(self.subset_output_dir, s6_output_file_name) + output_file='' ) - # Check that bounds are within requested bbox - assert bounds[0][0] >= bbox[0][0] - assert bounds[0][1] <= bbox[0][1] - assert bounds[1][0] >= bbox[1][0] - assert bounds[1][1] <= bbox[1][1] - - def test_json_history_metadata_append(self): - """ - Tests that the json history metadata header is appended to when it - already exists. First we create a fake json_history header for input file. - """ - test_file = next(filter( - lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f - , self.test_files)) - output_file = "{}_{}".format(self._testMethodName, test_file) - input_file_subset = join(self.subset_output_dir, "int_{}".format(output_file)) - - fake_history = [ - { - "date_time": "2021-05-10T14:30:24.553263", - "derived_from": basename(input_file_subset), - "program": SERVICE_NAME, - "version": importlib_metadata.distribution(SERVICE_NAME).version, - "parameters": "bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True", - "program_ref": "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD", - "$schema": "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" - } - ] - in_nc = xr.open_dataset(join(self.test_data_dir, test_file)) - in_nc.attrs['history_json'] = json.dumps(fake_history) - in_nc.to_netcdf(join(self.subset_output_dir, 'int_{}'.format(output_file)), 'w') +def test_data_1D(data_dir, subset_output_dir, request): + """ + Test that subsetting a 1-D granule does not result in failure. 
+ """ + merged_jason_filename = 'JA1_GPN_2PeP001_002_20020115_060706_20020115_070316.nc' + output_file = "{}_{}".format(request.node.name, merged_jason_filename) - subset.subset( - file_to_subset=input_file_subset, - bbox=np.array(((-180, 180), (-90.0, 90))), - output_file=join(self.subset_output_dir, output_file) - ) + subset.subset( + file_to_subset=join(data_dir, merged_jason_filename), + bbox=np.array(((-180, 0), (-90, 0))), + output_file=join(subset_output_dir, output_file) + ) - out_nc = xr.open_dataset(join(self.subset_output_dir, output_file)) - - history_json = json.loads(out_nc.attrs['history_json']) - assert len(history_json) == 2 - - is_valid_shema = validate(instance=history_json, schema=self.history_json_schema) - assert is_valid_shema is None - - for history in history_json: - assert "date_time" in history - assert history.get('program') == SERVICE_NAME - assert history.get('derived_from') == basename(input_file_subset) - assert history.get('version') == importlib_metadata.distribution(SERVICE_NAME).version - assert history.get('parameters') == 'bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True' - assert history.get( - 'program_ref') == "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD" - assert history.get( - '$schema') == "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" - - def test_json_history_metadata_create(self): - """ - Tests that the json history metadata header is created when it does - not exist. All test granules does not contain this header. - """ - test_file = next(filter( - lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f - , self.test_files)) - output_file = "{}_{}".format(self._testMethodName, test_file) - - # Remove the 'history' metadata from the granule - in_nc = xr.open_dataset(join(self.test_data_dir, test_file)) - in_nc.to_netcdf(join(self.subset_output_dir, 'int_{}'.format(output_file)), 'w') - - input_file_subset = join(self.subset_output_dir, "int_{}".format(output_file)) - subset.subset( - file_to_subset=input_file_subset, - bbox=np.array(((-180, 180), (-90.0, 90))), - output_file=join(self.subset_output_dir, output_file) - ) + xr.open_dataset(join(subset_output_dir, output_file)) - out_nc = xr.open_dataset(join(self.subset_output_dir, output_file)) - - history_json = json.loads(out_nc.attrs['history_json']) - assert len(history_json) == 1 - - is_valid_shema = validate(instance=history_json, schema=self.history_json_schema) - assert is_valid_shema is None - - for history in history_json: - assert "date_time" in history - assert history.get('program') == SERVICE_NAME - assert history.get('derived_from') == basename(input_file_subset) - assert history.get('version') == importlib_metadata.distribution(SERVICE_NAME).version - assert history.get('parameters') == 'bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True' - assert history.get( - 'program_ref') == "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD" - assert history.get( - '$schema') == "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" - - def test_json_history_metadata_create_origin_source(self): - """ - Tests that the json history metadata header is created when it does - not exist. All test granules does not contain this header. 
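The history_json assertions in this suite all go through jsonschema.validate, which returns None when the instance conforms and raises ValidationError otherwise. A minimal, self-contained example against a toy schema shaped like (but much smaller than) the history_json_schema fixture:

from jsonschema import validate

toy_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "required": ["date_time", "program"],
        "properties": {
            "date_time": {"type": "string"},
            "program": {"type": "string"},
        },
    },
}

toy_history = [{"date_time": "2021-05-10T14:30:24", "program": "example-subsetter"}]

# validate() is silent (returns None) on success, which is why the tests
# simply assert that its return value is None.
assert validate(instance=toy_history, schema=toy_schema) is None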
- """ - test_file = next(filter( - lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f - , self.test_files)) - output_file = "{}_{}".format(self._testMethodName, test_file) - - # Remove the 'history' metadata from the granule - in_nc = xr.open_dataset(join(self.test_data_dir, test_file)) - in_nc.to_netcdf(join(self.subset_output_dir, 'int_{}'.format(output_file)), 'w') - - input_file_subset = join(self.subset_output_dir, "int_{}".format(output_file)) - subset.subset( - file_to_subset=input_file_subset, - bbox=np.array(((-180, 180), (-90.0, 90))), - output_file=join(self.subset_output_dir, output_file), - origin_source="fake_original_file.nc" - ) - out_nc = xr.open_dataset(join(self.subset_output_dir, output_file)) - - history_json = json.loads(out_nc.attrs['history_json']) - assert len(history_json) == 1 - - is_valid_shema = validate(instance=history_json, schema=self.history_json_schema) - assert is_valid_shema is None - - for history in history_json: - assert "date_time" in history - assert history.get('program') == SERVICE_NAME - assert history.get('derived_from') == "fake_original_file.nc" - assert history.get('version') == importlib_metadata.distribution(SERVICE_NAME).version - assert history.get('parameters') == 'bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True' - assert history.get( - 'program_ref') == "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD" - assert history.get( - '$schema') == "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" - - def test_temporal_subset_ascat(self): - """ - Test that a temporal subset results in a granule that only - contains times within the given bounds. - """ - bbox = np.array(((-180, 180), (-90, 90))) - file = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' - output_file = "{}_{}".format(self._testMethodName, file) - min_time = '2015-07-02T09:00:00' - max_time = '2015-07-02T10:00:00' +def test_get_coord_variable_names(data_dir): + """ + Test that the expected coord variable names are returned + """ + file = 'MODIS_T-JPL-L2P-v2014.0.nc' + ds = xr.open_dataset(join(data_dir, file), + decode_times=False, + decode_coords=False, + mask_and_scale=False) - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time - ) + old_lat_var_name = 'lat' + old_lon_var_name = 'lon' - in_ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) + lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(ds) - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False) + assert lat_var_name[0] == old_lat_var_name + assert lon_var_name[0] == old_lon_var_name - # Check that 'time' types match - assert in_ds.time.dtype == out_ds.time.dtype + new_lat_var_name = 'latitude' + new_lon_var_name = 'x' + ds = ds.rename({old_lat_var_name: new_lat_var_name, + old_lon_var_name: new_lon_var_name}) - in_ds.close() - out_ds.close() + lat_var_name, lon_var_name = subset.compute_coordinate_variable_names(ds) - # Check that all times are within the given bounds. 
Open - # dataset using 'decode_times=True' for auto-conversions to - # datetime - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_coords=False) + assert lat_var_name[0] == new_lat_var_name + assert lon_var_name[0] == new_lon_var_name - start_dt = subset.translate_timestamp(min_time) - end_dt = subset.translate_timestamp(max_time) - # All dates should be within the given temporal bounds. - assert (out_ds.time >= pd.to_datetime(start_dt)).all() - assert (out_ds.time <= pd.to_datetime(end_dt)).all() +def test_cannot_get_coord_variable_names(data_dir): + """ + Test that, when given a dataset with coord vars which are not + expected, a ValueError is raised. + """ + file = 'MODIS_T-JPL-L2P-v2014.0.nc' + ds = xr.open_dataset(join(data_dir, file), + decode_times=False, + decode_coords=False, + mask_and_scale=False) - def test_temporal_subset_modis_a(self): - """ - Test that a temporal subset results in a granule that only - contains times within the given bounds. - """ - bbox = np.array(((-180, 180), (-90, 90))) - file = 'MODIS_A-JPL-L2P-v2014.0.nc' - output_file = "{}_{}".format(self._testMethodName, file) - min_time = '2019-08-05T06:57:00' - max_time = '2019-08-05T06:58:00' - # Actual min is 2019-08-05T06:55:01.000000000 - # Actual max is 2019-08-05T06:59:57.000000000 + old_lat_var_name = 'lat' + new_lat_var_name = 'foo' - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time - ) + ds = ds.rename({old_lat_var_name: new_lat_var_name}) + # Remove 'coordinates' attribute + for var_name, var in ds.items(): + if 'coordinates' in var.attrs: + del var.attrs['coordinates'] - in_ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) + with pytest.raises(ValueError) as e_info: + subset.compute_coordinate_variable_names(ds) - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False) - # Check that 'time' types match - assert in_ds.time.dtype == out_ds.time.dtype +def test_get_spatial_bounds(data_dir): + """ + Test that the get_spatial_bounds function works as expected. + The get_spatial_bounds function should return lat/lon min/max + which is masked and scaled for both variables. 
The values + should also be adjusted for -180,180/-90,90 coordinate types + """ + ascat_filename = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' + ghrsst_filename = '20190927000500-JPL-L2P_GHRSST-SSTskin-MODIS_A-D-v02.0-fv01.0.nc' + + ascat_dataset = xr.open_dataset( + join(data_dir, ascat_filename), + decode_times=False, + decode_coords=False, + mask_and_scale=False + ) + ghrsst_dataset = xr.open_dataset( + join(data_dir, ghrsst_filename), + decode_times=False, + decode_coords=False, + mask_and_scale=False + ) + + # ascat1 longitude is -0 360, ghrsst modis A is -180 180 + # Both have metadata for valid_min + + # Manually calculated spatial bounds + ascat_expected_lat_min = -89.4 + ascat_expected_lat_max = 89.2 + ascat_expected_lon_min = -180.0 + ascat_expected_lon_max = 180.0 + + ghrsst_expected_lat_min = -77.2 + ghrsst_expected_lat_max = -53.6 + ghrsst_expected_lon_min = -170.5 + ghrsst_expected_lon_max = -101.7 + + min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( + dataset=ascat_dataset, + lat_var_names=['lat'], + lon_var_names=['lon'] + ).flatten() + + assert np.isclose(min_lat, ascat_expected_lat_min) + assert np.isclose(max_lat, ascat_expected_lat_max) + assert np.isclose(min_lon, ascat_expected_lon_min) + assert np.isclose(max_lon, ascat_expected_lon_max) + + # Remove the label from the dataset coordinate variables indicating the valid_min. + del ascat_dataset['lat'].attrs['valid_min'] + del ascat_dataset['lon'].attrs['valid_min'] + + min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( + dataset=ascat_dataset, + lat_var_names=['lat'], + lon_var_names=['lon'] + ).flatten() + + assert np.isclose(min_lat, ascat_expected_lat_min) + assert np.isclose(max_lat, ascat_expected_lat_max) + assert np.isclose(min_lon, ascat_expected_lon_min) + assert np.isclose(max_lon, ascat_expected_lon_max) + + # Repeat test, but with GHRSST granule + + min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( + dataset=ghrsst_dataset, + lat_var_names=['lat'], + lon_var_names=['lon'] + ).flatten() + + assert np.isclose(min_lat, ghrsst_expected_lat_min) + assert np.isclose(max_lat, ghrsst_expected_lat_max) + assert np.isclose(min_lon, ghrsst_expected_lon_min) + assert np.isclose(max_lon, ghrsst_expected_lon_max) + + # Remove the label from the dataset coordinate variables indicating the valid_min. 
+ + del ghrsst_dataset['lat'].attrs['valid_min'] + del ghrsst_dataset['lon'].attrs['valid_min'] + + min_lon, max_lon, min_lat, max_lat = subset.get_spatial_bounds( + dataset=ghrsst_dataset, + lat_var_names=['lat'], + lon_var_names=['lon'] + ).flatten() + + assert np.isclose(min_lat, ghrsst_expected_lat_min) + assert np.isclose(max_lat, ghrsst_expected_lat_max) + assert np.isclose(min_lon, ghrsst_expected_lon_min) + assert np.isclose(max_lon, ghrsst_expected_lon_max) + + +def test_shapefile_subset(data_dir, subset_output_dir, request): + """ + Test that using a shapefile to subset data instead of a bbox + works as expected + """ + shapefile = 'test.shp' + ascat_filename = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' + output_filename = f'{request.node.name}_{ascat_filename}' + + shapefile_file_path = join(data_dir, 'test_shapefile_subset', shapefile) + ascat_file_path = join(data_dir, ascat_filename) + output_file_path = join(subset_output_dir, output_filename) + + subset.subset( + file_to_subset=ascat_file_path, + bbox=None, + output_file=output_file_path, + shapefile=shapefile_file_path + ) + + # Check that each point of data is within the shapefile + shapefile_df = gpd.read_file(shapefile_file_path) + with xr.open_dataset(output_file_path) as result_dataset: + def in_shape(lon, lat): + if np.isnan(lon) or np.isnan(lat): + return + point = Point(lon, lat) + point_in_shapefile = shapefile_df.contains(point) + assert point_in_shapefile[0] + + in_shape_vec = np.vectorize(in_shape) + in_shape_vec(result_dataset.lon, result_dataset.lat) + + +def test_variable_subset_oco2(data_dir, subset_output_dir): + """ + variable subsets for groups and root group using a '/' + """ - in_ds.close() - out_ds.close() + oco2_file_name = 'oco2_LtCO2_190201_B10206Ar_200729175909s.nc4' + output_file_name = 'oco2_test_out.nc' + shutil.copyfile(os.path.join(data_dir, 'OCO2', oco2_file_name), + os.path.join(subset_output_dir, oco2_file_name)) + bbox = np.array(((-180, 180), (-90.0, 90))) + variables = ['/xco2', '/xco2_quality_flag', '/Retrieval/water_height', '/sounding_id'] + subset.subset( + file_to_subset=join(data_dir, 'OCO2', oco2_file_name), + bbox=bbox, + variables=variables, + output_file=join(subset_output_dir, output_file_name), + ) + + out_nc = nc.Dataset(join(subset_output_dir, output_file_name)) + var_listout = list(out_nc.groups['Retrieval'].variables.keys()) + assert ('water_height' in var_listout) + + +def test_variable_subset_s6(data_dir, subset_output_dir): + """ + multiple variable subset of variables in different groups in oco3 + """ - # Check that all times are within the given bounds. 
Open - # dataset using 'decode_times=True' for auto-conversions to - # datetime - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_coords=False) + s6_file_name = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' + output_file_name = 's6_test_out.nc' + shutil.copyfile(os.path.join(data_dir, 'sentinel_6', s6_file_name), + os.path.join(subset_output_dir, s6_file_name)) + bbox = np.array(((-180, 180), (-90.0, 90))) + variables = ['/data_01/ku/range_ocean_mle3_rms', '/data_20/ku/range_ocean'] + subset.subset( + file_to_subset=join(subset_output_dir, s6_file_name), + bbox=bbox, + variables=variables, + output_file=join(subset_output_dir, output_file_name), + ) + + out_nc = nc.Dataset(join(subset_output_dir, output_file_name)) + var_listout = list(out_nc.groups['data_01'].groups['ku'].variables.keys()) + var_listout.extend(list(out_nc.groups['data_20'].groups['ku'].variables.keys())) + assert ('range_ocean_mle3_rms' in var_listout) + assert ('range_ocean' in var_listout) + + +def test_transform_grouped_dataset(data_dir, subset_output_dir): + """ + Test that the transformation function results in a correctly + formatted dataset. + """ + s6_file_name = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' + shutil.copyfile(os.path.join(data_dir, 'sentinel_6', s6_file_name), + os.path.join(subset_output_dir, s6_file_name)) - start_dt = subset.translate_timestamp(min_time) - end_dt = subset.translate_timestamp(max_time) + nc_ds = nc.Dataset(os.path.join(data_dir, 'sentinel_6', s6_file_name)) + nc_ds_transformed = subset.transform_grouped_dataset( + nc.Dataset(os.path.join(subset_output_dir, s6_file_name), 'r'), + os.path.join(subset_output_dir, s6_file_name) + ) - epoch_dt = out_ds['time'].values[0] + # The original ds has groups + assert nc_ds.groups - # All timedelta + epoch should be within the given temporal bounds. - assert out_ds.sst_dtime.min() + epoch_dt >= np.datetime64(start_dt) - assert out_ds.sst_dtime.min() + epoch_dt <= np.datetime64(end_dt) + # There should be no groups in the new ds + assert not nc_ds_transformed.groups - def test_temporal_subset_s6(self): - """ - Test that a temporal subset results in a granule that only - contains times within the given bounds. - """ - bbox = np.array(((-180, 180), (-90, 90))) - file = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' - # Copy S6 file to temp dir - shutil.copyfile( - os.path.join(self.test_data_dir, 'sentinel_6', file), - os.path.join(self.subset_output_dir, file) - ) - output_file = "{}_{}".format(self._testMethodName, file) - min_time = '2020-12-07T01:20:00' - max_time = '2020-12-07T01:25:00' - # Actual min is 2020-12-07T01:15:01.000000000 - # Actual max is 2020-12-07T01:30:23.000000000 + # The original ds has no variables in the root group + assert not nc_ds.variables - subset.subset( - file_to_subset=join(self.subset_output_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time - ) + # The new ds has variables in the root group + assert nc_ds_transformed.variables - # Check that all times are within the given bounds. 
Open - # dataset using 'decode_times=True' for auto-conversions to - # datetime - out_ds = xr.open_dataset( - join(self.subset_output_dir, output_file), - decode_coords=False, - group='data_01' - ) + # Each var in the new ds should map to a variable in the old ds + for var_name, var in nc_ds_transformed.variables.items(): + path = var_name.strip('__').split('__') - start_dt = subset.translate_timestamp(min_time) - end_dt = subset.translate_timestamp(max_time) + group = nc_ds[path[0]] + for g in path[1:-1]: + group = group[g] + assert var_name.strip('__').split('__')[-1] in group.variables.keys() - # All dates should be within the given temporal bounds. - assert (out_ds.time >= pd.to_datetime(start_dt)).all() - assert (out_ds.time <= pd.to_datetime(end_dt)).all() - def test_get_time_variable_name(self): - for test_file in self.test_files: - args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': True - } - time_var_names = [] - ds = xr.open_dataset(os.path.join(self.test_data_dir, test_file), **args) - lat_var_name = subset.compute_coordinate_variable_names(ds)[0][0] - time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name]) +def test_group_subset(data_dir, subset_output_dir): + """ + Ensure a subset function can be run on a granule that contains + groups without errors, and that the subsetted data is within + the given spatial bounds. + """ + s6_file_name = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' + s6_output_file_name = 'SS_S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' + # Copy S6 file to temp dir + shutil.copyfile( + os.path.join(data_dir, 'sentinel_6', s6_file_name), + os.path.join(subset_output_dir, s6_file_name) + ) + + # Make sure it runs without errors + bbox = np.array(((150, 180), (-90, -50))) + bounds = subset.subset( + file_to_subset=os.path.join(subset_output_dir, s6_file_name), + bbox=bbox, + output_file=os.path.join(subset_output_dir, s6_output_file_name) + ) + + # Check that bounds are within requested bbox + assert bounds[0][0] >= bbox[0][0] + assert bounds[0][1] <= bbox[0][1] + assert bounds[1][0] >= bbox[1][0] + assert bounds[1][1] <= bbox[1][1] + + +def test_json_history_metadata_append(history_json_schema, data_dir, subset_output_dir, request): + """ + Tests that the json history metadata header is appended to when it + already exists. First we create a fake json_history header for input file. 
+ """ + test_file = next(filter( + lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f + , TEST_DATA_FILES)) + output_file = "{}_{}".format(request.node.name, test_file) + input_file_subset = join(subset_output_dir, "int_{}".format(output_file)) + + fake_history = [ + { + "date_time": "2021-05-10T14:30:24.553263", + "derived_from": basename(input_file_subset), + "program": SERVICE_NAME, + "version": importlib_metadata.distribution(SERVICE_NAME).version, + "parameters": "bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True", + "program_ref": "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD", + "$schema": "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" + } + ] - assert time_var_name is not None - assert 'time' in time_var_name + in_nc = xr.open_dataset(join(data_dir, test_file)) + in_nc.attrs['history_json'] = json.dumps(fake_history) + in_nc.to_netcdf(join(subset_output_dir, 'int_{}'.format(output_file)), 'w') - def test_subset_jason(self): - bbox = np.array(((-180, 0), (-90, 90))) - file = 'JA1_GPN_2PeP001_002_20020115_060706_20020115_070316.nc' - output_file = "{}_{}".format(self._testMethodName, file) - min_time = "2002-01-15T06:07:06Z" - max_time = "2002-01-15T06:30:16Z" + subset.subset( + file_to_subset=input_file_subset, + bbox=np.array(((-180, 180), (-90.0, 90))), + output_file=join(subset_output_dir, output_file) + ) - subset.subset( - file_to_subset=os.path.join(self.test_data_dir, file), - bbox=bbox, - min_time=min_time, - max_time=max_time, - output_file=os.path.join(self.subset_output_dir, output_file) - ) + out_nc = xr.open_dataset(join(subset_output_dir, output_file)) - def test_subset_size(self): + history_json = json.loads(out_nc.attrs['history_json']) + assert len(history_json) == 2 - for file in self.test_files: - bbox = np.array(((-180, 0), (-30, 90))) - output_file = "{}_{}".format(self._testMethodName, file) - input_file_path = os.path.join(self.test_data_dir, file) - output_file_path = os.path.join(self.subset_output_dir, output_file) + validate(instance=history_json, schema=history_json_schema) - subset.subset( - file_to_subset=input_file_path, - bbox=bbox, - output_file=output_file_path - ) + for history in history_json: + assert "date_time" in history + assert history.get('program') == SERVICE_NAME + assert history.get('derived_from') == basename(input_file_subset) + assert history.get('version') == importlib_metadata.distribution(SERVICE_NAME).version + assert history.get('parameters') == 'bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True' + assert history.get( + 'program_ref') == "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD" + assert history.get( + '$schema') == "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" - original_file_size = os.path.getsize(input_file_path) - subset_file_size = os.path.getsize(output_file_path) - assert subset_file_size < original_file_size +def test_json_history_metadata_create(history_json_schema, data_dir, subset_output_dir, request): + """ + Tests that the json history metadata header is created when it does + not exist. All test granules does not contain this header. 
+ """ + test_file = next(filter( + lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f + , TEST_DATA_FILES)) + output_file = "{}_{}".format(request.node.name, test_file) + + # Remove the 'history' metadata from the granule + in_nc = xr.open_dataset(join(data_dir, test_file)) + in_nc.to_netcdf(join(subset_output_dir, 'int_{}'.format(output_file)), 'w') + + input_file_subset = join(subset_output_dir, "int_{}".format(output_file)) + subset.subset( + file_to_subset=input_file_subset, + bbox=np.array(((-180, 180), (-90.0, 90))), + output_file=join(subset_output_dir, output_file) + ) + + out_nc = xr.open_dataset(join(subset_output_dir, output_file)) + + history_json = json.loads(out_nc.attrs['history_json']) + assert len(history_json) == 1 + + validate(instance=history_json, schema=history_json_schema) + + for history in history_json: + assert "date_time" in history + assert history.get('program') == SERVICE_NAME + assert history.get('derived_from') == basename(input_file_subset) + assert history.get('version') == importlib_metadata.distribution(SERVICE_NAME).version + assert history.get('parameters') == 'bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True' + assert history.get( + 'program_ref') == "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD" + assert history.get( + '$schema') == "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" + + +def test_json_history_metadata_create_origin_source(history_json_schema, data_dir, subset_output_dir, request): + """ + Tests that the json history metadata header is created when it does + not exist. All test granules does not contain this header. + """ + test_file = next(filter( + lambda f: '20180101005944-REMSS-L2P_GHRSST-SSTsubskin-AMSR2-L2B_rt_r29918-v02.0-fv01.0.nc' in f + , TEST_DATA_FILES)) + output_file = "{}_{}".format(request.node.name, test_file) + + # Remove the 'history' metadata from the granule + in_nc = xr.open_dataset(join(data_dir, test_file)) + in_nc.to_netcdf(join(subset_output_dir, 'int_{}'.format(output_file)), 'w') + + input_file_subset = join(subset_output_dir, "int_{}".format(output_file)) + subset.subset( + file_to_subset=input_file_subset, + bbox=np.array(((-180, 180), (-90.0, 90))), + output_file=join(subset_output_dir, output_file), + origin_source="fake_original_file.nc" + ) + + out_nc = xr.open_dataset(join(subset_output_dir, output_file)) + + history_json = json.loads(out_nc.attrs['history_json']) + assert len(history_json) == 1 + + validate(instance=history_json, schema=history_json_schema) + + for history in history_json: + assert "date_time" in history + assert history.get('program') == SERVICE_NAME + assert history.get('derived_from') == "fake_original_file.nc" + assert history.get('version') == importlib_metadata.distribution(SERVICE_NAME).version + assert history.get('parameters') == 'bbox=[[-180.0, 180.0], [-90.0, 90.0]] cut=True' + assert history.get( + 'program_ref') == "https://cmr.earthdata.nasa.gov:443/search/concepts/S1962070864-POCLOUD" + assert history.get( + '$schema') == "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json" + + +def test_temporal_subset_ascat(data_dir, subset_output_dir, request): + """ + Test that a temporal subset results in a granule that only + contains times within the given bounds. 
+ """ + bbox = np.array(((-180, 180), (-90, 90))) + file = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' + output_file = "{}_{}".format(request.node.name, file) + min_time = '2015-07-02T09:00:00' + max_time = '2015-07-02T10:00:00' + + subset.subset( + file_to_subset=join(data_dir, file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + min_time=min_time, + max_time=max_time + ) + + in_ds = xr.open_dataset(join(data_dir, file), + decode_times=False, + decode_coords=False) + + out_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_times=False, + decode_coords=False) - def test_duplicate_dims_sndr(self): - """ - Check if SNDR Climcaps files run successfully even though - these files have variables with duplicate dimensions - """ - SNDR_dir = join(self.test_data_dir, 'SNDR') - sndr_file = 'SNDR.J1.CRIMSS.20210224T0100.m06.g011.L2_CLIMCAPS_RET.std.v02_28.G.210331064430.nc' + # Check that 'time' types match + assert in_ds.time.dtype == out_ds.time.dtype - bbox = np.array(((-180, 90), (-90, 90))) - output_file = "{}_{}".format(self._testMethodName, sndr_file) - shutil.copyfile( - os.path.join(SNDR_dir, sndr_file), - os.path.join(self.subset_output_dir, sndr_file) - ) - box_test = subset.subset( - file_to_subset=join(self.subset_output_dir, sndr_file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time='2021-02-24T00:50:20Z', - max_time='2021-02-24T01:09:55Z' - ) - # check if the box_test is + in_ds.close() + out_ds.close() - in_nc = nc.Dataset(join(SNDR_dir, sndr_file)) - out_nc = nc.Dataset(join(self.subset_output_dir, output_file)) + # Check that all times are within the given bounds. Open + # dataset using 'decode_times=True' for auto-conversions to + # datetime + out_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_coords=False) - for var_name, variable in in_nc.variables.items(): - assert in_nc[var_name].shape == out_nc[var_name].shape + start_dt = subset.translate_timestamp(min_time) + end_dt = subset.translate_timestamp(max_time) - def test_duplicate_dims_tropomi(self): - """ - Check if SNDR Climcaps files run successfully even though - these files have variables with duplicate dimensions - """ - TROP_dir = join(self.test_data_dir, 'tropomi') - trop_file = 'S5P_OFFL_L2__AER_LH_20210704T005246_20210704T023416_19290_02_020200_20210708T023111.nc' + # All dates should be within the given temporal bounds. 
+ assert (out_ds.time >= pd.to_datetime(start_dt)).all() + assert (out_ds.time <= pd.to_datetime(end_dt)).all() - bbox = np.array(((-180, 180), (-90, 90))) - output_file = "{}_{}".format(self._testMethodName, trop_file) - shutil.copyfile( - os.path.join(TROP_dir, trop_file), - os.path.join(self.subset_output_dir, trop_file) - ) - box_test = subset.subset( - file_to_subset=join(self.subset_output_dir, trop_file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file) - ) - # check if the box_test is - - in_nc = nc.Dataset(join(TROP_dir, trop_file)) - out_nc = nc.Dataset(join(self.subset_output_dir, output_file)) - - for var_name, variable in in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables.items(): - assert variable.shape == out_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[var_name].shape - - - def test_omi_novars_subset(self): - """ - Check that the OMI variables are conserved when no variable are specified - the data field and lat/lon are in different groups - """ - omi_dir = join(self.test_data_dir, 'OMI') - omi_file = 'OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5' - - bbox = np.array(((-180, 90), (-90, 90))) - output_file = "{}_{}".format(self._testMethodName, omi_file) - shutil.copyfile( - os.path.join(omi_dir, omi_file), - os.path.join(self.subset_output_dir, omi_file) - ) - box_test = subset.subset( - file_to_subset=join(self.subset_output_dir, omi_file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - ) - # check if the box_test is - in_nc = nc.Dataset(join(omi_dir, omi_file)) - out_nc = nc.Dataset(join(self.subset_output_dir, output_file)) +def test_temporal_subset_modis_a(data_dir, subset_output_dir, request): + """ + Test that a temporal subset results in a granule that only + contains times within the given bounds. + """ + bbox = np.array(((-180, 180), (-90, 90))) + file = 'MODIS_A-JPL-L2P-v2014.0.nc' + output_file = "{}_{}".format(request.node.name, file) + min_time = '2019-08-05T06:57:00' + max_time = '2019-08-05T06:58:00' + # Actual min is 2019-08-05T06:55:01.000000000 + # Actual max is 2019-08-05T06:59:57.000000000 + + subset.subset( + file_to_subset=join(data_dir, file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + min_time=min_time, + max_time=max_time + ) + + in_ds = xr.open_dataset(join(data_dir, file), + decode_times=False, + decode_coords=False) + + out_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_times=False, + decode_coords=False) + + # Check that 'time' types match + assert in_ds.time.dtype == out_ds.time.dtype - for var_name, variable in in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups['Geolocation Fields'].variables.items(): - assert in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups['Geolocation Fields'].variables[var_name].shape == \ - out_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups['Geolocation Fields'].variables[var_name].shape + in_ds.close() + out_ds.close() + # Check that all times are within the given bounds. 
Open + # dataset using 'decode_times=True' for auto-conversions to + # datetime + out_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_coords=False) - def test_root_group(self): - """test that the GROUP_DELIM string, '__', is added to variables in the root group""" + start_dt = subset.translate_timestamp(min_time) + end_dt = subset.translate_timestamp(max_time) - sndr_file_name = 'SNDR.SNPP.CRIMSS.20200118T0024.m06.g005.L2_CLIMCAPS_RET.std.v02_28.G.200314032326_subset.nc' - shutil.copyfile(os.path.join(self.test_data_dir, 'SNDR', sndr_file_name), - os.path.join(self.subset_output_dir, sndr_file_name)) + epoch_dt = out_ds['time'].values[0] - nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, sndr_file_name)) + # All timedelta + epoch should be within the given temporal bounds. + assert out_ds.sst_dtime.min() + epoch_dt >= np.datetime64(start_dt) + assert out_ds.sst_dtime.min() + epoch_dt <= np.datetime64(end_dt) - args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False - } - nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, sndr_file_name)) - with xr.open_dataset( + +def test_temporal_subset_s6(data_dir, subset_output_dir, request): + """ + Test that a temporal subset results in a granule that only + contains times within the given bounds. + """ + bbox = np.array(((-180, 180), (-90, 90))) + file = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' + # Copy S6 file to temp dir + shutil.copyfile( + os.path.join(data_dir, 'sentinel_6', file), + os.path.join(subset_output_dir, file) + ) + output_file = "{}_{}".format(request.node.name, file) + min_time = '2020-12-07T01:20:00' + max_time = '2020-12-07T01:25:00' + # Actual min is 2020-12-07T01:15:01.000000000 + # Actual max is 2020-12-07T01:30:23.000000000 + + subset.subset( + file_to_subset=join(subset_output_dir, file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + min_time=min_time, + max_time=max_time + ) + + # Check that all times are within the given bounds. Open + # dataset using 'decode_times=True' for auto-conversions to + # datetime + out_ds = xr.open_dataset( + join(subset_output_dir, output_file), + decode_coords=False, + group='data_01' + ) + + start_dt = subset.translate_timestamp(min_time) + end_dt = subset.translate_timestamp(max_time) + + # All dates should be within the given temporal bounds. 
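The MODIS check earlier in this hunk reconstructs absolute times by adding the sst_dtime offsets to the first decoded 'time' value before comparing against the requested window. A small numpy sketch of that pattern, with made-up values:

import numpy as np

epoch = np.datetime64('2019-08-05T06:57:10')              # stands in for out_ds['time'].values[0]
sst_dtime = np.array([0, 5, 30], dtype='timedelta64[s]')  # per-pixel offsets from that epoch
absolute_times = epoch + sst_dtime

# Mirror the test: the earliest reconstructed time falls inside the requested bounds.
assert absolute_times.min() >= np.datetime64('2019-08-05T06:57:00')
assert absolute_times.min() <= np.datetime64('2019-08-05T06:58:00')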
+ assert (out_ds.time >= pd.to_datetime(start_dt)).all() + assert (out_ds.time <= pd.to_datetime(end_dt)).all() + + +@pytest.mark.parametrize('test_file', TEST_DATA_FILES) +def test_get_time_variable_name(test_file, data_dir, subset_output_dir): + args = { + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': True + } + ds, rename_vars, _ = subset.open_as_nc_dataset(os.path.join(data_dir, test_file)) + ds = xr.open_dataset(xr.backends.NetCDF4DataStore(ds), **args) + + lat_var_name = subset.compute_coordinate_variable_names(ds)[0][0] + time_var_name = subset.compute_time_variable_name(ds, ds[lat_var_name]) + + assert time_var_name is not None + assert 'time' in time_var_name + + +def test_subset_jason(data_dir, subset_output_dir, request): + bbox = np.array(((-180, 0), (-90, 90))) + file = 'JA1_GPN_2PeP001_002_20020115_060706_20020115_070316.nc' + output_file = "{}_{}".format(request.node.name, file) + min_time = "2002-01-15T06:07:06Z" + max_time = "2002-01-15T06:30:16Z" + + subset.subset( + file_to_subset=os.path.join(data_dir, file), + bbox=bbox, + min_time=min_time, + max_time=max_time, + output_file=os.path.join(subset_output_dir, output_file) + ) + + +@pytest.mark.parametrize('test_file', TEST_DATA_FILES) +def test_subset_size(test_file, data_dir, subset_output_dir, request): + bbox = np.array(((-180, 0), (-30, 90))) + output_file = "{}_{}".format(request.node.name, test_file) + input_file_path = os.path.join(data_dir, test_file) + output_file_path = os.path.join(subset_output_dir, output_file) + + subset.subset( + file_to_subset=input_file_path, + bbox=bbox, + output_file=output_file_path + ) + + original_file_size = os.path.getsize(input_file_path) + subset_file_size = os.path.getsize(output_file_path) + + assert subset_file_size < original_file_size + + +def test_duplicate_dims_sndr(data_dir, subset_output_dir, request): + """ + Check if SNDR Climcaps files run successfully even though + these files have variables with duplicate dimensions + """ + SNDR_dir = join(data_dir, 'SNDR') + sndr_file = 'SNDR.J1.CRIMSS.20210224T0100.m06.g011.L2_CLIMCAPS_RET.std.v02_28.G.210331064430.nc' + + bbox = np.array(((-180, 90), (-90, 90))) + output_file = "{}_{}".format(request.node.name, sndr_file) + shutil.copyfile( + os.path.join(SNDR_dir, sndr_file), + os.path.join(subset_output_dir, sndr_file) + ) + box_test = subset.subset( + file_to_subset=join(subset_output_dir, sndr_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + min_time='2021-02-24T00:50:20Z', + max_time='2021-02-24T01:09:55Z' + ) + # check if the box_test is + + in_nc = nc.Dataset(join(SNDR_dir, sndr_file)) + out_nc = nc.Dataset(join(subset_output_dir, output_file)) + + for var_name, variable in in_nc.variables.items(): + assert in_nc[var_name].shape == out_nc[var_name].shape + + +def test_duplicate_dims_tropomi(data_dir, subset_output_dir, request): + """ + Check if SNDR Climcaps files run successfully even though + these files have variables with duplicate dimensions + """ + TROP_dir = join(data_dir, 'tropomi') + trop_file = 'S5P_OFFL_L2__AER_LH_20210704T005246_20210704T023416_19290_02_020200_20210708T023111.nc' + + bbox = np.array(((-180, 180), (-90, 90))) + output_file = "{}_{}".format(request.node.name, trop_file) + shutil.copyfile( + os.path.join(TROP_dir, trop_file), + os.path.join(subset_output_dir, trop_file) + ) + box_test = subset.subset( + file_to_subset=join(subset_output_dir, trop_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file) + ) + # check if the box_test is 
+ + in_nc = nc.Dataset(join(TROP_dir, trop_file)) + out_nc = nc.Dataset(join(subset_output_dir, output_file)) + + for var_name, variable in in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups[ + 'DETAILED_RESULTS'].variables.items(): + assert variable.shape == \ + out_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['DETAILED_RESULTS'].variables[var_name].shape + + +def test_omi_novars_subset(data_dir, subset_output_dir, request): + """ + Check that the OMI variables are conserved when no variable are specified + the data field and lat/lon are in different groups + """ + omi_dir = join(data_dir, 'OMI') + omi_file = 'OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5' + + bbox = np.array(((-180, 90), (-90, 90))) + output_file = "{}_{}".format(request.node.name, omi_file) + shutil.copyfile( + os.path.join(omi_dir, omi_file), + os.path.join(subset_output_dir, omi_file) + ) + box_test = subset.subset( + file_to_subset=join(subset_output_dir, omi_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + ) + # check if the box_test is + + in_nc = nc.Dataset(join(omi_dir, omi_file)) + out_nc = nc.Dataset(join(subset_output_dir, output_file)) + + for var_name, variable in in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups[ + 'Geolocation Fields'].variables.items(): + assert in_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups[ + 'Geolocation Fields'].variables[var_name].shape == \ + out_nc.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount SO2'].groups[ + 'Geolocation Fields'].variables[var_name].shape + + +def test_root_group(data_dir, subset_output_dir): + """test that the GROUP_DELIM string, '__', is added to variables in the root group""" + + sndr_file_name = 'SNDR.SNPP.CRIMSS.20200118T0024.m06.g005.L2_CLIMCAPS_RET.std.v02_28.G.200314032326_subset.nc' + shutil.copyfile(os.path.join(data_dir, 'SNDR', sndr_file_name), + os.path.join(subset_output_dir, sndr_file_name)) + + nc_dataset = nc.Dataset(os.path.join(subset_output_dir, sndr_file_name)) + + args = { + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': False + } + nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(subset_output_dir, sndr_file_name)) + with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), **args - ) as dataset: - var_list = list(dataset.variables) - assert (var_list[0][0:2] == subset.GROUP_DELIM) - group_lst = [] - for var_name in dataset.variables.keys(): #need logic if there is data in the top level not in a group - group_lst.append('/'.join(var_name.split(subset.GROUP_DELIM)[:-1])) - group_lst = ['/' if group=='' else group for group in group_lst] - groups = set(group_lst) - expected_group = {'/mw', '/ave_kern', '/', '/mol_lay', '/aux'} - assert (groups == expected_group) - - def test_get_time_squeeze(self): - """test builtin squeeze method on the lat and time variables so - when the two have the same shape with a time and delta time in - the tropomi product granuales the get_time_variable_name returns delta time as well""" - - tropomi_file_name = 'S5P_OFFL_L2__SO2____20200713T002730_20200713T020900_14239_01_020103_20200721T191355_subset.nc4' - shutil.copyfile(os.path.join(self.test_data_dir, 'tropomi', tropomi_file_name), - os.path.join(self.subset_output_dir, tropomi_file_name)) - - nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file_name)) - - args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False - } - 
nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file_name)) - with xr.open_dataset( + ) as dataset: + var_list = list(dataset.variables) + assert (var_list[0][0:2] == subset.GROUP_DELIM) + group_lst = [] + for var_name in dataset.variables.keys(): # need logic if there is data in the top level not in a group + group_lst.append('/'.join(var_name.split(subset.GROUP_DELIM)[:-1])) + group_lst = ['/' if group == '' else group for group in group_lst] + groups = set(group_lst) + expected_group = {'/mw', '/ave_kern', '/', '/mol_lay', '/aux'} + assert (groups == expected_group) + + +def test_get_time_squeeze(data_dir, subset_output_dir): + """test builtin squeeze method on the lat and time variables so + when the two have the same shape with a time and delta time in + the tropomi product granuales the get_time_variable_name returns delta time as well""" + + tropomi_file_name = 'S5P_OFFL_L2__SO2____20200713T002730_20200713T020900_14239_01_020103_20200721T191355_subset.nc4' + shutil.copyfile(os.path.join(data_dir, 'tropomi', tropomi_file_name), + os.path.join(subset_output_dir, tropomi_file_name)) + + nc_dataset = nc.Dataset(os.path.join(subset_output_dir, tropomi_file_name)) + + args = { + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': False + } + nc_dataset = subset.transform_grouped_dataset(nc_dataset, + os.path.join(subset_output_dir, tropomi_file_name)) + with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), **args - ) as dataset: - lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] - time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name]) - lat_dims = dataset[lat_var_name].squeeze().dims - time_dims = dataset[time_var_name].squeeze().dims - assert (lat_dims == time_dims) + ) as dataset: + lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] + time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name]) + lat_dims = dataset[lat_var_name].squeeze().dims + time_dims = dataset[time_var_name].squeeze().dims + assert (lat_dims == time_dims) + + +def test_get_indexers_nd(data_dir, subset_output_dir): + """test that the time coordinate is not included in the indexers. Also test that the dimensions are the same for + a global box subset""" + tropomi_file_name = 'S5P_OFFL_L2__SO2____20200713T002730_20200713T020900_14239_01_020103_20200721T191355_subset.nc4' + shutil.copyfile(os.path.join(data_dir, 'tropomi', tropomi_file_name), + os.path.join(subset_output_dir, tropomi_file_name)) + + nc_dataset = nc.Dataset(os.path.join(subset_output_dir, tropomi_file_name)) + + args = { + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': False + } + nc_dataset = subset.transform_grouped_dataset(nc_dataset, + os.path.join(subset_output_dir, tropomi_file_name)) + with xr.open_dataset( + xr.backends.NetCDF4DataStore(nc_dataset), + **args + ) as dataset: + time_var_names = [] + lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] + lon_var_name = subset.compute_coordinate_variable_names(dataset)[1][0] + time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name]) + oper = operator.and_ - def test_get_indexers_nd(self): - """test that the time coordinate is not included in the indexers. 
Also test that the dimensions are the same for - a global box subset""" - tropomi_file_name = 'S5P_OFFL_L2__SO2____20200713T002730_20200713T020900_14239_01_020103_20200721T191355_subset.nc4' - shutil.copyfile(os.path.join(self.test_data_dir, 'tropomi', tropomi_file_name), - os.path.join(self.subset_output_dir, tropomi_file_name)) + cond = oper( + (dataset[lon_var_name] >= -180), + (dataset[lon_var_name] <= 180) + ) & (dataset[lat_var_name] >= -90) & (dataset[lat_var_name] <= 90) & True - nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file_name)) + indexers = xre.get_indexers_from_nd(cond, True) + indexed_cond = cond.isel(**indexers) + indexed_ds = dataset.isel(**indexers) + new_dataset = indexed_ds.where(indexed_cond) - args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False - } - nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file_name)) - with xr.open_dataset( - xr.backends.NetCDF4DataStore(nc_dataset), - **args - ) as dataset: - time_var_names = [] - lat_var_name = subset.compute_coordinate_variable_names(dataset)[0][0] - lon_var_name = subset.compute_coordinate_variable_names(dataset)[1][0] - time_var_name = subset.compute_time_variable_name(dataset, dataset[lat_var_name]) - oper = operator.and_ - - cond = oper( - (dataset[lon_var_name] >= -180), - (dataset[lon_var_name] <= 180) - ) & (dataset[lat_var_name] >= -90) & (dataset[lat_var_name] <= 90) & True - - indexers = xre.get_indexers_from_nd(cond, True) - indexed_cond = cond.isel(**indexers) - indexed_ds = dataset.isel(**indexers) - new_dataset = indexed_ds.where(indexed_cond) - - assert ((time_var_name not in indexers.keys()) == True) #time can't be in the index - assert (new_dataset.dims == dataset.dims) - - def test_variable_type_string_oco2(self): - """Code passes a ceating a variable that is type object in oco2 file""" - - oco2_file_name = 'oco2_LtCO2_190201_B10206Ar_200729175909s.nc4' - output_file_name = 'oco2_test_out.nc' - shutil.copyfile(os.path.join(self.test_data_dir, 'OCO2', oco2_file_name), - os.path.join(self.subset_output_dir, oco2_file_name)) - bbox = np.array(((-180,180),(-90.0,90))) + assert ((time_var_name not in indexers.keys()) == True) # time can't be in the index + assert (new_dataset.dims == dataset.dims) - subset.subset( - file_to_subset=join(self.test_data_dir, 'OCO2',oco2_file_name), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file_name), - ) - in_nc = xr.open_dataset(join(self.test_data_dir, 'OCO2',oco2_file_name)) - out_nc = xr.open_dataset(join(self.subset_output_dir, output_file_name)) - assert (in_nc.variables['source_files'].dtype == out_nc.variables['source_files'].dtype) - - def test_transform_h5py_dataset(self): - """ - Test that the transformation function results in a correctly - formatted dataset for h5py files - """ - OMI_file_name = 'OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5' - shutil.copyfile(os.path.join(self.test_data_dir, 'OMI', OMI_file_name), - os.path.join(self.subset_output_dir, OMI_file_name)) - - h5_ds = h5py.File(os.path.join(self.test_data_dir, 'OMI', OMI_file_name), 'r') - - entry_lst = [] - # Get root level objects - key_lst = list(h5_ds.keys()) - - # Go through every level of the file to fill out the remaining objects - for entry_str in key_lst: - # If object is a group, add it to the loop list - if (isinstance(h5_ds[entry_str],h5py.Group)): - for group_keys in list(h5_ds[entry_str].keys()): - if (isinstance(h5_ds[entry_str + "/" 
+ group_keys], h5py.Dataset)): - entry_lst.append(entry_str + "/" + group_keys) - key_lst.append(entry_str + "/" + group_keys) - - nc_dataset, has_groups = subset.h5file_transform(os.path.join(self.subset_output_dir, OMI_file_name)) - - nc_vars_flattened = list(nc_dataset.variables.keys()) - for i in range(len(entry_lst)): # go through all the datasets in h5py file - input_variable = '__'+entry_lst[i].replace('/', '__') - output_variable = nc_vars_flattened[i] - assert (input_variable == output_variable) - - nc_dataset.close() - h5_ds.close() - - - def test_variable_dims_matched_tropomi(self): - """ - Code must match the dimensions for each variable rather than - assume all dimensions in a group are the same - """ - - tropomi_file_name = 'S5P_OFFL_L2__SO2____20200713T002730_20200713T020900_14239_01_020103_20200721T191355_subset.nc4' - output_file_name = 'tropomi_test_out.nc' - shutil.copyfile(os.path.join(self.test_data_dir, 'tropomi', tropomi_file_name), - os.path.join(self.subset_output_dir, tropomi_file_name)) - - in_nc = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file_name)) - - # Get variable dimensions from input dataset - in_var_dims = { - var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] - for var_name, var in in_nc.groups['PRODUCT'].variables.items() - } - - # Get variables from METADATA group - in_var_dims.update( - { - var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] - for var_name, var in in_nc.groups['METADATA'].groups['QA_STATISTICS'].variables.items() - } - ) - # Include PRODUCT>SUPPORT_DATA>GEOLOCATIONS location - in_var_dims.update( - { - var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] - for var_name, var in in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['GEOLOCATIONS'].variables.items() - } - ) +def test_variable_type_string_oco2(data_dir, subset_output_dir): + """Code passes a ceating a variable that is type object in oco2 file""" - out_nc = subset.transform_grouped_dataset( - in_nc, os.path.join(self.subset_output_dir, tropomi_file_name) - ) + oco2_file_name = 'oco2_LtCO2_190201_B10206Ar_200729175909s.nc4' + output_file_name = 'oco2_test_out.nc' + shutil.copyfile(os.path.join(data_dir, 'OCO2', oco2_file_name), + os.path.join(subset_output_dir, oco2_file_name)) + bbox = np.array(((-180, 180), (-90.0, 90))) - # Get variable dimensions from output dataset - out_var_dims = { - var_name.split(subset.GROUP_DELIM)[-1]: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] - for var_name, var in out_nc.variables.items() - } + subset.subset( + file_to_subset=join(data_dir, 'OCO2', oco2_file_name), + bbox=bbox, + output_file=join(subset_output_dir, output_file_name), + ) - self.assertDictEqual(in_var_dims, out_var_dims) + in_nc = xr.open_dataset(join(data_dir, 'OCO2', oco2_file_name)) + out_nc = xr.open_dataset(join(subset_output_dir, output_file_name)) + assert (in_nc.variables['source_files'].dtype == out_nc.variables['source_files'].dtype) - def test_temporal_merged_topex(self): - """ - Test that a temporal subset results in a granule that only - contains times within the given bounds. 
- """ - bbox = np.array(((-180, 180), (-90, 90))) - file = 'Merged_TOPEX_Jason_OSTM_Jason-3_Cycle_002.V4_2.nc' - # Copy S6 file to temp dir - shutil.copyfile( - os.path.join(self.test_data_dir, file), - os.path.join(self.subset_output_dir, file) - ) - output_file = "{}_{}".format(self._testMethodName, file) - min_time = '1992-01-01T00:00:00' - max_time = '1992-11-01T00:00:00' - # Actual min is 2020-12-07T01:15:01.000000000 - # Actual max is 2020-12-07T01:30:23.000000000 +def test_transform_h5py_dataset(data_dir, subset_output_dir): + """ + Test that the transformation function results in a correctly + formatted dataset for h5py files + """ + OMI_file_name = 'OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5' + shutil.copyfile(os.path.join(data_dir, 'OMI', OMI_file_name), + os.path.join(subset_output_dir, OMI_file_name)) - subset.subset( - file_to_subset=join(self.subset_output_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time - ) + h5_ds = h5py.File(os.path.join(data_dir, 'OMI', OMI_file_name), 'r') - # Check that all times are within the given bounds. Open - # dataset using 'decode_times=True' for auto-conversions to - # datetime - out_ds = xr.open_dataset( - join(self.subset_output_dir, output_file), - decode_coords=False - ) + entry_lst = [] + # Get root level objects + key_lst = list(h5_ds.keys()) - start_dt = subset.translate_timestamp(min_time) - end_dt = subset.translate_timestamp(max_time) + # Go through every level of the file to fill out the remaining objects + for entry_str in key_lst: + # If object is a group, add it to the loop list + if (isinstance(h5_ds[entry_str], h5py.Group)): + for group_keys in list(h5_ds[entry_str].keys()): + if (isinstance(h5_ds[entry_str + "/" + group_keys], h5py.Dataset)): + entry_lst.append(entry_str + "/" + group_keys) + key_lst.append(entry_str + "/" + group_keys) - # delta time from the MJD of this data collection - mjd_dt = np.datetime64("1992-01-01") - start_delta_dt = np.datetime64(start_dt) - mjd_dt - end_delta_dt = np.datetime64(end_dt) - mjd_dt + nc_dataset, has_groups = subset.h5file_transform(os.path.join(subset_output_dir, OMI_file_name)) - # All dates should be within the given temporal bounds. 
- assert (out_ds.time.values >= start_delta_dt).all() - assert (out_ds.time.values <= end_delta_dt).all() + nc_vars_flattened = list(nc_dataset.variables.keys()) + for i in range(len(entry_lst)): # go through all the datasets in h5py file + input_variable = '__' + entry_lst[i].replace('/', '__') + output_variable = nc_vars_flattened[i] + assert (input_variable == output_variable) - def test_get_time_epoch_var(self): - """ - Test that get_time_epoch_var method returns the 'time' variable for the tropomi CH4 granule" - """ - bbox = np.array(((-180, 180), (-90, 90))) - tropomi_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' + nc_dataset.close() + h5_ds.close() - shutil.copyfile(os.path.join(self.test_data_dir, 'tropomi', tropomi_file), - os.path.join(self.subset_output_dir, tropomi_file)) +def test_variable_dims_matched_tropomi(data_dir, subset_output_dir): + """ + Code must match the dimensions for each variable rather than + assume all dimensions in a group are the same + """ + + tropomi_file_name = 'S5P_OFFL_L2__SO2____20200713T002730_20200713T020900_14239_01_020103_20200721T191355_subset.nc4' + output_file_name = 'tropomi_test_out.nc' + shutil.copyfile(os.path.join(data_dir, 'tropomi', tropomi_file_name), + os.path.join(subset_output_dir, tropomi_file_name)) - nc_dataset = nc.Dataset(os.path.join(self.subset_output_dir, tropomi_file), mode='r') + in_nc = nc.Dataset(os.path.join(subset_output_dir, tropomi_file_name)) - nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(self.subset_output_dir, tropomi_file)) + # Get variable dimensions from input dataset + in_var_dims = { + var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + for var_name, var in in_nc.groups['PRODUCT'].variables.items() + } - args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False + # Get variables from METADATA group + in_var_dims.update( + { + var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + for var_name, var in in_nc.groups['METADATA'].groups['QA_STATISTICS'].variables.items() + } + ) + # Include PRODUCT>SUPPORT_DATA>GEOLOCATIONS location + in_var_dims.update( + { + var_name: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + for var_name, var in + in_nc.groups['PRODUCT'].groups['SUPPORT_DATA'].groups['GEOLOCATIONS'].variables.items() } + ) - with xr.open_dataset( - xr.backends.NetCDF4DataStore(nc_dataset), - **args - ) as dataset: + out_nc = subset.transform_grouped_dataset( + in_nc, os.path.join(subset_output_dir, tropomi_file_name) + ) - lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(dataset) - time_var_names = [ - subset.compute_time_variable_name( - dataset, dataset[lat_var_name] - ) for lat_var_name in lat_var_names - ] - epoch_time_var = subset.get_time_epoch_var(dataset, time_var_names[0]) - - assert epoch_time_var.split('__')[-1] == 'time' - - def test_temporal_variable_subset(self): - """ - Test that both a temporal and variable subset can be executed - on a granule, and that all of the data within that granule is - subsetted as expected. 
- """ - bbox = np.array(((-180, 180), (-90, 90))) - file = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' - output_file = "{}_{}".format(self._testMethodName, file) - min_time = '2015-07-02T09:00:00' - max_time = '2015-07-02T10:00:00' - variables = [ - 'wind_speed', - 'wind_dir' - ] + # Get variable dimensions from output dataset + out_var_dims = { + var_name.split(subset.GROUP_DELIM)[-1]: [dim.split(subset.GROUP_DELIM)[-1] for dim in var.dimensions] + for var_name, var in out_nc.variables.items() + } - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time, - variables=variables - ) + TestCase().assertDictEqual(in_var_dims, out_var_dims) - in_ds = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) - - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False) - - # Check that 'time' types match - assert in_ds.time.dtype == out_ds.time.dtype - - in_ds.close() - out_ds.close() - - # Check that all times are within the given bounds. Open - # dataset using 'decode_times=True' for auto-conversions to - # datetime - out_ds = xr.open_dataset(join(self.subset_output_dir, output_file), - decode_coords=False) - - start_dt = subset.translate_timestamp(min_time) - end_dt = subset.translate_timestamp(max_time) - - # All dates should be within the given temporal bounds. - assert (out_ds.time >= pd.to_datetime(start_dt)).all() - assert (out_ds.time <= pd.to_datetime(end_dt)).all() - - # Only coordinate variables and variables requested in variable - # subset should be present. - assert set(np.append(['lat', 'lon', 'time'], variables)) == set(out_ds.data_vars.keys()) - - - def test_temporal_he5file_subset(self): - """ - Test that the time type changes to datetime for subsetting - """ - - OMI_file_names = ['OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5', - 'OMI-Aura_L2-OMBRO_2020m0116t1207-o82471_v003-2020m0116t182003.he5'] - OMI_copy_file = 'OMI_copy_testing_2.he5' - for i in OMI_file_names: - shutil.copyfile(os.path.join(self.test_data_dir, 'OMI', i), - os.path.join(self.subset_output_dir, OMI_copy_file)) - min_time='2020-01-16T12:30:00Z' - max_time='2020-01-16T12:40:00Z' - bbox = np.array(((-180, 180), (-90, 90))) - nc_dataset, has_groups = subset.h5file_transform(os.path.join(self.subset_output_dir, OMI_copy_file)) - - args = { - 'decode_coords': False, - 'mask_and_scale': False, - 'decode_times': False - } - if min_time or max_time: - args['decode_times'] = True - - with xr.open_dataset( - xr.backends.NetCDF4DataStore(nc_dataset), - **args - ) as dataset: - lat_var_names, lon_var_names, time_var_names = subset.get_coordinate_variable_names( - dataset=dataset, - lat_var_names=None, - lon_var_names=None, - time_var_names=None - ) - if 'BRO' in i: - assert any('utc' in x.lower() for x in time_var_names) - - dataset, start_date = subset.convert_to_datetime(dataset, time_var_names) - assert dataset[time_var_names[0]].dtype == 'datetime64[ns]' - - - def test_he5_timeattrs_output(self): - """Test that the time attributes in the output match the attributes of the input for OMI test files""" - - omi_dir = join(self.test_data_dir, 'OMI') - omi_file = 'OMI-Aura_L2-OMBRO_2020m0116t1207-o82471_v003-2020m0116t182003.he5' - omi_file_input = 'input'+omi_file - bbox = np.array(((-180, 90), (-90, 90))) - output_file = "{}_{}".format(self._testMethodName, omi_file) - 
shutil.copyfile( - os.path.join(omi_dir, omi_file), - os.path.join(self.subset_output_dir, omi_file) - ) - shutil.copyfile( - os.path.join(omi_dir, omi_file), - os.path.join(self.subset_output_dir, omi_file_input) - ) - - min_time='2020-01-16T12:30:00Z' - max_time='2020-01-16T12:40:00Z' - bbox = np.array(((-180, 180), (-90, 90))) - nc_dataset_input = nc.Dataset(os.path.join(self.subset_output_dir, omi_file_input)) - incut_set = nc_dataset_input.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups['Geolocation Fields'] - xr_dataset_input = xr.open_dataset(xr.backends.NetCDF4DataStore(incut_set)) - inattrs = xr_dataset_input['Time'].attrs - - subset.subset( - file_to_subset=os.path.join(self.subset_output_dir, omi_file), - bbox=bbox, - output_file=os.path.join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time - ) +def test_temporal_merged_topex(data_dir, subset_output_dir, request): + """ + Test that a temporal subset results in a granule that only + contains times within the given bounds. + """ + bbox = np.array(((-180, 180), (-90, 90))) + file = 'Merged_TOPEX_Jason_OSTM_Jason-3_Cycle_002.V4_2.nc' + # Copy S6 file to temp dir + shutil.copyfile( + os.path.join(data_dir, file), + os.path.join(subset_output_dir, file) + ) + output_file = "{}_{}".format(request.node.name, file) + min_time = '1992-01-01T00:00:00' + max_time = '1992-11-01T00:00:00' + # Actual min is 2020-12-07T01:15:01.000000000 + # Actual max is 2020-12-07T01:30:23.000000000 + + subset.subset( + file_to_subset=join(subset_output_dir, file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + min_time=min_time, + max_time=max_time + ) + + # Check that all times are within the given bounds. Open + # dataset using 'decode_times=True' for auto-conversions to + # datetime + out_ds = xr.open_dataset( + join(subset_output_dir, output_file), + decode_coords=False + ) + + start_dt = subset.translate_timestamp(min_time) + end_dt = subset.translate_timestamp(max_time) + + # delta time from the MJD of this data collection + mjd_dt = np.datetime64("1992-01-01") + start_delta_dt = np.datetime64(start_dt) - mjd_dt + end_delta_dt = np.datetime64(end_dt) - mjd_dt + + # All dates should be within the given temporal bounds. 
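In the merged TOPEX/Jason test above, the granule's time values behave as offsets from the collection's 1992-01-01 reference date, so the requested ISO bounds are shifted onto that same offset basis before the comparison that follows. A numpy sketch of the conversion with illustrative offsets:

import numpy as np

epoch = np.datetime64('1992-01-01')                        # collection reference date
time_offsets = np.array([5, 100], dtype='timedelta64[D]')  # illustrative offsets from the epoch

start_delta = np.datetime64('1992-01-01T00:00:00') - epoch
end_delta = np.datetime64('1992-11-01T00:00:00') - epoch

assert (time_offsets >= start_delta).all()
assert (time_offsets <= end_delta).all()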
+ assert (out_ds.time.values >= start_delta_dt).all() + assert (out_ds.time.values <= end_delta_dt).all() + + +def test_get_time_epoch_var(data_dir, subset_output_dir): + """ + Test that get_time_epoch_var method returns the 'time' variable for the tropomi CH4 granule" + """ + bbox = np.array(((-180, 180), (-90, 90))) + tropomi_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' - output_ncdataset = nc.Dataset(os.path.join(self.subset_output_dir, output_file)) - outcut_set = output_ncdataset.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups['Geolocation Fields'] - xrout_dataset = xr.open_dataset(xr.backends.NetCDF4DataStore(outcut_set)) - outattrs = xrout_dataset['Time'].attrs + shutil.copyfile(os.path.join(data_dir, 'tropomi', tropomi_file), + os.path.join(subset_output_dir, tropomi_file)) - for key in inattrs.keys(): - if isinstance(inattrs[key], np.ndarray): - if np.array_equal(inattrs[key],outattrs[key]): - pass - else: - raise AssertionError('Attributes for {} do not equal each other'.format(key)) - else: - assert inattrs[key] == outattrs[key] - - - def test_temporal_subset_lines(self): - bbox = np.array(((-180, 180), (-90, 90))) - file = 'SWOT_L2_LR_SSH_Expert_368_012_20121111T235910_20121112T005015_DG10_01.nc' - output_file = "{}_{}".format(self._testMethodName, file) - min_time = '2012-11-11T23:59:10' - max_time = '2012-11-12T00:20:10' + nc_dataset = nc.Dataset(os.path.join(subset_output_dir, tropomi_file), mode='r') - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time - ) + nc_dataset = subset.transform_grouped_dataset(nc_dataset, os.path.join(subset_output_dir, tropomi_file)) - ds = xr.open_dataset( - join(self.subset_output_dir, output_file), - decode_times=False, - decode_coords=False - ) + args = { + 'decode_coords': False, + 'mask_and_scale': False, + 'decode_times': False + } + + with xr.open_dataset( + xr.backends.NetCDF4DataStore(nc_dataset), + **args + ) as dataset: + lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(dataset) + time_var_names = [ + subset.compute_time_variable_name( + dataset, dataset[lat_var_name] + ) for lat_var_name in lat_var_names + ] + epoch_time_var = subset.get_time_epoch_var(dataset, time_var_names[0]) - assert ds.time.dims != ds.latitude.dims + assert epoch_time_var.split('__')[-1] == 'time' - def test_grouped_empty_subset(self): - """ - Test that an empty subset of a grouped dataset returns 'None' - spatial bounds. - """ - bbox = np.array(((-10, 10), (-10, 10))) - file = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' - output_file = "{}_{}".format(self._testMethodName, file) - shutil.copyfile(os.path.join(self.test_data_dir, 'sentinel_6', file), - os.path.join(self.subset_output_dir, file)) +def test_temporal_variable_subset(data_dir, subset_output_dir, request): + """ + Test that both a temporal and variable subset can be executed + on a granule, and that all of the data within that granule is + subsetted as expected. 
+ """ + bbox = np.array(((-180, 180), (-90, 90))) + file = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' + output_file = "{}_{}".format(request.node.name, file) + min_time = '2015-07-02T09:00:00' + max_time = '2015-07-02T10:00:00' + variables = [ + 'wind_speed', + 'wind_dir' + ] + + subset.subset( + file_to_subset=join(data_dir, file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + min_time=min_time, + max_time=max_time, + variables=variables + ) + + in_ds = xr.open_dataset(join(data_dir, file), + decode_times=False, + decode_coords=False) + + out_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_times=False, + decode_coords=False) - spatial_bounds = subset.subset( - file_to_subset=join(self.subset_output_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file) - ) + # Check that 'time' types match + assert in_ds.time.dtype == out_ds.time.dtype + + in_ds.close() + out_ds.close() + + # Check that all times are within the given bounds. Open + # dataset using 'decode_times=True' for auto-conversions to + # datetime + out_ds = xr.open_dataset(join(subset_output_dir, output_file), + decode_coords=False) - assert spatial_bounds is None + start_dt = subset.translate_timestamp(min_time) + end_dt = subset.translate_timestamp(max_time) - def test_get_time_OMI(self): - """ - Test that code get time variables for OMI .he5 files" - """ - omi_file = 'OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5' + # All dates should be within the given temporal bounds. + assert (out_ds.time >= pd.to_datetime(start_dt)).all() + assert (out_ds.time <= pd.to_datetime(end_dt)).all() - shutil.copyfile(os.path.join(self.test_data_dir, 'OMI', omi_file), - os.path.join(self.subset_output_dir, omi_file)) + # Only coordinate variables and variables requested in variable + # subset should be present. 
+ assert set(np.append(['lat', 'lon', 'time'], variables)) == set(out_ds.data_vars.keys()) - nc_dataset, has_groups = subset.h5file_transform(os.path.join(self.subset_output_dir, omi_file)) + +def test_temporal_he5file_subset(data_dir, subset_output_dir): + """ + Test that the time type changes to datetime for subsetting + """ + + OMI_file_names = ['OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5', + 'OMI-Aura_L2-OMBRO_2020m0116t1207-o82471_v003-2020m0116t182003.he5'] + OMI_copy_file = 'OMI_copy_testing_2.he5' + for i in OMI_file_names: + shutil.copyfile(os.path.join(data_dir, 'OMI', i), + os.path.join(subset_output_dir, OMI_copy_file)) + min_time = '2020-01-16T12:30:00Z' + max_time = '2020-01-16T12:40:00Z' + bbox = np.array(((-180, 180), (-90, 90))) + nc_dataset, has_groups = subset.h5file_transform(os.path.join(subset_output_dir, OMI_copy_file)) args = { 'decode_coords': False, @@ -1831,187 +1727,321 @@ def test_get_time_OMI(self): 'decode_times': False } + if min_time or max_time: + args['decode_times'] = True + with xr.open_dataset( xr.backends.NetCDF4DataStore(nc_dataset), **args ) as dataset: - time_var_names = [] - lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(dataset) - time_var_names = [ - subset.compute_time_variable_name( - dataset, dataset[lat_var_name] - ) for lat_var_name in lat_var_names - ] - assert "Time" in time_var_names[0] - assert "Latitude" in lat_var_names[0] - - - def test_empty_temporal_subset(self): - """ - Test the edge case where a subsetted empty granule - (due to bbox) is temporally subset, which causes the encoding - step to fail due to size '1' data for each dimension. - """ - # 37.707:38.484 - bbox = np.array(((37.707, 38.484), (-13.265, -12.812))) - file = '20190927000500-JPL-L2P_GHRSST-SSTskin-MODIS_A-D-v02.0-fv01.0.nc' - output_file = "{}_{}".format(self._testMethodName, file) - min_time = '2019-09-01' - max_time = '2019-09-30' + lat_var_names, lon_var_names, time_var_names = subset.get_coordinate_variable_names( + dataset=dataset, + lat_var_names=None, + lon_var_names=None, + time_var_names=None + ) + if 'BRO' in i: + assert any('utc' in x.lower() for x in time_var_names) + + dataset, start_date = subset.convert_to_datetime(dataset, time_var_names) + assert dataset[time_var_names[0]].dtype == 'datetime64[ns]' + + +def test_he5_timeattrs_output(data_dir, subset_output_dir, request): + """Test that the time attributes in the output match the attributes of the input for OMI test files""" + + omi_dir = join(data_dir, 'OMI') + omi_file = 'OMI-Aura_L2-OMBRO_2020m0116t1207-o82471_v003-2020m0116t182003.he5' + omi_file_input = 'input' + omi_file + bbox = np.array(((-180, 90), (-90, 90))) + output_file = "{}_{}".format(request.node.name, omi_file) + shutil.copyfile( + os.path.join(omi_dir, omi_file), + os.path.join(subset_output_dir, omi_file) + ) + shutil.copyfile( + os.path.join(omi_dir, omi_file), + os.path.join(subset_output_dir, omi_file_input) + ) + + min_time = '2020-01-16T12:30:00Z' + max_time = '2020-01-16T12:40:00Z' + bbox = np.array(((-180, 180), (-90, 90))) + nc_dataset_input = nc.Dataset(os.path.join(subset_output_dir, omi_file_input)) + incut_set = nc_dataset_input.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups[ + 'Geolocation Fields'] + xr_dataset_input = xr.open_dataset(xr.backends.NetCDF4DataStore(incut_set)) + inattrs = xr_dataset_input['Time'].attrs + + subset.subset( + file_to_subset=os.path.join(subset_output_dir, omi_file), + bbox=bbox, + 
output_file=os.path.join(subset_output_dir, output_file), + min_time=min_time, + max_time=max_time + ) + + output_ncdataset = nc.Dataset(os.path.join(subset_output_dir, output_file)) + outcut_set = output_ncdataset.groups['HDFEOS'].groups['SWATHS'].groups['OMI Total Column Amount BrO'].groups[ + 'Geolocation Fields'] + xrout_dataset = xr.open_dataset(xr.backends.NetCDF4DataStore(outcut_set)) + outattrs = xrout_dataset['Time'].attrs + + for key in inattrs.keys(): + if isinstance(inattrs[key], np.ndarray): + if np.array_equal(inattrs[key], outattrs[key]): + pass + else: + raise AssertionError('Attributes for {} do not equal each other'.format(key)) + else: + assert inattrs[key] == outattrs[key] - subset.subset( - file_to_subset=join(self.test_data_dir, file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file), - min_time=min_time, - max_time=max_time - ) - # Check that all times are within the given bounds. Open - # dataset using 'decode_times=True' for auto-conversions to - # datetime - ds = xr.open_dataset( - join(self.subset_output_dir, output_file), - decode_coords=False - ) +def test_temporal_subset_lines(data_dir, subset_output_dir, request): + bbox = np.array(((-180, 180), (-90, 90))) + file = 'SWOT_L2_LR_SSH_Expert_368_012_20121111T235910_20121112T005015_DG10_01.nc' + output_file = "{}_{}".format(request.node.name, file) + min_time = '2012-11-11T23:59:10' + max_time = '2012-11-12T00:20:10' - assert all(dim_size == 1 for dim_size in ds.dims.values()) - - def test_passed_coords(self): - """ - Ensure the coordinates passed in to the subsetter are - utilized and not manually calculated. - """ - file = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc' - - dataset = xr.open_dataset(join(self.test_data_dir, file), - decode_times=False, - decode_coords=False) - - dummy_lats = ['dummy_lat'] - dummy_lons = ['dummy_lon'] - dummy_times = ['dummy_time'] - - actual_lats = ['lat'] - actual_lons = ['lon'] - actual_times = ['time'] - - # When none are passed in, variables are computed manually - lats, lons, times = subset.get_coordinate_variable_names( - dataset, - lat_var_names=None, - lon_var_names=None, - time_var_names=None - ) + subset.subset( + file_to_subset=join(data_dir, file), + bbox=bbox, + output_file=join(subset_output_dir, output_file), + min_time=min_time, + max_time=max_time + ) - assert lats == actual_lats - assert lons == actual_lons - assert times == actual_times + ds = xr.open_dataset( + join(subset_output_dir, output_file), + decode_times=False, + decode_coords=False + ) - # When lats or lons are passed in, only time is computed manually - # This case is a bit different because the lat values are used to - # compute the time variable so we can't pass in dummy values. 
+ assert ds.time.dims != ds.latitude.dims - lats, lons, times = subset.get_coordinate_variable_names( - dataset, - lat_var_names=actual_lats, - lon_var_names=dummy_lons, - time_var_names=None, - ) - assert lats == actual_lats - assert lons == dummy_lons - assert times == actual_times - # When only time is passed in, lats and lons are computed manually - lats, lons, times = subset.get_coordinate_variable_names( - dataset, - lat_var_names=None, - lon_var_names=None, - time_var_names=dummy_times - ) - assert lats == actual_lats - assert lons == actual_lons - assert times == dummy_times - - # When time, lats, and lons are passed in, nothing is computed manually - lats, lons, times = subset.get_coordinate_variable_names( - dataset, - lat_var_names=dummy_lats, - lon_var_names=dummy_lons, - time_var_names=dummy_times - ) +def test_grouped_empty_subset(data_dir, subset_output_dir, request): + """ + Test that an empty subset of a grouped dataset returns 'None' + spatial bounds. + """ + bbox = np.array(((-10, 10), (-10, 10))) + file = 'S6A_P4_2__LR_STD__ST_002_140_20201207T011501_20201207T013023_F00.nc' + output_file = "{}_{}".format(request.node.name, file) - assert lats == dummy_lats - assert lons == dummy_lons - assert times == dummy_times - - def test_var_subsetting_tropomi(self): - """ - Check that variable subsetting is the same if a leading slash is included - """ - TROP_dir = join(self.test_data_dir, 'tropomi') - trop_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' - variable_slash = ['/PRODUCT/methane_mixing_ratio'] - variable_noslash = ['PRODUCT/methane_mixing_ratio'] - bbox = np.array(((-180, 180), (-90, 90))) - output_file_slash = "{}_{}".format(self._testMethodName, trop_file) - output_file_noslash = "{}_noslash_{}".format(self._testMethodName, trop_file) - shutil.copyfile( - os.path.join(TROP_dir, trop_file), - os.path.join(self.subset_output_dir, trop_file) - ) - shutil.copyfile( - os.path.join(TROP_dir, trop_file), - os.path.join(self.subset_output_dir,'slashtest'+trop_file) - ) - slash_test = subset.subset( - file_to_subset=join(self.subset_output_dir, trop_file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file_slash), - variables = variable_slash - ) - noslash_test = subset.subset( - file_to_subset=join(self.subset_output_dir, 'slashtest'+trop_file), - bbox=bbox, - output_file=join(self.subset_output_dir, output_file_noslash), - variables = variable_noslash - ) + shutil.copyfile(os.path.join(data_dir, 'sentinel_6', file), + os.path.join(subset_output_dir, file)) - slash_dataset = nc.Dataset(join(self.subset_output_dir, output_file_slash)) - noslash_dataset = nc.Dataset(join(self.subset_output_dir, output_file_noslash)) - - assert list(slash_dataset.groups['PRODUCT'].variables) == list(noslash_dataset.groups['PRODUCT'].variables) - def test_bad_time_unit(self): - - fill_val = -99999.0 - time_vals = np.random.rand(10) - time_vals[0] = fill_val - time_vals[-1] = fill_val - - data_vars = { - 'foo': (['x'], np.random.rand(10)), - 'time': ( - ['x'], - time_vals, - { - 'units': 'seconds since 2000-1-1 0:0:0 0', - '_FillValue': fill_val, - 'standard_name': 'time', - 'calendar': 'standard' - } - ), - } + spatial_bounds = subset.subset( + file_to_subset=join(subset_output_dir, file), + bbox=bbox, + output_file=join(subset_output_dir, output_file) + ) - ds = xr.Dataset( - data_vars=data_vars, - coords={'x': (['x'], np.arange(10))} - ) + assert spatial_bounds is None + + +def test_get_time_OMI(data_dir, 
subset_output_dir):
+    """
+    Test that the code gets time variables for OMI .he5 files
+    """
+    omi_file = 'OMI-Aura_L2-OMSO2_2020m0116t1207-o82471_v003-2020m0223t142939.he5'
+
+    shutil.copyfile(os.path.join(data_dir, 'OMI', omi_file),
+                    os.path.join(subset_output_dir, omi_file))
+
+    nc_dataset, has_groups = subset.h5file_transform(os.path.join(subset_output_dir, omi_file))
+
+    args = {
+        'decode_coords': False,
+        'mask_and_scale': False,
+        'decode_times': False
+    }
+
+    with xr.open_dataset(
+            xr.backends.NetCDF4DataStore(nc_dataset),
+            **args
+    ) as dataset:
+        time_var_names = []
+        lat_var_names, lon_var_names = subset.compute_coordinate_variable_names(dataset)
+        time_var_names = [
+            subset.compute_time_variable_name(
+                dataset, dataset[lat_var_name]
+            ) for lat_var_name in lat_var_names
+        ]
+        assert "Time" in time_var_names[0]
+        assert "Latitude" in lat_var_names[0]
+
+
+def test_empty_temporal_subset(data_dir, subset_output_dir, request):
+    """
+    Test the edge case where a subsetted empty granule
+    (due to bbox) is temporally subset, which causes the encoding
+    step to fail due to size '1' data for each dimension.
+    """
+    # 37.707:38.484
+    bbox = np.array(((37.707, 38.484), (-13.265, -12.812)))
+    file = '20190927000500-JPL-L2P_GHRSST-SSTskin-MODIS_A-D-v02.0-fv01.0.nc'
+    output_file = "{}_{}".format(request.node.name, file)
+    min_time = '2019-09-01'
+    max_time = '2019-09-30'
+
+    subset.subset(
+        file_to_subset=join(data_dir, file),
+        bbox=bbox,
+        output_file=join(subset_output_dir, output_file),
+        min_time=min_time,
+        max_time=max_time
+    )
+
+    # Check that all times are within the given bounds. Open
+    # dataset using 'decode_times=True' for auto-conversions to
+    # datetime
+    ds = xr.open_dataset(
+        join(subset_output_dir, output_file),
+        decode_coords=False
+    )
+
+    assert all(dim_size == 1 for dim_size in ds.dims.values())
+
+
+def test_passed_coords(data_dir, subset_output_dir):
+    """
+    Ensure the coordinates passed in to the subsetter are
+    utilized and not manually calculated.
+    """
+    file = 'ascat_20150702_084200_metopa_45145_eps_o_250_2300_ovw.l2.nc'
+
+    dataset = xr.open_dataset(join(data_dir, file),
+                              decode_times=False,
+                              decode_coords=False)
+
+    dummy_lats = ['dummy_lat']
+    dummy_lons = ['dummy_lon']
+    dummy_times = ['dummy_time']
+
+    actual_lats = ['lat']
+    actual_lons = ['lon']
+    actual_times = ['time']
+
+    # When none are passed in, variables are computed manually
+    lats, lons, times = subset.get_coordinate_variable_names(
+        dataset,
+        lat_var_names=None,
+        lon_var_names=None,
+        time_var_names=None
+    )
+
+    assert lats == actual_lats
+    assert lons == actual_lons
+    assert times == actual_times
+
+    # When lats or lons are passed in, only time is computed manually
+    # This case is a bit different because the lat values are used to
+    # compute the time variable so we can't pass in dummy values.
+ + lats, lons, times = subset.get_coordinate_variable_names( + dataset, + lat_var_names=actual_lats, + lon_var_names=dummy_lons, + time_var_names=None, + ) + + assert lats == actual_lats + assert lons == dummy_lons + assert times == actual_times + # When only time is passed in, lats and lons are computed manually + lats, lons, times = subset.get_coordinate_variable_names( + dataset, + lat_var_names=None, + lon_var_names=None, + time_var_names=dummy_times + ) + assert lats == actual_lats + assert lons == actual_lons + assert times == dummy_times + + # When time, lats, and lons are passed in, nothing is computed manually + lats, lons, times = subset.get_coordinate_variable_names( + dataset, + lat_var_names=dummy_lats, + lon_var_names=dummy_lons, + time_var_names=dummy_times + ) + + assert lats == dummy_lats + assert lons == dummy_lons + assert times == dummy_times + + +def test_var_subsetting_tropomi(data_dir, subset_output_dir, request): + """ + Check that variable subsetting is the same if a leading slash is included + """ + trop_dir = join(data_dir, 'tropomi') + trop_file = 'S5P_OFFL_L2__CH4____20190319T110835_20190319T125006_07407_01_010202_20190325T125810_subset.nc4' + variable_slash = ['/PRODUCT/methane_mixing_ratio'] + variable_noslash = ['PRODUCT/methane_mixing_ratio'] + bbox = np.array(((-180, 180), (-90, 90))) + output_file_slash = "{}_{}".format(request.node.name, trop_file) + output_file_noslash = "{}_noslash_{}".format(request.node.name, trop_file) + shutil.copyfile( + os.path.join(trop_dir, trop_file), + os.path.join(subset_output_dir, trop_file) + ) + shutil.copyfile( + os.path.join(trop_dir, trop_file), + os.path.join(subset_output_dir, 'slashtest' + trop_file) + ) + subset.subset( + file_to_subset=join(subset_output_dir, trop_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file_slash), + variables=variable_slash + ) + subset.subset( + file_to_subset=join(subset_output_dir, 'slashtest' + trop_file), + bbox=bbox, + output_file=join(subset_output_dir, output_file_noslash), + variables=variable_noslash + ) + + slash_dataset = nc.Dataset(join(subset_output_dir, output_file_slash)) + noslash_dataset = nc.Dataset(join(subset_output_dir, output_file_noslash)) + + assert list(slash_dataset.groups['PRODUCT'].variables) == list(noslash_dataset.groups['PRODUCT'].variables) + + +def test_bad_time_unit(subset_output_dir): + fill_val = -99999.0 + time_vals = np.random.rand(10) + time_vals[0] = fill_val + time_vals[-1] = fill_val + + data_vars = { + 'foo': (['x'], np.random.rand(10)), + 'time': ( + ['x'], + time_vals, + { + 'units': 'seconds since 2000-1-1 0:0:0 0', + '_FillValue': fill_val, + 'standard_name': 'time', + 'calendar': 'standard' + } + ), + } - nc_out_location = join(self.subset_output_dir, "bad_time.nc") - ds.to_netcdf(nc_out_location) + ds = xr.Dataset( + data_vars=data_vars, + coords={'x': (['x'], np.arange(10))} + ) - subset.override_decode_cf_datetime() + nc_out_location = join(subset_output_dir, "bad_time.nc") + ds.to_netcdf(nc_out_location) - ds_test = xr.open_dataset(nc_out_location) - ds_test.close() + subset.override_decode_cf_datetime() + ds_test = xr.open_dataset(nc_out_location) + ds_test.close()
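
Note on the pytest conversion above: the rewritten tests take `data_dir`, `subset_output_dir`, and pytest's built-in `request` fixture in place of the old `self.test_data_dir`, `self.subset_output_dir`, and `self._testMethodName` attributes. The fixture definitions are not part of this hunk; the sketch below is only an illustration of the assumed mechanism, and the fixture bodies (paths, scopes, cleanup) are hypothetical rather than the repository's actual conftest.py.

# conftest.py (illustrative sketch only -- not part of this diff; the fixture
# names match the test signatures above, but these bodies are assumptions)
import shutil
import tempfile
from os.path import dirname, join, realpath

import pytest


@pytest.fixture(scope='session')
def data_dir():
    """Directory containing the test granules (assumed layout)."""
    return join(dirname(realpath(__file__)), 'data')


@pytest.fixture
def subset_output_dir():
    """Scratch directory for subsetter output, one per test, removed afterwards."""
    output_dir = tempfile.mkdtemp()
    yield output_dir
    shutil.rmtree(output_dir, ignore_errors=True)

Each converted test then builds a unique output filename from `request.node.name`, pytest's counterpart to unittest's `self._testMethodName`.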