podaac · sliu008 · Sep 15, 2023 · Aug 31, 2023 · Aug 31, 2023 · Sep 1, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Removed
 ### Fixed
 - [issue/119](https://github.com/podaac/l2ss-py/issues/119): GPM variable dimensions are renamed from "phony_dim" to the dimension names in the variable attribute "DimensionNames"
+- [issue/189](https://github.com/podaac/l2ss-py/issues/189): Fix temporal subsetting for SWOT collections: open the granule file with `mask_and_scale` enabled when the time variable's fill value would cause an overflow, and use the original dataset encoding when writing the output file.
+
 ### Security
 
 

diff --git a/cmr/ops_associations.txt b/cmr/ops_associations.txt
@@ -87,3 +87,4 @@ C2601583089-POCLOUD
 C2601581863-POCLOUD
 C2628598397-POCLOUD
 C2036882456-POCLOUD
+C2296989380-POCLOUD
diff --git a/podaac/subsetter/subset.py b/podaac/subsetter/subset.py
@@ -1162,10 +1162,22 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
 
     if min_time or max_time:
         args['decode_times'] = True
+        # A float64 'time' variable whose _FillValue is the default f8 fill value is known to cause an integer overflow in xarray, so enable mask_and_scale in that case
+        if 'time' in nc_dataset.variables.keys():
+            try:
+                if nc_dataset['time'].getncattr('_FillValue') == nc.default_fillvals.get('f8') and \
+                 nc_dataset['time'].dtype == 'float64':
+                    args['mask_and_scale'] = True
+            except AttributeError:
+                pass
+
     with xr.open_dataset(
             xr.backends.NetCDF4DataStore(nc_dataset),
             **args
     ) as dataset:
+
+        original_dataset = dataset
+
         lat_var_names, lon_var_names, time_var_names = get_coordinate_variable_names(
             dataset=dataset,
             lat_var_names=lat_var_names,
@@ -1224,28 +1236,18 @@ def subset(file_to_subset: str, bbox: np.ndarray, output_file: str,
                     lon_var_names=lon_var_names
                 ))
             else:
-                encoding = {}
-                compression = {"zlib": True, "complevel": 5, "_FillValue": None}
-
-                if (min_time or max_time) and not all(
-                        dim_size == 1 for dim_size in dataset.dims.values()):
-                    encoding = {
-                        var_name: {
-                            'units': nc_dataset.variables[var_name].__dict__['units'],
-                            'zlib': True,
-                            "complevel": 5,
-                            "_FillValue": None
-                        } for var_name in time_var_names
-                        if 'units' in nc_dataset.variables[var_name].__dict__
-                    }
                 for var in dataset.data_vars:
-                    if var not in encoding:
-                        encoding[var] = compression
                     if dataset[var].dtype == 'S1' and isinstance(dataset[var].attrs.get('_FillValue'), bytes):
                         dataset[var].attrs['_FillValue'] = dataset[var].attrs['_FillValue'].decode('UTF-8')
 
+                    var_encoding = {
+                        "zlib": True,
+                        "complevel": 5,
+                        "_FillValue": original_dataset[var].encoding.get('_FillValue')
+                    }
+
                     data_var = dataset[var].copy()
-                    data_var.load().to_netcdf(output_file, 'a', encoding={var: encoding.get(var)})
+                    data_var.load().to_netcdf(output_file, 'a', encoding={var: var_encoding})
                     del data_var
 
                 with nc.Dataset(output_file, 'a') as dataset_attr:

diff --git a/tests/test_subset.py b/tests/test_subset.py
@@ -158,6 +158,24 @@ def test_subset_variables(test_file, data_dir, subset_output_dir, request):
                              decode_times=False,
                              decode_coords=False)
 
+
+    nc_in_ds = nc.Dataset(join(data_dir, test_file))
+    nc_out_ds = nc.Dataset(join(subset_output_dir, output_file))
+
+    time_var_name = None
+    try:
+        lat_var_name = subset.compute_coordinate_variable_names(in_ds)[0][0]
+        time_var_name = subset.compute_time_variable_name(in_ds, in_ds[lat_var_name])
+    except ValueError:
+        # unable to determine lon lat vars
+        pass
+
+    if time_var_name:
+        assert nc_in_ds[time_var_name].units == nc_out_ds[time_var_name].units
+
+    nc_in_ds.close()
+    nc_out_ds.close()
+
     for in_var, out_var in zip(in_ds.data_vars.items(), out_ds.data_vars.items()):
         # compare names
         assert in_var[0] == out_var[0]