Skip to content

Commit

Permalink
Updated to set data as Dask array back in Dataset if data is originall…
Browse files Browse the repository at this point in the history
…y stored as Dask array. (#450)
  • Loading branch information
kenkehoe authored Apr 27, 2022
1 parent 5b7ec2d commit 85ce4d9
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 11 deletions.
30 changes: 23 additions & 7 deletions act/qc/qcfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,9 +942,9 @@ def datafilter(
ds.qcfilter.datafilter(rm_assessments="Bad")
ds_2 = ds.mean()
print("All_data=", ds_1[var_name].values)
print("All_data =", ds_1[var_name].values)
All_data = 98.86098
print("Bad_Removed=", ds_2[var_name].values)
print("Bad_Removed =", ds_2[var_name].values)
Bad_Removed = 99.15148
"""
Expand All @@ -958,10 +958,18 @@ def datafilter(
qc_var_name = self.check_for_ancillary_qc(var_name, add_if_missing=False, cleanup=False)
if qc_var_name is None:
if verbose:
print(
f'No quality control variable for {var_name} found '
f'in call to .qcfilter.datafilter()'
)
if var_name in ['base_time', 'time_offset']:
continue

try:
if self._obj[var_name].attrs['standard_name'] == 'quality_flag':
continue
except KeyError:
pass

print(f'No quality control variable for {var_name} found '
f'in call to .qcfilter.datafilter()')

continue

data = self.get_masked_data(
Expand All @@ -971,8 +979,16 @@ def datafilter(
ma_fill_value=np_ma,
)

self._obj[var_name].values = data
# If the data was originally stored as a Dask array, return the values to the
# Dataset as a Dask array; otherwise set them as a NumPy array.
try:
self._obj[var_name].data = dask.array.from_array(
data, chunks=self._obj[var_name].data.chunksize)

except AttributeError:
self._obj[var_name].values = data

# If requested delete quality control variable
if del_qc_var:
del self._obj[qc_var_name]
if verbose:
Expand Down
27 changes: 23 additions & 4 deletions act/tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,21 +718,40 @@ def test_qctests_dos():


def test_datafilter():
    """Exercise ``qcfilter.datafilter`` on the MET example dataset.

    The test covers three calls, each made on a fresh deep copy so the
    source dataset keeps its quality control (QC) variables:

    * ``del_qc_var=False`` -- the mean of the filtered variable changes
      once values flagged 'Bad' are removed.
    * ``variables=var_name`` -- the mean changes and only the QC variable
      paired with ``var_name`` is expected to be gone afterwards.
    * ``del_qc_var=True`` -- every ``qc_*`` variable is expected to be gone.
    """
    # Read without base_time/time_offset so they never appear in data_vars.
    ds = read_netcdf(EXAMPLE_MET1, drop_variables=['base_time', 'time_offset'])
    ds.clean.cleanup()

    # Split the variables into QC variables and the data they describe;
    # both lists are sorted so they can be compared directly further down.
    qc_var_names = sorted(name for name in ds.data_vars if name.startswith('qc_'))
    data_var_names = sorted(set(ds.data_vars) - set(qc_var_names))

    var_name = 'atmos_pressure'

    # Reference mean, taken before any test is added or any data is filtered.
    ds_1 = ds.mean()

    ds.qcfilter.add_less_test(var_name, 99, test_assessment='Bad')

    # Filter a copy and keep the QC variables.
    ds_filtered = copy.deepcopy(ds)
    ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=False)
    ds_2 = ds_filtered.mean()
    assert np.isclose(ds_1[var_name].values, 98.86, atol=0.01)
    assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01)
    # The unfiltered mean is expected to still be backed by a Dask array.
    assert isinstance(ds_1[var_name].data, da.core.Array)

    # Filter a single variable: only its own QC variable should be removed.
    ds_filtered = copy.deepcopy(ds)
    ds_filtered.qcfilter.datafilter(rm_assessments='Bad', variables=var_name)
    ds_2 = ds_filtered.mean()
    assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01)
    expected_var_names = sorted(set(data_var_names + qc_var_names) - {'qc_' + var_name})
    assert sorted(ds_filtered.data_vars) == expected_var_names

    # Filter everything and request removal of the QC variables.
    ds_filtered = copy.deepcopy(ds)
    ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=True)
    assert sorted(ds_filtered.data_vars) == data_var_names

    ds.close()
    del ds


def test_qc_remainder():
Expand Down

0 comments on commit 85ce4d9

Please sign in to comment.