From e4d2cf4140519f8f95fc812f4c6bf848c469ec5d Mon Sep 17 00:00:00 2001 From: Michael Kelleher Date: Thu, 27 Feb 2020 11:03:15 -0500 Subject: [PATCH] Deal with missing / fill values consistently --- build_Bamber_greenland.py | 1 - compare_cism_data.py | 11 ++++++----- data/icebridge.py | 38 ++++++++++++++++++++------------------ data/insar.py | 19 ++++++++++++++----- data/racmo2p3.py | 20 +++++++++++++++----- data/searise.py | 11 +++++++---- 6 files changed, 62 insertions(+), 38 deletions(-) diff --git a/build_Bamber_greenland.py b/build_Bamber_greenland.py index 453fb6a..b13eaf3 100755 --- a/build_Bamber_greenland.py +++ b/build_Bamber_greenland.py @@ -96,7 +96,6 @@ nc_racmo2p3 = get_nc_file(lc_racmo2p3, "r") speak.verbose(args, " Found RACMO 2.3 data") - try: nc_insar = get_nc_file(lc_InSAR, "r") except Exception: diff --git a/compare_cism_data.py b/compare_cism_data.py index 67fc6cd..c73af8f 100644 --- a/compare_cism_data.py +++ b/compare_cism_data.py @@ -212,10 +212,10 @@ def plot_sideby(ref, test, cmn_vars, skip=1): # We need to mask invalid data, and compress to just the non-masked # values, so that the percentiles don't come out as NaNs _masktest = np.ma.masked_greater( - np.ma.masked_invalid(test[var].values.ravel()), 1e20 + np.ma.masked_invalid(test[var].values.ravel()), 1e36 ).compressed() _maskref = np.ma.masked_greater( - np.ma.masked_invalid(test[var].values.ravel()), 1e20 + np.ma.masked_invalid(test[var].values.ravel()), 1e36 ).compressed() axes[3].boxplot( @@ -256,8 +256,8 @@ def annote(data): if any(np.isnan(data)): str_out += "MASK NaN\n" - if any(data > 1e20): - str_out += "MASK >1e20" + if any(data > 1e36): + str_out += "MASK >1e36" if str_out == "": str_out = "NO MASK" @@ -272,7 +272,8 @@ def main(): # Test Bamber grid ref_file = "complete/greenland_8km_2016_12_01.mcb.nc" - test_file = "complete/greenland_8km_2020_02_21.mcb.nc" + # test_file = "complete/greenland_8km_2020_02_21.mcb.nc" + test_file = "complete/greenland_8km_2020_02_27_1038.mcb.nc" # Test EPSG:3413 grid # ref_file = "complete/greenland_8km_2017_06_27.epsg3413.nc" diff --git a/data/icebridge.py b/data/icebridge.py index ab2cec7..a2410fc 100644 --- a/data/icebridge.py +++ b/data/icebridge.py @@ -53,6 +53,8 @@ from pykdtree.kdtree import KDTree +MISSING_VAL = 2.0e36 + def mcb_epsg3413( args, nc_massCon, nc_bamber, nc_base, base, proj_epsg3413, proj_eigen_gl04c @@ -64,6 +66,7 @@ def mcb_epsg3413( base dataset as `thk`. NetCDF attributes are mostly preserved, but the data is changed from type short to type float. """ + massCon = projections.DataGrid() massCon.y = nc_massCon.variables["y"] massCon.x = nc_massCon.variables["x"] @@ -99,7 +102,7 @@ def mcb_epsg3413( ) sys.stdout.write("\r [%-60s] %d%%\n" % ("=" * 60, 100.0)) sys.stdout.flush() - copy_atts_bad_fill(massCon.thickness, base.thk, -9999.0) + copy_atts_bad_fill(massCon.thickness, base.thk, MISSING_VAL) base.thk.grid_mapping = "epsg_3413" base.thk.coordinates = "lon lat" base.thk.reference = "M. Morlighem, E. Rignot, J. Mouginot, H. Seroussi and E. Larour, Deeply incised submarine glacial valleys beneath the Greenland Ice Sheet, Nat. Geosci., 7, 418-422, 2014, doi:10.1038/ngeo2167, http://www.nature.com/ngeo/journal/vaop/ncurrent/full/ngeo2167.html" @@ -248,17 +251,17 @@ def mcb_epsg3413( # print(msk.size) # pp(msk) - base_bamber[~msk] = -9999.0 + base_bamber[~msk] = MISSING_VAL # NOTE: Make sure all values fall within a reasonable range as # RectBivariateSpine interps using the missing values - base_bamber[base_bamber < rng[0]] = -9999.0 - base_bamber[base_bamber > rng[1]] = -9999.0 + base_bamber[base_bamber < rng[0]] = MISSING_VAL + base_bamber[base_bamber > rng[1]] = MISSING_VAL base.var = nc_base.createVariable(var, "f4", ("y", "x",)) base.var[:] = base_bamber[:] copy_atts_bad_fill( - nc_massCon.variables[var_list[0]], base.var, -9999.0 + nc_massCon.variables[var_list[0]], base.var, MISSING_VAL ) base.var.grid_mapping = "epsg_3413" base.var.coordinates = "lon lat" @@ -368,7 +371,7 @@ def mcb_bamber( # Attribute not there...set it somewhere else print("THICKNESS DATA CITATION NOT FOUND") pass - copy_atts_bad_fill(massCon.thickness, base.thk, -9999.0) + copy_atts_bad_fill(massCon.thickness, base.thk, MISSING_VAL) speak.verbose(args, " Interpolating, with priority, topg and topgerr.") speak.verbose(args, " Primary Data [IceBridge]: bed and errbed.") @@ -381,7 +384,7 @@ def mcb_bamber( pri_data = remask(pri_data) sec_data = np.ma.masked_values( - nc_bamber.variables["BedrockElevation"][:, :], -9999.0 + nc_bamber.variables["BedrockElevation"][:, :], -9999 ) sec_data = remask(sec_data) @@ -393,7 +396,7 @@ def mcb_bamber( pri_err = remask(pri_err) sec_err = np.ma.masked_values( - nc_bamber.variables["BedrockError"][:, :], -9999.0 + nc_bamber.variables["BedrockError"][:, :], -9999 ) sec_err = remask(sec_err) @@ -486,12 +489,9 @@ def mcb_bamber( # Numbering in other quadrents is the # reflection through the axis or axes # with negitive skip values (i_s or j_s). - try: - missing_points, interp_dict = interp.check_missing( - pri_data, (nn_ii, nn_jj), i_s, j_s - ) - except IndexError: - breakpoint() + missing_points, interp_dict = interp.check_missing( + pri_data, (nn_ii, nn_jj), i_s, j_s + ) missing_err_pts, err_dict = interp.check_missing( pri_err, (nn_ii, nn_jj), i_s, j_s ) @@ -559,9 +559,11 @@ def mcb_bamber( speak.verbose(args, " Writing topg topgerr to base.") base.topg = nc_base.createVariable("topg", "f4", ("y", "x",)) - base.topg[:, :] = new_data.filled(-9999.0)[:, :] - copy_atts_bad_fill(nc_massCon.variables["bed"], base.topg, -9999.0) + base.topg[:, :] = new_data.filled(MISSING_VAL)[:, :] + copy_atts_bad_fill(nc_massCon.variables["bed"], base.topg, MISSING_VAL) base.topgerr = nc_base.createVariable("topgerr", "f4", ("y", "x",)) - base.topgerr[:, :] = new_err.filled(-9999.0)[:, :] - copy_atts_bad_fill(nc_massCon.variables["errbed"], base.topgerr, -9999.0) + base.topgerr[:, :] = new_err.filled(MISSING_VAL)[:, :] + copy_atts_bad_fill( + nc_massCon.variables["errbed"], base.topgerr, MISSING_VAL + ) diff --git a/data/insar.py b/data/insar.py index dfc5d09..96f616c 100644 --- a/data/insar.py +++ b/data/insar.py @@ -79,6 +79,8 @@ from util.ncfunc import copy_atts_bad_fill from pykdtree.kdtree import KDTree +MISSING_VAL = 2.0e36 + def velocity_epsg3413(args, nc_insar, nc_base, base): insar = projections.DataGrid() @@ -114,12 +116,12 @@ def velocity_epsg3413(args, nc_insar, nc_base, base): sys.stdout.write("\r [%-60s] %d%%\n" % ("=" * 60, 100.0)) sys.stdout.flush() - base_data[base_data < data_min] = -2.0e9 - base_data[base_data > data_max] = -2.0e9 + base_data[base_data < data_min] = MISSING_VAL + base_data[base_data > data_max] = MISSING_VAL base.var = nc_base.createVariable(var, "f4", ("y", "x",)) base.var[:] = base_data[:] - copy_atts(nc_insar.variables[var], base.var) + copy_atts_bad_fill(nc_insar.variables[var], base.var, MISSING_VAL) base.var.grid_mapping = "epsg_3413" base.var.coordinates = "lon lat" @@ -142,6 +144,7 @@ def velocity_bamber(args, nc_insar, nc_base, trans): A DataGrid() class instance that holds the base data grid information. """ + proj_greenland = projections.greenland() x_in = nc_insar.variables["x"][:] y_in = nc_insar.variables["y"][:] @@ -178,12 +181,18 @@ def velocity_bamber(args, nc_insar, nc_base, trans): ) z_t = z_t.reshape(z_in.shape) - + z_t = z_t.filled(MISSING_VAL) z_interp = z_t.flatten()[qi].reshape(trans.y_grid.shape) + # try: + # _fillval = nc_insar.variables[rvar].getncattr("_FillValue") + # except AttributeError: + # _fillval = nc_insar.variables[rvar].getncattr("MISSING_VALue") + # z_interp[np.where(z_interp == _fillval)] = MISSING_VAL + trans.var = nc_base.createVariable(bvar, "f4", ("y", "x",)) trans.var[:] = z_interp[:] - copy_atts_bad_fill(nc_insar.variables[rvar], trans.var, 9.96921e36) + copy_atts_bad_fill(nc_insar.variables[rvar], trans.var, MISSING_VAL) trans.var.comments = "Nearest neighbour remapping" trans.var.grid_mapping = "mcb" diff --git a/data/racmo2p3.py b/data/racmo2p3.py index ef576cf..bcbfa7f 100644 --- a/data/racmo2p3.py +++ b/data/racmo2p3.py @@ -39,6 +39,8 @@ from util import projections from pykdtree.kdtree import KDTree +MISSING_VAL = 2.0e36 + def acab_epsg3413(args, nc_racmo, nc_base, base): racmo = projections.DataGrid() @@ -77,12 +79,12 @@ def acab_epsg3413(args, nc_racmo, nc_base, base): sys.stdout.write("\r [%-60s] %d%%\n" % ("=" * 60, 100.0)) sys.stdout.flush() - base_data[base_data < data_min] = 9.96921e36 - base_data[base_data > data_max] = 9.96921e36 + base_data[base_data < data_min] = MISSING_VAL + base_data[base_data > data_max] = MISSING_VAL base.var = nc_base.createVariable(bvar, "f4", ("y", "x",)) base.var[:] = base_data[:] - copy_atts_bad_fill(nc_racmo.variables[rvar], base.var, 9.96921e36) + copy_atts_bad_fill(nc_racmo.variables[rvar], base.var, MISSING_VAL) base.var.long_name = "Water Equivalent Surface Mass Balance" base.var.standard_name = "land_ice_lwe_surface_specific_mass_balance" base.var.units = "mm/year" @@ -143,12 +145,20 @@ def acab_bamber(args, nc_racmo2p3, nc_base, base): ) z_t = z_t.reshape(z_in.shape) + z_t = z_t.filled(MISSING_VAL) z_interp = z_t.flatten()[qi].reshape(base.y_grid.shape) - base.var = nc_base.createVariable(bvar, "f4", ("y", "x",)) base.var[:] = z_interp[:] - copy_atts_bad_fill(nc_racmo2p3.variables[rvar], base.var, 9.96921e36) + + # If there are missing values from the original dataset, update the + # fill value to something easy + z_interp[ + np.where( + z_interp == nc_racmo2p3.variables[rvar].getncattr("_FillValue") + ) + ] = MISSING_VAL + copy_atts_bad_fill(nc_racmo2p3.variables[rvar], base.var, MISSING_VAL) base.var.long_name = "Water Equivalent Surface Mass Balance" base.var.standard_name = "land_ice_lwe_surface_specific_mass_balance" diff --git a/data/searise.py b/data/searise.py index 0991360..5f4dc28 100644 --- a/data/searise.py +++ b/data/searise.py @@ -109,8 +109,9 @@ def bheatflx_artm_epsg3413( base_bamber, mask=np.logical_or(base_y_mask.mask, base_x_mask.mask) ) + missing_value = 2e36 if base_var != "bheatflx": - base_bamber[base_masked.mask] = -9999.0 + base_bamber[base_masked.mask] = missing_value base.var = nc_base.createVariable(base_var, "f4", ("y", "x",)) if base_var == "bheatflx": @@ -118,7 +119,9 @@ def bheatflx_artm_epsg3413( else: base.var[:] = base_bamber[:] - copy_atts_add_fill(nc_seaRise.variables[sea_var], base.var, -9999.0) + copy_atts_add_fill( + nc_seaRise.variables[sea_var], base.var, missing_value + ) base.var.grid_mapping = "epsg_3413" base.var.coordinates = "lon lat" @@ -166,7 +169,7 @@ def bheatflx_artm_bamber(args, nc_seaRise, nc_base, base): speak.verbose(args, " Writing bheatflx to base.") base_bheatflx = nc_base.createVariable("bheatflx", "f4", ("y", "x",)) base_bheatflx[:, :] = seaRise_data[:, :] - copy_atts(seaRise_bheatflx, base_bheatflx) + copy_atts_add_fill(seaRise_bheatflx, base_bheatflx, 2e36) # get annual mean air temperature (2m) # ------------------------------------- @@ -180,4 +183,4 @@ def bheatflx_artm_bamber(args, nc_seaRise, nc_base, base): speak.verbose(args, " Writing artm to base.") base_artm = nc_base.createVariable("artm", "f4", ("y", "x",)) base_artm[:, :] = seaRise_data[:, :] - copy_atts(seaRise_presartm, base_artm) + copy_atts_add_fill(seaRise_presartm, base_artm, 2e36)