Skip to content

Commit

Permalink
WDQMS fix for obsproc/v1.2
Browse files Browse the repository at this point in the history
The ObsProc upgraded from version 1.1 to version 1.2, in which the
high-resolution radiosonde observations are included. The radiosonde 
data from ObsProcv1.2 stored in GSI diagnostic files, in some cases, 
have multiple observations in the same location/time, and these 
observations often contain bad data (e.g., temperature > 500K or
temperature < 50K). These duplicate observations and bad data
caused WDQMS processing to fail. This PR improves the handling
of these obscure cases.

Refs #2389
  • Loading branch information
emilyhcliu committed May 29, 2024
1 parent 54cc0aa commit 4573f80
Showing 1 changed file with 53 additions and 42 deletions.
95 changes: 53 additions & 42 deletions ush/wdqms.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,18 @@ def __init__(self, inputfiles, wdqms_type, outdir,
# Drop duplicates
df_total = df_total.drop_duplicates()

# Create temporary dataframe of only temperature and q values
tq_condition = [self.wdqms_type_dict[self.wdqms_type]['variable_ids']['t'],
self.wdqms_type_dict[self.wdqms_type]['variable_ids']['q']]

tq_df = df_total.loc[df_total['var_id'].isin(tq_condition)]
no_tq_df = df_total.loc[~df_total['var_id'].isin(tq_condition)]

# Adjust relative humidity data
df_total = self._genqsat(df_total)
tq_df = self._genqsat(tq_df)

# Merge the non t and q values back into returned t and q dataframe
df_total = pd.concat([tq_df, no_tq_df])

# Add Status Flag column
df_total = self._create_status_flag(df_total)
Expand Down Expand Up @@ -118,6 +128,10 @@ def _wdqms_type_requirements(self, df):
df.loc[(df['var_id'] != 110) & (df['Obs_Minus_Forecast_adjusted'].abs() > 500),
'Obs_Minus_Forecast_adjusted'] = -999.9999

if self.wdqms_type in ['TEMP']:
# Only include assimilated data as per WDQMS requirement document
df = df.loc[df['Analysis_Use_Flag'] == 1]

logging.debug(f"Total observations for {self.wdqms_type} after filter: {len(df)}")
logging.info("Exiting wdqms_type_requirements()")

Expand Down Expand Up @@ -448,7 +462,7 @@ def _genqsat(self, df):
temperature background error from GSI diagnostic file
Args:
df : (df) pandas dataframe populated with data from GSI
df : (df) pandas dataframe populated with t and q data from GSI
diagnostic files
wdqms_type : (str) wdqms type file being created
Returns:
Expand All @@ -458,56 +472,53 @@ def _genqsat(self, df):
logging.info("Working in genqstat()")

# Get variable type specific to WDQMS type
q = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['q']
t = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['t']

# Create two dataframes, one for q vales and one for t values
q_df = df.loc[(df['var_id'] == q)]
t_df = df.loc[(df['var_id'] == t)]

# Find where stations are the same
stn_ids = np.intersect1d(t_df.Station_ID, q_df.Station_ID)
q_id = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['q']
t_id = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['t']

df_list = []

# loop through stations, calculate saturation specific humidity,
# and replace background departure values from NetCDF file with
# new ones calculated using the temperature obs and best temperature guess
for stn in stn_ids:
logging.debug(f"Station ID: {stn}")
# Groupby Station_ID
for stn, stn_df in df.groupby('Station_ID'):

t_tmp = t_df.loc[(t_df['Station_ID'] == stn)]
q_tmp = q_df.loc[(q_df['Station_ID'] == stn)]
# Filter the dataframes
q_df = stn_df[stn_df['var_id'] == q_id]
t_df = stn_df[stn_df['var_id'] == t_id]

columns_to_extract = ['Latitude', 'Longitude', 'Pressure', 'Time', 'Observation', 'Obs_Minus_Forecast_adjusted']
columns_to_compare = ['Latitude', 'Longitude', 'Pressure', 'Time']
# Merge dataframes on common keys using an inner join
merged_df = pd.merge(q_df, t_df, on=['Station_ID', 'Latitude', 'Longitude', 'Pressure', 'Time'], suffixes=('_q', '_t'), how='inner')

t_tmp = t_tmp[columns_to_extract]
q_tmp = q_tmp[columns_to_extract]
# Calculate needed values
q_obs = merged_df['Observation_q'].to_numpy() * 1.0e6
q_ges = (merged_df['Observation_q'].to_numpy() - merged_df['Obs_Minus_Forecast_adjusted_q'].to_numpy()) * 1.0e6
t_obs = merged_df['Observation_t'].to_numpy() - 273.16
t_ges = (merged_df['Observation_t'].to_numpy() - merged_df['Obs_Minus_Forecast_adjusted_t'].to_numpy()) - 273.16
pressure = merged_df['Pressure'].to_numpy()

t_tmp = pd.merge(t_tmp, q_tmp, on=columns_to_compare, suffixes=('','_q'), how='inner')
t_tmp = t_tmp[columns_to_extract].drop_duplicates()
qsat_obs = self._temp_2_saturation_specific_humidity(pressure, t_obs)
qsat_ges = self._temp_2_saturation_specific_humidity(pressure, t_ges)

q_obs = q_tmp['Observation'].to_numpy() * 1.0e6
q_ges = (q_tmp['Observation'].to_numpy() -
q_tmp['Obs_Minus_Forecast_adjusted'].to_numpy()) * 1.0e6
t_obs = t_tmp['Observation'].to_numpy() - 273.16
t_ges = (t_tmp['Observation'].to_numpy() -
t_tmp['Obs_Minus_Forecast_adjusted'].to_numpy()) - 273.16
pressure = q_tmp['Pressure'].to_numpy()
# Calculate background departure
bg_dep = (q_obs / qsat_obs) - (q_ges / qsat_ges)

qsat_obs = self._temp_2_saturation_specific_humidity(
pressure, t_obs)
qsat_ges = self._temp_2_saturation_specific_humidity(
pressure, t_ges)
# Grab conditions from merged_df
station_ids = merged_df['Station_ID']
pressure_vals = merged_df['Pressure']
time_vals = merged_df['Time']
latitude = merged_df['Latitude']
longitude = merged_df['Longitude']
conditions = (q_df['Station_ID'].isin(station_ids)) & \
(q_df['Pressure'].isin(pressure_vals)) & \
(q_df['Time'].isin(time_vals)) & \
(q_df['Latitude'].isin(latitude)) & \
(q_df['Longitude'].isin(longitude))

bg_dep = (q_obs / qsat_obs) - (q_ges / qsat_ges)
# Update the background departure values for q_df
q_df = q_df.loc[conditions]
q_df['Obs_Minus_Forecast_adjusted'] = bg_dep

logging.debug(f"Original q background departure: {q_tmp['Obs_Minus_Forecast_adjusted'].values}")
logging.debug(f"New q background departure: {bg_dep}")
df_list.append(pd.concat([t_df, q_df]))

# Replace the current background departure with the new calculated one
df.loc[(df['var_id'] == self.wdqms_type_dict[self.wdqms_type]['variable_ids']['q']) &
(df['Station_ID'] == stn),
'Obs_Minus_Forecast_adjusted'] = bg_dep
df = pd.concat(df_list)

logging.info("Exiting genqstat()")

Expand Down

0 comments on commit 4573f80

Please sign in to comment.