From a08e36b5ac7498416bc629cff91675238e0c604f Mon Sep 17 00:00:00 2001 From: Emily Liu Date: Mon, 8 Apr 2024 02:55:28 +0000 Subject: [PATCH] Remove duplicates and replace Python function in1d with function merge for matching rows with selected columns between two data frames --- ush/wdqms.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ush/wdqms.py b/ush/wdqms.py index 4bc886ad7b..5917499cd0 100755 --- a/ush/wdqms.py +++ b/ush/wdqms.py @@ -66,6 +66,10 @@ def __init__(self, inputfiles, wdqms_type, outdir, # Grab actual datetimes from datetime + timedelta df_total = self._get_datetimes(df_total) + # Drop duplicates + columns_to_compare = ['Station_ID', 'var_id', 'Observation_Type', 'Latitude', 'Longitude', 'Pressure', 'Time'] + df_total = df_total.drop_duplicates(subset=columns_to_compare, keep='first') + # Adjust relative humidity data df_total = self._genqsat(df_total) @@ -474,10 +478,13 @@ def _genqsat(self, df): t_tmp = t_df.loc[(t_df['Station_ID'] == stn)] q_tmp = q_df.loc[(q_df['Station_ID'] == stn)] - t_tmp = t_tmp.loc[(np.in1d(t_tmp['Time'], q_tmp['Time'])) & - (np.in1d(t_tmp['Pressure'], q_tmp['Pressure'])) & - (np.in1d(t_tmp['Latitude'], q_tmp['Latitude'])) & - (np.in1d(t_tmp['Longitude'], q_tmp['Longitude']))] + columns_to_extract = ['Latitude', 'Longitude', 'Pressure', 'Time', 'Observation', 'Obs_Minus_Forecast_adjusted'] + columns_to_compare = ['Latitude', 'Longitude', 'Pressure', 'Time'] + + t_tmp = t_tmp[columns_to_extract] + q_tmp = q_tmp[columns_to_extract] + + t_tmp = pd.merge(t_tmp, q_tmp, on=columns_to_compare, suffixes=('','_q'), how='inner') q_obs = q_tmp['Observation'].to_numpy() * 1.0e6 q_ges = (q_tmp['Observation'].to_numpy() -