Skip to content

Commit

Permalink
Remove duplicates and replace Python function in1d with function merge
Browse files (browse the repository at this point in the history)
for matching rows with selected columns between two data frames
  • Loading branch information
emilyhcliu committed Apr 8, 2024
1 parent ad5d099 commit a08e36b
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions ush/wdqms.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -66,6 +66,10 @@ def __init__(self, inputfiles, wdqms_type, outdir,
# Grab actual datetimes from datetime + timedelta
df_total = self._get_datetimes(df_total)

# Drop duplicates
columns_to_compare = ['Station_ID', 'var_id', 'Observation_Type', 'Latitude', 'Longitude', 'Pressure', 'Time']
df_total = df_total.drop_duplicates(subset=columns_to_compare, keep='first')

# Adjust relative humidity data
df_total = self._genqsat(df_total)

Expand Down Expand Up @@ -474,10 +478,13 @@ def _genqsat(self, df):
t_tmp = t_df.loc[(t_df['Station_ID'] == stn)]
q_tmp = q_df.loc[(q_df['Station_ID'] == stn)]

t_tmp = t_tmp.loc[(np.in1d(t_tmp['Time'], q_tmp['Time'])) &
(np.in1d(t_tmp['Pressure'], q_tmp['Pressure'])) &
(np.in1d(t_tmp['Latitude'], q_tmp['Latitude'])) &
(np.in1d(t_tmp['Longitude'], q_tmp['Longitude']))]
columns_to_extract = ['Latitude', 'Longitude', 'Pressure', 'Time', 'Observation', 'Obs_Minus_Forecast_adjusted']
columns_to_compare = ['Latitude', 'Longitude', 'Pressure', 'Time']

t_tmp = t_tmp[columns_to_extract]
q_tmp = q_tmp[columns_to_extract]

t_tmp = pd.merge(t_tmp, q_tmp, on=columns_to_compare, suffixes=('','_q'), how='inner')

q_obs = q_tmp['Observation'].to_numpy() * 1.0e6
q_ges = (q_tmp['Observation'].to_numpy() -
Expand Down

0 comments on commit a08e36b

Please sign in to comment.