#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 18 14:22:18 2019
@author: leguillou
"""
import os, fnmatch
import netCDF4 as nc
import xarray as xr
import numpy as np
from datetime import datetime,timedelta
def load_dataset(directory,file,name_time,name_lon,name_lat,name_var,time_min,time_max,lon_min,lon_max,lat_min,lat_max,options,dtout):
"""
NAME
load_Refprods
DESCRIPTION
Args:
directory (string): directory in which the the netcdf file is stored
file (string): name of the netcdf file
name_time (string): name of time coordinate in the netcdf file
name_lon (string): name of longitude coordinate in the netcdf files
name_lat (string): name of latitude coordinate in the netcdf files
name_var (string): name of the variable (typically ssh) to process in the netcdf files
dt_start (datetime object): time of the begining of the analysis window
dt_end (datetime object): time of the end of the analysis window
dt_ref (datetime object): refrence time used in the netcdf file
datetime_type (bool): if true, return datetype formatting timestamps
Returns:
var (3D numpy array): fields of the simulation stored in a numpy array
dt_out_time (1D numpy array): timestamps of the fields
lon (2D numpy array): longitude of the fields
lat (2D numpy array): latitude of the fields
"""
    # Open data
    ds = xr.open_mfdataset(f'{directory}/{file}', **options)
    # Temporal selection
    if None not in [time_min, time_max]:
        try:
            ds = ds.sel({name_time: slice(time_min, time_max)})
        except Exception:
            # Fall back to masking when the time coordinate cannot be sliced directly
            ds = ds.where((time_min <= ds[name_time]) & (time_max >= ds[name_time]), drop=True)
    # Spatial selection
    if None not in [lon_min, lon_max]:
        ds = ds.sel({name_lon: slice(lon_min, lon_max)})
    if None not in [lat_min, lat_max]:
        ds = ds.sel({name_lat: slice(lat_min, lat_max)})
    # Temporal subsampling to the requested output timestep
    if dtout is not None:
        dt = (ds[name_time][1] - ds[name_time][0]).values / np.timedelta64(1, 's')
        isub = max(1, int(dtout // dt))
    else:
        isub = 1
    time = ds[name_time][::isub].values
    var = ds[name_var][::isub, :, :].values
    # Longitude/latitude grids, depending on the dimensionality of the coordinates
    if len(ds[name_lon].shape) == 3:
        lon = ds[name_lon][0, :, :].values
        lat = ds[name_lat][0, :, :].values
    elif len(ds[name_lon].shape) == 1:
        lon, lat = np.meshgrid(ds[name_lon].values, ds[name_lat].values)
    else:
        lon = ds[name_lon].values
        lat = ds[name_lat].values
    # Convert numpy datetime64 timestamps to python datetime objects
    datetimes = [datetime.utcfromtimestamp((dt64 - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')) for dt64 in time]
    datetimes = np.asarray(datetimes)
    return var, datetimes, lon, lat
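
# A minimal usage sketch for load_dataset (the directory, file pattern,
# coordinate names, bounds and options below are hypothetical and only
# illustrate the call signature):
#
#   var, times, lon, lat = load_dataset(
#       directory='/path/to/model/outputs', file='model_ssh_*.nc',
#       name_time='time', name_lon='lon', name_lat='lat', name_var='ssh',
#       time_min='2019-01-01', time_max='2019-01-31',
#       lon_min=0., lon_max=10., lat_min=40., lat_max=50.,
#       options={'combine': 'by_coords'}, dtout=3600.)
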
def load_DAprods(directory, name_lon, name_lat, name_var, dt_start, dt_end, dt_timestep, prefixe = "*", suffixe = ""):
"""
NAME
load_DAprods
DESCRIPTION
Args:
directory (string): directory in which the outputs are stored
name_lon (string): name of longitude coordinate in the netcdf files
name_lat (string): name of latitude coordinate in the netcdf files
name_var (string): name of the variable (typically ssh) to process in the netcdf files
dt_start (datetime object): time of the begining of the analysis window
dt_end (datetime object): time of the end of the analysis window
dt_timestep (timedelta object): timestep of the outputs (typically one hour)
prefixe (string): specific prefix of the simulation, if several simulations are stored in the same directory
suffixe (string): specific suffixe of the simulation, if several simulations are stored in the same directory
Returns:
fields (3D numpy array): fields of the simulation stored in a numpy array
datetimes (1D numpy array): timestamps of the fields
lon (2D numpy array): longitude of the fields
lat (2D numpy array): latitude of the fields
"""
    listOfFiles = os.listdir(directory)
    fields = []
    datetimes = []
    dt_curr = dt_start
    first_iter = True
    while dt_curr <= dt_end:
        # Build the file name pattern for the current timestamp
        yyyy_curr = str(dt_curr.year)
        mm_curr = str(dt_curr.month).zfill(2)
        dd_curr = str(dt_curr.day).zfill(2)
        HH_curr = str(dt_curr.hour).zfill(2)
        MM_curr = str(dt_curr.minute).zfill(2)
        pattern = prefixe + '_y' + yyyy_curr + 'm' + mm_curr + 'd' + dd_curr + 'h' + HH_curr + MM_curr + suffixe + "*" + ".nc"
        file = [f for f in listOfFiles if fnmatch.fnmatch(f, pattern)]
        if len(file) > 1:
            print("Error: several outputs match the pattern... Please set a more specific prefixe.")
            return
        elif len(file) == 0:
            print("No file matching the pattern: " + pattern)
            dt_curr += dt_timestep
            continue
        else:
            file = file[0]
        fid_deg = nc.Dataset(os.path.join(directory, file))
        if first_iter:
            # Read the grid once; longitudes are wrapped to [0, 360)
            lon = np.array(fid_deg.variables[name_lon][:, :]) % 360
            lat = np.array(fid_deg.variables[name_lat][:, :])
            first_iter = False
        # Average the variable over the time dimension of the file
        field_curr = np.mean(fid_deg.variables[name_var][:, :, :], axis=0)
        if np.any(np.isnan(field_curr)):
            print(file, ': NaN value found. Stopping here!')
            break
        else:
            fields.append(field_curr)
            datetimes.append(dt_curr)
        dt_curr += dt_timestep
    return np.asarray(fields), np.asarray(datetimes), lon, lat
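
# A minimal usage sketch for load_DAprods (the directory, variable names,
# prefix and output timestep below are hypothetical):
#
#   fields, times, lon, lat = load_DAprods(
#       directory='/path/to/DA/outputs', name_lon='nav_lon', name_lat='nav_lat',
#       name_var='ssh', dt_start=datetime(2012, 10, 1),
#       dt_end=datetime(2012, 10, 10), dt_timestep=timedelta(hours=1),
#       prefixe='run1')
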
def load_DUACSprods(directory,file,name_time,name_lon,name_lat,name_ssh,dt_ref_duacs=datetime(1950,1,1,0,0,0),bounds=None):
"""
NAME
load_DUACSprods
DESCRIPTION
Args:
directory (string): directory in which the outputs are stored
file (string): name of the netcdf file
name_time (string): name of time coordinate in the netcdf file
name_lon (string): name of longitude coordinate in the netcdf file
name_lat (string): name of latitude coordinate in the netcdf file
name_ssh (string): name of ssh to process in the netcdf file
dt_ref_duacs (datetime object): refrence time used in the netcdf file
bounds (list): domain boundaries (optional) to extract the fields
Returns:
ssh2d_duacs (3D numpy array): fields of the simulation stored in a numpy array
datetimes_duacs (1D numpy array): timestamps of the fields
lon2d_duacs (2D numpy array): longitude of the fields
lat2d_duacs (2D numpy array): latitude of the fields
"""
    ncin = nc.Dataset(os.path.join(directory, file))
    time_duacs = np.array(ncin.variables[name_time][:])
    # Longitudes are wrapped to [0, 360)
    lon_duacs = np.array(ncin.variables[name_lon][:]) % 360
    lat_duacs = np.array(ncin.variables[name_lat][:])
    lon2d_duacs, lat2d_duacs = np.meshgrid(lon_duacs, lat_duacs)
    ssh2d_duacs = np.ma.array(ncin.variables[name_ssh][:, :, :])
    # Convert time (days since dt_ref_duacs) to datetimes
    datetimes_duacs = [dt_ref_duacs + timedelta(days=d) for d in time_duacs]
    datetimes_duacs = np.asarray(datetimes_duacs)
    # Extraction of the requested domain (with a 0.25 degree margin)
    if bounds is not None:
        lon_min, lon_max, lat_min, lat_max = bounds
        ind_lon = np.where(np.abs(lon_duacs - (lon_max + lon_min) / 2) <= (lon_max - lon_min) / 2 + 0.25)[0]
        ind_lat = np.where(np.abs(lat_duacs - (lat_max + lat_min) / 2) <= (lat_max - lat_min) / 2 + 0.25)[0]
        lon2d_duacs = lon2d_duacs[ind_lat[0]:(ind_lat[-1] + 1), ind_lon[0]:(ind_lon[-1] + 1)]
        lat2d_duacs = lat2d_duacs[ind_lat[0]:(ind_lat[-1] + 1), ind_lon[0]:(ind_lon[-1] + 1)]
        ssh2d_duacs = ssh2d_duacs[:, ind_lat[0]:(ind_lat[-1] + 1), ind_lon[0]:(ind_lon[-1] + 1)]
    return ssh2d_duacs, datetimes_duacs, lon2d_duacs, lat2d_duacs
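
# A minimal usage sketch for load_DUACSprods (the file name, variable names and
# domain bounds below are hypothetical):
#
#   ssh, times, lon2d, lat2d = load_DUACSprods(
#       directory='/path/to/duacs', file='duacs_allsat_maps.nc',
#       name_time='time', name_lon='longitude', name_lat='latitude',
#       name_ssh='adt', bounds=[0., 10., 40., 50.])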