Commit 12.4.1
1) The reprojection functions used here no longer extrapolate beyond the range of the input grid, so the reprojection code has been updated to prevent seams from developing when going from rectilinear projections to polar projections (see the first sketch below). For polar grids, cells in the opposite hemisphere are now NaN instead of being unrealistically extrapolated.

2) The aggregation scripts have been updated so that if a prior aggregation file already exists (e.g., 1979-2020), an extra year of data (e.g., 2021) is appended to the pre-existing netCDF instead of re-processing all years (a condensed sketch follows the aggregation-script diff below).

3) Climatology calculations of aggregated fields have been moved to a separate script.

4) A regional mask I often use is now available as a netCDF file on the associated Google Drive (a loading sketch follows the summary-script diff below).

5) A bug in the 360-day-calendar handling in the timeAdd function has been fixed (see the second sketch below).

--> None of these updates should change the number of tracks generated in the test data (557 tracks for Jan 1979 of ERA5 data at 100 km resolution and default settings -- 166 of which last more than 24 hours).
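Two illustrative sketches follow; neither is code from this commit.

First, a minimal sketch of the item-1 longitude wrap-around, assuming an xESMF regridder built from lat/lon dictionaries as in the reprojection diff below; the toy grids and random data are placeholders:

# Illustrative sketch only -- not code from this commit.
import numpy as np
import xesmf as xe

lons = np.arange(0., 360., 1.)                # rectilinear input longitudes
lats = np.arange(-90., 90.5, 1.)
inArr = np.random.rand(lats.size, lons.size)  # placeholder data field

# Placeholder curvilinear output grid
outlon, outlat = np.meshgrid(np.arange(-180., 180., 1.), np.arange(-90., 90.5, 1.))

# Repeat the first meridian so the interpolation sees 0 deg == 360 deg; now that
# the regridder no longer extrapolates, omitting this leaves a seam of empty
# cells along the wrap longitude.
grid_in = {'lon': np.r_[lons, lons[0]], 'lat': lats}
grid_out = {'lon': outlon, 'lat': outlat}
regridder = xe.Regridder(grid_in, grid_out, 'bilinear')

outArr = regridder(np.c_[inArr, inArr[:, 0]])  # duplicate the first data column too
outArr[outlat < 0] = np.nan                    # opposite hemisphere -> NaN, not extrapolated

Second, a purely hypothetical helper showing the kind of 360-day-calendar arithmetic fixed in item 5; the real timeAdd lives in CycloneModule_12_4, is not shown in this commit, and may have a different signature:

# Hypothetical stand-in, assuming every month has 30 days (12 * 30 = 360).
def time_add_360day(date, ndays):
    '''date = [Y, M, D] with 1 <= M <= 12 and 1 <= D <= 30.'''
    y, m, d = date
    total = y*360 + (m - 1)*30 + (d - 1) + ndays
    y2, rem = divmod(total, 360)
    m2, d2 = divmod(rem, 30)
    return [y2, m2 + 1, d2 + 1]

assert time_add_360day([1979, 12, 30], 1) == [1980, 1, 1]  # clean year rollover
assert time_add_360day([1979, 2, 30], 1) == [1979, 3, 1]   # no Feb-28 special case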
alexcrawford0927 committed Oct 6, 2021
1 parent 759c0ca commit 65f4375
Showing 11 changed files with 616 additions and 413 deletions.
Binary file modified: .DS_Store
Binary file modified: Cyclone Tracking Description - Version 12.4.docx
@@ -3,8 +3,11 @@
Date Created: 10 Mar 2019
Date Modified: 22 Aug 2019 -- Update for Python 3
01 Apr 2020 -- Switched output to netCDF instead of GeoTIFF;
-no longer dependent on gdal module
+no longer dependent on gdal module (start V5)
19 Oct 2020 -- pulled the map creation out of the for loop
+06 Oct 2021 -- added a wrap-around for inputs that prevents
+empty cells from forming along either 180° or
+360° longitude (start V6)
Purpose: Reads in netcdf files & reprojects to the NSIDC EASE2 Grid North.
'''
@@ -17,7 +20,7 @@
import numpy as np
from netCDF4 import Dataset
import xesmf as xe
-import CycloneModule_12_2 as md
+import CycloneModule_12_4 as md

'''********************
Define Variables
@@ -33,8 +36,8 @@
ncext = '.nc'

# Time Variables
-ymin, ymax = 2020, 2020
-mmin, mmax = 12, 12
+ymin, ymax = 1979, 1979
+mmin, mmax = 1, 1
dmin, dmax = 1, 31

mons = ["01","02","03","04","05","06","07","08","09","10","11","12"]
@@ -43,13 +46,13 @@
startdate = [1900,1,1] # The starting date for the reanalysis time steps

# Inputs for reprojection
-xsize, ysize = 50000, -50000 # in meters
-nx, ny = 360, 360 # number of grid cells; use 180 by 180 for 100 km grid
+xsize, ysize = 100000, -100000 # in meters
+nx, ny = 180, 180 # number of grid cells; use 180 by 180 for 100 km grid

# Path Variables
path = "/Volumes/Cressida"
-inpath = path+"/"+ra+"/"+var # path+"/"+ra+"/"+var+"_T319" #
-outpath = "/Volumes/Cressida/"+ra+"/"+var+"_EASE2_N0_"+str(int(xsize/1000))+"km" # path+"/"+ra+"/"+var+"_T319_EASE2_N0_"+str(int(xsize/1000))+"km" #
+inpath = path+"/"+ra+"/"+var #
+outpath = "/Volumes/Cressida/"+ra+"/"+var+"_EASE2_N0_"+str(int(xsize/1000))+"km" #
suppath = path+"/Projections"

'''*******************************************
@@ -77,8 +80,7 @@
ref_netcdf.close()

# Define Grids as Dictionaries
-grid_in = {'lon': lons, 'lat': lats}
+grid_in = {'lon': np.r_[lons,lons[0]], 'lat': lats}
grid_out = {'lon': outlon, 'lat': outlat}

# Create Regridder
@@ -133,7 +135,8 @@
inArr = nc.variables[ncvar][np.where(tlist == timeH)[0][0],:,:]

# Transform data
-outArr = regridder(inArr)
+outArr = regridder(np.c_[inArr,inArr[:,0]])
+outArr[outlat < 0] = np.nan # Limits to Northern Hemisphere

# Add to list
mlist.append(outArr)
@@ -40,7 +40,7 @@ def maxDistFromGenPnt(data):
Set up Environment
*******************************************'''
path = "/Volumes/Cressida"
-inpath = path+"/CycloneTracking/tracking12_4TestTracks"
+inpath = path+"/CycloneTracking/tracking12_9E5R"
outpath = inpath

'''*******************************************
@@ -63,7 +63,7 @@ def maxDistFromGenPnt(data):
bboxmain = "" # The main bbox you're subsetting from; usually "" for "all cyclones", otherwise BBox##

# Time Variables
-starttime = [1979,1,1,0,0,0] # Format: [Y,M,D,H,M,S]
+starttime = [2020,1,1,0,0,0] # Format: [Y,M,D,H,M,S]
endtime = [2021,1,1,0,0,0] # stop BEFORE this time (exclusive)
monthstep = [0,1,0,0,0,0] # A time step that increases by 1 month [Y,M,D,H,M,S]

@@ -153,4 +153,4 @@ def maxDistFromGenPnt(data):

# Print elapsed time
print('Elapsed time:',round(clock()-start,2),'seconds')
print("Complete")
print("Complete")
Version 12_4 Scripts/C5_CycloneStatSummary_12_V3_AllStorms.py (10 changes: 6 additions & 4 deletions)
@@ -6,6 +6,7 @@
19 May 2020 --> Added automatic creation of output directory
02 Jul 2020 --> Removed reliance on GDAL; using regions stored in netcdf file
21 Jan 2021 --> Added displacement as a subset option; added genesis/lysis region check
+06 Oct 2021 --> Replaced pickled regions file with a netcdf
Purpose: Records information that summarizes each track (e.g., length, lifespan,
region of origin, number of merges and splits).
'''
@@ -18,6 +19,7 @@
import pandas as pd
import os
import numpy as np
+import netCDF4 as nc
import CycloneModule_12_4 as md

def maxDistFromGenPnt(data):
@@ -27,11 +29,11 @@ def maxDistFromGenPnt(data):
'''*******************************************
Set up Environment
*******************************************'''
-BBoxNum = "" # Use "BBox##" or "" if no subset
+BBoxNum = "BBox13" # Use "BBox##" or "" if no subset
path = "/Volumes/Cressida"
-version = "12_4TestTracks"
+version = "12_9E5R"
inpath = path+"/CycloneTracking/tracking"+version+"/"+BBoxNum
-regpath = path+"/Projections/EASE2_N0_25km_GenesisRegions.pkl"
+regpath = path+"/Projections/EASE2_N0_25km_GenesisRegions.nc"

'''*******************************************
Define Variables
@@ -66,7 +68,7 @@ def maxDistFromGenPnt(data):
*******************************************'''
print("Main Analysis")
# Load Regions
-regs = pd.read_pickle(regpath)
+regs = nc.Dataset(regpath)['reg'][:].data

# Prep empty pdf
pdf = pd.DataFrame()
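For reference, a minimal sketch of loading the regional mask now shipped as netCDF (item 4 of the commit message); the variable name 'reg' matches the diff above, while the path is illustrative:

# Illustrative sketch only -- not code from this commit.
import numpy as np
import netCDF4 as nc

regpath = "/Volumes/Cressida/Projections/EASE2_N0_25km_GenesisRegions.nc"
with nc.Dataset(regpath) as ds:
    regs = ds['reg'][:].data  # 2-D array of region codes on the EASE2 grid
print(np.unique(regs))        # inspect which region codes are present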
@@ -1,11 +1,13 @@
'''
Author: Alex Crawford
Date Created: 10 Mar 2015
Date Modified: 18 Apr 2016; 10 Jul 2019 (update for Python 3);
10 Sep 2020 (switch from geotiff to netcdf), switch to uniform_filter from scipy.ndimage
30 Sep 2020 (switch back to slower custom smoother because of what scipy does to NaNs)
18 Feb 2021 (edited seasonal calculations to work directly from months, not monthly climatology,
allowing for cross-annual averaging)
+13 Sep 2021: If a pre-existing file exists, this script will append new results
+instead of re-processing all years. Climatologies are no longer in this script.
Purpose: Calculate aggregate statistics (Eulerian and Lagrangian) for either
cyclone tracks or system tracks.
@@ -14,6 +16,7 @@
Track Type (typ): Cyclone or System
Bounding Box ID (bboxnum): 2-digit character string
Time Variables: when to start, end, the time step of the data
+Aggregation Parameters (minls, mintl, kSizekm)
Other notes:
Units for track density are tracks/month/gridcell
@@ -43,9 +46,9 @@
Set up Environment
*******************************************'''
print("Setting up environment.")
-bboxnum = "" # use "" if performing on all cyclones; or BBox##
+bboxnum = "BBox10" # use "" if performing on all cyclones; or BBox##
typ = "System"
-ver = "12_4TestTracks"
+ver = "12_4E5P"

path = "/Volumes/Cressida"
inpath = path+"/CycloneTracking/tracking"+ver
@@ -58,11 +61,9 @@
print("Defining variables")
# Time Variables
starttime = [1979,1,1,0,0,0] # Format: [Y,M,D,H,M,S]
-endtime = [2020,1,1,0,0,0] # stop BEFORE this time (exclusive)
+endtime = [2021,1,1,0,0,0] # stop BEFORE this time (exclusive)
monthstep = [0,1,0,0,0,0] # A Time step that increases by 1 month [Y,M,D,H,M,S]

-ymin, ymax = 1981, 2010 # years for climatology

dateref = [1900,1,1,0,0,0] # [Y,M,D,H,M,S]

seasons = np.array([1,2,3,4,5,6,7,8,9,10,11,12]) # Ending month for three-month seasons (e.g., 2 = DJF)
@@ -73,7 +74,7 @@
# Aggregation Parameters
minls = 1 # minimum lifespan (in days) for a track to be considered
mintl = 1 # minimum track length (in km for version ≥ 11.1; grid cells for version ≤ 10.10) for a track to be considered
-kSizekm = 400 # Full kernel size (in km) for spatial averaging measured between grid cell centers.
+kSizekm = 800 # Full kernel size (in km) for spatial averaging measured between grid cell centers.
## For a 100 km spatial resolution, 400 is a 4 by 4 kernel; i.e., kSize = (kSizekm/spres)

# Variables
@@ -88,10 +89,11 @@
print("Main Analysis")
# Ensure that folders exist to store outputs
try:
-    os.chdir(outpath+"/Aggregation"+typ)
+    os.chdir(outpath+"/Aggregation"+typ+"/"+str(kSizekm)+"km")
except:
-    os.mkdir(outpath+"/Aggregation"+typ)
-    os.chdir(outpath+"/Aggregation"+typ)
+    os.mkdir(outpath+"/Aggregation"+typ+"/"+str(kSizekm)+"km")
+    os.chdir(outpath+"/Aggregation"+typ+"/"+str(kSizekm)+"km")
+priorfiles = os.listdir()

print("Step 1. Load Files and References")
# Read in attributes of reference files
@@ -105,13 +107,21 @@

kSize = int(kSizekm/spres) # This needs to be the full width ('diameter'), not the half width ('radius') for ndimage filters

-YY = str(starttime[0])+'-'+str(endtime[0]-1)
-YY2 = str(ymin) + "-" + str(ymax)
+print("Step 2. Check for Prior Data and update years of new analysis.")
+startyears = [starttime[0] for i in vNames]
+for v in varsi:
+    name = ver+"_AggregationFields_Monthly_"+vNames[v]+".nc"
+    if name in priorfiles:
+        prior = nc.Dataset(name)
+        startyears[v] = int(np.ceil(prior['time'][:].max()))

+# Start at the earliest necessary time for ALL variables of interest
+newstarttime = [np.min(np.array(startyears)[varsi]),1,1,0,0,0]

print("Step 2. Aggregate!")
vlists = [ [] for v in vNames]

-mt = starttime
+mt = newstarttime
while mt != endtime:
# Extract date
Y = str(mt[0])
@@ -172,15 +182,18 @@
### SAVE FILE ###
print("Step 3. Write to NetCDF")
for v in varsi:
-mnc = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+".nc",'w')
+mnc = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+"_NEW.nc",'w')
mnc.createDimension('y', lats.shape[0])
mnc.createDimension('x', lats.shape[1])
mnc.createDimension('time', (endtime[0]-starttime[0])*12)
mnc.description = 'Aggregation of cyclone track characteristics on monthly time scale.'

ncy = mnc.createVariable('y', np.float32, ('y',))
ncx = mnc.createVariable('x', np.float32, ('x',))

+ncy.units, ncx.units = 'm', 'm'
+ncy[:] = np.arange(proj['lat'].shape[0]*spres*1000/-2 + (spres*1000/2),proj['lat'].shape[0]*spres*1000/2, spres*1000)
+ncx[:] = np.arange(proj['lat'].shape[1]*spres*1000/-2 + (spres*1000/2),proj['lat'].shape[1]*spres*1000/2, spres*1000)

# Add times, lats, and lons
nctime = mnc.createVariable('time', np.float32, ('time',))
nctime.units = 'years'
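(In the new ncy[:] and ncx[:] lines above, the y and x coordinates are cell centers in meters: for an N-cell axis at a resolution of res meters, the centers run from -(N*res)/2 + res/2 up to (N*res)/2 - res/2 in steps of res.)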
@@ -193,118 +206,23 @@
nclat = mnc.createVariable('lat', np.float32, ('y','x'))
nclat.units = 'degrees'
nclat[:] = proj['lat'][:]


+vout = np.array(vlists[v])
ncvar = mnc.createVariable(vNames[v], np.float64, ('time','y','x'))
ncvar.units = vunits[v] + ' -- Smoothing:' + str(kSizekm) + ' km'
-ncvar[:] = np.array(vlists[v])

-mnc.close()

-#################################
-##### MONTHLY CLIMATOLOGIES #####
-#################################
-print("Step 4. Aggregation By Month")
-vlists = [ [] for v in vNames ]

-for v in varsi:
-print(vNames[v])
-ncf = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+".nc",'r')
-times = ncf['time'][:]

-for m in range(1,12+1):
-tsub = np.where( ((times-((m-1)/12))%1 == 0) & (times >= ymin) & (times < ymax+1) )[0]

-# Take Monthly Climatologies

-field_M = ncf[vNames[v]][tsub,:,:].data
-field_MC = np.apply_along_axis(np.nanmean, 0, field_M)
-vlists[v].append(field_MC)
-ncf.close()

-# Write NetCDF File
-mname = ver+"_AggregationFields_MonthlyClimatology_"+YY2+".nc"
-try:
-mnc = nc.Dataset(mname,'r+')
-except:
-mnc = nc.Dataset(mname,'w',format="NETCDF4")
-mnc.createDimension('y', lats.shape[0])
-mnc.createDimension('x', lats.shape[1])
-mnc.createDimension('time', 12)
-mnc.description = 'Climatology ('+YY2+') of aggregation of cyclone track characteristics on monthly time scale.'

-ncy = mnc.createVariable('y', np.float32, ('y',))
-ncx = mnc.createVariable('x', np.float32, ('x',))

-# Add times, lats, and lons
-nctime = mnc.createVariable('time', np.float32, ('time',))
-nctime.units = 'months'
-nctime[:] = np.arange(1,12+1,1)

-nclon = mnc.createVariable('lon', np.float32, ('y','x'))
-nclon.units = 'degrees'
-nclon[:] = proj['lon'][:]

-nclat = mnc.createVariable('lat', np.float32, ('y','x'))
-nclat.units = 'degrees'
-nclat[:] = proj['lat'][:]

-for v in varsi:
-try:
-ncvar = mnc.createVariable(vNames[v], np.float64, ('time','y','x'))
-ncvar.units = vunits[v] + ' -- Smoothing:' + str(kSizekm) + ' km'
-ncvar[:] = np.array(vlists[v])
-except:
-mnc[vNames[v]][:] = np.array(vlists[v])

-mnc.close()

-##### SEASONAL MEANS ###
-print("Step 5. Aggregate By Season")
-sname = ver+"_AggregationFields_SeasonalClimatology_"+YY2+".nc"
-if sname in os.listdir():
-snc = nc.Dataset(sname,'r+')
-else:
-snc = nc.Dataset(sname,'w')
-snc.createDimension('y', lats.shape[0])
-snc.createDimension('x', lats.shape[1])
-snc.createDimension('time', len(seasons))
-snc.description = 'Climatology ('+YY+') of aggregation of cyclone track characteristics on seasonal time scale.'

-ncy = snc.createVariable('y', np.float32, ('y',))
-ncx = snc.createVariable('x', np.float32, ('x',))

-# Add times, lats, and lons
-nctime = snc.createVariable('time', np.int8, ('time',))
-nctime.units = 'seasonal end months'
-nctime[:] = seasons

-nclon = snc.createVariable('lon', np.float32, ('y','x'))
-nclon.units = 'degrees'
-nclon[:] = proj['lon'][:]

-nclat = snc.createVariable('lat', np.float32, ('y','x'))
-nclat.units = 'degrees'
-nclat[:] = proj['lat'][:]

-for v in varsi:
-ncf = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+".nc",'r')
-times = ncf['time'][:]

-print(" " + vNames[v])

-varr = ncf[vNames[v]][:]
-seaslist = []
-for si in seasons:
-tsub = np.where( ((times-((si-1)/12))%1 == 0) & (times >= ymin) & (times < ymax+1) )[0]
-seasarr = (varr[tsub,:,:] + varr[tsub-1,:,:] + varr[tsub-2,:,:]) / 3
-seaslist.append( np.apply_along_axis(np.nanmean, 0, seasarr) )

-try:
-ncvar = snc.createVariable(vNames[v], np.float64, ('time','y','x'))
-ncvar.units = vunits[v] + ' -- Smoothing:' + str(kSizekm) + ' km'
-ncvar[:] = np.array(seaslist)
-except:
-snc[vNames[v]][:] = np.array(seaslist)

-ncf.close()
-snc.close()
+name = ver+"_AggregationFields_Monthly_"+vNames[v]+".nc"
+if name in priorfiles: # Append data if prior data existed...
+    if vout.shape[0] > 0: # ...and there is new data to be added
+        prior = nc.Dataset(name)
+        ncvar[:] = np.concatenate( ( prior[vNames[v]][prior['time'][:].data <= newstarttime[0],:,:].data , np.where(vout == 0,np.nan,vout) ) )
+        prior.close(), mnc.close()
+        os.remove(name) # Remove old file
+        os.rename(ver+"_AggregationFields_Monthly_"+vNames[v]+"_NEW.nc", name) # rename new file to standard name
+else: # Create new data if no prior data existed
+    ncvar[:] = np.where(vout == 0,np.nan,vout)
+    mnc.close()
+    os.rename(ver+"_AggregationFields_Monthly_"+vNames[v]+"_NEW.nc", name) # rename new file to standard name


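A condensed sketch of the append-instead-of-reprocess logic above (item 2 of the commit message); the file and variable names here are illustrative, not taken from the scripts:

# Illustrative sketch only -- not code from this commit. The 'time'
# coordinate holds decimal years, so December 2020 is stored as 2020 + 11/12
# and np.ceil of the maximum gives the first year missing from the file.
import os
import numpy as np
import netCDF4 as nc

name = "12_4E5P_AggregationFields_Monthly_trkden.nc"  # hypothetical file name
startyear = 1979                                      # default: process all years
if name in os.listdir():
    with nc.Dataset(name) as prior:
        startyear = int(np.ceil(prior['time'][:].max()))  # e.g., ceil(2020.9167) = 2021

# ...aggregate months from startyear onward into new fields of shape (time, y, x),
# then write the prior months plus the new months to a *_NEW.nc file and swap it
# into place with os.remove(name) / os.rename(..., name), as the diff above does.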
