Commit 12.4.1
1) The reprojection functions used here no longer extrapolate beyond the range of the input grid, so the reprojection code has been updated to prevent seams from developing when going from rectilinear projections to polar projections (see the first sketch below). For polar grids, cells in the opposite hemisphere are now NaN instead of being unrealistically extrapolated.

2) The aggregation scripts have been updated so that if a prior aggregation file already exists (e.g., 1979-2020), an extra year of data (e.g., 2021) is appended to the pre-existing netCDF instead of re-processing all years (a condensed sketch follows the aggregation-script diff below).

3) Climatology calculations of aggregated fields have been moved to a separate script.

4) A regional mask I often use is now available as a netCDF file on the associated Google Drive (a loading sketch follows the summary-script diff below).

5) A bug in the 360-day-calendar handling in the timeAdd function has been fixed (see the second sketch below).

--> None of these updates should change the number of tracks generated in the test data (557 tracks for Jan 1979 of ERA5 data at 100 km resolution and default settings -- 166 of which last more than 24 hours).
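Two illustrative sketches follow; neither is code from this commit.

First, a minimal sketch of the item-1 longitude wrap-around, assuming an xESMF regridder built from lat/lon dictionaries as in the reprojection diff below; the toy grids and random data are placeholders:

# Illustrative sketch only -- not code from this commit.
import numpy as np
import xesmf as xe

lons = np.arange(0., 360., 1.)                # rectilinear input longitudes
lats = np.arange(-90., 90.5, 1.)
inArr = np.random.rand(lats.size, lons.size)  # placeholder data field

# Placeholder curvilinear output grid
outlon, outlat = np.meshgrid(np.arange(-180., 180., 1.), np.arange(-90., 90.5, 1.))

# Repeat the first meridian so the interpolation sees 0 deg == 360 deg; now that
# the regridder no longer extrapolates, omitting this leaves a seam of empty
# cells along the wrap longitude.
grid_in = {'lon': np.r_[lons, lons[0]], 'lat': lats}
grid_out = {'lon': outlon, 'lat': outlat}
regridder = xe.Regridder(grid_in, grid_out, 'bilinear')

outArr = regridder(np.c_[inArr, inArr[:, 0]])  # duplicate the first data column too
outArr[outlat < 0] = np.nan                    # opposite hemisphere -> NaN, not extrapolated

Second, a purely hypothetical helper showing the kind of 360-day-calendar arithmetic fixed in item 5; the real timeAdd lives in CycloneModule_12_4, is not shown in this commit, and may have a different signature:

# Hypothetical stand-in, assuming every month has 30 days (12 * 30 = 360).
def time_add_360day(date, ndays):
    '''date = [Y, M, D] with 1 <= M <= 12 and 1 <= D <= 30.'''
    y, m, d = date
    total = y*360 + (m - 1)*30 + (d - 1) + ndays
    y2, rem = divmod(total, 360)
    m2, d2 = divmod(rem, 30)
    return [y2, m2 + 1, d2 + 1]

assert time_add_360day([1979, 12, 30], 1) == [1980, 1, 1]  # clean year rollover
assert time_add_360day([1979, 2, 30], 1) == [1979, 3, 1]   # no Feb-28 special case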
alexcrawford0927 committed Oct 6, 2021
1 parent 759c0ca commit 65f4375
Showing 11 changed files with 616 additions and 413 deletions.
Binary file modified: .DS_Store
Binary file modified: Cyclone Tracking Description - Version 12.4.docx
@@ -3,8 +3,11 @@
Date Created: 10 Mar 2019
Date Modified: 22 Aug 2019 -- Update for Python 3
01 Apr 2020 -- Switched output to netCDF instead of GeoTIFF;
-no longer dependent on gdal module
+no longer dependent on gdal module (start V5)
19 Oct 2020 -- pulled the map creation out of the for loop
+06 Oct 2021 -- added a wrap-around for inputs that prevents
+empty cells from forming along either 180° or
+360° longitude (start V6)
Purpose: Reads in netcdf files & reprojects to the NSIDC EASE2 Grid North.
'''
@@ -17,7 +20,7 @@
import numpy as np
from netCDF4 import Dataset
import xesmf as xe
-import CycloneModule_12_2 as md
+import CycloneModule_12_4 as md

'''********************
Define Variables
@@ -33,8 +36,8 @@
ncext = '.nc'

# Time Variables
-ymin, ymax = 2020, 2020
-mmin, mmax = 12, 12
+ymin, ymax = 1979, 1979
+mmin, mmax = 1, 1
dmin, dmax = 1, 31

mons = ["01","02","03","04","05","06","07","08","09","10","11","12"]
@@ -43,13 +46,13 @@
startdate = [1900,1,1] # The starting date for the reanalysis time steps

# Inputs for reprojection
-xsize, ysize = 50000, -50000 # in meters
-nx, ny = 360, 360 # number of grid cells; use 180 by 180 for 100 km grid
+xsize, ysize = 100000, -100000 # in meters
+nx, ny = 180, 180 # number of grid cells; use 180 by 180 for 100 km grid

# Path Variables
path = "/Volumes/Cressida"
-inpath = path+"/"+ra+"/"+var # path+"/"+ra+"/"+var+"_T319" #
-outpath = "/Volumes/Cressida/"+ra+"/"+var+"_EASE2_N0_"+str(int(xsize/1000))+"km" # path+"/"+ra+"/"+var+"_T319_EASE2_N0_"+str(int(xsize/1000))+"km" #
+inpath = path+"/"+ra+"/"+var #
+outpath = "/Volumes/Cressida/"+ra+"/"+var+"_EASE2_N0_"+str(int(xsize/1000))+"km" #
suppath = path+"/Projections"

'''*******************************************
@@ -77,8 +80,7 @@
ref_netcdf.close()

# Define Grids as Dictionaries
-grid_in = {'lon': lons, 'lat': lats}
+grid_in = {'lon': np.r_[lons,lons[0]], 'lat': lats}
grid_out = {'lon': outlon, 'lat': outlat}

# Create Regridder
@@ -133,7 +135,8 @@
inArr = nc.variables[ncvar][np.where(tlist == timeH)[0][0],:,:]

# Transform data
-outArr = regridder(inArr)
+outArr = regridder(np.c_[inArr,inArr[:,0]])
+outArr[outlat < 0] = np.nan # Limits to Northern Hemisphere

# Add to list
mlist.append(outArr)
@@ -40,7 +40,7 @@ def maxDistFromGenPnt(data):
Set up Environment
*******************************************'''
path = "/Volumes/Cressida"
-inpath = path+"/CycloneTracking/tracking12_4TestTracks"
+inpath = path+"/CycloneTracking/tracking12_9E5R"
outpath = inpath

'''*******************************************
@@ -63,7 +63,7 @@ def maxDistFromGenPnt(data):
bboxmain = "" # The main bbox you're subsetting from; usually "" for "all cyclones", otherwise BBox##

# Time Variables
-starttime = [1979,1,1,0,0,0] # Format: [Y,M,D,H,M,S]
+starttime = [2020,1,1,0,0,0] # Format: [Y,M,D,H,M,S]
endtime = [2021,1,1,0,0,0] # stop BEFORE this time (exclusive)
monthstep = [0,1,0,0,0,0] # A time step that increases by 1 month [Y,M,D,H,M,S]

@@ -153,4 +153,4 @@ def maxDistFromGenPnt(data):

# Print elapsed time
print('Elapsed time:',round(clock()-start,2),'seconds')
print("Complete")
print("Complete")
Version 12_4 Scripts/C5_CycloneStatSummary_12_V3_AllStorms.py (10 changes: 6 additions & 4 deletions)
@@ -6,6 +6,7 @@
19 May 2020 --> Added automatic creation of output directory
02 Jul 2020 --> Removed reliance on GDAL; using regions stored in netcdf file
21 Jan 2021 --> Added displacement as a subset option; added genesis/lysis region check
+06 Oct 2021 --> Replaced pickled regions file with a netcdf
Purpose: Records information that summarizes each track (e.g., length, lifespan,
region of origin, number of merges and splits).
'''
@@ -18,6 +19,7 @@
import pandas as pd
import os
import numpy as np
+import netCDF4 as nc
import CycloneModule_12_4 as md

def maxDistFromGenPnt(data):
@@ -27,11 +29,11 @@ def maxDistFromGenPnt(data):
'''*******************************************
Set up Environment
*******************************************'''
-BBoxNum = "" # Use "BBox##" or "" if no subset
+BBoxNum = "BBox13" # Use "BBox##" or "" if no subset
path = "/Volumes/Cressida"
-version = "12_4TestTracks"
+version = "12_9E5R"
inpath = path+"/CycloneTracking/tracking"+version+"/"+BBoxNum
-regpath = path+"/Projections/EASE2_N0_25km_GenesisRegions.pkl"
+regpath = path+"/Projections/EASE2_N0_25km_GenesisRegions.nc"

'''*******************************************
Define Variables
@@ -66,7 +68,7 @@ def maxDistFromGenPnt(data):
*******************************************'''
print("Main Analysis")
# Load Regions
-regs = pd.read_pickle(regpath)
+regs = nc.Dataset(regpath)['reg'][:].data

# Prep empty pdf
pdf = pd.DataFrame()
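For reference, a minimal sketch of loading the regional mask now shipped as netCDF (item 4 of the commit message); the variable name 'reg' matches the diff above, while the path is illustrative:

# Illustrative sketch only -- not code from this commit.
import numpy as np
import netCDF4 as nc

regpath = "/Volumes/Cressida/Projections/EASE2_N0_25km_GenesisRegions.nc"
with nc.Dataset(regpath) as ds:
    regs = ds['reg'][:].data  # 2-D array of region codes on the EASE2 grid
print(np.unique(regs))        # inspect which region codes are present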
@@ -1,11 +1,13 @@
'''
Author: Alex Crawford
Date Created: 10 Mar 2015
Date Modified: 18 Apr 2016; 10 Jul 2019 (update for Python 3);
10 Sep 2020 (switch from geotiff to netcdf), switch to uniform_filter from scipy.ndimage
30 Sep 2020 (switch back to slower custom smoother because of what scipy does to NaNs)
18 Feb 2021 (edited seasonal calculations to work directly from months, not monthly climatology,
allowing for cross-annual averaging)
+13 Sep 2021: If a pre-existing file exists, this script will append new results
+instead of re-processing all years. Climatologies are no longer in this script.
Purpose: Calculate aggregate statistics (Eulerian and Lagrangian) for either
cyclone tracks or system tracks.
@@ -14,6 +16,7 @@
Track Type (typ): Cyclone or System
Bounding Box ID (bboxnum): 2-digit character string
Time Variables: when to start, end, the time step of the data
+Aggregation Parameters (minls, mintl, kSizekm)
Other notes:
Units for track density are tracks/month/gridcell
@@ -43,9 +46,9 @@
Set up Environment
*******************************************'''
print("Setting up environment.")
-bboxnum = "" # use "" if performing on all cyclones; or BBox##
+bboxnum = "BBox10" # use "" if performing on all cyclones; or BBox##
typ = "System"
-ver = "12_4TestTracks"
+ver = "12_4E5P"

path = "/Volumes/Cressida"
inpath = path+"/CycloneTracking/tracking"+ver
@@ -58,11 +61,9 @@
print("Defining variables")
# Time Variables
starttime = [1979,1,1,0,0,0] # Format: [Y,M,D,H,M,S]
-endtime = [2020,1,1,0,0,0] # stop BEFORE this time (exclusive)
+endtime = [2021,1,1,0,0,0] # stop BEFORE this time (exclusive)
monthstep = [0,1,0,0,0,0] # A Time step that increases by 1 month [Y,M,D,H,M,S]

-ymin, ymax = 1981, 2010 # years for climatology

dateref = [1900,1,1,0,0,0] # [Y,M,D,H,M,S]

seasons = np.array([1,2,3,4,5,6,7,8,9,10,11,12]) # Ending month for three-month seasons (e.g., 2 = DJF)
@@ -73,7 +74,7 @@
# Aggregation Parameters
minls = 1 # minimum lifespan (in days) for a track to be considered
mintl = 1 # minimum track length (in km for version ≥ 11.1; grid cells for version ≤ 10.10) for a track to be considered
-kSizekm = 400 # Full kernel size (in km) for spatial averaging measured between grid cell centers.
+kSizekm = 800 # Full kernel size (in km) for spatial averaging measured between grid cell centers.
## For a 100 km spatial resolution, 400 is a 4 by 4 kernel; i.e., kSize = (kSizekm/spres)

# Variables
@@ -88,10 +89,11 @@
print("Main Analysis")
# Ensure that folders exist to store outputs
try:
-    os.chdir(outpath+"/Aggregation"+typ)
+    os.chdir(outpath+"/Aggregation"+typ+"/"+str(kSizekm)+"km")
except:
-    os.mkdir(outpath+"/Aggregation"+typ)
-    os.chdir(outpath+"/Aggregation"+typ)
+    os.mkdir(outpath+"/Aggregation"+typ+"/"+str(kSizekm)+"km")
+    os.chdir(outpath+"/Aggregation"+typ+"/"+str(kSizekm)+"km")
+priorfiles = os.listdir()

print("Step 1. Load Files and References")
# Read in attributes of reference files
@@ -105,13 +107,21 @@

kSize = int(kSizekm/spres) # This needs to be the full width ('diameter'), not the half width ('radius') for ndimage filters

-YY = str(starttime[0])+'-'+str(endtime[0]-1)
-YY2 = str(ymin) + "-" + str(ymax)
+print("Step 2. Check for Prior Data and update years of new analysis.")
+startyears = [starttime[0] for i in vNames]
+for v in varsi:
+    name = ver+"_AggregationFields_Monthly_"+vNames[v]+".nc"
+    if name in priorfiles:
+        prior = nc.Dataset(name)
+        startyears[v] = int(np.ceil(prior['time'][:].max()))

+# Start at the earliest necessary time for ALL variables of interest
+newstarttime = [np.min(np.array(startyears)[varsi]),1,1,0,0,0]

print("Step 2. Aggregate!")
vlists = [ [] for v in vNames]

-mt = starttime
+mt = newstarttime
while mt != endtime:
# Extract date
Y = str(mt[0])
@@ -172,15 +182,18 @@
### SAVE FILE ###
print("Step 3. Write to NetCDF")
for v in varsi:
-mnc = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+".nc",'w')
+mnc = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+"_NEW.nc",'w')
mnc.createDimension('y', lats.shape[0])
mnc.createDimension('x', lats.shape[1])
mnc.createDimension('time', (endtime[0]-starttime[0])*12)
mnc.description = 'Aggregation of cyclone track characteristics on monthly time scale.'

ncy = mnc.createVariable('y', np.float32, ('y',))
ncx = mnc.createVariable('x', np.float32, ('x',))

+ncy.units, ncx.units = 'm', 'm'
+ncy[:] = np.arange(proj['lat'].shape[0]*spres*1000/-2 + (spres*1000/2),proj['lat'].shape[0]*spres*1000/2, spres*1000)
+ncx[:] = np.arange(proj['lat'].shape[1]*spres*1000/-2 + (spres*1000/2),proj['lat'].shape[1]*spres*1000/2, spres*1000)

# Add times, lats, and lons
nctime = mnc.createVariable('time', np.float32, ('time',))
nctime.units = 'years'
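(In the new ncy[:] and ncx[:] lines above, the y and x coordinates are cell centers in meters: for an N-cell axis at a resolution of res meters, the centers run from -(N*res)/2 + res/2 up to (N*res)/2 - res/2 in steps of res.)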
@@ -193,118 +206,23 @@
nclat = mnc.createVariable('lat', np.float32, ('y','x'))
nclat.units = 'degrees'
nclat[:] = proj['lat'][:]


+vout = np.array(vlists[v])
ncvar = mnc.createVariable(vNames[v], np.float64, ('time','y','x'))
ncvar.units = vunits[v] + ' -- Smoothing:' + str(kSizekm) + ' km'
-ncvar[:] = np.array(vlists[v])

-mnc.close()

-#################################
-##### MONTHLY CLIMATOLOGIES #####
-#################################
-print("Step 4. Aggregation By Month")
-vlists = [ [] for v in vNames ]

-for v in varsi:
-print(vNames[v])
-ncf = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+".nc",'r')
-times = ncf['time'][:]

-for m in range(1,12+1):
-tsub = np.where( ((times-((m-1)/12))%1 == 0) & (times >= ymin) & (times < ymax+1) )[0]

-# Take Monthly Climatologies

-field_M = ncf[vNames[v]][tsub,:,:].data
-field_MC = np.apply_along_axis(np.nanmean, 0, field_M)
-vlists[v].append(field_MC)
-ncf.close()

-# Write NetCDF File
-mname = ver+"_AggregationFields_MonthlyClimatology_"+YY2+".nc"
-try:
-mnc = nc.Dataset(mname,'r+')
-except:
-mnc = nc.Dataset(mname,'w',format="NETCDF4")
-mnc.createDimension('y', lats.shape[0])
-mnc.createDimension('x', lats.shape[1])
-mnc.createDimension('time', 12)
-mnc.description = 'Climatology ('+YY2+') of aggregation of cyclone track characteristics on monthly time scale.'

-ncy = mnc.createVariable('y', np.float32, ('y',))
-ncx = mnc.createVariable('x', np.float32, ('x',))

-# Add times, lats, and lons
-nctime = mnc.createVariable('time', np.float32, ('time',))
-nctime.units = 'months'
-nctime[:] = np.arange(1,12+1,1)

-nclon = mnc.createVariable('lon', np.float32, ('y','x'))
-nclon.units = 'degrees'
-nclon[:] = proj['lon'][:]

-nclat = mnc.createVariable('lat', np.float32, ('y','x'))
-nclat.units = 'degrees'
-nclat[:] = proj['lat'][:]

-for v in varsi:
-try:
-ncvar = mnc.createVariable(vNames[v], np.float64, ('time','y','x'))
-ncvar.units = vunits[v] + ' -- Smoothing:' + str(kSizekm) + ' km'
-ncvar[:] = np.array(vlists[v])
-except:
-mnc[vNames[v]][:] = np.array(vlists[v])

-mnc.close()

-##### SEASONAL MEANS ###
-print("Step 5. Aggregate By Season")
-sname = ver+"_AggregationFields_SeasonalClimatology_"+YY2+".nc"
-if sname in os.listdir():
-snc = nc.Dataset(sname,'r+')
-else:
-snc = nc.Dataset(sname,'w')
-snc.createDimension('y', lats.shape[0])
-snc.createDimension('x', lats.shape[1])
-snc.createDimension('time', len(seasons))
-snc.description = 'Climatology ('+YY+') of aggregation of cyclone track characteristics on seasonal time scale.'

-ncy = snc.createVariable('y', np.float32, ('y',))
-ncx = snc.createVariable('x', np.float32, ('x',))

-# Add times, lats, and lons
-nctime = snc.createVariable('time', np.int8, ('time',))
-nctime.units = 'seasonal end months'
-nctime[:] = seasons

-nclon = snc.createVariable('lon', np.float32, ('y','x'))
-nclon.units = 'degrees'
-nclon[:] = proj['lon'][:]

-nclat = snc.createVariable('lat', np.float32, ('y','x'))
-nclat.units = 'degrees'
-nclat[:] = proj['lat'][:]

-for v in varsi:
-ncf = nc.Dataset(ver+"_AggregationFields_Monthly_"+vNames[v]+".nc",'r')
-times = ncf['time'][:]

-print(" " + vNames[v])

-varr = ncf[vNames[v]][:]
-seaslist = []
-for si in seasons:
-tsub = np.where( ((times-((si-1)/12))%1 == 0) & (times >= ymin) & (times < ymax+1) )[0]
-seasarr = (varr[tsub,:,:] + varr[tsub-1,:,:] + varr[tsub-2,:,:]) / 3
-seaslist.append( np.apply_along_axis(np.nanmean, 0, seasarr) )

-try:
-ncvar = snc.createVariable(vNames[v], np.float64, ('time','y','x'))
-ncvar.units = vunits[v] + ' -- Smoothing:' + str(kSizekm) + ' km'
-ncvar[:] = np.array(seaslist)
-except:
-snc[vNames[v]][:] = np.array(seaslist)

-ncf.close()
-snc.close()
+name = ver+"_AggregationFields_Monthly_"+vNames[v]+".nc"
+if name in priorfiles: # Append data if prior data existed...
+    if vout.shape[0] > 0: # ...and there is new data to be added
+        prior = nc.Dataset(name)
+        ncvar[:] = np.concatenate( ( prior[vNames[v]][prior['time'][:].data <= newstarttime[0],:,:].data , np.where(vout == 0,np.nan,vout) ) )
+        prior.close(), mnc.close()
+        os.remove(name) # Remove old file
+        os.rename(ver+"_AggregationFields_Monthly_"+vNames[v]+"_NEW.nc", name) # rename new file to standard name
+else: # Create new data if no prior data existed
+    ncvar[:] = np.where(vout == 0,np.nan,vout)
+    mnc.close()
+    os.rename(ver+"_AggregationFields_Monthly_"+vNames[v]+"_NEW.nc", name) # rename new file to standard name


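A condensed sketch of the append-instead-of-reprocess logic above (item 2 of the commit message); the file and variable names here are illustrative, not taken from the scripts:

# Illustrative sketch only -- not code from this commit. The 'time'
# coordinate holds decimal years, so December 2020 is stored as 2020 + 11/12
# and np.ceil of the maximum gives the first year missing from the file.
import os
import numpy as np
import netCDF4 as nc

name = "12_4E5P_AggregationFields_Monthly_trkden.nc"  # hypothetical file name
startyear = 1979                                      # default: process all years
if name in os.listdir():
    with nc.Dataset(name) as prior:
        startyear = int(np.ceil(prior['time'][:].max()))  # e.g., ceil(2020.9167) = 2021

# ...aggregate months from startyear onward into new fields of shape (time, y, x),
# then write the prior months plus the new months to a *_NEW.nc file and swap it
# into place with os.remove(name) / os.rename(..., name), as the diff above does.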
