Update to Version 13.2
Several updates to the way cyclone detection/tracking output is organized -- no changes to core detection/tracking functions. See "Cyclone Tracking Description - Version 13.2.docx" for details.
alexcrawford0927 committed Jan 23, 2023
1 parent e1eb608 commit 7d59abd
Showing 13 changed files with 6,130 additions and 0 deletions.
Binary file added Cyclone Tracking Description - Version 13.2.docx
148 changes: 148 additions & 0 deletions Version 13_2 Scripts/C17_ExportToCSV_V13.py
@@ -0,0 +1,148 @@
'''
Author: Alex Crawford
Date Created: 10/28/16
Date Modified: 5/6/20 (Updated for Python 3)
18 Jan 2021 --> Built in removal of DpDr and precip columns if present
11 Apr 2022 --> Fixed a bug in the removal of DpDr and precip columns and unit conversion
23 Jan 2023 --> Changed order of columns, added year/month/day/hour columns, changed units to be
hPa instead of Pa
Purpose: Export Cyclone objects' data table as CSV files with year, month, and TID in the filename.
'''
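# Output filenames follow the pattern <typ><vers><bbox>_<YYYY><MM>_<ID>.csv built in the
# to_csv calls below -- e.g., System13_2R_195001_7.csv (the ID here is illustrative).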

'''********************
Import Modules
********************'''
print("Loading modules.")
import pandas as pd
import numpy as np
import CycloneModule_13_2 as md
import os
# import pickle5

'''*******************************************
Define Variables
*******************************************'''
print("Defining variables")

vers = "13_2R"
bbox = "" # Use BBox## for subsets
typ = "System"
path = "/Volumes/Cressida/CycloneTracking/tracking"+vers

# Time Variables
years = range(1950,1950+1)
mos = range(1,12+1)
dateref = [1900,1,1,0,0,0]

# Cyclone Parameters
# Aggregation Parameters
minls = 1 # minimum lifespan (in days) for a track to be considered
mintl = 1000 # in km for version 11 and beyond
minlat = 0 # minimum latitude that must be achieved at some point in the track

'''*******************************************
Main Analysis
*******************************************'''
print("Main Analysis")

# Load parameters
params = pd.read_pickle(path+"/cycloneparams.pkl")
# params = pickle5.load(open(path+"/cycloneparams.pkl",'rb'))
try:
    spres = params['spres']
except KeyError:
    spres = 100  # spatial resolution in km (default for older parameter files)

# Set Up Output Directories
try:
    os.chdir(path+"/"+bbox+"/CSV"+typ)
except FileNotFoundError:
    os.mkdir(path+"/"+bbox+"/CSV"+typ)

for y in years:
Y = str(y)
#Create directories
    try:
        os.chdir(path+"/"+bbox+"/CSV"+typ+"/"+Y)
    except FileNotFoundError:
        os.mkdir(path+"/"+bbox+"/CSV"+typ+"/"+Y)
        os.chdir(path+"/"+bbox+"/CSV"+typ+"/"+Y)

for m in mos:
M = md.dd[m-1]

#Create directories
        try:
            os.chdir(path+"/"+bbox+"/CSV"+typ+"/"+Y+"/"+M)
        except FileNotFoundError:
            os.mkdir(path+"/"+bbox+"/CSV"+typ+"/"+Y+"/"+M)
            os.chdir(path+"/"+bbox+"/CSV"+typ+"/"+Y+"/"+M)

# Write CSV for Systems
if typ == "System":
for y in years:
Y = str(y)
print(Y)
for m in mos:
M = md.dd[m-1]

# Load data
trs = pd.read_pickle(path+"/"+bbox+"/"+typ+"Tracks"+"/"+Y+"/"+bbox+typ.lower()+"tracks"+Y+M+".pkl")
# trs = pickle5.load(open(path+"/"+bbox+"/"+typ+"Tracks"+"/"+Y+"/"+bbox+typ.lower()+"tracks"+Y+M+".pkl",'rb'))

# Write CSVs
for tr in trs:
if tr.lifespan() >= minls and tr.trackLength() >= mintl and np.max(tr.data.lat) >= minlat:
trdata = tr.data
if len(np.intersect1d(trdata.columns,['precip','precipArea','DpDr'])) == 3:
trdata = trdata.drop(columns=['precip','precipArea','DpDr'])

dates = [md.timeAdd(dateref,[0,0,t,0,0,0]) for t in trdata['time']]
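                    # (The [0,0,t,0,0,0] offset treats 'time' as days since dateref;
                    # md.timeAdd is assumed to return [year, month, day, hour, ...] lists,
                    # which the year/month/day/hour indexing below relies on.)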

trdata['year'] = np.array([date[0] for date in dates])
trdata['month'] = np.array([date[1] for date in dates])
trdata['day'] = np.array([date[2] for date in dates])
trdata['hour'] = np.array([date[3] for date in dates])
trdata['p_cent'] = trdata['p_cent'] / 100 # Pa --> hPa
trdata['p_edge'] = trdata['p_edge'] / 100 # Pa --> hPa
trdata['depth'] = trdata['depth'] / 100 # Pa --> hPa
trdata['radius'] = trdata['radius'] * spres # to units of km
trdata['area'] = trdata['area'] * spres * spres # to units of km^2
trdata['DsqP'] = trdata['DsqP'] / spres / spres * 100 * 100 / 100 # to units of hPa/[100 km]^2
trdata['p_grad'] = trdata['p_grad'] / 100 * 1000 * 1000 # to units of hPa / [1000 km]
trdata['Dp'] = trdata['Dp'] / 100 # Pa --> hPa
trdata['DpDt'] = trdata['DpDt'] / 100 # Pa/day --> hPa/day
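                    # Conversion logic: radius and area are stored in grid-cell units
                    # (spres km per cell); DsqP appears to be Pa per cell^2, so /spres^2
                    # gives Pa km^-2, *100*100 rescales to (100 km)^-2, and /100 is Pa -> hPa.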

trdata.to_csv(path+"/"+bbox+"/CSV"+typ+"/"+Y+"/"+M+"/"+typ+vers+bbox+"_"+Y+M+"_"+str(tr.sid)+".csv",index=False,columns=list(trdata.columns[-4:])+list(trdata.columns[:-4]))

# Write CSV for Cyclones
else:
for y in years:
Y = str(y)
print(Y)
for m in mos:
M = md.dd[m-1]
# trs = pickle5.load(open(path+"/"+bbox+"/"+typ+"Tracks"+"/"+Y+"/"+bbox+typ.lower()+"tracks"+Y+M+".pkl",'rb'))
trs = pd.read_pickle(path+"/"+bbox+"/"+typ+"Tracks"+"/"+Y+"/"+bbox+typ.lower()+"tracks"+Y+M+".pkl")

for tr in trs:
if tr.lifespan() >= minls and tr.trackLength() >= mintl and np.max(tr.data.lat) >= minlat:
                    trdata = tr.data
if len(np.intersect1d(trdata.columns,['precip','precipArea','DpDr'])) == 3:
trdata = trdata.drop(columns=['precip','precipArea','DpDr'])

                    dates = [md.timeAdd(dateref,[0,0,t,0,0,0]) for t in trdata['time']]

                    trdata['year'] = np.array([date[0] for date in dates])
trdata['month'] = np.array([date[1] for date in dates])
trdata['day'] = np.array([date[2] for date in dates])
trdata['hour'] = np.array([date[3] for date in dates])
trdata['p_cent'] = trdata['p_cent'] / 100 # Pa --> hPa
trdata['p_edge'] = trdata['p_edge'] / 100 # Pa --> hPa
trdata['depth'] = trdata['depth'] / 100 # Pa --> hPa
trdata['radius'] = trdata['radius'] * spres # to units of km
trdata['area'] = trdata['area'] * spres * spres # to units of km^2
trdata['DsqP'] = trdata['DsqP'] / spres / spres * 100 * 100 / 100 # to units of hPa/[100 km]^2
trdata['p_grad'] = trdata['p_grad'] / 100 * 1000 * 1000 # to units of hPa / [1000 km]
trdata['Dp'] = trdata['Dp'] / 100 # Pa --> hPa
trdata['DpDt'] = trdata['DpDt'] / 100 # Pa/day --> hPa/day

                    trdata.to_csv(path+"/"+bbox+"/CSV"+typ+"/"+Y+"/"+M+"/"+typ+vers+bbox+"_"+Y+M+"_"+str(tr.tid)+".csv",index=False,columns=list(trdata.columns[-4:])+list(trdata.columns[:-4]))
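
For a quick sanity check, an exported file can be read back with pandas -- a minimal sketch, assuming a System run with the defaults above (the filename is illustrative; actual names follow the <typ><vers><bbox>_<YYYY><MM>_<ID>.csv pattern built in the to_csv calls):

import pandas as pd

df = pd.read_csv("System13_2R_195001_7.csv")  # hypothetical file: Jan 1950, system ID 7
print(df[['year','month','day','hour','p_cent']].head())  # p_cent is now in hPa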
180 changes: 180 additions & 0 deletions Version 13_2 Scripts/C2_Reprojection6_E5.py
@@ -0,0 +1,180 @@
'''
Author: Alex Crawford
Date Created: 10 Mar 2019
Date Modified: 22 Aug 2019 -- Update for Python 3
01 Apr 2020 -- Switched output to netCDF instead of GeoTIFF;
no longer dependent on gdal module (start V5)
19 Oct 2020 -- pulled the map creation out of the for loop
06 Oct 2021 -- added a wrap-around for inputs that prevents
empty cells from forming along either 180° or
360° longitude (start V6)
15 Nov 2022 -- replaced "np.int" with "int"
Purpose: Reads in netcdf files & reprojects to the NSIDC EASE2 Grid North.
'''

'''********************
Import Modules
********************'''
print("Loading modules.")
import os
import numpy as np
from netCDF4 import Dataset
import xesmf as xe
import CycloneModule_12_4 as md

'''********************
Define Variables
********************'''
print("Defining variables")

# File Variables:
ra = "ERA5"
var = "SLP"

ncvar = "msl"
nctvar = "time"
ncext = '.nc'

# Time Variables
ymin, ymax = 2022, 2022
mmin, mmax = 11, 12
dmin, dmax = 1, 31

mons = ["01","02","03","04","05","06","07","08","09","10","11","12"]
dpm = [31,28,31,30,31,30,31,31,30,31,30,31] # days per month (non leap year)
timestep = 3 # in hours
startdate = [1900,1,1] # The starting date for the reanalysis time steps

# Inputs for reprojection
xsize, ysize = 25000, -25000 # in meters
nx, ny = 720, 720 # number of grid cells; use 180 by 180 for 100 km grid

# Path Variables
path = "/Volumes/Cressida"
inpath = path+"/"+ra+"/"+var #
outpath = "/Volumes/Cressida/"+ra+"/"+var+"_EASE2_N0_"+str(int(xsize/1000))+"km" #
suppath = path+"/Projections"

'''*******************************************
Main Analysis
*******************************************'''
print("Main Analysis")

# Obtain list of nc files:
os.chdir(outpath)
fileList = os.listdir(inpath)
fileList = [f for f in fileList if f.endswith(ncext) and f.startswith(ra)]

# Identify the time steps:
ref_netcdf = Dataset(inpath+"/"+fileList[-1])

# Create latitude and longitude arrays:
lons = ref_netcdf.variables['longitude'][:]
lats = ref_netcdf.variables['latitude'][:]

outprjnc = Dataset(suppath+'/EASE2_N0_'+str(int(xsize/1000))+'km_Projection.nc')
outlat = outprjnc['lat'][:].data
outlon = outprjnc['lon'][:].data

# Close reference netcdf:
ref_netcdf.close()

# Define Grids as Dictionaries
grid_in = {'lon': np.r_[lons,lons[0]], 'lat': lats}
grid_out = {'lon': outlon, 'lat': outlat}

# Create Regridder
regridder = xe.Regridder(grid_in, grid_out, 'bilinear')
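# The wrap-around column in grid_in (lons[0] appended via np.r_) makes the input grid
# periodic in longitude, so the bilinear weights leave no seam of empty cells along
# 180°/360°; the same extra column is appended to each data slice with np.c_ below.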

print("Step 2. Set up dates of analysis")
years = range(ymin,ymax+1)
mos = range(mmin,mmax+1)
hrs = [h*timestep for h in range(int(24/timestep))]
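# e.g., timestep = 3 gives hrs = [0, 3, 6, ..., 21]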

ly = md.leapyearBoolean(years) # one boolean per year: leap year or not

# Start the reprojection loop
print("Step 3. Load, Reproject, and Save")
for y in years:
Y = str(y)

for m in mos:
M = mons[m-1]

mlist, hlist = [], []
ncList = [f for f in fileList if Y+M in f]

if len(ncList) > 1:
print("Multiple files with the date " + Y+M + " -- skipping.")
continue
if len(ncList) == 0:
print("No files with the date " + Y+M + " -- skipping.")
else:
nc = Dataset(inpath+"/"+ncList[0])
tlist = nc.variables[nctvar][:]

# Restrict days to those that exist:
if m == 2 and ly[y-ymin] == 1 and dmax > dpm[m-1]:
dmax1 = 29
elif dmax > dpm[m-1]:
dmax1 = dpm[m-1]
else:
dmax1 = dmax

# For days that DO exist:
for d in range(dmin,dmax1+1):
timeD = md.daysBetweenDates(startdate,[y,m,d])*24

print(" " + Y + " " + M + " " + str(d))

for h in hrs:
# Establish Time
timeH = timeD + h

# Read from netcdf array
inArr = nc.variables[ncvar][np.where(tlist == timeH)[0][0],:,:]

# Transform data
outArr = regridder(np.c_[inArr,inArr[:,0]])
outArr[outlat < 0] = np.nan # Limits to Northern Hemisphere

# Add to list
mlist.append(outArr)
hlist.append(timeH)

# Write monthly data to netcdf file
ncf = Dataset(ra+"_EASE2_N0_"+str(int(xsize/1000))+"km_"+var+"_Hourly_"+Y+M+".nc", 'w')
            ncf.description = 'Mean sea-level pressure from ERA5. Projection specifications\
 for the EASE2 projection (Lambert Azimuthal Equal Area;\
 lat-origin = 90°N, lon-origin = 0°, # cols = ' + str(nx) + ',\
 # rows = ' + str(ny) + ', dx = ' + str(xsize) + ', dy = ' + str(ysize) + ', units = meters)'
ncf.source = 'netCDF4 python module'

ncf.createDimension('time', len(mlist))
ncf.createDimension('x', nx)
ncf.createDimension('y', ny)
ncft = ncf.createVariable('time', int, ('time',))
ncfx = ncf.createVariable('x', np.float64, ('x',))
ncfy = ncf.createVariable('y', np.float64, ('y',))
ncfArr = ncf.createVariable(ncvar, np.float64, ('time','y','x'))

            try:
                ncft.units = nc.variables[nctvar].units
            except AttributeError:
                ncft.units = 'hours since 1900-01-01 00:00:00.0'

ncfx.units = 'm'
ncfy.units = 'm'
ncfArr.units = 'Pa'

            # For x and y, the upper-left point marks the edge of a grid cell, but these
            ## coordinate arrays should give cell centers, hence the (nx-1)/2 and (ny-1)/2 factors.
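            ## e.g., with nx = 720 and xsize = 25000 m, x runs from -8,987,500 m to +8,987,500 m in 25 km steps.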
ncft[:] = np.array(hlist)
ncfx[:] = np.arange(-xsize*(nx-1)/2, xsize*(nx-1)/2+xsize, xsize)
ncfy[:] = np.arange(-ysize*(ny-1)/2, ysize*(ny-1)/2+ysize, ysize)
ncfArr[:] = np.array(mlist)

ncf.close()

print("Complete.")