diff --git a/earthobserve/__init__.py b/earth2observe/__init__.py
similarity index 86%
rename from earthobserve/__init__.py
rename to earth2observe/__init__.py
index 6a59de3..7947ce2 100644
--- a/earthobserve/__init__.py
+++ b/earth2observe/__init__.py
@@ -29,8 +29,9 @@ if missing_dependencies:
     raise ImportError("Missing required dependencies {0}".format(missing_dependencies))
-import earthobserve.remotesensing as remotesensing
-
+import earth2observe.ecmwf as ecmwf
+import earth2observe.chirps as chirps
+import earth2observe.utils as utils
 __doc__ = """
-earthobserve - remote sensing package
+earth2observe - remote sensing package
 """
diff --git a/earth2observe/chirps.py b/earth2observe/chirps.py
new file mode 100644
index 0000000..d0eb191
--- /dev/null
+++ b/earth2observe/chirps.py
@@ -0,0 +1,303 @@
+import os
+import datetime as dt
+import numpy as np
+import pandas as pd
+from ftplib import FTP
+from joblib import Parallel, delayed
+from osgeo import gdal
+from pyramids.raster import Raster
+from earth2observe.utils import print_progress_bar, extractFromGZ
+
+
+class CHIRPS:
+    """
+    CHIRPS
+    """
+    def __init__(
+        self,
+        start: str = "",
+        end: str = "",
+        lat_lim: list = [],
+        lon_lim: list = [],
+        time: str = "daily",
+        path: str = "",
+        fmt: str = "%Y-%m-%d",
+    ):
+        """CHIRPS.
+
+        Parameters
+        ----------
+        start (str, optional):
+            start date of the period to download. Defaults to ''.
+        end (str, optional):
+            end date of the period to download. Defaults to ''.
+        lat_lim (list, optional):
+            [ymin, ymax] (values must be between -50 and 50). Defaults to [].
+        lon_lim (list, optional):
+            [xmin, xmax] (values must be between -180 and 180). Defaults to [].
+        time (str, optional):
+            'daily' or 'monthly'. Defaults to 'daily'.
+        path (str, optional):
+            Path where you want to save the downloaded data. Defaults to ''.
+        fmt (str, optional):
+            date format of start and end. Defaults to "%Y-%m-%d".
+        """
+        # Define timestep for the timedates
+        if time == "daily":
+            self.time_freq = "D"
+            self.output_folder = os.path.join(path, "Precipitation", "CHIRPS", "Daily")
+        elif time == "monthly":
+            self.time_freq = "MS"
+            self.output_folder = os.path.join(path, "Precipitation", "CHIRPS", "Monthly")
+        else:
+            raise KeyError("The input time interval is not supported")
+        self.time = time
+
+        # make the directory if it does not exist
+        if not os.path.exists(self.output_folder):
+            os.makedirs(self.output_folder)
+
+        # check time variables
+        if start == "":
+            self.start = pd.Timestamp("1981-01-01")
+        else:
+            self.start = dt.datetime.strptime(start, fmt)
+
+        if end == "":
+            self.end = pd.Timestamp("Now")
+        else:
+            self.end = dt.datetime.strptime(end, fmt)
+        # Create days
+        self.Dates = pd.date_range(self.start, self.end, freq=self.time_freq)
+
+        # Check space variables and clamp out-of-range limits
+        self.lat_lim = list(lat_lim)
+        self.lon_lim = list(lon_lim)
+        if lat_lim[0] < -50 or lat_lim[1] > 50:
+            print(
+                "Latitude above 50N or below 50S is not possible."
+                " Value set to maximum"
+            )
+            self.lat_lim[0] = max(lat_lim[0], -50)
+            self.lat_lim[1] = min(lat_lim[1], 50)
+        if lon_lim[0] < -180 or lon_lim[1] > 180:
+            print(
+                "Longitude must be between 180E and 180W."
+                " Now value is set to maximum"
+            )
+            self.lon_lim[0] = max(lon_lim[0], -180)
+            self.lon_lim[1] = min(lon_lim[1], 180)
+        # Define IDs
+        self.yID = 2000 - np.int16(
+            np.array(
+                [np.ceil((self.lat_lim[1] + 50) * 20), np.floor((self.lat_lim[0] + 50) * 20)]
+            )
+        )
+        self.xID = np.int16(
+            np.array(
+                [np.floor((self.lon_lim[0] + 180) * 20), np.ceil((self.lon_lim[1] + 180) * 20)]
+            )
+        )
+
+
+    def Download(self, progress_bar: bool = True, cores=None):
+        """Download.
+
+        Download method downloads CHIRPS data
+
+        Parameters
+        ----------
+        progress_bar : bool, optional
+            True to print a progress bar. The default is True.
+        cores : int, optional
+            The number of cores used to run the routine. It can be 'False'
+            to avoid using parallel computing routines. The default is None.
+
+        Returns
+        -------
+        results : bool or list
+            True when the data are downloaded sequentially, otherwise the
+            list returned by the parallel runs.
+        """
+        # Pass variables to parallel function and run
+        args = [
+            self.output_folder,
+            self.time,
+            self.xID,
+            self.yID,
+            self.lon_lim,
+            self.lat_lim,
+        ]
+
+        if not cores:
+            # Create Waitbar
+            if progress_bar:
+                total_amount = len(self.Dates)
+                amount = 0
+                print_progress_bar(
+                    amount,
+                    total_amount,
+                    prefix="Progress:",
+                    suffix="Complete",
+                    length=50,
+                )
+
+            for Date in self.Dates:
+                CHIRPS.RetrieveData(Date, args)
+                if progress_bar:
+                    amount = amount + 1
+                    print_progress_bar(
+                        amount,
+                        total_amount,
+                        prefix="Progress:",
+                        suffix="Complete",
+                        length=50,
+                    )
+            results = True
+        else:
+            results = Parallel(n_jobs=cores)(
+                delayed(CHIRPS.RetrieveData)(Date, args) for Date in self.Dates
+            )
+        return results
+
+
+    @staticmethod
+    def RetrieveData(Date, args):
+        """RetrieveData.
+
+        RetrieveData method retrieves CHIRPS data for a given date from the
+        https://data.chc.ucsb.edu/ server
+
+        Parameters
+        ----------
+        Date : Timestamp
+            date for which the data will be retrieved.
+        args : list
+            A list of parameters defined in the Download function.
+
+        Raises
+        ------
+        KeyError
+            If the input time interval is not supported.
+
+        Returns
+        -------
+        bool
+            True if the download finished.
+        """
+        # Argument
+        [output_folder, TimeCase, xID, yID, lon_lim, latlim] = args
+
+        # open ftp server
+        # ftp = FTP("chg-ftpout.geog.ucsb.edu", "", "")
+        ftp = FTP("data.chc.ucsb.edu")
+        ftp.login()
+
+        # Define FTP path to directory
+        if TimeCase == "daily":
+            pathFTP = (
+                "pub/org/chg/products/CHIRPS-2.0/global_daily/tifs/p05/%s/"
+                % Date.strftime("%Y")
+            )
+        elif TimeCase == "monthly":
+            pathFTP = "pub/org/chg/products/CHIRPS-2.0/global_monthly/tifs/"
+        else:
+            raise KeyError("The input time interval is not supported")
+
+        # find the document name in this directory
+        ftp.cwd(pathFTP)
+        listing = []
+
+        # read all the file names in the directory
+        ftp.retrlines("LIST", listing.append)
+
+        # create all the input name (filename) and output (outfilename, filetif, DirFileEnd) names
+        if TimeCase == "daily":
+            filename = "chirps-v2.0.%s.%02s.%02s.tif.gz" % (
+                Date.strftime("%Y"),
+                Date.strftime("%m"),
+                Date.strftime("%d"),
+            )
+            outfilename = os.path.join(
+                output_folder,
+                "chirps-v2.0.%s.%02s.%02s.tif"
+                % (Date.strftime("%Y"), Date.strftime("%m"), Date.strftime("%d")),
+            )
+            DirFileEnd = os.path.join(
+                output_folder,
+                "P_CHIRPS.v2.0_mm-day-1_daily_%s.%02s.%02s.tif"
+                % (Date.strftime("%Y"), Date.strftime("%m"), Date.strftime("%d")),
+            )
+        elif TimeCase == "monthly":
+            filename = "chirps-v2.0.%s.%02s.tif.gz" % (
+                Date.strftime("%Y"),
+                Date.strftime("%m"),
+            )
+            outfilename = os.path.join(
+                output_folder,
+                "chirps-v2.0.%s.%02s.tif" % (Date.strftime("%Y"), Date.strftime("%m")),
+            )
+            DirFileEnd = os.path.join(
+                output_folder,
+                "P_CHIRPS.v2.0_mm-month-1_monthly_%s.%02s.%02s.tif"
+                % (Date.strftime("%Y"), Date.strftime("%m"), Date.strftime("%d")),
+            )
+        else:
+            raise KeyError("The input time interval is not supported")
+
+        # download the global rainfall file
+        try:
+            local_filename = os.path.join(output_folder, filename)
+            with open(local_filename, "wb") as lf:
+                ftp.retrbinary("RETR " + filename, lf.write, 8192)
+
+            # unzip the file
+            zip_filename = os.path.join(output_folder, filename)
+            extractFromGZ(zip_filename, outfilename, delete=True)
+
+            # open tiff file
+            src = gdal.Open(outfilename)
+            dataset, NoDataValue = Raster.getRasterData(src)
+
+            # clip dataset to the given extent
+            data = dataset[yID[0]: yID[1], xID[0]: xID[1]]
+            # replace negative values with -9999
+            data[data < 0] = -9999
+
+            # save dataset as geotiff file
+            geo = [lon_lim[0], 0.05, 0, latlim[1], 0, -0.05]
+            Raster.createRaster(
+                DirFileEnd,
+                data,
+                geo=geo,
+                epsg="WGS84",
+                nodatavalue=NoDataValue,
+            )
+
+            # delete old tif file
+            os.remove(outfilename)
+
+        except PermissionError:
+            print(
+                "The file covering the whole world could not be deleted,"
+                " please delete it after the download ends"
+            )
+        return True
+
+
+    def ListAttributes(self):
+        """
+        Print Attributes List
+        """
+        print("\n")
+        print(
+            f"Attributes List of: {self.__class__.__name__} Instance\n"
+        )
+        self_keys = list(self.__dict__.keys())
+        self_keys.sort()
+        for key in self_keys:
+            if key != "name":
+                print(str(key) + " : " + repr(self.__dict__[key]))
+
+        print("\n")
\ No newline at end of file
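A minimal usage sketch of the new `CHIRPS` class (dates, extent, and output path are illustrative values taken from the example notebook further down):

```python
from earth2observe.chirps import CHIRPS

coello = CHIRPS(
    start="2009-01-01",
    end="2009-01-10",
    lat_lim=[4.19, 4.64],      # [ymin, ymax], must lie within -50..50
    lon_lim=[-75.65, -74.73],  # [xmin, xmax], must lie within -180..180
    time="daily",
    path="/data/satellite_data/",
)
coello.Download()           # sequential download with a progress bar
# coello.Download(cores=4)  # parallel download; no progress bar is shown
```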
diff --git a/earthobserve/remotesensing.py b/earth2observe/ecmwf.py
similarity index 52%
rename from earthobserve/remotesensing.py
rename to earth2observe/ecmwf.py
index 275db74..b59da02 100644
--- a/earthobserve/remotesensing.py
+++ b/earth2observe/ecmwf.py
@@ -6,20 +6,15 @@
 import calendar
 import datetime as dt
 import os
-from ftplib import FTP
-import gzip
-
 import numpy as np
 import pandas as pd
 from ecmwfapi import ECMWFDataServer
-from joblib import Parallel, delayed
 from netCDF4 import Dataset
-from osgeo import gdal
-import Hapi.weirdFn as weirdFn
 from pyramids.raster import Raster
+from earth2observe.utils import print_progress_bar
 
 
-class RemoteSensing:
-    """RemoteSensing.
+class ECMWF:
+    """ECMWF.
 
-    RemoteSensing class contains methods to download ECMWF data
+    ECMWF class contains methods to download ECMWF data
@@ -32,68 +27,67 @@ class RemoteSensing:
     """
     def __init__(
         self,
-        Time="daily",
-        start="",
-        end="",
-        Path="",
-        Vars=[],
-        latlim=[],
-        lonlim=[],
-        fmt="%Y-%m-%d",
+        time: str = "daily",
+        start: str = "",
+        end: str = "",
+        path: str = "",
+        variables: list = [],
+        lat_lim: list = [],
+        lon_lim: list = [],
+        fmt: str = "%Y-%m-%d",
     ):
-        """
-        RemoteSensing(self, Time='daily', start='', end='',Path='',
-                      Vars=[], latlim=[], lonlim=[], fmt="%Y-%m-%d")
-
-        Parameters:
-            Time (str, optional):
-                [description]. Defaults to 'daily'.
-            start (str, optional):
-                [description]. Defaults to ''.
-            end (str, optional):
-                [description]. Defaults to ''.
-            Path (str, optional):
-                Path where you want to save the downloaded data. Defaults to ''.
-            Vars (list, optional):
-                Variable code: VariablesInfo('day').descriptions.keys(). Defaults to [].
-            latlim (list, optional):
-                [ymin, ymax]. Defaults to [].
-            lonlim (list, optional):
-                [xmin, xmax]. Defaults to [].
-            fmt (str, optional):
-                [description]. Defaults to "%Y-%m-%d".
+        """ECMWF.
+
+        Parameters
+        ----------
+        time (str, optional):
+            'six_hourly', 'daily' or 'monthly'. Defaults to 'daily'.
+        start (str, optional):
+            start date of the period to download. Defaults to ''.
+        end (str, optional):
+            end date of the period to download. Defaults to ''.
+        path (str, optional):
+            Path where you want to save the downloaded data. Defaults to ''.
+        variables (list, optional):
+            Variable code: VariablesInfo('day').descriptions.keys(). Defaults to [].
+        lat_lim (list, optional):
+            [ymin, ymax]. Defaults to [].
+        lon_lim (list, optional):
+            [xmin, xmax]. Defaults to [].
+        fmt (str, optional):
+            date format of start and end. Defaults to "%Y-%m-%d".
""" self.start = dt.datetime.strptime(start, fmt) self.end = dt.datetime.strptime(end, fmt) - if Time == "six_hourly": + if time == "six_hourly": # Set required data for the three hourly option self.string1 = "oper" # Set required data for the daily option - elif Time == "daily": + elif time == "daily": self.Dates = pd.date_range(self.start, self.end, freq="D") - elif Time == "monthly": + elif time == "monthly": self.Dates = pd.date_range(self.start, self.end, freq="MS") - self.Time = Time - self.Path = Path - self.Vars = Vars + self.time = time + self.path = path + self.vars = variables # correct latitude and longitude limits - latlim_corr_one = np.floor(latlim[0] / 0.125) * 0.125 - latlim_corr_two = np.ceil(latlim[1] / 0.125) * 0.125 + latlim_corr_one = np.floor(lat_lim[0] / 0.125) * 0.125 + latlim_corr_two = np.ceil(lat_lim[1] / 0.125) * 0.125 self.latlim_corr = [latlim_corr_one, latlim_corr_two] # correct latitude and longitude limits - lonlim_corr_one = np.floor(lonlim[0] / 0.125) * 0.125 - lonlim_corr_two = np.ceil(lonlim[1] / 0.125) * 0.125 + lonlim_corr_one = np.floor(lon_lim[0] / 0.125) * 0.125 + lonlim_corr_two = np.ceil(lon_lim[1] / 0.125) * 0.125 self.lonlim_corr = [lonlim_corr_one, lonlim_corr_two] # TODO move it to the ECMWF method later # for ECMWF only self.string7 = "%s/to/%s" % (self.start, self.end) - def ECMWF(self, Waitbar: bool = True): + def download(self, progress_bar: bool = True): """ECMWF ECMWF method downloads ECMWF daily data for a given variable, time @@ -102,43 +96,45 @@ def ECMWF(self, Waitbar: bool = True): Parameters ---------- - Waitbar : TYPE, optional + progress_bar : TYPE, optional 0 or 1. to display the progress bar Returns ------- None. """ - for var in self.Vars: + for var in self.vars: # Download data print(f"\nDownload ECMWF {var} data for period {self.start} till {self.end}") - self.DownloadData(var, Waitbar) # CaseParameters=[SumMean, Min, Max] + self.downloadData(var, progress_bar) # CaseParameters=[SumMean, Min, Max] # delete the downloaded netcdf - del_ecmwf_dataset = os.path.join(self.Path, "data_interim.nc") + del_ecmwf_dataset = os.path.join(self.path, "data_interim.nc") os.remove(del_ecmwf_dataset) - def DownloadData(self, Var: str, Waitbar: bool): + def downloadData(self, var: str, progress_bar: bool): """ This function downloads ECMWF six-hourly, daily or monthly data Parameters ---------- - Var: [str] - + var: [str] + variable name + progress_bar: [bool] + True if you want to display a progress bar. 
""" # Load factors / unit / type of variables / accounts - VarInfo = Variables(self.Time) - Varname_dir = VarInfo.file_name[Var] + VarInfo = Variables(self.time) + Varname_dir = VarInfo.file_name[var] # Create Out directory - out_dir = os.path.join(self.Path, self.Time, Varname_dir) + out_dir = os.path.join(self.path, self.time, Varname_dir) if not os.path.exists(out_dir): os.makedirs(out_dir) - DownloadType = VarInfo.DownloadType[Var] + DownloadType = VarInfo.DownloadType[var] if DownloadType == 1: string1 = "oper" @@ -161,7 +157,7 @@ def DownloadData(self, Var: str, Waitbar: bool): string2 = "pl" string8 = "an" - parameter_number = VarInfo.number_para[Var] + parameter_number = VarInfo.number_para[var] string3 = "%03d.128" % (parameter_number) string5 = "0.125/0.125" @@ -175,8 +171,8 @@ def DownloadData(self, Var: str, Waitbar: bool): # Download data by using the ECMWF API print("Use API ECMWF to collect the data, please wait") - RemoteSensing.API( - self.Path, + ECMWF.API( + self.path, DownloadType, string1, string2, @@ -191,14 +187,14 @@ def DownloadData(self, Var: str, Waitbar: bool): ) # Open the downloaded data - NC_filename = os.path.join(self.Path, "data_interim.nc") + NC_filename = os.path.join(self.path, "data_interim.nc") fh = Dataset(NC_filename, mode="r") # Get the NC variable parameter - parameter_var = VarInfo.var_name[Var] - Var_unit = VarInfo.units[Var] - factors_add = VarInfo.factors_add[Var] - factors_mul = VarInfo.factors_mul[Var] + parameter_var = VarInfo.var_name[var] + Var_unit = VarInfo.units[var] + factors_add = VarInfo.factors_add[var] + factors_mul = VarInfo.factors_mul[var] # Open the NC data Data = fh.variables[parameter_var][:] @@ -212,10 +208,10 @@ def DownloadData(self, Var: str, Waitbar: bool): Geo_out = tuple([Geo_one, 0.125, 0.0, Geo_four, 0.0, -0.125]) # Create Waitbar - if Waitbar: + if progress_bar: total_amount = len(self.Dates) amount = 0 - weirdFn.printWaitBar( + print_progress_bar( amount, total_amount, prefix="Progress:", suffix="Complete", length=50 ) @@ -234,9 +230,9 @@ def DownloadData(self, Var: str, Waitbar: bool): Date_good = np.zeros(len(Data_time)) - if self.Time == "daily": + if self.time == "daily": days_later = 1 - if self.Time == "monthly": + if self.time == "monthly": days_later = calendar.monthrange(year, month)[1] Date_good[ @@ -254,25 +250,25 @@ def DownloadData(self, Var: str, Waitbar: bool): # Calculate the average temperature in celcius degrees Data_end = factors_mul * np.nanmean(Data_one, 0) + factors_add - if VarInfo.types[Var] == "flux": + if VarInfo.types[var] == "flux": Data_end = Data_end * days_later - VarOutputname = VarInfo.file_name[Var] + VarOutputname = VarInfo.file_name[var] # Define the out name name_out = os.path.join( out_dir, "%s_ECMWF_ERA-Interim_%s_%s_%d.%02d.%02d.tif" - % (VarOutputname, Var_unit, self.Time, year, month, day), + % (VarOutputname, Var_unit, self.time, year, month, day), ) # Create Tiff files # Raster.Save_as_tiff(name_out, Data_end, Geo_out, "WGS84") Raster.createRaster(path=name_out, arr=Data_end, geo=Geo_out, epsg="WGS84") - if Waitbar == 1: + if progress_bar: amount = amount + 1 - weirdFn.printWaitBar( + print_progress_bar( amount, total_amount, prefix="Progress:", @@ -303,7 +299,9 @@ def API( os.chdir(output_folder) server = ECMWFDataServer() - + # url = os.environ['ECMWF_API_URL'], + # key = os.environ['ECMWF_API_KEY'], + # email = os.environ['ECMWF_API_EMAIL'], if DownloadType == 1 or DownloadType == 2: server.retrieve( { @@ -350,42 +348,11 @@ def API( return () - @staticmethod - def 
ExtractFromGZ(InputFile, OutputFile, delete=False): - """ - ExtractFromGZ method extract data from the zip/.gz files, - save the data - - Parameters - ---------- - zip_filename : [str] - zipped file name . - outfilename : [str] - directory where the unzipped data must be - stored. - delete : [bool] - True if you want to delete the zipped file after the extracting the data - Returns - ------- - None. - """ - with gzip.GzipFile(InputFile, "rb") as zf: - content = zf.read() - save_file_content = open(OutputFile, "wb") - save_file_content.write(content) - - save_file_content.close() - zf.close() - - if delete: - os.remove(InputFile) - class Variables: """ This class contains the information about the ECMWF variables http://rda.ucar.edu/cgi-bin/transform?xml=/metadata/ParameterTables/WMO_GRIB1.98-0.128.xml&view=gribdoc """ - number_para = { "T": 130, "2T": 167, @@ -654,319 +621,18 @@ def __init__(self, step): def __str__(self): - print( - "Variable name:" - + "\n" - + str(self.var_name) - + "\n" - + "Descriptions" - + "\n" - + str(self.descriptions) - + "\n" - + "Units : " - + "\n" - + str(self.units) - ) - - - def ListAttributes(self): - """ - Print Attributes List - """ - - print("\n") - print( - "Attributes List of: " - + repr(self.__dict__["name"]) - + " - " - + self.__class__.__name__ - + " Instance\n" - ) - self_keys = list(self.__dict__.keys()) - self_keys.sort() - for key in self_keys: - if key != "name": - print(str(key) + " : " + repr(self.__dict__[key])) - - print("\n") - - -class CHIRPS: - - - def __init__( - self, - start="", - end="", - latlim=[], - lonlim=[], - Time="daily", - Path="", - fmt="%Y-%m-%d", - ): - # latlim -- [ymin, ymax] (values must be between -50 and 50) - # lonlim -- [xmin, xmax] (values must be between -180 and 180) - # TimeCase -- String equal to 'daily' or 'monthly' - - # Define timestep for the timedates - if Time == "daily": - self.TimeFreq = "D" - self.output_folder = os.path.join(Path, "Precipitation", "CHIRPS", "Daily") - elif Time == "monthly": - self.TimeFreq = "MS" - self.output_folder = os.path.join( - Path, "Precipitation", "CHIRPS", "Monthly" - ) - else: - raise KeyError("The input time interval is not supported") - self.Time = Time - - # make directory if it not exists - if not os.path.exists(self.output_folder): - os.makedirs(self.output_folder) - - # check time variables - if start == "": - start = pd.Timestamp("1981-01-01") - else: - self.start = dt.datetime.strptime(start, fmt) - - if end == "": - end = pd.Timestamp("Now") - else: - self.end = dt.datetime.strptisme(end, fmt) - # Create days - self.Dates = pd.date_range(self.start, self.end, freq=self.TimeFreq) - - # Check space variables - if latlim[0] < -50 or latlim[1] > 50: - print( - "Latitude above 50N or below 50S is not possible." - " Value set to maximum" - ) - self.latlim[0] = np.max(latlim[0], -50) - self.latlim[1] = np.min(lonlim[1], 50) - if lonlim[0] < -180 or lonlim[1] > 180: - print( - "Longitude must be between 180E and 180W." 
-            " Now value is set to maximum"
-            )
-            self.lonlim[0] = np.max(latlim[0], -180)
-            self.lonlim[1] = np.min(lonlim[1], 180)
-        else:
-            self.latlim = latlim
-            self.lonlim = lonlim
-        # Define IDs
-        self.yID = 2000 - np.int16(
-            np.array([np.ceil((latlim[1] + 50) * 20), np.floor((latlim[0] + 50) * 20)])
+        return (
+            f"Variable name:\n{self.var_name}\nDescriptions\n{self.descriptions}\nUnits : \n{self.units}"
         )
-        self.xID = np.int16(
-            np.array(
-                [np.floor((lonlim[0] + 180) * 20), np.ceil((lonlim[1] + 180) * 20)]
-            )
-        )
-
-
-    def Download(self, Waitbar=1, cores=None):
-        """
-        ============================================================
-        Download(self, Waitbar=1, cores=None)
-        ============================================================
-        Download method downloads CHIRPS data
-
-        Parameters
-        ----------
-        Waitbar : TYPE, optional
-            will print a waitbar. The default is 1.
-        cores : TYPE, optional
-            The number of cores used to run the routine. It can be 'False'
-            to avoid using parallel computing routines. The default is None.
-
-        Returns
-        -------
-        results : TYPE
-            DESCRIPTION.
-
-        """
-        # Pass variables to parallel function and run
-        args = [
-            self.output_folder,
-            self.Time,
-            self.xID,
-            self.yID,
-            self.lonlim,
-            self.latlim,
-        ]
-
-        if not cores:
-            # Create Waitbar
-            if Waitbar == 1:
-                total_amount = len(self.Dates)
-                amount = 0
-                weirdFn.printWaitBar(
-                    amount,
-                    total_amount,
-                    prefix="Progress:",
-                    suffix="Complete",
-                    length=50,
-                )
-
-            for Date in self.Dates:
-                CHIRPS.RetrieveData(Date, args)
-                if Waitbar == 1:
-                    amount = amount + 1
-                    weirdFn.printWaitBar(
-                        amount,
-                        total_amount,
-                        prefix="Progress:",
-                        suffix="Complete",
-                        length=50,
-                    )
-            results = True
-        else:
-            results = Parallel(n_jobs=cores)(
-                delayed(CHIRPS.RetrieveData)(Date, args) for Date in self.Dates
-            )
-        return results
-
-
-    def RetrieveData(Date, args):
-        """
-        ===============================================
-        RetrieveData(Date, args)
-        ===============================================
-        RetrieveData method retrieves CHIRPS data for a given date from the
-        https://data.chc.ucsb.edu/
-
-        Parameters
-        ----------
-        Date : TYPE
-            DESCRIPTION.
-        args : TYPE
-            A list of parameters defined in the DownloadData function.
-
-        Raises
-        ------
-        KeyError
-            DESCRIPTION.
-
-        Returns
-        -------
-        bool
-            DESCRIPTION.
-
-        """
-        # Argument
-        [output_folder, TimeCase, xID, yID, lonlim, latlim] = args
-
-        # open ftp server
-        # ftp = FTP("chg-ftpout.geog.ucsb.edu", "", "")
-        ftp = FTP("data.chc.ucsb.edu")
-        ftp.login()
-
-        # Define FTP path to directory
-        if TimeCase == "daily":
-            pathFTP = (
-                "pub/org/chg/products/CHIRPS-2.0/global_daily/tifs/p05/%s/"
-                % Date.strftime("%Y")
-            )
-        elif TimeCase == "monthly":
-            pathFTP = "pub/org/chg/products/CHIRPS-2.0/global_monthly/tifs/"
-        else:
-            raise KeyError("The input time interval is not supported")
-
-        # find the document name in this directory
-        ftp.cwd(pathFTP)
-        listing = []
-
-        # read all the file names in the directory
-        ftp.retrlines("LIST", listing.append)
-
-        # create all the input name (filename) and output (outfilename, filetif, DiFileEnd) names
-        if TimeCase == "daily":
-            filename = "chirps-v2.0.%s.%02s.%02s.tif.gz" % (
-                Date.strftime("%Y"),
-                Date.strftime("%m"),
-                Date.strftime("%d"),
-            )
-            outfilename = os.path.join(
-                output_folder,
-                "chirps-v2.0.%s.%02s.%02s.tif"
-                % (Date.strftime("%Y"), Date.strftime("%m"), Date.strftime("%d")),
-            )
-            DirFileEnd = os.path.join(
-                output_folder,
-                "P_CHIRPS.v2.0_mm-day-1_daily_%s.%02s.%02s.tif"
-                % (Date.strftime("%Y"), Date.strftime("%m"), Date.strftime("%d")),
-            )
-        elif TimeCase == "monthly":
-            filename = "chirps-v2.0.%s.%02s.tif.gz" % (
-                Date.strftime("%Y"),
-                Date.strftime("%m"),
-            )
-            outfilename = os.path.join(
-                output_folder,
-                "chirps-v2.0.%s.%02s.tif" % (Date.strftime("%Y"), Date.strftime("%m")),
-            )
-            DirFileEnd = os.path.join(
-                output_folder,
-                "P_CHIRPS.v2.0_mm-month-1_monthly_%s.%02s.%02s.tif"
-                % (Date.strftime("%Y"), Date.strftime("%m"), Date.strftime("%d")),
-            )
-        else:
-            raise KeyError("The input time interval is not supported")
-
-        # download the global rainfall file
-        try:
-            local_filename = os.path.join(output_folder, filename)
-            lf = open(local_filename, "wb")
-            ftp.retrbinary("RETR " + filename, lf.write, 8192)
-            lf.close()
-
-            # unzip the file
-            zip_filename = os.path.join(output_folder, filename)
-            RemoteSensing.ExtractFromGZ(zip_filename, outfilename, delete=True)
-
-            # open tiff file
-            src = gdal.Open(outfilename)
-            dataset, NoDataValue = Raster.getRasterData(src)
-
-            # clip dataset to the given extent
-            data = dataset[yID[0]: yID[1], xID[0]: xID[1]]
-            # replace -ve values with -9999
-            data[data < 0] = -9999
-
-            # save dataset as geotiff file
-            geo = [lonlim[0], 0.05, 0, latlim[1], 0, -0.05]
-            Raster.createRaster(
-                path=DirFileEnd,
-                data=data,
-                geo=geo,
-                epsg="WGS84",
-                nodatavalue=NoDataValue,
-            )
-
-            # delete old tif file
-            os.remove(outfilename)
-
-        except:
-            print("file not exists")
-        return True
 
     def ListAttributes(self):
         """
         Print Attributes List
         """
-        print("\n")
         print(
-            "Attributes List of: "
-            + repr(self.__dict__["name"])
-            + " - "
-            + self.__class__.__name__
-            + " Instance\n"
+            f"Attributes List of: {self.__class__.__name__} Instance\n"
         )
         self_keys = list(self.__dict__.keys())
         self_keys.sort()
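A usage sketch of the renamed `ECMWF` class and the `Variables` helper (dates, extent, and path are the values used in the example notebook below; a configured ECMWF API key is required):

```python
from earth2observe.ecmwf import ECMWF, Variables

# list the downloadable variable codes, their units and descriptions
print(Variables("daily"))

coello = ECMWF(
    time="daily",
    start="2009-01-01",
    end="2009-01-10",
    path="/data/satellite_data/",
    variables=["T", "E"],  # temperature and evapotranspiration
    lat_lim=[4.19, 4.64],
    lon_lim=[-75.65, -74.73],
)
coello.download(progress_bar=True)
```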
diff --git a/earth2observe/utils.py b/earth2observe/utils.py
new file mode 100644
index 0000000..1ab00e5
--- /dev/null
+++ b/earth2observe/utils.py
@@ -0,0 +1,76 @@
+import sys
+import os
+import gzip
+
+def print_progress_bar(
+    i: int,
+    total: int,
+    prefix: str = "",
+    suffix: str = "",
+    decimals: int = 1,
+    length: int = 100,
+    fill: str = "█"
+):
+    """print_progress_bar.
+
+    Parameters
+    ----------
+    i: [int]
+        Iteration number
+    total: [int]
+        Total iterations
+    prefix: [str]
+        Text printed before the bar
+    suffix: [str]
+        Text printed after the bar
+    decimals: [int]
+        Number of decimals in the percentage
+    length: [int]
+        width of the waitbar
+    fill: [str]
+        bar fill character
+    """
+    # guard against division by zero (total can be 0 on posix systems)
+    if os.name == "posix" and total == 0:
+        total = 0.0001
+
+    percent = ("{0:." + str(decimals) + "f}").format(100 * (i / float(total)))
+    filled = int(length * i // total)
+    bar = fill * filled + "-" * (length - filled)
+
+    sys.stdout.write("\r%s |%s| %s%% %s" % (prefix, bar, percent, suffix))
+    sys.stdout.flush()
+
+    if i == total:
+        print()
+
+
+
+def extractFromGZ(input_file, output_file, delete=False):
+    """
+    extractFromGZ extracts data from .gz files and saves it uncompressed
+
+    Parameters
+    ----------
+    input_file : [str]
+        zipped file name.
+    output_file : [str]
+        path where the unzipped data will be stored.
+    delete : [bool]
+        True if you want to delete the zipped file after extracting the data
+    Returns
+    -------
+    None.
+    """
+    with gzip.GzipFile(input_file, "rb") as zf:
+        content = zf.read()
+
+    with open(output_file, "wb") as save_file:
+        save_file.write(content)
+
+    if delete:
+        os.remove(input_file)
\ No newline at end of file
diff --git a/earthobserve/rs/__init__.py b/earthobserve/rs/__init__.py
deleted file mode 100644
index dc346d0..0000000
--- a/earthobserve/rs/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""
-Created on Sat Apr 24 20:57:45 2021
-
-@author: mofarrag
-"""
-
-
diff --git a/environment.yml b/environment.yml
index 81a2b0e..926f54d 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,11 +3,12 @@ channels:
 dependencies:
   - python >=3.7.1,<3.10
   - pip >=21.3.1
+  - numpy >=1.21.2,<1.22.4
   - netCDF4 >=1.5.5,<1.5.9
   - gdal >=3.3.3,<3.5.1
   - pandas >=1.3.2,<1.4.3
+  - pyramids >=0.1.0
   - pip:
-      - pyramids-gis >=0.1.0
       - loguru >=0.5.3
      - pytest >=6.2.5
      - pytest-cov >=2.12.1
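`utils.py` now hosts the helpers shared by both downloaders. A small sketch of how `print_progress_bar` is driven (the loop itself is illustrative):

```python
import time

from earth2observe.utils import print_progress_bar

total = 25
for i in range(total + 1):
    # one call per completed iteration redraws the bar in place via "\r"
    print_progress_bar(i, total, prefix="Progress:", suffix="Complete", length=50)
    time.sleep(0.05)
```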
diff --git a/examples/Download Satellite data.ipynb b/examples/Download Satellite data.ipynb
new file mode 100644
index 0000000..cc6c291
--- /dev/null
+++ b/examples/Download Satellite data.ipynb
@@ -0,0 +1,491 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "48fff0da-2d7a-49ea-ba13-d4768a89bae0",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "# Download Satellite data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2c773dd4-6dc6-4c86-aeed-ad82b6118ccb",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "## ECMWF"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7ac42c26-f454-40e5-8dc2-668efbc2d02b",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "### Installation of ECMWF API key"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ad7c9c5c-ed4c-41b7-afdb-40fb471e015b",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "1 - To be able to use earth2observe to download ECMWF data, you need to register and set up your account on the ECMWF website (https://apps.ecmwf.int/registration/)\n",
+    "\n",
+    "2 - Install the ECMWF key (instructions are here https://confluence.ecmwf.int/display/WEBAPI/Access+ECMWF+Public+Datasets#AccessECMWFPublicDatasets-key)\n",
+    "(https://confluence.ecmwf.int/display/WEBAPI/Install+ECMWF+API+Key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f3d80671-55eb-4915-990b-78f1dcc4e0d1",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "### Using the ECMWF module from earth2observe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9d733330-95bb-42b4-b6f5-895119345222",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from earth2observe.ecmwf import ECMWF\n",
+    "from earth2observe.ecmwf import Variables\n",
+    "import os\n",
+    "#os.chdir(\"F:/01Algorithms/Hydrology/HAPI/Examples\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bc750181-4a25-48f1-b510-ee834484b7c9",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "For information about the ECMWF data, check https://apps.ecmwf.int/codes/grib/param-db/\n",
+    "ECMWF data are represented as variables; to know the name of the variable you want to download, check the object `Variables`.\n",
+    "\n",
+    "`Variables` contains the name of the variable you need to give to the `ECMWF` object, together with its unit and description.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "35bb343d-4354-43e2-82ea-6ebf14aa3615",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Variable name:\n",
+      "{'T': 't', '2T': 't2m', 'SRO': 'sro', 'SSRO': 'ssro', 'WIND': 'wind', '10SI': '10si', 'SP': 'sp', 'Q': 'q', 'SSR': 'ssr', 'R': 'r', 'E': 'e', 'SUND': 'sund', 'RO': 'ro', 'TP': 'tp', '10U': 'u10', '10V': 'v10', '2D': 'd2m', 'SR': 'sr', 'AL': 'al', 'HCC': 'hcc'}\n",
+      "Descriptions\n",
+      "{'T': 'Temperature [K]', '2T': '2 meter Temperature [K]', 'SRO': 'Surface Runoff [m]', 'SSRO': 'Sub-surface Runoff [m]', 'WIND': 'Wind speed [m s-1]', '10SI': '10 metre windspeed [m s-1]', 'SP': 'Surface Pressure [pa]', 'Q': 'Specific humidity [kg kg-1]', 'SSR': 'Surface solar radiation [W m-2 s]', 'R': 'Relative humidity [%]', 'E': 'Evaporation [m of water]', 'SUND': 'Sunshine duration [s]', 'RO': 'Runoff [m]', 'TP': 'Total Precipitation [m]', '10U': '10 metre U wind component [m s-1]', '10V': '10 metre V wind component [m s-1]', '2D': '2 metre dewpoint temperature [K]', 'SR': 'Surface roughness [m]', 'AL': 'Albedo []', 'HCC': 'High cloud cover []'}\n",
+      "Units : \n",
+      "{'T': 'C', '2T': 'C', 'SRO': 'mm', 'SSRO': 'mm', 'WIND': 'm_s-1', '10SI': 'm_s-1', 'SP': 'kpa', 'Q': 'kg_kg-1', 'SSR': 'W_m-2_s', 'R': 'percentage', 'E': 'mm', 'SUND': 's', 'RO': 'mm', 'TP': 'mm', '10U': 'm_s-1', '10V': 'm_s-1', '2D': 'C', 'SR': 'm', 'AL': '-', 'HCC': '-'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "Vars = Variables('daily')\n",
+    "print(Vars)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a2a6ca8-a09b-47b8-b97e-8d1c1e11c561",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "### Inputs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "be829713-e3bb-442e-a2b1-5dfffb46b028",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "- First select the variables; here temperature and evapotranspiration ['E','T']\n",
+    "\n",
+    "- You need to provide the period you want to download the data for, knowing that the data begins on 1979.01.01 and ends on 2019.08.01;\n",
+    "  let's say we need the data between '2009-01-01' and '2009-02-01'\n",
+    "- Then we need to provide the extent in the form of latitude and longitude;\n",
+    "  for our case study `Coello` those are lat = [4.19,4.64] and lon = [-75.64,-74.72]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c73ad20b-e8cd-4a0e-9107-12a2cfbac207",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "StartDate = '2009-01-01'\n",
+    "EndDate = 
'2009-01-10'\n", + "Time = 'daily'\n", + "lat = [4.190755, 4.643963]\n", + "lon = [-75.649243, -74.727286]\n", + "Path = \"/data/satellite_data/\"\n", + "# Temperature, Evapotranspiration\n", + "variables = ['T', 'E']" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "405220ce-2d9b-4cee-9137-ee639153ef64", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Download ECMWF T data for period 2009-01-01 00:00:00 till 2009-01-10 00:00:00\n", + "Use API ECMWF to collect the data, please wait\n", + "2021-04-14 09:13:24 ECMWF API python library 1.6.1\n", + "2021-04-14 09:13:24 ECMWF API at https://api.ecmwf.int/v1\n", + "2021-04-14 09:13:25 Welcome Mostafa Farrag\n", + "2021-04-14 09:13:25 In case of problems, please check https://confluence.ecmwf.int/display/WEBAPI/Web+API+FAQ or contact servicedesk@ecmwf.int\n", + "2021-04-14 09:13:26 Request submitted\n", + "2021-04-14 09:13:26 Request id: 607696162b84daac739e06f2\n", + "2021-04-14 09:13:26 Request is submitted\n", + "2021-04-14 09:13:27 Request is active\n", + "Calling 'nice mars /tmp/20210414-0710/b1/tmp-_marsl0iuOw.req'\n", + "mars - WARN -\n", + "mars - WARN - From 29 January 2019 10AM (UTC) MARS uses the interpolation\n", + "mars - WARN - provided by the MIR library. For more details, see\n", + "mars - WARN - https://confluence.ecmwf.int/display/UDOC/MARS+interpolation+with+MIR\n", + "mars - WARN -\n", + "MIR environment variables:\n", + "MIR_CACHE_PATH=/data/ec_coeff\n", + "mars - INFO - 20210414.071328 - Welcome to MARS\n", + "mars - INFO - 20210414.071328 - MARS Client bundle version: 6.28.6.1\n", + "mars - INFO - 20210414.071328 - MARS Client package version: 6.28.6\n", + "mars - INFO - 20210414.071328 - MARS Client build stamp: 20200717102127\n", + "mars - INFO - 20210414.071328 - MIR version: 1.4.7\n", + "mars - INFO - 20210414.071328 - Using ecCodes version 2.18.0\n", + "mars - INFO - 20210414.071328 - Using odb_api version: 0.15.11 (file format version: 0.5)\n", + "mars - INFO - 20210414.071328 - Using FDB5 version: 5.6.1\n", + "mars - INFO - 20210414.071328 - Maximum retrieval size is 50.00 G\n", + "retrieve,levelist=1000,stream=oper,area=4.75/-75.75/4.125/-74.625,levtype=pl,param=130.128,padding=0,step=0,grid=0.125/0.125,expver=0001,time=00:00:00/06:00:00/12:00:00/18:00:00,date=2009-01-01 00:00:00/to/2009-01-10 00:00:00,type=an,class=eimars - WARN - 20210414.071328 - For full resolution grid, it is recommended to use RESOL=AV to prevent any truncation before transformation\n", + "mars - INFO - 20210414.071328 - Automatic split by date is on\n", + "\n", + "mars - INFO - 20210414.071328 - Processing request 1\n", + "sh: /usr/bin/mailx: No such file or directory\n", + "\n", + "RETRIEVE,\n", + " CLASS = EI,\n", + " TYPE = AN,\n", + " STREAM = OPER,\n", + " EXPVER = 0001,\n", + " REPRES = SH,\n", + " LEVTYPE = PL,\n", + " LEVELIST = 1000,\n", + " PARAM = 130.128,\n", + " TIME = 0000/0600/1200/1800,\n", + " STEP = 0,\n", + " DOMAIN = G,\n", + " RESOL = AUTO,\n", + " AREA = 4.75/-75.75/4.125/-74.625,\n", + " GRID = 0.125/0.125,\n", + " PADDING = 0,\n", + " DATE = 20090101/20090102/20090103/20090104/20090105/20090106/20090107/20090108/20090109/20090110\n", + "\n", + "mars - INFO - 20210414.071328 - Web API request id: 607696162b84daac739e06f2\n", + "mars - INFO - 20210414.071328 - Requesting 40 fields\n", + "mars - INFO - 20210414.071328 - Calling mars on 'marser', local port is 58560\n", + "mars - INFO - 20210414.071328 - 
Server task is 704 [marser]\n", + "mars - INFO - 20210414.071328 - Request cost: 40 fields, 5.05913 Mbytes online, nodes: mvr02 [marser]\n", + "mars - INFO - 20210414.071328 - The efficiency of your requests in the last 12 hours is 100% [marser]\n", + "mars - INFO - 20210414.071328 - Transfering 5304880 bytes\n", + "mars - INFO - 20210414.071328 - ShToGridded: loading Legendre coefficients '/data/ec_coeff/mir/legendre/4/local-T255-GaussianN256-OPT4189816c2e.leg'\n", + "mars - INFO - 20210414.071340 - 40 fields retrieved from 'marser'\n", + "mars - INFO - 20210414.071340 - 40 fields have been interpolated\n", + "mars - INFO - 20210414.071432 - Request time: wall: 1 min 4 sec cpu: 1 sec\n", + "mars - INFO - 20210414.071432 - Read from network: 5.06 Mbyte(s) in < 1 sec [265.06 Mbyte/sec]\n", + "mars - INFO - 20210414.071432 - Visiting marser: wall: 12 sec\n", + "mars - INFO - 20210414.071432 - Post-processing: wall: 11 sec\n", + "mars - INFO - 20210414.071432 - Writing to target file: 8.91 Kbyte(s) in 52 sec [174.18 byte/sec]\n", + "mars - INFO - 20210414.071432 - Memory used: 44.30 Mbyte(s)\n", + "mars - INFO - 20210414.071432 - No errors reported\n", + "Process '['nice', 'mars', '/tmp/20210414-0710/b1/tmp-_marsl0iuOw.req']' finished\n", + "Calling 'nice grib_to_netcdf /data/scratch/20210414-0710/ef/_mars-webmars-public-svc-green-001-6fe5cac1a363ec1525f54343b6cc9fd8-06C7Rj.grib -o /data/scratch/20210414-0710/28/_grib2netcdf-webmars-public-svc-green-007-6fe5cac1a363ec1525f54343b6cc9fd8-tc_782.nc -utime'\n", + "grib_to_netcdf: Version 2.21.0\n", + "grib_to_netcdf: Processing input file '/data/scratch/20210414-0710/ef/_mars-webmars-public-svc-green-001-6fe5cac1a363ec1525f54343b6cc9fd8-06C7Rj.grib'.\n", + "grib_to_netcdf: Found 40 GRIB fields in 1 file.\n", + "grib_to_netcdf: Ignoring key(s): method, type, stream, refdate, hdate\n", + "grib_to_netcdf: Creating netCDF file '/data/scratch/20210414-0710/28/_grib2netcdf-webmars-public-svc-green-007-6fe5cac1a363ec1525f54343b6cc9fd8-tc_782.nc'\n", + "grib_to_netcdf: NetCDF library version: 4.3.3.1 of Dec 10 2015 16:44:18 $\n", + "grib_to_netcdf: Creating large (64 bit) file format.\n", + "grib_to_netcdf: Defining variable 't'.\n", + "grib_to_netcdf: Done.\n", + "Process '['nice', 'grib_to_netcdf', '/data/scratch/20210414-0710/ef/_mars-webmars-public-svc-green-001-6fe5cac1a363ec1525f54343b6cc9fd8-06C7Rj.grib', '-o', '/data/scratch/20210414-0710/28/_grib2netcdf-webmars-public-svc-green-007-6fe5cac1a363ec1525f54343b6cc9fd8-tc_782.nc', '-utime']' finished\n", + "2021-04-14 09:14:40 Request is complete\n", + "2021-04-14 09:14:40 Transfering 5.97656 Kbytes into data_interim.nc\n", + "2021-04-14 09:14:40 From https://stream.ecmwf.int/data/webmars-public-svc-green-007/data/scratch/20210414-0710/28/_grib2netcdf-webmars-public-svc-green-007-6fe5cac1a363ec1525f54343b6cc9fd8-tc_782.nc\n", + "2021-04-14 09:14:41 Transfer rate 22.9308 Kbytes/s\n", + "Progress: |██████████████████████████████████████████████████| 100.0% Complete\n", + "\n", + "Download ECMWF E data for period 2009-01-01 00:00:00 till 2009-01-10 00:00:00\n", + "Use API ECMWF to collect the data, please wait\n", + "2021-04-14 09:14:41 ECMWF API python library 1.6.1\n", + "2021-04-14 09:14:41 ECMWF API at https://api.ecmwf.int/v1\n", + "2021-04-14 09:14:41 Welcome Mostafa Farrag\n", + "2021-04-14 09:14:42 In case of problems, please check https://confluence.ecmwf.int/display/WEBAPI/Web+API+FAQ or contact servicedesk@ecmwf.int\n", + "2021-04-14 09:14:42 Request submitted\n", + "2021-04-14 09:14:42 
Request id: 60769663d685a2045b9e06ec\n", + "2021-04-14 09:14:42 Request is submitted\n", + "2021-04-14 09:14:44 Request is active\n", + "Calling 'nice mars /tmp/20210414-0710/0b/tmp-_marsUrYh66.req'\n", + "mars - WARN -\n", + "mars - WARN - From 29 January 2019 10AM (UTC) MARS uses the interpolation\n", + "mars - WARN - provided by the MIR library. For more details, see\n", + "mars - WARN - https://confluence.ecmwf.int/display/UDOC/MARS+interpolation+with+MIR\n", + "mars - WARN -\n", + "MIR environment variables:\n", + "MIR_CACHE_PATH=/data/ec_coeff\n", + "mars - INFO - 20210414.071444 - Welcome to MARS\n", + "mars - INFO - 20210414.071444 - MARS Client bundle version: 6.28.6.1\n", + "mars - INFO - 20210414.071444 - MARS Client package version: 6.28.6\n", + "mars - INFO - 20210414.071444 - MARS Client build stamp: 20200717102127\n", + "mars - INFO - 20210414.071444 - MIR version: 1.4.7\n", + "mars - INFO - 20210414.071444 - Using ecCodes version 2.18.0\n", + "mars - INFO - 20210414.071444 - Using odb_api version: 0.15.11 (file format version: 0.5)\n", + "mars - INFO - 20210414.071444 - Using FDB5 version: 5.6.1\n", + "mars - INFO - 20210414.071444 - Maximum retrieval size is 50.00 G\n", + "retrieve,stream=oper,area=4.75/-75.75/4.125/-74.625,levtype=sfc,param=182.128,padding=0,step=12,grid=0.125/0.125,expver=0001,time=00:00:00/12:00:00,date=2009-01-01 00:00:00/to/2009-01-10 00:00:00,type=fc,class=eimars - WARN - 20210414.071444 - For full resolution grid, it is recommended to use RESOL=AV to prevent any truncation before transformation\n", + "mars - INFO - 20210414.071444 - Automatic split by date is on\n", + "\n", + "mars - INFO - 20210414.071444 - Processing request 1\n", + "sh: /usr/bin/mailx: No such file or directory\n", + "\n", + "RETRIEVE,\n", + " CLASS = EI,\n", + " TYPE = FC,\n", + " STREAM = OPER,\n", + " EXPVER = 0001,\n", + " REPRES = SH,\n", + " LEVTYPE = SFC,\n", + " PARAM = 182.128,\n", + " TIME = 0000/1200,\n", + " STEP = 12,\n", + " DOMAIN = G,\n", + " RESOL = AUTO,\n", + " AREA = 4.75/-75.75/4.125/-74.625,\n", + " GRID = 0.125/0.125,\n", + " PADDING = 0,\n", + " DATE = 20090101/20090102/20090103/20090104/20090105/20090106/20090107/20090108/20090109/20090110\n", + "\n", + "mars - INFO - 20210414.071444 - Web API request id: 60769663d685a2045b9e06ec\n", + "mars - INFO - 20210414.071444 - Requesting 20 fields\n", + "mars - INFO - 20210414.071444 - Calling mars on 'marser', local port is 59438\n", + "mars - INFO - 20210414.071444 - Server task is 286 [marser]\n", + "mars - INFO - 20210414.071444 - Request cost: 20 fields, 3.40073 Mbytes online, nodes: mvr02 [marser]\n", + "mars - INFO - 20210414.071444 - The efficiency of your requests in the last 12 hours is 100% [marser]\n", + "mars - INFO - 20210414.071444 - Transfering 3565920 bytes\n", + "mars - INFO - 20210414.071444 - 20 fields retrieved from 'marser'\n", + "mars - INFO - 20210414.071444 - 20 fields have been interpolated\n", + "mars - INFO - 20210414.071445 - Request time: wall: 1 sec\n", + "mars - INFO - 20210414.071445 - Read from network: 3.40 Mbyte(s) in < 1 sec [273.06 Mbyte/sec]\n", + "mars - INFO - 20210414.071445 - Writing to target file: 4.45 Kbyte(s) in < 1 sec [41.17 Kbyte/sec]\n", + "mars - INFO - 20210414.071445 - Memory used: 36.57 Mbyte(s)\n", + "mars - INFO - 20210414.071445 - No errors reported\n", + "Process '['nice', 'mars', '/tmp/20210414-0710/0b/tmp-_marsUrYh66.req']' finished\n", + "Calling 'nice grib_to_netcdf 
/data/scratch/20210414-0710/26/_mars-webmars-public-svc-green-003-6fe5cac1a363ec1525f54343b6cc9fd8-JfoG3r.grib -o /data/scratch/20210414-0710/7a/_grib2netcdf-webmars-public-svc-green-006-6fe5cac1a363ec1525f54343b6cc9fd8-hKn4GP.nc -utime'\n",
+    "grib_to_netcdf: Version 2.21.0\n",
+    "grib_to_netcdf: Processing input file '/data/scratch/20210414-0710/26/_mars-webmars-public-svc-green-003-6fe5cac1a363ec1525f54343b6cc9fd8-JfoG3r.grib'.\n",
+    "grib_to_netcdf: Found 20 GRIB fields in 1 file.\n",
+    "grib_to_netcdf: Ignoring key(s): method, type, stream, refdate, hdate\n",
+    "grib_to_netcdf: Creating netCDF file '/data/scratch/20210414-0710/7a/_grib2netcdf-webmars-public-svc-green-006-6fe5cac1a363ec1525f54343b6cc9fd8-hKn4GP.nc'\n",
+    "grib_to_netcdf: NetCDF library version: 4.3.3.1 of Dec 10 2015 16:44:18 $\n",
+    "grib_to_netcdf: Creating large (64 bit) file format.\n",
+    "grib_to_netcdf: Defining variable 'e'.\n",
+    "grib_to_netcdf: Done.\n",
+    "Process '['nice', 'grib_to_netcdf', '/data/scratch/20210414-0710/26/_mars-webmars-public-svc-green-003-6fe5cac1a363ec1525f54343b6cc9fd8-JfoG3r.grib', '-o', '/data/scratch/20210414-0710/7a/_grib2netcdf-webmars-public-svc-green-006-6fe5cac1a363ec1525f54343b6cc9fd8-hKn4GP.nc', '-utime']' finished\n",
+    "2021-04-14 09:14:49 Request is complete\n",
+    "2021-04-14 09:14:49 Transfering 3.60156 Kbytes into data_interim.nc\n",
+    "2021-04-14 09:14:49 From https://stream.ecmwf.int/data/webmars-public-svc-green-006/data/scratch/20210414-0710/7a/_grib2netcdf-webmars-public-svc-green-006-6fe5cac1a363ec1525f54343b6cc9fd8-hKn4GP.nc\n",
+    "2021-04-14 09:14:49 Transfer rate 16.9291 Kbytes/s\n",
+    "Progress: |██████████████████████████████████████████████████| 100.0% Complete\n"
+     ]
+    }
+   ],
+   "source": [
+    "Coello = ECMWF(start=StartDate, end=EndDate, time=Time,\n",
+    "               lat_lim=lat, lon_lim=lon, path=Path, variables=variables)\n",
+    "\n",
+    "Coello.download(progress_bar=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "195e57ae-6345-45a4-a461-865734bafd73",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "## CHIRPS"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "358e0726-28fb-4200-9674-9d0754b308cd",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "Using the same inputs (period and extent); CHIRPS data do not need any registration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "bf101450-3e00-4572-a50c-673a267996c7",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from earth2observe.chirps import CHIRPS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "e9ab20a1-786f-4692-913e-94f2f85b0281",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Progress: |██████████████████████████████████████████████████| 100.0% Complete\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Coello = CHIRPS(start=StartDate, end=EndDate, time=Time,\n",
+    "                lat_lim=lat, lon_lim=lon, path=Path)\n",
+    "Coello.Download()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2a2dbda9-82f9-44c3-9d0f-c9b330ae880a",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "### Parallel download\n",
+    "- As the CHIRPS data are downloaded directly from an FTP server, several downloads can be done at the same time\n",
+    "- to choose how many cores to be used in the parallelization, you have to provide the parameter `cores`\n",
+    "- there is no progress bar in case of parallel downloads"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d0cfa73-938e-4334-b8cf-9ebc50ddaa81",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "Coello.Download(cores=4)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "name": "pycharm-e2d4c152",
+   "language": "python",
+   "display_name": "PyCharm (pythonProject)"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
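Both the notebook above and the script below assume the ECMWF credentials are already installed. Besides the `~/.ecmwfapirc` file described in the linked instructions, the `ecmwfapi` client can read the credentials from environment variables, which is what the commented-out `url`/`key`/`email` hints in `ecmwf.py` point at (the key and email below are placeholders):

```python
import os

# placeholders; use the key and email shown on your ECMWF account page
os.environ["ECMWF_API_URL"] = "https://api.ecmwf.int/v1"
os.environ["ECMWF_API_KEY"] = "<your-api-key>"
os.environ["ECMWF_API_EMAIL"] = "<your-registered-email>"

from ecmwfapi import ECMWFDataServer

server = ECMWFDataServer()  # picks up the credentials set above
```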
diff --git a/examples/Download Satellite data.py b/examples/Download Satellite data.py
new file mode 100644
index 0000000..ea55d01
--- /dev/null
+++ b/examples/Download Satellite data.py
@@ -0,0 +1,56 @@
+"""
+Download Satellite data
+ECMWF
+Installation of the ECMWF API key
+1 - To be able to use earth2observe to download ECMWF data, you need to register and set up your account on the ECMWF website (https://apps.ecmwf.int/registration/)
+
+2 - Install the ECMWF key (instructions are here https://confluence.ecmwf.int/display/WEBAPI/Access+ECMWF+Public+Datasets#AccessECMWFPublicDatasets-key)
+"""
+from earth2observe.chirps import CHIRPS
+from earth2observe.ecmwf import ECMWF
+from earth2observe.ecmwf import Variables
+#%% inputs
+start = "2009-01-01"
+end = "2009-01-10"
+time = "daily"
+lat = [4.190755, 4.643963]
+lon = [-75.649243, -74.727286]
+path = "/data/satellite_data/"
+# Temperature, Evapotranspiration
+variables = ["T", "E"]
+#%%
+Vars = Variables("daily")
+print(Vars)
+#%% Temperature
+start = "2009-01-01"
+end = "2009-02-01"
+time = "daily"
+latlim = [4.19, 4.64]
+lonlim = [-75.65, -74.73]
+path = r"C:\MyComputer\01Algorithms\Hydrology\earth2observe\examples\data\ecmwf"
+# Temperature, Evapotranspiration
+variables = ["T", "E"]
+
+Coello = ECMWF(
+    time=time,
+    start=start,
+    end=end,
+    lat_lim=latlim,
+    lon_lim=lonlim,
+    path=path,
+    variables=variables,
+)
+
+Coello.download()
+
+#%%
+path = r"C:\MyComputer\01Algorithms\Hydrology\earth2observe\examples\data\chirps"
+Coello = CHIRPS(
+    time=time,
+    start=start,
+    end=end,
+    lat_lim=latlim,
+    lon_lim=lonlim,
+    path=path,
+)
+Coello.Download()  # cores=4
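Given a configuration like the one above, each downloader writes one GeoTIFF per timestep under the chosen `path`; the example data committed below follows the same naming patterns (paths illustrative):

```
<path>/daily/Tair2m/Tair2m_ECMWF_ERA-Interim_C_daily_2009.01.01.tif            # ECMWF
<path>/Precipitation/CHIRPS/Daily/P_CHIRPS.v2.0_mm-day-1_daily_2009.01.01.tif  # CHIRPS
```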
diff --git a/examples/__init__.py b/examples/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/data/ecmwf/daily/Tair2m/Tair2m_ECMWF_ERA-Interim_C_daily_2009.01.01.tif b/examples/data/ecmwf/daily/Tair2m/Tair2m_ECMWF_ERA-Interim_C_daily_2009.01.01.tif
new file mode 100644
index 0000000..3b889b6
Binary files /dev/null and b/examples/data/ecmwf/daily/Tair2m/Tair2m_ECMWF_ERA-Interim_C_daily_2009.01.01.tif differ
diff --git a/examples/data/ecmwf/daily/Tair2m/Tair2m_ECMWF_ERA-Interim_C_daily_2009.01.02.tif b/examples/data/ecmwf/daily/Tair2m/Tair2m_ECMWF_ERA-Interim_C_daily_2009.01.02.tif
new file mode 100644
index 0000000..b692de1
Binary files /dev/null and b/examples/data/ecmwf/daily/Tair2m/Tair2m_ECMWF_ERA-Interim_C_daily_2009.01.02.tif differ
diff --git a/examples/data/ecmwf/data_interim.nc b/examples/data/ecmwf/data_interim.nc
new file mode 100644
index 0000000..489124d
Binary files /dev/null and b/examples/data/ecmwf/data_interim.nc differ
diff --git a/mkdocs.yml b/mkdocs.yml
index b5ed588..712a70a 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -2,7 +2,7 @@
-site_name: statista
-site_description: The documentation of Hapi Hydrological Model
+site_name: earth2observe
+site_description: The documentation of the earth2observe remote sensing package
 site_author: Mostafa Farrag
-repo_url: https://github.com/MAfarrag/statista
+repo_url: https://github.com/MAfarrag/earth2observe
 edit_url: ""
 
 theme:
@@ -10,5 +10,4 @@ theme:
 
 nav:
   - Home: index.md
-  - GIS: GIS.md
   - License: license.md
diff --git a/setup.py b/setup.py
index f74169c..c5e4cb7 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@ test_requirements = ['pytest>=3', ]
 
 setup(
-    name="earthobserve",
+    name="earth2observe",
     version="0.1.0",
     description="remote sensing package",
     author="Mostafa Farrag",
@@ -23,15 +23,15 @@
     long_description_content_type="text/markdown",
     license="GNU General Public License v3",
     zip_safe=False,
-    packages=find_packages(include=['earthobserve', 'earthobserve.*']),
+    packages=find_packages(include=['earth2observe', 'earth2observe.*']),
     test_suite="tests",
     tests_require=test_requirements,
     # install_requires=requirements,
-    entry_points={
-        'console_scripts': [
-            'earthobserve=earthobserve.cli:main',
-        ],
-    },
+    # entry_points={
+    #     'console_scripts': [
+    #         'earth2observe=earth2observe.cli:main',
+    #     ],
+    # },
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Environment :: Console",
diff --git a/tests/conftest.py b/tests/conftest.py
index 9676663..5029671 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,30 +1,9 @@
-from typing import List
+# from typing import List
 
 import pandas as pd
 import pytest
 
-@pytest.fixture(scope="module")
-def time_series1() -> list:
-    return pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
-
-
-@pytest.fixture(scope="module")
-def time_series2() -> list:
-    return pd.read_csv("examples/data/time_series2.txt", header=None)[0].tolist()
-
-@pytest.fixture(scope="module")
-def dist_estimation_parameters() -> List[str]:
-    return ["mle", "lmoments"]
-
-@pytest.fixture(scope="module")
-def dist_estimation_parameters_ks() -> str:
-    return "lmoments"
-
-@pytest.fixture(scope="module")
-def confidence_interval_alpha() -> float:
-    return 0.1
-
-@pytest.fixture(scope="module")
-def parameter_estimation_optimization_threshold() -> int:
-    return 17
+# @pytest.fixture(scope="module")
+# def time_series1() -> list:
+#     return pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
\ No newline at end of file
diff --git a/tests/test_distributions.py b/tests/test_distributions.py
deleted file mode 100644
index 7a9fdca..0000000
--- a/tests/test_distributions.py
+++ /dev/null
@@ -1,255 +0,0 @@
-from typing import List
-
-import numpy as np
-from matplotlib.figure import Figure
-
-from statista.distributions import GEV, ConfidenceInterval, Gumbel, PlottingPosition
-
-
-def test_plotting_position_weibul(
-    time_series1: list,
-):
-    cdf = PlottingPosition.Weibul(time_series1, option=1)
-    assert isinstance(cdf, np.ndarray)
-    rp = PlottingPosition.Weibul(time_series1, option=2)
-    assert isinstance(rp, np.ndarray)
-
-def test_plotting_position_rp(
-    time_series1: list,
-):
-    cdf = PlottingPosition.Weibul(time_series1, option=1)
-    rp = PlottingPosition.Returnperiod(cdf)
-    assert isinstance(rp, np.ndarray)
-
-
-def test_create_gumbel_instance(
-    time_series1: list,
-):
-    Gdist = Gumbel(time_series1)
-    assert isinstance(Gdist.data, np.ndarray)
-    assert isinstance(Gdist.data_sorted, np.ndarray)
-
-
-def test_gumbel_estimate_parameter(
-    time_series2: list,
-    dist_estimation_parameters: List[str],
-):
-    Gdist = Gumbel(time_series2)
-    for i in range(len(dist_estimation_parameters)):
-        param = 
Gdist.EstimateParameter(method=dist_estimation_parameters[i], Test=False) - assert isinstance(param, list) - assert Gdist.loc - assert Gdist.scale - - -def test_parameter_estimation_optimization( - time_series2: list, - dist_estimation_parameters: List[str], - parameter_estimation_optimization_threshold: int, -): - Gdist = Gumbel(time_series2) - param = Gdist.EstimateParameter( - method="optimization", ObjFunc=Gumbel.ObjectiveFn, - threshold=parameter_estimation_optimization_threshold - ) - assert isinstance(param, list) - assert Gdist.loc - assert Gdist.scale - -def test_gumbel_ks( - time_series2: list, - dist_estimation_parameters_ks: str, -): - Gdist = Gumbel(time_series2) - Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - Gdist.ks() - assert Gdist.Dstatic - assert Gdist.KS_Pvalue - - - -def test_gumbel_chisquare( - time_series2: list, - dist_estimation_parameters_ks: str, -): - Gdist = Gumbel(time_series2) - Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - Gdist.chisquare() - assert Gdist.chistatic - assert Gdist.chi_Pvalue - - -def test_gumbel_pdf( - time_series2: list, - dist_estimation_parameters_ks: str, -): - Gdist = Gumbel(time_series2) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - pdf, fig, ax = Gdist.pdf(Param[0], Param[1], plot_figure=True) - assert isinstance(pdf, np.ndarray) - assert isinstance(fig, Figure) - - -def test_gumbel_cdf( - time_series2: list, - dist_estimation_parameters_ks: str, -): - Gdist = Gumbel(time_series2) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - cdf, fig, ax = Gdist.cdf(Param[0], Param[1], plot_figure=True) - assert isinstance(cdf, np.ndarray) - assert isinstance(fig, Figure) - - -def test_gumbel_TheporeticalEstimate( - time_series2: list, - dist_estimation_parameters_ks: str, -): - Gdist = Gumbel(time_series2) - cdf_Weibul = PlottingPosition.Weibul(time_series2) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - Qth = Gdist.TheporeticalEstimate(Param[0], Param[1], cdf_Weibul) - assert isinstance(Qth, np.ndarray) - - -def test_gumbel_confidence_interval( - time_series2: list, - dist_estimation_parameters_ks: str, - confidence_interval_alpha: float -): - Gdist = Gumbel(time_series2) - cdf_Weibul = PlottingPosition.Weibul(time_series2) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - upper, lower = Gdist.ConfidenceInterval(Param[0], Param[1], cdf_Weibul, alpha=confidence_interval_alpha) - assert isinstance(upper, np.ndarray) - assert isinstance(lower, np.ndarray) - - -def test_gumbel_probapility_plot( - time_series2: list, - dist_estimation_parameters_ks: str, - confidence_interval_alpha: float -): - Gdist = Gumbel(time_series2) - cdf_Weibul = PlottingPosition.Weibul(time_series2) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - [fig1, fig2], [ax1, ax2] = Gdist.ProbapilityPlot(Param[0], Param[1], cdf_Weibul, alpha=confidence_interval_alpha) - assert isinstance(fig1, Figure) - assert isinstance(fig2, Figure) - - - - - -def test_create_gev_instance( - time_series1: list, -): - Gdist = GEV(time_series1) - assert isinstance(Gdist.data, np.ndarray) - assert isinstance(Gdist.data_sorted, np.ndarray) - - -def test_gev_estimate_parameter( - time_series1: list, - dist_estimation_parameters: List[str], -): - Gdist = GEV(time_series1) - for i in range(len(dist_estimation_parameters)): - param = 
Gdist.EstimateParameter(method=dist_estimation_parameters[i], Test=False) - assert isinstance(param, list) - assert Gdist.loc - assert Gdist.scale - assert Gdist.shape - - -def test_gev_ks( - time_series1: list, - dist_estimation_parameters_ks: str, -): - Gdist = GEV(time_series1) - Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - Gdist.ks() - assert Gdist.Dstatic - assert Gdist.KS_Pvalue - -def test_gev_chisquare( - time_series1: list, - dist_estimation_parameters_ks: str, -): - Gdist = GEV(time_series1) - Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - Gdist.chisquare() - assert Gdist.chistatic - assert Gdist.chi_Pvalue - - -def test_gev_pdf( - time_series1: list, - dist_estimation_parameters_ks: str, -): - Gdist = GEV(time_series1) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - pdf, fig, ax = Gdist.pdf(Param[0], Param[1], Param[2], plot_figure=True) - assert isinstance(pdf, np.ndarray) - assert isinstance(fig, Figure) - - -def test_gev_cdf( - time_series1: list, - dist_estimation_parameters_ks: str, -): - Gdist = GEV(time_series1) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - cdf, fig, ax = Gdist.cdf(Param[0], Param[1], Param[2], plot_figure=True) - assert isinstance(cdf, np.ndarray) - assert isinstance(fig, Figure) - -def test_gev_TheporeticalEstimate( - time_series1: list, - dist_estimation_parameters_ks: str, -): - Gdist = GEV(time_series1) - cdf_Weibul = PlottingPosition.Weibul(time_series1) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - Qth = Gdist.TheporeticalEstimate(Param[0], Param[1], Param[2],cdf_Weibul) - assert isinstance(Qth, np.ndarray) - - -def test_gev_confidence_interval( - time_series1: list, - dist_estimation_parameters_ks: str, - confidence_interval_alpha: float -): - Gdist = GEV(time_series1) - cdf_Weibul = PlottingPosition.Weibul(time_series1) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - func = ConfidenceInterval.GEVfunc - upper, lower = Gdist.ConfidenceInterval( - Param[0], Param[1], Param[2], F=cdf_Weibul, alpha=confidence_interval_alpha, - statfunction=func, n_samples=len(time_series1) - ) - assert isinstance(upper, np.ndarray) - assert isinstance(lower, np.ndarray) - - -def test_confidence_interval_directly( - time_series1: list, - dist_estimation_parameters_ks: str, - confidence_interval_alpha: float -): - Gdist = GEV(time_series1) - cdf_Weibul = PlottingPosition.Weibul(time_series1) - Param = Gdist.EstimateParameter(method=dist_estimation_parameters_ks, Test=False) - func = ConfidenceInterval.GEVfunc - # upper, lower = Gdist.ConfidenceInterval( - # Param[0], Param[1], Param[2], F=cdf_Weibul, alpha=confidence_interval_alpha, - # statfunction=func, n_samples=len(time_series1) - # ) - CI = ConfidenceInterval.BootStrap( - time_series1, statfunction=func, gevfit=Param, n_samples=len(time_series1), F=cdf_Weibul - ) - LB = CI["LB"] - UB = CI["UB"] - - assert isinstance(LB, np.ndarray) - assert isinstance(UB, np.ndarray)
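With the statista test suite removed and `conftest.py` stubbed out, the new package currently ships without tests. A minimal smoke test could target the static `Variables` metadata; this sketch only asserts facts visible in the notebook output above:

```python
from earth2observe.ecmwf import Variables


def test_variables_daily():
    var_info = Variables("daily")
    # values taken from the notebook output: 'T' maps to netCDF name 't' with unit 'C'
    assert var_info.var_name["T"] == "t"
    assert var_info.units["T"] == "C"
    assert "E" in var_info.descriptions
```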