From da9cac359197347a3e1d69c0a6466da479bf20a0 Mon Sep 17 00:00:00 2001 From: Mostafa Farrag Date: Thu, 4 Jan 2024 02:00:24 +0100 Subject: [PATCH] dataset/dataset/domain-cells-for-different-bands (#76) * count domain cells now takes band parameter * clean * correct the no_data_value for each band * add docs * update history * update checklist files --- HISTORY.rst | 7 ++ README.md | 4 +- docs/dataset.rst | 29 ++++++++ pyramids/dataset.py | 132 +++++++++++++++++----------------- setup.py | 2 +- tests/dataset/test_dataset.py | 12 ++++ 6 files changed, 119 insertions(+), 67 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 50be920dd..c01fa6fe2 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -147,3 +147,10 @@ Dataset """"""" * fix the un-updated array dimension bug in the crop method when the mask is a vector mask and the touch parameter is True. + + +0.5.5 (2024-01-04) +------------------ +Dataset +""""""" +* Count domain cells for a specific band. diff --git a/README.md b/README.md index eef914d64..b7aa953f5 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Installing pyramids Installing `pyramids` from the `conda-forge` channel can be achieved by: ``` -conda install -c conda-forge pyramids=0.5.4 +conda install -c conda-forge pyramids=0.5.5 ``` It is possible to list all the versions of `pyramids` available on your platform with: @@ -68,7 +68,7 @@ pip install git+https://github.com/Serapieum-of-alex/pyramids to install the last release, you can easily use pip ``` -pip install pyramids-gis==0.5.4 +pip install pyramids-gis==0.5.5 ``` Quick start diff --git a/docs/dataset.rst b/docs/dataset.rst index 126b065fe..aac173c35 100644 --- a/docs/dataset.rst +++ b/docs/dataset.rst @@ -1079,6 +1079,35 @@ To extract the ExtractedValues, Cells = R.OverlayMap(Path+"DepthMax22489.zip", BaseMapF,ExcludedValue, Compressed,OccupiedCellsOnly) +count_domain_cells +------------------ +- To count the number of cells in a raster that are not equal to the `no_data_value`.
+ +Parameters +^^^^^^^^^^ +band: [int] + band index. Default is 0. + +Returns +^^^^^^^ +int: + Number of cells + +.. code:: py + + path = "examples/data/dem/DEM5km_Rhine_burned_fill.tif" + dataset = Dataset.read_file(path) + cells = dataset.count_domain_cells() + print(f"Number of cells = {cells}") + + Number of cells = 6374 + +in case the dataset is a multi-band raster, you can specify the band index. + +.. code:: py + + cells = dataset.count_domain_cells(band=1) + Mathematical operations ======================= diff --git a/pyramids/dataset.py b/pyramids/dataset.py index 6bb781ed2..1253823e8 100644 --- a/pyramids/dataset.py +++ b/pyramids/dataset.py @@ -1068,18 +1068,22 @@ def get_variables(self): return variables - def count_domain_cells(self): + def count_domain_cells(self, band: int = 0) -> int: """Count cells inside the domain + Parameters + ---------- + band: [int] + band index. Default is 0. + Returns ------- int: Number of cells """ - # count cells inside the domain - arr = self.raster.ReadAsArray() + arr = self.read_array(band=band) domain_count = np.size(arr[:, :]) - np.count_nonzero( - (arr[np.isclose(arr, self.no_data_value[0], rtol=0.001)]) + (arr[np.isclose(arr, self.no_data_value[band], rtol=0.001)]) ) return domain_count @@ -3353,7 +3357,7 @@ def read_multiple_files( cls, path: Union[str, List[str]], with_order: bool = False, - regex_string=r"\d{4}.\d{2}.\d{2}", + regex_string: str = r"\d{4}.\d{2}.\d{2}", date: bool = True, file_name_data_fmt: str = None, start: str = None, @@ -3363,74 +3367,74 @@ def read_multiple_files( ): r"""read_multiple_files. - - reads rasters from a folder and creates a 3d array with the same 2d dimensions of the first raster in - the folder and length as the number of files. + - reads rasters from a folder and creates a 3d array with the same 2d dimensions of the first raster in + the folder and length as the number of files. - inside the folder. 
- - All rasters should have the same dimensions - - If you want to read the rasters with a certain order, then all raster file names should have a date that follows - the same format (YYYY.MM .DD / YYYY-MM-DD or YYYY_MM_DD) (i.e. "MSWEP_1979.01.01.tif"). + inside the folder. + - All rasters should have the same dimensions + - If you want to read the rasters with a certain order, then all raster file names should have a date that + follows the same format (YYYY.MM .DD / YYYY-MM-DD or YYYY_MM_DD) (i.e. "MSWEP_1979.01.01.tif"). - Parameters - ---------- - path:[str/list] - path of the folder that contains all the rasters, ora list contains the paths of the rasters to read. - with_order: [bool] - ` True if the rasters names' follows a certain order, then the rasters' names should have a date that follows - the same format (YYYY.MM.DD / YYYY-MM-DD or YYYY_MM_DD). - >>> "MSWEP_1979.01.01.tif" - >>> "MSWEP_1979.01.02.tif" - >>> ... - >>> "MSWEP_1979.01.20.tif" - regex_string: [str] - a regex string that we can use to locate the date in the file names.Default is r"\d{4}.\d{ - 2}.\d{2}". - >>> fname = "MSWEP_YYYY.MM.DD.tif" - >>> regex_string = r"\d{4}.\d{2}.\d{2}" - - or - >>> fname = "MSWEP_YYYY_M_D.tif" - >>> regex_string = r"\d{4}_\d{1}_\d{1}" - - if there is a number at the beginning of the name - >>> fname = "1_MSWEP_YYYY_M_D.tif" - >>> regex_string = r"\d+" - date: [bool] - True if the number in the file name is a date. Default is True. - file_name_data_fmt : [str] - if the files names' have a date and you want to read them ordered .Default is None - >>> "MSWEP_YYYY.MM.DD.tif" - >>> file_name_data_fmt = "%Y.%m.%d" - start: [str] - start date if you want to read the input raster for a specific period only and not all rasters, - if not given all rasters in the given path will be read. - end: [str] - end date if you want to read the input temperature for a specific period only, - if not given all rasters in the given path will be read. 
- fmt: [str] - format of the given date in the start/end parameter. - extension: [str] - the extension of the files you want to read from the given path. Default is ".tif". + Parameters + ---------- + path:[str/list] + path of the folder that contains all the rasters, ora list contains the paths of the rasters to read. + with_order: [bool] + True if the rasters names' follows a certain order, then the rasters' names should have a date that follows + the same format (YYYY.MM.DD / YYYY-MM-DD or YYYY_MM_DD). + >>> "MSWEP_1979.01.01.tif" + >>> "MSWEP_1979.01.02.tif" + >>> ... + >>> "MSWEP_1979.01.20.tif" + regex_string: [str] + a regex string that we can use to locate the date in the file names.Default is r"\d{4}.\d{ + 2}.\d{2}". + >>> fname = "MSWEP_YYYY.MM.DD.tif" + >>> regex_string = r"\d{4}.\d{2}.\d{2}" + - or + >>> fname = "MSWEP_YYYY_M_D.tif" + >>> regex_string = r"\d{4}_\d{1}_\d{1}" + - if there is a number at the beginning of the name + >>> fname = "1_MSWEP_YYYY_M_D.tif" + >>> regex_string = r"\d+" + date: [bool] + True if the number in the file name is a date. Default is True. + file_name_data_fmt : [str] + if the files names' have a date and you want to read them ordered .Default is None + >>> "MSWEP_YYYY.MM.DD.tif" + >>> file_name_data_fmt = "%Y.%m.%d" + start: [str] + start date if you want to read the input raster for a specific period only and not all rasters, + if not given all rasters in the given path will be read. + end: [str] + end date if you want to read the input temperature for a specific period only, + if not given all rasters in the given path will be read. + fmt: [str] + format of the given date in the start/end parameter. + extension: [str] + the extension of the files you want to read from the given path. Default is ".tif". - Returns - ------- - DataCube: - instance of the datacube class. + Returns + ------- + DataCube: + instance of the datacube class. 
- Example - ------- - >>> from pyramids.dataset import Datacube - >>> raster_folder = "examples/GIS/data/raster-folder" - >>> prec = Datacube.read_multiple_files(raster_folder) - - >>> import glob - >>> search_criteria = "*.tif" - >>> file_list = glob.glob(os.path.join(raster_folder, search_criteria)) - >>> prec = Datacube.read_multiple_files(file_list, with_order=False) + Example + ------- + >>> from pyramids.dataset import Datacube + >>> raster_folder = "examples/GIS/data/raster-folder" + >>> prec = Datacube.read_multiple_files(raster_folder) + + >>> import glob + >>> search_criteria = "*.tif" + >>> file_list = glob.glob(os.path.join(raster_folder, search_criteria)) + >>> prec = Datacube.read_multiple_files(file_list, with_order=False) """ if not isinstance(path, str) and not isinstance(path, list): raise TypeError(f"path input should be string/list type, given{type(path)}") if isinstance(path, str): - # check wither the path exists or not + # check whether the path exists or not if not os.path.exists(path): raise FileNotFoundError("The path you have provided does not exist") # get a list of all files diff --git a/setup.py b/setup.py index 79194ca0b..f38f3ec3d 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name="pyramids-gis", - version="0.5.4", + version="0.5.5", description="GIS utility package", author="Mostafa Farrag", author_email="moah.farag@gmail.come", diff --git a/tests/dataset/test_dataset.py b/tests/dataset/test_dataset.py index 808eda618..f056726f5 100644 --- a/tests/dataset/test_dataset.py +++ b/tests/dataset/test_dataset.py @@ -418,6 +418,18 @@ def test_ascii( pass +class TestCountDomainCells: + """test count domain cells""" + + def test_single_band(self, src: gdal.Dataset): + src = Dataset(src) + assert src.count_domain_cells() == 89 + + def test_multi_band(self, era5_image: gdal.Dataset): + src = Dataset(era5_image) + assert src.count_domain_cells() == 5 + + class TestGetCellCoordsAndCreateCellGeometry: def
test_cell_center_masked_cells( self,