From cb94ab8fec5a6fb9c39e46859cc95dc3b9af023e Mon Sep 17 00:00:00 2001
From: Mostafa Farrag
Date: Sun, 31 Jul 2022 23:09:55 +0200
Subject: [PATCH] add gee scripts

---
 earth2observe/__init__.py            | 20 +++-
 earth2observe/gee/__init__.py        | 38 ++++++++
 earth2observe/gee/data.py            | 18 ++++
 earth2observe/gee/dataset.py         | 79 ++++++++++++++++
 .../dataset_catalog.json}            |  0
 earth2observe/gee/gee.py             | 91 +++++++++++++++++++
 earth2observe/gee/imagecollection.py |  2 +
 tests/conftest.py                    |  9 +-
 tests/gee/__init__.py                |  0
 tests/gee/conftest.py                |  9 ++
 tests/gee/test_data.py               | 12 +++
 11 files changed, 274 insertions(+), 4 deletions(-)
 create mode 100644 earth2observe/gee/__init__.py
 create mode 100644 earth2observe/gee/data.py
 create mode 100644 earth2observe/gee/dataset.py
 rename earth2observe/{datasets.json => gee/dataset_catalog.json} (100%)
 create mode 100644 earth2observe/gee/gee.py
 create mode 100644 earth2observe/gee/imagecollection.py
 create mode 100644 tests/gee/__init__.py
 create mode 100644 tests/gee/conftest.py
 create mode 100644 tests/gee/test_data.py

diff --git a/earth2observe/__init__.py b/earth2observe/__init__.py
index 7947ce2..6864b85 100644
--- a/earth2observe/__init__.py
+++ b/earth2observe/__init__.py
@@ -29,9 +29,27 @@ if missing_dependencies:
     raise ImportError("Missing required dependencies {0}".format(missing_dependencies))
 
-import earth2observe.ecmwf as ecmwf
+def configuration(parent_package='', top_path=None):
+    """Return the numpy.distutils Configuration that registers the gee subpackage."""
+    from numpy.distutils.misc_util import Configuration
+
+    config = Configuration(None, parent_package, top_path)
+    config.set_options(
+        ignore_setup_xxx_py=True,
+        assume_default_configuration=True,
+        delegate_options_to_subpackages=True,
+        quiet=True,
+    )
+
+    config.add_subpackage('gee')
+    return config
+
+
 import earth2observe.chirps as chirps
+import earth2observe.ecmwf as ecmwf
+import earth2observe.gee as gee
 import earth2observe.utils as utils
 
+
 __doc__ = """
 earth2observe - remote sensing package
 """
diff --git a/earth2observe/gee/__init__.py b/earth2observe/gee/__init__.py
new file mode 100644
index 0000000..3bc65af
--- /dev/null
+++ b/earth2observe/gee/__init__.py
@@ -0,0 +1,38 @@
+try:
+    from importlib.metadata import PackageNotFoundError  # type: ignore
+    from importlib.metadata import version
+except ImportError:  # pragma: no cover
+    from importlib_metadata import PackageNotFoundError  # type: ignore
+    from importlib_metadata import version
+
+try:
+    # version() looks up an installed distribution, so pass the top-level name, not "earth2observe.gee"
+    __version__ = version(__name__.split(".")[0])
+except PackageNotFoundError:  # pragma: no cover
+    __version__ = "unknown"
+
+# documentation format
+__author__ = "Mostafa Farrag"
+__email__ = 'moah.farag@gmail.com'
+__docformat__ = "restructuredtext"
+
+# Let users know if they're missing any of our hard dependencies
+hard_dependencies = ()  # ("numpy", "pandas", "gdal")
+missing_dependencies = []
+
+for dependency in hard_dependencies:
+    try:
+        __import__(dependency)
+    except ImportError:
+        missing_dependencies.append(dependency)
+
+if missing_dependencies:
+    raise ImportError("Missing required dependencies {0}".format(missing_dependencies))
+
+import earth2observe.gee.data as data
+import earth2observe.gee.dataset as dataset
+import earth2observe.gee.gee as gee
+
+__doc__ = """
+gee - google earth engine
+"""
diff --git a/earth2observe/gee/data.py b/earth2observe/gee/data.py
new file mode 100644
index 0000000..80d289c
--- /dev/null
+++ b/earth2observe/gee/data.py
@@ -0,0 +1,18 @@
+import os
+
+import pandas as pd
+from pandas import DataFrame
+
+from earth2observe.gee import __path__
+
+
+def getCatalog() -> DataFrame:
+    """getCatalog.
+
+    getCatalog reads the dataset_catalog.json file located in the gee subpackage directory
+
+    Returns
+    -------
+    DataFrame
+    """
+    return pd.read_json(os.path.join(__path__[0], "dataset_catalog.json"))
diff --git a/earth2observe/gee/dataset.py b/earth2observe/gee/dataset.py
new file mode 100644
index 0000000..d2b02e4
--- /dev/null
+++ b/earth2observe/gee/dataset.py
@@ -0,0 +1,79 @@
+import datetime as dt
+
+import ee
+
+from earth2observe.gee.data import getCatalog
+
+catalog = getCatalog()
+default_date_format = "%Y-%m-%d"
+
+
+class Dataset:
+    """
+    Dataset
+    """
+
+    def __init__(self, dataset_id: str, start_date: str, end_date: str, date_format: str = "%Y-%m-%d"):
+        if dataset_id not in catalog["dataset"].tolist():
+            raise ValueError(f"the given dataset: {dataset_id} does not exist in the catalog")
+
+        self.metadata = catalog.loc[catalog["dataset"] == dataset_id, :]
+        self.id = dataset_id
+        self.start_date, self.end_date = self.getDate(dataset_id, start_date, end_date, date_format)
+        self.catalog = catalog
+
+    @staticmethod
+    def getDate(
+        dataset_id: str,
+        start_date: str = None,
+        end_date: str = None,
+        date_format: str = default_date_format):
+        """getDate.
+
+        getDate retrieves the start and end date of a dataset
+
+        Parameters
+        ----------
+        dataset_id: [str]
+            dataset id as in the catalog.
+        start_date: [str]
+            requested start date, replaced by the dataset start date if it is earlier. Default is None.
+        end_date: [str]
+            requested end date, replaced by the dataset end date if it is later. Default is None.
+        date_format: [str]
+            format of the given dates, Default is YYYY-MM-DD
+
+        Returns
+        -------
+        start_date: [datetime]
+            beginning of the time series.
+
+        end_date: [datetime]
+            end of the time series.
+        """
+        data = catalog.loc[catalog["dataset"] == dataset_id, :]
+
+        dataset_start_date = dt.datetime.strptime(data["start_date"].values[0], default_date_format)
+        dataset_end_date = data["end_date"].values[0]
+        if dataset_end_date == "Now":
+            # keep a datetime (not a date) so it can be compared with the parsed dates below
+            dataset_end_date = dt.datetime.now()
+        else:
+            # assumes the catalog end date uses the same format as its start date - TODO confirm
+            dataset_end_date = dt.datetime.strptime(dataset_end_date, default_date_format)
+
+        if start_date:
+            start_date = dt.datetime.strptime(start_date, date_format)
+            if start_date < dataset_start_date:
+                start_date = dataset_start_date
+        else:
+            start_date = dataset_start_date
+
+        if end_date:
+            end_date = dt.datetime.strptime(end_date, date_format)
+            if end_date > dataset_end_date:
+                end_date = dataset_end_date
+        else:
+            end_date = dataset_end_date
+
+        return start_date, end_date
diff --git a/earth2observe/datasets.json b/earth2observe/gee/dataset_catalog.json
similarity index 100%
rename from earth2observe/datasets.json
rename to earth2observe/gee/dataset_catalog.json
diff --git a/earth2observe/gee/gee.py b/earth2observe/gee/gee.py
new file mode 100644
index 0000000..c11d592
--- /dev/null
+++ b/earth2observe/gee/gee.py
@@ -0,0 +1,91 @@
+"Google earth engine main script"
+import base64
+import json
+import os
+
+import ee
+
+
+class GEE:
+    """
+    GEE
+    """
+
+    def __init__(self, service_account: str, service_key_path: str):
+        """Initialize.
+
+        Parameters
+        ----------
+        service_account: [str]
+            service account name
+        service_key_path: [str]
+            path to the service account json file
+
+        Returns
+        -------
+        None
+        """
+        self.Initialize(service_account, service_key_path)
+
+    def Initialize(self, service_account: str, service_key: str):
+        """Initialize.
+
+        Initialize authenticates and initializes the connection to google earth engine with a service account file
+        content or path
+
+        Parameters
+        ----------
+        service_account: [str]
+            service account name
+        service_key: [str]
+            path to the service account json file or the content of the service account
+
+        Returns
+        -------
+        None
+        """
+        try:
+            credentials = ee.ServiceAccountCredentials(service_account, service_key)
+        except ValueError:
+            credentials = ee.ServiceAccountCredentials(service_account, key_data=service_key)
+        ee.Initialize(credentials=credentials)
+
+    @staticmethod
+    def encodeServiceAccount(service_key_dir: str) -> bytes:
+        """encodeServiceAccount.
+
+        encodeServiceAccount encodes the json content of the service account file with base64
+
+        Parameters
+        ----------
+        service_key_dir: [str]
+            path to the service account json file
+
+        Returns
+        -------
+        byte string
+        """
+        with open(service_key_dir) as key_file:
+            content = json.load(key_file)
+        dumped_service_account = json.dumps(content)
+        encoded_service_account = base64.b64encode(dumped_service_account.encode())
+        return encoded_service_account
+
+    @staticmethod
+    def decodeServiceAccount(service_key_bytes: bytes) -> dict:
+        """decodeServiceAccount.
+
+        decodeServiceAccount decodes the base64 encoded service account content and parses it as json
+
+        Parameters
+        ----------
+        service_key_bytes: [bytes]
+            the content of the service account encoded with base64
+
+        Returns
+        -------
+        dict:
+            google cloud service account content (a dict when the encoded json is an object)
+        """
+        service_key = json.loads(base64.b64decode(service_key_bytes).decode())
+        return service_key
diff --git a/earth2observe/gee/imagecollection.py b/earth2observe/gee/imagecollection.py
new file mode 100644
index 0000000..a4a00d0
--- /dev/null
+++ b/earth2observe/gee/imagecollection.py
@@ -0,0 +1,2 @@
+
+# collections = ee.ImageCollection(url')
diff --git a/tests/conftest.py b/tests/conftest.py
index 5029671..1c03837 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,9 +1,12 @@
-# from typing import List
-
 import pandas as pd
 import pytest
 
+from tests.gee.conftest import *
+
+# from typing import List
+
+
 
 # @pytest.fixture(scope="module")
 # def time_series1() -> list:
-    # return pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
\ No newline at end of file
+    # return pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
diff --git a/tests/gee/__init__.py b/tests/gee/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/gee/conftest.py b/tests/gee/conftest.py
new file mode 100644
index 0000000..dad867c
--- /dev/null
+++ b/tests/gee/conftest.py
@@ -0,0 +1,9 @@
+from typing import List
+
+import pytest
+
+
+@pytest.fixture(scope="module")
+def catalog_columns() -> List[str]:
+    return ['dataset', 'name', 'provider', 'url', 'bands', 'band_describtion', 'spatial_resolution',
+            'temporal_resolution', 'start_date', 'end_date', 'min', 'max']
diff --git a/tests/gee/test_data.py b/tests/gee/test_data.py
new file mode 100644
index 0000000..59f839a
--- /dev/null
+++ b/tests/gee/test_data.py
@@ -0,0 +1,12 @@
+from typing import List
+
+from pandas import DataFrame
+
+from earth2observe.gee.data import getCatalog
+
+
+def test_get_catalog(catalog_columns: List[str]):
+    catalog = getCatalog()
+
+    assert isinstance(catalog, DataFrame)
+    assert all(col in catalog_columns for col in catalog.columns.to_list())