Skip to content

Commit

Permalink
add gee scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
MAfarrag committed Jul 31, 2022
1 parent 6404b01 commit cb94ab8
Show file tree
Hide file tree
Showing 11 changed files with 274 additions and 4 deletions.
20 changes: 19 additions & 1 deletion earth2observe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,27 @@
if missing_dependencies:
raise ImportError("Missing required dependencies {0}".format(missing_dependencies))

import earth2observe.ecmwf as ecmwf
def configuration(parent_package='', top_path=None):
    """Build the ``numpy.distutils`` configuration object for this package.

    Parameters
    ----------
    parent_package: [str]
        name of the parent package, forwarded to ``Configuration``.
    top_path: [str]
        top-level path, forwarded to ``Configuration``.

    Returns
    -------
    Configuration
        configuration with the build options set and the ``gee`` subpackage
        registered.
    """
    # imported lazily so that numpy.distutils is only needed when this is called
    from numpy.distutils.misc_util import Configuration

    build_options = dict(
        ignore_setup_xxx_py=True,
        assume_default_configuration=True,
        delegate_options_to_subpackages=True,
        quiet=True,
    )

    package_config = Configuration(None, parent_package, top_path)
    package_config.set_options(**build_options)
    package_config.add_subpackage('gee')
    return package_config


import earth2observe.chirps as chirps
import earth2observe.ecmwf as ecmwf
import earth2observe.gee as gee
import earth2observe.utils as utils

__doc__ = """
earth2observe - remote sensing package
"""
38 changes: 38 additions & 0 deletions earth2observe/gee/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
try:
from importlib.metadata import PackageNotFoundError # type: ignore
from importlib.metadata import version
except ImportError: # pragma: no cover
from importlib_metadata import PackageNotFoundError # type: ignore
from importlib_metadata import version


try:
    # ``version`` expects a distribution name, while ``__name__`` is the dotted
    # module path of this sub-package (e.g. "earth2observe.gee"), which never
    # matches an installed distribution. Query the top-level package name instead.
    # NOTE(review): assumes the distribution is published under the top-level
    # package name - confirm against the project's packaging metadata.
    __version__ = version(__name__.partition(".")[0])
except PackageNotFoundError:  # pragma: no cover
    # the package is not installed (e.g. it is run from a source checkout)
    __version__ = "unknown"

# documentation format
__author__ = "Mostafa Farrag"
__email__ = '[email protected]'
__docformat__ = "restructuredtext"

# Fail at import time, with one message listing every hard dependency that
# cannot be imported.
hard_dependencies = ()  # ("numpy", "pandas", "gdal")
missing_dependencies = []

for dependency in hard_dependencies:
    try:
        __import__(dependency)
    except ImportError:
        # keep going so that all missing packages are reported together
        missing_dependencies.append(dependency)

if missing_dependencies:
    message = "Missing required dependencies {0}".format(missing_dependencies)
    raise ImportError(message)

import earth2observe.gee.data as data
import earth2observe.gee.dataset as dataset
import earth2observe.gee.gee as gee

__doc__ = """
gee - google earth engine
"""
18 changes: 18 additions & 0 deletions earth2observe/gee/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os

import pandas as pd
from pandas import DataFrame

from earth2observe.gee import __path__


def getCatalog() -> DataFrame:
    """getCatalog.

    getCatalog reads the ``dataset_catalog.json`` file located in the first
    entry of the gee package ``__path__``.

    Returns
    -------
    DataFrame
        the catalog as parsed by ``pandas.read_json``.
    """
    package_dir = __path__[0]
    catalog_file = os.path.join(package_dir, "dataset_catalog.json")
    return pd.read_json(catalog_file)
79 changes: 79 additions & 0 deletions earth2observe/gee/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import datetime as dt

import ee

from earth2observe.gee.data import getCatalog

catalog = getCatalog()
default_date_format = "%Y-%m-%d"


class Dataset:
    """Dataset.

    A catalog dataset together with the time window to work on.

    Attributes
    ----------
    metadata: [DataFrame]
        the catalog row(s) whose "dataset" column equals the dataset id.
    id: [str]
        the dataset id as given by the caller.
    start_date, end_date: [datetime]
        the requested period, clipped to the period covered by the dataset.
    catalog: [DataFrame]
        the whole module-level catalog.
    """

    def __init__(self, dataset_id: str, start_date: str, end_date: str, date_format: str = "%Y-%m-%d"):
        """Initialize.

        Parameters
        ----------
        dataset_id: [str]
            dataset id as in the catalog.
        start_date: [str]
            beginning of the requested period.
        end_date: [str]
            end of the requested period.
        date_format: [str]
            format of the given dates, Default is YYYY-MM-DD

        Raises
        ------
        ValueError
            if the dataset id is not in the "dataset" column of the catalog.
        """
        if dataset_id not in catalog["dataset"].tolist():
            raise ValueError(f"the given dataset: {dataset_id} does nor exist in the catalog")

        self.metadata = catalog.loc[catalog["dataset"] == dataset_id, :]
        # keep the dataset id itself; the builtin ``id`` function used to be stored here
        self.id = dataset_id
        self.start_date, self.end_date = self.getDate(dataset_id, start_date, end_date, date_format)
        self.catalog = catalog

    @staticmethod
    def getDate(
        dataset_id: str,
        start_date: str = None,
        end_date: str = None,
        date_format: str = default_date_format,
    ):
        """getDate.

        getDate retrieves the start and end date of a dataset, optionally
        clipping a requested period to the period the dataset covers.

        Parameters
        ----------
        dataset_id: [str]
            dataset id as in the catalog.
        start_date: [str]
            requested start date; dates before the dataset start are replaced
            by the dataset start. If not given, the dataset start is used.
        end_date: [str]
            requested end date; dates after the dataset end are replaced by
            the dataset end. If not given, the dataset end is used.
        date_format: [str]
            format of the given dates, Default is YYYY-MM-DD

        Returns
        -------
        start_date: [datetime]
            beginning of the time series.
        end_date: [datetime]
            end of the time series.
        """
        data = catalog.loc[catalog["dataset"] == dataset_id, :]

        # catalog dates are parsed with the default format, not the caller's format
        dataset_start_date = dt.datetime.strptime(data["start_date"].values[0], default_date_format)

        catalog_end = data["end_date"].values[0]
        if catalog_end == "Now":
            # a datetime (not a date) so it can be compared with the parsed dates
            dataset_end_date = dt.datetime.now()
        else:
            # NOTE(review): assumes catalog end dates use the same format as
            # the start dates - confirm against dataset_catalog.json
            dataset_end_date = dt.datetime.strptime(catalog_end, default_date_format)

        if start_date:
            requested_start = dt.datetime.strptime(start_date, date_format)
            start_date = max(requested_start, dataset_start_date)
        else:
            start_date = dataset_start_date

        if end_date:
            requested_end = dt.datetime.strptime(end_date, date_format)
            end_date = min(requested_end, dataset_end_date)
        else:
            end_date = dataset_end_date

        return start_date, end_date
File renamed without changes.
91 changes: 91 additions & 0 deletions earth2observe/gee/gee.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"Google earth engine main script"
import base64
import json
import os

import ee


class GEE:
    """GEE.

    Authenticates with a google cloud service account and initializes the
    connection to google earth engine when an instance is created.
    """

    def __init__(self, service_account: str, service_key_path: str):
        """Initialize.

        Parameters
        ----------
        service_account: [str]
            service account name
        service_key_path: [str]
            path to the service account json file

        Returns
        -------
        None
        """
        self.Initialize(service_account, service_key_path)

    def Initialize(self, service_account: str, service_key: str):
        """Initialize.

        Initialize authenticates and initializes the connection to google
        earth engine with a service account file content or path

        Parameters
        ----------
        service_account: [str]
            service account name
        service_key: [str]
            path to the service account json file or the content of the service account

        Returns
        -------
        None
        """
        try:
            # first treat the key as a path to the json file
            credentials = ee.ServiceAccountCredentials(service_account, service_key)
        except ValueError:
            # NOTE(review): the fallback to ``key_data`` only runs when the call
            # above raises ValueError - confirm that this is what ee raises when
            # it is handed the key content instead of a path
            credentials = ee.ServiceAccountCredentials(service_account, key_data=service_key)
        ee.Initialize(credentials=credentials)

    @staticmethod
    def encodeServiceAccount(service_key_dir: str) -> bytes:
        """encodeServiceAccount.

        encodeServiceAccount reads the service account json file and encodes
        its content with base64.

        Parameters
        ----------
        service_key_dir: [str]
            path to the service account json file

        Returns
        -------
        byte string
            the json content, re-serialized with ``json.dumps`` and base64 encoded
        """
        # the context manager closes the file even if parsing fails
        with open(service_key_dir, encoding="utf-8") as key_file:
            content = json.load(key_file)
        dumped_service_account = json.dumps(content)
        return base64.b64encode(dumped_service_account.encode())

    @staticmethod
    def decodeServiceAccount(service_key_bytes: bytes) -> dict:
        """decodeServiceAccount.

        decodeServiceAccount reverses encodeServiceAccount: it base64-decodes
        the given bytes and parses the result as json.

        Parameters
        ----------
        service_key_bytes: [bytes]
            the content of the service account encoded with base64

        Returns
        -------
        dict:
            google cloud service account content, as parsed by ``json.loads``
        """
        decoded_text = base64.b64decode(service_key_bytes).decode()
        return json.loads(decoded_text)
2 changes: 2 additions & 0 deletions earth2observe/gee/imagecollection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

# collections = ee.ImageCollection(url')
9 changes: 6 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# from typing import List

import pandas as pd
import pytest

from tests.gee.conftest import *

# from typing import List



# @pytest.fixture(scope="module")
# def time_series1() -> list:
# return pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
# return pd.read_csv("examples/data/time_series1.txt", header=None)[0].tolist()
Empty file added tests/gee/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions tests/gee/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from typing import List

import pytest


@pytest.fixture(scope="module")
def catalog_columns() -> List[str]:
    """Column names expected in the dataset catalog (module-scoped fixture)."""
    descriptive_columns = ['dataset', 'name', 'provider', 'url', 'bands', 'band_describtion']
    coverage_columns = [
        'spatial_resolution',
        'temporal_resolution',
        'start_date',
        'end_date',
        'min',
        'max',
    ]
    return descriptive_columns + coverage_columns
12 changes: 12 additions & 0 deletions tests/gee/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import List

from pandas import DataFrame

from earth2observe.gee.data import getCatalog


def test_get_catalog(catalog_columns: List[str]):
    """getCatalog returns a DataFrame whose columns are all in ``catalog_columns``."""
    result = getCatalog()
    assert isinstance(result, DataFrame)

    # collect the offending names so that a failure shows which columns are unexpected
    unexpected = [name for name in result.columns.to_list() if name not in catalog_columns]
    assert not unexpected

0 comments on commit cb94ab8

Please sign in to comment.