Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add endpoint for querying multiple metadata #311

Merged
merged 16 commits into from
Aug 20, 2023
3 changes: 3 additions & 0 deletions terracotta/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ class TerracottaSettings(NamedTuple):
#: Use a process pool for band retrieval in parallel
USE_MULTIPROCESSING: bool = True

#: Maximum number of datasets (key combinations) per POST /metadata request
MAX_POST_METADATA_KEYS: int = 100


AVAILABLE_SETTINGS: Tuple[str, ...] = TerracottaSettings._fields

Expand Down
41 changes: 38 additions & 3 deletions terracotta/handlers/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,53 @@
Handle /metadata API endpoint.
"""

from typing import Mapping, Sequence, Dict, Any, Union
from typing import Mapping, Sequence, Dict, Any, Union, List, Optional
from collections import OrderedDict

from terracotta import get_settings, get_driver
from terracotta.profile import trace


def filter_metadata(
    metadata: Dict[str, Any], columns: Optional[List[str]]
) -> Dict[str, Any]:
    """Filter metadata by columns, if given.

    Arguments:
        metadata: Metadata dictionary to filter.
        columns: Names of the entries to keep, or None to keep everything.

    Returns:
        ``metadata`` itself (not a copy) if ``columns`` is None, otherwise a
        new dict holding only the requested entries, ordered like ``columns``.

    Raises:
        ValueError: If ``columns`` is empty.
        KeyError: If a requested column is missing from ``metadata``.
    """
    if columns is None:
        return metadata

    # Raise explicitly instead of asserting: assertions are stripped under
    # ``python -O``, which would silently turn an empty selection into
    # "return everything".
    if not columns:
        raise ValueError("columns must either be a non-empty list or None")

    return {c: metadata[c] for c in columns}


@trace("metadata_handler")
def metadata(
    columns: Optional[List[str]], keys: Union[Sequence[str], Mapping[str, str]]
) -> Dict[str, Any]:
    """Returns metadata for a single dataset, optionally limited to columns.

    Arguments:
        columns: Metadata entries to return, or None to return all of them.
        keys: Key values identifying the dataset, either as a sequence or as
            a mapping from key name to key value.

    Returns:
        The (filtered) metadata with an additional ``"keys"`` entry mapping
        the driver's key names to the key values of the dataset.
    """
    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)
    result = filter_metadata(driver.get_metadata(keys), columns)

    key_names = driver.key_names
    if isinstance(keys, Mapping):
        # Iterating a mapping yields its keys, not its values, so zipping it
        # with key_names would pair every key name with another key name.
        result["keys"] = OrderedDict(
            (name, keys[name]) for name in key_names if name in keys
        )
    else:
        result["keys"] = OrderedDict(zip(key_names, keys))
    return result


@trace("multiple_metadata_handler")
def multiple_metadata(
    columns: Optional[List[str]], datasets: List[List[str]]
) -> List[Dict[str, Any]]:
    """Returns metadata for several datasets, one entry per dataset.

    Arguments:
        columns: Metadata entries to return, or None to return all of them.
        datasets: One list of key values per dataset.

    Returns:
        A list with the (filtered) metadata of each dataset, in request
        order. Each entry carries a ``"keys"`` item mapping key names to the
        key values of that dataset.
    """
    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)
    key_names = driver.key_names

    def describe(dataset_keys: List[str]) -> Dict[str, Any]:
        entry = filter_metadata(driver.get_metadata(dataset_keys), columns)
        entry["keys"] = OrderedDict(zip(key_names, dataset_keys))
        return entry

    # All lookups run inside one driver.connect() context.
    with driver.connect():
        # Anything beyond MAX_POST_METADATA_KEYS datasets is silently dropped.
        requested = datasets[: settings.MAX_POST_METADATA_KEYS]
        return [describe(dataset_keys) for dataset_keys in requested]
2 changes: 1 addition & 1 deletion terracotta/scripts/click_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class PathlibPath(click.Path):
"""Converts a string to a pathlib.Path object"""

def convert(self, *args: Any) -> pathlib.Path:
    """Run click's path conversion and wrap the result in a pathlib.Path."""
    # The parent's result is coerced to str first; presumably click does not
    # guarantee a plain str here -- confirm against the click version in use.
    return pathlib.Path(str(super().convert(*args)))


RasterPatternType = Tuple[List[str], Dict[Tuple[str, ...], str]]
Expand Down
93 changes: 89 additions & 4 deletions terracotta/server/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
Flask route to handle /metadata calls.
"""

from marshmallow import Schema, fields, validate
from flask import jsonify, Response
from typing import Any, Mapping, Dict
import json

from marshmallow import Schema, fields, validate, pre_load, ValidationError
from flask import jsonify, Response, request

from terracotta.server.flask_api import METADATA_API
from terracotta.exceptions import InvalidArgumentsError


class MetadataSchema(Schema):
Expand Down Expand Up @@ -50,6 +54,42 @@ class Meta:
)


class MetadataColumnsSchema(Schema):
    # Query-string schema for the optional ``columns`` filter. The value is
    # sent as a JSON-encoded list, e.g. ?columns=["bounds", "range"].
    columns = fields.List(
        fields.String(),
        description="List of columns to return",
        required=False,
        # An empty selection is meaningless; reject it during validation
        # instead of letting it fail later inside the handler.
        validate=validate.Length(min=1),
    )

    @pre_load
    def validate_columns(
        self, data: Mapping[str, Any], **kwargs: Any
    ) -> Dict[str, Any]:
        """Decode the JSON-encoded ``columns`` value before field validation.

        Raises:
            ValidationError: If the value is not valid JSON.
        """
        # Work on a plain dict copy so the incoming mapping is not modified.
        data = dict(data.items())
        var = "columns"
        val = data.get(var)
        if val:
            try:
                data[var] = json.loads(val)
            except json.decoder.JSONDecodeError as exc:
                raise ValidationError(
                    f"Could not decode value for {var} as JSON"
                ) from exc
        return data


class MultipleMetadataDatasetsSchema(Schema):
    # Request body of POST /metadata: {"keys": [[key, ...], ...]}, i.e. one
    # inner list of key values per requested dataset.
    keys = fields.List(
        fields.List(
            fields.String(),
            required=True,
            description="Keys identifying dataset",
        ),
        description="Array containing all available key combinations",
        required=True,
    )


@METADATA_API.route("/metadata/<path:keys>", methods=["GET"])
def get_metadata(keys: str) -> Response:
"""Get metadata for given dataset
Expand All @@ -63,6 +103,8 @@ def get_metadata(keys: str) -> Response:
description: Keys of dataset to retrieve metadata for (e.g. 'value1/value2')
type: path
required: true
- in: query
schema: MetadataColumnsSchema
responses:
200:
description: All metadata for given dataset
Expand All @@ -72,7 +114,50 @@ def get_metadata(keys: str) -> Response:
"""
from terracotta.handlers.metadata import metadata

columns_schema = MetadataColumnsSchema()
columns = columns_schema.load(request.args).get("columns")

parsed_keys = [key for key in keys.split("/") if key]
payload = metadata(parsed_keys)
schema = MetadataSchema()

payload = metadata(columns, parsed_keys)
schema = MetadataSchema(partial=columns is not None)
return jsonify(schema.load(payload))


@METADATA_API.route("/metadata", methods=["POST"])
def get_multiple_metadata() -> Response:
    """Get metadata for multiple datasets
    ---
    post:
        summary: /metadata
        description:
            Retrieve metadata for multiple datasets, identified by the
            body payload. Desired columns can be filtered using the ?columns
            query.
        parameters:
            - in: query
              schema: MetadataColumnsSchema
            - in: body
              schema: MultipleMetadataDatasetsSchema
        responses:
            200:
                description: All metadata for given dataset
                schema: MetadataSchema
            404:
                description: No dataset found for given key combination
    """
    from terracotta.handlers.metadata import multiple_metadata

    body = request.json
    # Only a JSON object can carry the "keys" entry the schema expects.
    if not isinstance(body, dict):
        raise InvalidArgumentsError("Request body must be a JSON object")

    datasets = MultipleMetadataDatasetsSchema().load(body).get("keys")
    columns = MetadataColumnsSchema().load(request.args).get("columns")

    payload = multiple_metadata(columns, datasets)

    # With a column filter the payload holds only a subset of the fields,
    # so the response schema is loaded in partial mode.
    response_schema = MetadataSchema(many=True, partial=columns is not None)
    return jsonify(response_schema.load(payload))
26 changes: 25 additions & 1 deletion tests/handlers/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,30 @@ def test_metadata_handler(use_testdb):
from terracotta.handlers import metadata, datasets

ds = datasets.datasets()[0]
md = metadata.metadata(ds)
md = metadata.metadata(None, ds)
assert md
assert md["metadata"] == ["extra_data"]

md = metadata.metadata(["metadata", "bounds"], ds)
assert md
assert len(md.keys()) == 3
assert all(k in md.keys() for k in ("metadata", "bounds", "keys"))


def test_multiple_metadata_handler(use_testdb):
    """multiple_metadata returns one entry per dataset and honors columns."""
    from terracotta.handlers import metadata, datasets

    available = datasets.datasets()
    requested = [list(available[0].values()), list(available[1].values())]

    # Without a column filter, the full metadata is returned.
    unfiltered = metadata.multiple_metadata(None, requested)

    assert unfiltered
    assert unfiltered[0]["metadata"] == ["extra_data"]
    assert len(unfiltered) == 2

    # With a filter: the two requested columns plus the "keys" entry.
    filtered = metadata.multiple_metadata(["metadata", "bounds"], requested)
    assert filtered
    first = filtered[0]
    assert len(first.keys()) == 3
    for expected in ("metadata", "bounds", "keys"):
        assert expected in first.keys()
25 changes: 25 additions & 0 deletions tests/server/test_flask_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,31 @@ def test_get_metadata_nonexisting(client, use_testdb):
assert rv.status_code == 404


def test_post_metadata(client, use_testdb):
    """POST /metadata returns one metadata entry per requested dataset."""
    body = {"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]}
    rv = client.post("/metadata", json=body)

    assert rv.status_code == 200
    entries = json.loads(rv.data)
    assert len(entries) == 2


def test_post_metadata_specific_columns(client, use_testdb):
    """POST /metadata with ?columns returns only those columns plus keys."""
    body = {"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]}
    rv = client.post('/metadata?columns=["bounds", "range"]', json=body)

    assert rv.status_code == 200
    assert len(json.loads(rv.data)) == 2

    first_entry = json.loads(rv.data)[0]
    assert set(first_entry.keys()) == {"bounds", "range", "keys"}


def test_get_datasets(client, use_testdb):
rv = client.get("/datasets")
assert rv.status_code == 200
Expand Down
Loading