ome · d-v-b · Apr 30, 2024 · May 2, 2024 · May 2, 2024 · May 4, 2024
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@ _build
 _bikeshed
 .tox
 .vscode
+.venv
diff --git a/latest/index.bs b/latest/index.bs
@@ -77,7 +77,7 @@ Images {#image-layout}
 
 The following layout describes the expected Zarr hierarchy for images with
 multiple levels of resolutions and optionally associated labels.
-Note that the number of dimensions is variable between 2 and 5 and that axis names are arbitrary, see [[#multiscale-md]] for details.
+Note that the number of dimensions is variable and that axis names are arbitrary; see [[#multiscale-md]] for details.
 For this example we assume an image with 5 dimensions and axes called `t,c,z,y,x`.
 
 <pre>
@@ -98,8 +98,8 @@ For this example we assume an image with 5 dimensions and axes called `t,c,z,y,x
     ├── n                     # The name of the array is arbitrary with the ordering defined by
     │   │                     # by the "multiscales" metadata, but is often a sequence starting at 0.
     │   │
-    │   ├── .zarray           # All image arrays must be up to 5-dimensional
-    │   │                     # with the axis of type time before type channel, before spatial axes.
+    │   ├── .zarray
+    │   │
     │   │
     │   └─ t                  # Chunks are stored with the nested directory layout.
     │      └─ c               # All but the last chunk element are stored as directories.
@@ -298,21 +298,18 @@ The transformations in the list are applied sequentially and in order.
 "multiscales" metadata {#multiscale-md}
 ---------------------------------------
 
-Metadata about an image can be found under the "multiscales" key in the group-level metadata. Here, image refers to 2 to 5 dimensional data representing image or volumetric data with optional time or channel axes. It is stored in a multiple resolution representation.
+Metadata about an image can be found under the "multiscales" key in the group-level metadata. Images are stored in a multiple resolution representation.
 
 "multiscales" contains a list of dictionaries where each entry describes a multiscale image.
 
 Each "multiscales" dictionary MUST contain the field "axes", see [[#axes-md]].
-The length of "axes" must be between 2 and 5 and MUST be equal to the dimensionality of the zarr arrays storing the image data (see "datasets:path").
-The "axes" MUST contain 2 or 3 entries of "type:space" and MAY contain one additional entry of "type:time" and MAY contain one additional entry of "type:channel" or a null / custom type.
-The order of the entries MUST correspond to the order of dimensions of the zarr arrays. In addition, the entries MUST be ordered by "type" where the "time" axis must come first (if present), followed by the  "channel" or custom axis (if present) and the axes of type "space".
-If there are three spatial axes where two correspond to the image plane ("yx") and images are stacked along the other (anisotropic) axis ("z"), the spatial axes SHOULD be ordered as "zyx".
+The length of "axes" MUST be equal to the dimensionality of the zarr arrays storing the image data (see "datasets:path").
+The order of the entries MUST correspond to the order of dimensions of the zarr arrays.
 
 Each "multiscales" dictionary MUST contain the field "datasets", which is a list of dictionaries describing the arrays storing the individual resolution levels.
 Each dictionary in "datasets" MUST contain the field "path", whose value contains the path to the array for this resolution relative
-to the current zarr group. The "path"s MUST be ordered from largest (i.e. highest resolution) to smallest.
+to the current zarr group. The "path"s MUST be ordered from largest (i.e. highest resolution) to smallest. All arrays denoted by a "path" field MUST have the same number of dimensions. The number of dimensions of each array MUST match the length of the "axes" metadata.
 
-Each "datasets" dictionary MUST have the same number of dimensions and MUST NOT have more than 5 dimensions. The number of dimensions and order MUST correspond to number and order of "axes".
 Each dictionary in "datasets" MUST contain the field "coordinateTransformations", which contains a list of transformations that map the data coordinates to the physical coordinates (as specified by "axes") for this resolution level.
 The transformations are defined according to [[#trafo-md]]. The transformation MUST only be of type `translation` or `scale`.
 They MUST contain exactly one `scale` transformation that specifies the pixel size in physical units or time duration. If scaling information is not available or applicable for one of the axes, the value MUST express the scaling factor between the current resolution and the first resolution for the given axis, defaulting to 1.0 if there is no downsampling along the axis.

diff --git a/latest/models.py b/latest/models.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+from typing import Literal, Optional
+from pydantic import BaseModel, ConfigDict, conlist
+from typing import Annotated, Hashable, List, TypeVar
+from pydantic_core import PydanticCustomError
+from pydantic import AfterValidator, Field, ValidationError
+
+T = TypeVar('T', bound=Hashable)  # element type of UniqueList; bound to Hashable because uniqueness is checked with set()
+
+def _validate_unique_list(v: list[T]) -> list[T]:  # validator: return v unchanged, or raise if it contains duplicates
+    if len(v) != len(set(v)):  # set() collapses equal items, so a shorter set means at least one repeat
+        raise PydanticCustomError('unique_list', 'List must be unique')  # arguments are (error type, message)
+    return v
+
+UniqueList = Annotated[List[T], AfterValidator(_validate_unique_list), Field(json_schema_extra={'uniqueItems': True})]  # List[T] checked for duplicates after validation; adds "uniqueItems" to the JSON schema
+
+class Axis(BaseModel, frozen=True):  # one entry of Multiscale.axes; frozen, so instances are immutable and hashable
+    name: str  # required
+    type: Optional[str] = None  # optional; any string is accepted, no fixed set of axis types is enforced here
+    unit: Optional[str] = None  # optional; any string is accepted
+
+class ScaleTransform(BaseModel, frozen=True):  # transformation entry tagged type="scale"
+    type: Literal["scale"]  # only the exact string "scale" validates
+    scale: conlist(float, min_length=1)  # at least one value; the length is not checked against the number of axes here
+
+class TranslationTransform(BaseModel, frozen=True):  # transformation entry tagged type="translation"
+    type: Literal["translation"]  # only the exact string "translation" validates
+    translation: conlist(float, min_length=1)  # at least one value; the length is not checked against the number of axes here
+
+class Dataset(BaseModel, frozen=True):  # one entry of Multiscale.datasets
+    path: str  # required
+    coordinateTransformations: tuple[ScaleTransform] | tuple[ScaleTransform, TranslationTransform]  # exactly one scale, optionally followed by exactly one translation
+
+class Multiscale(BaseModel, frozen=True):
+    """
+    The multiscale datasets for this image
+    """
+    name: Optional[str] = None  # optional
+    datasets: conlist(Dataset, min_length=1)  # at least one dataset is required
+    axes: UniqueList[Axis]  # no bound on the number of axes. NOTE(review): uniqueness compares whole Axis objects, so axes sharing a name but differing in type/unit would pass - confirm intended
+    coordinateTransformations: Optional[tuple[ScaleTransform] | tuple[ScaleTransform, TranslationTransform]] = None  # optional; same shape as Dataset.coordinateTransformations
+    version: Literal['0.5-dev']  # required; only this exact string validates
+
+class Window(BaseModel, frozen=True):  # value of RenderingSettings.window; four required floats, no ordering between them is validated
+    start: float
+    end: float
+    min: float
+    max: float
+
+class RenderingSettings(BaseModel, frozen=True):  # one entry of Omero.channels; every field is required
+    window: Window
+    label: str
+    family: str
+    color: str  # any string is accepted; no colour format is validated here
+    active: bool
+
+class Omero(BaseModel, frozen=True):  # value of the optional GroupMetadata.omero field
+    channels: list[RenderingSettings]  # required; an empty list is accepted (no minimum length)
+
+class GroupMetadata(BaseModel, frozen=True):
+    """
+    JSON from OME-NGFF .zattrs
+    """
+    model_config = ConfigDict(title="NGFF Image")  # title used for the generated JSON schema
+    multiscales: conlist(Multiscale, min_length=1)  # at least one multiscale entry is required
+    omero: Optional[Omero] = None  # optional
+
+def make_schema():  # print the JSON schema generated from GroupMetadata to stdout
+    import json
+    schema = GroupMetadata.model_json_schema()
+    schema["$schema"] = "https://json-schema.org/draft/2020-12/schema"  # declare the JSON Schema dialect
+    schema["$id"] = "https://ngff.openmicroscopy.org/latest/schemas/image.schema"  # identifier of this schema document
+
+    print(json.dumps(schema, indent=2))
+
+if __name__ == '__main__':  # only when run as a script, not on import
+    make_schema()