Merge pull request #32 from JakobBD/dimension_changes

Small changes to DimensionSet
pik-piam · Sep 24, 2024 · 4da615d · 4da615d
2 parents dbeb229 + 3016837
commit 4da615d
Show file tree

Hide file tree

Showing 9 changed files with 70 additions and 177 deletions.
diff --git a/examples/example1.ipynb b/examples/example1.ipynb
diff --git a/examples/example2.ipynb b/examples/example2.ipynb
@@ -289,7 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 13,
    "id": "e5877592",
    "metadata": {},
    "outputs": [
@@ -299,7 +299,7 @@
        "Text(0.5, 0.98, 'Amount of copper and manganese in secondary steel')"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -332,7 +332,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 14,
    "id": "ebbe470a-ec04-4432-9f55-7a0931f9062f",
    "metadata": {},
    "outputs": [
@@ -342,7 +342,7 @@
        "Text(0.5, 0.98, 'Share of copper and manganese in secondary steel')"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -396,7 +396,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 15,
    "id": "5582ae7a-4d41-4a8b-8726-dff414c96cde",
    "metadata": {},
    "outputs": [
@@ -406,7 +406,7 @@
        "Text(0.5, 0.98, 'Manganese lost in the remelting process')"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -480,12 +480,14 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\jakobdu\\AppData\\Local\\Temp\\ipykernel_19240\\2953891482.py:20: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown\n",
-      "  fig.show()\n"
-     ]
+     "data": {
+      "text/plain": [
+       "Text(0.5, 0.98, 'Material concentration in secondary steel')"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
     },
     {
      "data": {
@@ -506,7 +508,7 @@
     "concentration_b = flow_b / flow_b.sum_nda_over(('e'))\n",
     "\n",
     "scenarios = Dimension(name='Scenarios', letter='s', items=['Standard', 'Updated shredder yield', 'Increased buildings demolition'])\n",
-    "new_dims = DimensionSet(dimensions=remelted_shares.dims.dimensions + [scenarios])\n",
+    "new_dims = remelted_shares.dims.expand_by([scenarios])\n",
     "concentrations = remelted_shares.cast_to(new_dims)\n",
     "concentrations['Updated shredder yield'] = concentration_a\n",
     "concentrations['Increased buildings demolition'] = concentration_b\n",

diff --git a/sodym/data_reader.py b/sodym/data_reader.py
@@ -15,7 +15,7 @@ class DataReader(ABC):
     """
     def read_dimensions(self, dimension_definitions: List[DimensionDefinition]) -> DimensionSet:
         dimensions = [self.read_dimension(definition) for definition in dimension_definitions]
-        return DimensionSet(dimensions=dimensions)
+        return DimensionSet(dim_list=dimensions)
 
     @abstractmethod
     def read_dimension(self, dimension_definition: DimensionDefinition) -> Dimension:

diff --git a/sodym/dimensions.py b/sodym/dimensions.py
@@ -57,7 +57,7 @@ class DimensionSet(PydanticBaseModel):
 
     """
 
-    dimensions: list[Dimension]
+    dim_list: list[Dimension]
 
     @model_validator(mode='after')
     def no_repeated_dimensions(self):
@@ -69,29 +69,29 @@ def no_repeated_dimensions(self):
     def drop(self, key: str, inplace: bool=False):
         dim_to_drop = self._dict[key]
         if not inplace:
-            dimensions = copy(self.dimensions)
+            dimensions = copy(self.dim_list)
             dimensions.remove(dim_to_drop)
-            return DimensionSet(dimensions=dimensions)
-        self.dimensions.remove(dim_to_drop)
+            return DimensionSet(dim_list=dimensions)
+        self.dim_list.remove(dim_to_drop)
 
     @property
     def _dict(self) -> Dict[str, Dimension]:
         """Contains mappings.
 
         letter --> dim object and name --> dim object
         """
-        return {dim.name: dim for dim in self.dimensions} | {dim.letter: dim for dim in self.dimensions}
+        return {dim.name: dim for dim in self.dim_list} | {dim.letter: dim for dim in self.dim_list}
 
     def __getitem__(self, key) -> Dimension:
         if isinstance(key, str):
             return self._dict[key]
         elif isinstance(key, int):
-            return self.dimensions[key]
+            return self.dim_list[key]
         else:
             raise TypeError("Key must be string or int")
 
     def __iter__(self):
-        return iter(self.dimensions)
+        return iter(self.dim_list)
 
     def size(self, key: str):
         return self._dict[key].len
@@ -101,26 +101,41 @@ def shape(self, keys: tuple = None):
         return tuple(self.size(key) for key in keys)
 
     def get_subset(self, dims: tuple = None) -> 'DimensionSet':
-        """Selects :py:class:`Dimension` objects from the object attribute dimensions,
+        """Selects :py:class:`Dimension` objects from the object attribute dim_list,
         according to the dims passed, which can be either letters or names.
         Returns a copy if dims are not given.
         """
         subset = copy(self)
         if dims is not None:
-            subset.dimensions = [self._dict[dim_key] for dim_key in dims]
+            subset.dim_list = [self._dict[dim_key] for dim_key in dims]
         return subset
 
+    def expand_by(self, added_dims: list[Dimension]) -> 'DimensionSet':
+        """Expands the DimensionSet by adding new dimensions to it.
+        """
+        if not all([dim.letter not in self.letters for dim in added_dims]):
+            raise ValueError('DimensionSet already contains one or more of the dimensions to be added.')
+        return DimensionSet(dim_list=self.dim_list + added_dims)
+
+    def intersect_with(self, other: 'DimensionSet') -> 'DimensionSet':
+        intersection_letters = [dim.letter for dim in self.dim_list if dim.letter in other.letters]
+        return self.get_subset(intersection_letters)
+
+    def union_with(self, other: 'DimensionSet') -> 'DimensionSet':
+        added_dims = [dim for dim in other.dim_list if dim.letter not in self.letters]
+        return self.expand_by(added_dims)
+
     @property
     def names(self):
-        return tuple([dim.name for dim in self.dimensions])
+        return tuple([dim.name for dim in self.dim_list])
 
     @property
     def letters(self):
-        return tuple([dim.letter for dim in self.dimensions])
+        return tuple([dim.letter for dim in self.dim_list])
 
     @property
     def string(self):
         return "".join(self.letters)
 
     def index(self, key):
-        return [d.letter for d in self.dimensions].index(key)
+        return [d.letter for d in self.dim_list].index(key)
diff --git a/sodym/export/helper.py b/sodym/export/helper.py
@@ -83,7 +83,7 @@ def fill_fig_ax(self):
     def get_x_array_like_value_array(self):
         if self.x_array is None:
             x_dim_obj = self.array.dims[self.intra_line_dim]
-            x_dimset = DimensionSet(dimensions=[x_dim_obj])
+            x_dimset = DimensionSet(dim_list=[x_dim_obj])
             self.x_array = NamedDimArray(dims=x_dimset, values=np.array(x_dim_obj.items), name=self.intra_line_dim)
         self.x_array = self.x_array.cast_to(self.array.dims)
 

diff --git a/sodym/named_dim_array_helper.py b/sodym/named_dim_array_helper.py
@@ -8,7 +8,7 @@ def named_dim_array_stack(named_dim_arrays: list[NamedDimArray], dimension: Dime
     Method can be applied to `NamedDimArray`s, `StockArray`s, `Parameter`s and `Flow`s.
     """
     named_dim_array0 = named_dim_arrays[0]
-    extended_dimensions = DimensionSet(dimensions=named_dim_array0.dims.dimensions+[dimension])
+    extended_dimensions = named_dim_array0.dims.expand_by([dimension])
     extended = NamedDimArray(dims=extended_dimensions)
     for item, nda in zip(dimension.items, named_dim_arrays):
         extended[{dimension.letter: item}] = nda

diff --git a/sodym/named_dim_arrays.py b/sodym/named_dim_arrays.py
@@ -143,58 +143,43 @@ def _prepare_other(self, other):
             other = NamedDimArray(dims=self.dims, values=other * np.ones(self.shape))
         return other
 
-    def intersect_dims_with(self, other):
-        matching_dims = []
-        for dim in self.dims.dimensions:
-            if dim.letter in other.dims.letters:
-                matching_dims.append(dim)
-        return DimensionSet(dimensions=matching_dims)
-
-    def union_dims_with(self, other):
-        all_dims = copy(self.dims.dimensions)
-        letters_self = self.dims.letters
-        for dim in other.dims.dimensions:
-            if dim.letter not in letters_self:
-                all_dims.append(dim)
-        return DimensionSet(dimensions=all_dims)
-
     def __add__(self, other):
         other = self._prepare_other(other)
-        dims_out = self.intersect_dims_with(other)
+        dims_out = self.dims.intersect_with(other.dims)
         return NamedDimArray(
             dims=dims_out, values=self.sum_values_to(dims_out.letters) + other.sum_values_to(dims_out.letters)
         )
 
     def __sub__(self, other):
         other = self._prepare_other(other)
-        dims_out = self.intersect_dims_with(other)
+        dims_out = self.dims.intersect_with(other.dims)
         return NamedDimArray(
             dims=dims_out, values=self.sum_values_to(dims_out.letters) - other.sum_values_to(dims_out.letters)
         )
 
     def __mul__(self, other):
         other = self._prepare_other(other)
-        dims_out = self.union_dims_with(other)
+        dims_out = self.dims.union_with(other.dims)
         values_out = np.einsum(f"{self.dims.string},{other.dims.string}->{dims_out.string}", self.values, other.values)
         return NamedDimArray(dims=dims_out, values=values_out)
 
     def __truediv__(self, other):
         other = self._prepare_other(other)
-        dims_out = self.union_dims_with(other)
+        dims_out = self.dims.union_with(other.dims)
         values_out = np.einsum(
             f"{self.dims.string},{other.dims.string}->{dims_out.string}", self.values, 1.0 / other.values
         )
         return NamedDimArray(dims=dims_out, values=values_out)
 
     def minimum(self, other):
         other = self._prepare_other(other)
-        dims_out = self.intersect_dims_with(other)
+        dims_out = self.dims.intersect_with(other.dims)
         values_out = np.minimum(self.sum_values_to(dims_out.letters), other.sum_values_to(dims_out.letters))
         return NamedDimArray(dims=dims_out, values=values_out)
 
     def maximum(self, other):
         other = self._prepare_other(other)
-        dims_out = self.intersect_dims_with(other)
+        dims_out = self.dims.intersect_with(other.dims)
         values_out = np.maximum(self.sum_values_to(dims_out.letters), other.sum_values_to(dims_out.letters))
         return NamedDimArray(dims=dims_out, values=values_out)
 
@@ -302,7 +287,7 @@ def to_dict_single_item(self, item):
                 "docstring."
             )
         dict_out = None
-        for d in self.nda.dims.dimensions:
+        for d in self.nda.dims:
             if item in d.items:
                 if dict_out is not None:
                     raise ValueError(
@@ -353,7 +338,7 @@ def to_nda(self) -> 'NamedDimArray':
         assert (
             not self.has_dim_with_several_items
         ), "Cannot convert to NamedDimArray if there are dimensions with several items"
-        
+
         return NamedDimArray(dims=self.dims, values=self.values_pointer, name=self.nda.name)
 
     def _init_ids(self):

diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py
@@ -10,14 +10,14 @@ def test_validate_dimension_set():
         {'name': 'time', 'letter': 't', 'items': [1990, 2000, 2010]},
         {'name': 'place', 'letter': 'p', 'items': ['World', ]}
     ]
-    DimensionSet(dimensions=dimensions)
+    DimensionSet(dim_list=dimensions)
 
     # example with repeated dimension letters in DimensionSet
     dimensions.append(
         {'name': 'another_time', 'letter': 't', 'items': [2020, 2030]}
     )
     with pytest.raises(ValidationError) as error_msg:
-        DimensionSet(dimensions=dimensions)
+        DimensionSet(dim_list=dimensions)
     assert 'letter' in str(error_msg.value)
 
 
@@ -29,11 +29,11 @@ def test_get_subset():
     material_dimension = {'name': 'material', 'letter': 'm', 'items': ['material_0', 'material_1']}
 
     parent_dimensions = subset_dimensions + [material_dimension]
-    dimension_set = DimensionSet(dimensions=parent_dimensions)
+    dimension_set = DimensionSet(dim_list=parent_dimensions)
 
     # example of subsetting the dimension set using dimension letters
     subset_from_letters = dimension_set.get_subset(dims=('t', 'p'))
-    assert subset_from_letters == DimensionSet(dimensions=subset_dimensions)
+    assert subset_from_letters == DimensionSet(dim_list=subset_dimensions)
 
     # example of subsetting the dimension set using dimension names
     subset_from_names = dimension_set.get_subset(dims=('time', 'place'))

diff --git a/tests/test_named_dim_arrays.py b/tests/test_named_dim_arrays.py
@@ -10,12 +10,12 @@
         {'name': 'place', 'letter': 'p', 'items': ['Earth', 'Sun', 'Moon', 'Venus']},
         {'name': 'time', 'letter': 't', 'items': [1990, 2000, 2010]},
     ]
-dims = DimensionSet(dimensions=dimensions)
+dims = DimensionSet(dim_list=dimensions)
 values = np.random.rand(4, 3)
 numbers = NamedDimArray(name='two', dims=dims, values=values)
 
 animals = {'name': 'animal', 'letter': 'a', 'items': ['cat', 'mouse']}
-dims_incl_animals = DimensionSet(dimensions=dimensions+[animals])
+dims_incl_animals = DimensionSet(dim_list=dimensions+[animals])
 animal_values = np.random.rand(4, 3, 2)
 space_animals = NamedDimArray(name='space_animals', dims=dims_incl_animals, values=animal_values)
 
@@ -25,7 +25,7 @@ def test_named_dim_array_validations():
         {'name': 'place', 'letter': 'p', 'items': ['World', ]},
         {'name': 'time', 'letter': 't', 'items': [1990, 2000, 2010]},
     ]
-    dims = DimensionSet(dimensions=dimensions)
+    dims = DimensionSet(dim_list=dimensions)
 
     # example with values with the correct shape
     NamedDimArray(name='numbers', dims=dims, values=np.array([[1, 2, 3], ]))
@@ -52,15 +52,15 @@ def test_cast_to():
     assert_almost_equal(np.sum(casted_named_dim_array.values), 2 * np.sum(values))
 
     # example with differently ordered dimensions
-    target_dims = DimensionSet(dimensions=[animals]+dimensions[::-1])
+    target_dims = DimensionSet(dim_list=[animals]+dimensions[::-1])
     casted_named_dim_array = numbers.cast_to(target_dims=target_dims)
     assert casted_named_dim_array.values.shape == (2, 3, 4)
 
 
 def test_sum_nda_to():
     # sum over one dimension
     summed_named_dim_array = space_animals.sum_nda_to(result_dims=('p', 't'))
-    assert summed_named_dim_array.dims == DimensionSet(dimensions=dimensions)
+    assert summed_named_dim_array.dims == DimensionSet(dim_list=dimensions)
     assert_array_almost_equal(summed_named_dim_array.values, np.sum(animal_values, axis=2))
 
     # sum over two dimensions