Skip to content

Commit

Permalink
[NSETM-2310] Allow to use the cache in readonly mode (#36)
Browse files Browse the repository at this point in the history
New Features:
- Add cache.readonly, to be able to use an existing cache without exclusive locking [NSETM-2310].
- Add cache.store_type, to change the file format (experimental).
- Add cache.skip_features, to skip writing the features DataFrames (not implemented yet).

Deprecations:
- Deprecate output, use cache.path instead.
- Deprecate clear_cache, use cache.clear instead.
  • Loading branch information
GianlucaFicarelli committed Apr 22, 2024
1 parent e6dfe33 commit 53bce0b
Show file tree
Hide file tree
Showing 44 changed files with 582 additions and 193 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
Changelog
=========

Version 0.9.0
-------------

New Features
~~~~~~~~~~~~

- Add ``cache.readonly``, to be able to use an existing cache without exclusive locking [NSETM-2310].
- Add ``cache.store_type``, to change the file format (experimental).
- Add ``cache.skip_features``, to skip writing the features DataFrames (not implemented yet).

Deprecations
~~~~~~~~~~~~

- Deprecate ``output``, use ``cache.path`` instead.
- Deprecate ``clear_cache``, use ``cache.clear`` instead.


Version 0.8.3
-------------

Expand Down
5 changes: 3 additions & 2 deletions doc/source/data/analysis/config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# simple configuration with extraction and analysis, and combination of parameters
version: 3
version: 4
simulation_campaign: ../simulation-campaign/config.json
output: analysis_output
cache:
path: analysis_output
analysis:
spikes:
extraction:
Expand Down
1 change: 1 addition & 0 deletions doc/source/migration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ Migration
:hidden:

migration/automatic
migration/0.9.0
migration/0.3.0
migration/0.2.0
32 changes: 32 additions & 0 deletions doc/source/migration/0.9.0.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
From 0.8.x to 0.9.x (v4)
------------------------

BlueETL 0.9.x introduces some changes in the configuration (v4), but it's backward compatible with the configuration used in 0.8.x (v3).

1. The ``version`` should be set to ``4``.

2. The following fields have been deprecated and should be replaced, because they will be removed in a future version:

- ``output``: use ``cache.path`` instead
- ``clear_cache``: use ``cache.clear`` instead

For example, if the old configuration contains::

version: 3
simulation_campaign: /path/to/config.json
output: analysis_output
clear_cache: true
...

then it should be replaced with::

version: 4
simulation_campaign: /path/to/config.json
cache:
  path: analysis_output
  clear: true
...

An example configuration in the new format is available here:

- https://github.com/BlueBrain/blueetl/blob/blueetl-v0.9.0/tests/functional/data/sonata/config/analysis_config_01.yaml
50 changes: 32 additions & 18 deletions src/blueetl/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import gc
import logging
from copy import deepcopy
from pathlib import Path
from typing import Any, NamedTuple, Optional

Expand All @@ -12,7 +11,7 @@
from blueetl.cache import CacheManager
from blueetl.campaign.config import SimulationCampaign
from blueetl.config.analysis import init_multi_analysis_configuration
from blueetl.config.analysis_model import MultiAnalysisConfig, SingleAnalysisConfig
from blueetl.config.analysis_model import CacheConfig, MultiAnalysisConfig, SingleAnalysisConfig
from blueetl.features import FeaturesCollection
from blueetl.repository import Repository
from blueetl.resolver import AttrResolver, Resolver
Expand Down Expand Up @@ -47,21 +46,21 @@ def from_config(
cls,
analysis_config: SingleAnalysisConfig,
simulations_config: SimulationCampaign,
cache_config: CacheConfig,
resolver: Resolver,
clear_cache: bool = False,
) -> "Analyzer":
"""Initialize the Analyzer from the given configuration.
Args:
analysis_config: analysis configuration.
simulations_config: simulation campaign configuration.
cache_config: cache configuration.
resolver: resolver instance.
clear_cache: if True, remove any existing cache.
"""
cache_manager = CacheManager(
cache_config=cache_config,
analysis_config=analysis_config,
simulations_config=simulations_config,
clear_cache=clear_cache,
)
repo = Repository(
simulations_config=simulations_config,
Expand Down Expand Up @@ -193,19 +192,18 @@ def from_config(
cls,
global_config: dict,
base_path: StrOrPath,
clear_cache: Optional[bool] = None,
extra_params: dict[str, Any],
) -> "MultiAnalyzer":
"""Initialize the MultiAnalyzer from the given configuration.
Args:
global_config: analysis configuration.
base_path: base path used to resolve relative paths in the configuration.
clear_cache: if True, remove any existing cache; if False, reuse the existing cache;
if None, use the value from the configuration file.
extra_params: dict of overriding parameters.
"""
global_config = init_multi_analysis_configuration(global_config, Path(base_path))
if clear_cache is not None:
global_config.clear_cache = clear_cache
global_config = init_multi_analysis_configuration(
global_config, base_path=Path(base_path), extra_params=extra_params
)
return cls(global_config=global_config)

def _init_analyzers(self) -> dict[str, Analyzer]:
Expand All @@ -216,19 +214,21 @@ def _init_analyzers(self) -> dict[str, Analyzer]:
name: Analyzer.from_config(
analysis_config=analysis_config,
simulations_config=simulations_config,
cache_config=self.global_config.cache.model_copy(
update={"path": self.global_config.cache.path / name}
),
resolver=resolver,
clear_cache=self.global_config.clear_cache,
)
for name, analysis_config in self.global_config.analysis.items()
}

@classmethod
def from_file(cls, path: StrOrPath, clear_cache: Optional[bool] = None) -> "MultiAnalyzer":
def from_file(cls, path: StrOrPath, extra_params: dict[str, Any]) -> "MultiAnalyzer":
"""Return a new instance loaded using the given configuration file."""
return cls.from_config(
global_config=load_yaml(path),
base_path=Path(path).parent,
clear_cache=clear_cache,
extra_params=extra_params,
)

@property
Expand Down Expand Up @@ -316,7 +316,10 @@ def apply_filter(self, simulations_filter: Optional[dict[str, Any]] = None) -> "
if not simulations_filter:
return self
analyzers = {name: a.apply_filter(simulations_filter) for name, a in self.analyzers.items()}
return MultiAnalyzer(global_config=deepcopy(self.global_config), analyzers=analyzers)
return MultiAnalyzer(
global_config=self.global_config.model_copy(deep=True),
analyzers=analyzers,
)

def show(self):
"""Print all the DataFrames."""
Expand All @@ -333,6 +336,7 @@ def run_from_file(
calculate: bool = True,
show: bool = False,
clear_cache: Optional[bool] = None,
readonly_cache: Optional[bool] = None,
loglevel: Optional[int] = None,
) -> MultiAnalyzer:
"""Initialize and return the MultiAnalyzer.
Expand All @@ -343,8 +347,12 @@ def run_from_file(
extract: if True, run the extraction of the repository.
calculate: if True, run the calculation of the features.
show: if True, show a short representation of all the Pandas DataFrames, mainly for debug.
clear_cache: if True, remove any existing cache; if False, reuse the existing cache;
if None, use the value from the configuration file.
clear_cache: if None, use the value from the configuration file. Otherwise:
if True, remove any existing cache;
if False, reuse the existing cache if possible.
readonly_cache: if None, use the value from the configuration file. Otherwise:
if True, use the existing cache if possible, or raise an error;
if False, use the existing cache if possible, or update it.
loglevel: if specified, used to set up logging.
Returns:
Expand All @@ -355,7 +363,13 @@ def run_from_file(
if seed is not None:
np.random.seed(seed)
L.info("MultiAnalyzer configuration: %s", analysis_config_file)
ma = MultiAnalyzer.from_file(analysis_config_file, clear_cache=clear_cache)
ma = MultiAnalyzer.from_file(
analysis_config_file,
extra_params={
"clear_cache": clear_cache,
"readonly_cache": readonly_cache,
},
)
if extract:
ma.extract_repo()
if calculate:
Expand Down
18 changes: 16 additions & 2 deletions src/blueetl/apps/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from blueetl.utils import dump_yaml, load_yaml

# it should always match CONFIG_VERSION if the script supports the current version
MIGRATION_CONFIG_VERSION = 3
MIGRATION_CONFIG_VERSION = 4


def _safe_set(d, key, value):
Expand Down Expand Up @@ -81,13 +81,25 @@ def _migrate_v2_to_v3(input_config):
return output_config


def _migrate_v3_to_v4(input_config):
    """Return a copy of a v3 config (BlueETL 0.8.x) converted to v4 (BlueETL 0.9.x).

    The root keys ``output`` and ``clear_cache`` are removed and, when their value
    is not None, re-inserted as ``path`` and ``clear`` inside the ``cache`` section.
    The ``cache`` section is created if missing. The input dict is not modified.
    """
    migrated = deepcopy(input_config)
    migrated["version"] = 4
    cache_section = migrated.setdefault("cache", {})
    # (old root-level key, new key inside the "cache" section)
    renamed_keys = (("output", "path"), ("clear_cache", "clear"))
    for old_key, new_key in renamed_keys:
        old_value = migrated.pop(old_key, None)
        if old_value is not None:
            _safe_set(cache_section, new_key, old_value)
    return migrated


def _sort_root_keys(input_config):
root_keys = [
"version",
"simulation_campaign",
"simulations_filter",
"simulations_filter_in_memory",
"output",
"cache",
"analysis",
"custom",
]
Expand All @@ -110,6 +122,8 @@ def migrate_config(input_config_file, output_config_file, sort):
config = _migrate_v1_to_v2(config)
if version <= 2:
config = _migrate_v2_to_v3(config)
if version <= 3:
config = _migrate_v3_to_v4(config)
if version == CONFIG_VERSION:
click.secho(f"The config version {version} doesn't need to be migrated.", fg="yellow")
if sort:
Expand Down
22 changes: 19 additions & 3 deletions src/blueetl/apps/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,29 @@
@click.option("--show/--no-show", help="Show repository and features dataframes.")
@click.option(
"--clear-cache/--no-clear-cache",
help="If specified, force clearing or keeping the cache, regardless of the configuration file.",
help="If True, force clearing the cache.",
default=None,
)
@click.option(
"--readonly-cache/--no-readonly-cache",
help="If True, use the existing cache if possible, or raise an error if not.",
default=None,
)
@click.option("-i", "--interactive/--no-interactive", help="Start an interactive IPython shell.")
@click.option("-v", "--verbose", count=True, help="-v for INFO, -vv for DEBUG")
def run(analysis_config_file, seed, extract, calculate, show, clear_cache, interactive, verbose):
def run(
analysis_config_file,
seed,
extract,
calculate,
show,
clear_cache,
readonly_cache,
interactive,
verbose,
):
"""Run the analysis."""
# pylint: disable=unused-variable,unused-import,import-outside-toplevel
# pylint: disable=unused-variable,unused-import,import-outside-toplevel,too-many-arguments
loglevel = (logging.WARNING, logging.INFO, logging.DEBUG)[min(verbose, 2)]
# assign the result to a local variable to make it available in the interactive shell
ma = run_from_file( # noqa
Expand All @@ -34,6 +49,7 @@ def run(analysis_config_file, seed, extract, calculate, show, clear_cache, inter
calculate=calculate,
show=show,
clear_cache=clear_cache,
readonly_cache=readonly_cache,
loglevel=loglevel,
)
if interactive:
Expand Down
Loading

0 comments on commit 53bce0b

Please sign in to comment.