diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index ccec29b0..19c0014e 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python 3.11 uses: actions/setup-python@v4 @@ -39,7 +39,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup conda environment uses: conda-incubator/setup-miniconda@v2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a379986..fd2a7a0b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 - uses: pre-commit/action@v3.0.0 @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup conda environment uses: conda-incubator/setup-miniconda@v2 diff --git a/docs/datastores/builders.rst b/docs/datastores/builders.rst index 4a103d60..cb2efac4 100644 --- a/docs/datastores/builders.rst +++ b/docs/datastores/builders.rst @@ -8,7 +8,7 @@ set of Intake-ESM datastore Builders for different ACCESS model outputs. In gene datastore for your ACCESS model output should be as simple as passing your output base directory to an appropriate Builder. -The access-nri-intake package is installed in the :code:`xp65` and :code:`hh5` analysis environments, or +The access-nri-intake package is installed in the :code:`hh5` and :code:`xp65` analysis environments, or users can install it into their own environment (see :ref:`installation` for details). The Builders can be imported from the :code:`access_nri_intake.source.builders` submodule. 
diff --git a/docs/project_list.rst b/docs/project_list.rst index 5b59336e..83e9fdba 100644 --- a/docs/project_list.rst +++ b/docs/project_list.rst @@ -6,3 +6,4 @@ * :code:`oi10` * :code:`p73` * :code:`rr3` +* :code:`xp65` diff --git a/docs/storage_flags.rst b/docs/storage_flags.rst index d01a52f3..165b38d8 100644 --- a/docs/storage_flags.rst +++ b/docs/storage_flags.rst @@ -1,3 +1,3 @@ .. code-block:: - gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3 \ No newline at end of file + gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65 \ No newline at end of file diff --git a/docs/usage/how.rst b/docs/usage/how.rst index 315f0dbb..fc86053d 100644 --- a/docs/usage/how.rst +++ b/docs/usage/how.rst @@ -19,26 +19,30 @@ In order to use the catalog, you will need to have the following: Note you will need to join a project with a compute allocation. If you don't know what project is appropriate you will need to seek help from your local group or IT support. +#. **Access to the** :code:`xp65` **project**: this project houses the catalog table files. See the + `NCI documentation for how to join projects <https://opus.nci.org.au/display/Help/How+to+connect+to+a+project>`_. + #. **Access to the projects that house the data you're interested in**: the catalog references data products across multiple projects on Gadi. Currently, data is included from the following projects: .. include:: ../project_list.rst If you wish to be able to access all the data in the catalog, you will need to be a member of all - these projects. See the `NCI documentation for how to join projects - <https://opus.nci.org.au/display/Help/How+to+connect+to+a+project>`_. + these projects. .. attention:: Catalog users will only be able to load data from projects that they have access to. -#. **Access to the** :code:`xp65` **or** :code:`hh5` **projects**: these projects provide public - analysis environments in which the ACCESS-NRI catalog is installed (along with many other useful - packages).
Alternatively, you can install the catalog into your own environment. +#. **An installation of the catalog**: the catalog is pre-installed in the + `CLEX CMS "analysis3" conda environment <http://climate-cms.wikis.unsw.edu.au/Conda>`_ (>=23.04) and + the ACCESS-NRI "access-med" conda environment (all versions). Users are encouraged to use one of these + environments to use the catalog. Alternatively, you can install the catalog into your own environment + as described below. - .. warning:: - The ACCESS-NRI catalog is actually not yet installed in the :code:`hh5` environments, so for now - you'll have to use the :code:`xp65` environment. + .. attention:: + In order to use the CLEX CMS conda environments, you will need to also be a member of the + :code:`hh5` project .. _installation: @@ -46,7 +50,7 @@ Installing the catalog ^^^^^^^^^^^^^^^^^^^^^^ Most users will not need to install the catalog themselves and will instead use the catalog through one -of the public analysis environments provided in either :code:`xp65` or :code:`hh5` (see below). +of the public analysis environments provided in either :code:`hh5` or :code:`xp65` (see below). Advanced users that want to install the catalog into their own environment can do so in three ways: @@ -86,22 +90,21 @@ data it references are available from your session. In particular: * **Setting the storage flags**: in addition to being a member of the projects you want to access, you also have to explicitly tell the JupyterLab app that you want to access them in your session. Specify - the project storage paths by entering them in the “Storage” dropdown. To allow access to all data - products in the catalog enter: + the project storage paths by entering them in the “Storage” dropdown. To allow access to the catalog + and all the data products it contains enter: .. include:: ../storage_flags.rst - If you want to use the :code:`xp65` or :code:`hh5` analysis environment, you'll also need to add - :code:`gdata/xp65` or :code:`gdata/hh5`, respectively.
+ If you want to use the :code:`hh5` analysis environment, you'll also need to add :code:`gdata/hh5`. .. attention:: You need to be a member of all projects you enter here. You can see what projects you are part of at `https://my.nci.org.au/mancini <https://my.nci.org.au/mancini>`_. * **Setting the environment**: you need to make sure that the catalog is installed in your JupyterLab - session. As mentioned above, the easiest way to do this is to use either the :code:`xp65` or - :code:`hh5` public analysis environments. You can activate the :code:`xp65` environment within your + session. As mentioned above, the easiest way to do this is to use either the :code:`hh5` or + :code:`xp65` public analysis environments. You can activate the :code:`hh5` environment within your JupyterLab session using the "Advanced options" to set the "Module directories" to - :code:`/g/data/xp65/public/modules` and "Modules" to :code:`conda/are`. Similarly, to use the - :code:`hh5` environment, set "Module directories" to :code:`/g/data/hh5/public/modules` and "Modules" - to :code:`conda/analysis3`. + :code:`/g/data/hh5/public/modules` and "Modules" to :code:`conda/analysis3-unstable`. Similarly, to use + the :code:`xp65` environment, set "Module directories" to :code:`/g/data/xp65/public/modules` and + "Modules" to :code:`conda/are`. diff --git a/docs/usage/quickstart.ipynb b/docs/usage/quickstart.ipynb index c19c8c12..6df17d1b 100644 --- a/docs/usage/quickstart.ipynb +++ b/docs/usage/quickstart.ipynb @@ -2496,7 +2496,7 @@ "id": "d292b4d3-e72b-48e7-92b8-7bb8994012c1", "metadata": {}, "source": [ - "Now that we have our data, we can do our \"analysis\", which here is to simply plot the timeseries." + "Now that we have our datasets, we can do our \"analysis\", which here is to simply plot the timeseries. Note that at this point, we still haven't actually loaded any product data into memory.
Our datasets are [dask-backed xarray Dataset objects](https://docs.xarray.dev/en/stable/user-guide/dask.html#parallel-computing-with-dask) that will only be evaluated when required (or computed explicitly), for example when we try to plot our data:" ] }, { @@ -5249,9 +5249,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python (access-nri-intake-test)", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "access-nri-intake-test" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 0eca175a..7a405e4a 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -184,6 +184,7 @@ def _get_project(path): project |= set(esm_ds.df["path"].map(_get_project)) project |= {_get_project(path) for path in args["path"]} + project |= {_get_project(build_base_path)} storage_flags = "+".join(sorted([f"gdata/{proj}" for proj in project])) # Build the catalog diff --git a/src/access_nri_intake/data/catalog.yaml b/src/access_nri_intake/data/catalog.yaml index e40976af..0e25bdbb 100644 --- a/src/access_nri_intake/data/catalog.yaml +++ b/src/access_nri_intake/data/catalog.yaml @@ -13,7 +13,7 @@ sources: description: ACCESS-NRI intake catalog driver: intake_dataframe_catalog.core.DfFileCatalog metadata: - storage: gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3 + storage: gdata/al33+gdata/cj50+gdata/dk92+gdata/fs38+gdata/ik11+gdata/oi10+gdata/p73+gdata/rr3+gdata/xp65 version: '{{version}}' parameters: version: diff --git a/src/access_nri_intake/source/utils.py b/src/access_nri_intake/source/utils.py index 52c6db2f..27ef0d93 100644 --- a/src/access_nri_intake/source/utils.py +++ b/src/access_nri_intake/source/utils.py @@ -70,8 +70,11 @@ def _todate(t): frequency = f"{months}mon" elif dt.days >= 1: frequency = f"{dt.days}day" + elif dt.seconds >= 3600: + hours = round(dt.seconds / 3600) + frequency = f"{hours}hr" 
else: - frequency = f"{dt.seconds // 3600}hr" + frequency = "subhr" start_time = ts.strftime(time_format) end_time = te.strftime(time_format) diff --git a/tests/test_source_utils.py b/tests/test_source_utils.py index 849f2bae..718aa732 100644 --- a/tests/test_source_utils.py +++ b/tests/test_source_utils.py @@ -4,8 +4,13 @@ from pathlib import Path import pytest +import xarray as xr -from access_nri_intake.source.utils import parse_access_filename, parse_access_ncfile +from access_nri_intake.source.utils import ( + get_timeinfo, + parse_access_filename, + parse_access_ncfile, +) @pytest.mark.parametrize( @@ -298,3 +303,41 @@ def test_parse_access_ncfile(test_data, filename, expected): file = str(test_data / Path(filename)) assert parse_access_ncfile(file) == expected + + +@pytest.mark.parametrize( + "start_end, expected", + [ + ([0.0, 0.00625], ("1900-01-01, 00:00:00", "1900-01-01, 00:09:00", "subhr")), + ([0.0, 0.125], ("1900-01-01, 00:00:00", "1900-01-01, 03:00:00", "3hr")), + ([0.0, 0.25], ("1900-01-01, 00:00:00", "1900-01-01, 06:00:00", "6hr")), + ([0.0, 1.0], ("1900-01-01, 00:00:00", "1900-01-02, 00:00:00", "1day")), + ([0.0, 31.0], ("1900-01-01, 00:00:00", "1900-02-01, 00:00:00", "1mon")), + ([0.0, 90.0], ("1900-01-01, 00:00:00", "1900-04-01, 00:00:00", "3mon")), + ([0.0, 365.0], ("1900-01-01, 00:00:00", "1901-01-01, 00:00:00", "1yr")), + ([0.0, 730.0], ("1900-01-01, 00:00:00", "1902-01-01, 00:00:00", "2yr")), + ], +) +@pytest.mark.parametrize("bounds", [True, False]) +def test_get_timeinfo(start_end, expected, bounds): + if bounds: + time = (start_end[0] + start_end[1]) / 2 + ds = xr.Dataset( + data_vars={ + "dummy": ("time", [0]), + "time_bounds": (("time", "nv"), [start_end]), + }, + coords={"time": [time]}, + ) + ds["time"].attrs = dict(bounds="time_bounds") + else: + ds = xr.Dataset( + data_vars={"dummy": ("time", [0, 0])}, + coords={"time": start_end}, + ) + + ds["time"].attrs |= dict( + units="days since 1900-01-01 00:00:00", calendar="GREGORIAN" + ) + 
+ assert get_timeinfo(ds) == expected