Skip to content

Commit

Permalink
add tests for translator funcs
Browse files Browse the repository at this point in the history
  • Loading branch information
dougiesquire committed Jul 4, 2023
1 parent d5c9c59 commit 02b2b07
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 41 deletions.
89 changes: 48 additions & 41 deletions src/access_nri_intake/catalog/translators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
like the ACCESS-NRI catalog
"""

import re
from functools import partial

import pandas as pd
Expand Down Expand Up @@ -172,13 +171,13 @@ def _realm_translator(self):
"""
Return realm, fixing a few issues
"""
return _cmip_realm_translator(self.source.df)
return _cmip_realm_translator(self.source.df["realm"])

def _frequency_translator(self):
    """
    Return frequency, fixing a few issues

    Only the "frequency" column of the source dataframe is handed to
    _cmip_frequency_translator (which operates on a series, not the whole
    dataframe). The translated series is then passed through _to_tuple so
    that each entry is a tuple.
    """
    frequency = self.source.df["frequency"]
    return _to_tuple(_cmip_frequency_translator(frequency))

def _variable_translator(self):
"""
Expand Down Expand Up @@ -220,13 +219,13 @@ def _realm_translator(self):
"""
Return realm, fixing a few issues
"""
return _cmip_realm_translator(self.source.df)
return _cmip_realm_translator(self.source.df["realm"])

def _frequency_translator(self):
    """
    Return frequency, fixing a few issues

    Only the "frequency" column of the source dataframe is handed to
    _cmip_frequency_translator (which operates on a series, not the whole
    dataframe). The translated series is then passed through _to_tuple so
    that each entry is a tuple.
    """
    frequency = self.source.df["frequency"]
    return _to_tuple(_cmip_frequency_translator(frequency))

def _variable_translator(self):
"""
Expand All @@ -249,7 +248,7 @@ def __init__(self, source, columns):
source: :py:class:`~intake.DataSource`
The NCI ERA-Interim intake-esm datastore
columns: list of str
The columns to translate (these are the core columns in the intake-dataframe-catalog)
The columns to translate to (these are the core columns in the intake-dataframe-catalog)
"""

super().__init__(source, columns)
Expand All @@ -262,58 +261,66 @@ def _variable_translator(self):
return _to_tuple(self.source.df["variable"])


def _cmip_frequency_translator(series):
    """
    Return frequency from CMIP frequency metadata

    Parameters
    ----------
    series: :py:class:`~pandas.Series`
        A series of CMIP frequency strings (e.g. "monPt", "daily", "fx")

    Returns
    -------
    :py:class:`~pandas.Series`
        A series of the same length in which every entry that has a known
        translation is replaced by it. Entries without a translation (e.g.
        "fx", "1hr") are returned unchanged.
    """

    # Explicit lookup table: built once per call rather than once per entry
    translations = {
        "3hrPt": "3hr",
        "6hrPt": "6hr",
        "daily": "1day",
        "day": "1day",
        "mon": "1mon",
        "monC": "1mon",
        "monClim": "1mon",
        "monPt": "1mon",
        "sem": "3mon",
        "subhrPt": "subhr",
        "yr": "1yr",
        "yrPt": "1yr",
    }

    # Fall back to the original entry when there is no translation for it
    return series.apply(lambda string: translations.get(string, string))


def _cmip_realm_translator(series):
    """
    Return realm from CMIP realm metadata, fixing some issues. This function returns
    a tuple as there are sometimes multiple realms per cmip asset

    Parameters
    ----------
    series: :py:class:`~pandas.Series`
        A series of CMIP realm strings. Multiple realms in one entry are
        separated by a single space (e.g. "ocean seaIce")

    Returns
    -------
    :py:class:`~pandas.Series`
        A series of the same length where each entry is a tuple of realms.
        Known misspellings are corrected, realms without a translation are
        kept as they are, and repeated realms are dropped while the order of
        first appearance is preserved.
    """

    # Known bad spellings -> corrected realm; built once per call
    translations = {
        "na": "none",
        "landonly": "land",
        "ocnBgChem": "ocnBgchem",
        "seaice": "seaIce",
    }

    def _translate(string):
        realms = (translations.get(realm, realm) for realm in string.split(" "))
        # dict keys are unique and keep insertion order, so this removes
        # duplicates (e.g. "seaice seaIce") without reordering the realms
        return tuple(dict.fromkeys(realms))

    return series.apply(_translate)


def _to_tuple(series):
"""
Make entries in the provided series a tuple
Make each entry in the provided series a tuple
Parameters
----------
Expand Down
129 changes: 129 additions & 0 deletions tests/test_translators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

import pandas as pd
import pytest

from access_nri_intake.catalog.translators import (
_cmip_frequency_translator,
_cmip_realm_translator,
_to_tuple,
)


@pytest.mark.parametrize(
    "raw, expected",
    [
        # Every frequency spelling handled by the translator, plus a few
        # ("1hr", "3hr", "6hr", "fx", "subhr") that must pass through unchanged
        (
            [
                "1hr",
                "3hr",
                "3hrPt",
                "6hr",
                "6hrPt",
                "daily",
                "day",
                "fx",
                "mon",
                "monC",
                "monClim",
                "monPt",
                "sem",
                "subhr",
                "subhrPt",
                "yr",
                "yrPt",
            ],
            [
                "1hr",
                "3hr",
                "3hr",
                "6hr",
                "6hr",
                "1day",
                "1day",
                "fx",
                "1mon",
                "1mon",
                "1mon",
                "1mon",
                "3mon",
                "subhr",
                "subhr",
                "1yr",
                "1yr",
            ],
        ),
        # Single-entry series
        (["daily"], ["1day"]),
    ],
)
def test_cmip_frequency_translator(raw, expected):
    """Test translation of entries in the CMIP frequency column"""
    # "raw" rather than "input" so the builtin is not shadowed
    series = pd.Series(raw)
    translated = _cmip_frequency_translator(series)
    assert list(translated) == expected


@pytest.mark.parametrize(
    "raw, expected",
    [
        # Single realms, space-separated multiple realms and the known
        # misspellings ("landonly", "na", "ocnBgChem", "seaice")
        (
            [
                "aerosol",
                "atmos",
                "atmos atmosChem",
                "atmos land",
                "land",
                "landIce",
                "landIce land",
                "landonly",
                "na",
                "ocean",
                "ocean seaIce",
                "ocnBgChem",
                "ocnBgchem",
                "seaIce",
                "seaIce ocean",
                "seaice",
            ],
            [
                ("aerosol",),
                ("atmos",),
                ("atmos", "atmosChem"),
                ("atmos", "land"),
                ("land",),
                ("landIce",),
                ("landIce", "land"),
                ("land",),
                ("none",),
                ("ocean",),
                ("ocean", "seaIce"),
                ("ocnBgchem",),
                ("ocnBgchem",),
                ("seaIce",),
                ("seaIce", "ocean"),
                ("seaIce",),
            ],
        ),
        # Single-entry series
        (["landonly"], [("land",)]),
        (["atmos atmosChem"], [("atmos", "atmosChem")]),
    ],
)
def test_cmip_realm_translator(raw, expected):
    """Test translation of entries in the CMIP realm column"""
    # "raw" rather than "input" so the builtin is not shadowed
    series = pd.Series(raw)
    translated = _cmip_realm_translator(series)
    assert list(translated) == expected


@pytest.mark.parametrize(
    "raw",
    [
        ["a", "b", "c"],
        [0, 1, 2],
    ],
)
def test_to_tuple(raw):
    """Test the _to_tuple function"""
    # "raw" rather than "input" so the builtin is not shadowed
    series = pd.Series(raw)
    # Every entry of the returned series must be a tuple
    assert all(isinstance(entry, tuple) for entry in _to_tuple(series))

0 comments on commit 02b2b07

Please sign in to comment.