Skip to content

Commit

Permalink
318: tests for read_data_files
Browse files Browse the repository at this point in the history
* rework how test data dirs are handled

* add test for read_data_files for mtd revisions
  • Loading branch information
John-Sharples committed Sep 10, 2024
1 parent e9b80b8 commit 6a55c1a
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 37 deletions.
16 changes: 6 additions & 10 deletions METdbLoad/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
TOP_DIR = str(Path(__file__).parents[1])
sys.path.insert(0, os.path.abspath(TOP_DIR))


def parse_sql(filename):
"""Parse a .sql file and return a list of SQL statements"""
data = open(filename, "r").readlines()
Expand Down Expand Up @@ -112,18 +113,13 @@ def testRunSql():


@pytest.fixture
def point_stat_xml_file(tmp_path):
"""Get xml load file for point_stat test data."""
data_path = Path(TOP_DIR) / POINT_STAT_DATA_DIR
return get_xml_test_file(tmp_path, data_path, "point_stat")


@pytest.fixture
def get_xml_loadfile(point_stat_xml_file):
def load_and_read_xml():
def get_xml_loadfile():
def load_and_read_xml(
tmp_path, data_dir=POINT_STAT_DATA_DIR, met_tool="point_stat"
):
from METdataio.METdbLoad.ush.read_load_xml import XmlLoadFile

XML_FILE = point_stat_xml_file
XML_FILE = get_xml_test_file(tmp_path, data_dir, met_tool)
XML_LOADFILE = XmlLoadFile(XML_FILE)
XML_LOADFILE.read_xml()
return XML_LOADFILE
Expand Down
4 changes: 0 additions & 4 deletions METdbLoad/test/test_met_db_load.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import argparse
import pytest
from pathlib import Path
from METdbLoad.conftest import TOP_DIR
from METdbLoad.ush.met_db_load import main as load_main

from METdataio.METdbLoad.test.utils import (
Expand Down Expand Up @@ -104,8 +102,6 @@ def test_met_db_table_counts(
met_tool,
expected_counts,
):

met_data_dir = str(Path(TOP_DIR) / met_data_dir)
test_data = {
"xmlfile": str(get_xml_test_file(tmp_path, met_data_dir, met_tool)),
"index": "true",
Expand Down
68 changes: 62 additions & 6 deletions METdbLoad/test/test_read_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,79 @@
import pytest

from METdataio.METdbLoad.ush.read_data_files import ReadDataFiles
from METdataio.METdbLoad.test.utils import (
POINT_STAT_DATA_DIR,
MTD_DATA_DIR,
)


def test_counts(get_xml_loadfile):
def test_counts(tmp_path, get_xml_loadfile):
"""Count parts of the files loaded in."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path, POINT_STAT_DATA_DIR)

# Read all of the data from the data files into a dataframe
FILE_DATA = ReadDataFiles()

# read in the data files, with options specified by XML flags
FILE_DATA.read_data(XML_LOADFILE.flags,
XML_LOADFILE.load_files,
XML_LOADFILE.line_types)
FILE_DATA.read_data(
XML_LOADFILE.flags, XML_LOADFILE.load_files, XML_LOADFILE.line_types
)

# number of files
assert len(XML_LOADFILE.load_files) == 2
# number of lines of data
assert FILE_DATA.stat_data.shape[0] == 94
# number of line types
assert FILE_DATA.stat_data.line_type.unique().size == 7


def test_mtd_loads(tmp_path, get_xml_loadfile):
    """Load the MTD test data and check the resulting dataframe shapes."""
    load_spec = get_xml_loadfile(tmp_path, MTD_DATA_DIR)

    # Read the data files into dataframes, with options specified by
    # the flags and line types taken from the XML load file.
    reader = ReadDataFiles()
    reader.read_data(load_spec.flags, load_spec.load_files, load_spec.line_types)

    # Number of files listed for loading
    assert len(load_spec.load_files) == 2
    # (rows, columns) of the MTD 2D and 3D-single dataframes
    assert reader.mtd_2d_data.shape == (278, 43)
    assert reader.mtd_3d_single_data.shape == (8, 48)


def test_mtd_loads_revision(tmp_path, get_xml_loadfile):
    """Load a hand-written MTD 2D file and check the revision labeling."""
    # Contents of the test MTD 2D revision file: a header row followed by
    # six data rows (the last one has no trailing newline).
    file_text = (
        """VERSION MODEL DESC FCST_LEAD FCST_VALID OBS_LEAD OBS_VALID T_DELTA FCST_T_BEG FCST_T_END FCST_RAD FCST_THR OBS_T_BEG OBS_T_END OBS_RAD OBS_THR FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBJECT_ID OBJECT_CAT TIME_INDEX AREA CENTROID_X CENTROID_Y CENTROID_LAT CENTROID_LON AXIS_ANG INTENSITY_10 INTENSITY_25 INTENSITY_50 INTENSITY_75 INTENSITY_90 INTENSITY_99\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 F001 CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00"""
    )

    # Write the file into its own directory under the pytest tmp dir so the
    # load spec points only at this one file.
    revision_dir = tmp_path / "mtd_revision"
    revision_dir.mkdir()
    (revision_dir / "mtd_REVISION_TEST_2d.txt").write_text(file_text)

    load_spec = get_xml_loadfile(tmp_path, revision_dir)
    reader = ReadDataFiles()
    reader.read_data(load_spec.flags, load_spec.load_files, load_spec.line_types)

    assert len(load_spec.load_files) == 1
    assert reader.mtd_2d_data.shape == (10, 43)
    assert reader.mtd_3d_single_data.shape == (0, 0)

    # Check revisions have been correctly labeled
    for column in ("fcst_var", "obs_var"):
        flagged = reader.mtd_2d_data[column] == "REV_APCP_01"
        assert sum(flagged) == 4
16 changes: 8 additions & 8 deletions METdbLoad/test/test_xml.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
"""Test reading XML file."""

def test_loadflags(get_xml_loadfile):
def test_loadflags(tmp_path, get_xml_loadfile):
"""Read various flags from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.flags['load_stat']
assert XML_LOADFILE.flags['load_mode']
assert XML_LOADFILE.flags['load_mtd']
Expand All @@ -18,22 +18,22 @@ def test_loadflags(get_xml_loadfile):
assert XML_LOADFILE.flags['force_dup_file']
assert XML_LOADFILE.flags['load_xml']

def test_loadgroup(get_xml_loadfile):
def test_loadgroup(tmp_path, get_xml_loadfile):
"""Read group and description from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.group == "Testing"
assert XML_LOADFILE.description == "testing with pytest"

def test_connection(get_xml_loadfile):
def test_connection(tmp_path, get_xml_loadfile):
"""Read connection tags from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.connection['db_host'] == "localhost"
assert XML_LOADFILE.connection['db_port'] == 3306
assert XML_LOADFILE.connection['db_database'] == "mv_test"
assert XML_LOADFILE.connection['db_user'] == "root"
assert XML_LOADFILE.connection['db_management_system'] == "mysql"

def test_insertsize(get_xml_loadfile):
def test_insertsize(tmp_path, get_xml_loadfile):
"""Read insert_size from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.insert_size == 1
26 changes: 17 additions & 9 deletions METdbLoad/test/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
from pathlib import Path


def abs_path(rel_path):
    """Join *rel_path* onto the third ancestor directory of this file.

    Args:
        rel_path (str): path relative to that ancestor directory.

    Returns:
        str: the joined path.
    """
    base_dir = Path(__file__).parents[2]
    return str(base_dir / rel_path)


# Use data from METreformat where available
ENSEMBLE_STAT_DATA_DIR = "METreformat/test/data/ensemble_stat"
GRID_STAT_DATA_DIR = "METreformat/test/data/grid_stat/mctc_mcts"
MPR_DATA_DIR = "METreformat/test/data/mpr/climo_data"
POINT_STAT_DATA_DIR = "METreformat/test/data/point_stat"
TCDIAG_DATA_DIR = "METreformat/test/data/tcdiag_tcmpr"
ENSEMBLE_STAT_DATA_DIR = abs_path("METreformat/test/data/ensemble_stat")
GRID_STAT_DATA_DIR = abs_path("METreformat/test/data/grid_stat/mctc_mcts")
MPR_DATA_DIR = abs_path("METreformat/test/data/mpr/climo_data")
POINT_STAT_DATA_DIR = abs_path("METreformat/test/data/point_stat")
TCDIAG_DATA_DIR = abs_path("METreformat/test/data/tcdiag_tcmpr")

# This data is copied from MET test data
# https://hub.docker.com/r/dtcenter/met-data-output
MTD_DATA_DIR = "METdbLoad/test/data/mtd/"
MODE_DATA_DIR = "METdbLoad/test/data/mode/"
MTD_DATA_DIR = abs_path("METdbLoad/test/data/mtd/")
MODE_DATA_DIR = abs_path("METdbLoad/test/data/mode/")


DEFAULT_LOAD_FLAGS = {
Expand Down Expand Up @@ -72,9 +80,9 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS

def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
"""Write test_load_specification.xml and return path
Args:
tmp_path (Path): Path to write test file to.
tmp_path (Path): Path to write test file to.
met_data_dir (str): directory containing MET files to load
met_tool (str): Name of MET tool that generated files, e.g. "point_stat"
load_flags (dict): Optional.
Expand Down

0 comments on commit 6a55c1a

Please sign in to comment.