[QOL] Break report Translate errors in terminal & files #77

Merged
19 changes: 15 additions & 4 deletions ndsl/stencils/testing/test_translate.py
@@ -210,13 +210,19 @@ def test_sequential_savepoint(
near_zero=case.testobj.near_zero,
)
if not metric.check:
os.makedirs(OUTDIR, exist_ok=True)
log_filename = os.path.join(
OUTDIR,
f"details-{case.savepoint_name}-{varname}-rank{case.rank}.log",
)
metric.report(log_filename)
pytest.fail(str(metric), pytrace=False)
passing_names.append(failing_names.pop())
ref_data_out[varname] = [ref_data]
if len(failing_names) > 0:
get_thresholds(case.testobj, input_data=original_input_data)
os.makedirs(OUTDIR, exist_ok=True)
out_filename = os.path.join(OUTDIR, f"translate-{case.savepoint_name}.nc")
nc_filename = os.path.join(OUTDIR, f"translate-{case.savepoint_name}.nc")
input_data_on_host = {}
for key, _input in input_data.items():
input_data_on_host[key] = gt_utils.asarray(_input)
@@ -226,7 +232,7 @@ def test_sequential_savepoint(
[output],
ref_data_out,
failing_names,
out_filename,
nc_filename,
)
if failing_names != []:
pytest.fail(
@@ -353,11 +359,16 @@ def test_parallel_savepoint(
near_zero=case.testobj.near_zero,
)
if not metric.check:
os.makedirs(OUTDIR, exist_ok=True)
log_filename = os.path.join(
OUTDIR, f"details-{case.savepoint_name}-{varname}.log"
)
metric.report(log_filename)
pytest.fail(str(metric), pytrace=False)
passing_names.append(failing_names.pop())
if len(failing_names) > 0:
os.makedirs(OUTDIR, exist_ok=True)
out_filename = os.path.join(
nct_filename = os.path.join(
OUTDIR, f"translate-{case.savepoint_name}-{case.grid.rank}.nc"
)
try:
@@ -370,7 +381,7 @@ def test_parallel_savepoint(
[output],
ref_data,
failing_names,
out_filename,
nct_filename,
)
except Exception as error:
print(f"TestParallel SaveNetCDF Error: {error}")
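
To make the intent of the test changes above concrete, here is a minimal illustrative sketch (not part of the diff) of the new failure path: when a metric check fails, the full report is written to a per-variable log under the test output directory and pytest fails with the truncated terminal summary. The `fail_with_report` helper and its `outdir` argument are hypothetical; they mirror the `OUTDIR`, `case`, and `metric` objects used in `test_sequential_savepoint` above.

    # Illustrative sketch only (not part of the diff).
    import os

    import pytest


    def fail_with_report(metric, case, varname, outdir):
        """Write the full metric report to a per-variable log, then fail the test."""
        if not metric.check:
            os.makedirs(outdir, exist_ok=True)
            log_filename = os.path.join(
                outdir,
                f"details-{case.savepoint_name}-{varname}-rank{case.rank}.log",
            )
            metric.report(log_filename)  # full report goes to the log file
            pytest.fail(str(metric), pytrace=False)  # truncated summary in the terminal
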
195 changes: 110 additions & 85 deletions ndsl/testing/comparison.py
@@ -1,4 +1,4 @@
from typing import Union
from typing import List, Optional, Union

import numpy as np
import numpy.typing as npt
@@ -20,6 +20,9 @@ def __str__(self) -> str:
def __repr__(self) -> str:
...

def report(self, file_path: Optional[str] = None) -> List[str]:
...


class LegacyMetric(BaseMetric):
"""Legacy (AI2) metric used for original FV3 port.
@@ -88,67 +91,78 @@ def _compute_errors(
)
return success

def __str__(self) -> str:
return self.__repr__()

def __repr__(self) -> str:
def report(self, file_path: Optional[str] = None) -> List[str]:
report = []
if self.check:
return "✅ No numerical differences"
report.append("✅ No numerical differences")
else:
report.append("❌ Numerical failures")

found_indices = np.logical_not(self.success).nonzero()
computed_failures = self.computed[found_indices]
reference_failures = self.references[found_indices]

# List all errors
bad_indices_count = len(found_indices[0])
# Determine worst result
worst_metric_err = 0.0
abs_errs = []
details = [
"All failures:",
"Index Computed Reference Absloute E Metric E",
]
for b in range(bad_indices_count):
full_index = tuple([f[b] for f in found_indices])

metric_err = self._calculated_metric[full_index]

absolute_distance = abs(computed_failures[b] - reference_failures[b])
abs_errs.append(absolute_distance)

details.append(
f"{full_index} {computed_failures[b]} "
f"{reference_failures[b]} {abs_errs[-1]:.3e} {metric_err:.3e}"
)

report = []
report.append("❌ Numerical failures")

found_indices = np.logical_not(self.success).nonzero()
computed_failures = self.computed[found_indices]
reference_failures = self.references[found_indices]

# List all errors
bad_indices_count = len(found_indices[0])
# Determine worst result
worst_metric_err = 0.0
abs_errs = []
details = [
"All failures:",
"Index Computed Reference Absloute E Metric E",
]
for b in range(bad_indices_count):
full_index = tuple([f[b] for f in found_indices])

metric_err = self._calculated_metric[full_index]

absolute_distance = abs(computed_failures[b] - reference_failures[b])
abs_errs.append(absolute_distance)

details.append(
f"{full_index} {computed_failures[b]} "
f"{reference_failures[b]} {abs_errs[-1]:.3e} {metric_err:.3e}"
if np.isnan(metric_err) or (abs(metric_err) > abs(worst_metric_err)):
worst_metric_err = metric_err
worst_full_idx = full_index
worst_abs_err = abs_errs[-1]
computed_worst = computed_failures[b]
reference_worst = reference_failures[b]
# Try to quantify noisy errors
unique_errors = len(np.unique(np.array(abs_errs)))
# Summary and worst result
fullcount = len(self.references.flatten())
report.append(
f"Failed count: {bad_indices_count}/{fullcount} "
f"({round(100.0 * (bad_indices_count / fullcount), 2)}%),\n"
f"Worst failed index {worst_full_idx}\n"
f" Computed:{computed_worst}\n"
f" Reference: {reference_worst}\n"
f" Absolute diff: {worst_abs_err:.3e}\n"
f" Metric diff: {worst_metric_err:.3e}\n"
f" Metric threshold: {self.eps}\n"
f" Noise quantification:\n"
f" Reference dtype: {type(reference_worst)}\n"
f" Unique errors: {unique_errors}/{bad_indices_count}"
)
report.extend(details)

if np.isnan(metric_err) or (abs(metric_err) > abs(worst_metric_err)):
worst_metric_err = metric_err
worst_full_idx = full_index
worst_abs_err = abs_errs[-1]
computed_worst = computed_failures[b]
reference_worst = reference_failures[b]
# Try to quantify noisy errors
unique_errors = len(np.unique(np.array(abs_errs)))
# Summary and worst result
fullcount = len(self.references.flatten())
report.append(
f"Failed count: {bad_indices_count}/{fullcount} "
f"({round(100.0 * (bad_indices_count / fullcount), 2)}%),\n"
f"Worst failed index {worst_full_idx}\n"
f" Computed:{computed_worst}\n"
f" Reference: {reference_worst}\n"
f" Absolute diff: {worst_abs_err:.3e}\n"
f" Metric diff: {worst_metric_err:.3e}\n"
f" Metric threshold: {self.eps}\n"
f" Noise quantification:\n"
f" Reference dtype: {type(reference_worst)}\n"
f" Unique errors: {unique_errors}/{bad_indices_count}"
)
report.extend(details)
if file_path:
with open(file_path, "w") as fd:
fd.write("\n".join(report))

return report

def __str__(self) -> str:
return self.__repr__()

def __repr__(self) -> str:
report = self.report()
if len(report) > 30:
report = report[:30] # ~10 first errors
report.append("...")
return "\n".join(report)


@@ -231,36 +245,47 @@ def _compute_all_metrics(
f"recieved data with unexpected dtype {self.references.dtype}"
)

def report(self, file_path: Optional[str] = None) -> List[str]:
report = []
if self.check:
report.append("✅ No numerical differences")
else:
report.append("❌ Numerical failures")

found_indices = np.logical_not(self.success).nonzero()
# List all errors to terminal and file
bad_indices_count = len(found_indices[0])
full_count = len(self.references.flatten())
failures_pct = round(100.0 * (bad_indices_count / full_count), 2)
report = [
f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n",
f"Index Computed Reference "
f"Absolute E(<{self.absolute_eps:.2e}) "
f"Relative E(<{self.relative_fraction * 100:.2e}%) "
f"ULP E(<{self.ulp_threshold})",
]
# Summary and worst result
for iBad in range(bad_indices_count):
fi = tuple([f[iBad] for f in found_indices])
report.append(
f"{str(fi)} {self.computed[fi]:.16e} {self.references[fi]:.16e} "
f"{self.absolute_distance[fi]:.2e} {'✅' if self.absolute_distance_metric[fi] else '❌'} "
f"{self.relative_distance[fi] * 100:.2e} {'✅' if self.relative_distance_metric[fi] else '❌'} "
f"{int(self.ulp_distance[fi]):02} {'✅' if self.ulp_distance_metric[fi] else '❌'} "
)

if file_path:
with open(file_path, "w") as fd:
fd.write("\n".join(report))

return report

def __str__(self) -> str:
return self.__repr__()

def __repr__(self) -> str:
if self.check:
return "✅ No numerical differences"

report = []
report.append("❌ Numerical failures")

found_indices = np.logical_not(self.success).nonzero()
# List all errors
bad_indices_count = len(found_indices[0])
full_count = len(self.references.flatten())
failures_pct = round(100.0 * (bad_indices_count / full_count), 2)
report = [
f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n",
f"Index Computed Reference "
f"Absolute E(<{self.absolute_eps:.2e}) "
f"Relative E(<{self.relative_fraction * 100:.2e}%) "
f"ULP E(<{self.ulp_threshold})",
]
# Summary and worst result
for iBad in range(bad_indices_count):
fi = tuple([f[iBad] for f in found_indices])
report.append(
f"({fi[0]:02}, {fi[1]:02}, {fi[2]:02}) {self.computed[fi]:.16e} {self.references[fi]:.16e} "
f"{self.absolute_distance[fi]:.2e} {'✅' if self.absolute_distance_metric[fi] else '❌'} "
f"{self.relative_distance[fi] * 100:.2e} {'✅' if self.relative_distance_metric[fi] else '❌'} "
f"{int(self.ulp_distance[fi]):02} {'✅' if self.ulp_distance_metric[fi] else '❌'} "
)

report = self.report()
if len(report) > 12:
report = report[:12] # ~10 first errors
report.append("...")
return "\n".join(report)