Skip to content

Commit

Permalink
reporting update
Browse files Browse the repository at this point in the history
  • Loading branch information
denis.plotnikov committed Nov 4, 2024
1 parent 6e37e12 commit 67700ff
Show file tree
Hide file tree
Showing 28 changed files with 1,091 additions and 124 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ include requirements.txt
include package_info.json
include example
include example/example.ipynb
include recon_lw/reporting/template/template_json_report/default_template.jinja
2 changes: 1 addition & 1 deletion package_info.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"package_name": "recon-lw",
"package_version": "3.2.1"
"package_version": "3.2.2"
}
4 changes: 1 addition & 3 deletions recon_lw/interpretation/adapter/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@ def get(self, message, field, strict=False):
val = self.get_body(message).get(extractor, Extractor.NOT_EXTRACTED)
else:
val = extractor(message)

if strict and val == Extractor.NOT_EXTRACTED:
raise KeyError(field)

if val != Extractor.NOT_EXTRACTED:
val = str(val)

return val

def get_root_message_field(self, message, parameter_name, strict=False):
Expand Down
5 changes: 3 additions & 2 deletions recon_lw/interpretation/field_checker/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@


class SimpleFieldChecker(FieldChecker):
    """Runs per-field check rules over a pair of messages.

    By default only failed checks are yielded; set ``publish_matches`` to
    also yield the checks that passed (useful for full match reporting).
    """

    def __init__(self, rules: Dict[str, IFieldCheckRuleProtocol], publish_matches: bool = False):
        """
        Args:
            rules: mapping of field name to the rule applied to that field.
            publish_matches: when True, ``compare`` yields successful check
                results in addition to failures.
        """
        super().__init__(rules)
        self.publish_matches = publish_matches

    def compare(self, msg1, msg2) -> Iterator[FieldCheckResult]:
        """Apply every rule to the message pair.

        Yields failed check results; also yields passing ones when
        ``publish_matches`` is enabled.
        """
        for field, rule in self.rules.items():
            check_rule_result = rule(field, msg1, msg2)

            if check_rule_result.result is False or self.publish_matches:
                yield check_rule_result
Empty file.
158 changes: 158 additions & 0 deletions recon_lw/reporting/check_one/check_one.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import csv
from collections import defaultdict
from pathlib import Path
from typing import Callable, Tuple

from recon_lw.reporting.match_diff.categorizer.base import IErrorCategorizer
from th2_data_services.data import Data
class CheckOneReportGenerator:
    """Generates per-recon "check-one"-style CSV comparison reports.

    For every recon encountered in the event stream a CSV file named
    ``<recon_name>_compare_rows.csv`` is written to ``output_path``.  Each
    logical record occupies three consecutive rows: expected values,
    actual values, and per-field comparison status.
    """

    def __init__(
            self,
            output_path: Path,
            keep_matches: bool = False,
            examples_limit: int = 1000
    ):
        """
        Args:
            output_path: directory the CSV report files are written into
                (created on demand).
            keep_matches: when False, events that have no diffs are skipped.
            examples_limit: maximum number of records collected per recon.
        """
        self.output_path = output_path
        self.keep_matches = keep_matches
        # Bug fix: this parameter was previously accepted but silently ignored.
        self.examples_limit = examples_limit

    def generate_report(
            self,
            events: Data,
            key_function: Callable[[dict], str],
            timestamp_function: Callable[[dict], str],
            protocol_function: Callable[[dict], Tuple[str, str]]
    ):
        """
        Generates a check-one-like report for the given events and
        configuration.  Columns are sorted from the field with the most
        failures to the field with none.
        One row for the original stream message,
        one row for the copy stream message,
        one row for the comparison results.

        Expected event format (only ``eventType == 'BasicReconMatch'``
        events are consumed):
        {
            "body": {
                "match": [{'field': ..., 'expected': ..., 'actual': ...}, ...] - optional; report will not be full without it.
                "diff": [{'field': ..., 'expected': ..., 'actual': ...}, ...] - optional; report will not be full without it.
            }
        }
        # NOTE(review): an earlier docstring described match entries as
        # {'field', 'value'}; the code reads 'expected'/'actual' — confirm
        # the producer's actual schema.

        :param events: recon event stream (anything exposing ``filter``).
        :param key_function: extracts the stream key from an event.
        :param timestamp_function: extracts a display timestamp from an event.
        :param protocol_function: returns the (expected, actual) protocol pair.
        :return: None; files are written to ``self.output_path``.
        """
        all_fields_per_recon = defaultdict(set)
        field_failures_per_recon = defaultdict(lambda: defaultdict(int))
        field_presence_count = defaultdict(lambda: defaultdict(int))
        total_records_per_recon = defaultdict(int)
        data_per_recon = defaultdict(list)

        for event in events.filter(lambda e: e['eventType'] == 'BasicReconMatch'):
            recon_name = event['recon_name']
            event_body = event['body']

            diffs = event_body.get('diff', [])
            # Fully-matching events are skipped unless matches are kept.
            if not self.keep_matches and len(diffs) == 0:
                continue

            # Bug fix: enforce the per-recon examples limit (it used to be
            # dropped in __init__ and never applied).
            if total_records_per_recon[recon_name] >= self.examples_limit:
                continue

            total_records_per_recon[recon_name] += 1

            key = key_function(event)

            match_data = {}
            for match in event_body.get('match', []):
                field = match['field']
                all_fields_per_recon[recon_name].add(field)
                field_presence_count[recon_name][field] += 1

                expected = match['expected']
                # Bug fix: 'actual' was previously copied from 'expected'.
                # For a match both should be equal anyway, so fall back to
                # the expected value if 'actual' is absent.
                actual = match.get('actual', expected)
                match_data[field] = {
                    'expected': str(expected),
                    'actual': str(actual),
                    'status': True
                }

            diff_data = {}
            for diff in diffs:
                field = diff['field']
                all_fields_per_recon[recon_name].add(field)
                field_presence_count[recon_name][field] += 1
                field_failures_per_recon[recon_name][field] += 1

                diff_data[field] = {
                    'expected': str(diff['expected']),
                    'actual': str(diff['actual']),
                    'status': False
                }

            # Diff entries win over match entries for the same field.
            combined_data = {**match_data, **diff_data}
            combined_data['stream_key'] = {
                'expected': str(key),
                'actual': str(key),
                'status': True
            }

            ts = timestamp_function(event)
            combined_data['timestamp'] = {
                'expected': str(ts),
                'actual': str(ts),
                'status': True
            }

            protocol_expected, protocol_actual = protocol_function(event)
            combined_data['protocol'] = {
                'expected': str(protocol_expected),
                'actual': str(protocol_actual),
                'status': True
            }

            data_per_recon[recon_name].append(combined_data)

        for recon_name, records in data_per_recon.items():
            total_records = total_records_per_recon[recon_name]

            # Share (%) of records in which the field never appeared.
            missing_percentages = {
                field: ((total_records - field_presence_count[recon_name][field]) / total_records) * 100
                for field in all_fields_per_recon[recon_name]
            }

            # Sort fields by failures (descending), missing percentage
            # (ascending), then field name for a stable order.
            sorted_fields = sorted(
                all_fields_per_recon[recon_name],
                key=lambda f: (
                    -field_failures_per_recon[recon_name][f],
                    missing_percentages[f],
                    f
                )
            )

            headers = ['protocol', 'status', 'stream_key', 'timestamp'] + sorted_fields

            rows = []
            for data in records:
                # A record fails if any single field comparison failed.
                overall_status = 'FAIL' if any(not value.get('status', True) for value in data.values()) else 'PASS'
                data['status'] = {
                    'expected': overall_status,
                    'actual': overall_status,
                    'status': overall_status
                }
                rows.extend(
                    [
                        [data.get(key, {}).get('expected', '') for key in headers],
                        [data.get(key, {}).get('actual', '') for key in headers],
                        [data.get(key, {}).get('status', True) for key in headers]
                    ]
                )

            self.output_path.mkdir(parents=True, exist_ok=True)
            filename = f"{recon_name}_compare_rows.csv"

            output_file = self.output_path.joinpath(filename)

            with open(output_file, 'w', newline='', encoding='utf8') as f:
                writer = csv.writer(f)
                writer.writerow(headers)
                writer.writerows(rows)
            print(f'Output file generated: {output_file}')
1 change: 0 additions & 1 deletion recon_lw/reporting/known_issues/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from recon_lw.reporting.known_issues.exec_type import *
from recon_lw.reporting.known_issues.issue import *
from recon_lw.reporting.known_issues.issue_status import *
13 changes: 0 additions & 13 deletions recon_lw/reporting/known_issues/exec_type.py

This file was deleted.

55 changes: 36 additions & 19 deletions recon_lw/reporting/known_issues/issue.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from typing import Optional

from recon_lw.reporting.known_issues.issue_status import IssueStatus

from datetime import datetime

class Issue:
def __init__(
self,
code: str,
description: str,
status: IssueStatus,
status_update_date: str,
expected_fix_version: Optional[str] = None,
status_reason: Optional[str] = None,
is_wip: bool = False,
**kwargs,
self,
code: str,
description: str,
status: IssueStatus,
status_update_date: str,
expected_fix_version: Optional[str] = None,
status_reason: Optional[str]=None,
is_wip: bool = False,
**kwargs
):
self.status_update_date = status_update_date
self.expected_fix_version = expected_fix_version
Expand All @@ -24,23 +24,24 @@ def __init__(
self.is_wip = is_wip

def _rep(self):
if self.status == IssueStatus.UNCATEGORIZED:
return f"{self.code} ({self.description})"
if self.expected_fix_version is not None:
expected_fix = f"[Expected fix: {self.expected_fix_version}]"
expected_fix = f'[Expected fix: {self.expected_fix_version}]'
else:
expected_fix = ""
expected_fix = ''

if self.status_reason is not None:
status_reason = f"[Status reason: {self.status_reason}]"
status_reason = f'[Status reason: {self.status_reason}]'
else:
status_reason = ""
status_reason = ''

if self.is_wip:
res = f"{self.code} {expected_fix} {status_reason} - " f"{self.description}"
else:
res = (
f"{self.code} [{self.status}, {self.status_update_date}]{expected_fix} {status_reason} - "
res = f"{self.code} {expected_fix} {status_reason} - " \
f"{self.description}"
)
else:
res = f"{self.code} [{self.status}, {self.status_update_date}]{expected_fix} {status_reason} - " \
f"{self.description}"

if self.status in {IssueStatus.CLOSED, IssueStatus.DRAFT}:
if self.status == IssueStatus.DRAFT and self.is_wip:
Expand All @@ -59,3 +60,19 @@ def __add__(self, other):

def __radd__(self, other):
return f"{other}{self}"

def __eq__(self, other):
return isinstance(other, Issue) and self.code == other.code and self.description == other.description

def __hash__(self):
return hash((self.code, self.description, self.status.name, self.status_reason))


# Sentinel issue used when a discrepancy matches no registered known-issue
# handler (see KnownIssues.find_known_issue).  Its UNCATEGORIZED status makes
# the textual representation omit status/fix details.
UNCATEGORIZED_ISSUE = Issue(
    code='UNCATEGORIZED',
    description='Issue that was not categorized.',
    status=IssueStatus.UNCATEGORIZED,
    status_update_date='',
    expected_fix_version='',
    is_wip=True
)
10 changes: 6 additions & 4 deletions recon_lw/reporting/known_issues/issue_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@


class IssueStatus(Enum):
    """Lifecycle status of a known issue.

    Each member's value is a ``(label, rank)`` tuple: the human-readable
    label and an integer that presumably orders statuses along the review
    workflow (0 = not yet reviewed) — TODO confirm the rank semantics with
    its consumers.
    """
    DRAFT = ("Draft", 0)
    UNCATEGORIZED = ("Uncategorized", 0)
    UNDER_INVESTIGATION = ("UnderInvestigation", 0)
    FOR_REVIEW = ("ForReview", 1)
    CLOSED = ("Closed", 2)
    APPROVED = ("Approved", 3)
49 changes: 49 additions & 0 deletions recon_lw/reporting/known_issues/known_issues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from abc import ABC, abstractmethod
from typing import Protocol, Optional

from recon_lw.reporting.known_issues import Issue, UNCATEGORIZED_ISSUE


class KnownIssueProtocol(Protocol):
    """Callable that tries to map a recon event to a known Issue.

    Implementations return the matched Issue, or None when the event is not
    recognised — KnownIssues.find_known_issue skips None results, so the
    return type is Optional (the original ``-> Issue`` annotation
    contradicted that usage).
    """

    def __call__(self, event: dict, recon_name: str) -> Optional[Issue]:
        pass


class KnownIssueHandler(ABC):
    """Abstract callable that classifies a recon event as a known Issue.

    Subclasses implement :meth:`handle`; instances can then be invoked
    directly, since ``__call__`` simply delegates to it.
    """

    @abstractmethod
    def handle(self, event: dict, recon_name: str) -> Issue:
        """Map the given event (for the given recon) to an Issue."""

    def __call__(self, event: dict, recon_name: str) -> Issue:
        """Delegate the call straight to :meth:`handle`."""
        return self.handle(event, recon_name)


class KnownIssues:
    """Registry mapping error-category strings to known-issue handlers."""

    def __init__(self, known_issues: Optional[dict[str, list[KnownIssueProtocol]]] = None):
        """
        Args:
            known_issues: mapping of error category to the list of handlers
                tried, in order, for events of that category.
                example:
                    known_issues={
                        "stream1_vs_stream2 | field 'field1' '10' != '100": [
                            lambda event, recon_name: Issue(
                                code='ISSUE-121',
                                description='Invalid field1 value for mt2 in stream2.',
                                status=IssueStatus.APPROVED,
                                status_update_date='19.03.2024'
                            )
                        ]
                    }
        """
        # Bug fix: the default used to be a shared mutable dict ({}), which
        # every default-constructed instance would have aliased.  The
        # attribute annotation also now matches the parameter type (values
        # are lists of handlers, not single handlers).
        self.issues: dict[str, list[KnownIssueProtocol]] = (
            known_issues if known_issues is not None else {}
        )

    def find_known_issue(
        self, category: str, event: dict, recon_name: str
    ) -> Optional[Issue]:
        """Return the first Issue produced by a handler registered for
        ``category``; falls back to UNCATEGORIZED_ISSUE when no handler
        claims the event (so the return value is never actually None).
        """
        for handler in self.issues.get(category, []):
            issue = handler(event, recon_name)
            if issue is not None:
                return issue
        return UNCATEGORIZED_ISSUE
Loading

0 comments on commit 67700ff

Please sign in to comment.