Skip to content

Commit

Permalink
reporting update
Browse files Browse the repository at this point in the history
  • Loading branch information
denis.plotnikov committed Nov 4, 2024
1 parent 6e37e12 commit 67700ff
Show file tree
Hide file tree
Showing 28 changed files with 1,091 additions and 124 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ include requirements.txt
include package_info.json
include example
include example/example.ipynb
include recon_lw/reporting/template/template_json_report/default_template.jinja
2 changes: 1 addition & 1 deletion package_info.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"package_name": "recon-lw",
"package_version": "3.2.1"
"package_version": "3.2.2"
}
4 changes: 1 addition & 3 deletions recon_lw/interpretation/adapter/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,10 @@ def get(self, message, field, strict=False):
val = self.get_body(message).get(extractor, Extractor.NOT_EXTRACTED)
else:
val = extractor(message)

if strict and val == Extractor.NOT_EXTRACTED:
raise KeyError(field)

if val != Extractor.NOT_EXTRACTED:
val = str(val)

return val

def get_root_message_field(self, message, parameter_name, strict=False):
Expand Down
5 changes: 3 additions & 2 deletions recon_lw/interpretation/field_checker/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@


class SimpleFieldChecker(FieldChecker):
    """Runs per-field check rules over a pair of messages.

    By default only failed checks are yielded; set ``publish_matches`` to
    also yield the checks that passed (useful for full match reporting).
    """

    def __init__(self, rules: Dict[str, IFieldCheckRuleProtocol], publish_matches: bool = False):
        """
        Args:
            rules: mapping of field name to the rule applied to that field.
            publish_matches: when True, ``compare`` yields successful check
                results in addition to failures.
        """
        super().__init__(rules)
        self.publish_matches = publish_matches

    def compare(self, msg1, msg2) -> Iterator[FieldCheckResult]:
        """Apply every rule to the message pair.

        Yields failed check results; also yields passing ones when
        ``publish_matches`` is enabled.
        """
        for field, rule in self.rules.items():
            check_rule_result = rule(field, msg1, msg2)

            if check_rule_result.result is False or self.publish_matches:
                yield check_rule_result
Empty file.
158 changes: 158 additions & 0 deletions recon_lw/reporting/check_one/check_one.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import csv
from collections import defaultdict
from pathlib import Path
from typing import Callable, Tuple

from recon_lw.reporting.match_diff.categorizer.base import IErrorCategorizer
from th2_data_services.data import Data
class CheckOneReportGenerator:
    """Generates per-recon "check-one"-style CSV comparison reports.

    For every recon encountered in the event stream a CSV file named
    ``<recon_name>_compare_rows.csv`` is written to ``output_path``.  Each
    logical record occupies three consecutive rows: expected values,
    actual values, and per-field comparison status.
    """

    def __init__(
            self,
            output_path: Path,
            keep_matches: bool = False,
            examples_limit: int = 1000
    ):
        """
        Args:
            output_path: directory the CSV report files are written into
                (created on demand).
            keep_matches: when False, events that have no diffs are skipped.
            examples_limit: maximum number of records collected per recon.
        """
        self.output_path = output_path
        self.keep_matches = keep_matches
        # Bug fix: this parameter was previously accepted but silently ignored.
        self.examples_limit = examples_limit

    def generate_report(
            self,
            events: Data,
            key_function: Callable[[dict], str],
            timestamp_function: Callable[[dict], str],
            protocol_function: Callable[[dict], Tuple[str, str]]
    ):
        """
        Generates a check-one-like report for the given events and
        configuration.  Columns are sorted from the field with the most
        failures to the field with none.
        One row for the original stream message,
        one row for the copy stream message,
        one row for the comparison results.

        Expected event format (only ``eventType == 'BasicReconMatch'``
        events are consumed):
        {
            "body": {
                "match": [{'field': ..., 'expected': ..., 'actual': ...}, ...] - optional; report will not be full without it.
                "diff": [{'field': ..., 'expected': ..., 'actual': ...}, ...] - optional; report will not be full without it.
            }
        }
        # NOTE(review): an earlier docstring described match entries as
        # {'field', 'value'}; the code reads 'expected'/'actual' — confirm
        # the producer's actual schema.

        :param events: recon event stream (anything exposing ``filter``).
        :param key_function: extracts the stream key from an event.
        :param timestamp_function: extracts a display timestamp from an event.
        :param protocol_function: returns the (expected, actual) protocol pair.
        :return: None; files are written to ``self.output_path``.
        """
        all_fields_per_recon = defaultdict(set)
        field_failures_per_recon = defaultdict(lambda: defaultdict(int))
        field_presence_count = defaultdict(lambda: defaultdict(int))
        total_records_per_recon = defaultdict(int)
        data_per_recon = defaultdict(list)

        for event in events.filter(lambda e: e['eventType'] == 'BasicReconMatch'):
            recon_name = event['recon_name']
            event_body = event['body']

            diffs = event_body.get('diff', [])
            # Fully-matching events are skipped unless matches are kept.
            if not self.keep_matches and len(diffs) == 0:
                continue

            # Bug fix: enforce the per-recon examples limit (it used to be
            # dropped in __init__ and never applied).
            if total_records_per_recon[recon_name] >= self.examples_limit:
                continue

            total_records_per_recon[recon_name] += 1

            key = key_function(event)

            match_data = {}
            for match in event_body.get('match', []):
                field = match['field']
                all_fields_per_recon[recon_name].add(field)
                field_presence_count[recon_name][field] += 1

                expected = match['expected']
                # Bug fix: 'actual' was previously copied from 'expected'.
                # For a match both should be equal anyway, so fall back to
                # the expected value if 'actual' is absent.
                actual = match.get('actual', expected)
                match_data[field] = {
                    'expected': str(expected),
                    'actual': str(actual),
                    'status': True
                }

            diff_data = {}
            for diff in diffs:
                field = diff['field']
                all_fields_per_recon[recon_name].add(field)
                field_presence_count[recon_name][field] += 1
                field_failures_per_recon[recon_name][field] += 1

                diff_data[field] = {
                    'expected': str(diff['expected']),
                    'actual': str(diff['actual']),
                    'status': False
                }

            # Diff entries win over match entries for the same field.
            combined_data = {**match_data, **diff_data}
            combined_data['stream_key'] = {
                'expected': str(key),
                'actual': str(key),
                'status': True
            }

            ts = timestamp_function(event)
            combined_data['timestamp'] = {
                'expected': str(ts),
                'actual': str(ts),
                'status': True
            }

            protocol_expected, protocol_actual = protocol_function(event)
            combined_data['protocol'] = {
                'expected': str(protocol_expected),
                'actual': str(protocol_actual),
                'status': True
            }

            data_per_recon[recon_name].append(combined_data)

        for recon_name, records in data_per_recon.items():
            total_records = total_records_per_recon[recon_name]

            # Share (%) of records in which the field never appeared.
            missing_percentages = {
                field: ((total_records - field_presence_count[recon_name][field]) / total_records) * 100
                for field in all_fields_per_recon[recon_name]
            }

            # Sort fields by failures (descending), missing percentage
            # (ascending), then field name for a stable order.
            sorted_fields = sorted(
                all_fields_per_recon[recon_name],
                key=lambda f: (
                    -field_failures_per_recon[recon_name][f],
                    missing_percentages[f],
                    f
                )
            )

            headers = ['protocol', 'status', 'stream_key', 'timestamp'] + sorted_fields

            rows = []
            for data in records:
                # A record fails if any single field comparison failed.
                overall_status = 'FAIL' if any(not value.get('status', True) for value in data.values()) else 'PASS'
                data['status'] = {
                    'expected': overall_status,
                    'actual': overall_status,
                    'status': overall_status
                }
                rows.extend(
                    [
                        [data.get(key, {}).get('expected', '') for key in headers],
                        [data.get(key, {}).get('actual', '') for key in headers],
                        [data.get(key, {}).get('status', True) for key in headers]
                    ]
                )

            self.output_path.mkdir(parents=True, exist_ok=True)
            filename = f"{recon_name}_compare_rows.csv"

            output_file = self.output_path.joinpath(filename)

            with open(output_file, 'w', newline='', encoding='utf8') as f:
                writer = csv.writer(f)
                writer.writerow(headers)
                writer.writerows(rows)
            print(f'Output file generated: {output_file}')
1 change: 0 additions & 1 deletion recon_lw/reporting/known_issues/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from recon_lw.reporting.known_issues.exec_type import *
from recon_lw.reporting.known_issues.issue import *
from recon_lw.reporting.known_issues.issue_status import *
13 changes: 0 additions & 13 deletions recon_lw/reporting/known_issues/exec_type.py

This file was deleted.

55 changes: 36 additions & 19 deletions recon_lw/reporting/known_issues/issue.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from typing import Optional

from recon_lw.reporting.known_issues.issue_status import IssueStatus

from datetime import datetime

class Issue:
def __init__(
self,
code: str,
description: str,
status: IssueStatus,
status_update_date: str,
expected_fix_version: Optional[str] = None,
status_reason: Optional[str] = None,
is_wip: bool = False,
**kwargs,
self,
code: str,
description: str,
status: IssueStatus,
status_update_date: str,
expected_fix_version: Optional[str] = None,
status_reason: Optional[str]=None,
is_wip: bool = False,
**kwargs
):
self.status_update_date = status_update_date
self.expected_fix_version = expected_fix_version
Expand All @@ -24,23 +24,24 @@ def __init__(
self.is_wip = is_wip

def _rep(self):
if self.status == IssueStatus.UNCATEGORIZED:
return f"{self.code} ({self.description})"
if self.expected_fix_version is not None:
expected_fix = f"[Expected fix: {self.expected_fix_version}]"
expected_fix = f'[Expected fix: {self.expected_fix_version}]'
else:
expected_fix = ""
expected_fix = ''

if self.status_reason is not None:
status_reason = f"[Status reason: {self.status_reason}]"
status_reason = f'[Status reason: {self.status_reason}]'
else:
status_reason = ""
status_reason = ''

if self.is_wip:
res = f"{self.code} {expected_fix} {status_reason} - " f"{self.description}"
else:
res = (
f"{self.code} [{self.status}, {self.status_update_date}]{expected_fix} {status_reason} - "
res = f"{self.code} {expected_fix} {status_reason} - " \
f"{self.description}"
)
else:
res = f"{self.code} [{self.status}, {self.status_update_date}]{expected_fix} {status_reason} - " \
f"{self.description}"

if self.status in {IssueStatus.CLOSED, IssueStatus.DRAFT}:
if self.status == IssueStatus.DRAFT and self.is_wip:
Expand All @@ -59,3 +60,19 @@ def __add__(self, other):

def __radd__(self, other):
return f"{other}{self}"

def __eq__(self, other):
return isinstance(other, Issue) and self.code == other.code and self.description == other.description

def __hash__(self):
return hash((self.code, self.description, self.status.name, self.status_reason))


# Sentinel issue used when a discrepancy matches no registered known-issue
# handler (see KnownIssues.find_known_issue).  Its UNCATEGORIZED status makes
# the textual representation omit status/fix details.
UNCATEGORIZED_ISSUE = Issue(
    code='UNCATEGORIZED',
    description='Issue that was not categorized.',
    status=IssueStatus.UNCATEGORIZED,
    status_update_date='',
    expected_fix_version='',
    is_wip=True
)
10 changes: 6 additions & 4 deletions recon_lw/reporting/known_issues/issue_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@


class IssueStatus(Enum):
    """Lifecycle status of a known issue.

    Each member's value is a ``(label, rank)`` tuple: the human-readable
    label and an integer that presumably orders statuses along the review
    workflow (0 = not yet reviewed) — TODO confirm the rank semantics with
    its consumers.
    """
    DRAFT = ("Draft", 0)
    UNCATEGORIZED = ("Uncategorized", 0)
    UNDER_INVESTIGATION = ("UnderInvestigation", 0)
    FOR_REVIEW = ("ForReview", 1)
    CLOSED = ("Closed", 2)
    APPROVED = ("Approved", 3)
49 changes: 49 additions & 0 deletions recon_lw/reporting/known_issues/known_issues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from abc import ABC, abstractmethod
from typing import Protocol, Optional

from recon_lw.reporting.known_issues import Issue, UNCATEGORIZED_ISSUE


class KnownIssueProtocol(Protocol):
    """Callable that tries to map a recon event to a known Issue.

    Implementations return the matched Issue, or None when the event is not
    recognised — KnownIssues.find_known_issue skips None results, so the
    return type is Optional (the original ``-> Issue`` annotation
    contradicted that usage).
    """

    def __call__(self, event: dict, recon_name: str) -> Optional[Issue]:
        pass


class KnownIssueHandler(ABC):
    """Abstract callable that classifies a recon event as a known Issue.

    Subclasses implement :meth:`handle`; instances can then be invoked
    directly, since ``__call__`` simply delegates to it.
    """

    @abstractmethod
    def handle(self, event: dict, recon_name: str) -> Issue:
        """Map the given event (for the given recon) to an Issue."""

    def __call__(self, event: dict, recon_name: str) -> Issue:
        """Delegate the call straight to :meth:`handle`."""
        return self.handle(event, recon_name)


class KnownIssues:
    """Registry mapping error-category strings to known-issue handlers."""

    def __init__(self, known_issues: Optional[dict[str, list[KnownIssueProtocol]]] = None):
        """
        Args:
            known_issues: mapping of error category to the list of handlers
                tried, in order, for events of that category.
                example:
                    known_issues={
                        "stream1_vs_stream2 | field 'field1' '10' != '100": [
                            lambda event, recon_name: Issue(
                                code='ISSUE-121',
                                description='Invalid field1 value for mt2 in stream2.',
                                status=IssueStatus.APPROVED,
                                status_update_date='19.03.2024'
                            )
                        ]
                    }
        """
        # Bug fix: the default used to be a shared mutable dict ({}), which
        # every default-constructed instance would have aliased.  The
        # attribute annotation also now matches the parameter type (values
        # are lists of handlers, not single handlers).
        self.issues: dict[str, list[KnownIssueProtocol]] = (
            known_issues if known_issues is not None else {}
        )

    def find_known_issue(
        self, category: str, event: dict, recon_name: str
    ) -> Optional[Issue]:
        """Return the first Issue produced by a handler registered for
        ``category``; falls back to UNCATEGORIZED_ISSUE when no handler
        claims the event (so the return value is never actually None).
        """
        for handler in self.issues.get(category, []):
            issue = handler(event, recon_name)
            if issue is not None:
                return issue
        return UNCATEGORIZED_ISSUE
Loading

0 comments on commit 67700ff

Please sign in to comment.