feat: create reduced errors when processing test instances

codecov · Jul 4, 2024 · ac53b62 · ac53b62
1 parent c48ec49
commit ac53b62
Show file tree

Hide file tree

Showing 2 changed files with 84 additions and 25 deletions.
diff --git a/tasks/test_results_processor.py b/tasks/test_results_processor.py
@@ -2,6 +2,7 @@
 import json
 import logging
 import zlib
+from collections import defaultdict
 from io import BytesIO
 from sys import getsizeof
 from typing import List
@@ -21,8 +22,9 @@
 )
 
 from app import celery_app
-from database.models import Repository, Test, TestInstance, Upload
+from database.models import ReducedError, Repository, Test, TestInstance, Upload
 from services.archive import ArchiveService
+from services.failure_normalizer import reduce_error
 from services.test_results import generate_flags_hash, generate_test_id
 from services.yaml import read_yaml_field
 from tasks.base import BaseCodecovTask
@@ -110,6 +112,8 @@ def _bulk_write_tests_to_db(
         with metrics.timing(key="test_results.processor.write_to_db"):
             test_data = []
             test_instance_data = []
+
+            reduced_error_dict = defaultdict(list)
             for testrun in parsed_testruns:
                 # Build up the data for bulk insert
                 name = testrun.name
@@ -119,41 +123,70 @@ def _bulk_write_tests_to_db(
                 failure_message = testrun.failure_message
                 test_id = generate_test_id(repoid, testsuite, name, flags_hash)
 
-                test_data.append(
-                    dict(
-                        id=test_id,
-                        repoid=repoid,
-                        name=name,
-                        testsuite=testsuite,
-                        flags_hash=flags_hash,
-                    )
+                test_dict = dict(
+                    id=test_id,
+                    repoid=repoid,
+                    name=name,
+                    testsuite=testsuite,
+                    flags_hash=flags_hash,
                 )
+                test_data.append(test_dict)
 
-                test_instance_data.append(
-                    dict(
-                        test_id=test_id,
-                        upload_id=upload_id,
-                        duration_seconds=duration_seconds,
-                        outcome=outcome,
-                        failure_message=failure_message,
-                        commitid=commitid,
-                        branch=branch,
-                        reduced_error_id=None,
+                instance_dict = dict(
+                    test_id=test_id,
+                    upload_id=upload_id,
+                    duration_seconds=duration_seconds,
+                    outcome=outcome,
+                    failure_message=failure_message,
+                    commitid=commitid,
+                    branch=branch,
+                    reduced_error_id=None,
+                )
+                test_instance_data.append(instance_dict)
+                if failure_message:
+                    reduced_error_message = reduce_error(failure_message)
+                    reduced_error_dict[reduced_error_message].append(instance_dict)
+
+            if len(reduced_error_dict) > 0:
+                reduced_error_insert_on_conflict_do_nothing = (
+                    insert(ReducedError.__table__)
+                    .values(
+                        [
+                            {"message": reduced_error}
+                            for reduced_error in reduced_error_dict
+                        ]
                     )
+                    .on_conflict_do_nothing()
+                )
+                db_session.execute(reduced_error_insert_on_conflict_do_nothing)
+                db_session.flush()
+                db_session.commit()
+
+                reduced_error_messages = list(reduced_error_dict.keys())
+                reduced_errors = (
+                    db_session.query(ReducedError)
+                    .filter(ReducedError.message.in_(reduced_error_messages))
+                    .all()
                 )
 
+                for reduced_error in reduced_errors:
+                    for ti in reduced_error_dict[reduced_error.message]:
+                        ti["reduced_error_id"] = reduced_error.id
+
             # Save Tests
-            insert_on_conflict_do_nothing = (
+            test_insert_on_conflict_do_nothing = (
                 insert(Test.__table__).values(test_data).on_conflict_do_nothing()
             )
-            db_session.execute(insert_on_conflict_do_nothing)
+            db_session.execute(test_insert_on_conflict_do_nothing)
             db_session.flush()
+
             # Save TestInstances
             insert_test_instances = insert(TestInstance.__table__).values(
                 test_instance_data
             )
             db_session.execute(insert_test_instances)
             db_session.flush()
+
         # Memory outside the time metrics to not disturb the counter
         # Obviously this is a very rough estimate of sizes. We are interested more
         # in the difference between the insert approaches. So this should be fine.
@@ -190,7 +223,13 @@ def process_individual_upload(
         upload_id = upload_obj.id
         branch = upload_obj.report.commit.branch
         self._bulk_write_tests_to_db(
-            db_session, repoid, commitid, upload_id, branch, parsed_testruns, flags_hash
+            db_session,
+            repoid,
+            commitid,
+            upload_id,
+            branch,
+            parsed_testruns,
+            flags_hash,
         )
 
         return {

diff --git a/tasks/tests/unit/test_test_results_processor_task.py b/tasks/tests/unit/test_test_results_processor_task.py
@@ -6,7 +6,7 @@
 from test_results_parser import Outcome
 
 from database.models import CommitReport
-from database.models.reports import Test, TestInstance
+from database.models.reports import ReducedError, Test, TestInstance
 from database.tests.factories import CommitFactory, UploadFactory
 from services.test_results import generate_test_id
 from tasks.test_results_processor import (
@@ -79,11 +79,14 @@ def test_upload_processor_task_call(
         failures = (
             dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
         )
+        reduced_errors = dbsession.query(ReducedError).all()
 
         assert len(tests) == 4
         assert len(test_instances) == 4
         assert len(failures) == 1
 
+        assert len(reduced_errors) == 1
+
         assert (
             failures[0].failure_message
             == """def test_divide():\n&gt;       assert Calculator.divide(1, 2) == 0.5\nE       assert 1.0 == 0.5\nE        +  where 1.0 = &lt;function Calculator.divide at 0x104c9eb90&gt;(1, 2)\nE        +    where &lt;function Calculator.divide at 0x104c9eb90&gt; = Calculator.divide\n\napi/temp/calculator/test_calculator.py:30: AssertionError"""
@@ -95,6 +98,8 @@ def test_upload_processor_task_call(
         assert expected_result == result
         assert commit.message == "hello world"
 
+        assert failures[0].reduced_error_id == reduced_errors[0].id
+
         mock_metrics.incr.assert_has_calls(
             [
                 call(
@@ -164,11 +169,13 @@ def test_upload_processor_task_call_pytest_reportlog(
         failures = (
             dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
         )
+        reduced_errors = dbsession.query(ReducedError).all()
 
         assert len(tests) == 2
         assert len(test_instances) == 2
         assert len(failures) == 0
 
+        assert len(reduced_errors) == 0
         assert (
             tests[0].flags_hash
             == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
@@ -230,10 +237,18 @@ def test_upload_processor_task_call_vitest(
             dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
         )
 
+        reduced_errors = dbsession.query(ReducedError).all()
+
         assert len(tests) == 1
         assert len(test_instances) == 4
         assert len(failures) == 4
 
+        assert len(reduced_errors) == 1
+
+        assert all(
+            [failure.reduced_error_id == reduced_errors[0].id for failure in failures]
+        )
+
         assert (
             tests[0].flags_hash
             == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
@@ -296,7 +311,7 @@ def test_test_result_processor_task_error_report_matching(
             commit_yaml={"codecov": {"max_report_age": False}},
             arguments_list=redis_queue,
         )
-        print(caplog.text)
+
         assert "File did not match any parser format" in caplog.text
         mock_metrics.incr.assert_has_calls(
             [
@@ -366,7 +381,7 @@ def test_test_result_processor_task_error_parsing_file(
             commit_yaml={"codecov": {"max_report_age": False}},
             arguments_list=redis_queue,
         )
-        print(caplog.text)
+
         assert "Error parsing file" in caplog.text
         mock_metrics.incr.assert_has_calls(
             [
@@ -576,11 +591,16 @@ def test_upload_processor_task_call_existing_test(
         failures = (
             dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
         )
+        reduced_errors = dbsession.query(ReducedError).all()
 
         assert len(tests) == 4
         assert len(test_instances) == 4
         assert len(failures) == 1
 
+        assert len(reduced_errors) == 1
+
+        assert failures[0].reduced_error_id == reduced_errors[0].id
+
         assert (
             failures[0].failure_message
             == """def test_divide():\n&gt;       assert Calculator.divide(1, 2) == 0.5\nE       assert 1.0 == 0.5\nE        +  where 1.0 = &lt;function Calculator.divide at 0x104c9eb90&gt;(1, 2)\nE        +    where &lt;function Calculator.divide at 0x104c9eb90&gt; = Calculator.divide\n\napi/temp/calculator/test_calculator.py:30: AssertionError"""