Skip to content

Commit

Permalink
feat: create reduced errors when processing test instances
Browse files: browse the repository at this point in the history
  • Loading branch information
joseph-sentry committed Jul 4, 2024
1 parent c48ec49 commit ac53b62
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 25 deletions.
83 changes: 61 additions & 22 deletions tasks/test_results_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import zlib
from collections import defaultdict
from io import BytesIO
from sys import getsizeof
from typing import List
Expand All @@ -21,8 +22,9 @@
)

from app import celery_app
from database.models import Repository, Test, TestInstance, Upload
from database.models import ReducedError, Repository, Test, TestInstance, Upload
from services.archive import ArchiveService
from services.failure_normalizer import reduce_error
from services.test_results import generate_flags_hash, generate_test_id
from services.yaml import read_yaml_field
from tasks.base import BaseCodecovTask
Expand Down Expand Up @@ -110,6 +112,8 @@ def _bulk_write_tests_to_db(
with metrics.timing(key="test_results.processor.write_to_db"):
test_data = []
test_instance_data = []

reduced_error_dict = defaultdict(list)
for testrun in parsed_testruns:
# Build up the data for bulk insert
name = testrun.name
Expand All @@ -119,41 +123,70 @@ def _bulk_write_tests_to_db(
failure_message = testrun.failure_message
test_id = generate_test_id(repoid, testsuite, name, flags_hash)

test_data.append(
dict(
id=test_id,
repoid=repoid,
name=name,
testsuite=testsuite,
flags_hash=flags_hash,
)
test_dict = dict(
id=test_id,
repoid=repoid,
name=name,
testsuite=testsuite,
flags_hash=flags_hash,
)
test_data.append(test_dict)

test_instance_data.append(
dict(
test_id=test_id,
upload_id=upload_id,
duration_seconds=duration_seconds,
outcome=outcome,
failure_message=failure_message,
commitid=commitid,
branch=branch,
reduced_error_id=None,
instance_dict = dict(
test_id=test_id,
upload_id=upload_id,
duration_seconds=duration_seconds,
outcome=outcome,
failure_message=failure_message,
commitid=commitid,
branch=branch,
reduced_error_id=None,
)
test_instance_data.append(instance_dict)
if failure_message:
reduced_error_message = reduce_error(failure_message)
reduced_error_dict[reduced_error_message].append(instance_dict)

if len(reduced_error_dict) > 0:
reduced_error_insert_on_conflict_do_nothing = (
insert(ReducedError.__table__)
.values(
[
{"message": reduced_error}
for reduced_error in reduced_error_dict
]
)
.on_conflict_do_nothing()
)
db_session.execute(reduced_error_insert_on_conflict_do_nothing)
db_session.flush()
db_session.commit()

reduced_error_messages = list(reduced_error_dict.keys())
reduced_errors = (
db_session.query(ReducedError)
.filter(ReducedError.message.in_(reduced_error_messages))
.all()
)

for reduced_error in reduced_errors:
for ti in reduced_error_dict[reduced_error.message]:
ti["reduced_error_id"] = reduced_error.id

# Save Tests
insert_on_conflict_do_nothing = (
test_insert_on_conflict_do_nothing = (
insert(Test.__table__).values(test_data).on_conflict_do_nothing()
)
db_session.execute(insert_on_conflict_do_nothing)
db_session.execute(test_insert_on_conflict_do_nothing)
db_session.flush()

# Save TestInstances
insert_test_instances = insert(TestInstance.__table__).values(
test_instance_data
)
db_session.execute(insert_test_instances)
db_session.flush()

# Memory outside the time metrics to not disturb the counter
# Obviously this is a very rough estimate of sizes. We are interested more
# in the difference between the insert approaches. So this should be fine.
Expand Down Expand Up @@ -190,7 +223,13 @@ def process_individual_upload(
upload_id = upload_obj.id
branch = upload_obj.report.commit.branch
self._bulk_write_tests_to_db(
db_session, repoid, commitid, upload_id, branch, parsed_testruns, flags_hash
db_session,
repoid,
commitid,
upload_id,
branch,
parsed_testruns,
flags_hash,
)

return {
Expand Down
26 changes: 23 additions & 3 deletions tasks/tests/unit/test_test_results_processor_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from test_results_parser import Outcome

from database.models import CommitReport
from database.models.reports import Test, TestInstance
from database.models.reports import ReducedError, Test, TestInstance
from database.tests.factories import CommitFactory, UploadFactory
from services.test_results import generate_test_id
from tasks.test_results_processor import (
Expand Down Expand Up @@ -79,11 +79,14 @@ def test_upload_processor_task_call(
failures = (
dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
)
reduced_errors = dbsession.query(ReducedError).all()

assert len(tests) == 4
assert len(test_instances) == 4
assert len(failures) == 1

assert len(reduced_errors) == 1

assert (
failures[0].failure_message
== """def test_divide():\n> assert Calculator.divide(1, 2) == 0.5\nE assert 1.0 == 0.5\nE + where 1.0 = <function Calculator.divide at 0x104c9eb90>(1, 2)\nE + where <function Calculator.divide at 0x104c9eb90> = Calculator.divide\n\napi/temp/calculator/test_calculator.py:30: AssertionError"""
Expand All @@ -95,6 +98,8 @@ def test_upload_processor_task_call(
assert expected_result == result
assert commit.message == "hello world"

assert failures[0].reduced_error_id == reduced_errors[0].id

mock_metrics.incr.assert_has_calls(
[
call(
Expand Down Expand Up @@ -164,11 +169,13 @@ def test_upload_processor_task_call_pytest_reportlog(
failures = (
dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
)
reduced_errors = dbsession.query(ReducedError).all()

assert len(tests) == 2
assert len(test_instances) == 2
assert len(failures) == 0

assert len(reduced_errors) == 0
assert (
tests[0].flags_hash
== "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
Expand Down Expand Up @@ -230,10 +237,18 @@ def test_upload_processor_task_call_vitest(
dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
)

reduced_errors = dbsession.query(ReducedError).all()

assert len(tests) == 1
assert len(test_instances) == 4
assert len(failures) == 4

assert len(reduced_errors) == 1

assert all(
[failure.reduced_error_id == reduced_errors[0].id for failure in failures]
)

assert (
tests[0].flags_hash
== "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
Expand Down Expand Up @@ -296,7 +311,7 @@ def test_test_result_processor_task_error_report_matching(
commit_yaml={"codecov": {"max_report_age": False}},
arguments_list=redis_queue,
)
print(caplog.text)

assert "File did not match any parser format" in caplog.text
mock_metrics.incr.assert_has_calls(
[
Expand Down Expand Up @@ -366,7 +381,7 @@ def test_test_result_processor_task_error_parsing_file(
commit_yaml={"codecov": {"max_report_age": False}},
arguments_list=redis_queue,
)
print(caplog.text)

assert "Error parsing file" in caplog.text
mock_metrics.incr.assert_has_calls(
[
Expand Down Expand Up @@ -576,11 +591,16 @@ def test_upload_processor_task_call_existing_test(
failures = (
dbsession.query(TestInstance).filter_by(outcome=str(Outcome.Failure)).all()
)
reduced_errors = dbsession.query(ReducedError).all()

assert len(tests) == 4
assert len(test_instances) == 4
assert len(failures) == 1

assert len(reduced_errors) == 1

assert failures[0].reduced_error_id == reduced_errors[0].id

assert (
failures[0].failure_message
== """def test_divide():\n> assert Calculator.divide(1, 2) == 0.5\nE assert 1.0 == 0.5\nE + where 1.0 = <function Calculator.divide at 0x104c9eb90>(1, 2)\nE + where <function Calculator.divide at 0x104c9eb90> = Calculator.divide\n\napi/temp/calculator/test_calculator.py:30: AssertionError"""
Expand Down

0 comments on commit ac53b62

Please sign in to comment.