Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(similarity): Add logging for over 30 system frames #81130

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,11 @@ def get_seer_similar_issues(
should go in (if any), or None if no neighbor was near enough.
"""
event_hash = event.get_primary_hash()
stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants))
# Temporarily attach the project id to the grouping info so get_stacktrace_string can include it in its log entry
# TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed
grouping_info = get_grouping_info_from_variants(variants)
grouping_info["project_id"] = event.project.id
stacktrace_string = get_stacktrace_string(grouping_info)
exception_type = get_path(event.data, "exception", "values", -1, "type")

request_data: SimilarIssuesEmbeddingsRequest = {
Expand Down
12 changes: 12 additions & 0 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,18 +179,22 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
html_frame_count = 0 # for a temporary metric
stacktrace_str = ""
found_non_snipped_context_line = False
is_frames_truncated = False

metrics.distribution("seer.grouping.exceptions.length", len(exceptions))

def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
nonlocal frame_count
nonlocal html_frame_count
nonlocal found_non_snipped_context_line
nonlocal is_frames_truncated
frame_strings = []

contributing_frames = [
frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes")
]
if len(contributing_frames) + frame_count > MAX_FRAME_COUNT:
is_frames_truncated = True
contributing_frames = _discard_excess_frames(
contributing_frames, MAX_FRAME_COUNT, frame_count
)
Expand Down Expand Up @@ -287,6 +291,14 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
},
)

if is_frames_truncated and not app_hash:
logger_extra = {
"project_id": data.get("project_id", ""),
"hash": system_hash,
"stacktrace_str": stacktrace_str.strip(),
}
logger.info("grouping.similarity.over_threshold_system_only_frames", extra=logger_extra)

return stacktrace_str.strip()


Expand Down
3 changes: 3 additions & 0 deletions src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@ def get_events_from_nodestore(
event._project_cache = project
if event and event.data and event_content_has_stacktrace(event):
grouping_info = get_grouping_info(None, project=project, event=event)
# Temporarily attach the project id to the grouping info so get_stacktrace_string can include it in its log entry
# TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed
grouping_info["project_id"] = project.id
stacktrace_string = get_stacktrace_string(grouping_info)
if stacktrace_string == "":
invalid_event_group_ids.append(group_id)
Expand Down
48 changes: 47 additions & 1 deletion tests/sentry/grouping/ingest/test_seer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from dataclasses import asdict
from time import time
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock, Mock, patch

from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
from sentry.eventstore.models import Event
from sentry.grouping.grouping_info import get_grouping_info_from_variants
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.types import SeerSimilarIssueData
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers.eventprocessing import save_new_event
from sentry.testutils.helpers.options import override_options
Expand Down Expand Up @@ -261,3 +263,47 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self)
expected_metadata,
None,
)

@patch("sentry.seer.similarity.utils.logger")
def test_too_many_only_system_frames(self, mock_logger: Mock) -> None:
type = "FailedToFetchError"
value = "Charlie didn't bring the ball back"
context_line = f"raise {type}('{value}')"
new_event = Event(
project_id=self.project.id,
event_id="22312012112120120908201304152013",
data={
"title": f"{type}('{value}')",
"exception": {
"values": [
{
"type": type,
"value": value,
"stacktrace": {
"frames": [
{
"function": f"play_fetch_{i}",
"filename": f"dogpark{i}.py",
"context_line": context_line,
}
for i in range(MAX_FRAME_COUNT + 1)
]
},
}
]
},
"platform": "python",
},
)
variants = new_event.get_grouping_variants()
get_seer_similar_issues(new_event, variants)

grouping_info = get_grouping_info_from_variants(variants)
mock_logger.info.assert_called_with(
"grouping.similarity.over_threshold_system_only_frames",
extra={
"project_id": self.project.id,
"hash": grouping_info["system"]["hash"],
"stacktrace_str": "FailedToFetchError: Charlie didn't bring the ball back\n File \"dogpark1.py\", function play_fetch_1\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark2.py\", function play_fetch_2\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark3.py\", function play_fetch_3\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark4.py\", function play_fetch_4\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark5.py\", function play_fetch_5\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark6.py\", function play_fetch_6\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark7.py\", function play_fetch_7\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark8.py\", function play_fetch_8\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark9.py\", function play_fetch_9\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark10.py\", function play_fetch_10\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark11.py\", function play_fetch_11\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark12.py\", function play_fetch_12\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark13.py\", function play_fetch_13\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark14.py\", function play_fetch_14\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark15.py\", function play_fetch_15\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark16.py\", function play_fetch_16\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark17.py\", function play_fetch_17\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File 
\"dogpark18.py\", function play_fetch_18\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark19.py\", function play_fetch_19\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark20.py\", function play_fetch_20\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark21.py\", function play_fetch_21\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark22.py\", function play_fetch_22\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark23.py\", function play_fetch_23\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark24.py\", function play_fetch_24\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark25.py\", function play_fetch_25\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark26.py\", function play_fetch_26\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark27.py\", function play_fetch_27\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark28.py\", function play_fetch_28\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark29.py\", function play_fetch_29\n raise FailedToFetchError('Charlie didn't bring the ball back')\n File \"dogpark30.py\", function play_fetch_30\n raise FailedToFetchError('Charlie didn't bring the ball back')",
},
)
48 changes: 47 additions & 1 deletion tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import copy
from collections.abc import Callable
from typing import Any, Literal, cast
from unittest.mock import patch
from uuid import uuid1

from sentry.eventstore.models import Event
from sentry.seer.similarity.utils import (
BASE64_ENCODED_PREFIXES,
MAX_FRAME_COUNT,
SEER_ELIGIBLE_PLATFORMS,
_is_snipped_context_line,
event_content_is_seer_eligible,
Expand Down Expand Up @@ -710,7 +712,51 @@ def test_no_app_no_system(self):
stacktrace_str = get_stacktrace_string(data)
assert stacktrace_str == ""

def test_over_30_contributing_frames(self):
@patch("sentry.seer.similarity.utils.logger")
def test_too_many_system_frames_single_exception(self, mock_logger):
data_system = copy.deepcopy(self.BASE_APP_DATA)
data_system["system"] = data_system.pop("app")
data_system["system"]["component"]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT + 1, True)
data_system["project_id"] = self.project.id

get_stacktrace_string(data_system)

mock_logger.info.assert_called_with(
"grouping.similarity.over_threshold_system_only_frames",
extra={
"project_id": self.project.id,
"hash": data_system["system"]["hash"],
"stacktrace_str": 'ZeroDivisionError: division by zero\n File "hello.py", function hello_there\n test = 2!\n File "hello.py", function hello_there\n test = 3!\n File "hello.py", function hello_there\n test = 4!\n File "hello.py", function hello_there\n test = 5!\n File "hello.py", function hello_there\n test = 6!\n File "hello.py", function hello_there\n test = 7!\n File "hello.py", function hello_there\n test = 8!\n File "hello.py", function hello_there\n test = 9!\n File "hello.py", function hello_there\n test = 10!\n File "hello.py", function hello_there\n test = 11!\n File "hello.py", function hello_there\n test = 12!\n File "hello.py", function hello_there\n test = 13!\n File "hello.py", function hello_there\n test = 14!\n File "hello.py", function hello_there\n test = 15!\n File "hello.py", function hello_there\n test = 16!\n File "hello.py", function hello_there\n test = 17!\n File "hello.py", function hello_there\n test = 18!\n File "hello.py", function hello_there\n test = 19!\n File "hello.py", function hello_there\n test = 20!\n File "hello.py", function hello_there\n test = 21!\n File "hello.py", function hello_there\n test = 22!\n File "hello.py", function hello_there\n test = 23!\n File "hello.py", function hello_there\n test = 24!\n File "hello.py", function hello_there\n test = 25!\n File "hello.py", function hello_there\n test = 26!\n File "hello.py", function hello_there\n test = 27!\n File "hello.py", function hello_there\n test = 28!\n File "hello.py", function hello_there\n test = 29!\n File "hello.py", function hello_there\n test = 30!\n File "hello.py", function hello_there\n test = 31!',
},
)

@patch("sentry.seer.similarity.utils.logger")
def test_too_many_system_frames_chained_exception(self, mock_logger):
data_system = copy.deepcopy(self.CHAINED_APP_DATA)
data_system["system"] = data_system.pop("app")
data_system["project_id"] = self.project.id
# Split MAX_FRAME_COUNT across the two exceptions
data_system["system"]["component"]["values"][0]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
data_system["system"]["component"]["values"][0]["values"][1]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)

get_stacktrace_string(data_system)

mock_logger.info.assert_called_with(
"grouping.similarity.over_threshold_system_only_frames",
extra={
"project_id": self.project.id,
"hash": data_system["system"]["hash"],
"stacktrace_str": 'Exception: Catch divide by zero error\n File "python_onboarding.py", function <module>\n divide_by_zero()\n File "python_onboarding.py", function divide_by_zero\n raise Exception("Catch divide by zero error")\n File "hello.py", function hello_there\n test = 1!\n File "hello.py", function hello_there\n test = 2!\n File "hello.py", function hello_there\n test = 3!\n File "hello.py", function hello_there\n test = 4!\n File "hello.py", function hello_there\n test = 5!\n File "hello.py", function hello_there\n test = 6!\n File "hello.py", function hello_there\n test = 7!\n File "hello.py", function hello_there\n test = 8!\n File "hello.py", function hello_there\n test = 9!\n File "hello.py", function hello_there\n test = 10!\n File "hello.py", function hello_there\n test = 11!\n File "hello.py", function hello_there\n test = 12!\n File "hello.py", function hello_there\n test = 13!\n File "hello.py", function hello_there\n test = 14!\n File "hello.py", function hello_there\n test = 15!\nZeroDivisionError: division by zero\n File "hello.py", function hello_there\n test = 3!\n File "hello.py", function hello_there\n test = 4!\n File "hello.py", function hello_there\n test = 5!\n File "hello.py", function hello_there\n test = 6!\n File "hello.py", function hello_there\n test = 7!\n File "hello.py", function hello_there\n test = 8!\n File "hello.py", function hello_there\n test = 9!\n File "hello.py", function hello_there\n test = 10!\n File "hello.py", function hello_there\n test = 11!\n File "hello.py", function hello_there\n test = 12!\n File "hello.py", function hello_there\n test = 13!\n File "hello.py", function hello_there\n test = 14!\n File "hello.py", function hello_there\n test = 15!',
},
)

def test_too_many_in_app_contributing_frames(self):
"""Check that when there are over 30 contributing frames, the last 30 are included."""

data_frames = copy.deepcopy(self.BASE_APP_DATA)
Expand Down
Loading
Loading