Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(similarity): Add logging for over 30 system frames #81130

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,13 @@ def get_seer_similar_issues(
should go in (if any), or None if no neighbor was near enough.
"""
event_hash = event.get_primary_hash()
# Temporarily add project and event id to this for logging purposes
# TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed
grouping_info = get_grouping_info_from_variants(variants)
grouping_info["project_id"] = event.project.id # type: ignore[assignment]
grouping_info["event_id"] = event.event_id # type: ignore[assignment]
stacktrace_string = get_stacktrace_string(grouping_info)

exception_type = get_path(event.data, "exception", "values", -1, "type")

request_data: SimilarIssuesEmbeddingsRequest = {
Expand Down
12 changes: 12 additions & 0 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,18 +179,22 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
html_frame_count = 0 # for a temporary metric
stacktrace_str = ""
found_non_snipped_context_line = False
is_frames_truncated = False

metrics.distribution("seer.grouping.exceptions.length", len(exceptions))

def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
nonlocal frame_count
nonlocal html_frame_count
nonlocal found_non_snipped_context_line
nonlocal is_frames_truncated
frame_strings = []

contributing_frames = [
frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes")
]
if len(contributing_frames) + frame_count > MAX_FRAME_COUNT:
is_frames_truncated = True
contributing_frames = _discard_excess_frames(
contributing_frames, MAX_FRAME_COUNT, frame_count
)
Expand Down Expand Up @@ -287,6 +291,14 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
},
)

if is_frames_truncated and not app_hash:
logger_extra = {
"project_id": data.get("project_id", ""),
"event_id": data.get("event_id", ""),
"hash": system_hash,
}
logger.info("grouping.similarity.over_threshold_system_only_frames", extra=logger_extra)

return stacktrace_str.strip()


Expand Down
4 changes: 4 additions & 0 deletions src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,10 @@ def get_events_from_nodestore(
event._project_cache = project
if event and event.data and event_content_has_stacktrace(event):
grouping_info = get_grouping_info(None, project=project, event=event)
# Temporarily add project and event id to this for logging purposes
# TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed
grouping_info["project_id"] = project.id
grouping_info["event_id"] = event.event_id # type: ignore[assignment]
stacktrace_string = get_stacktrace_string(grouping_info)
if stacktrace_string == "":
invalid_event_group_ids.append(group_id)
Expand Down
46 changes: 46 additions & 0 deletions tests/sentry/grouping/ingest/test_seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from sentry import options
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
from sentry.eventstore.models import Event
from sentry.grouping.grouping_info import get_grouping_info_from_variants
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.types import SeerSimilarIssueData
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers.eventprocessing import save_new_event
from sentry.testutils.helpers.options import override_options
Expand Down Expand Up @@ -306,3 +308,47 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self)
expected_metadata,
None,
)

@patch("sentry.seer.similarity.utils.logger")
def test_too_many_only_system_frames(self, mock_logger: Mock) -> None:
    """
    When an event has more than MAX_FRAME_COUNT contributing frames and no app
    hash, `get_seer_similar_issues` should emit the
    `grouping.similarity.over_threshold_system_only_frames` log with the
    project id, event id, and system hash.
    """
    # Renamed from `type`/`value` to avoid shadowing the builtins.
    error_type = "FailedToFetchError"
    error_value = "Charlie didn't bring the ball back"
    context_line = f"raise {error_type}('{error_value}')"
    new_event = Event(
        project_id=self.project.id,
        event_id="22312012112120120908201304152013",
        data={
            "title": f"{error_type}('{error_value}')",
            "exception": {
                "values": [
                    {
                        "type": error_type,
                        "value": error_value,
                        "stacktrace": {
                            # One more frame than the limit, to force truncation
                            # and therefore the log line under test.
                            "frames": [
                                {
                                    "function": f"play_fetch_{i}",
                                    "filename": f"dogpark{i}.py",
                                    "context_line": context_line,
                                }
                                for i in range(MAX_FRAME_COUNT + 1)
                            ]
                        },
                    }
                ]
            },
            "platform": "python",
        },
    )
    variants = new_event.get_grouping_variants()
    get_seer_similar_issues(new_event, variants)

    grouping_info = get_grouping_info_from_variants(variants)
    mock_logger.info.assert_called_with(
        "grouping.similarity.over_threshold_system_only_frames",
        extra={
            "project_id": self.project.id,
            "event_id": new_event.event_id,
            "hash": grouping_info["system"]["hash"],
        },
    )
50 changes: 49 additions & 1 deletion tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import copy
from collections.abc import Callable
from typing import Any, Literal, cast
from unittest.mock import patch
from uuid import uuid1

from sentry.eventstore.models import Event
from sentry.seer.similarity.utils import (
BASE64_ENCODED_PREFIXES,
MAX_FRAME_COUNT,
SEER_ELIGIBLE_PLATFORMS,
_is_snipped_context_line,
event_content_is_seer_eligible,
Expand Down Expand Up @@ -710,7 +712,53 @@ def test_no_app_no_system(self):
stacktrace_str = get_stacktrace_string(data)
assert stacktrace_str == ""

def test_over_30_contributing_frames(self):
@patch("sentry.seer.similarity.utils.logger")
def test_too_many_system_frames_single_exception(self, mock_logger):
    """A single exception with over-threshold system-only frames triggers the log."""
    data = copy.deepcopy(self.BASE_APP_DATA)
    # Reuse the app fixture as a system-only stacktrace, padded past the limit.
    data["system"] = data.pop("app")
    extra_frames = self.create_frames(MAX_FRAME_COUNT + 1, True)
    data["system"]["component"]["values"][0]["values"][0]["values"] += extra_frames
    data["project_id"] = self.project.id
    data["event_id"] = "39485673049520"

    get_stacktrace_string(data)

    expected_extra = {
        "project_id": self.project.id,
        "event_id": data["event_id"],
        "hash": data["system"]["hash"],
    }
    mock_logger.info.assert_called_with(
        "grouping.similarity.over_threshold_system_only_frames",
        extra=expected_extra,
    )

@patch("sentry.seer.similarity.utils.logger")
def test_too_many_system_frames_chained_exception(self, mock_logger):
    """Over-threshold system-only frames spread across a chained exception trigger the log."""
    data = copy.deepcopy(self.CHAINED_APP_DATA)
    # Reuse the chained app fixture as a system-only stacktrace.
    data["system"] = data.pop("app")
    data["project_id"] = self.project.id
    data["event_id"] = "39485673049520"
    # Split MAX_FRAME_COUNT across the two exceptions
    chained_exceptions = data["system"]["component"]["values"][0]["values"]
    for exception_index in (0, 1):
        chained_exceptions[exception_index]["values"][0]["values"] += self.create_frames(
            MAX_FRAME_COUNT // 2, True
        )

    get_stacktrace_string(data)

    mock_logger.info.assert_called_with(
        "grouping.similarity.over_threshold_system_only_frames",
        extra={
            "project_id": self.project.id,
            "event_id": data["event_id"],
            "hash": data["system"]["hash"],
        },
    )

def test_too_many_in_app_contributing_frames(self):
"""Check that when there are over 30 contributing frames, the last 30 are included."""

data_frames = copy.deepcopy(self.BASE_APP_DATA)
Expand Down
64 changes: 64 additions & 0 deletions tests/sentry/tasks/test_backfill_seer_grouping_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.grouping_records import CreateGroupingRecordData
from sentry.seer.similarity.types import RawSeerSimilarIssueData
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
from sentry.snuba.dataset import Dataset
from sentry.snuba.referrer import Referrer
from sentry.tasks.embeddings_grouping.backfill_seer_grouping_records_for_project import (
Expand Down Expand Up @@ -365,6 +366,69 @@ def test_lookup_group_data_stacktrace_bulk_no_stacktrace_exception(self):
assert bulk_group_data_stacktraces["data"] == expected_group_data
assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces

@patch("sentry.seer.similarity.utils.logger")
def test_lookup_group_data_stacktrace_bulk_too_many_system_frames(self, mock_logger):
    """
    Test that if a group has over MAX_FRAME_COUNT only system frames, it is logged
    and included in results
    """
    # Use 2 events
    rows, events, hashes = self.bulk_rows[:2], self.bulk_events[:2], {}
    group_ids = [row["group_id"] for row in rows]
    for group_id in group_ids:
        hashes.update({group_id: self.group_hashes[group_id]})
    # Create one event where the stacktrace has over MAX_FRAME_COUNT system only frames
    exception: Any = copy.deepcopy(EXCEPTION)
    exception["values"][0]["stacktrace"]["frames"] += [
        {
            "function": f"divide_by_zero_{i}",
            "module": "__main__",
            # BUG FIX: the f-prefix was missing on these two strings, so every
            # frame carried the literal text "python_onboarding_{i}.py" instead
            # of a per-frame unique filename/path.
            "filename": f"python_onboarding_{i}.py",
            "abs_path": f"/Users/user/python_onboarding/python_onboarding_{i}.py",
            "lineno": i,
            # NOTE(review): frames are marked in_app=True even though the test
            # targets "system only" frames — presumably grouping enhancements
            # demote them; confirm against the fixture's grouping config.
            "in_app": True,
        }
        for i in range(MAX_FRAME_COUNT + 1)
    ]
    event = self.store_event(
        data={
            "platform": "python",
            "exception": exception,
            "title": "title",
            "timestamp": before_now(seconds=10).isoformat(),
        },
        project_id=self.project.id,
        assert_no_errors=False,
    )
    rows.append({"event_id": event.event_id, "group_id": event.group_id})
    group_hash = GroupHash.objects.filter(group_id=event.group.id).first()
    assert group_hash
    hashes.update({event.group_id: group_hash.hash})
    events.append(event)

    bulk_group_data_stacktraces, _ = get_events_from_nodestore(self.project, rows, group_ids)
    expected_group_data = [
        CreateGroupingRecordData(
            group_id=event.group.id,
            hash=hashes[event.group.id],
            project_id=self.project.id,
            exception_type=get_path(event.data, "exception", "values", -1, "type"),
        )
        for event in events
    ]

    # The over-threshold event is still included in the backfill payload...
    assert bulk_group_data_stacktraces["data"] == expected_group_data
    assert len(bulk_group_data_stacktraces["stacktrace_list"]) == len(events)

    # ...and the over-threshold condition is logged with identifying context.
    mock_logger.info.assert_called_with(
        "grouping.similarity.over_threshold_system_only_frames",
        extra={
            "project_id": self.project.id,
            "event_id": event.event_id,
            "hash": group_hash.hash,
        },
    )

def test_lookup_group_data_stacktrace_bulk_with_fallback_success(self):
"""Test successful bulk lookup with fallback, where the fallback isn't used"""
rows, events, hashes = (
Expand Down
Loading