Skip to content

Commit

Permalink
Merge pull request #146 from ymtdzzz/feature/mark_error_spans
Browse files Browse the repository at this point in the history
Mark the error spans in traces and timeline page
  • Loading branch information
ymtdzzz authored Aug 24, 2024
2 parents d4cee58 + 3bd8956 commit c41c0cf
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 47 deletions.
42 changes: 36 additions & 6 deletions tuiexporter/internal/telemetry/cache.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package telemetry

import "go.opentelemetry.io/collector/pdata/ptrace"

// SpanDataMap is a map of span id to span data
// This is used to quickly look up a span by its id
type SpanDataMap map[string]*SpanData
Expand All @@ -12,37 +14,49 @@ type TraceSpanDataMap map[string][]*SpanData
// This is used to quickly look up all spans in a trace for a service
type TraceServiceSpanDataMap map[string]map[string][]*SpanData

// TraceServiceHasErrorMap maps a trace id and a service name to a flag that
// indicates whether any span of that service in the trace has an error status
type TraceServiceHasErrorMap map[string]map[string]bool

// TraceCache is a cache of trace spans
type TraceCache struct {
spanid2span SpanDataMap
traceid2spans TraceSpanDataMap
tracesvc2spans TraceServiceSpanDataMap
spanid2span SpanDataMap
traceid2spans TraceSpanDataMap
tracesvc2spans TraceServiceSpanDataMap
tracesvc2haserror TraceServiceHasErrorMap
}

// NewTraceCache returns a new trace cache
func NewTraceCache() *TraceCache {
return &TraceCache{
spanid2span: SpanDataMap{},
traceid2spans: TraceSpanDataMap{},
tracesvc2spans: TraceServiceSpanDataMap{},
spanid2span: SpanDataMap{},
traceid2spans: TraceSpanDataMap{},
tracesvc2spans: TraceServiceSpanDataMap{},
tracesvc2haserror: TraceServiceHasErrorMap{},
}
}

// UpdateCache updates the cache with a new span
func (c *TraceCache) UpdateCache(sname string, data *SpanData) (newtracesvc bool) {
c.spanid2span[data.Span.SpanID().String()] = data
traceID := data.Span.TraceID().String()
hasError := spanHasError(data.Span)
if ts, ok := c.traceid2spans[traceID]; ok {
c.traceid2spans[traceID] = append(ts, data)
if _, ok := c.tracesvc2spans[traceID][sname]; ok {
c.tracesvc2spans[traceID][sname] = append(c.tracesvc2spans[traceID][sname], data)
if hasError {
c.tracesvc2haserror[traceID][sname] = hasError
}
} else {
c.tracesvc2spans[traceID][sname] = []*SpanData{data}
c.tracesvc2haserror[traceID][sname] = hasError
newtracesvc = true
}
} else {
c.traceid2spans[traceID] = []*SpanData{data}
c.tracesvc2spans[traceID] = map[string][]*SpanData{sname: {data}}
c.tracesvc2haserror[traceID] = map[string]bool{sname: hasError}
newtracesvc = true
}

Expand All @@ -62,8 +76,10 @@ func (c *TraceCache) DeleteCache(serviceSpans []*SpanData) {
}
}
delete(c.tracesvc2spans[traceID], sname.AsString())
delete(c.tracesvc2haserror[traceID], sname.AsString())
if len(c.tracesvc2spans[traceID]) == 0 {
delete(c.tracesvc2spans, traceID)
delete(c.tracesvc2haserror, traceID)
// delete spans in traceid2spans only if there are no spans left in tracesvc2spans
// for better performance
delete(c.traceid2spans, traceID)
Expand All @@ -87,6 +103,16 @@ func (c *TraceCache) GetSpansByTraceIDAndSvc(traceID, svc string) ([]*SpanData,
return nil, false
}

// HasErrorByTraceIDAndSvc reports whether the error flag recorded for the
// given trace id and service name is set. The second return value is false
// when the cache holds no entry for that trace id and service name pair.
func (c *TraceCache) HasErrorByTraceIDAndSvc(traceID, svc string) (bool, bool) {
	svcFlags, found := c.tracesvc2haserror[traceID]
	if !found {
		return false, false
	}
	flag, found := svcFlags[svc]
	if !found {
		return false, false
	}
	return flag, true
}

// GetSpanByID returns a span by its id
func (c *TraceCache) GetSpanByID(spanID string) (*SpanData, bool) {
span, ok := c.spanid2span[spanID]
Expand All @@ -99,6 +125,10 @@ func (c *TraceCache) flush() {
c.tracesvc2spans = TraceServiceSpanDataMap{}
}

// spanHasError reports whether the status code of the span is
// ptrace.StatusCodeError.
func spanHasError(span *ptrace.Span) bool {
	code := span.Status().Code()
	return code == ptrace.StatusCodeError
}

// TraceLogDataMap is a map of trace id to a slice of logs
// This is used to quickly look up all logs in a trace
type TraceLogDataMap map[string][]*LogData
Expand Down
2 changes: 1 addition & 1 deletion tuiexporter/internal/tui/component/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func (p *TUIPages) createTracePage(store *telemetry.Store) *tview.Flex {
table := tview.NewTable().
SetBorders(false).
SetSelectable(true, false).
SetContent(NewSpanDataForTable(store.GetFilteredSvcSpans())).
SetContent(NewSpanDataForTable(store.GetTraceCache(), store.GetFilteredSvcSpans())).
SetSelectedFunc(func(row, _ int) {
p.showTimelineByRow(store, row-1)
}).
Expand Down
15 changes: 13 additions & 2 deletions tuiexporter/internal/tui/component/timeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/gdamore/tcell/v2"
"github.com/rivo/tview"
"github.com/ymtdzzz/otel-tui/tuiexporter/internal/telemetry"
"go.opentelemetry.io/collector/pdata/ptrace"
)

const (
Expand Down Expand Up @@ -372,7 +373,11 @@ func newSpanTree(traceID string, cache *telemetry.TraceCache) (rootNodes []*span
st, en := span.Span.StartTimestamp().AsTime().Sub(start), span.Span.EndTimestamp().AsTime().Sub(start)
d := en - st
node.box = createSpan(colorMemo[sname.AsString()], duration, st, en)
node.label = fmt.Sprintf("%s %s", span.Span.Name(), d.String())
if span.Span.Status().Code() == ptrace.StatusCodeError {
node.label = fmt.Sprintf("[!] %s %s", span.Span.Name(), d.String())
} else {
node.label = fmt.Sprintf("%s %s", span.Span.Name(), d.String())
}

parent := span.Span.ParentSpanID().String()
_, parentExists := cache.GetSpanByID(parent)
Expand Down Expand Up @@ -484,7 +489,13 @@ func getSpanInfoTree(commands *tview.TextView, span *telemetry.SpanData, title s
smessageNode := tview.NewTreeNode(fmt.Sprintf("message: %s", smessage))
status.AddChild(smessageNode)
scode := span.Span.Status().Code()
scodeNode := tview.NewTreeNode(fmt.Sprintf("code: %s", scode))
scodeText := ""
if scode == ptrace.StatusCodeError {
scodeText = fmt.Sprintf("code: %s ⚠️", scode)
} else {
scodeText = fmt.Sprintf("code: %s", scode)
}
scodeNode := tview.NewTreeNode(scodeText)
status.AddChild(scodeNode)
root.AddChild(status)

Expand Down
59 changes: 36 additions & 23 deletions tuiexporter/internal/tui/component/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import (
"go.opentelemetry.io/collector/pdata/pcommon"
)

var spanTableHeader = [4]string{
var spanTableHeader = [5]string{
" ", // Error indicator
"Trace ID",
"Service Name",
"Received At",
Expand All @@ -20,13 +21,15 @@ var spanTableHeader = [4]string{
// SpanDataForTable is a wrapper for spans to be displayed in a table.
type SpanDataForTable struct {
tview.TableContentReadOnly
spans *telemetry.SvcSpans
tcache *telemetry.TraceCache
spans *telemetry.SvcSpans
}

// NewSpanDataForTable creates a new SpanDataForTable.
func NewSpanDataForTable(spans *telemetry.SvcSpans) SpanDataForTable {
func NewSpanDataForTable(tcache *telemetry.TraceCache, spans *telemetry.SvcSpans) SpanDataForTable {
return SpanDataForTable{
spans: spans,
tcache: tcache,
spans: spans,
}
}

Expand All @@ -37,7 +40,7 @@ func (s SpanDataForTable) GetCell(row, column int) *tview.TableCell {
return getHeaderCell(spanTableHeader[:], column)
}
if row > 0 && row <= len(*s.spans) {
return getCellFromSpan((*s.spans)[row-1], column)
return s.getCellFromSpan((*s.spans)[row-1], column)
}
return tview.NewTableCell("N/A")
}
Expand All @@ -50,38 +53,48 @@ func (s SpanDataForTable) GetColumnCount() int {
return len(spanTableHeader)
}

// getHeaderCell returns a non-selectable, yellow table cell holding the header
// text for the given column. When column is outside the bounds of header
// (negative, or not less than len(header)) the cell keeps the placeholder
// text "N/A" instead of panicking on an out-of-range index.
func getHeaderCell(header []string, column int) *tview.TableCell {
	cell := tview.NewTableCell("N/A").
		SetSelectable(false).
		SetTextColor(tcell.ColorYellow)
	// Guard both ends of the range: a negative index would panic just like an
	// index past the end of the slice.
	if column < 0 || column >= len(header) {
		return cell
	}
	cell.SetText(header[column])

	return cell
}

// getCellFromSpan returns a table cell for the given span and column.
func getCellFromSpan(span *telemetry.SpanData, column int) *tview.TableCell {
func (s SpanDataForTable) getCellFromSpan(span *telemetry.SpanData, column int) *tview.TableCell {
text := "N/A"

switch column {
case 0:
text = span.Span.TraceID().String()
case 1:
if serviceName, ok := span.ResourceSpan.Resource().Attributes().Get("service.name"); ok {
text = serviceName.AsString()
if s.tcache == nil {
return tview.NewTableCell("")
}
text = ""
if sname, ok := span.ResourceSpan.Resource().Attributes().Get("service.name"); ok {
if haserr, ok := s.tcache.HasErrorByTraceIDAndSvc(span.Span.TraceID().String(), sname.AsString()); ok && haserr {
text = "[!]"
}
}
case 1:
text = span.Span.TraceID().String()
case 2:
text = span.ReceivedAt.Local().Format("2006-01-02 15:04:05")
if sname, ok := span.ResourceSpan.Resource().Attributes().Get("service.name"); ok {
text = sname.AsString()
}
case 3:
text = span.ReceivedAt.Local().Format("2006-01-02 15:04:05")
case 4:
text = span.Span.Name()
}

return tview.NewTableCell(text)
}

// getHeaderCell returns a non-selectable, yellow table cell holding the header
// text for the given column. When column is outside the bounds of header
// (negative, or not less than len(header)) the cell keeps the placeholder
// text "N/A" instead of panicking on an out-of-range index.
func getHeaderCell(header []string, column int) *tview.TableCell {
	cell := tview.NewTableCell("N/A").
		SetSelectable(false).
		SetTextColor(tcell.ColorYellow)
	// Guard both ends of the range: a negative index would panic just like an
	// index past the end of the slice.
	if column < 0 || column >= len(header) {
		return cell
	}
	cell.SetText(header[column])

	return cell
}

func getTraceInfoTree(commands *tview.TextView, spans []*telemetry.SpanData) *tview.TreeView {
if len(spans) == 0 {
return tview.NewTreeView()
Expand Down
66 changes: 51 additions & 15 deletions tuiexporter/internal/tui/component/trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,51 +10,69 @@ import (
"github.com/stretchr/testify/assert"
"github.com/ymtdzzz/otel-tui/tuiexporter/internal/telemetry"
"github.com/ymtdzzz/otel-tui/tuiexporter/internal/test"
"go.opentelemetry.io/collector/pdata/ptrace"
)

func TestSpanDataForTable(t *testing.T) {
// traceid: 1
// └- resource: test-service-1
// | └- scope: test-scope-1-1
// | | └- span: span-1-1-1
// | | └- span: span-1-1-1 (code: Error)
// | | └- span: span-1-1-2
// | └- scope: test-scope-1-2
// | └- span: span-1-2-3
// └- resource: test-service-2
// └- scope: test-scope-2-1
// └- span: span-2-1-1
// └- span: span-2-1-1 (code: OK)
// traceid: 2
// └- resource: test-service-1
// └- scope: test-scope-1-1
// └- span: span-1-1-1
// └- span: span-1-1-1 (code: Unset)
_, testdata1 := test.GenerateOTLPTracesPayload(t, 1, 2, []int{2, 1}, [][]int{{2, 1}, {1}})
_, testdata2 := test.GenerateOTLPTracesPayload(t, 2, 1, []int{1}, [][]int{{1}})
receivedAt := time.Date(2024, 3, 30, 12, 30, 15, 0, time.UTC)
svcspans := &telemetry.SvcSpans{
&telemetry.SpanData{
testdata1.Spans[0].Status().SetCode(ptrace.StatusCodeError)
testdata1.Spans[3].Status().SetCode(ptrace.StatusCodeOk)
testdata2.Spans[0].Status().SetCode(ptrace.StatusCodeUnset)
svc1sds := []*telemetry.SpanData{
{
Span: testdata1.Spans[0],
ResourceSpan: testdata1.RSpans[0],
ReceivedAt: receivedAt,
}, // trace 1, span-1-1-1
&telemetry.SpanData{
{
Span: testdata1.Spans[3],
ResourceSpan: testdata1.RSpans[1],
ReceivedAt: receivedAt,
}, // trace 1, span-2-1-1
&telemetry.SpanData{
}
svc2sds := []*telemetry.SpanData{
{
Span: testdata2.Spans[0],
ResourceSpan: testdata2.RSpans[0],
ReceivedAt: receivedAt,
}, // trace 2, span-1-1-1
}
sdftable := NewSpanDataForTable(svcspans)
svcspans := &telemetry.SvcSpans{
svc1sds[0],
svc1sds[1],
svc2sds[0],
}
tcache := telemetry.NewTraceCache()
for _, sd := range svc1sds {
tcache.UpdateCache("test-service-1", sd)
}
for _, sd := range svc2sds {
tcache.UpdateCache("test-service-2", sd)
}
sdftable := NewSpanDataForTable(tcache, svcspans)

t.Run("GetRowCount", func(t *testing.T) {
assert.Equal(t, 4, sdftable.GetRowCount()) // including header row
})

t.Run("GetColumnCount", func(t *testing.T) {
assert.Equal(t, 4, sdftable.GetColumnCount())
assert.Equal(t, 5, sdftable.GetColumnCount())
})

t.Run("GetCell", func(t *testing.T) {
Expand All @@ -67,37 +85,55 @@ func TestSpanDataForTable(t *testing.T) {
{
name: "invalid row",
row: 3,
column: 0,
column: 1,
want: "N/A",
},
{
name: "invalid column",
row: 0,
column: 4,
column: 5,
want: "N/A",
},
{
name: "trace ID trace 1 span-1-1-1",
name: "has error trace 1 span-1-1-1",
row: 0,
column: 0,
want: "[!]",
},
{
name: "has no errors (OK) trace 1 span-2-1-1",
row: 1,
column: 0,
want: "",
},
{
name: "has no errors (Unset) trace 2 span-1-1-1",
row: 2,
column: 0,
want: "",
},
{
name: "trace ID trace 1 span-1-1-1",
row: 0,
column: 1,
want: "01000000000000000000000000000000",
},
{
name: "service name trace 1 span-2-1-1",
row: 1,
column: 1,
column: 2,
want: "test-service-2",
},
{
name: "received at trace 2 span-1-1-1",
row: 2,
column: 2,
column: 3,
want: receivedAt.Local().Format("2006-01-02 15:04:05"),
},
{
name: "span name trace 2 span-1-1-1",
row: 2,
column: 3,
column: 4,
want: "span-0-0-0",
},
}
Expand Down

0 comments on commit c41c0cf

Please sign in to comment.