Skip to content

Commit

Permalink
Add system to track only Autometricized parents
Browse files Browse the repository at this point in the history
The parent tracking is span-based (using `(TraceID, SpanId)` as keys)

It falls back to the previous method (`runtime.Callers()` analysis) if
the function id for a given span is missing from the global state.

To prevent the global state from ballooning memory usage, the entries
in the span-to-functionIDs table are deleted as part of the cleanup
code in `Instrument` (that is why some goroutines outliving their
callers with weird scheduling might miss the function id in the
global state.)

This solution deals well with functions spawning multiple goroutines,
but the caller info will be "enhanced" (i.e. with Autometrics annotated
functions in the parent field) only if the context created by
autometrics.PreInstrument is properly passed to callees, which is
a task followed in #78

Fixes: AM-42
  • Loading branch information
gagbo committed Nov 2, 2023
1 parent 40f4dee commit 2589958
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 82 deletions.
43 changes: 24 additions & 19 deletions otel/autometrics/instrument.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ func Instrument(ctx context.Context, err *error) {

functionCallsCount.Add(ctx, 1,
metric.WithAttributes([]attribute.KeyValue{
attribute.Key(FunctionLabel).String(callInfo.FuncName),
attribute.Key(ModuleLabel).String(callInfo.ModuleName),
attribute.Key(CallerFunctionLabel).String(callInfo.ParentFuncName),
attribute.Key(CallerModuleLabel).String(callInfo.ParentModuleName),
attribute.Key(FunctionLabel).String(callInfo.Current.Function),
attribute.Key(ModuleLabel).String(callInfo.Current.Module),
attribute.Key(CallerFunctionLabel).String(callInfo.Parent.Function),
attribute.Key(CallerModuleLabel).String(callInfo.Parent.Module),
attribute.Key(ResultLabel).String(result),
attribute.Key(TargetSuccessRateLabel).String(successObjective),
attribute.Key(SloNameLabel).String(sloName),
Expand All @@ -62,10 +62,10 @@ func Instrument(ctx context.Context, err *error) {
}...))
functionCallsDuration.Record(ctx, time.Since(am.GetStartTime(ctx)).Seconds(),
metric.WithAttributes([]attribute.KeyValue{
attribute.Key(FunctionLabel).String(callInfo.FuncName),
attribute.Key(ModuleLabel).String(callInfo.ModuleName),
attribute.Key(CallerFunctionLabel).String(callInfo.ParentFuncName),
attribute.Key(CallerModuleLabel).String(callInfo.ParentModuleName),
attribute.Key(FunctionLabel).String(callInfo.Current.Function),
attribute.Key(ModuleLabel).String(callInfo.Current.Module),
attribute.Key(CallerFunctionLabel).String(callInfo.Parent.Function),
attribute.Key(CallerModuleLabel).String(callInfo.Parent.Module),
attribute.Key(TargetLatencyLabel).String(latencyTarget),
attribute.Key(TargetSuccessRateLabel).String(latencyObjective),
attribute.Key(SloNameLabel).String(sloName),
Expand All @@ -79,17 +79,22 @@ func Instrument(ctx context.Context, err *error) {
if am.GetTrackConcurrentCalls(ctx) {
functionCallsConcurrent.Add(ctx, -1,
metric.WithAttributes([]attribute.KeyValue{
attribute.Key(FunctionLabel).String(callInfo.FuncName),
attribute.Key(ModuleLabel).String(callInfo.ModuleName),
attribute.Key(CallerFunctionLabel).String(callInfo.ParentFuncName),
attribute.Key(CallerModuleLabel).String(callInfo.ParentModuleName),
attribute.Key(FunctionLabel).String(callInfo.Current.Function),
attribute.Key(ModuleLabel).String(callInfo.Current.Module),
attribute.Key(CallerFunctionLabel).String(callInfo.Parent.Function),
attribute.Key(CallerModuleLabel).String(callInfo.Parent.Module),
attribute.Key(CommitLabel).String(buildInfo.Commit),
attribute.Key(VersionLabel).String(buildInfo.Version),
attribute.Key(BranchLabel).String(buildInfo.Branch),
attribute.Key(ServiceNameLabel).String(buildInfo.Service),
attribute.Key(JobNameLabel).String(am.GetPushJobName()),
}...))
}

// NOTE: This call means that goroutines that outlive this function as the caller will not have access to parent
// caller information, but hopefully by that point we got all the necessary accesses done.
// If not, it is a convenience we accept to give up to prevent memory usage from exploding
am.PopFunctionName(ctx)
}

// PreInstrument runs the "before wrappee" part of instrumentation.
Expand All @@ -101,19 +106,19 @@ func PreInstrument(ctx context.Context) context.Context {
return nil
}

callInfo := am.CallerInfo()
ctx = am.SetCallInfo(ctx, callInfo)
ctx = am.FillTracingAndCallerInfo(ctx)
ctx = am.FillBuildInfo(ctx)
ctx = am.FillTracingInfo(ctx)

callInfo := am.GetCallInfo(ctx)

if am.GetTrackConcurrentCalls(ctx) {
buildInfo := am.GetBuildInfo(ctx)
functionCallsConcurrent.Add(ctx, 1,
metric.WithAttributes([]attribute.KeyValue{
attribute.Key(FunctionLabel).String(callInfo.FuncName),
attribute.Key(ModuleLabel).String(callInfo.ModuleName),
attribute.Key(CallerFunctionLabel).String(callInfo.ParentFuncName),
attribute.Key(CallerModuleLabel).String(callInfo.ParentModuleName),
attribute.Key(FunctionLabel).String(callInfo.Current.Function),
attribute.Key(ModuleLabel).String(callInfo.Current.Module),
attribute.Key(CallerFunctionLabel).String(callInfo.Parent.Function),
attribute.Key(CallerModuleLabel).String(callInfo.Parent.Module),
attribute.Key(CommitLabel).String(buildInfo.Commit),
attribute.Key(VersionLabel).String(buildInfo.Version),
attribute.Key(BranchLabel).String(buildInfo.Branch),
Expand Down
60 changes: 58 additions & 2 deletions pkg/autometrics/ctx.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package autometrics // import "github.com/autometrics-dev/autometrics-go/pkg/aut

import (
"context"
"errors"
"log"
"math/rand"
"time"
Expand Down Expand Up @@ -229,12 +230,12 @@ func GetParentSpanID(c context.Context) (SpanID, bool) {
return sid, ok
}

// FillTracingInfo ensures the context has a traceID and a spanID.
// FillTracingAndCallerInfo ensures the context has a traceID and a spanID, and looks for relevant caller information to add in the context as well.
// If they do not have this information, this method adds randomly
// generated IDs in the context to be used later for exemplars
//
// The random generator is a PRNG, seeded with the timestamp of the first time new IDs are needed.
func FillTracingInfo(ctx context.Context) context.Context {
func FillTracingAndCallerInfo(ctx context.Context) context.Context {
// We are using a PRNG because FillTracingInfo is expected to be called in PreInstrument.
// Therefore it can have a noticeable impact on the performance of instrumented code.
// Pseudo randomness should be enough for our use cases, true randomness might introduce too much latency.
Expand All @@ -258,6 +259,18 @@ func FillTracingInfo(ctx context.Context) context.Context {
ctx = SetTraceID(ctx, tid)
}

callInfo := callerInfo(ctx)
ctx = SetCallInfo(ctx, callInfo)

// Adds an entry in the global map from current (traceID, spanID) to the function ID
// NOTE: this will "leak" memory if PopFunctionName is never called for the matching (TraceId, SpanId) pair.
// Calling the cleanup function is the responsibility of the otel.Instrument() and prometheus.Instrument()
// functions as the closers.
// NOTE: This also means that goroutines that outlive their as the caller will not have access to parent
// caller information, but hopefully by that point we got all the necessary accesses done.
// If not, it is a convenience we accept to give up to prevent memory usage from exploding.
PushFunctionName(ctx, callInfo.Current)

return ctx
}

Expand Down Expand Up @@ -336,3 +349,46 @@ func GetValidHttpCodeRanges(c context.Context) []InclusiveIntRange {

return ranges
}

func ParentFunctionName(ctx context.Context) (FunctionID, error) {
tid, ok := GetTraceID(ctx)
if !ok {
return FunctionID{}, errors.New("context does not have any trace ID to follow.")
}
pSpanID, ok := GetParentSpanID(ctx)
if !ok {
return FunctionID{}, errors.New("context does not have any parent span ID to follow.")
}

return fetchFunctionName(tid, pSpanID)
}

func PopFunctionName(ctx context.Context) error {
tid, ok := GetTraceID(ctx)
if !ok {
return errors.New("context does not have any trace ID to follow.")
}
sid, ok := GetSpanID(ctx)
if !ok {
return errors.New("context does not have any current span ID to follow.")
}

popFunctionName(tid, sid)

return nil
}

func PushFunctionName(ctx context.Context, functionID FunctionID) error {
tid, ok := GetTraceID(ctx)
if !ok {
return errors.New("context does not have any trace ID to follow.")
}
sid, ok := GetSpanID(ctx)
if !ok {
return errors.New("context does not have any current span ID to follow.")
}

pushFunctionName(tid, sid, functionID)

return nil
}
39 changes: 33 additions & 6 deletions pkg/autometrics/global_state.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package autometrics // import "github.com/autometrics-dev/autometrics-go/pkg/autometrics"

import (
"fmt"
)

// These variables are describing the state of the application being autometricized,
// _not_ the build information of the binary

Expand All @@ -15,14 +19,20 @@ const (
)

var (
version string
commit string
branch string
service string
pushJobName string
pushJobURL string
version string
commit string
branch string
service string
pushJobName string
pushJobURL string
instrumentedSpans map[spanKey]FunctionID = make(map[spanKey]FunctionID)
)

type spanKey struct {
tid TraceID
sid SpanID
}

// GetVersion returns the version of the codebase being instrumented.
func GetVersion() string {
return version
Expand Down Expand Up @@ -82,3 +92,20 @@ func GetPushJobURL() string {
func SetPushJobURL(newPushJobURL string) {
pushJobURL = newPushJobURL
}

func fetchFunctionName(traceID TraceID, spanID SpanID) (FunctionID, error) {
fid, ok := instrumentedSpans[spanKey{tid: traceID, sid: spanID}]
if !ok {
return FunctionID{}, fmt.Errorf("%v,%v is not a known traceID/spanID pair now", traceID, spanID)
}

return fid, nil
}

func popFunctionName(traceID TraceID, spanID SpanID) {
delete(instrumentedSpans, spanKey{tid: traceID, sid: spanID})
}

func pushFunctionName(traceID TraceID, spanID SpanID, functionID FunctionID) {
instrumentedSpans[spanKey{tid: traceID, sid: spanID}] = functionID
}
63 changes: 35 additions & 28 deletions pkg/autometrics/instrument.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
package autometrics

import (
"context"
"reflect"
"runtime"
"strings"
)

// CallerInfo returns the (method name, module name) of the function that called the function that called this function.
//
// It also returns the information about its grandparent.
// It also returns the information about its autometricized grandparent.
//
// The module name and the parent module names are cropped to their last part, because the generator we use
// only has access to the last "package" name in `GOPACKAGE` environment variable.
Expand All @@ -17,7 +18,7 @@ import (
// then we can lift this artificial limitation here and use the full "module name" from the caller information.
// Currently this compromise is the only way to have the documentation links generator creating correct
// queries.
func CallerInfo() (callInfo CallInfo) {
func callerInfo(ctx context.Context) (callInfo CallInfo) {
programCounters := make([]uintptr, 15)

// skip 3 frames to start with:
Expand All @@ -33,40 +34,46 @@ func CallerInfo() (callInfo CallInfo) {
index := strings.LastIndex(functionName, ".")

if index == -1 {
callInfo.FuncName = functionName
callInfo.Current.Function = functionName
} else {
callInfo.ModuleName = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(
callInfo.Current.Module = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(
functionName[:index],
"(", ""),
")", ""),
"*", "")

callInfo.FuncName = functionName[index+1:]
callInfo.Current.Function = functionName[index+1:]
}

if !hasParent {
return
}
parent, err := ParentFunctionName(ctx)

// Do the same with the parent
parentFrame, _ := frames.Next()
if err != nil {
// We try to fallback to the parent in the call stack if we don't have the info
if !hasParent {
return
}

parentFunctionName := parentFrame.Function
index = strings.LastIndex(parentFunctionName, ".")
parentFrame, _ := frames.Next()
parentFrameFunctionName := parentFrame.Function
index = strings.LastIndex(parentFrameFunctionName, ".")

if index == -1 {
callInfo.ParentFuncName = parentFunctionName
} else {
moduleIndex := strings.LastIndex(parentFunctionName[:index], ".")
if moduleIndex == -1 {
callInfo.ParentModuleName = parentFunctionName[:index]
if index == -1 {
callInfo.Parent.Function = parentFrameFunctionName
} else {
callInfo.ParentModuleName = parentFunctionName[moduleIndex+1 : index]
callInfo.Parent.Module = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(
parentFrameFunctionName[:index],
"(", ""),
")", ""),
"*", "")

callInfo.Parent.Function = functionName[index+1:]
}

callInfo.ParentFuncName = parentFunctionName[index+1:]
return
}

callInfo.Parent = parent

return
}

Expand All @@ -80,15 +87,15 @@ func ReflectFunctionModuleName(f interface{}) (callInfo CallInfo) {

index := strings.LastIndex(functionName, ".")
if index == -1 {
callInfo.FuncName = functionName
callInfo.Current.Function = functionName
} else {
moduleIndex := strings.LastIndex(functionName[:index], ".")
if moduleIndex == -1 {
callInfo.ModuleName = functionName[:index]
} else {
callInfo.ModuleName = functionName[moduleIndex+1 : index]
}
callInfo.FuncName = functionName[index+1:]
callInfo.Current.Module = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(
functionName[:index],
"(", ""),
")", ""),
"*", "")

callInfo.Current.Function = functionName[index+1:]
}

return callInfo
Expand Down
26 changes: 18 additions & 8 deletions pkg/autometrics/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,26 @@ const (
MiddlewareTraceIDKey = "autometricsTraceID"
)

// FunctionID is a unique identifier for a function (on a per-codebase basis)
type FunctionID struct {
// Function is the name of the function being tracked
Function string
// Module is the name of the module containing the function being tracked, up to (and including) the class name
Module string
}

// CallInfo holds the information about the current function call and its parent names.
type CallInfo struct {
// FuncName is name of the function being tracked.
FuncName string
// ModuleName is name of the module of the function being tracked.
ModuleName string
// ParentFuncName is name of the caller of the function being tracked.
ParentFuncName string
// ParentModuleName is name of the module of the caller of the function being tracked.
ParentModuleName string
// Current is the identifier of the function being tracked
Current FunctionID
// Parent is the identifirs of the caller of the function being tracked.
//
// The parent information is only included for autometricized functions. This
// means:
// - if A calls B calls C,
// - only A and C have autometrics annotations,
// then the Caller information when working with C is going to be A, _not_ B
Parent FunctionID
}

// BuildInfo holds the information about the current build of the instrumented code.
Expand Down
Loading

0 comments on commit 2589958

Please sign in to comment.