Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RSDK-8819: Finish FTDC #4579

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 32 additions & 9 deletions ftdc/cmd/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@ package main

import (
"bufio"
"cmp"
"errors"
"fmt"
"io"
"math"
"os"
"os/exec"
"slices"
"strings"
"time"

"go.viam.com/utils"

"go.viam.com/rdk/ftdc"
"go.viam.com/rdk/logging"
)

// gnuplotWriter organizes all of the output for `gnuplot` to create a graph from FTDC
Expand All @@ -36,6 +39,31 @@ type gnuplotWriter struct {
options graphOptions
}

type kvPair[K, V any] struct {
Key K
Val V
}

func sorted[K cmp.Ordered, V any](mp map[K]V) []kvPair[K, V] {
ret := make([]kvPair[K, V], 0, len(mp))
for key, val := range mp {
ret = append(ret, kvPair[K, V]{key, val})
}

slices.SortFunc(ret, func(left, right kvPair[K, V]) int {
if left.Key < right.Key {
return -1
}
if right.Key < left.Key {
return 1
}

return 0
})

return ret
}

type graphOptions struct {
// minTimeSeconds and maxTimeSeconds control which datapoints should render based on their
// timestamp. The default is all datapoints (minTimeSeconds: 0, maxTimeSeconds: MaxInt64).
Expand Down Expand Up @@ -155,7 +183,8 @@ func (gpw *gnuplotWriter) CompileAndClose() string {
// per-graph setting rather than a global.
writeln(gnuFile, "set yrange [0:*]")

for metricName, file := range gpw.metricFiles {
for _, nameFilePair := range sorted(gpw.metricFiles) {
metricName, file := nameFilePair.Key, nameFilePair.Val
writelnf(gnuFile, "plot '%v' using 1:2 with lines linestyle 7 lw 4 title '%v'", file.Name(), strings.ReplaceAll(metricName, "_", "\\_"))
utils.UncheckedErrorFunc(file.Close)
}
Expand All @@ -165,25 +194,19 @@ func (gpw *gnuplotWriter) CompileAndClose() string {

func main() {
if len(os.Args) < 2 {
// We are a CLI, it's appropriate to write to stdout.
//

nolintPrintln("Expected an FTDC filename. E.g: go run parser.go <path-to>/viam-server.ftdc")
return
}

ftdcFile, err := os.Open(os.Args[1])
if err != nil {
// We are a CLI, it's appropriate to write to stdout.
//

nolintPrintln("Error opening file. File:", os.Args[1], "Err:", err)

nolintPrintln("Expected an FTDC filename. E.g: go run parser.go <path-to>/viam-server.ftdc")
return
}

data, err := ftdc.Parse(ftdcFile)
logger := logging.NewLogger("parser")
data, err := ftdc.ParseWithLogger(ftdcFile, logger)
if err != nil {
panic(err)
}
Expand Down
15 changes: 14 additions & 1 deletion ftdc/custom_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,9 @@ func ParseWithLogger(rawReader io.Reader, logger logging.Logger) ([]FlatDatum, e
// "packed byte" where the first bit is not a diff bit. `readDiffBits` must account for
// that.
diffedFieldsIndexes := readDiffBits(reader, schema)
logger.Debugw("Diff bits", "changedFields", diffedFieldsIndexes)
logger.Debugw("Diff bits",
"changedFieldIndexes", diffedFieldsIndexes,
"changedFieldNames", schema.FieldNamesForIndexes(diffedFieldsIndexes))

// The next eight bytes after the diff bits is the time in nanoseconds since the 1970 epoch.
var dataTime int64
Expand Down Expand Up @@ -653,3 +655,14 @@ func (schema *schema) Zip(data []float32) []Reading {

return ret
}

// FieldNamesForIndexes maps the integers to their string form as defined in the schema. This is
// useful for creating human consumable output.
func (schema *schema) FieldNamesForIndexes(fieldIdxs []int) []string {
ret := make([]string, len(fieldIdxs))
for idx, fieldIdx := range fieldIdxs {
ret[idx] = schema.fieldOrder[fieldIdx]
}

return ret
}
37 changes: 30 additions & 7 deletions ftdc/ftdc.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,21 +125,35 @@ type FTDC struct {
logger logging.Logger
}

// New creates a new *FTDC.
func New(logger logging.Logger) *FTDC {
return NewWithWriter(nil, logger)
// New creates a new *FTDC. This FTDC object will write FTDC formatted files into the input
// `ftdcDirectory`.
func New(ftdcDirectory string, logger logging.Logger) *FTDC {
ret := newFTDC(logger)
ret.maxFileSizeBytes = 1_000_000
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Made the settings a more formal division. One either passes in a directory and all these variables about managing files are initialized. Or one passes in a writer and none of those variables matter.

ret.maxNumFiles = 10
ret.ftdcDir = ftdcDirectory
return ret
}

// NewWithWriter creates a new *FTDC that outputs bytes to the specified writer.
func NewWithWriter(writer io.Writer, logger logging.Logger) *FTDC {
ret := newFTDC(logger)
ret.outputWriter = writer
return ret
}

// DefaultDirectory returns a directory to write FTDC data files in. Each unique "part" running on a
// single computer will get its own directory.
func DefaultDirectory(viamHome, partID string) string {
return filepath.Join(viamHome, "diagnostics.data", partID)
}

func newFTDC(logger logging.Logger) *FTDC {
return &FTDC{
// Allow for some wiggle before blocking producers.
datumCh: make(chan datum, 20),
outputWorkerDone: make(chan struct{}),
logger: logger,
outputWriter: writer,
maxFileSizeBytes: 1_000_000,
maxNumFiles: 10,
}
}

Expand Down Expand Up @@ -415,6 +429,15 @@ func (ftdc *FTDC) getWriter() (io.Writer, error) {
// It's unclear in what circumstance we'd expect creating a new file to fail. Try 5 times for no
// good reason before giving up entirely and shutting down FTDC.
for numTries := 0; numTries < 5; numTries++ {
// The viam process is expected to be run as root. The FTDC directory must be readable by
// "other" users.
//
//nolint:gosec
if err = os.MkdirAll(ftdc.ftdcDir, 0o755); err != nil {
ftdc.logger.Warnw("Failed to create FTDC directory", "dir", ftdc.ftdcDir, "err", err)
return nil, err
}

now := time.Now().UTC()
// lint wants 0o600 file permissions. We don't expect the unix user someone is ssh'ed in as
// to be on the same unix user as is running the viam-server process. Thus the file needs to
Expand Down Expand Up @@ -536,7 +559,7 @@ func (ftdc *FTDC) checkAndDeleteOldFiles() error {
// deletion testing. Filename generation uses padding such that we can rely on there before 2/4
// digits for every numeric value.
//
//nolint
// nolint
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It joys me that our linter lints our linter

// Example filename: countingBytesTest1228324349/viam-server-2024-11-18T20-37-01Z.ftdc
var filenameTimeRe = regexp.MustCompile(`viam-server-(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})Z.ftdc`)

Expand Down
25 changes: 11 additions & 14 deletions ftdc/ftdc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,18 +294,16 @@ func TestStatsWriterContinuesOnSchemaError(t *testing.T) {
func TestCountingBytes(t *testing.T) {
logger := logging.NewTestLogger(t)

// We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC
// file rotation.
ftdc := New(logger.Sublogger("ftdc"))
// Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums.
ftdc.maxFileSizeBytes = 1000

// Isolate all of the files we're going to create to a single, fresh directory.
ftdcFileDir, err := os.MkdirTemp("./", "countingBytesTest")
test.That(t, err, test.ShouldBeNil)
defer os.RemoveAll(ftdcFileDir)

// Isolate all of the files we're going to create to a single, fresh directory.
ftdc.ftdcDir = ftdcFileDir
// We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC
// file rotation.
ftdc := New(ftdcFileDir, logger.Sublogger("ftdc"))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The "refactor" of New vs NewWithWriter resulted in changing the order here. First create the directory, then pass it to the constructor.

// Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums.
ftdc.maxFileSizeBytes = 1000

timesRolledOver := 0
foo := &foo{}
Expand Down Expand Up @@ -419,16 +417,15 @@ func TestFileDeletion(t *testing.T) {
// a second before being able to create the next file.
logger := logging.NewTestLogger(t)

// We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC
// file rotation.
ftdc := New(logger.Sublogger("ftdc"))

// Isolate all of the files we're going to create to a single, fresh directory.
ftdcFileDir, err := os.MkdirTemp("./", "fileDeletionTest")
test.That(t, err, test.ShouldBeNil)
defer os.RemoveAll(ftdcFileDir)

// Isolate all of the files we're going to create to a single, fresh directory.
ftdc.ftdcDir = ftdcFileDir
// We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC
// file rotation.
ftdc := New(ftdcFileDir, logger.Sublogger("ftdc"))

// Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums.
ftdc.maxFileSizeBytes = 1000
ftdc.maxNumFiles = 3
Expand Down
7 changes: 6 additions & 1 deletion robot/impl/local_robot.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,12 @@ func newWithResources(

var ftdcWorker *ftdc.FTDC
if rOpts.enableFTDC {
ftdcWorker = ftdc.New(logger.Sublogger("ftdc"))
partID := "local-config"
if cfg.Cloud != nil {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bug fix

partID = cfg.Cloud.ID
}
// CloudID is also known as the robot part id.
ftdcWorker = ftdc.New(ftdc.DefaultDirectory(config.ViamDotDir, partID), logger.Sublogger("ftdc"))
ftdcWorker.Start()
}

Expand Down
2 changes: 2 additions & 0 deletions robot/web/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ type Service interface {

// Returns the unix socket path the module server listens on.
ModuleAddress() string

Stats() any
}

var internalWebServiceName = resource.NewName(
Expand Down
6 changes: 6 additions & 0 deletions robot/web/web_c.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ type webService struct {
modWorkers sync.WaitGroup
}

func (svc *webService) Stats() any {
return struct {
RPCServer any
}{svc.rpcServer.Stats()}
}

// Reconfigure pulls resources and updates the stream server audio and video streams with the new resources.
func (svc *webService) Reconfigure(ctx context.Context, deps resource.Dependencies, _ resource.Config) error {
svc.mu.Lock()
Expand Down
2 changes: 1 addition & 1 deletion web/server/entrypoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ type Arguments struct {
OutputTelemetry bool `flag:"output-telemetry,usage=print out telemetry data (metrics and spans)"`
DisableMulticastDNS bool `flag:"disable-mdns,usage=disable server discovery through multicast DNS"`
DumpResourcesPath string `flag:"dump-resources,usage=dump all resource registrations as json to the provided file path"`
EnableFTDC bool `flag:"ftdc,usage=enable fulltime data capture for diagnostics [beta feature]"`
EnableFTDC bool `flag:"ftdc,default=true,usage=enable fulltime data capture for diagnostics"`
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stole from WebRTC above on line 45. Hand tested that -ftdc=false turns this off.

}

type robotServer struct {
Expand Down
Loading