From a65d8359253958257705adaf0be83dd6d6b52bbf Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 13:41:20 -0500 Subject: [PATCH 1/9] RSDK-8819: Add goutils rpc stats to FTDC. --- robot/web/web.go | 2 ++ robot/web/web_c.go | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/robot/web/web.go b/robot/web/web.go index 3380ac4332a..9510657e68a 100644 --- a/robot/web/web.go +++ b/robot/web/web.go @@ -188,6 +188,8 @@ type Service interface { // Returns the unix socket path the module server listens on. ModuleAddress() string + + Stats() any } var internalWebServiceName = resource.NewName( diff --git a/robot/web/web_c.go b/robot/web/web_c.go index 30dfdb09236..a8c1c5f7f80 100644 --- a/robot/web/web_c.go +++ b/robot/web/web_c.go @@ -57,6 +57,12 @@ type webService struct { modWorkers sync.WaitGroup } +func (svc *webService) Stats() any { + return struct { + RpcServer any + }{svc.rpcServer.Stats()} +} + // Reconfigure pulls resources and updates the stream server audio and video streams with the new resources. func (svc *webService) Reconfigure(ctx context.Context, deps resource.Dependencies, _ resource.Config) error { svc.mu.Lock() From 6a945bf5a00ac07e0ec8dce646a1fcb6b6c2ae3a Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 13:42:18 -0500 Subject: [PATCH 2/9] RSDK-8819: Parameterize FTDC diagnostics.data directory. --- ftdc/ftdc.go | 31 ++++++++++++++++++++++++------- ftdc/ftdc_test.go | 25 +++++++++++-------------- robot/impl/local_robot.go | 3 ++- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/ftdc/ftdc.go b/ftdc/ftdc.go index 81c23cf4ed8..bf760d4202c 100644 --- a/ftdc/ftdc.go +++ b/ftdc/ftdc.go @@ -125,21 +125,33 @@ type FTDC struct { logger logging.Logger } -// New creates a new *FTDC. -func New(logger logging.Logger) *FTDC { - return NewWithWriter(nil, logger) +// New creates a new *FTDC. This FTDC object will write FTDC formatted files into the input +// `ftdcDirectory`. +func New(ftdcDirectory string, logger logging.Logger) *FTDC { + ret := newFTDC(logger) + ret.maxFileSizeBytes = 1_000_000 + ret.maxNumFiles = 10 + ret.ftdcDir = ftdcDirectory + return ret } // NewWithWriter creates a new *FTDC that outputs bytes to the specified writer. func NewWithWriter(writer io.Writer, logger logging.Logger) *FTDC { + ret := newFTDC(logger) + ret.outputWriter = writer + return ret +} + +func DefaultDirectory(viamHome string, partId string) string { + return filepath.Join(viamHome, "diagnostics.data", partId) +} + +func newFTDC(logger logging.Logger) *FTDC { return &FTDC{ // Allow for some wiggle before blocking producers. datumCh: make(chan datum, 20), outputWorkerDone: make(chan struct{}), logger: logger, - outputWriter: writer, - maxFileSizeBytes: 1_000_000, - maxNumFiles: 10, } } @@ -415,6 +427,11 @@ func (ftdc *FTDC) getWriter() (io.Writer, error) { // It's unclear in what circumstance we'd expect creating a new file to fail. Try 5 times for no // good reason before giving up entirely and shutting down FTDC. for numTries := 0; numTries < 5; numTries++ { + if err = os.MkdirAll(ftdc.ftdcDir, 0o755); err != nil { + ftdc.logger.Warnw("Failed to create FTDC directory", "dir", ftdc.ftdcDir, "err", err) + return nil, err + } + now := time.Now().UTC() // lint wants 0o600 file permissions. We don't expect the unix user someone is ssh'ed in as // to be on the same unix user as is running the viam-server process. Thus the file needs to @@ -536,7 +553,7 @@ func (ftdc *FTDC) checkAndDeleteOldFiles() error { // deletion testing. Filename generation uses padding such that we can rely on there before 2/4 // digits for every numeric value. // -//nolint +// nolint // Example filename: countingBytesTest1228324349/viam-server-2024-11-18T20-37-01Z.ftdc var filenameTimeRe = regexp.MustCompile(`viam-server-(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})Z.ftdc`) diff --git a/ftdc/ftdc_test.go b/ftdc/ftdc_test.go index 3c4552347a3..3c7bf115e2e 100644 --- a/ftdc/ftdc_test.go +++ b/ftdc/ftdc_test.go @@ -294,18 +294,16 @@ func TestStatsWriterContinuesOnSchemaError(t *testing.T) { func TestCountingBytes(t *testing.T) { logger := logging.NewTestLogger(t) - // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC - // file rotation. - ftdc := New(logger.Sublogger("ftdc")) - // Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums. - ftdc.maxFileSizeBytes = 1000 - + // Isolate all of the files we're going to create to a single, fresh directory. ftdcFileDir, err := os.MkdirTemp("./", "countingBytesTest") test.That(t, err, test.ShouldBeNil) defer os.RemoveAll(ftdcFileDir) - // Isolate all of the files we're going to create to a single, fresh directory. - ftdc.ftdcDir = ftdcFileDir + // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC + // file rotation. + ftdc := New(ftdcFileDir, logger.Sublogger("ftdc")) + // Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums. + ftdc.maxFileSizeBytes = 1000 timesRolledOver := 0 foo := &foo{} @@ -419,16 +417,15 @@ func TestFileDeletion(t *testing.T) { // a second before being able to create the next file. logger := logging.NewTestLogger(t) - // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC - // file rotation. - ftdc := New(logger.Sublogger("ftdc")) - + // Isolate all of the files we're going to create to a single, fresh directory. ftdcFileDir, err := os.MkdirTemp("./", "fileDeletionTest") test.That(t, err, test.ShouldBeNil) defer os.RemoveAll(ftdcFileDir) - // Isolate all of the files we're going to create to a single, fresh directory. - ftdc.ftdcDir = ftdcFileDir + // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC + // file rotation. + ftdc := New(ftdcFileDir, logger.Sublogger("ftdc")) + // Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums. ftdc.maxFileSizeBytes = 1000 ftdc.maxNumFiles = 3 diff --git a/robot/impl/local_robot.go b/robot/impl/local_robot.go index a157f5afddf..8308b708b37 100644 --- a/robot/impl/local_robot.go +++ b/robot/impl/local_robot.go @@ -416,7 +416,8 @@ func newWithResources( var ftdcWorker *ftdc.FTDC if rOpts.enableFTDC { - ftdcWorker = ftdc.New(logger.Sublogger("ftdc")) + // CloudID is also known as the robot part id. + ftdcWorker = ftdc.New(ftdc.DefaultDirectory(config.ViamDotDir, cfg.Cloud.ID), logger.Sublogger("ftdc")) ftdcWorker.Start() } From 8ca673d90ced2d1868991f89f12feebf15f188c6 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 13:43:00 -0500 Subject: [PATCH 3/9] RSDK-8819: Sort charts in ftdc viewing by metric name. --- ftdc/cmd/parser.go | 42 +++++++++++++++++++++++++++++++++--------- ftdc/custom_format.go | 13 ++++++++++++- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/ftdc/cmd/parser.go b/ftdc/cmd/parser.go index 36e3ea2fa17..c81b3d8a4b8 100644 --- a/ftdc/cmd/parser.go +++ b/ftdc/cmd/parser.go @@ -3,18 +3,21 @@ package main import ( "bufio" + "cmp" "errors" "fmt" "io" "math" "os" "os/exec" + "slices" "strings" "time" "go.viam.com/utils" "go.viam.com/rdk/ftdc" + "go.viam.com/rdk/logging" ) // gnuplotWriter organizes all of the output for `gnuplot` to create a graph from FTDC @@ -36,6 +39,31 @@ type gnuplotWriter struct { options graphOptions } +type KVPair[K, V any] struct { + Key K + Val V +} + +func sorted[K cmp.Ordered, V any](mp map[K]V) []KVPair[K, V] { + ret := make([]KVPair[K, V], 0, len(mp)) + for key, val := range mp { + ret = append(ret, KVPair[K, V]{key, val}) + } + + slices.SortFunc(ret, func(left, right KVPair[K, V]) int { + if left.Key < right.Key { + return -1 + } + if right.Key < left.Key { + return 1 + } + + return 0 + }) + + return ret +} + type graphOptions struct { // minTimeSeconds and maxTimeSeconds control which datapoints should render based on their // timestamp. The default is all datapoints (minTimeSeconds: 0, maxTimeSeconds: MaxInt64). @@ -155,7 +183,8 @@ func (gpw *gnuplotWriter) CompileAndClose() string { // per-graph setting rather than a global. writeln(gnuFile, "set yrange [0:*]") - for metricName, file := range gpw.metricFiles { + for _, nameFilePair := range sorted(gpw.metricFiles) { + metricName, file := nameFilePair.Key, nameFilePair.Val writelnf(gnuFile, "plot '%v' using 1:2 with lines linestyle 7 lw 4 title '%v'", file.Name(), strings.ReplaceAll(metricName, "_", "\\_")) utils.UncheckedErrorFunc(file.Close) } @@ -165,25 +194,20 @@ func (gpw *gnuplotWriter) CompileAndClose() string { func main() { if len(os.Args) < 2 { - // We are a CLI, it's appropriate to write to stdout. - // - nolintPrintln("Expected an FTDC filename. E.g: go run parser.go /viam-server.ftdc") return } ftdcFile, err := os.Open(os.Args[1]) if err != nil { - // We are a CLI, it's appropriate to write to stdout. - // - nolintPrintln("Error opening file. File:", os.Args[1], "Err:", err) - nolintPrintln("Expected an FTDC filename. E.g: go run parser.go /viam-server.ftdc") return } - data, err := ftdc.Parse(ftdcFile) + logger := logging.NewDebugLogger("parser") + logger.SetLevel(logging.WARN) + data, err := ftdc.ParseWithLogger(ftdcFile, logger) if err != nil { panic(err) } diff --git a/ftdc/custom_format.go b/ftdc/custom_format.go index d6ea3ccc3d3..8b13a986f3e 100644 --- a/ftdc/custom_format.go +++ b/ftdc/custom_format.go @@ -429,7 +429,9 @@ func ParseWithLogger(rawReader io.Reader, logger logging.Logger) ([]FlatDatum, e // "packed byte" where the first bit is not a diff bit. `readDiffBits` must account for // that. diffedFieldsIndexes := readDiffBits(reader, schema) - logger.Debugw("Diff bits", "changedFields", diffedFieldsIndexes) + logger.Debugw("Diff bits", + "changedFieldIndexes", diffedFieldsIndexes, + "changedFieldNames", schema.FieldNamesForIndexes(diffedFieldsIndexes)) // The next eight bytes after the diff bits is the time in nanoseconds since the 1970 epoch. var dataTime int64 @@ -653,3 +655,12 @@ func (schema *schema) Zip(data []float32) []Reading { return ret } + +func (schema *schema) FieldNamesForIndexes(fieldIdxs []int) []string { + ret := make([]string, len(fieldIdxs)) + for idx, fieldIdx := range fieldIdxs { + ret[idx] = schema.fieldOrder[fieldIdx] + } + + return ret +} From 68a11a60fa02f03f6c34671b4ab4680996dfd931 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 14:10:23 -0500 Subject: [PATCH 4/9] RSDK-8819: Turn on FTDC by default. --- robot/impl/local_robot.go | 6 +++++- web/server/entrypoint.go | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/robot/impl/local_robot.go b/robot/impl/local_robot.go index 8308b708b37..980394be464 100644 --- a/robot/impl/local_robot.go +++ b/robot/impl/local_robot.go @@ -416,8 +416,12 @@ func newWithResources( var ftdcWorker *ftdc.FTDC if rOpts.enableFTDC { + partID := "local-config" + if cfg.Cloud != nil { + partID = cfg.Cloud.ID + } // CloudID is also known as the robot part id. - ftdcWorker = ftdc.New(ftdc.DefaultDirectory(config.ViamDotDir, cfg.Cloud.ID), logger.Sublogger("ftdc")) + ftdcWorker = ftdc.New(ftdc.DefaultDirectory(config.ViamDotDir, partID), logger.Sublogger("ftdc")) ftdcWorker.Start() } diff --git a/web/server/entrypoint.go b/web/server/entrypoint.go index 3c1266a767b..a2b5b1fdbdc 100644 --- a/web/server/entrypoint.go +++ b/web/server/entrypoint.go @@ -48,7 +48,7 @@ type Arguments struct { OutputTelemetry bool `flag:"output-telemetry,usage=print out telemetry data (metrics and spans)"` DisableMulticastDNS bool `flag:"disable-mdns,usage=disable server discovery through multicast DNS"` DumpResourcesPath string `flag:"dump-resources,usage=dump all resource registrations as json to the provided file path"` - EnableFTDC bool `flag:"ftdc,usage=enable fulltime data capture for diagnostics [beta feature]"` + EnableFTDC bool `flag:"ftdc,default=true,usage=enable fulltime data capture for diagnostics"` } type robotServer struct { From 61ef64a34b90a5277665138bd1dd19dce6a28918 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 14:22:30 -0500 Subject: [PATCH 5/9] lint --- ftdc/cmd/parser.go | 13 ++++++------- ftdc/custom_format.go | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ftdc/cmd/parser.go b/ftdc/cmd/parser.go index c81b3d8a4b8..31b271514d6 100644 --- a/ftdc/cmd/parser.go +++ b/ftdc/cmd/parser.go @@ -39,18 +39,18 @@ type gnuplotWriter struct { options graphOptions } -type KVPair[K, V any] struct { +type kvPair[K, V any] struct { Key K Val V } -func sorted[K cmp.Ordered, V any](mp map[K]V) []KVPair[K, V] { - ret := make([]KVPair[K, V], 0, len(mp)) +func sorted[K cmp.Ordered, V any](mp map[K]V) []kvPair[K, V] { + ret := make([]kvPair[K, V], 0, len(mp)) for key, val := range mp { - ret = append(ret, KVPair[K, V]{key, val}) + ret = append(ret, kvPair[K, V]{key, val}) } - slices.SortFunc(ret, func(left, right KVPair[K, V]) int { + slices.SortFunc(ret, func(left, right kvPair[K, V]) int { if left.Key < right.Key { return -1 } @@ -205,8 +205,7 @@ func main() { return } - logger := logging.NewDebugLogger("parser") - logger.SetLevel(logging.WARN) + logger := logging.NewLogger("parser") data, err := ftdc.ParseWithLogger(ftdcFile, logger) if err != nil { panic(err) diff --git a/ftdc/custom_format.go b/ftdc/custom_format.go index 8b13a986f3e..98b2518b826 100644 --- a/ftdc/custom_format.go +++ b/ftdc/custom_format.go @@ -656,6 +656,8 @@ func (schema *schema) Zip(data []float32) []Reading { return ret } +// FieldNamesForIndexes maps the integers to their string form as defined in the schema. This is +// useful for creating human consumable output. func (schema *schema) FieldNamesForIndexes(fieldIdxs []int) []string { ret := make([]string, len(fieldIdxs)) for idx, fieldIdx := range fieldIdxs { From 906a3b763c809a9daff36cc37d0eb7306dde1370 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 15:30:40 -0500 Subject: [PATCH 6/9] lint --- ftdc/ftdc.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ftdc/ftdc.go b/ftdc/ftdc.go index bf760d4202c..1de5b5335ad 100644 --- a/ftdc/ftdc.go +++ b/ftdc/ftdc.go @@ -142,7 +142,7 @@ func NewWithWriter(writer io.Writer, logger logging.Logger) *FTDC { return ret } -func DefaultDirectory(viamHome string, partId string) string { +func DefaultDirectory(viamHome, partId string) string { return filepath.Join(viamHome, "diagnostics.data", partId) } @@ -427,6 +427,10 @@ func (ftdc *FTDC) getWriter() (io.Writer, error) { // It's unclear in what circumstance we'd expect creating a new file to fail. Try 5 times for no // good reason before giving up entirely and shutting down FTDC. for numTries := 0; numTries < 5; numTries++ { + // The viam process is expected to be run as root. The FTDC directory must be readable by + // "other" users. + // + //nolint:gosec if err = os.MkdirAll(ftdc.ftdcDir, 0o755); err != nil { ftdc.logger.Warnw("Failed to create FTDC directory", "dir", ftdc.ftdcDir, "err", err) return nil, err From fe547abd5b12e25d744d271534d7f19f3ef723f3 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 15:31:13 -0500 Subject: [PATCH 7/9] lint --- robot/web/web_c.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/robot/web/web_c.go b/robot/web/web_c.go index a8c1c5f7f80..c8b9f6ef33c 100644 --- a/robot/web/web_c.go +++ b/robot/web/web_c.go @@ -59,7 +59,7 @@ type webService struct { func (svc *webService) Stats() any { return struct { - RpcServer any + RPCServer any }{svc.rpcServer.Stats()} } From 9b1ce552d01f6bead9285a53b607265aeb0aa2dc Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Thu, 21 Nov 2024 16:25:16 -0500 Subject: [PATCH 8/9] lint --- ftdc/ftdc.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ftdc/ftdc.go b/ftdc/ftdc.go index 1de5b5335ad..63e4c05f3d2 100644 --- a/ftdc/ftdc.go +++ b/ftdc/ftdc.go @@ -142,8 +142,10 @@ func NewWithWriter(writer io.Writer, logger logging.Logger) *FTDC { return ret } -func DefaultDirectory(viamHome, partId string) string { - return filepath.Join(viamHome, "diagnostics.data", partId) +// DefaultDirectory returns a directory to write FTDC data files in. Each unique "part" running on a +// single computer will get its own directory. +func DefaultDirectory(viamHome, partID string) string { + return filepath.Join(viamHome, "diagnostics.data", partID) } func newFTDC(logger logging.Logger) *FTDC { From b21b4432dc9f36fd9311f0a20485218503228ab7 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Fri, 22 Nov 2024 09:03:27 -0500 Subject: [PATCH 9/9] lint lint lint lint --- ftdc/ftdc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ftdc/ftdc.go b/ftdc/ftdc.go index 63e4c05f3d2..bf9bca3cdf6 100644 --- a/ftdc/ftdc.go +++ b/ftdc/ftdc.go @@ -559,7 +559,7 @@ func (ftdc *FTDC) checkAndDeleteOldFiles() error { // deletion testing. Filename generation uses padding such that we can rely on there before 2/4 // digits for every numeric value. // -// nolint +//nolint // Example filename: countingBytesTest1228324349/viam-server-2024-11-18T20-37-01Z.ftdc var filenameTimeRe = regexp.MustCompile(`viam-server-(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})Z.ftdc`)