diff --git a/ftdc/cmd/parser.go b/ftdc/cmd/parser.go index 36e3ea2fa17..31b271514d6 100644 --- a/ftdc/cmd/parser.go +++ b/ftdc/cmd/parser.go @@ -3,18 +3,21 @@ package main import ( "bufio" + "cmp" "errors" "fmt" "io" "math" "os" "os/exec" + "slices" "strings" "time" "go.viam.com/utils" "go.viam.com/rdk/ftdc" + "go.viam.com/rdk/logging" ) // gnuplotWriter organizes all of the output for `gnuplot` to create a graph from FTDC @@ -36,6 +39,31 @@ type gnuplotWriter struct { options graphOptions } +type kvPair[K, V any] struct { + Key K + Val V +} + +func sorted[K cmp.Ordered, V any](mp map[K]V) []kvPair[K, V] { + ret := make([]kvPair[K, V], 0, len(mp)) + for key, val := range mp { + ret = append(ret, kvPair[K, V]{key, val}) + } + + slices.SortFunc(ret, func(left, right kvPair[K, V]) int { + if left.Key < right.Key { + return -1 + } + if right.Key < left.Key { + return 1 + } + + return 0 + }) + + return ret +} + type graphOptions struct { // minTimeSeconds and maxTimeSeconds control which datapoints should render based on their // timestamp. The default is all datapoints (minTimeSeconds: 0, maxTimeSeconds: MaxInt64). @@ -155,7 +183,8 @@ func (gpw *gnuplotWriter) CompileAndClose() string { // per-graph setting rather than a global. writeln(gnuFile, "set yrange [0:*]") - for metricName, file := range gpw.metricFiles { + for _, nameFilePair := range sorted(gpw.metricFiles) { + metricName, file := nameFilePair.Key, nameFilePair.Val writelnf(gnuFile, "plot '%v' using 1:2 with lines linestyle 7 lw 4 title '%v'", file.Name(), strings.ReplaceAll(metricName, "_", "\\_")) utils.UncheckedErrorFunc(file.Close) } @@ -165,25 +194,19 @@ func (gpw *gnuplotWriter) CompileAndClose() string { func main() { if len(os.Args) < 2 { - // We are a CLI, it's appropriate to write to stdout. - // - nolintPrintln("Expected an FTDC filename. E.g: go run parser.go /viam-server.ftdc") return } ftdcFile, err := os.Open(os.Args[1]) if err != nil { - // We are a CLI, it's appropriate to write to stdout. - // - nolintPrintln("Error opening file. File:", os.Args[1], "Err:", err) - nolintPrintln("Expected an FTDC filename. E.g: go run parser.go /viam-server.ftdc") return } - data, err := ftdc.Parse(ftdcFile) + logger := logging.NewLogger("parser") + data, err := ftdc.ParseWithLogger(ftdcFile, logger) if err != nil { panic(err) } diff --git a/ftdc/custom_format.go b/ftdc/custom_format.go index d6ea3ccc3d3..98b2518b826 100644 --- a/ftdc/custom_format.go +++ b/ftdc/custom_format.go @@ -429,7 +429,9 @@ func ParseWithLogger(rawReader io.Reader, logger logging.Logger) ([]FlatDatum, e // "packed byte" where the first bit is not a diff bit. `readDiffBits` must account for // that. diffedFieldsIndexes := readDiffBits(reader, schema) - logger.Debugw("Diff bits", "changedFields", diffedFieldsIndexes) + logger.Debugw("Diff bits", + "changedFieldIndexes", diffedFieldsIndexes, + "changedFieldNames", schema.FieldNamesForIndexes(diffedFieldsIndexes)) // The next eight bytes after the diff bits is the time in nanoseconds since the 1970 epoch. var dataTime int64 @@ -653,3 +655,14 @@ func (schema *schema) Zip(data []float32) []Reading { return ret } + +// FieldNamesForIndexes maps the integers to their string form as defined in the schema. This is +// useful for creating human consumable output. +func (schema *schema) FieldNamesForIndexes(fieldIdxs []int) []string { + ret := make([]string, len(fieldIdxs)) + for idx, fieldIdx := range fieldIdxs { + ret[idx] = schema.fieldOrder[fieldIdx] + } + + return ret +} diff --git a/ftdc/ftdc.go b/ftdc/ftdc.go index 81c23cf4ed8..bf9bca3cdf6 100644 --- a/ftdc/ftdc.go +++ b/ftdc/ftdc.go @@ -125,21 +125,35 @@ type FTDC struct { logger logging.Logger } -// New creates a new *FTDC. -func New(logger logging.Logger) *FTDC { - return NewWithWriter(nil, logger) +// New creates a new *FTDC. This FTDC object will write FTDC formatted files into the input +// `ftdcDirectory`. +func New(ftdcDirectory string, logger logging.Logger) *FTDC { + ret := newFTDC(logger) + ret.maxFileSizeBytes = 1_000_000 + ret.maxNumFiles = 10 + ret.ftdcDir = ftdcDirectory + return ret } // NewWithWriter creates a new *FTDC that outputs bytes to the specified writer. func NewWithWriter(writer io.Writer, logger logging.Logger) *FTDC { + ret := newFTDC(logger) + ret.outputWriter = writer + return ret +} + +// DefaultDirectory returns a directory to write FTDC data files in. Each unique "part" running on a +// single computer will get its own directory. +func DefaultDirectory(viamHome, partID string) string { + return filepath.Join(viamHome, "diagnostics.data", partID) +} + +func newFTDC(logger logging.Logger) *FTDC { return &FTDC{ // Allow for some wiggle before blocking producers. datumCh: make(chan datum, 20), outputWorkerDone: make(chan struct{}), logger: logger, - outputWriter: writer, - maxFileSizeBytes: 1_000_000, - maxNumFiles: 10, } } @@ -415,6 +429,15 @@ func (ftdc *FTDC) getWriter() (io.Writer, error) { // It's unclear in what circumstance we'd expect creating a new file to fail. Try 5 times for no // good reason before giving up entirely and shutting down FTDC. for numTries := 0; numTries < 5; numTries++ { + // The viam process is expected to be run as root. The FTDC directory must be readable by + // "other" users. + // + //nolint:gosec + if err = os.MkdirAll(ftdc.ftdcDir, 0o755); err != nil { + ftdc.logger.Warnw("Failed to create FTDC directory", "dir", ftdc.ftdcDir, "err", err) + return nil, err + } + now := time.Now().UTC() // lint wants 0o600 file permissions. We don't expect the unix user someone is ssh'ed in as // to be on the same unix user as is running the viam-server process. Thus the file needs to diff --git a/ftdc/ftdc_test.go b/ftdc/ftdc_test.go index 3c4552347a3..3c7bf115e2e 100644 --- a/ftdc/ftdc_test.go +++ b/ftdc/ftdc_test.go @@ -294,18 +294,16 @@ func TestStatsWriterContinuesOnSchemaError(t *testing.T) { func TestCountingBytes(t *testing.T) { logger := logging.NewTestLogger(t) - // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC - // file rotation. - ftdc := New(logger.Sublogger("ftdc")) - // Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums. - ftdc.maxFileSizeBytes = 1000 - + // Isolate all of the files we're going to create to a single, fresh directory. ftdcFileDir, err := os.MkdirTemp("./", "countingBytesTest") test.That(t, err, test.ShouldBeNil) defer os.RemoveAll(ftdcFileDir) - // Isolate all of the files we're going to create to a single, fresh directory. - ftdc.ftdcDir = ftdcFileDir + // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC + // file rotation. + ftdc := New(ftdcFileDir, logger.Sublogger("ftdc")) + // Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums. + ftdc.maxFileSizeBytes = 1000 timesRolledOver := 0 foo := &foo{} @@ -419,16 +417,15 @@ func TestFileDeletion(t *testing.T) { // a second before being able to create the next file. logger := logging.NewTestLogger(t) - // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC - // file rotation. - ftdc := New(logger.Sublogger("ftdc")) - + // Isolate all of the files we're going to create to a single, fresh directory. ftdcFileDir, err := os.MkdirTemp("./", "fileDeletionTest") test.That(t, err, test.ShouldBeNil) defer os.RemoveAll(ftdcFileDir) - // Isolate all of the files we're going to create to a single, fresh directory. - ftdc.ftdcDir = ftdcFileDir + // We must not use `NewWithWriter`. Forcing a writer for FTDC data is not compatible with FTDC + // file rotation. + ftdc := New(ftdcFileDir, logger.Sublogger("ftdc")) + // Expect a log rotation after 1,000 bytes. For a changing `foo` object, this is ~60 datums. ftdc.maxFileSizeBytes = 1000 ftdc.maxNumFiles = 3 diff --git a/robot/impl/local_robot.go b/robot/impl/local_robot.go index a157f5afddf..980394be464 100644 --- a/robot/impl/local_robot.go +++ b/robot/impl/local_robot.go @@ -416,7 +416,12 @@ func newWithResources( var ftdcWorker *ftdc.FTDC if rOpts.enableFTDC { - ftdcWorker = ftdc.New(logger.Sublogger("ftdc")) + partID := "local-config" + if cfg.Cloud != nil { + partID = cfg.Cloud.ID + } + // CloudID is also known as the robot part id. + ftdcWorker = ftdc.New(ftdc.DefaultDirectory(config.ViamDotDir, partID), logger.Sublogger("ftdc")) ftdcWorker.Start() } diff --git a/robot/web/web.go b/robot/web/web.go index 3380ac4332a..9510657e68a 100644 --- a/robot/web/web.go +++ b/robot/web/web.go @@ -188,6 +188,8 @@ type Service interface { // Returns the unix socket path the module server listens on. ModuleAddress() string + + Stats() any } var internalWebServiceName = resource.NewName( diff --git a/robot/web/web_c.go b/robot/web/web_c.go index 30dfdb09236..c8b9f6ef33c 100644 --- a/robot/web/web_c.go +++ b/robot/web/web_c.go @@ -57,6 +57,12 @@ type webService struct { modWorkers sync.WaitGroup } +func (svc *webService) Stats() any { + return struct { + RPCServer any + }{svc.rpcServer.Stats()} +} + // Reconfigure pulls resources and updates the stream server audio and video streams with the new resources. func (svc *webService) Reconfigure(ctx context.Context, deps resource.Dependencies, _ resource.Config) error { svc.mu.Lock() diff --git a/web/server/entrypoint.go b/web/server/entrypoint.go index 3c1266a767b..a2b5b1fdbdc 100644 --- a/web/server/entrypoint.go +++ b/web/server/entrypoint.go @@ -48,7 +48,7 @@ type Arguments struct { OutputTelemetry bool `flag:"output-telemetry,usage=print out telemetry data (metrics and spans)"` DisableMulticastDNS bool `flag:"disable-mdns,usage=disable server discovery through multicast DNS"` DumpResourcesPath string `flag:"dump-resources,usage=dump all resource registrations as json to the provided file path"` - EnableFTDC bool `flag:"ftdc,usage=enable fulltime data capture for diagnostics [beta feature]"` + EnableFTDC bool `flag:"ftdc,default=true,usage=enable fulltime data capture for diagnostics"` } type robotServer struct {