From 71acf22a409d708eef7fddb6f5c88beda5910a1e Mon Sep 17 00:00:00 2001 From: James Otting Date: Thu, 24 Oct 2024 14:08:39 -0400 Subject: [PATCH] [APP-6748] Fix deadlock when adding additional networks (#38) --- go.mod | 1 + go.sum | 2 + subsystems/provisioning/networkstate.go | 1 - subsystems/provisioning/provisioning.go | 66 +++++++++++++++++++++---- subsystems/syscfg/syscfg.go | 4 +- 5 files changed, 62 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 3501da3..9870411 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/viamrobotics/agent go 1.23.1 require ( + github.com/Masterminds/semver/v3 v3.3.0 github.com/Otterverse/gonetworkmanager/v2 v2.2.0 github.com/google/uuid v1.6.0 github.com/jessevdk/go-flags v1.6.1 diff --git a/go.sum b/go.sum index ed2ddae..c5ee894 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,8 @@ github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go github.com/Masterminds/goutils v1.1.0/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig v2.15.0+incompatible/go.mod h1:y6hNFY5UBTIWBxnzTeuNhlNS5hqE0NB0E6fgfo2Br3o= github.com/Masterminds/sprig v2.22.0+incompatible/go.mod h1:y6hNFY5UBTIWBxnzTeuNhlNS5hqE0NB0E6fgfo2Br3o= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= diff --git a/subsystems/provisioning/networkstate.go b/subsystems/provisioning/networkstate.go index c2a8cd4..6b8c72a 100644 --- a/subsystems/provisioning/networkstate.go +++ b/subsystems/provisioning/networkstate.go @@ -123,7 +123,6 @@ func (n *networkState) PrimarySSID(iface string) string { ssid, ok := n.primarySSID[iface] if !ok { - n.logger.Warnf("cannot find primary SSID for %s", iface) return "" } diff --git a/subsystems/provisioning/provisioning.go b/subsystems/provisioning/provisioning.go index 5f9b317..5ebade6 100644 --- a/subsystems/provisioning/provisioning.go +++ b/subsystems/provisioning/provisioning.go @@ -9,6 +9,7 @@ import ( "sync" "time" + semver "github.com/Masterminds/semver/v3" gnm "github.com/Otterverse/gonetworkmanager/v2" errw "github.com/pkg/errors" "github.com/viamrobotics/agent" @@ -32,6 +33,7 @@ type Provisioning struct { opMu sync.Mutex running bool disabled bool + noNM bool // used to stop main/bg loops cancel context.CancelFunc @@ -90,11 +92,46 @@ func NewProvisioning(ctx context.Context, logger logging.Logger, updateConf *age return w, nil } +func (w *Provisioning) getNM() (gnm.NetworkManager, error) { + nmErr := errw.New("NetworkManager does not appear to be responding as expected. " + + "Please ensure NetworkManger >= v1.42 is installed and enabled. Disabling agent-provisioning until next restart.") + + nm, err := gnm.NewNetworkManager() + if err != nil { + w.noNM = true + w.logger.Error(err) + return nil, nmErr + } + + ver, err := nm.GetPropertyVersion() + if err != nil { + w.noNM = true + w.logger.Error(err) + return nil, nmErr + } + + w.logger.Infof("Found NetworkManager version: %s", ver) + + sv, err := semver.NewVersion(ver) + if err != nil { + w.noNM = true + w.logger.Error(err) + return nil, nmErr + } + + if !sv.GreaterThanEqual(semver.MustParse("1.42.0")) { + w.noNM = true + return nil, nmErr + } + + return nm, nil +} + func (w *Provisioning) init(ctx context.Context) error { w.mainLoopHealth.MarkGood() w.bgLoopHealth.MarkGood() - nm, err := gnm.NewNetworkManager() + nm, err := w.getNM() if err != nil { return err } @@ -109,7 +146,7 @@ func (w *Provisioning) init(ctx context.Context) error { w.hostname, err = settings.GetPropertyHostname() if err != nil { - return errw.Wrap(err, "error getting hostname from NetworkManager, is NetworkManager installed and enabled?") + return errw.Wrap(err, "getting hostname from NetworkManager") } w.updateHotspotSSID(w.cfg) @@ -136,8 +173,11 @@ func (w *Provisioning) init(ctx context.Context) error { if w.cfg.RoamingMode { w.logger.Info("Roaming Mode enabled. Will try all connections for global internet connectivity.") } else { - w.logger.Infof("Default (Single Network) Mode enabled. Will directly connect only to primary network: %s", - w.netState.PrimarySSID(w.Config().HotspotInterface)) + primarySSID := w.netState.PrimarySSID(w.Config().HotspotInterface) + w.logger.Infof("Default (Single Network) Mode enabled. Will directly connect only to primary network: %s", primarySSID) + if primarySSID == "" { + w.logger.Warnf("cannot find primary SSID for %s", w.Config().HotspotInterface) + } } if err := w.checkConnections(); err != nil { @@ -164,8 +204,7 @@ func (w *Provisioning) Start(ctx context.Context) error { return nil } - if w.disabled { - w.logger.Infof("agent-provisioning disabled") + if w.disabled || w.noNM { return agent.ErrSubsystemDisabled } @@ -222,8 +261,15 @@ func (w *Provisioning) Update(ctx context.Context, updateConf *agentpb.DeviceSub var needRestart bool + if w.noNM { + return needRestart, nil + } + if w.disabled != updateConf.GetDisable() { w.disabled = updateConf.GetDisable() + if w.disabled { + w.logger.Infof("agent-provisioning disabled") + } needRestart = true } @@ -265,7 +311,7 @@ func (w *Provisioning) Update(ctx context.Context, updateConf *agentpb.DeviceSub func (w *Provisioning) HealthCheck(ctx context.Context) error { w.opMu.Lock() defer w.opMu.Unlock() - if w.disabled { + if w.disabled || w.noNM { return nil } @@ -293,12 +339,12 @@ func (w *Provisioning) processAdditionalnetworks(ctx context.Context) { } for _, network := range w.cfg.Networks { - _, err := w.AddOrUpdateConnection(network) + _, err := w.addOrUpdateConnection(network) if err != nil { w.logger.Error(errw.Wrapf(err, "error adding network %s", network.SSID)) } - if network.Interface != "" && w.Config().HotspotInterface != network.Interface { - if err := w.ActivateConnection(ctx, network.Interface, network.SSID); err != nil { + if network.Interface != "" { + if err := w.activateConnection(ctx, network.Interface, network.SSID); err != nil { w.logger.Error(err) } } diff --git a/subsystems/syscfg/syscfg.go b/subsystems/syscfg/syscfg.go index 50b2660..7e83598 100644 --- a/subsystems/syscfg/syscfg.go +++ b/subsystems/syscfg/syscfg.go @@ -55,6 +55,9 @@ func (s *syscfg) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig) (boo var needRestart bool if cfg.GetDisable() != s.disabled { s.disabled = cfg.GetDisable() + if s.disabled { + s.logger.Infof("agent-syscfg disabled") + } needRestart = true } @@ -90,7 +93,6 @@ func (s *syscfg) Start(ctx context.Context) error { } if s.disabled { - s.logger.Infof("agent-syscfg disabled") return agent.ErrSubsystemDisabled }