Skip to content

Commit

Permalink
feat(v2 upgrade): expose NVMe subsystem status
Browse files Browse the repository at this point in the history
Longhorn 9104

Signed-off-by: Derek Su <[email protected]>
  • Loading branch information
derekbit committed Nov 27, 2024
1 parent 8acec77 commit 99f98a7
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 10 deletions.
31 changes: 31 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"github.com/longhorn/types/pkg/generated/spdkrpc"
"google.golang.org/protobuf/types/known/emptypb"

helpertypes "github.com/longhorn/go-spdk-helper/pkg/types"

"github.com/longhorn/longhorn-spdk-engine/pkg/types"
)

Expand Down Expand Up @@ -40,6 +42,18 @@ type Lvol struct {
SnapshotTimestamp string `json:"snapshot_timestamp"`
}

// NvmeDevicePath describes a single NVMe path of a subsystem as exposed through
// the API: the transport endpoint it uses and the state of its controller.
type NvmeDevicePath struct {
// Trtype is the NVMe transport type (the engine reports TCP here).
Trtype string `json:"trtype"`
// Traddr is the transport address of the path.
Traddr string `json:"traddr"`
// Trsvcid is the transport service ID of the path.
Trsvcid string `json:"trsvcid"`
// SrcAddr is the source address of the path.
SrcAddr string `json:"src_addr"`
// State is the NVMe controller state of the path (e.g. connecting or resetting).
State helpertypes.NVMeControllerState `json:"state"`
}

// NvmeSubsystem groups the NVMe device paths that belong to one subsystem.
type NvmeSubsystem struct {
// Paths maps a path name to its description. The engine keys entries by the
// initiator's controller name.
Paths map[string]*NvmeDevicePath `json:"paths"`
}

func ProtoLvolToLvol(l *spdkrpc.Lvol) *Lvol {
if l == nil {
return nil
Expand Down Expand Up @@ -138,9 +152,25 @@ type Engine struct {
Endpoint string `json:"endpoint"`
State string `json:"state"`
ErrorMsg string `json:"error_msg"`
NvmeSubsystem NvmeSubsystem `json:"nvme_subsystem"`
}

func ProtoEngineToEngine(e *spdkrpc.Engine) *Engine {
nvme := NvmeSubsystem{
Paths: map[string]*NvmeDevicePath{},
}
if e.NvmeSubsystem != nil {
for pathName, path := range e.NvmeSubsystem.Paths {
nvme.Paths[pathName] = &NvmeDevicePath{
Trtype: path.Trtype,
Traddr: path.Traddr,
Trsvcid: path.Trsvcid,
SrcAddr: path.SrcAddr,
State: helpertypes.NVMeControllerState(path.State),
}
}
}

res := &Engine{
Name: e.Name,
VolumeName: e.VolumeName,
Expand All @@ -159,6 +189,7 @@ func ProtoEngineToEngine(e *spdkrpc.Engine) *Engine {
Endpoint: e.Endpoint,
State: e.State,
ErrorMsg: e.ErrorMsg,
NvmeSubsystem: nvme,
}
for rName, mode := range e.ReplicaModeMap {
res.ReplicaModeMap[rName] = types.GRPCReplicaModeToReplicaMode(mode)
Expand Down
92 changes: 82 additions & 10 deletions pkg/spdk/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,22 @@ import (

"github.com/pkg/errors"
"github.com/sirupsen/logrus"

grpccodes "google.golang.org/grpc/codes"
grpcstatus "google.golang.org/grpc/status"

"github.com/longhorn/go-spdk-helper/pkg/jsonrpc"
"github.com/longhorn/go-spdk-helper/pkg/nvme"
"github.com/longhorn/types/pkg/generated/spdkrpc"

commonbitmap "github.com/longhorn/go-common-libs/bitmap"
commonnet "github.com/longhorn/go-common-libs/net"
commontypes "github.com/longhorn/go-common-libs/types"
commonutils "github.com/longhorn/go-common-libs/utils"
"github.com/longhorn/go-spdk-helper/pkg/jsonrpc"
"github.com/longhorn/go-spdk-helper/pkg/nvme"
spdkclient "github.com/longhorn/go-spdk-helper/pkg/spdk/client"
spdktypes "github.com/longhorn/go-spdk-helper/pkg/spdk/types"
helpertypes "github.com/longhorn/go-spdk-helper/pkg/types"
helperutil "github.com/longhorn/go-spdk-helper/pkg/util"
"github.com/longhorn/types/pkg/generated/spdkrpc"

"github.com/longhorn/longhorn-spdk-engine/pkg/api"
"github.com/longhorn/longhorn-spdk-engine/pkg/client"
Expand All @@ -47,6 +49,7 @@ type Engine struct {
Endpoint string
Nqn string
Nguid string
NvmeSubsystem api.NvmeSubsystem

ReplicaStatusMap map[string]*EngineReplicaStatus

Expand Down Expand Up @@ -99,6 +102,10 @@ func NewEngine(engineName, volumeName, frontend string, specSize uint64, engineU

SnapshotMap: map[string]*api.Lvol{},

NvmeSubsystem: api.NvmeSubsystem{
Paths: map[string]*api.NvmeDevicePath{},
},

Check warning on line 108 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L105-L108

Added lines #L105 - L108 were not covered by tests
UpdateCh: engineUpdateCh,

log: log,
Expand Down Expand Up @@ -613,6 +620,19 @@ func (e *Engine) Get() (res *spdkrpc.Engine) {
}

func (e *Engine) getWithoutLock() (res *spdkrpc.Engine) {
nvmeSubsystem := &spdkrpc.NvmeSubsystem{
Paths: map[string]*spdkrpc.NvmeDevicePath{},
}
for pathName, path := range e.NvmeSubsystem.Paths {
nvmeSubsystem.Paths[pathName] = &spdkrpc.NvmeDevicePath{
Trtype: path.Trtype,
Traddr: path.Traddr,
Trsvcid: path.Trsvcid,
SrcAddr: path.SrcAddr,
State: string(path.State),
}
}

Check warning on line 634 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L623-L634

Added lines #L623 - L634 were not covered by tests

res = &spdkrpc.Engine{
Name: e.Name,
SpecSize: e.SpecSize,
Expand All @@ -629,6 +649,7 @@ func (e *Engine) getWithoutLock() (res *spdkrpc.Engine) {
Endpoint: e.Endpoint,
State: string(e.State),
ErrorMsg: e.ErrorMsg,
NvmeSubsystem: nvmeSubsystem,

Check warning on line 652 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L652

Added line #L652 was not covered by tests
}

for replicaName, replicaStatus := range e.ReplicaStatusMap {
Expand All @@ -643,11 +664,50 @@ func (e *Engine) getWithoutLock() (res *spdkrpc.Engine) {
return res
}

func areNvmeSubsystemsEqual(old, new *api.NvmeSubsystem) bool {

Check notice on line 667 in pkg/spdk/engine.go

View check run for this annotation

codefactor.io / CodeFactor

pkg/spdk/engine.go#L667

Redefinition of the built-in function new. (redefines-builtin-id)
if len(old.Paths) != len(new.Paths) {
return false
}
for oldPathName, oldPath := range old.Paths {
newPath, ok := new.Paths[oldPathName]
if !ok {
return false
}
if oldPath.Trtype != newPath.Trtype ||
oldPath.Traddr != newPath.Traddr ||
oldPath.Trsvcid != newPath.Trsvcid ||
oldPath.SrcAddr != newPath.SrcAddr ||
oldPath.State != newPath.State {
return false
}

Check warning on line 682 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L667-L682

Added lines #L667 - L682 were not covered by tests

}
return true

Check warning on line 685 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L685

Added line #L685 was not covered by tests
}

func (e *Engine) ValidateAndUpdate(spdkClient *spdkclient.Client) (err error) {
updateRequired := false

e.Lock()
existingNvemSubsystem := api.NvmeSubsystem{
Paths: map[string]*api.NvmeDevicePath{},
}
for pathName, path := range e.NvmeSubsystem.Paths {
existingNvemSubsystem.Paths[pathName] = &api.NvmeDevicePath{
Trtype: path.Trtype,
Traddr: path.Traddr,
Trsvcid: path.Trsvcid,
SrcAddr: path.SrcAddr,
State: path.State,
}
}

Check warning on line 703 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L692-L703

Added lines #L692 - L703 were not covered by tests

defer func() {
if !areNvmeSubsystemsEqual(&existingNvemSubsystem, &e.NvmeSubsystem) {
e.log.Infof("Found difference between existing nvme subsystem %+v and updated nvme subsystem %+v during ValidateAndUpdate", existingNvemSubsystem, e.NvmeSubsystem)
updateRequired = true
}

Check warning on line 709 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L706-L709

Added lines #L706 - L709 were not covered by tests

e.Unlock()

if updateRequired {
Expand Down Expand Up @@ -927,14 +987,26 @@ func (e *Engine) validateAndUpdateFrontend(subsystemMap map[string]*spdktypes.Nv
}
e.initiator = initiator
}
if err := e.initiator.LoadNVMeDeviceInfo(e.initiator.TransportAddress, e.initiator.TransportServiceID, e.initiator.SubsystemNQN); err != nil {
if strings.Contains(err.Error(), "connecting state") ||
strings.Contains(err.Error(), "resetting state") {
e.log.WithError(err).Warnf("Ignored to validate and update engine %v, because the device is still in a transient state", e.Name)
return nil
}
return err

err = e.initiator.LoadNVMeDeviceInfo(e.initiator.TransportAddress, e.initiator.TransportServiceID, e.initiator.SubsystemNQN)
if err != nil {
return errors.Wrapf(err, "failed to load NVMe device info for engine %v", e.Name)

Check warning on line 993 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L991-L993

Added lines #L991 - L993 were not covered by tests
}
e.NvmeSubsystem.Paths[e.initiator.ControllerName] = &api.NvmeDevicePath{
Trtype: string(spdktypes.NvmeTransportTypeTCP),
Traddr: e.initiator.TransportAddress,
Trsvcid: e.initiator.TransportServiceID,
SrcAddr: e.initiator.SourceAddress,
State: e.initiator.ControllerState,
}

if e.NvmeSubsystem.Paths[e.initiator.ControllerName].State == helpertypes.NVMeControllerStateConnecting ||
e.NvmeSubsystem.Paths[e.initiator.ControllerName].State == helpertypes.NVMeControllerStateResetting {
e.log.WithError(err).Warnf("Ignored to validate and update engine %v, because the device is still in a %v state",
e.Name, e.NvmeSubsystem.Paths[e.initiator.ControllerName].State)
return nil
}

Check warning on line 1008 in pkg/spdk/engine.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/engine.go#L995-L1008

Added lines #L995 - L1008 were not covered by tests

if err := e.initiator.LoadEndpoint(e.dmDeviceBusy); err != nil {
return err
}
Expand Down

0 comments on commit 99f98a7

Please sign in to comment.