From 8c252d58eacd04fe2d43e59c451abcf71882d87b Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Wed, 25 Sep 2024 11:36:32 +0200 Subject: [PATCH 1/5] rbd: prevent re-use of destroyed resources When an `.Destroy()` is called on an rbdImage (or rbdVolume or rbdSnapshot), the IOContext, Connection and other attributes are invalid. When using a destroyed resource that points to an object that was allocated through librbd, the process most likely ends with a panic. Signed-off-by: Niels de Vos --- internal/rbd/rbd_util.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index e363e391cc1..65b2b57280e 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -390,15 +390,19 @@ func (ri *rbdImage) Connect(cr *util.Credentials) error { func (ri *rbdImage) Destroy(ctx context.Context) { if ri.ioctx != nil { ri.ioctx.Destroy() + ri.ioctx = nil } if ri.conn != nil { ri.conn.Destroy() + ri.conn = nil } if ri.isBlockEncrypted() { ri.blockEncryption.Destroy() + ri.blockEncryption = nil } if ri.isFileEncrypted() { ri.fileEncryption.Destroy() + ri.fileEncryption = nil } } From 01a0ec2d8ca4e9372dbc148079a2a500352c7656 Mon Sep 17 00:00:00 2001 From: Nikhil-Ladha Date: Wed, 25 Sep 2024 15:45:28 +0530 Subject: [PATCH 2/5] util: use protobuf encoding for core k8s apis For core K8s API objects like Pods, Nodes, etc., we can use protobuf encoding which reduces CPU consumption related to (de)serialization, reduces overall latency of the API call, reduces memory footprint, reduces the amount of work performed by the GC and results in quicker propagation of objects to event handlers of shared informers. Signed-off-by: Nikhil-Ladha --- internal/controller/controller.go | 8 +++++++- internal/util/k8s/client.go | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 37066045e91..5c4693d7d22 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -20,6 +20,8 @@ import ( "github.com/ceph/ceph-csi/internal/util/log" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/leaderelection/resourcelock" clientConfig "sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -69,7 +71,11 @@ func Start(config Config) error { LeaderElectionResourceLock: resourcelock.LeasesResourceLock, LeaderElectionID: electionID, } - mgr, err := manager.New(clientConfig.GetConfigOrDie(), opts) + + kubeConfig := clientConfig.GetConfigOrDie() + coreKubeConfig := rest.CopyConfig(kubeConfig) + coreKubeConfig.ContentType = runtime.ContentTypeProtobuf + mgr, err := manager.New(coreKubeConfig, opts) if err != nil { log.ErrorLogMsg("failed to create manager %s", err) diff --git a/internal/util/k8s/client.go b/internal/util/k8s/client.go index d5629fee54d..c7731db04e9 100644 --- a/internal/util/k8s/client.go +++ b/internal/util/k8s/client.go @@ -20,6 +20,7 @@ import ( "fmt" "os" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -47,6 +48,7 @@ func NewK8sClient() (*kubernetes.Clientset, error) { return nil, fmt.Errorf("failed to get cluster config: %w", err) } } + cfg.ContentType = runtime.ContentTypeProtobuf client, err := kubernetes.NewForConfig(cfg) if err != nil { return nil, fmt.Errorf("failed to create client: %w", err) From f2bc1c674bad36e918d2fde5187f945ce152f0c8 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Tue, 17 Sep 2024 14:07:19 +0200 Subject: [PATCH 3/5] rbd: replace Manager.DeleteVolumeGroup() by VolumeGroup.Delete() There is no need for the `Manager.DeleteVolumeGroup()` function as `VolumeGroup.Delete()` should cover everything too. By moving the `.Delete()` functionality of removing the group from the journal to the shared `commonVolumeGroup` type, a volume group snaphot can use it as well. Signed-off-by: Niels de Vos --- internal/csi-addons/rbd/volumegroup.go | 2 +- internal/rbd/group/util.go | 25 ++++++++++++++++ internal/rbd/group/volume_group.go | 2 +- internal/rbd/manager.go | 41 -------------------------- internal/rbd/types/manager.go | 4 --- 5 files changed, 27 insertions(+), 47 deletions(-) diff --git a/internal/csi-addons/rbd/volumegroup.go b/internal/csi-addons/rbd/volumegroup.go index 1b418a0827f..065a6c517f8 100644 --- a/internal/csi-addons/rbd/volumegroup.go +++ b/internal/csi-addons/rbd/volumegroup.go @@ -222,7 +222,7 @@ func (vs *VolumeGroupServer) DeleteVolumeGroup( } // delete the volume group - err = mgr.DeleteVolumeGroup(ctx, vg) + err = vg.Delete(ctx) if err != nil { return nil, status.Errorf(codes.Internal, "failed to delete volume group %q: %s", diff --git a/internal/rbd/group/util.go b/internal/rbd/group/util.go index 8e7f76eb30b..0271cfc81c5 100644 --- a/internal/rbd/group/util.go +++ b/internal/rbd/group/util.go @@ -233,6 +233,31 @@ func (cvg *commonVolumeGroup) GetIOContext(ctx context.Context) (*rados.IOContex return ioctx, nil } +// Delete removes the volume group from the journal. +func (cvg *commonVolumeGroup) Delete(ctx context.Context) error { + name, err := cvg.GetName(ctx) + if err != nil { + return fmt.Errorf("failed to get name for volume group %q: %w", cvg, err) + } + + csiID, err := cvg.GetID(ctx) + if err != nil { + return fmt.Errorf("failed to get id for volume group %q: %w", cvg, err) + } + + pool, err := cvg.GetPool(ctx) + if err != nil { + return fmt.Errorf("failed to get pool for volume group %q: %w", cvg, err) + } + + err = cvg.journal.UndoReservation(ctx, pool, name, csiID) + if err != nil /* TODO? !errors.Is(..., err) */ { + return fmt.Errorf("failed to undo the reservation for volume group %q: %w", cvg, err) + } + + return nil +} + // GetCreationTime fetches the creation time of the volume group from the // journal and returns it. func (cvg *commonVolumeGroup) GetCreationTime(ctx context.Context) (*time.Time, error) { diff --git a/internal/rbd/group/volume_group.go b/internal/rbd/group/volume_group.go index 337c974ef09..7de46131a3a 100644 --- a/internal/rbd/group/volume_group.go +++ b/internal/rbd/group/volume_group.go @@ -194,7 +194,7 @@ func (vg *volumeGroup) Delete(ctx context.Context) error { log.DebugLog(ctx, "volume group %q has been removed", vg) - return nil + return vg.commonVolumeGroup.Delete(ctx) } func (vg *volumeGroup) AddVolume(ctx context.Context, vol types.Volume) error { diff --git a/internal/rbd/manager.go b/internal/rbd/manager.go index 3c41afdc69d..b8c9c2f7da9 100644 --- a/internal/rbd/manager.go +++ b/internal/rbd/manager.go @@ -21,8 +21,6 @@ import ( "errors" "fmt" - "github.com/ceph/go-ceph/rados" - "github.com/ceph/ceph-csi/internal/journal" rbd_group "github.com/ceph/ceph-csi/internal/rbd/group" "github.com/ceph/ceph-csi/internal/rbd/types" @@ -255,42 +253,3 @@ func (mgr *rbdManager) CreateVolumeGroup(ctx context.Context, name string) (type return vg, nil } - -func (mgr *rbdManager) DeleteVolumeGroup(ctx context.Context, vg types.VolumeGroup) error { - err := vg.Delete(ctx) - if err != nil && !errors.Is(rados.ErrNotFound, err) { - return fmt.Errorf("failed to delete volume group %q: %w", vg, err) - } - - clusterID, err := vg.GetClusterID(ctx) - if err != nil { - return fmt.Errorf("failed to get cluster id for volume group %q: %w", vg, err) - } - - vgJournal, err := mgr.getVolumeGroupJournal(clusterID) - if err != nil { - return err - } - - name, err := vg.GetName(ctx) - if err != nil { - return fmt.Errorf("failed to get name for volume group %q: %w", vg, err) - } - - csiID, err := vg.GetID(ctx) - if err != nil { - return fmt.Errorf("failed to get id for volume group %q: %w", vg, err) - } - - pool, err := vg.GetPool(ctx) - if err != nil { - return fmt.Errorf("failed to get pool for volume group %q: %w", vg, err) - } - - err = vgJournal.UndoReservation(ctx, pool, name, csiID) - if err != nil /* TODO? !errors.Is(..., err) */ { - return fmt.Errorf("failed to undo the reservation for volume group %q: %w", vg, err) - } - - return nil -} diff --git a/internal/rbd/types/manager.go b/internal/rbd/types/manager.go index 33497493226..2e9521570b5 100644 --- a/internal/rbd/types/manager.go +++ b/internal/rbd/types/manager.go @@ -43,8 +43,4 @@ type Manager interface { // CreateVolumeGroup allocates a new VolumeGroup in the backend storage // and records details about it in the journal. CreateVolumeGroup(ctx context.Context, name string) (VolumeGroup, error) - - // DeleteVolumeGroup removes VolumeGroup from the backend storage and - // any details from the journal. - DeleteVolumeGroup(ctx context.Context, vg VolumeGroup) error } From 9c567fd8a04adfe2b877998e354c1b2bb57bc044 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Thu, 26 Sep 2024 10:54:44 +0200 Subject: [PATCH 4/5] doc: add cephfs vsg to readme added cephfs vsg feature to the readme https://docs.ceph.com/en/latest/ cephfs/fs-volumes/#subvolume-quiesce Signed-off-by: Madhu Rajanna --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f7f9a9d584f..7e216c477d8 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,7 @@ for its support details. | | Dynamically provision, de-provision File mode ROX volume | Alpha | >= v3.0.0 | >= v1.0.0 | Pacific (>=v16.2.0) | >= v1.14.0 | | | Dynamically provision, de-provision File mode RWOP volume | Alpha | >= v3.5.0 | >= v1.5.0 | Pacific (>=v16.2.0) | >= v1.22.0 | | | Creating and deleting snapshot | GA | >= v3.1.0 | >= v1.0.0 | Pacific (>=v16.2.0) | >= v1.17.0 | +| | Creating and deleting volume group snapshot | Alpha | >= v3.11.0 | >= v1.9.0 | Squid (>=v19.0.0) | >= v1.31.0 | | | Provision volume from snapshot | GA | >= v3.1.0 | >= v1.0.0 | Pacific (>=v16.2.0) | >= v1.17.0 | | | Provision volume from another volume | GA | >= v3.1.0 | >= v1.0.0 | Pacific (>=v16.2.0) | >= v1.16.0 | | | Expand volume | Beta | >= v2.0.0 | >= v1.1.0 | Pacific (>=v16.2.0) | >= v1.15.0 | From 2d82cebfebcc8beeef2136804063a6bf8db4076d Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Tue, 24 Sep 2024 12:55:26 +0200 Subject: [PATCH 5/5] rbd: move repairImageID() from rbdVolume struct to rbdImage The `repairImageID()` function is useful for the `rbdSnapshot` objects as well. Move it to the `rbdImage` struct that is the base for both `rbdVolume` and `rbdSnapshot`. Signed-off-by: Niels de Vos --- internal/rbd/rbd_journal.go | 17 +++++++++-------- internal/rbd/rbd_util.go | 3 +-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/rbd/rbd_journal.go b/internal/rbd/rbd_journal.go index 44c951c5338..38f4c8c705c 100644 --- a/internal/rbd/rbd_journal.go +++ b/internal/rbd/rbd_journal.go @@ -352,29 +352,30 @@ func (rv *rbdVolume) Exists(ctx context.Context, parentVol *rbdVolume) (bool, er return true, nil } -// repairImageID checks if rv.ImageID is already available (if so, it was +// repairImageID checks if ri.ImageID is already available (if so, it was // fetched from the journal), in case it is missing, the imageID is obtained // and stored in the journal. // if the force is set to true, the latest imageID will get added/updated in OMAP. -func (rv *rbdVolume) repairImageID(ctx context.Context, j *journal.Connection, force bool) error { +func (ri *rbdImage) repairImageID(ctx context.Context, j *journal.Connection, force bool) error { if force { // reset the imageID so that we can fetch latest imageID from ceph cluster. - rv.ImageID = "" + ri.ImageID = "" } - if rv.ImageID != "" { + if ri.ImageID != "" { return nil } - err := rv.getImageID() + err := ri.getImageID() if err != nil { - log.ErrorLog(ctx, "failed to get image id %s: %v", rv, err) + log.ErrorLog(ctx, "failed to get image id %s: %v", ri, err) return err } - err = j.StoreImageID(ctx, rv.JournalPool, rv.ReservedID, rv.ImageID) + + err = j.StoreImageID(ctx, ri.JournalPool, ri.ReservedID, ri.ImageID) if err != nil { - log.ErrorLog(ctx, "failed to store volume id %s: %v", rv, err) + log.ErrorLog(ctx, "failed to store volume id %s: %v", ri, err) return err } diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 65b2b57280e..1beb4f5318f 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -110,6 +110,7 @@ type rbdImage struct { // This does not have a JSON tag as it is not stashed in JSON encoded // config maps in v1.0.0 RequestName string + ReservedID string NamePrefix string // ParentName represents the parent image name of the image. ParentName string @@ -167,7 +168,6 @@ type rbdVolume struct { AdminID string UserID string Mounter string - ReservedID string MapOptions string UnmapOptions string LogDir string @@ -190,7 +190,6 @@ type rbdSnapshot struct { // SourceVolumeID is the volume ID of RbdImageName, that is exchanged with CSI drivers // RbdSnapName is the name of the RBD snapshot backing this rbdSnapshot SourceVolumeID string - ReservedID string RbdSnapName string }