
Merge pull request #544 from red-hat-storage/sync_ds--release-1.13
Syncing latest changes from release-1.13 for rook
travisn authored Dec 8, 2023
2 parents f3778a5 + f7fe2e4 commit 890a206
Showing 4 changed files with 9 additions and 20 deletions.
6 changes: 1 addition & 5 deletions cmd/rook/ceph/osd.go
@@ -326,12 +326,8 @@ func removeOSDs(cmd *cobra.Command, args []string) error {
  return errors.Wrapf(err, "failed to parse --preserve-pvc flag")
  }

- exitIfNotSafe := false
- forceRemovalCallback := func(x int) (bool, bool) {
- return forceOSDRemovalBool, exitIfNotSafe
- }
  // Run OSD remove sequence
- err = osddaemon.RemoveOSDs(context, &clusterInfo, strings.Split(osdIDsToRemove, ","), preservePVCBool, forceRemovalCallback)
+ err = osddaemon.RemoveOSDs(context, &clusterInfo, strings.Split(osdIDsToRemove, ","), preservePVCBool, forceOSDRemovalBool)
  if err != nil {
  rook.TerminateFatal(err)
  }
2 changes: 1 addition & 1 deletion deploy/charts/rook-ceph/templates/clusterrole.yaml
@@ -76,7 +76,6 @@ rules:
  # Node access is needed for determining nodes where mons should run
  - nodes
  - nodes/proxy
- - services
  # Rook watches secrets which it uses to configure access to external resources.
  # e.g., external Ceph cluster or object store
  - secrets
@@ -96,6 +95,7 @@ rules:
  - persistentvolumeclaims
  # Rook creates endpoints for mgr and object store access
  - endpoints
+ - services
  verbs:
  - get
  - list
2 changes: 1 addition & 1 deletion deploy/examples/common.yaml
@@ -229,7 +229,6 @@ rules:
  # Node access is needed for determining nodes where mons should run
  - nodes
  - nodes/proxy
- - services
  # Rook watches secrets which it uses to configure access to external resources.
  # e.g., external Ceph cluster or object store
  - secrets
@@ -249,6 +248,7 @@ rules:
  - persistentvolumeclaims
  # Rook creates endpoints for mgr and object store access
  - endpoints
+ - services
  verbs:
  - get
  - list
19 changes: 6 additions & 13 deletions pkg/daemon/ceph/osd/remove.go
@@ -33,7 +33,7 @@ import (
  )

  // RemoveOSDs purges a list of OSDs from the cluster
- func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osdsToRemove []string, preservePVC bool, forceRemovalCallback func(osdID int) (bool, bool)) error {
+ func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osdsToRemove []string, preservePVC, forceOSDRemoval bool) error {
  // Generate the ceph config for running ceph commands similar to the operator
  if err := client.WriteCephConfig(context, clusterInfo); err != nil {
  return errors.Wrap(err, "failed to write the ceph config")
@@ -63,13 +63,13 @@ func RemoveOSDs(context *clusterd.Context, clusterInfo *client.ClusterInfo, osds
  logger.Infof("osd.%d is marked 'DOWN'", osdID)
  }

- removeOSD(context, clusterInfo, osdID, preservePVC, forceRemovalCallback)
+ removeOSD(context, clusterInfo, osdID, preservePVC, forceOSDRemoval)
  }

  return nil
  }

- func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInfo, osdID int, preservePVC bool, forceRemovalCallback func(osdID int) (bool, bool)) {
+ func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInfo, osdID int, preservePVC, forceOSDRemoval bool) {
  // Get the host where the OSD is found
  hostName, err := client.GetCrushHostName(clusterdContext, clusterInfo, osdID)
  if err != nil {
@@ -83,21 +83,17 @@ func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInf
  if err != nil {
  logger.Errorf("failed to exclude osd.%d out of the crush map. %v", osdID, err)
  }
- forceRemoval, exitIfNotSafe := forceRemovalCallback(osdID)

  // Check we can remove the OSD
  // Loop forever until the osd is safe-to-destroy
  for {
  isSafeToDestroy, err := client.OsdSafeToDestroy(clusterdContext, clusterInfo, osdID)
  if err != nil {
  // If we want to force remove the OSD and there was an error let's break outside of
  // the loop and proceed with the OSD removal

- if forceRemoval {
+ if forceOSDRemoval {
  logger.Errorf("failed to check if osd %d is safe to destroy, but force removal is enabled so proceeding with removal. %v", osdID, err)
  break
- } else if exitIfNotSafe {
- logger.Error("osd.%d is not safe to destroy")
- return
  } else {
  logger.Errorf("failed to check if osd %d is safe to destroy, retrying in 1m. %v", osdID, err)
  time.Sleep(1 * time.Minute)
@@ -111,12 +107,9 @@ func removeOSD(clusterdContext *clusterd.Context, clusterInfo *client.ClusterInf
  break
  } else {
  // If we arrive here and forceOSDRemoval is true, we should proceed with the OSD removal
- if forceRemoval {
+ if forceOSDRemoval {
  logger.Infof("osd.%d is NOT ok to destroy but force removal is enabled so proceeding with removal", osdID)
  break
- } else if exitIfNotSafe {
- logger.Error("osd.%d is not safe to destroy")
- return
  }
  // Else we wait until the OSD can be removed
  logger.Warningf("osd.%d is NOT ok to destroy, retrying in 1m until success", osdID)
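For readers skimming the change: the net effect in remove.go is that the per-OSD removal loop now consults a plain forceOSDRemoval boolean instead of the forceRemovalCallback/exitIfNotSafe pair. The standalone Go sketch below illustrates only that control flow under stated assumptions; osdSafeToDestroy and waitUntilRemovable are hypothetical stand-ins written for this example, not Rook APIs.

package main

import (
	"fmt"
	"time"
)

// osdSafeToDestroy is a hypothetical stand-in for the Ceph safe-to-destroy
// check (client.OsdSafeToDestroy in Rook); here it always reports "not safe".
func osdSafeToDestroy(osdID int) (bool, error) {
	return false, nil
}

// waitUntilRemovable polls until the OSD reports safe-to-destroy. When
// forceOSDRemoval is true it stops waiting, mirroring the simplified logic
// that replaced the callback in this commit.
func waitUntilRemovable(osdID int, forceOSDRemoval bool, retry time.Duration) {
	for {
		safe, err := osdSafeToDestroy(osdID)
		if err != nil {
			if forceOSDRemoval {
				fmt.Printf("osd.%d: status check failed, force removal enabled, proceeding: %v\n", osdID, err)
				return
			}
			fmt.Printf("osd.%d: status check failed, retrying: %v\n", osdID, err)
			time.Sleep(retry)
			continue
		}
		if safe {
			fmt.Printf("osd.%d is safe to destroy\n", osdID)
			return
		}
		if forceOSDRemoval {
			fmt.Printf("osd.%d is NOT safe to destroy, but force removal is enabled, proceeding\n", osdID)
			return
		}
		fmt.Printf("osd.%d is NOT safe to destroy, retrying\n", osdID)
		time.Sleep(retry)
	}
}

func main() {
	// With the force flag set, the loop returns on the first iteration.
	waitUntilRemovable(3, true, time.Second)
}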
