Merge pull request #720 from travisn/backport-mon-race
Bug 2292435: mon: Remove extra mon from quorum before taking down pod
travisn authored Sep 9, 2024
2 parents acb3979 + 6df7135 commit 6c78c88
Showing 3 changed files with 108 additions and 26 deletions.
pkg/operator/ceph/cluster/mon/health.go (31 additions, 25 deletions)
@@ -730,31 +730,40 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro
}
logger.Infof("ensuring removal of unhealthy monitor %s", daemonName)

resourceName := resourceName(daemonName)

// Remove the mon pod if it is still there
var gracePeriod int64
propagation := metav1.DeletePropagationForeground
options := &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod, PropagationPolicy: &propagation}
if err := c.context.Clientset.AppsV1().Deployments(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil {
if kerrors.IsNotFound(err) {
logger.Infof("dead mon %s was already gone", resourceName)
} else {
logger.Errorf("failed to remove dead mon deployment %q. %v", resourceName, err)
}
}

// Remove the bad monitor from quorum
if shouldRemoveFromQuorum {
if err := c.removeMonitorFromQuorum(daemonName); err != nil {
logger.Errorf("failed to remove mon %q from quorum. %v", daemonName, err)
}
}
delete(c.ClusterInfo.Monitors, daemonName)

delete(c.mapping.Schedule, daemonName)

if err := c.saveMonConfig(); err != nil {
return errors.Wrapf(err, "failed to save mon config after failing over mon %s", daemonName)
}

// Update cluster-wide RBD bootstrap peer token since Monitors have changed
_, err := controller.CreateBootstrapPeerSecret(c.context, c.ClusterInfo, &cephv1.CephCluster{ObjectMeta: metav1.ObjectMeta{Name: c.ClusterInfo.NamespacedName().Name, Namespace: c.Namespace}}, c.ownerInfo)
if err != nil {
return errors.Wrap(err, "failed to update cluster rbd bootstrap peer token")
}

// When the mon is removed from quorum, it is possible that the operator will be restarted
// before the mon pod is deleted. In this case, the operator will need to delete the mon
// during the next reconcile. If the reconcile finds an extra mon pod, it will be removed
// at that later reconcile. Thus, we delete the mon pod last during the failover
// and in case the failover is interrupted, the operator can detect the resources to finish the cleanup.
c.removeMonResources(daemonName)
return nil
}

func (c *Cluster) removeMonResources(daemonName string) {
// Remove the service endpoint
resourceName := resourceName(daemonName)
var gracePeriod int64
propagation := metav1.DeletePropagationForeground
options := &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod, PropagationPolicy: &propagation}
if err := c.context.Clientset.CoreV1().Services(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil {
if kerrors.IsNotFound(err) {
logger.Infof("dead mon service %s was already gone", resourceName)
@@ -772,17 +781,14 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro
}
}

if err := c.saveMonConfig(); err != nil {
return errors.Wrapf(err, "failed to save mon config after failing over mon %s", daemonName)
}

// Update cluster-wide RBD bootstrap peer token since Monitors have changed
_, err := controller.CreateBootstrapPeerSecret(c.context, c.ClusterInfo, &cephv1.CephCluster{ObjectMeta: metav1.ObjectMeta{Name: c.ClusterInfo.NamespacedName().Name, Namespace: c.Namespace}}, c.ownerInfo)
if err != nil {
return errors.Wrap(err, "failed to update cluster rbd bootstrap peer token")
// Remove the mon pod if it is still there
if err := c.context.Clientset.AppsV1().Deployments(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil {
if kerrors.IsNotFound(err) {
logger.Infof("dead mon %s was already gone", resourceName)
} else {
logger.Errorf("failed to remove dead mon deployment %q. %v", resourceName, err)
}
}

return nil
}

func (c *Cluster) removeMonitorFromQuorum(name string) error {
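
For orientation on the reordering above: a minimal, hypothetical sketch (illustrative names, not Rook's actual types) of the failover sequence this change establishes. The quorum removal and mon config save are committed first, and the Kubernetes resources are deleted last, so an operator restart mid-failover leaves only an orphaned deployment for a later reconcile to detect and remove.

package main

import "log"

// monCleaner lists the failover steps; the interface and names are
// illustrative only, not Rook's actual API.
type monCleaner interface {
	removeFromQuorum(name string) error // Ceph-side removal, recorded in the mon map
	saveMonConfig() error               // persist the updated mon endpoints/config
	removeMonResources(name string)     // delete the k8s service and, last, the deployment
}

// failoverMon mirrors the ordering introduced in health.go: durable state
// changes happen before any Kubernetes deletes, so an interruption leaves an
// extra deployment that a later reconcile can clean up.
func failoverMon(c monCleaner, name string) error {
	if err := c.removeFromQuorum(name); err != nil {
		return err
	}
	if err := c.saveMonConfig(); err != nil {
		return err
	}
	// Deleted last on purpose: if the operator restarts before this call,
	// the next reconcile still finds the deployment and finishes the cleanup.
	c.removeMonResources(name)
	return nil
}

// noopCleaner is a stand-in so the sketch compiles and runs.
type noopCleaner struct{}

func (noopCleaner) removeFromQuorum(name string) error {
	log.Printf("remove mon %q from quorum", name)
	return nil
}

func (noopCleaner) saveMonConfig() error {
	log.Println("save mon config")
	return nil
}

func (noopCleaner) removeMonResources(name string) {
	log.Printf("delete service and deployment for mon %q", name)
}

func main() {
	if err := failoverMon(noopCleaner{}, "b"); err != nil {
		log.Fatal(err)
	}
}
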
pkg/operator/ceph/cluster/mon/mon.go (33 additions, 1 deletion)
@@ -1060,7 +1060,39 @@ func (c *Cluster) startDeployments(mons []*monConfig, requireAllInQuorum bool) e
requireAllInQuorum = true
}
}
return c.waitForMonsToJoin(mons, requireAllInQuorum)
err = c.waitForMonsToJoin(mons, requireAllInQuorum)

// Check for the rare case of an extra mon deployment that needs to be cleaned up
c.checkForExtraMonResources(mons, deployments.Items)
return err
}

func (c *Cluster) checkForExtraMonResources(mons []*monConfig, deployments []apps.Deployment) string {
if len(deployments) <= c.spec.Mon.Count || len(deployments) <= len(mons) {
return ""
}

// If there are more deployments than expected mons from the ceph quorum,
// find the extra mon deployment and clean it up.
logger.Infof("there is an extra mon deployment that is not needed and not in quorum")
for _, deploy := range deployments {
monName := deploy.Labels[controller.DaemonIDLabel]
found := false
// Search for the mon in the list of mons expected in quorum
for _, monDaemon := range mons {
if monName == monDaemon.DaemonName {
found = true
break
}
}
if !found {
logger.Infof("deleting extra mon deployment %q", deploy.Name)
c.removeMonResources(monName)
return monName
}
}

return ""
}

func (c *Cluster) waitForMonsToJoin(mons []*monConfig, requireAllInQuorum bool) error {
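
The detection added in checkForExtraMonResources reduces to a set difference between the daemon IDs on the deployments' ceph_daemon_id labels and the mons expected in quorum, guarded by the count checks. A standalone sketch of that matching logic, using plain string slices as an assumed simplification rather than the operator's types:

package main

import "fmt"

// findExtraMon returns the first deployed daemon ID that is not in the
// expected mon list, or "" when the counts already match. The inputs are
// plain strings for illustration; the operator works on Deployment objects
// and their "ceph_daemon_id" labels.
func findExtraMon(expected []string, deployed []string, desiredCount int) string {
	if len(deployed) <= desiredCount || len(deployed) <= len(expected) {
		return "" // nothing extra to clean up
	}
	inQuorum := make(map[string]bool, len(expected))
	for _, name := range expected {
		inQuorum[name] = true
	}
	for _, name := range deployed {
		if !inQuorum[name] {
			return name // extra deployment not expected in quorum
		}
	}
	return ""
}

func main() {
	// Mirrors the scenario in TestRemoveExtraMonDeployments: mon "b" has a
	// deployment but is not in the expected quorum of one mon.
	fmt.Println(findExtraMon([]string{"a"}, []string{"a", "b"}, 1)) // prints "b"
}
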
pkg/operator/ceph/cluster/mon/mon_test.go (44 additions, 0 deletions)
@@ -676,6 +676,50 @@ func TestMonVolumeClaimTemplate(t *testing.T) {
})
}
}

func TestRemoveExtraMonDeployments(t *testing.T) {
namespace := "ns"
context, err := newTestStartCluster(t, namespace)
assert.NoError(t, err)
c := newCluster(context, namespace, true, v1.ResourceRequirements{})
c.ClusterInfo = clienttest.CreateTestClusterInfo(1)

// Nothing to remove when the mons match the deployment
mons := []*monConfig{
{ResourceName: "rook-ceph-mon-a", DaemonName: "a"},
}
deployments := []apps.Deployment{
{
ObjectMeta: metav1.ObjectMeta{
Name: "rook-ceph-mon-a",
Labels: map[string]string{"ceph_daemon_id": "a"},
},
},
}
c.spec.Mon.Count = 1
removed := c.checkForExtraMonResources(mons, deployments)
assert.Equal(t, "", removed)

// Remove an extra mon deployment
deployments = append(deployments, apps.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "rook-ceph-mon-b",
Labels: map[string]string{"ceph_daemon_id": "b"},
}})
removed = c.checkForExtraMonResources(mons, deployments)
assert.Equal(t, "b", removed)

// Nothing to remove when there are not enough deployments for the expected mons
mons = []*monConfig{
{ResourceName: "rook-ceph-mon-a", DaemonName: "a"},
{ResourceName: "rook-ceph-mon-b", DaemonName: "b"},
{ResourceName: "rook-ceph-mon-c", DaemonName: "c"},
}
c.spec.Mon.Count = 3
removed = c.checkForExtraMonResources(mons, deployments)
assert.Equal(t, "", removed)
}

func TestStretchMonVolumeClaimTemplate(t *testing.T) {
generalSC := "generalSC"
zoneSC := "zoneSC"
