diff --git a/pkg/operator/ceph/cluster/mon/health.go b/pkg/operator/ceph/cluster/mon/health.go
index df8de52878a0..a8fe149fe02c 100644
--- a/pkg/operator/ceph/cluster/mon/health.go
+++ b/pkg/operator/ceph/cluster/mon/health.go
@@ -730,20 +730,6 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro
 	}
 	logger.Infof("ensuring removal of unhealthy monitor %s", daemonName)
 
-	resourceName := resourceName(daemonName)
-
-	// Remove the mon pod if it is still there
-	var gracePeriod int64
-	propagation := metav1.DeletePropagationForeground
-	options := &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod, PropagationPolicy: &propagation}
-	if err := c.context.Clientset.AppsV1().Deployments(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil {
-		if kerrors.IsNotFound(err) {
-			logger.Infof("dead mon %s was already gone", resourceName)
-		} else {
-			logger.Errorf("failed to remove dead mon deployment %q. %v", resourceName, err)
-		}
-	}
-
 	// Remove the bad monitor from quorum
 	if shouldRemoveFromQuorum {
 		if err := c.removeMonitorFromQuorum(daemonName); err != nil {
@@ -751,10 +737,33 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro
 		}
 	}
 	delete(c.ClusterInfo.Monitors, daemonName)
-	delete(c.mapping.Schedule, daemonName)
 
+	if err := c.saveMonConfig(); err != nil {
+		return errors.Wrapf(err, "failed to save mon config after failing over mon %s", daemonName)
+	}
+
+	// Update cluster-wide RBD bootstrap peer token since Monitors have changed
+	_, err := controller.CreateBootstrapPeerSecret(c.context, c.ClusterInfo, &cephv1.CephCluster{ObjectMeta: metav1.ObjectMeta{Name: c.ClusterInfo.NamespacedName().Name, Namespace: c.Namespace}}, c.ownerInfo)
+	if err != nil {
+		return errors.Wrap(err, "failed to update cluster rbd bootstrap peer token")
+	}
+
+	// When the mon is removed from quorum, it is possible that the operator will be restarted
+	// before the mon pod is deleted. In this case, the operator will need to delete the mon
+	// during the next reconcile. If the reconcile finds an extra mon pod, it will be removed
+	// at that later reconcile. Thus, we delete the mon pod last during the failover
+	// and in case the failover is interrupted, the operator can detect the resources to finish the cleanup.
+	c.removeMonResources(daemonName)
+	return nil
+}
+
+func (c *Cluster) removeMonResources(daemonName string) {
 	// Remove the service endpoint
+	resourceName := resourceName(daemonName)
+	var gracePeriod int64
+	propagation := metav1.DeletePropagationForeground
+	options := &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod, PropagationPolicy: &propagation}
 	if err := c.context.Clientset.CoreV1().Services(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil {
 		if kerrors.IsNotFound(err) {
 			logger.Infof("dead mon service %s was already gone", resourceName)
 		} else {
@@ -772,17 +781,14 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro
 		}
 	}
 
-	if err := c.saveMonConfig(); err != nil {
-		return errors.Wrapf(err, "failed to save mon config after failing over mon %s", daemonName)
-	}
-
-	// Update cluster-wide RBD bootstrap peer token since Monitors have changed
-	_, err := controller.CreateBootstrapPeerSecret(c.context, c.ClusterInfo, &cephv1.CephCluster{ObjectMeta: metav1.ObjectMeta{Name: c.ClusterInfo.NamespacedName().Name, Namespace: c.Namespace}}, c.ownerInfo)
-	if err != nil {
-		return errors.Wrap(err, "failed to update cluster rbd bootstrap peer token")
+	// Remove the mon pod if it is still there
+	if err := c.context.Clientset.AppsV1().Deployments(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil {
+		if kerrors.IsNotFound(err) {
+			logger.Infof("dead mon %s was already gone", resourceName)
+		} else {
+			logger.Errorf("failed to remove dead mon deployment %q. %v", resourceName, err)
+		}
 	}
-
-	return nil
 }
 
 func (c *Cluster) removeMonitorFromQuorum(name string) error {
diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go
index d80839d65c2c..17258a5322d5 100644
--- a/pkg/operator/ceph/cluster/mon/mon.go
+++ b/pkg/operator/ceph/cluster/mon/mon.go
@@ -1060,7 +1060,39 @@ func (c *Cluster) startDeployments(mons []*monConfig, requireAllInQuorum bool) e
 			requireAllInQuorum = true
 		}
 	}
-	return c.waitForMonsToJoin(mons, requireAllInQuorum)
+	err = c.waitForMonsToJoin(mons, requireAllInQuorum)
+
+	// Check for the rare case of an extra mon deployment that needs to be cleaned up
+	c.checkForExtraMonResources(mons, deployments.Items)
+	return err
+}
+
+func (c *Cluster) checkForExtraMonResources(mons []*monConfig, deployments []apps.Deployment) string {
+	if len(deployments) <= c.spec.Mon.Count || len(deployments) <= len(mons) {
+		return ""
+	}
+
+	// If there are more deployments than expected mons from the ceph quorum,
+	// find the extra mon deployment and clean it up.
+ logger.Infof("there is an extra mon deployment that is not needed and not in quorum") + for _, deploy := range deployments { + monName := deploy.Labels[controller.DaemonIDLabel] + found := false + // Search for the mon in the list of mons expected in quorum + for _, monDaemon := range mons { + if monName == monDaemon.DaemonName { + found = true + break + } + } + if !found { + logger.Infof("deleting extra mon deployment %q", deploy.Name) + c.removeMonResources(monName) + return monName + } + } + + return "" } func (c *Cluster) waitForMonsToJoin(mons []*monConfig, requireAllInQuorum bool) error { diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go index fb8643d8c251..9d27be4014c9 100644 --- a/pkg/operator/ceph/cluster/mon/mon_test.go +++ b/pkg/operator/ceph/cluster/mon/mon_test.go @@ -676,6 +676,50 @@ func TestMonVolumeClaimTemplate(t *testing.T) { }) } } + +func TestRemoveExtraMonDeployments(t *testing.T) { + namespace := "ns" + context, err := newTestStartCluster(t, namespace) + assert.NoError(t, err) + c := newCluster(context, namespace, true, v1.ResourceRequirements{}) + c.ClusterInfo = clienttest.CreateTestClusterInfo(1) + + // Nothing to remove when the mons match the deployment + mons := []*monConfig{ + {ResourceName: "rook-ceph-mon-a", DaemonName: "a"}, + } + deployments := []apps.Deployment{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "rook-ceph-mon-a", + Labels: map[string]string{"ceph_daemon_id": "a"}, + }, + }, + } + c.spec.Mon.Count = 1 + removed := c.checkForExtraMonResources(mons, deployments) + assert.Equal(t, "", removed) + + // Remove an extra mon deployment + deployments = append(deployments, apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rook-ceph-mon-b", + Labels: map[string]string{"ceph_daemon_id": "b"}, + }}) + removed = c.checkForExtraMonResources(mons, deployments) + assert.Equal(t, "b", removed) + + // Nothing to remove when there are not enough deployments for the expected mons + mons = []*monConfig{ + {ResourceName: "rook-ceph-mon-a", DaemonName: "a"}, + {ResourceName: "rook-ceph-mon-b", DaemonName: "b"}, + {ResourceName: "rook-ceph-mon-c", DaemonName: "c"}, + } + c.spec.Mon.Count = 3 + removed = c.checkForExtraMonResources(mons, deployments) + assert.Equal(t, "", removed) +} + func TestStretchMonVolumeClaimTemplate(t *testing.T) { generalSC := "generalSC" zoneSC := "zoneSC"