From db10c33169806cc006479b513b4fd0d3b9a9f195 Mon Sep 17 00:00:00 2001 From: Divyansh Kamboj Date: Thu, 31 Aug 2023 19:47:06 +0530 Subject: [PATCH] monitoring: set port for servicemonitor for ceph-exporter ceph-exporter's ServiceMonitor and Service CRD contain different port names, which results in no metrics being collected by prometheus. This commit makes the port name in GetServiceMonitor configurable, which we use to set consistent port names in ServiceMonitor and Service for ceph-exporter. Signed-off-by: Divyansh Kamboj --- pkg/operator/ceph/cluster/mgr/mgr.go | 3 ++- pkg/operator/ceph/cluster/nodedaemon/exporter.go | 5 +++-- pkg/operator/ceph/cluster/nodedaemon/reconcile.go | 2 +- pkg/operator/k8sutil/prometheus.go | 4 ++-- pkg/operator/k8sutil/prometheus_test.go | 4 +++- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pkg/operator/ceph/cluster/mgr/mgr.go b/pkg/operator/ceph/cluster/mgr/mgr.go index 1af927658811..f72db4b1ed17 100644 --- a/pkg/operator/ceph/cluster/mgr/mgr.go +++ b/pkg/operator/ceph/cluster/mgr/mgr.go @@ -55,6 +55,7 @@ const ( standbyMgrStatus = "standby" monitoringPath = "/etc/ceph-monitoring/" serviceMonitorFile = "service-monitor.yaml" + serviceMonitorPort = "http-metrics" // minimum amount of memory in MB to run the pod cephMgrPodMinimumMemory uint64 = 512 // DefaultMetricsPort prometheus exporter port @@ -502,7 +503,7 @@ func wellKnownModule(name string) bool { // EnableServiceMonitor add a servicemonitor that allows prometheus to scrape from the monitoring endpoint of the cluster func (c *Cluster) EnableServiceMonitor() error { - serviceMonitor := k8sutil.GetServiceMonitor(AppName, c.clusterInfo.Namespace) + serviceMonitor := k8sutil.GetServiceMonitor(AppName, c.clusterInfo.Namespace, serviceMonitorPort) cephv1.GetMonitoringLabels(c.spec.Labels).OverwriteApplyToObjectMeta(&serviceMonitor.ObjectMeta) if c.spec.External.Enable { diff --git a/pkg/operator/ceph/cluster/nodedaemon/exporter.go b/pkg/operator/ceph/cluster/nodedaemon/exporter.go index 
a7effc024949..3869ce6189fa 100644 --- a/pkg/operator/ceph/cluster/nodedaemon/exporter.go +++ b/pkg/operator/ceph/cluster/nodedaemon/exporter.go @@ -218,8 +218,9 @@ func MakeCephExporterMetricsService(cephCluster cephv1.CephCluster, servicePortM } // EnableCephExporterServiceMonitor add a servicemonitor that allows prometheus to scrape from the monitoring endpoint of the exporter -func EnableCephExporterServiceMonitor(context *clusterd.Context, cephCluster cephv1.CephCluster, scheme *runtime.Scheme, opManagerContext context.Context) error { - serviceMonitor := k8sutil.GetServiceMonitor(cephExporterAppName, cephCluster.Namespace) +func EnableCephExporterServiceMonitor(context *clusterd.Context, cephCluster cephv1.CephCluster, scheme *runtime.Scheme, opManagerContext context.Context, servicePortMetricName string) error { + serviceMonitor := k8sutil.GetServiceMonitor(cephExporterAppName, cephCluster.Namespace, servicePortMetricName) + cephv1.GetCephExporterLabels(cephCluster.Spec.Labels).OverwriteApplyToObjectMeta(&serviceMonitor.ObjectMeta) err := controllerutil.SetControllerReference(&cephCluster, serviceMonitor, scheme) diff --git a/pkg/operator/ceph/cluster/nodedaemon/reconcile.go b/pkg/operator/ceph/cluster/nodedaemon/reconcile.go index 9b3d39c19b3a..128908b0ee3e 100644 --- a/pkg/operator/ceph/cluster/nodedaemon/reconcile.go +++ b/pkg/operator/ceph/cluster/nodedaemon/reconcile.go @@ -269,7 +269,7 @@ func (r *ReconcileNode) createOrUpdateNodeDaemons(node corev1.Node, tolerations } if cephCluster.Spec.Monitoring.Enabled { - if err := EnableCephExporterServiceMonitor(r.context, cephCluster, r.scheme, r.opManagerContext); err != nil { + if err := EnableCephExporterServiceMonitor(r.context, cephCluster, r.scheme, r.opManagerContext, exporterServiceMetricName); err != nil { return errors.Wrap(err, "failed to enable service monitor") } logger.Debug("service monitor for ceph exporter was enabled successfully") diff --git a/pkg/operator/k8sutil/prometheus.go 
b/pkg/operator/k8sutil/prometheus.go index fe9bd664ee62..f646bb6fdcfe 100644 --- a/pkg/operator/k8sutil/prometheus.go +++ b/pkg/operator/k8sutil/prometheus.go @@ -39,7 +39,7 @@ func getMonitoringClient(context *clusterd.Context) (*monitoringclient.Clientset } // GetServiceMonitor creates serviceMonitor object template -func GetServiceMonitor(name string, namespace string) *monitoringv1.ServiceMonitor { +func GetServiceMonitor(name string, namespace string, portName string) *monitoringv1.ServiceMonitor { return &monitoringv1.ServiceMonitor{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -62,7 +62,7 @@ func GetServiceMonitor(name string, namespace string) *monitoringv1.ServiceMonit }, Endpoints: []monitoringv1.Endpoint{ { - Port: "http-metrics", + Port: portName, Path: "/metrics", Interval: "5s", }, diff --git a/pkg/operator/k8sutil/prometheus_test.go b/pkg/operator/k8sutil/prometheus_test.go index 23204c9faff0..b2166138270d 100644 --- a/pkg/operator/k8sutil/prometheus_test.go +++ b/pkg/operator/k8sutil/prometheus_test.go @@ -26,9 +26,11 @@ import ( func TestGetServiceMonitor(t *testing.T) { name := "rook-ceph-mgr" namespace := "rook-ceph" - servicemonitor := GetServiceMonitor(name, namespace) + port := "http-metrics" + servicemonitor := GetServiceMonitor(name, namespace, port) assert.Equal(t, name, servicemonitor.GetName()) assert.Equal(t, namespace, servicemonitor.GetNamespace()) + assert.Equal(t, port, servicemonitor.Spec.Endpoints[0].Port) assert.NotNil(t, servicemonitor.GetLabels()) assert.NotNil(t, servicemonitor.Spec.NamespaceSelector.MatchNames) assert.NotNil(t, servicemonitor.Spec.Selector.MatchLabels)