From 67d383fd485ca6aabd45c4c7e5966b6a74387562 Mon Sep 17 00:00:00 2001 From: Manjunath Shivanna Date: Tue, 12 Mar 2024 21:28:11 -0400 Subject: [PATCH] Adds (Prometheus) ServiceMonitor integration (#16) Signed-off-by: mshivanna_tdx Signed-off-by: Adrian Cole --- .github/workflows/test.yml | 6 ++ README.md | 6 ++ charts/zipkin/ci/helmfile.yaml | 56 +++++++++++++++++++ charts/zipkin/ci/serviceMonitor-values.yaml | 6 ++ charts/zipkin/templates/servicemonitor.yaml | 47 ++++++++++++++++ .../templates/tests/test-connection.yaml | 18 +++++- charts/zipkin/values.schema.json | 20 +++++++ charts/zipkin/values.yaml | 11 ++++ 8 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 charts/zipkin/ci/helmfile.yaml create mode 100644 charts/zipkin/ci/serviceMonitor-values.yaml create mode 100644 charts/zipkin/templates/servicemonitor.yaml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7a3f8a4..8eb3436 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -58,5 +58,11 @@ jobs: uses: helm/kind-action@v1 if: steps.list-changed.outputs.changed == 'true' + - name: Setup helmfile + uses: mamezou-tech/setup-helmfile@v2.0.0 + + - name: Install prometheus + run: helmfile -f charts/zipkin/ci/helmfile.yaml sync + - name: Run chart-testing (install) run: ct install diff --git a/README.md b/README.md index 081728a..92a706a 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ You can then run `helm search repo zipkin` to see the charts. | ingress.path | string | `"/"` | | | ingress.tls | list | `[]` | | | nameOverride | string | `""` | | +| namespaceOverride | string | release namespace | Namespace to create the zipkin resources in | | nodeSelector | object | `{}` | | | podAnnotations."sidecar.istio.io/inject" | string | `"false"` | | | podSecurityContext | object | `{}` | | @@ -55,6 +56,11 @@ You can then run `helm search repo zipkin` to see the charts. 
| serviceAccount.create | bool | `true` | | | serviceAccount.name | string | `""` | If not set and create is true, a name is generated using the fullname template | | serviceAccount.psp | bool | `false` | | +| serviceMonitor.enabled | bool | `false` | Creates a ServiceMonitor to scrape /prometheus. Requires prometheus-operator | +| serviceMonitor.namespace | string | override or release namespace | Namespace to create the service monitor in | +| serviceMonitor.labels | object | `{}` | Additional metadata labels | +| serviceMonitor.interval | string | Prometheus global scrape interval | How often to scrape /prometheus. e.g. '5s' | +| serviceMonitor.scrapeTimeout | string | Prometheus global scrape timeout | Timeout for scraping metrics. e.g. '10s' | | tolerations | list | `[]` | | | zipkin.discovery.eureka.serviceUrl | string | no default | v2 endpoint of Eureka, e.g. `https://eureka-prod/eureka/v2` | | zipkin.discovery.eureka.app | string | `"zipkin"` | The application this instance registers to | diff --git a/charts/zipkin/ci/helmfile.yaml b/charts/zipkin/ci/helmfile.yaml new file mode 100644 index 0000000..37aa52e --- /dev/null +++ b/charts/zipkin/ci/helmfile.yaml @@ -0,0 +1,56 @@ +--- +# install via `helmfile -f charts/zipkin/ci/helmfile.yaml sync` +repositories: + - name: prometheus-community + url: https://prometheus-community.github.io/helm-charts + +# The ServiceMonitor test requires the CRD servicemonitors.monitoring.coreos.com as well as +# Prometheus, deployed as the service named "prometheus-operated" in our test +# namespace "ci-monitoring". We set this up via helm prior to running tests, as +# adding CRDs and multiple resources during a test is far more complicated.
+releases: + - name: prometheus-stack + namespace: ci-monitoring # arbitrary non-default name + createNamespace: true + chart: prometheus-community/kube-prometheus-stack + values: + - prometheusOperator: + enabled: true + prometheus: + enabled: true + # By default, the service monitor has namespace restrictions and must + # match a label "release: kube-prometheus-stack". Relax for testing. + # See https://prometheus-operator.dev/docs/operator/troubleshooting/#it-is-in-the-configuration-but-not-on-the-service-discovery-page + prometheusSpec: + serviceMonitorNamespaceSelector: + any: true + serviceMonitorSelector: + any: true + serviceMonitorSelectorNilUsesHelmValues: false + # Disable anything else, like multi-container grafana pods. + defaultRules: + enabled: false + alertmanager: + enabled: false + kubeApiServer: + enabled: false + kubelet: + enabled: false + kubeControllerManager: + enabled: false + coreDns: + enabled: false + kubeDns: + enabled: false + kubeEtcd: + enabled: false + kubeScheduler: + enabled: false + kubeProxy: + enabled: false + kubeStateMetrics: + enabled: false + nodeExporter: + enabled: false + grafana: + enabled: false diff --git a/charts/zipkin/ci/serviceMonitor-values.yaml b/charts/zipkin/ci/serviceMonitor-values.yaml new file mode 100644 index 0000000..3d517ea --- /dev/null +++ b/charts/zipkin/ci/serviceMonitor-values.yaml @@ -0,0 +1,6 @@ +--- +serviceMonitor: + enabled: true + interval: 1s + scrapeTimeout: 1s + namespace: ci-monitoring diff --git a/charts/zipkin/templates/servicemonitor.yaml b/charts/zipkin/templates/servicemonitor.yaml new file mode 100644 index 0000000..83d0236 --- /dev/null +++ b/charts/zipkin/templates/servicemonitor.yaml @@ -0,0 +1,47 @@ +{{- /* +Copyright 2024 The OpenZipkin Authors + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +in compliance with the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License +is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +or implied. See the License for the specific language governing permissions and limitations under +the License. +*/}} +{{- if .Values.serviceMonitor.enabled -}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "zipkin.fullname" . }} + {{- if .Values.serviceMonitor.namespace }} + namespace: {{ .Values.serviceMonitor.namespace }} + {{- else }} + namespace: {{ include "zipkin.namespace" . }} + {{- end }} + labels: + {{- include "zipkin.labels" . | nindent 4 }} + {{- if .Values.serviceMonitor.labels }} + {{- (toYaml .Values.serviceMonitor.labels | nindent 4) }} + {{- end }} +spec: + jobLabel: {{ include "zipkin.fullname" . }} + namespaceSelector: + matchNames: + - {{ include "zipkin.namespace" . }} + endpoints: + - port: http-query + path: '/prometheus' + scheme: http + {{- with .Values.serviceMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + selector: + matchLabels: + {{- include "zipkin.selectorLabels" . | nindent 8 }} +{{- end }} diff --git a/charts/zipkin/templates/tests/test-connection.yaml b/charts/zipkin/templates/tests/test-connection.yaml index 8a44bc8..461efe4 100644 --- a/charts/zipkin/templates/tests/test-connection.yaml +++ b/charts/zipkin/templates/tests/test-connection.yaml @@ -3,8 +3,7 @@ apiVersion: v1 kind: Pod metadata: name: "{{ include "zipkin.fullname" . }}-test-connection" - labels: - {{- include "zipkin.labels" . | nindent 4 }} + labels: {} # we don't need any labels in the test pod! annotations: "helm.sh/hook": test spec: @@ -45,5 +44,20 @@ spec: command: [ '/bin/sh', '-c' ] # If self-tracing, sleep for the trace to process. 
Then, get it by the constant ID passed above. args: [ 'sleep 3 && wget -q --spider http://{{ include "zipkin.fullname" . }}:{{ .Values.service.port }}/api/v2/trace/cafebabecafebabe' ] +{{- end }} +{{- if .Values.serviceMonitor.enabled }} + # This verifies prometheus scraped the zipkin service on the correct + # endpoint, by reading an actual statistic. + # See https://prometheus.io/docs/prometheus/latest/querying/api/ + - name: get-prometheus-query + image: 'ghcr.io/openzipkin/alpine:3.19.1' + command: [ '/bin/sh', '-c' ] + # Note: The below commands use the Prometheus API, which returns HTTP 200 + # even on empty. Rather than install jq, we use grep to ensure a result. + # + # We use a sleep loop despite the scrape delay of only 1s. This is due to + # an up to one-minute read-back delay between adding the service monitor, + # and being visible as a prometheus target in kube-prometheus-stack. + args: [ 'until (wget -q -O - http://prometheus-operated.{{ .Values.serviceMonitor.namespace }}.svc.cluster.local:9090/api/v1/query?query=http_server_requests_seconds_max|grep zipkin); do sleep 3; done' ] {{- end }} restartPolicy: Never diff --git a/charts/zipkin/values.schema.json b/charts/zipkin/values.schema.json index 28272ac..b38b4e0 100644 --- a/charts/zipkin/values.schema.json +++ b/charts/zipkin/values.schema.json @@ -197,6 +197,26 @@ } } }, + "serviceMonitor": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "interval": { + "type": "string" + }, + "labels": { + "type": "object" + }, + "namespace": { + "type": "string" + }, + "scrapeTimeout": { + "type": "string" + } + } + }, "tolerations": { "type": "array" }, diff --git a/charts/zipkin/values.yaml b/charts/zipkin/values.yaml index 12d66b5..cd014a4 100644 --- a/charts/zipkin/values.yaml +++ b/charts/zipkin/values.yaml @@ -58,6 +58,17 @@ service: type: ClusterIP port: 9411 +serviceMonitor: + # Creates a ServiceMonitor to scrape /prometheus + enabled: false + # Namespace to create
the service monitor in + namespace: "" + # interval: 10s + # scrapeTimeout: 10s + # Add any labels required by your prometheus spec serviceMonitorSelector + labels: {} + # release: prometheus + ingress: enabled: false annotations: