From 1f226c7222f7a16f2a7b4b7ab9779d1b17b82063 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Wed, 31 Jan 2024 16:49:26 -0600 Subject: [PATCH 1/5] introspection: lints Signed-off-by: Hank Donnay --- introspection/server.go | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/introspection/server.go b/introspection/server.go index 31ce6e5929..160166b6f0 100644 --- a/introspection/server.go +++ b/introspection/server.go @@ -1,3 +1,5 @@ +// Package introspection holds the implementation details for the +// "introspection" HTTP server that Clair hosts. package introspection import ( @@ -22,18 +24,32 @@ import ( "github.com/quay/clair/v4/health" ) +// Valid backends for both metrics and traces. const ( - Prom = "prometheus" - DefaultPromEndpoint = "/metrics" - Stdout = "stdout" - Jaeger = "jaeger" - HealthEndpoint = "/healthz" - ReadyEndpoint = "/readyz" - DefaultIntrospectionAddr = ":8089" + Stdout = "stdout" ) -// Server provides an http server -// exposing Clair metrics and traces +// Valid backends for metrics. +const ( + Prom = "prometheus" +) + +// Valid backends for traces. +const ( + Jaeger = "jaeger" +) + +// Endpoints on the introspection HTTP server. +const ( + DefaultPromEndpoint = "/metrics" + HealthEndpoint = "/healthz" + ReadyEndpoint = "/readyz" +) + +// DefaultIntrospectionAddr is the default address if not provided in the configuration. +const DefaultIntrospectionAddr = ":8089" + +// Server provides an HTTP server exposing Clair metrics and debugging information. type Server struct { // configuration provided when starting Clair conf *config.Config @@ -46,6 +62,7 @@ type Server struct { health func() bool } +// New constructs a [*Server], which has an embedded [*http.Server]. 
func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, error) { ctx = zlog.ContextWithValues(ctx, "component", "introspection/New") From c8cceac4e342bd5f2d44eb8bcc91ea7706e5fb10 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Wed, 31 Jan 2024 16:51:40 -0600 Subject: [PATCH 2/5] introspection: implement OTLP support for metrics and traces This should allow a Clair instance to use the "native" OpenTelemetry transport for metrics and traces. Some parts of Clair and Claircore are directly instrumented with Prometheus, and so the OTLP support should be considered beta-quality. Signed-off-by: Hank Donnay --- go.mod | 27 ++++-- go.sum | 61 ++++++++++--- introspection/otlp.go | 197 ++++++++++++++++++++++++++++++++++++++++ introspection/server.go | 144 ++++++++++++++++++++++++----- 4 files changed, 383 insertions(+), 46 deletions(-) create mode 100644 introspection/otlp.go diff --git a/go.mod b/go.mod index 3d8f85611c..e5d6994077 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/jackc/pgconn v1.14.3 github.com/jackc/pgx/v4 v4.18.3 github.com/klauspost/compress v1.17.9 - github.com/prometheus/client_golang v1.19.0 + github.com/prometheus/client_golang v1.19.1 github.com/quay/clair/config v1.4.0 github.com/quay/claircore v1.5.28 github.com/quay/zlog v1.1.8 @@ -28,18 +28,28 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 go.opentelemetry.io/otel v1.28.0 go.opentelemetry.io/otel/exporters/jaeger v1.17.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 + go.opentelemetry.io/otel/exporters/prometheus v0.50.0 + go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.28.0 
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.28.0 go.opentelemetry.io/otel/sdk v1.28.0 + go.opentelemetry.io/otel/sdk/metric v1.28.0 go.opentelemetry.io/otel/trace v1.28.0 golang.org/x/net v0.27.0 golang.org/x/sync v0.7.0 golang.org/x/time v0.4.0 + google.golang.org/grpc v1.64.0 gopkg.in/yaml.v3 v3.0.1 ) require ( github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -53,6 +63,7 @@ require ( github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/mock v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect github.com/jackc/pgio v1.0.0 // indirect @@ -67,13 +78,14 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0-rc3 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_model v0.5.0 // indirect - github.com/prometheus/common v0.48.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/quay/claircore/toolkit v1.2.0 // indirect github.com/quay/claircore/updater/driver v1.0.0 // indirect github.com/quay/goval-parser v0.8.8 // indirect @@ 
-84,12 +96,15 @@ require ( github.com/vbatts/tar-split v0.11.3 // indirect github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.25.0 // indirect golang.org/x/mod v0.17.0 // indirect golang.org/x/sys v0.22.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect - google.golang.org/protobuf v1.33.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240701130421-f6361c86f094 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect + google.golang.org/protobuf v1.34.2 // indirect modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect modernc.org/libc v1.49.3 // indirect modernc.org/mathutil v1.6.0 // indirect diff --git a/go.sum b/go.sum index 6ef08449bb..84c3b7e621 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,10 @@ github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030I github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= github.com/cockroachdb/apd 
v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSkDwbkEETK84kQgEeFwDC+62k= @@ -74,6 +76,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grafana/pyroscope-go/godeltaprof v0.1.7 h1:C11j63y7gymiW8VugJ9ZW0pWfxTZugdSJyC48olk5KY= github.com/grafana/pyroscope-go/godeltaprof v0.1.7/go.mod h1:Tk376Nbldo4Cha9RgiU7ik8WKFkNpfds98aUzS8omLE= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= @@ -143,8 +147,9 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= @@ -166,6 +171,8 @@ github.com/mattn/go-sqlite3 v1.10.0 
h1:jbhqpg7tQe4SupckyijYiy0mJJ/pRyHvXf7JdWK86 github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -177,14 +184,14 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= -github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE= -github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= +github.com/prometheus/client_golang v1.19.1 
h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/quay/clair/config v1.4.0 h1:jLveqMKEAZmyKjhGeWBkdQmCoGRbdUh4oTYVgaoFBQs= github.com/quay/clair/config v1.4.0/go.mod h1:c2/sfLoPdFRXpMtpD/UCi1gNm4uUFmdseqhqeQkKVbY= github.com/quay/claircore v1.5.28 h1:NQ6zJGm/G406D8DF7XlxNS0IdxhrVic+eehly9VjnOU= @@ -205,8 +212,8 @@ github.com/remind101/migrate v0.0.0-20170729031349-52c1edff7319/go.mod h1:rhSvwc github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.13.0/go.mod 
h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= @@ -264,14 +271,32 @@ go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= go.opentelemetry.io/otel/exporters/jaeger v1.17.0 h1:D7UpUy2Xc2wsi1Ras6V40q806WM07rqoCWzXu7Sqy+4= go.opentelemetry.io/otel/exporters/jaeger v1.17.0/go.mod h1:nPCqOnEH9rNLKqH/+rrUjiMzHJdV1BlpKcTwRTyKkKI= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0 h1:U2guen0GhqH8o/G2un8f/aG/y++OuW6MyCo6hT9prXk= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0/go.mod h1:yeGZANgEcpdx/WK0IvvRFC+2oLiMS2u4L/0Rj2M2Qr0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0 h1:aLmmtjRke7LPDQ3lvpFz+kNEH43faFhzW7v8BFIEydg= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0/go.mod h1:TC1pyCt6G9Sjb4bQpShH+P5R53pO6ZuGnHuuln9xMeE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 h1:R3X6ZXmNPRR8ul6i3WgFURCHzaXjHdm0karRG/+dj3s= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0/go.mod h1:QWFXnDavXWwMx2EEcZsf3yxgEKAqsxQ+Syjp+seyInw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 h1:j9+03ymgYhPKmeXGk5Zu+cIZOlVzd9Zv7QIiyItjFBU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0/go.mod h1:Y5+XiUG4Emn1hTfciPzGPJaSI+RpDts6BnCIir0SLqk= +go.opentelemetry.io/otel/exporters/prometheus v0.50.0 h1:2Ewsda6hejmbhGFyUvWZjUThC98Cf8Zy6g0zkIimOng= +go.opentelemetry.io/otel/exporters/prometheus v0.50.0/go.mod h1:pMm5PkUo5YwbLiuEf7t2xg4wbP0/eSJrMxIMxKosynY= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.28.0 h1:BJee2iLkfRfl9lc7aFmBwkWxY/RI1RDdXepSF6y8TPE= 
+go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.28.0/go.mod h1:DIzlHs3DRscCIBU3Y9YSzPfScwnYnzfnCd4g8zA7bZc= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.28.0 h1:EVSnY9JbEEW92bEkIYOVMw4q1WJxIAGoFTrtYOzWuRQ= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.28.0/go.mod h1:Ea1N1QQryNXpCD0I1fdLibBAIpQuBkznMmkdKrapk1Y= go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= +go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnCQArXCKlg08= +go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg= go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= @@ -394,8 +419,14 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/protobuf v1.33.0 
h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/genproto/googleapis/api v0.0.0-20240701130421-f6361c86f094 h1:0+ozOGcrp+Y8Aq8TLNN2Aliibms5LEzsq99ZZmAGYm0= +google.golang.org/genproto/googleapis/api v0.0.0-20240701130421-f6361c86f094/go.mod h1:fJ/e3If/Q67Mj99hin0hMhiNyCRmt6BQ2aWIJshUSJw= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/introspection/otlp.go b/introspection/otlp.go new file mode 100644 index 0000000000..049c391bee --- /dev/null +++ b/introspection/otlp.go @@ -0,0 +1,197 @@ +package introspection + +import ( + "crypto/tls" + "fmt" + "time" + + "github.com/quay/clair/config" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "google.golang.org/grpc/credentials" +) + +// This file holds all the OTLP weirdness. 
+// +// Most of the options are the same internally, but have a bunch of type ceremony around them to obfuscate this. + +// OtlpHooks is a hook structure for using the correct types with the various OTLP exporters. +// The declared variables are largely duplicates, with the real logic living in [otlpHooks.Options]. +// +// The type parameter "O" should really be a sum type, but that's currently inexpressible. +type otlpHooks[O any] struct { + WithCompressor func(config.OTLPCompressor) O + WithEndpoint func(string) O + WithHeaders func(map[string]string) O + WithInsecure func() O + WithTimeout func(time.Duration) O + WithTLSClientConfig func(*tls.Config) O + + WithURLPath func(string) O + + WithReconnectionPeriod func(time.Duration) O + WithServiceConfig func(string) O +} + +// Options returns the correct Options to pass into the constructor based on the receiver type. +// +// This function will panic if called in unexpected ways. To be safe: +// +// - Only use the provided instances ([omhHooks], [omgHooks], [othHooks], [otgHooks]). +// - Read the implementation. +func (h *otlpHooks[O]) Options(v any) (opts []O, err error) { + switch cfg := v.(type) { + // Signal-specific options. + // + // Currently, none; recurse to the transport options. + case *config.MetricOTLPHTTP: + opts, err = h.Options(&cfg.OTLPHTTPCommon) + case *config.TraceOTLPHTTP: + opts, err = h.Options(&cfg.OTLPHTTPCommon) + case *config.MetricOTLPgRPC: + opts, err = h.Options(&cfg.OTLPgRPCCommon) + case *config.TraceOTLPgRPC: + opts, err = h.Options(&cfg.OTLPgRPCCommon) + + // Transport-specific options. + // + // Recurse to the common options then return the transport options, in case of some ordering oddness. + // Will panic if called on the wrong receiver, as some of the members will (purposefully!) be nil. 
+ case *config.OTLPHTTPCommon: + opts, err = h.Options(&cfg.OTLPCommon) + if err != nil { + return nil, err + } + if p := cfg.URLPath; p != "" { + opts = append(opts, h.WithURLPath(p)) + } + case *config.OTLPgRPCCommon: + opts, err = h.Options(&cfg.OTLPCommon) + if err != nil { + return nil, err + } + if r := cfg.Reconnect; r != nil { + opts = append(opts, h.WithReconnectionPeriod(time.Duration(*r))) + } + if srv := cfg.ServiceConfig; srv != "" { + opts = append(opts, h.WithServiceConfig(srv)) + } + + // Common options. + case *config.OTLPCommon: + if e := cfg.Endpoint; e != "" { + opts = append(opts, h.WithEndpoint(e)) + } + opts = append(opts, h.WithCompressor(cfg.Compression)) + if len(cfg.Headers) != 0 { + opts = append(opts, h.WithHeaders(cfg.Headers)) + } + if cfg.Insecure { + opts = append(opts, h.WithInsecure()) + } + if t := cfg.Timeout; t != nil { + opts = append(opts, h.WithTimeout(time.Duration(*t))) + } + if tc := cfg.ClientTLS; tc != nil { + tlscfg, err := tc.Config() + if err != nil { + return nil, fmt.Errorf("TLS client configuration error: %w", err) + } + opts = append(opts, h.WithTLSClientConfig(tlscfg)) + } + + // Make the switch exhaustive. 
+ default: + panic(fmt.Sprintf("programmer error: unexpected type: %T", v)) + } + return opts, err +} + +// In order, these instances are for: +// +// - Metrics HTTP +// - Metrics gRPC +// - Traces HTTP +// - Traces gRPC +var ( + omhHooks = otlpHooks[otlpmetrichttp.Option]{ + WithCompressor: otlpCompressorHook( + otlpmetrichttp.WithCompression(otlpmetrichttp.NoCompression), + otlpmetrichttp.WithCompression(otlpmetrichttp.GzipCompression), + ), + WithEndpoint: otlpmetrichttp.WithEndpoint, + WithHeaders: otlpmetrichttp.WithHeaders, + WithInsecure: otlpmetrichttp.WithInsecure, + WithTimeout: otlpmetrichttp.WithTimeout, + WithTLSClientConfig: otlpmetrichttp.WithTLSClientConfig, + WithURLPath: otlpmetrichttp.WithURLPath, + } + omgHooks = otlpHooks[otlpmetricgrpc.Option]{ + WithCompressor: otlpCompressorHook( + otlpmetricgrpc.WithCompressor("none"), + otlpmetricgrpc.WithCompressor("gzip"), + ), + WithEndpoint: otlpmetricgrpc.WithEndpoint, + WithHeaders: otlpmetricgrpc.WithHeaders, + WithInsecure: otlpmetricgrpc.WithInsecure, + WithTimeout: otlpmetricgrpc.WithTimeout, + WithTLSClientConfig: grpcTLSHook(otlpmetricgrpc.WithTLSCredentials), + WithReconnectionPeriod: otlpmetricgrpc.WithReconnectionPeriod, + WithServiceConfig: otlpmetricgrpc.WithServiceConfig, + } + othHooks = otlpHooks[otlptracehttp.Option]{ + WithCompressor: otlpCompressorHook( + otlptracehttp.WithCompression(otlptracehttp.NoCompression), + otlptracehttp.WithCompression(otlptracehttp.GzipCompression), + ), + WithEndpoint: otlptracehttp.WithEndpoint, + WithHeaders: otlptracehttp.WithHeaders, + WithInsecure: otlptracehttp.WithInsecure, + WithTimeout: otlptracehttp.WithTimeout, + WithTLSClientConfig: otlptracehttp.WithTLSClientConfig, + WithURLPath: otlptracehttp.WithURLPath, + } + otgHooks = otlpHooks[otlptracegrpc.Option]{ + WithCompressor: otlpCompressorHook( + otlptracegrpc.WithCompressor("none"), + otlptracegrpc.WithCompressor("gzip"), + ), + WithEndpoint: otlptracegrpc.WithEndpoint, + WithHeaders: 
otlptracegrpc.WithHeaders, + WithInsecure: otlptracegrpc.WithInsecure, + WithTimeout: otlptracegrpc.WithTimeout, + WithTLSClientConfig: grpcTLSHook(otlptracegrpc.WithTLSCredentials), + WithReconnectionPeriod: otlptracegrpc.WithReconnectionPeriod, + WithServiceConfig: otlptracegrpc.WithServiceConfig, + } +) + +// OtlpCompressorHook maps from the [config.OTLPCompressor] type to the correct option. +// +// The type parameter is too broad, see also [otlpHooks]. +// This function causes some extra garbage to be created. +// Inlining and simplifying at use sites would prevent the options from being constructed until needed, +// but consolidates the precedence and default logic. +func otlpCompressorHook[O any](none, gzip O) func(config.OTLPCompressor) O { + return func(z config.OTLPCompressor) O { + switch z { + case config.OTLPCompressUnset: // Actual default: + fallthrough + case config.OTLPCompressNone: + return none + case config.OTLPCompressGzip: + return gzip + default: + panic("unreachable: exhaustive switch") + } + } +} + +// GrpcTLSHook maps a [tls.Config] to a correctly typed option. +// +// The type parameter is too broad, see also [otlpHooks]. 
+func grpcTLSHook[O any](f func(credentials.TransportCredentials) O) func(*tls.Config) O { + return func(c *tls.Config) O { return f(credentials.NewTLS(c)) } +} diff --git a/introspection/server.go b/introspection/server.go index 160166b6f0..c823733d7b 100644 --- a/introspection/server.go +++ b/introspection/server.go @@ -16,10 +16,18 @@ import ( "github.com/quay/zlog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/jaeger" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" + "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" - semconv "go.opentelemetry.io/otel/semconv/v1.7.0" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" "github.com/quay/clair/v4/health" ) @@ -27,6 +35,7 @@ import ( // Valid backends for both metrics and traces. const ( Stdout = "stdout" + OTLP = "otlp" ) // Valid backends for metrics. @@ -64,6 +73,7 @@ type Server struct { // New constructs a [*Server], which has an embedded [*http.Server]. 
func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, error) { + var err error ctx = zlog.ContextWithValues(ctx, "component", "introspection/New") var addr string @@ -93,10 +103,83 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, i.health = health } - // configure prometheus - err := i.withPrometheus(ctx) + // Configure metrics + var mr metric.Reader + switch conf.Metrics.Name { + case Stdout: + var ex metric.Exporter + ex, err = stdoutmetric.New() + if err != nil { + break + } + mr = metric.NewPeriodicReader(ex) + case Prom, "": + endpoint := DefaultPromEndpoint + if p := conf.Metrics.Prometheus.Endpoint; p != nil { + endpoint = *p + } + zlog.Info(ctx). + Str("endpoint", endpoint). + Str("server", i.Addr). + Msg("configuring prometheus") + + i.Handle(endpoint, promhttp.Handler()) + + mr, err = prometheus.New() + case OTLP: + conf := i.conf.Metrics.OTLP + if conf.GRPC == nil && conf.HTTP == nil { + return nil, fmt.Errorf(`must define either "grpc" or "http" transport for otlp metrics`) + } + + var ex metric.Exporter + switch { + case conf.GRPC != nil: + var opts []otlpmetricgrpc.Option + opts, err = omgHooks.Options(conf.GRPC) + if err != nil { + break + } + ex, err = otlpmetricgrpc.New(ctx, opts...) + case conf.HTTP != nil: + var opts []otlpmetrichttp.Option + opts, err = omhHooks.Options(conf.HTTP) + if err != nil { + break + } + ex, err = otlpmetrichttp.New(ctx, opts...) + default: + panic("programmer error: exhaustive switch") + } + if err != nil { + break + } + + // Print a warning as long as direct prometheus metrics exist in "our" packages.
+ zlog.Warn(ctx).Msg("OTLP metrics should be considered beta; metrics may be missing") + mr = metric.NewPeriodicReader(ex) + default: + zlog.Info(ctx).Msg("no metrics enabled") + } if err != nil { - return nil, fmt.Errorf("error configuring prometheus handler: %v", err) + return nil, fmt.Errorf("error configuring metrics: %w", err) + } + if mr != nil { + mp := metric.NewMeterProvider( + metric.WithReader(mr), + metric.WithResource(resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(fmt.Sprintf("clairv4/%v", i.conf.Mode)), + )), + ) + otel.SetMeterProvider(mp) + i.Server.RegisterOnShutdown(func() { + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + if err := mp.Shutdown(ctx); err != nil { + zlog.Error(ctx).Err(err).Msg("error shutting down metric provider") + } + }) } // configure tracing @@ -111,7 +194,7 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, sdktrace.TraceIDRatioBased(p), ) default: - sampler = sdktrace.NeverSample() + sampler = sdktrace.ParentBased(sdktrace.NeverSample()) } // trace exporter @@ -119,9 +202,6 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, switch conf.Trace.Name { case Stdout: exporter, err = stdouttrace.New() - if err != nil { - return nil, fmt.Errorf("error configuring stdout tracing: %w", err) - } case Jaeger: conf := i.conf.Trace.Jaeger var mode string @@ -176,12 +256,42 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, // configure the exporter exporter, err = jaeger.New(e) + case OTLP: + conf := i.conf.Trace.OTLP + if conf.GRPC == nil && conf.HTTP == nil { + return nil, fmt.Errorf(`must define either "grpc" or "http" transport for otlp traces`) + } + + var c otlptrace.Client + switch { + case conf.GRPC != nil: + var opts []otlptracegrpc.Option + opts, err = otgHooks.Options(conf.GRPC) + if err != nil { + break + } + c = otlptracegrpc.NewClient(opts...) 
+ case conf.HTTP != nil: + var opts []otlptracehttp.Option + opts, err = othHooks.Options(conf.HTTP) + if err != nil { + break + } + c = otlptracehttp.NewClient(opts...) + default: + panic("programmer error: exhaustive switch") + } if err != nil { - return nil, fmt.Errorf("error configuring jaeger tracing: %w", err) + break } + + exporter, err = otlptrace.New(ctx, c) default: zlog.Info(ctx).Msg("no distributed tracing enabled") } + if err != nil { + return nil, fmt.Errorf("error configuring tracing: %w", err) + } if exporter != nil { tp := sdktrace.NewTracerProvider( sdktrace.WithSampler(sampler), @@ -245,19 +355,3 @@ func (i *Server) withReady(_ context.Context) error { i.ServeMux.Handle(ReadyEndpoint, health.ReadinessHandler()) return nil } - -// WithPrometheus adds the prometheus endpoint to i's ServeMux. -func (i *Server) withPrometheus(ctx context.Context) error { - ctx = zlog.ContextWithValues(ctx, "component", "introspection/Server.withPrometheus") - endpoint := DefaultPromEndpoint - if i.conf.Metrics.Prometheus.Endpoint != nil { - endpoint = *i.conf.Metrics.Prometheus.Endpoint - } - zlog.Info(ctx). - Str("endpoint", endpoint). - Str("server", i.Addr). - Msg("configuring prometheus") - - i.Handle(endpoint, promhttp.Handler()) - return nil -} From 81c5b11f61bb43b8810ee2b413c58d730cf28877 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Wed, 31 Jan 2024 16:55:13 -0600 Subject: [PATCH 3/5] docker-compose: update containers This updates the `local-dev` stack. Most of these updates were needed to test the OpenTelemetry support. 
Signed-off-by: Hank Donnay --- docker-compose.yaml | 17 +++++++++-------- local-dev/clair/config.yaml | 6 +++++- .../provisioning/datasources/datasource.yml | 6 ++++++ local-dev/traefik/config/clair.yaml | 3 --- local-dev/traefik/traefik.yaml | 4 ++++ 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 853a070c18..f8e530eae7 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -4,17 +4,17 @@ version: "3.7" # the config. x-anchors: go: &go-image quay.io/projectquay/golang:1.21 - grafana: &grafana-image docker.io/grafana/grafana:8.0.3 - jaeger: &jaeger-image docker.io/jaegertracing/all-in-one:1.26 + grafana: &grafana-image docker.io/grafana/grafana:10.3.1 + jaeger: &jaeger-image docker.io/jaegertracing/all-in-one:1 pgadmin: &pgadmin-image docker.io/dpage/pgadmin4:5.7 - postgres: &postgres-image docker.io/library/postgres:12 - prom: &prom-image docker.io/prom/prometheus:v2.30.2 - pyroscope: &pyroscope-image docker.io/pyroscope/pyroscope:0.37.2 + postgres: &postgres-image docker.io/library/postgres:15 + prom: &prom-image docker.io/prom/prometheus:latest + pyroscope: &pyroscope-image docker.io/grafana/pyroscope:latest quay: &quay-image quay.io/projectquay/quay:latest - rabbitmq: &rabbitmq-image docker.io/library/rabbitmq:3.11 - redis: &redis-image docker.io/library/redis:6.2 + rabbitmq: &rabbitmq-image docker.io/library/rabbitmq:3 + redis: &redis-image docker.io/library/redis:6 skopeo: &skopeo-image quay.io/skopeo/stable:latest - traefik: &traefik-image docker.io/library/traefik:v2.2 + traefik: &traefik-image docker.io/library/traefik:v3.0 clair-service: &clair-service image: *go-image depends_on: @@ -98,6 +98,7 @@ services: image: *jaeger-image environment: QUERY_BASE_PATH: '/jaeger' + COLLECTOR_OTLP_ENABLED: 'true' prometheus: container_name: clair-prometheus profiles: diff --git a/local-dev/clair/config.yaml b/local-dev/clair/config.yaml index f69fa427aa..916a0b288c 100644 --- 
a/local-dev/clair/config.yaml +++ b/local-dev/clair/config.yaml @@ -48,8 +48,12 @@ notifier: # callback: "http://clair-notifier/notifier/api/v1/notification" # tracing and metrics config trace: - name: "jaeger" + name: "otlp" # probability: 1 + otlp: + http: + endpoint: "clair-jaeger:4318" + insecure: true jaeger: agent: endpoint: "clair-jaeger:6831" diff --git a/local-dev/grafana/provisioning/datasources/datasource.yml b/local-dev/grafana/provisioning/datasources/datasource.yml index 79940f13b8..eae5f70b4a 100644 --- a/local-dev/grafana/provisioning/datasources/datasource.yml +++ b/local-dev/grafana/provisioning/datasources/datasource.yml @@ -32,3 +32,9 @@ datasources: jsonData: path: http://clair-pyroscope:4040/ editable: false +- name: Jaeger + type: jaeger + access: proxy + orgId: 1 + url: http://clair-jaeger:16686/jaeger/ + editable: false diff --git a/local-dev/traefik/config/clair.yaml b/local-dev/traefik/config/clair.yaml index 741946ef8b..a31bb74260 100644 --- a/local-dev/traefik/config/clair.yaml +++ b/local-dev/traefik/config/clair.yaml @@ -1,8 +1,5 @@ --- http: - entrypoint: - clair: - address: ':6060' routers: indexer: entryPoints: [clair] diff --git a/local-dev/traefik/traefik.yaml b/local-dev/traefik/traefik.yaml index 0725f0ca10..3db5895b02 100644 --- a/local-dev/traefik/traefik.yaml +++ b/local-dev/traefik/traefik.yaml @@ -19,4 +19,8 @@ providers: metrics: prometheus: addServicesLabels: true +tracing: + openTelemetry: + address: clair-jaeger:4318 + insecure: true accessLog: {} From 6763676c7ad6130da183fae77e61b6fadce6fd50 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Mon, 12 Feb 2024 14:50:02 -0800 Subject: [PATCH 4/5] health: PoC for health checks via otel metrics Signed-off-by: Hank Donnay --- go.mod | 2 +- health/health.go | 3 + health/main_test.go | 25 +++++ health/otel.go | 235 ++++++++++++++++++++++++++++++++++++++++++++ health/otel_test.go | 153 ++++++++++++++++++++++++++++ 5 files changed, 417 insertions(+), 1 deletion(-) create mode
100644 health/health.go create mode 100644 health/main_test.go create mode 100644 health/otel.go create mode 100644 health/otel_test.go diff --git a/go.mod b/go.mod index e5d6994077..6b256440f9 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,7 @@ require ( go.opentelemetry.io/otel/exporters/prometheus v0.50.0 go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.28.0 go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.28.0 + go.opentelemetry.io/otel/metric v1.28.0 go.opentelemetry.io/otel/sdk v1.28.0 go.opentelemetry.io/otel/sdk/metric v1.28.0 go.opentelemetry.io/otel/trace v1.28.0 @@ -95,7 +96,6 @@ require ( github.com/ulikunitz/xz v0.5.11 // indirect github.com/vbatts/tar-split v0.11.3 // indirect github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect - go.opentelemetry.io/otel/metric v1.28.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.25.0 // indirect golang.org/x/mod v0.17.0 // indirect diff --git a/health/health.go b/health/health.go new file mode 100644 index 0000000000..f609f12c50 --- /dev/null +++ b/health/health.go @@ -0,0 +1,3 @@ +// Package health provides HTTP handlers and adapters for health and readiness +// probes. 
+package health diff --git a/health/main_test.go b/health/main_test.go new file mode 100644 index 0000000000..343cf3f3e6 --- /dev/null +++ b/health/main_test.go @@ -0,0 +1,25 @@ +package health + +import ( + "os" + "testing" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/sdk/metric" +) + +func TestMain(m *testing.M) { + exit := 0 + defer func() { + if exit != 0 { + os.Exit(exit) + } + }() + + exp, h := NewMetricsHook() + handler = h // Declared in otel_test.go + p := metric.NewMeterProvider(metric.WithReader(exp)) + otel.SetMeterProvider(p) + + exit = m.Run() +} diff --git a/health/otel.go b/health/otel.go new file mode 100644 index 0000000000..02a33a7b8f --- /dev/null +++ b/health/otel.go @@ -0,0 +1,235 @@ +package health + +import ( + "bytes" + "fmt" + "io" + "net/http" + "sync" + "text/tabwriter" + "time" + + "github.com/quay/zlog" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/instrumentation" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +// HealthUnit is the unit that float64 gauges must use to be hooked into this package's reporting. +// +// The instruments should be implemented as callbacks to avoid missing data after a cold start. +// +// Individual instruments may assign meaning to nonzero values, but should assume the values will +// not be machine parsed. +const HealthUnit = "{health}" + +// FallibleKey is an [attribute.Key] that instruments can use as a boolean [attribute.KeyValue] to +// indicate that a metric shouldn't fail the check. +// Requests can override this by using the "strict" query parameter. +// +// Package authors can use this to indicate that there may be something wrong in a downstream service. +// Tripping a process' health check in the event of a transient event may cause restart storms +// or needless load balancer evictions, causing even worse service degradation.
+var FallibleKey = attribute.Key("github.com/quay/clair/v4/health.fallible") + +// This is modeled on the prometheus exporter: https://github.com/open-telemetry/opentelemetry-go/blob/exporters/prometheus/v0.45.2/exporters/prometheus/exporter.go + +// NewMetricsHook returns an [sdkmetric.Reader] for hooking into the otel +// metrics pipeline and an [http.Handler] for serving the health check HTTP API. +// +// The returned [http.Handler] currently does not care about the request path, but may in the +// future. Users should remove any prefixes for forward compatibility. +// +// Three query parameters are used: +// +// - meter: Select a single meter name. +// - instrument: Select a single instrument name. +// - strict: Disregard the "fallible" attribute. +// +// GET and HEAD methods are supported and return the same status code. +// Returned status codes are: +// +// - 200 OK: All checks reported ok (modified by the "strict" parameter). +// - 204 No Content: No health check instruments match the supplied filters. +// - 425 Too Early: Instruments exist, but have no data. +// - 503 Service Unavailable: At least one check reported not-ok (modified by the "strict" parameter). +// +// GET requests return a body containing details. +// The contents are intended for humans and not considered API. +// The current format is space-separated columns containing: +// +// - Instrument name +// - Status +// - Value +// - Timestamp +// - Description +func NewMetricsHook() (sdkmetric.Reader, http.Handler) { + reader := sdkmetric.NewManualReader() + c := collector{ + reader: reader, + } + return reader, &c +} + +// Collector implements the HTTP API by calling the enclosed ManualReader on demand. +// +// There's no provision to prevent a user from DoS-ing the process by making requests in a tight loop. +type collector struct { + reader *sdkmetric.ManualReader + bufPool sync.Pool +} + +// ServeHTTP implements [http.Handler]. 
+// +// The API is described in the [NewMetricsHook] documentation. +func (c *collector) ServeHTTP(w http.ResponseWriter, r *http.Request) { + nowrite := r.Method == http.MethodHead + switch r.Method { + case http.MethodGet, http.MethodHead: + default: + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + ctx := r.Context() + if err := r.ParseForm(); err != nil { + zlog.Warn(ctx).Err(err). + Msg("unable to parse health check request") + w.WriteHeader(http.StatusBadRequest) + return + } + meter := r.Form.Get(`meter`) + instrument := r.Form.Get(`instrument`) + strict := r.Form.Has(`strict`) + + var rm metricdata.ResourceMetrics + if err := c.reader.Collect(ctx, &rm); err != nil { + zlog.Warn(ctx).Err(err). + Msg("unable to collect health check") + w.WriteHeader(http.StatusInternalServerError) + return + } + + // Metric writing hook: by default, do nothing. + writeMetric := func(s instrumentation.Scope, m metricdata.Metrics, pt metricdata.DataPoint[float64]) {} + if !nowrite { + buf := c.getBuf() + tw := tabwriter.NewWriter(buf, 4, 4, 1, ' ', 0) + // The actual writing is handled in this defer. + defer func() { + tw.Flush() + io.Copy(w, buf) + c.putBuf(buf) + http.NewResponseController(w).Flush() + }() + writeMetric = func(s instrumentation.Scope, m metricdata.Metrics, pt metricdata.DataPoint[float64]) { + fmt.Fprintf(tw, "%s.%s\t%s\t%g\t%s\t# %s\n", + s.Name, m.Name, + checkStatus(pt.Value).String(), + pt.Value, + pt.Time.UTC().Format(time.RFC3339), + m.Description, + ) + } + } + status := http.StatusOK + var haveData bool + +Metrics: + for _, sm := range rm.ScopeMetrics { + // Tempting to break out of this loop when not writing a body, but we want to return the + // same status code no matter what. Consider a case where the first instrument has no data + // and the last one is failing. + + s := sm.Scope + // Filter if needed.
+ if meter != "" && meter != s.Name { + continue + } + + for _, m := range sm.Metrics { + if m.Unit != HealthUnit { + continue + } + g, ok := m.Data.(metricdata.Gauge[float64]) + if !ok { + continue + } + // Filter if needed. + if instrument != "" && instrument != m.Name { + continue + } + + if len(g.DataPoints) == 0 { + if status < http.StatusTooEarly { + status = http.StatusTooEarly + } + w.Header().Add(`health-data-missing`, s.Name+"."+m.Name) + continue + } + + for _, pt := range g.DataPoints { + haveData = true + + var fallible bool + if fv, ok := pt.Attributes.Value(FallibleKey); ok && fv.Type() == attribute.BOOL { + fallible = fv.AsBool() + } + switch ok := pt.Value == 0; { + case ok: + case fallible && !strict: + default: + status = http.StatusServiceUnavailable + } + + writeMetric(s, m, pt) + } + } + + if meter != "" { + break Metrics + } + } + if !haveData { + status = http.StatusNoContent + } + + h := w.Header() + h.Set("Content-Type", "text/plain; charset=utf-8") + h.Set("Cache-Control", "no-store") + h.Set("X-Content-Type-Options", "nosniff") + w.WriteHeader(status) +} + +// CheckStatus formats a float64 for printing. +type checkStatus float64 + +// String implements [fmt.Stringer]. +func (s checkStatus) String() string { + if s == 0 { + return " ok" + } + return "bad" +} + +// GetBuf returns a pooled buffer or creates one. +func (c *collector) getBuf() *bytes.Buffer { + v := c.bufPool.Get() + if v == nil { + var buf bytes.Buffer + buf.Grow(1024) + return &buf + } + return v.(*bytes.Buffer) +} + +// PutBuf resets the buffer and returns it to the pool. +func (c *collector) putBuf(buf *bytes.Buffer) { + // If gigantic, leak the buffer. + // Trick from log/slog to reduce steady-state memory usage. 
+ if buf.Cap() > 4096 { + return + } + buf.Reset() + c.bufPool.Put(buf) +} diff --git a/health/otel_test.go b/health/otel_test.go new file mode 100644 index 0000000000..0eef118735 --- /dev/null +++ b/health/otel_test.go @@ -0,0 +1,153 @@ +package health + +import ( + "bufio" + "bytes" + "context" + "net/http" + "net/http/httptest" + "net/url" + "regexp" + "sync/atomic" + "testing" + + "github.com/quay/zlog" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" +) + +var ( + status atomic.Value + handler http.Handler +) + +func init() { + otel.GetMeterProvider().Meter("github.com/quay/clair/v4/health").Float64ObservableGauge("dummy", + metric.WithDescription("This is a dummy healthcheck."), + metric.WithUnit(HealthUnit), + metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error { + o.Observe(status.Load().(float64)) + return nil + }), + ) + otel.GetMeterProvider().Meter("example.com/health").Float64ObservableGauge("example", + metric.WithDescription(`Example of a "fallible" check.`), + metric.WithUnit(HealthUnit), + metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error { + o.Observe(status.Load().(float64), + metric.WithAttributes(FallibleKey.Bool(true))) + return nil + }), + ) + + var call int64 + otel.GetMeterProvider().Meter("example.com/health").Int64ObservableCounter("other", + metric.WithDescription("This is a dummy healthcheck that always reports OK."), + metric.WithUnit("{count}"), + metric.WithInt64Callback(func(_ context.Context, o metric.Int64Observer) error { + o.Observe(atomic.AddInt64(&call, 1)) + return nil + }), + ) +} + +func runRequest(t *testing.T, r *http.Request, check func(*testing.T, *http.Response)) { + t.Helper() + ctx := zlog.Test(context.Background(), t) + w := httptest.NewRecorder() + w.Body = new(bytes.Buffer) + + t.Logf("request URI: %s %s", r.Method, r.URL.RequestURI()) + handler.ServeHTTP(w, r.WithContext(ctx)) + + t.Logf("body:\n%s", w.Body.String()) + res := 
w.Result() + defer res.Body.Close() + check(t, res) +} + +func checkOKRegexp(pat string) func(*testing.T, *http.Response) { + re := regexp.MustCompile(pat) + return func(t *testing.T, res *http.Response) { + t.Logf("status code: got: %d, want: %d", res.StatusCode, http.StatusOK) + if got, want := res.StatusCode, http.StatusOK; got != want { + t.Fail() + } + if !re.MatchReader(bufio.NewReader(res.Body)) { + t.Error("regexp failed") + } + } +} + +func TestHTTP(t *testing.T) { + // Setup + status.Store(float64(0)) + mp := otel.GetMeterProvider() + meter := mp.Meter("test") + _, err := meter.Float64ObservableGauge("dummy", + metric.WithDescription("This is a dummy healthcheck."), + metric.WithUnit(HealthUnit), + metric.WithFloat64Callback(func(_ context.Context, o metric.Float64Observer) error { + o.Observe(status.Load().(float64)) + return nil + }), + ) + if err != nil { + t.Fatal(err) + } + + // Tests + // Basic ones: + t.Run("OK", func(t *testing.T) { + r := httptest.NewRequest(http.MethodGet, "/", nil) + runRequest(t, r, checkOKRegexp(`.`)) + }) + t.Run("HEAD", func(t *testing.T) { + r := httptest.NewRequest(http.MethodHead, "/", nil) + runRequest(t, r, + func(t *testing.T, res *http.Response) { + t.Logf("status code: got: %d, want: %d", res.StatusCode, http.StatusOK) + if got, want := res.StatusCode, http.StatusOK; got != want { + t.Fail() + } + }) + }) + t.Run("Single", func(t *testing.T) { + pkg := "github.com/quay/clair/v4/health" + r := httptest.NewRequest(http.MethodGet, "/?meter="+url.PathEscape(pkg), nil) + runRequest(t, r, checkOKRegexp(`^`+pkg+`\.\w+ `)) // not exactly correct, but good enough. 
+ }) + + // With failing checks: + status.Store(float64(0.5)) + t.Run("Fail", func(t *testing.T) { + r := httptest.NewRequest(http.MethodGet, "/", nil) + runRequest(t, r, func(t *testing.T, res *http.Response) { + got, want := res.StatusCode, http.StatusServiceUnavailable + t.Logf("status code: got: %d, want: %d", got, want) + if got != want { + t.Fail() + } + }) + }) + t.Run("Fallible", func(t *testing.T) { + v := url.Values{ + "meter": {"example.com/health"}, + } + t.Run("Lax", func(t *testing.T) { + r := httptest.NewRequest(http.MethodGet, "/?"+v.Encode(), nil) + runRequest(t, r, checkOKRegexp(`.`)) + }) + t.Run("Strict", func(t *testing.T) { + v.Set("strict", "") + r := httptest.NewRequest(http.MethodGet, "/?"+v.Encode(), nil) + runRequest(t, r, func(t *testing.T, res *http.Response) { + got, want := res.StatusCode, http.StatusServiceUnavailable + t.Logf("status code: got: %d, want: %d", got, want) + if got != want { + t.Fail() + } + }) + }) + }) +} From 2d56f0c9e9e9ddaa832878011cd8c05cca4299ba Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Tue, 23 Jul 2024 15:23:53 -0500 Subject: [PATCH 5/5] introspection: use metrics-based health check implementation Signed-off-by: Hank Donnay --- introspection/server.go | 61 ++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/introspection/server.go b/introspection/server.go index c823733d7b..f1cc1a11ac 100644 --- a/introspection/server.go +++ b/introspection/server.go @@ -67,12 +67,10 @@ type Server struct { // initialization. *http.Server *http.ServeMux - // a health check function - health func() bool } // New constructs a [*Server], which has an embedded [*http.Server]. 
-func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, error) { +func New(ctx context.Context, conf *config.Config, _ func() bool) (*Server, error) { var err error ctx = zlog.ContextWithValues(ctx, "component", "introspection/New") @@ -95,13 +93,8 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, ServeMux: http.NewServeMux(), } - // check for health - if health == nil { - zlog.Warn(ctx).Msg("no health check configured; unconditionally reporting OK") - i.health = func() bool { return true } - } else { - i.health = health - } + // Health check setup: + healthReader, healthHandler := health.NewMetricsHook() // Configure metrics var mr metric.Reader @@ -164,23 +157,28 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, if err != nil { return nil, fmt.Errorf("error configuring metrics: %w", err) } + + // The metrics setup is slightly different in that it's always enabled + // because it powers the health check machinery. + mOpt := []metric.Option{ + metric.WithResource(resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(fmt.Sprintf("clairv4/%v", i.conf.Mode)), + )), + metric.WithReader(healthReader), + } if mr != nil { - mp := metric.NewMeterProvider( - metric.WithReader(mr), - metric.WithResource(resource.NewWithAttributes( - semconv.SchemaURL, - semconv.ServiceNameKey.String(fmt.Sprintf("clairv4/%v", i.conf.Mode)), - )), - ) - otel.SetMeterProvider(mp) - i.Server.RegisterOnShutdown(func() { - ctx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - if err := mp.Shutdown(ctx); err != nil { - zlog.Error(ctx).Err(err).Msg("error shutting down metric provider") - } - }) + mOpt = append(mOpt, metric.WithReader(mr)) } + mp := metric.NewMeterProvider(mOpt...) 
+ otel.SetMeterProvider(mp) + i.Server.RegisterOnShutdown(func() { + ctx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Second) + defer cancel() + if err := mp.Shutdown(ctx); err != nil { + zlog.Error(ctx).Err(err).Msg("error shutting down metric provider") + } + }) // configure tracing // sampler @@ -313,6 +311,9 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, zlog.Info(ctx).Msg("distributed tracing configured") } + // Health check HTTP handler: + i.ServeMux.Handle(HealthEndpoint, http.StripPrefix(HealthEndpoint, healthHandler)) + // configure diagnostics err = i.withDiagnostics(ctx) if err != nil { @@ -330,16 +331,6 @@ func New(ctx context.Context, conf *config.Config, health func() bool) (*Server, // WithDiagnostics enables healthz and pprof endpoints. func (i *Server) withDiagnostics(_ context.Context) error { - health := i.health - i.HandleFunc(HealthEndpoint, func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("X-Content-Type-Options", "nosniff") - w.Header().Set("Content-Type", "text/plain; charset=utf-8") - if !health() { - w.WriteHeader(http.StatusInternalServerError) - return - } - fmt.Fprint(w, `ok`) - }) i.HandleFunc("/debug/pprof/", pprof.Index) i.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) i.HandleFunc("/debug/pprof/profile", pprof.Profile)