Skip to content

Commit

Permalink
fix: request record label with commitment mode and version (#109)
Browse files Browse the repository at this point in the history
* fix: request record label with commitment mode and version

* fix: lint

* refactor: combine request metrics and write bad requests

* feat: added docker compose services and provisioning

* fix: commitment version type

* refactor: commitment mode and version meta struct

* chore: passing gosec with nosec directives

* ci: add tests on expected commitment mode and cert version

* fix: comments and labels
  • Loading branch information
hopeyen authored Sep 18, 2024
1 parent 5993bb2 commit be16229
Show file tree
Hide file tree
Showing 13 changed files with 515 additions and 104 deletions.
1 change: 0 additions & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@ jobs:
with:
version: v1.60
args: --timeout 3m

2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ An optional storage caching CLI flag `--routing.cache-targets` can be leveraged

To see the list of available metrics, run `./bin/eigenda-proxy doc metrics`

To quickly set up a monitoring dashboard, add the eigenda-proxy metrics endpoint to a reachable Prometheus server config as a scrape target, add the Prometheus datasource to Grafana, and import the existing [Grafana dashboard JSON file](./grafana_dashboard.json).

## Deployment Guide

### Hardware Requirements
Expand Down
6 changes: 6 additions & 0 deletions commitments/mode.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import (
"fmt"
)

// CommitmentMeta groups the metadata that describes a commitment: the
// commitment mode and the version byte of the EigenDA certificate.
type CommitmentMeta struct {
// Mode is the commitment mode, expressed as a CommitmentMode (a string type).
Mode CommitmentMode
// CertVersion is shared for all modes and denotes version of the EigenDA certificate
CertVersion byte
}

type CommitmentMode string

const (
Expand Down
57 changes: 57 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
services:
eigenda_proxy:
build:
context: .
dockerfile: Dockerfile
container_name: eigenda-proxy
environment:
- EIGENDA_PROXY_ADDR=0.0.0.0
- EIGENDA_PROXY_PORT=4242
- MEMSTORE_ENABLED=false
- MEMSTORE_EXPIRATION=45m
- EIGENDA_PROXY_SIGNER_PRIVATE_KEY_HEX=$PRIVATE_KEY
- EIGENDA_PROXY_EIGENDA_DISPERSER_RPC=disperser-holesky.eigenda.xyz:443
- EIGENDA_PROXY_SERVICE_MANAGER_ADDR=0xD4A7E1Bd8015057293f0D0A557088c286942e84b
- EIGENDA_PROXY_ETH_RPC=$ETH_RPC
- EIGENDA_PROXY_ETH_CONFIRMATION_DEPTH=0
- EIGENDA_PROXY_METRICS_ADDR=0.0.0.0
- EIGENDA_PROXY_METRICS_ENABLED=true
- EIGENDA_PROXY_METRICS_PORT=7300
ports:
- 4242:4242
- 7300:7300

prometheus:
image: prom/prometheus:latest
container_name: prometheus
volumes:
- ./monitor/prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- "--config.file=/etc/prometheus/prometheus.yml"

grafana:
image: grafana/grafana:latest
container_name: grafana
ports:
- "127.0.0.1:3000:3000"
volumes:
- ./monitor/grafana/provisioning/:/etc/grafana/provisioning/:ro
- ./monitor/grafana/dashboards:/var/lib/grafana/dashboards
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin
depends_on:
- prometheus

traffic-generator:
image: alpine:latest
build: scripts/
container_name: traffic_generator
depends_on:
- eigenda_proxy
volumes:
- ./scripts/:/scripts/

volumes:
grafana-data:
25 changes: 17 additions & 8 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ type Config struct {
type Metricer interface {
RecordInfo(version string)
RecordUp()
RecordRPCServerRequest(method string) func(status string)
RecordRPCServerRequest(method string) func(status string, commitmentMode string, version string)

Document() []metrics.DocumentedMetric
}
Expand All @@ -40,6 +40,7 @@ type Metrics struct {
Up prometheus.Gauge

HTTPServerRequestsTotal *prometheus.CounterVec
HTTPServerBadRequestHeader *prometheus.CounterVec
HTTPServerRequestDurationSeconds *prometheus.HistogramVec

registry *prometheus.Registry
Expand Down Expand Up @@ -79,7 +80,15 @@ func NewMetrics(subsystem string) *Metrics {
Name: "requests_total",
Help: "Total requests to the HTTP server",
}, []string{
"method", "status",
"method", "status", "commitment_mode", "DA_cert_version",
}),
HTTPServerBadRequestHeader: factory.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: httpServerSubsystem,
Name: "requests_bad_header_total",
Help: "Total requests to the HTTP server with bad headers",
}, []string{
"method", "error_type",
}),
HTTPServerRequestDurationSeconds: factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Expand All @@ -90,7 +99,7 @@ func NewMetrics(subsystem string) *Metrics {
Buckets: prometheus.ExponentialBucketsRange(0.05, 1200, 20),
Help: "Histogram of HTTP server request durations",
}, []string{
"method", // no status on histograms because those are very expensive
"method", "commitment_mode", "DA_cert_version", // no status on histograms because those are very expensive
}),
registry: registry,
factory: factory,
Expand All @@ -112,12 +121,12 @@ func (m *Metrics) RecordUp() {
// RecordRPCServerRequest is a helper method to record an incoming HTTP request.
// It bumps the requests metric, and tracks how long it takes to serve a response,
// including the HTTP status code.
func (m *Metrics) RecordRPCServerRequest(method string) func(status string) {
func (m *Metrics) RecordRPCServerRequest(method string) func(status string, mode string, ver string) {
// we don't want to track the status code on the histogram because that would
// create a huge number of labels, and cost a lot on cloud hosted services
timer := prometheus.NewTimer(m.HTTPServerRequestDurationSeconds.WithLabelValues(method))
return func(status string) {
m.HTTPServerRequestsTotal.WithLabelValues(method, status).Inc()
return func(status, mode, ver string) {
m.HTTPServerRequestsTotal.WithLabelValues(method, status, mode, ver).Inc()
timer.ObserveDuration()
}
}
Expand Down Expand Up @@ -150,6 +159,6 @@ func (n *noopMetricer) RecordInfo(_ string) {
func (n *noopMetricer) RecordUp() {
}

func (n *noopMetricer) RecordRPCServerRequest(string) func(status string) {
return func(string) {}
func (n *noopMetricer) RecordRPCServerRequest(string) func(status, mode, ver string) {
return func(string, string, string) {}
}
243 changes: 243 additions & 0 deletions monitor/grafana/dashboards/simple_dashboard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "ddshms3dlineoe"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "ddshms3dlineoe"
},
"editorMode": "code",
"expr": "eigenda_proxy_default_rpc_server_requests_total{method=\"/put/\"}",
"instant": false,
"legendFormat": "{{__name__}}",
"range": true,
"refId": "A"
}
],
"title": "/put requests total",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "ddshms3dlineoe"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"displayMode": "gradient",
"maxVizHeight": 300,
"minVizHeight": 16,
"minVizWidth": 8,
"namePlacement": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"sizing": "auto",
"valueMode": "color"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "ddshms3dlineoe"
},
"editorMode": "code",
"expr": "eigenda_proxy_default_rpc_server_request_duration_seconds_bucket{method=\"/put/\"}",
"format": "heatmap",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "/put requests duration",
"type": "bargauge"
},
{
"datasource": {
"type": "loki",
"uid": "loki-datasource"
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 10
},
"id": 2,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki-datasource"
},
"editorMode": "builder",
"expr": "{container=\"ops-bedrock-da-server-1\"} |= ``",
"queryType": "range",
"refId": "A"
}
],
"title": "logs",
"type": "logs"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "EigenDA Proxy",
"uid": "ddw5n232n5vy8e",
"version": 1,
"weekStart": ""
}
Loading

0 comments on commit be16229

Please sign in to comment.