Skip to content

Commit

Permalink
Use auto rate interval for blob throughput metrics
Browse files Browse the repository at this point in the history
The DA internal dashboard's blob throughput does not match the blob explorer's throughput graphs because the dataapi query uses a 120s rate interval while the DA dashboard uses the automatic $__rate_interval. This change converts the dataAPI metrics to use the automatic $__rate_interval as well.

See https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/
  • Loading branch information
pschork committed Jun 26, 2024
1 parent b911361 commit 2124809
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 10 deletions.
2 changes: 1 addition & 1 deletion disperser/dataapi/metrics_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (s *server) getMetric(ctx context.Context, startTime int64, endTime int64)
}

func (s *server) getThroughput(ctx context.Context, start int64, end int64) ([]*Throughput, error) {
result, err := s.promClient.QueryDisperserAvgThroughputBlobSizeBytes(ctx, time.Unix(start, 0), time.Unix(end, 0), avgThroughputWindowSize)
result, err := s.promClient.QueryDisperserAvgThroughputBlobSizeBytes(ctx, time.Unix(start, 0), time.Unix(end, 0))
if err != nil {
return nil, err
}
Expand Down
13 changes: 4 additions & 9 deletions disperser/dataapi/prometheus_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@ import (

const (
// maxNumOfDataPoints is the maximum number of data points that can be queried from Prometheus based on latency that this API can provide
maxNumOfDataPoints = 3500
throughputRateWindowInSec = 60
maxNumOfDataPoints = 3500
)

type (
PrometheusClient interface {
QueryDisperserBlobSizeBytesPerSecond(ctx context.Context, start time.Time, end time.Time) (*PrometheusResult, error)
QueryDisperserAvgThroughputBlobSizeBytes(ctx context.Context, start time.Time, end time.Time, windowSizeInSec uint8) (*PrometheusResult, error)
QueryDisperserAvgThroughputBlobSizeBytes(ctx context.Context, start time.Time, end time.Time) (*PrometheusResult, error)
}

PrometheusResultValues struct {
Expand Down Expand Up @@ -47,12 +46,8 @@ func (pc *prometheusClient) QueryDisperserBlobSizeBytesPerSecond(ctx context.Con
return pc.queryRange(ctx, query, start, end)
}

func (pc *prometheusClient) QueryDisperserAvgThroughputBlobSizeBytes(ctx context.Context, start time.Time, end time.Time, windowSizeInSec uint8) (*PrometheusResult, error) {
if windowSizeInSec < throughputRateWindowInSec {
windowSizeInSec = throughputRateWindowInSec
}

query := fmt.Sprintf("sum by (job) (rate(eigenda_batcher_blobs_total{state=\"confirmed\",data=\"size\",cluster=\"%s\"}[%ds]))", pc.cluster, windowSizeInSec)
// QueryDisperserAvgThroughputBlobSizeBytes builds a PromQL expression that sums, by job,
// the rate of eigenda_batcher_blobs_total (state="confirmed", data="size") for pc.cluster,
// and passes it to pc.queryRange together with start and end.
//
// NOTE(review): the range selector is sent as the literal text `$$__rate_interval`;
// fmt.Sprintf only interprets `%` verbs, so both `$` characters reach the query string.
// $__rate_interval is a Grafana dashboard variable — confirm that the backend behind
// queryRange expands it, since a plain PromQL range selector expects a duration such as `[120s]`.
func (pc *prometheusClient) QueryDisperserAvgThroughputBlobSizeBytes(ctx context.Context, start time.Time, end time.Time) (*PrometheusResult, error) {
query := fmt.Sprintf("sum by (job) (rate(eigenda_batcher_blobs_total{state=\"confirmed\",data=\"size\",cluster=\"%s\"}[$$__rate_interval]))", pc.cluster)
return pc.queryRange(ctx, query, start, end)
}

Expand Down

0 comments on commit 2124809

Please sign in to comment.