Skip to content
This repository has been archived by the owner on May 10, 2022. It is now read-only.

feat: support prometheus #110

Open
wants to merge 9 commits into
base: thrift-0.11.0-inlined
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
<packaging>jar</packaging>
<version>1.12-thrift-0.11.0-inlined-SNAPSHOT</version>
<name>Pegasus Java Client</name>

<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
Expand Down Expand Up @@ -80,6 +81,16 @@
<artifactId>javax.annotation-api</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>io.prometheus</groupId>
<artifactId>simpleclient</artifactId>
<version>0.4.0</version>
</dependency>
<dependency>
<groupId>io.prometheus</groupId>
<artifactId>simpleclient_httpserver</artifactId>
<version>0.4.0</version>
</dependency>
</dependencies>
<reporting>
<plugins>
Expand Down
81 changes: 56 additions & 25 deletions src/main/java/com/xiaomi/infra/pegasus/client/ClientOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
* .operationTimeout(Duration.ofMillis(1000))
* .asyncWorkers(4)
* .enablePerfCounter(false)
* .falconPerfCounterTags("")
* .perfCounterTags("")
* .falconPushInterval(Duration.ofSeconds(10))
* .metaQueryTimeout(Duration.ofMillis(5000))
* .build();
Expand All @@ -36,7 +36,8 @@ public class ClientOptions {
public static final Duration DEFAULT_OPERATION_TIMEOUT = Duration.ofMillis(1000);
public static final int DEFAULT_ASYNC_WORKERS = Runtime.getRuntime().availableProcessors();
public static final boolean DEFAULT_ENABLE_PERF_COUNTER = true;
public static final String DEFAULT_FALCON_PERF_COUNTER_TAGS = "";
public static final String DEFAULT_PERF_COUNTER_TYPE = "falcon";
public static final String DEFAULT_PERF_COUNTER_TAGS = "";
public static final Duration DEFAULT_FALCON_PUSH_INTERVAL = Duration.ofSeconds(10);
public static final boolean DEFAULT_ENABLE_WRITE_LIMIT = true;
public static final Duration DEFAULT_META_QUERY_TIMEOUT = Duration.ofMillis(5000);
Expand All @@ -45,7 +46,8 @@ public class ClientOptions {
private final Duration operationTimeout;
private final int asyncWorkers;
private final boolean enablePerfCounter;
private final String falconPerfCounterTags;
private final String perfCounterType;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about define a enum to represent the type of perf counter?

private final String perfCounterTags;
private final Duration falconPushInterval;
private final boolean enableWriteLimit;
private final Duration metaQueryTimeout;
Expand All @@ -55,7 +57,8 @@ protected ClientOptions(Builder builder) {
this.operationTimeout = builder.operationTimeout;
this.asyncWorkers = builder.asyncWorkers;
this.enablePerfCounter = builder.enablePerfCounter;
this.falconPerfCounterTags = builder.falconPerfCounterTags;
this.perfCounterType = builder.perfCounterType;
this.perfCounterTags = builder.perfCounterTags;
this.falconPushInterval = builder.falconPushInterval;
this.enableWriteLimit = builder.enableWriteLimit;
this.metaQueryTimeout = builder.metaQueryTimeout;
Expand All @@ -66,7 +69,8 @@ protected ClientOptions(ClientOptions original) {
this.operationTimeout = original.getOperationTimeout();
this.asyncWorkers = original.getAsyncWorkers();
this.enablePerfCounter = original.isEnablePerfCounter();
this.falconPerfCounterTags = original.getFalconPerfCounterTags();
this.perfCounterType = original.perfCounterType;
this.perfCounterTags = original.getPerfCounterTags();
this.falconPushInterval = original.getFalconPushInterval();
this.enableWriteLimit = original.isWriteLimitEnabled();
this.metaQueryTimeout = original.getMetaQueryTimeout();
Expand Down Expand Up @@ -111,7 +115,8 @@ public boolean equals(Object options) {
&& this.operationTimeout.toMillis() == clientOptions.operationTimeout.toMillis()
&& this.asyncWorkers == clientOptions.asyncWorkers
&& this.enablePerfCounter == clientOptions.enablePerfCounter
&& this.falconPerfCounterTags.equals(clientOptions.falconPerfCounterTags)
&& this.perfCounterType.equals(clientOptions.perfCounterType)
&& this.perfCounterTags.equals(clientOptions.perfCounterTags)
&& this.falconPushInterval.toMillis() == clientOptions.falconPushInterval.toMillis()
&& this.enableWriteLimit == clientOptions.enableWriteLimit
&& this.metaQueryTimeout.toMillis() == clientOptions.metaQueryTimeout.toMillis();
Expand All @@ -129,10 +134,12 @@ public String toString() {
+ operationTimeout.toMillis()
+ ", asyncWorkers="
+ asyncWorkers
+ ", enablePerfCounter="
+ ", perfCounterType="
+ enablePerfCounter
+ ", falconPerfCounterTags='"
+ falconPerfCounterTags
+ ", perfCounterTags='"
+ perfCounterType
+ ", enablePerfCounter="
+ perfCounterTags
+ '\''
+ ", falconPushInterval(s)="
+ falconPushInterval.getSeconds()
Expand All @@ -149,7 +156,8 @@ public static class Builder {
private Duration operationTimeout = DEFAULT_OPERATION_TIMEOUT;
private int asyncWorkers = DEFAULT_ASYNC_WORKERS;
private boolean enablePerfCounter = DEFAULT_ENABLE_PERF_COUNTER;
private String falconPerfCounterTags = DEFAULT_FALCON_PERF_COUNTER_TAGS;
private String perfCounterType = DEFAULT_PERF_COUNTER_TYPE;
private String perfCounterTags = DEFAULT_PERF_COUNTER_TAGS;
private Duration falconPushInterval = DEFAULT_FALCON_PUSH_INTERVAL;
private boolean enableWriteLimit = DEFAULT_ENABLE_WRITE_LIMIT;
private Duration metaQueryTimeout = DEFAULT_META_QUERY_TIMEOUT;
Expand Down Expand Up @@ -194,8 +202,8 @@ public Builder asyncWorkers(int asyncWorkers) {

/**
* Whether to enable performance statistics. If true, the client will periodically report
* metrics to local falcon agent (currently we only support falcon as monitoring system).
* Defaults to {@literal true}, see {@link #DEFAULT_ENABLE_PERF_COUNTER}.
* metrics to local falcon agent (if set falcon as monitoring system) or open prometheus
* collector http server. Defaults to {@literal true}, see {@link #DEFAULT_ENABLE_PERF_COUNTER}.
*
* @param enablePerfCounter enablePerfCounter
* @return {@code this}
Expand All @@ -205,22 +213,34 @@ public Builder enablePerfCounter(boolean enablePerfCounter) {
return this;
}

/**
* set the perf-counter type, now only support falcon and prometheus, Defaults to {@literal
* falcon}, see {@link #DEFAULT_PERF_COUNTER_TYPE}
*
* @param perfCounterType perfCounterType
* @return this
*/
public Builder perfCounterType(String perfCounterType) {
this.perfCounterType = perfCounterType;
return this;
}

/**
* Additional tags for falcon metrics. For example:
* "cluster=c3srv-ad,job=recommend-service-history". Defaults to empty string, see {@link
* #DEFAULT_FALCON_PERF_COUNTER_TAGS}.
* #DEFAULT_PERF_COUNTER_TAGS}.
*
* @param falconPerfCounterTags falconPerfCounterTags
* @param perfCounterTags perfCounterTags
* @return {@code this}
*/
public Builder falconPerfCounterTags(String falconPerfCounterTags) {
this.falconPerfCounterTags = falconPerfCounterTags;
public Builder perfCounterTags(String perfCounterTags) {
this.perfCounterTags = perfCounterTags;
return this;
}

/**
* The interval to report metrics to local falcon agent. Defaults to {@literal 10s}, see {@link
* #DEFAULT_FALCON_PUSH_INTERVAL}.
* The interval to report metrics to local falcon agent(if set falcon as monitor system).
* Defaults to {@literal 10s}, see {@link #DEFAULT_FALCON_PUSH_INTERVAL}.
*
* @param falconPushInterval falconPushInterval
* @return {@code this}
Expand Down Expand Up @@ -279,7 +299,8 @@ public ClientOptions.Builder mutate() {
.operationTimeout(getOperationTimeout())
.asyncWorkers(getAsyncWorkers())
.enablePerfCounter(isEnablePerfCounter())
.falconPerfCounterTags(getFalconPerfCounterTags())
.perfCounterType(getPerfCounterType())
.perfCounterTags(getPerfCounterTags())
.falconPushInterval(getFalconPushInterval())
.enableWriteLimit(isWriteLimitEnabled())
.metaQueryTimeout(getMetaQueryTimeout());
Expand Down Expand Up @@ -316,8 +337,8 @@ public int getAsyncWorkers() {

/**
* Whether to enable performance statistics. If true, the client will periodically report metrics
* to local falcon agent (currently we only support falcon as monitoring system). Defaults to
* {@literal true}.
* to local falcon agent (if set falcon as monitoring system) or open prometheus collector http
* server. Defaults to {@literal true}.
*
* @return whether to enable performance statistics.
*/
Expand All @@ -326,16 +347,26 @@ public boolean isEnablePerfCounter() {
}

/**
* Additional tags for falcon metrics. Defaults to empty string.
* get perf-counter type, now only support falcon and prometheus
*
* @return perf-counter type
*/
public String getPerfCounterType() {
return perfCounterType;
}

/**
* Additional tags for metrics. Defaults to empty string.
*
* @return additional tags for falcon metrics.
*/
public String getFalconPerfCounterTags() {
return falconPerfCounterTags;
public String getPerfCounterTags() {
return perfCounterTags;
}

/**
* The interval to report metrics to local falcon agent. Defaults to {@literal 10s}.
* The interval to report metrics to local falcon agent(if set falcon as monitor system). Defaults
* to {@literal 10s}.
*
* @return the interval to report metrics to local falcon agent.
*/
Expand Down
101 changes: 101 additions & 0 deletions src/main/java/com/xiaomi/infra/pegasus/metrics/FalconCollector.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package com.xiaomi.infra.pegasus.metrics;

import static com.xiaomi.infra.pegasus.metrics.MetricsPool.getTableTag;

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Snapshot;
import com.xiaomi.infra.pegasus.tools.Tools;
import java.util.Map;
import java.util.SortedMap;
import org.json.JSONException;
import org.json.JSONObject;

public class FalconCollector {
private FalconMetric falconMetric = new FalconMetric();
private final MetricRegistry registry;
public final String defaultTags;

public FalconCollector(String host, String tags, int reportStepSec, MetricRegistry registry) {
this.defaultTags = tags;
this.registry = registry;
falconMetric.endpoint = host;
falconMetric.step = reportStepSec;
}

public String metricsToJson() {
falconMetric.timestamp = Tools.unixEpochMills() / 1000;

StringBuilder builder = new StringBuilder();
builder.append('[');
SortedMap<String, Meter> meters = registry.getMeters();
for (Map.Entry<String, Meter> entry : meters.entrySet()) {
genJsonsFromMeter(entry.getKey(), entry.getValue(), builder);
builder.append(',');
}

for (Map.Entry<String, Histogram> entry : registry.getHistograms().entrySet()) {
genJsonsFromHistogram(entry.getKey(), entry.getValue(), builder);
builder.append(',');
}

if (builder.charAt(builder.length() - 1) == ',') {
builder.deleteCharAt(builder.length() - 1);
}

builder.append("]");

return builder.toString();
}

public void genJsonsFromMeter(String name, Meter meter, StringBuilder output)
throws JSONException {
falconMetric.counterType = "GAUGE";

falconMetric.metric = name + "_cps_1sec";
falconMetric.tags = getTableTag(name, defaultTags);
falconMetric.value = meter.getMeanRate();
oneMetricToJson(falconMetric, output);
}

public void genJsonsFromHistogram(String name, Histogram hist, StringBuilder output)
throws JSONException {
falconMetric.counterType = "GAUGE";
Snapshot s = hist.getSnapshot();

falconMetric.metric = name + "_p99";
falconMetric.tags = getTableTag(name, defaultTags);
falconMetric.value = s.get99thPercentile();
oneMetricToJson(falconMetric, output);
output.append(',');

falconMetric.metric = name + "_p999";
falconMetric.tags = getTableTag(name, defaultTags);
falconMetric.value = s.get999thPercentile();
oneMetricToJson(falconMetric, output);
}

public static void oneMetricToJson(FalconMetric metric, StringBuilder output)
throws JSONException {
JSONObject obj = new JSONObject();
obj.put("endpoint", metric.endpoint);
obj.put("metric", metric.metric);
obj.put("timestamp", metric.timestamp);
obj.put("step", metric.step);
obj.put("value", metric.value);
obj.put("counterType", metric.counterType);
obj.put("tags", metric.tags);
output.append(obj.toString());
}

static final class FalconMetric {
public String endpoint; // metric host
public String metric; // metric name
public long timestamp; // report time in unix seconds
public int step; // report interval in seconds;
public double value; // metric value
public String counterType; // GAUGE or COUNTER
public String tags; // metrics description
}
}
Loading