Skip to content

Commit

Permalink
Merge pull request #8 from giantswarm/improve-metrics
Browse files Browse the repository at this point in the history
Improve metrics
  • Loading branch information
njuettner authored Jun 30, 2022
2 parents 4bf988a + 078a102 commit ea2098d
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 11 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Added the `aws_servicequotas_operator_quota_applied_values` gauge, which reports the currently applied value of each service quota.
- Added the AWS account ID to all quota metrics as the `account_id` label.

## [0.2.0] - 2022-06-28

### Added
Expand Down
1 change: 1 addition & 0 deletions controllers/legacy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ func (r *AWSLegacyClusterReconciler) Reconcile(ctx context.Context, req ctrl.Req

// create the cluster scope.
clusterScope, err := scope.NewClusterScope(scope.ClusterScopeParams{
AccountId: accountID,
ARN: arn,
ClusterName: cluster.Name,
ClusterNamespace: cluster.Namespace,
Expand Down
11 changes: 11 additions & 0 deletions pkg/aws/scope/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

// ClusterScopeParams defines the input parameters used to create a new Scope.
type ClusterScopeParams struct {
AccountId string
ARN string
Cluster runtime.Object
ClusterName string
Expand All @@ -25,6 +26,9 @@ type ClusterScopeParams struct {
// NewClusterScope creates a new Scope from the supplied parameters.
// This is meant to be called for each reconcile iteration.
func NewClusterScope(params ClusterScopeParams) (*ClusterScope, error) {
if params.AccountId == "" {
return nil, errors.New("failed to generate new scope from emtpy string AccountID")
}
if params.ARN == "" {
return nil, errors.New("failed to generate new scope from emtpy string ARN")
}
Expand Down Expand Up @@ -55,6 +59,7 @@ func NewClusterScope(params ClusterScopeParams) (*ClusterScope, error) {
}

return &ClusterScope{
accountId: params.AccountId,
assumeRole: params.ARN,
cluster: params.Cluster,
clusterName: params.ClusterName,
Expand All @@ -67,6 +72,7 @@ func NewClusterScope(params ClusterScopeParams) (*ClusterScope, error) {

// ClusterScope defines the basic context for an actuator to operate upon.
type ClusterScope struct {
accountId string
assumeRole string
cluster runtime.Object
clusterName string
Expand All @@ -77,6 +83,11 @@ type ClusterScope struct {
session awsclient.ConfigProvider
}

// AccountId returns the AWS account ID this scope was created with, i.e. the
// value passed in ClusterScopeParams.AccountId. NewClusterScope rejects an
// empty AccountId, so a scope built through it never returns "" here.
func (s *ClusterScope) AccountId() string {
return s.accountId
}

// ARN returns the AWS SDK assumed role.
func (s *ClusterScope) ARN() string {
return s.assumeRole
Expand Down
22 changes: 17 additions & 5 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const (
metricNamespace = "aws_servicequotas_operator"
metricSubsystem = "quota"

labelAccountId = "account_id"
labelCluster = "cluster_id"
labelNamespace = "cluster_namespace"
labelServiceName = "service_name"
Expand All @@ -18,7 +19,8 @@ const (
)

var (
labels = []string{labelCluster, labelNamespace, labelServiceName, labelQuotaDescription, labelQuotaCode, labelQuotaValue}
errorLabels = []string{labelAccountId, labelCluster, labelNamespace, labelServiceName, labelQuotaDescription, labelQuotaCode, labelQuotaValue}
infoLabels = []string{labelAccountId, labelCluster, labelNamespace, labelServiceName, labelQuotaDescription, labelQuotaCode}

QuotaIncreaseErrors = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand All @@ -27,7 +29,7 @@ var (
Name: "increase_request_errors",
Help: "Number of service quota increase request errors",
},
labels,
errorLabels,
)
QuotaAppliedErrors = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand All @@ -36,7 +38,7 @@ var (
Name: "applied_request_errors",
Help: "Number of applied quota request errors",
},
labels,
errorLabels,
)
QuotaHistoryErrors = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand All @@ -45,11 +47,21 @@ var (
Name: "history_request_errors",
Help: "Number of service quota history request errors",
},
labels,
errorLabels,
)

QuotaAppliedValues = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Subsystem: metricSubsystem,
Name: "applied_values",
Help: "Number of applied quota values",
},
infoLabels,
)
)

func init() {
// Register custom metrics with the global prometheus registry
metrics.Registry.MustRegister(QuotaHistoryErrors, QuotaAppliedErrors, QuotaIncreaseErrors)
metrics.Registry.MustRegister(QuotaAppliedValues, QuotaHistoryErrors, QuotaAppliedErrors, QuotaIncreaseErrors)
}
14 changes: 8 additions & 6 deletions pkg/quotas/quotas.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,18 @@ func (s *QuotasService) Reconcile(ctx context.Context) {
case servicequotas.ErrCodeNoSuchResourceException:
// fall through
default:
ctrlmetrics.QuotaAppliedErrors.WithLabelValues(s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
ctrlmetrics.QuotaAppliedErrors.WithLabelValues(s.Scope.AccountId(), s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
s.Scope.Error(err, "Failed to get applied service quota")
continue
}
} else {
ctrlmetrics.QuotaAppliedErrors.WithLabelValues(s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
ctrlmetrics.QuotaAppliedErrors.WithLabelValues(s.Scope.AccountId(), s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
s.Scope.Error(err, "Failed to get applied service quota")
continue
}
}
if appliedOutput.Quota != nil {
ctrlmetrics.QuotaAppliedValues.WithLabelValues(s.Scope.AccountId(), s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code).Set(*appliedOutput.Quota.Value)
if *quotaCodeValue.Value > *appliedOutput.Quota.Value {
increaseQuota = true
} else {
Expand All @@ -168,7 +169,7 @@ func (s *QuotasService) Reconcile(ctx context.Context) {
for {
historyOutput, err = s.Quotas.Client.ListRequestedServiceQuotaChangeHistoryByQuota(historyInput)
if err != nil {
ctrlmetrics.QuotaHistoryErrors.WithLabelValues(s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
ctrlmetrics.QuotaHistoryErrors.WithLabelValues(s.Scope.AccountId(), s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
s.Scope.Error(err, "Failed to list requested service quota change history by quota")
break
}
Expand All @@ -179,8 +180,9 @@ func (s *QuotasService) Reconcile(ctx context.Context) {
}

if historyOutput != nil {
count := 0
var count int
for _, r := range historyOutput.RequestedQuotas {
ctrlmetrics.QuotaAppliedValues.WithLabelValues(s.Scope.AccountId(), s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code).Set(*r.DesiredValue)
if (*quotaCodeValue.Value > *r.DesiredValue) &&
(*r.QuotaCode == *quotaCodeValue.Code) &&
(*r.ServiceCode == serviceCode) {
Expand Down Expand Up @@ -214,12 +216,12 @@ func (s *QuotasService) Reconcile(ctx context.Context) {
s.Scope.Info("Current service quota value is already greater, skipping")
continue
default:
ctrlmetrics.QuotaIncreaseErrors.WithLabelValues(s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
ctrlmetrics.QuotaIncreaseErrors.WithLabelValues(s.Scope.AccountId(), s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
s.Scope.Error(err, "Failed to request service quota increase")
continue
}
} else {
ctrlmetrics.QuotaIncreaseErrors.WithLabelValues(s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
ctrlmetrics.QuotaIncreaseErrors.WithLabelValues(s.Scope.AccountId(), s.Scope.ClusterName(), s.Scope.ClusterNamespace(), serviceCode, quotaCodeValue.Description, *quotaCodeValue.Code, strconv.Itoa(int(*quotaCodeValue.Value))).Inc()
s.Scope.Error(err, "Failed to request service quota increase")
continue
}
Expand Down

0 comments on commit ea2098d

Please sign in to comment.