Skip to content

Commit

Permalink
Use nearFullRatio/fullRatio specified values in the prometheus rules
Browse files Browse the repository at this point in the history
Earlier the cluster utilization alert rules (CephClusterNearFull,
CephClusterCriticallyFull, CephClusterReadOnly) and the osd alert rules
(CephOSDNearFull, CephOSDCriticallyFull) were hardcoded to use the
nearFullRatio 0.75, criticallyFullRatio 0.80, and fullRatio 0.85 values.

But these values are now configurable on the storageCluster CR. So the
prometheus rules for these alerts will now be updated to use the
specified values if provided in the storageCluster CR.

This also includes a refactor of the prometheus rule changing
process. The function is now easier to read, maintain & expand.
Also add tests for the prometheus rule changing process.

Signed-off-by: Malay Kumar Parida <[email protected]>
  • Loading branch information
malayparida2000 committed Sep 24, 2024
1 parent 6c43ec6 commit 5f95821
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 34 deletions.
103 changes: 80 additions & 23 deletions controllers/storagecluster/cephcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1178,16 +1178,39 @@ func createPrometheusRules(r *StorageClusterReconciler, sc *ocsv1.StorageCluster
return err
}
applyLabels(getCephClusterMonitoringLabels(*sc), &prometheusRule.ObjectMeta)
replaceTokens := []exprReplaceToken{

replaceTokens := []replaceToken{
{
recordOrAlertName: "CephMgrIsAbsent",
wordInExpr: "openshift-storage",
wordToReplace: "openshift-storage",
replaceWith: sc.Namespace,
},
}

// if nearFullRatio or fullRatio values other than the defaults are provided, replace the values in the prometheus rule accordingly
nearFullRatio := *getFullRatios(sc, "nearFull")
fullRatio := *getFullRatios(sc, "full")
criticallyFullRatio := (nearFullRatio + fullRatio) / 2

if nearFullRatio != defaults.DefaultNearFullRatio {
replaceTokens = append(replaceTokens,
createReplaceToken("", "", "75%", fmt.Sprintf("%.2f%%", nearFullRatio*100)),
createReplaceToken("", "", "0.75", fmt.Sprintf("%f", nearFullRatio)))
}
if criticallyFullRatio != (defaults.DefaultNearFullRatio+defaults.DefaultFullRatio)/2 {
replaceTokens = append(replaceTokens,
createReplaceToken("", "", "80%", fmt.Sprintf("%.2f%%", criticallyFullRatio*100)),
createReplaceToken("", "", "0.80", fmt.Sprintf("%f", criticallyFullRatio)))
}
if fullRatio != defaults.DefaultFullRatio {
replaceTokens = append(replaceTokens,
createReplaceToken("", "", "85%", fmt.Sprintf("%.2f%%", fullRatio*100)),
createReplaceToken("", "", "0.85", fmt.Sprintf("%f", fullRatio)))
}

// nothing to replace in external mode
if name != prometheusExternalRuleName {
changePromRuleExpr(prometheusRule, replaceTokens)
changePromRule(prometheusRule, replaceTokens)
}

if err := createOrUpdatePrometheusRule(r, prometheusRule); err != nil {
Expand All @@ -1210,43 +1233,77 @@ func applyLabels(labels map[string]string, t *metav1.ObjectMeta) {
}
}

type exprReplaceToken struct {
type replaceToken struct {
groupName string
recordOrAlertName string
wordInExpr string
wordToReplace string
replaceWith string
}

func changePromRuleExpr(promRules *monitoringv1.PrometheusRule, replaceTokens []exprReplaceToken) {
if promRules == nil {
func createReplaceToken(groupName, recordOrAlertName, wordToReplace, replaceWith string) replaceToken {
return replaceToken{
groupName: groupName,
recordOrAlertName: recordOrAlertName,
wordToReplace: wordToReplace,
replaceWith: replaceWith,
}
}

// changePromRule replaces the wordToReplace with replaceWith in the PrometheusRule
// This can be used to update the values in the PrometheusRule dynamically
func changePromRule(promRule *monitoringv1.PrometheusRule, tokens []replaceToken) {
if promRule == nil {
return
}
for _, eachToken := range replaceTokens {
// if both the words, one being replaced and the one replacing it, are same
// then we don't have to do anything
if eachToken.replaceWith == eachToken.wordInExpr {

// Iterate over each token for replacements
for _, token := range tokens {
// Skip if the word and replacement are the same
if token.replaceWith == token.wordToReplace {
continue
}
for gIndx, currGroup := range promRules.Spec.Groups {
if eachToken.groupName != "" && eachToken.groupName != currGroup.Name {

// Iterate through all groups in the Prometheus rule
for groupIdx, group := range promRule.Spec.Groups {
// If groupName is specified, ensure it matches; otherwise, apply to all groups
if token.groupName != "" && token.groupName != group.Name {
continue
}
for rIndx, currRule := range currGroup.Rules {
if eachToken.recordOrAlertName != "" {
if currRule.Record != "" && currRule.Record != eachToken.recordOrAlertName {
continue
} else if currRule.Alert != "" && currRule.Alert != eachToken.recordOrAlertName {
continue
}

// Iterate through the rules in the group
for ruleIdx, rule := range group.Rules {
// If recordOrAlertName is specified, ensure it matches; otherwise, apply to all rules
if token.recordOrAlertName == "" || rule.Record == token.recordOrAlertName || rule.Alert == token.recordOrAlertName {
// Update the annotations in the rule
updateAnnotations(&promRule.Spec.Groups[groupIdx].Rules[ruleIdx], token)
// Update the expression field in the rule
updateExp(&promRule.Spec.Groups[groupIdx].Rules[ruleIdx], token)
}
exprStr := currRule.Expr.String()
newExpr := strings.Replace(exprStr, eachToken.wordInExpr, eachToken.replaceWith, -1)
promRules.Spec.Groups[gIndx].Rules[rIndx].Expr = intstr.Parse(newExpr)
}
}
}
}

// updateExp rewrites the rule's expression in place, substituting every
// occurrence of token.wordToReplace with token.replaceWith. A rule whose
// expression renders as an empty string is left untouched.
func updateExp(rule *monitoringv1.Rule, token replaceToken) {
	current := rule.Expr.String()
	if current == "" {
		return
	}
	rule.Expr = intstr.Parse(strings.ReplaceAll(current, token.wordToReplace, token.replaceWith))
}

// updateAnnotations rewrites the rule's "description" annotation in place,
// substituting every occurrence of token.wordToReplace with token.replaceWith.
// Rules without annotations, or without a "description" key, are left untouched.
func updateAnnotations(rule *monitoringv1.Rule, token replaceToken) {
	// Indexing a nil map is safe and reports the key as absent, so a single
	// lookup covers both the "no annotations" and "no description" cases.
	current, found := rule.Annotations["description"]
	if !found {
		return
	}
	rule.Annotations["description"] = strings.ReplaceAll(current, token.wordToReplace, token.replaceWith)
}

// parsePrometheusRule returns provided prometheus rules or an error
func parsePrometheusRule(rules string) (*monitoringv1.PrometheusRule, error) {
var rule monitoringv1.PrometheusRule
Expand Down
33 changes: 22 additions & 11 deletions controllers/storagecluster/cephcluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1042,29 +1042,40 @@ func TestParsePrometheusRules(t *testing.T) {
}

func TestChangePrometheusExprFunc(t *testing.T) {
prometheusRules, err := parsePrometheusRule(localPrometheusRules)
prometheusRule, err := parsePrometheusRule(localPrometheusRules)
assert.NilError(t, err)
var changeTokens = []exprReplaceToken{
{recordOrAlertName: "CephMgrIsAbsent", wordInExpr: "openshift-storage", replaceWith: "new-namespace"},
var changeTokens = []replaceToken{
{recordOrAlertName: "CephMgrIsAbsent", wordToReplace: "openshift-storage", replaceWith: "new-namespace"},
// when alert or record name is not specified,
// the change should affect all the expressions which have the 'wordToReplace'
{recordOrAlertName: "", wordInExpr: "ceph_pool_stored_raw", replaceWith: "new_ceph_pool_stored_raw"},
{recordOrAlertName: "", wordToReplace: "ceph_pool_stored_raw", replaceWith: "new_ceph_pool_stored_raw"},
{recordOrAlertName: "", wordToReplace: "0.75", replaceWith: "0.775"},
{recordOrAlertName: "", wordToReplace: "85%", replaceWith: "92.50%"},
}
changePromRuleExpr(prometheusRules, changeTokens)
alertNameAndChangedExpr := [][2]string{
changePromRule(prometheusRule, changeTokens)

recordOrAlertNameAndReplacedWord := [][2]string{
{"CephMgrIsAbsent", "new-namespace"},
{"CephPoolQuotaBytesNearExhaustion", "new_ceph_pool_stored_raw"},
{"CephPoolQuotaBytesCriticallyExhausted", "new_ceph_pool_stored_raw"},
{"CephClusterNearFull", "0.775"},
{"CephOSDNearFull", "0.775"},
{"CephClusterNearFull", "92.50%"},
{"CephClusterCriticallyFull", "92.50%"},
{"CephClusterReadOnly", "92.50%"},
}
for _, grp := range prometheusRules.Spec.Groups {
for _, grp := range prometheusRule.Spec.Groups {
for _, rule := range grp.Rules {
for _, eachAlertChanged := range alertNameAndChangedExpr {
alertName := eachAlertChanged[0]
changeStr := eachAlertChanged[1]
for _, eachChange := range recordOrAlertNameAndReplacedWord {
alertName := eachChange[0]
changeStr := eachChange[1]
if rule.Alert != alertName {
continue
}
assert.Assert(t, strings.Contains(rule.Expr.String(), changeStr))
assert.Assert(t,
strings.Contains(rule.Expr.String(), changeStr) ||
(rule.Annotations != nil && strings.Contains(rule.Annotations["description"], changeStr)),
fmt.Sprintf("Expected '%s' to be found in either Expr or Annotations for alert %s", changeStr, alertName))
}
}
}
Expand Down

0 comments on commit 5f95821

Please sign in to comment.