From 8968b6c1d012c44709ce098644b57d681a5eb571 Mon Sep 17 00:00:00 2001 From: Federico Torres Date: Wed, 21 Aug 2024 08:43:40 -0300 Subject: [PATCH] expfmt: Add UTF-8 syntax support in text_parse.go (#670) Update expfmt/text_parse.go to support the new UTF-8 syntax --------- Signed-off-by: Federico Torres --- expfmt/text_parse.go | 152 +++++++++++++++++++---- expfmt/text_parse_test.go | 251 +++++++++++++++++++++++++++++++++++++- 2 files changed, 381 insertions(+), 22 deletions(-) diff --git a/expfmt/text_parse.go b/expfmt/text_parse.go index 26490211..25db4f21 100644 --- a/expfmt/text_parse.go +++ b/expfmt/text_parse.go @@ -22,9 +22,9 @@ import ( "math" "strconv" "strings" + "unicode/utf8" dto "github.com/prometheus/client_model/go" - "google.golang.org/protobuf/proto" "github.com/prometheus/common/model" @@ -60,6 +60,7 @@ type TextParser struct { currentMF *dto.MetricFamily currentMetric *dto.Metric currentLabelPair *dto.LabelPair + currentLabelPairs []*dto.LabelPair // Temporarily stores label pairs while parsing a metric line. // The remaining member variables are only used for summaries/histograms. currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le' @@ -74,6 +75,7 @@ type TextParser struct { // count and sum of that summary/histogram. currentIsSummaryCount, currentIsSummarySum bool currentIsHistogramCount, currentIsHistogramSum bool + currentMetricIsInsideBraces bool } // TextToMetricFamilies reads 'in' as the simple and flat text-based exchange @@ -137,12 +139,14 @@ func (p *TextParser) reset(in io.Reader) { } p.currentQuantile = math.NaN() p.currentBucket = math.NaN() + p.currentMF = nil } // startOfLine represents the state where the next byte read from p.buf is the // start of a line (or whitespace leading up to it). func (p *TextParser) startOfLine() stateFn { p.lineCount++ + p.currentMetricIsInsideBraces = false if p.skipBlankTab(); p.err != nil { // This is the only place that we expect to see io.EOF, // which is not an error but the signal that we are done. @@ -158,6 +162,9 @@ func (p *TextParser) startOfLine() stateFn { return p.startComment case '\n': return p.startOfLine // Empty line, start the next one. + case '{': + p.currentMetricIsInsideBraces = true + return p.readingLabels } return p.readingMetricName } @@ -275,6 +282,8 @@ func (p *TextParser) startLabelName() stateFn { return nil // Unexpected end of input. } if p.currentByte == '}' { + p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...) + p.currentLabelPairs = nil if p.skipBlankTab(); p.err != nil { return nil // Unexpected end of input. } @@ -287,6 +296,38 @@ func (p *TextParser) startLabelName() stateFn { p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName())) return nil } + if p.skipBlankTabIfCurrentBlankTab(); p.err != nil { + return nil // Unexpected end of input. + } + if p.currentByte != '=' { + if p.currentMetricIsInsideBraces { + if p.currentMF != nil && p.currentMF.GetName() != p.currentToken.String() { + p.parseError(fmt.Sprintf("multiple metric names %s %s", p.currentMF.GetName(), p.currentToken.String())) + return nil + } + switch p.currentByte { + case ',': + p.setOrCreateCurrentMF() + p.currentMetric = &dto.Metric{} + return p.startLabelName + case '}': + p.setOrCreateCurrentMF() + p.currentMetric = &dto.Metric{} + p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...) + p.currentLabelPairs = nil + if p.skipBlankTab(); p.err != nil { + return nil // Unexpected end of input. + } + return p.readingValue + default: + p.parseError(fmt.Sprintf("unexpected end of metric name %q", p.currentByte)) + return nil + } + } + p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte)) + p.currentLabelPairs = nil + return nil + } p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())} if p.currentLabelPair.GetName() == string(model.MetricNameLabel) { p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel)) @@ -296,23 +337,17 @@ func (p *TextParser) startLabelName() stateFn { // labels to 'real' labels. if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) && !(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) { - p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair) - } - if p.skipBlankTabIfCurrentBlankTab(); p.err != nil { - return nil // Unexpected end of input. - } - if p.currentByte != '=' { - p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte)) - return nil + p.currentLabelPairs = append(p.currentLabelPairs, p.currentLabelPair) } // Check for duplicate label names. labels := make(map[string]struct{}) - for _, l := range p.currentMetric.Label { + for _, l := range p.currentLabelPairs { lName := l.GetName() if _, exists := labels[lName]; !exists { labels[lName] = struct{}{} } else { p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName())) + p.currentLabelPairs = nil return nil } } @@ -345,6 +380,7 @@ func (p *TextParser) startLabelValue() stateFn { if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil { // Create a more helpful error message. p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue())) + p.currentLabelPairs = nil return nil } } else { @@ -371,12 +407,19 @@ func (p *TextParser) startLabelValue() stateFn { return p.startLabelName case '}': + if p.currentMF == nil { + p.parseError("invalid metric name") + return nil + } + p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...) + p.currentLabelPairs = nil if p.skipBlankTab(); p.err != nil { return nil // Unexpected end of input. } return p.readingValue default: p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue())) + p.currentLabelPairs = nil return nil } } @@ -585,6 +628,8 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) { p.currentToken.WriteByte(p.currentByte) case 'n': p.currentToken.WriteByte('\n') + case '"': + p.currentToken.WriteByte('"') default: p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) return @@ -610,13 +655,45 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) { // but not into p.currentToken. func (p *TextParser) readTokenAsMetricName() { p.currentToken.Reset() + // A UTF-8 metric name must be quoted and may have escaped characters. + quoted := false + escaped := false if !isValidMetricNameStart(p.currentByte) { return } - for { - p.currentToken.WriteByte(p.currentByte) + for p.err == nil { + if escaped { + switch p.currentByte { + case '\\': + p.currentToken.WriteByte(p.currentByte) + case 'n': + p.currentToken.WriteByte('\n') + case '"': + p.currentToken.WriteByte('"') + default: + p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) + return + } + escaped = false + } else { + switch p.currentByte { + case '"': + quoted = !quoted + if !quoted { + p.currentByte, p.err = p.buf.ReadByte() + return + } + case '\n': + p.parseError(fmt.Sprintf("metric name %q contains unescaped new-line", p.currentToken.String())) + return + case '\\': + escaped = true + default: + p.currentToken.WriteByte(p.currentByte) + } + } p.currentByte, p.err = p.buf.ReadByte() - if p.err != nil || !isValidMetricNameContinuation(p.currentByte) { + if !isValidMetricNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == ' ') { return } } @@ -628,13 +705,45 @@ func (p *TextParser) readTokenAsMetricName() { // but not into p.currentToken. func (p *TextParser) readTokenAsLabelName() { p.currentToken.Reset() + // A UTF-8 label name must be quoted and may have escaped characters. + quoted := false + escaped := false if !isValidLabelNameStart(p.currentByte) { return } - for { - p.currentToken.WriteByte(p.currentByte) + for p.err == nil { + if escaped { + switch p.currentByte { + case '\\': + p.currentToken.WriteByte(p.currentByte) + case 'n': + p.currentToken.WriteByte('\n') + case '"': + p.currentToken.WriteByte('"') + default: + p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) + return + } + escaped = false + } else { + switch p.currentByte { + case '"': + quoted = !quoted + if !quoted { + p.currentByte, p.err = p.buf.ReadByte() + return + } + case '\n': + p.parseError(fmt.Sprintf("label name %q contains unescaped new-line", p.currentToken.String())) + return + case '\\': + escaped = true + default: + p.currentToken.WriteByte(p.currentByte) + } + } p.currentByte, p.err = p.buf.ReadByte() - if p.err != nil || !isValidLabelNameContinuation(p.currentByte) { + if !isValidLabelNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == '=') { return } } @@ -660,6 +769,7 @@ func (p *TextParser) readTokenAsLabelValue() { p.currentToken.WriteByte('\n') default: p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte)) + p.currentLabelPairs = nil return } escaped = false @@ -718,19 +828,19 @@ func (p *TextParser) setOrCreateCurrentMF() { } func isValidLabelNameStart(b byte) bool { - return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' + return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '"' } -func isValidLabelNameContinuation(b byte) bool { - return isValidLabelNameStart(b) || (b >= '0' && b <= '9') +func isValidLabelNameContinuation(b byte, quoted bool) bool { + return isValidLabelNameStart(b) || (b >= '0' && b <= '9') || (quoted && utf8.ValidString(string(b))) } func isValidMetricNameStart(b byte) bool { return isValidLabelNameStart(b) || b == ':' } -func isValidMetricNameContinuation(b byte) bool { - return isValidLabelNameContinuation(b) || b == ':' +func isValidMetricNameContinuation(b byte, quoted bool) bool { + return isValidLabelNameContinuation(b, quoted) || b == ':' } func isBlankOrTab(b byte) bool { diff --git a/expfmt/text_parse_test.go b/expfmt/text_parse_test.go index 0540546a..1ddba297 100644 --- a/expfmt/text_parse_test.go +++ b/expfmt/text_parse_test.go @@ -385,6 +385,215 @@ request_duration_microseconds_count 2693 }, }, }, + // 5: Quoted metric name and quoted label name with dots. + { + in: ` +# HELP "my.noncompliant.metric" help text +# TYPE "my.noncompliant.metric" counter +{"my.noncompliant.metric","label.name"="value"} 1 +`, + out: []*dto.MetricFamily{ + { + Name: proto.String("my.noncompliant.metric"), + Help: proto.String("help text"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("label.name"), + Value: proto.String("value"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + }, + }, + }, + // 6: Metric family with dots in name. + { + in: ` +# HELP "name.with.dots" boring help +# TYPE "name.with.dots" counter +{"name.with.dots",labelname="val1",basename="basevalue"} 42.0 +{"name.with.dots",labelname="val2",basename="basevalue"} 0.23 1234567890 +`, + out: []*dto.MetricFamily{ + { + Name: proto.String("name.with.dots"), + Help: proto.String("boring help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("labelname"), + Value: proto.String("val1"), + }, + { + Name: proto.String("basename"), + Value: proto.String("basevalue"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(42), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String("labelname"), + Value: proto.String("val2"), + }, + { + Name: proto.String("basename"), + Value: proto.String("basevalue"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(.23), + }, + TimestampMs: proto.Int64(1234567890), + }, + }, + }, + }, + }, + // 7: Metric family with dots in name, no labels. + { + in: ` + # HELP "name.with.dots" boring help + # TYPE "name.with.dots" counter + {"name.with.dots"} 42.0 + {"name.with.dots"} 0.23 1234567890 + `, + out: []*dto.MetricFamily{ + { + Name: proto.String("name.with.dots"), + Help: proto.String("boring help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Counter: &dto.Counter{ + Value: proto.Float64(42), + }, + }, + { + Counter: &dto.Counter{ + Value: proto.Float64(.23), + }, + TimestampMs: proto.Int64(1234567890), + }, + }, + }, + }, + }, + // 8: Quoted metric name and quoted label names with dots and asterisks, special characters in label values. + { + in: `# HELP "gauge.name" gauge\ndoc\nstr\"ing +# TYPE "gauge.name" gauge +{"gauge.name","name.1"="val with\nnew line","name*2"="val with \\backslash and \"quotes\""} +Inf +{"gauge.name","name.1"="Björn","name*2"="佖佥"} 3.14e+42 +`, + out: []*dto.MetricFamily{ + { + Name: proto.String("gauge.name"), + Help: proto.String("gauge\ndoc\nstr\"ing"), + Type: dto.MetricType_GAUGE.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("name.1"), + Value: proto.String("val with\nnew line"), + }, + { + Name: proto.String("name*2"), + Value: proto.String("val with \\backslash and \"quotes\""), + }, + }, + Gauge: &dto.Gauge{ + Value: proto.Float64(math.Inf(+1)), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String("name.1"), + Value: proto.String("Björn"), + }, + { + Name: proto.String("name*2"), + Value: proto.String("佖佥"), + }, + }, + Gauge: &dto.Gauge{ + Value: proto.Float64(3.14e42), + }, + }, + }, + }, + }, + }, + // 9: Various escaped special characters in metric and label names. + { + in: ` +# HELP "my\"noncompliant\nmetric\\" help text +# TYPE "my\"noncompliant\nmetric\\" counter +{"my\"noncompliant\nmetric\\","label\"name\n"="value"} 1 +`, + out: []*dto.MetricFamily{ + { + Name: proto.String("my\"noncompliant\nmetric\\"), + Help: proto.String("help text"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("label\"name\n"), + Value: proto.String("value"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + }, + }, + }, + // 10: Quoted metric name, not the first element in the label set. + { + in: ` +# HELP "my.noncompliant.metric" help text +# TYPE "my.noncompliant.metric" counter +{labelname="value", "my.noncompliant.metric"} 1 +`, + out: []*dto.MetricFamily{ + { + Name: proto.String("my.noncompliant.metric"), + Help: proto.String("help text"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("labelname"), + Value: proto.String("value"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + }, + }, + }, } for i, scenario := range scenarios { @@ -641,8 +850,48 @@ metric{quantile="0x1p-3"} 3.14 in: `metric{label="bla",label="bla"} 3.14`, err: "text format parsing error in line 1: duplicate label names for metric", }, + // 34: Multiple quoted metric names. + { + in: `{"one.name","another.name"} 3.14`, + err: "text format parsing error in line 1: multiple metric names", + }, + // 35: Invalid escape sequence in quoted metric name. + { + in: `{"a\xc5z",label="bla"} 3.14`, + err: "text format parsing error in line 1: invalid escape sequence", + }, + // 36: Unexpected end of quoted metric name. + { + in: `{"metric.name".label="bla"} 3.14`, + err: "text format parsing error in line 1: unexpected end of metric name", + }, + // 37: Invalid escape sequence in quoted metric name. + { + in: ` +# TYPE "metric.name\t" counter +{"metric.name\t",label="bla"} 3.14 +`, + err: "text format parsing error in line 2: invalid escape sequence", + }, + // 38: Newline in quoted metric name. + { + in: ` +# TYPE "metric +name" counter +{"metric +name",label="bla"} 3.14 +`, + err: `text format parsing error in line 2: metric name "metric" contains unescaped new-line`, + }, + // 39: Newline in quoted label name. + { + in: ` +{"metric.name","new +line"="bla"} 3.14 +`, + err: `text format parsing error in line 2: label name "new" contains unescaped new-line`, + }, } - for i, scenario := range scenarios { _, err := parser.TextToMetricFamilies(strings.NewReader(scenario.in)) if err == nil {