Skip to content

Commit

Permalink
expfmt: Add UTF-8 syntax support in text_parse.go (#670)
Browse files Browse the repository at this point in the history
Update expfmt/text_parse.go to support the new UTF-8 syntax

---------

Signed-off-by: Federico Torres <[email protected]>
  • Loading branch information
fedetorres93 authored Aug 21, 2024
1 parent cd4bcc0 commit 8968b6c
Show file tree
Hide file tree
Showing 2 changed files with 381 additions and 22 deletions.
152 changes: 131 additions & 21 deletions expfmt/text_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ import (
"math"
"strconv"
"strings"
"unicode/utf8"

dto "github.com/prometheus/client_model/go"

"google.golang.org/protobuf/proto"

"github.com/prometheus/common/model"
Expand Down Expand Up @@ -60,6 +60,7 @@ type TextParser struct {
currentMF *dto.MetricFamily
currentMetric *dto.Metric
currentLabelPair *dto.LabelPair
currentLabelPairs []*dto.LabelPair // Temporarily stores label pairs while parsing a metric line.

// The remaining member variables are only used for summaries/histograms.
currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le'
Expand All @@ -74,6 +75,7 @@ type TextParser struct {
// count and sum of that summary/histogram.
currentIsSummaryCount, currentIsSummarySum bool
currentIsHistogramCount, currentIsHistogramSum bool
currentMetricIsInsideBraces bool
}

// TextToMetricFamilies reads 'in' as the simple and flat text-based exchange
Expand Down Expand Up @@ -137,12 +139,14 @@ func (p *TextParser) reset(in io.Reader) {
}
p.currentQuantile = math.NaN()
p.currentBucket = math.NaN()
p.currentMF = nil
}

// startOfLine represents the state where the next byte read from p.buf is the
// start of a line (or whitespace leading up to it).
func (p *TextParser) startOfLine() stateFn {
p.lineCount++
p.currentMetricIsInsideBraces = false
if p.skipBlankTab(); p.err != nil {
// This is the only place that we expect to see io.EOF,
// which is not an error but the signal that we are done.
Expand All @@ -158,6 +162,9 @@ func (p *TextParser) startOfLine() stateFn {
return p.startComment
case '\n':
return p.startOfLine // Empty line, start the next one.
case '{':
p.currentMetricIsInsideBraces = true
return p.readingLabels
}
return p.readingMetricName
}
Expand Down Expand Up @@ -275,6 +282,8 @@ func (p *TextParser) startLabelName() stateFn {
return nil // Unexpected end of input.
}
if p.currentByte == '}' {
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
p.currentLabelPairs = nil
if p.skipBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
Expand All @@ -287,6 +296,38 @@ func (p *TextParser) startLabelName() stateFn {
p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName()))
return nil
}
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
if p.currentByte != '=' {
if p.currentMetricIsInsideBraces {
if p.currentMF != nil && p.currentMF.GetName() != p.currentToken.String() {
p.parseError(fmt.Sprintf("multiple metric names %s %s", p.currentMF.GetName(), p.currentToken.String()))
return nil
}
switch p.currentByte {
case ',':
p.setOrCreateCurrentMF()
p.currentMetric = &dto.Metric{}
return p.startLabelName
case '}':
p.setOrCreateCurrentMF()
p.currentMetric = &dto.Metric{}
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
p.currentLabelPairs = nil
if p.skipBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
return p.readingValue
default:
p.parseError(fmt.Sprintf("unexpected end of metric name %q", p.currentByte))
return nil
}
}
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
p.currentLabelPairs = nil
return nil
}
p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())}
if p.currentLabelPair.GetName() == string(model.MetricNameLabel) {
p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel))
Expand All @@ -296,23 +337,17 @@ func (p *TextParser) startLabelName() stateFn {
// labels to 'real' labels.
if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) &&
!(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) {
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair)
}
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
if p.currentByte != '=' {
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
return nil
p.currentLabelPairs = append(p.currentLabelPairs, p.currentLabelPair)
}
// Check for duplicate label names.
labels := make(map[string]struct{})
for _, l := range p.currentMetric.Label {
for _, l := range p.currentLabelPairs {
lName := l.GetName()
if _, exists := labels[lName]; !exists {
labels[lName] = struct{}{}
} else {
p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName()))
p.currentLabelPairs = nil
return nil
}
}
Expand Down Expand Up @@ -345,6 +380,7 @@ func (p *TextParser) startLabelValue() stateFn {
if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
// Create a more helpful error message.
p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue()))
p.currentLabelPairs = nil
return nil
}
} else {
Expand All @@ -371,12 +407,19 @@ func (p *TextParser) startLabelValue() stateFn {
return p.startLabelName

case '}':
if p.currentMF == nil {
p.parseError("invalid metric name")
return nil
}
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
p.currentLabelPairs = nil
if p.skipBlankTab(); p.err != nil {
return nil // Unexpected end of input.
}
return p.readingValue
default:
p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue()))
p.currentLabelPairs = nil
return nil
}
}
Expand Down Expand Up @@ -585,6 +628,8 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
p.currentToken.WriteByte(p.currentByte)
case 'n':
p.currentToken.WriteByte('\n')
case '"':
p.currentToken.WriteByte('"')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
return
Expand All @@ -610,13 +655,45 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
// but not into p.currentToken.
func (p *TextParser) readTokenAsMetricName() {
p.currentToken.Reset()
// A UTF-8 metric name must be quoted and may have escaped characters.
quoted := false
escaped := false
if !isValidMetricNameStart(p.currentByte) {
return
}
for {
p.currentToken.WriteByte(p.currentByte)
for p.err == nil {
if escaped {
switch p.currentByte {
case '\\':
p.currentToken.WriteByte(p.currentByte)
case 'n':
p.currentToken.WriteByte('\n')
case '"':
p.currentToken.WriteByte('"')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
return
}
escaped = false
} else {
switch p.currentByte {
case '"':
quoted = !quoted
if !quoted {
p.currentByte, p.err = p.buf.ReadByte()
return
}
case '\n':
p.parseError(fmt.Sprintf("metric name %q contains unescaped new-line", p.currentToken.String()))
return
case '\\':
escaped = true
default:
p.currentToken.WriteByte(p.currentByte)
}
}
p.currentByte, p.err = p.buf.ReadByte()
if p.err != nil || !isValidMetricNameContinuation(p.currentByte) {
if !isValidMetricNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == ' ') {
return
}
}
Expand All @@ -628,13 +705,45 @@ func (p *TextParser) readTokenAsMetricName() {
// but not into p.currentToken.
func (p *TextParser) readTokenAsLabelName() {
p.currentToken.Reset()
// A UTF-8 label name must be quoted and may have escaped characters.
quoted := false
escaped := false
if !isValidLabelNameStart(p.currentByte) {
return
}
for {
p.currentToken.WriteByte(p.currentByte)
for p.err == nil {
if escaped {
switch p.currentByte {
case '\\':
p.currentToken.WriteByte(p.currentByte)
case 'n':
p.currentToken.WriteByte('\n')
case '"':
p.currentToken.WriteByte('"')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
return
}
escaped = false
} else {
switch p.currentByte {
case '"':
quoted = !quoted
if !quoted {
p.currentByte, p.err = p.buf.ReadByte()
return
}
case '\n':
p.parseError(fmt.Sprintf("label name %q contains unescaped new-line", p.currentToken.String()))
return
case '\\':
escaped = true
default:
p.currentToken.WriteByte(p.currentByte)
}
}
p.currentByte, p.err = p.buf.ReadByte()
if p.err != nil || !isValidLabelNameContinuation(p.currentByte) {
if !isValidLabelNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == '=') {
return
}
}
Expand All @@ -660,6 +769,7 @@ func (p *TextParser) readTokenAsLabelValue() {
p.currentToken.WriteByte('\n')
default:
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
p.currentLabelPairs = nil
return
}
escaped = false
Expand Down Expand Up @@ -718,19 +828,19 @@ func (p *TextParser) setOrCreateCurrentMF() {
}

func isValidLabelNameStart(b byte) bool {
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_'
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '"'
}

func isValidLabelNameContinuation(b byte) bool {
return isValidLabelNameStart(b) || (b >= '0' && b <= '9')
func isValidLabelNameContinuation(b byte, quoted bool) bool {
return isValidLabelNameStart(b) || (b >= '0' && b <= '9') || (quoted && utf8.ValidString(string(b)))
}

func isValidMetricNameStart(b byte) bool {
return isValidLabelNameStart(b) || b == ':'
}

func isValidMetricNameContinuation(b byte) bool {
return isValidLabelNameContinuation(b) || b == ':'
func isValidMetricNameContinuation(b byte, quoted bool) bool {
return isValidLabelNameContinuation(b, quoted) || b == ':'
}

func isBlankOrTab(b byte) bool {
Expand Down
Loading

0 comments on commit 8968b6c

Please sign in to comment.