Skip to content

Commit

Permalink
Added ability to define skip values in csv parser (#8627)
Browse files Browse the repository at this point in the history
  • Loading branch information
helenosheaa authored Jan 11, 2021
1 parent 3531e9d commit 3b87438
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 1 deletion.
3 changes: 2 additions & 1 deletion config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,7 @@ func (c *Config) getParserConfig(name string, tbl *ast.Table) (*parsers.Config,
c.getFieldInt(tbl, "csv_skip_rows", &pc.CSVSkipRows)
c.getFieldInt(tbl, "csv_skip_columns", &pc.CSVSkipColumns)
c.getFieldBool(tbl, "csv_trim_space", &pc.CSVTrimSpace)
c.getFieldStringSlice(tbl, "csv_skip_values", &pc.CSVSkipValues)

c.getFieldStringSlice(tbl, "form_urlencoded_tag_keys", &pc.FormUrlencodedTagKeys)

Expand Down Expand Up @@ -1413,7 +1414,7 @@ func (c *Config) missingTomlField(typ reflect.Type, key string) error {
"collectd_security_level", "collectd_typesdb", "collection_jitter", "csv_column_names",
"csv_column_types", "csv_comment", "csv_delimiter", "csv_header_row_count",
"csv_measurement_column", "csv_skip_columns", "csv_skip_rows", "csv_tag_columns",
"csv_timestamp_column", "csv_timestamp_format", "csv_timezone", "csv_trim_space",
"csv_timestamp_column", "csv_timestamp_format", "csv_timezone", "csv_trim_space", "csv_skip_values",
"data_format", "data_type", "delay", "drop", "drop_original", "dropwizard_metric_registry_path",
"dropwizard_tag_paths", "dropwizard_tags_path", "dropwizard_time_format", "dropwizard_time_path",
"fielddrop", "fieldpass", "flush_interval", "flush_jitter", "form_urlencoded_tag_keys",
Expand Down
4 changes: 4 additions & 0 deletions plugins/parsers/csv/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ values.
## in case of there is no timezone information.
## It follows the IANA Time Zone database.
csv_timezone = ""

## Values to skip, such as an empty string value "".
## A field is skipped entirely when its value matches any of the values listed here.
csv_skip_values = []
```
#### csv_timestamp_column, csv_timestamp_format

Expand Down
8 changes: 8 additions & 0 deletions plugins/parsers/csv/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type Config struct {
TimestampFormat string `toml:"csv_timestamp_format"`
Timezone string `toml:"csv_timezone"`
TrimSpace bool `toml:"csv_trim_space"`
SkipValues []string `toml:"csv_skip_values"`

gotColumnNames bool

Expand Down Expand Up @@ -197,6 +198,13 @@ outer:
value = strings.Trim(value, " ")
}

// don't record fields where the value matches a skip value
for _, s := range p.SkipValues {
if value == s {
continue outer
}
}

for _, tagName := range p.TagColumns {
if tagName == fieldName {
tags[tagName] = value
Expand Down
54 changes: 54 additions & 0 deletions plugins/parsers/csv/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,57 @@ func TestStaticMeasurementName(t *testing.T) {
}
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
}

func TestSkipEmptyStringValue(t *testing.T) {
p, err := NewParser(
&Config{
MetricName: "csv",
HeaderRowCount: 1,
ColumnNames: []string{"a", "b"},
SkipValues: []string{""},
},
)
require.NoError(t, err)
testCSV := `a,b
1,""`
metrics, err := p.Parse([]byte(testCSV))
require.NoError(t, err)

expected := []telegraf.Metric{
testutil.MustMetric("csv",
map[string]string{},
map[string]interface{}{
"a": 1,
},
time.Unix(0, 0),
),
}
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
}

func TestSkipSpecifiedStringValue(t *testing.T) {
p, err := NewParser(
&Config{
MetricName: "csv",
HeaderRowCount: 1,
ColumnNames: []string{"a", "b"},
SkipValues: []string{"MM"},
},
)
require.NoError(t, err)
testCSV := `a,b
1,MM`
metrics, err := p.Parse([]byte(testCSV))
require.NoError(t, err)

expected := []telegraf.Metric{
testutil.MustMetric("csv",
map[string]string{},
map[string]interface{}{
"a": 1,
},
time.Unix(0, 0),
),
}
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
}
2 changes: 2 additions & 0 deletions plugins/parsers/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ type Config struct {
CSVTimestampFormat string `toml:"csv_timestamp_format"`
CSVTimezone string `toml:"csv_timezone"`
CSVTrimSpace bool `toml:"csv_trim_space"`
CSVSkipValues []string `toml:"csv_skip_values"`

// FormData configuration
FormUrlencodedTagKeys []string `toml:"form_urlencoded_tag_keys"`
Expand Down Expand Up @@ -222,6 +223,7 @@ func NewParser(config *Config) (Parser, error) {
TimestampFormat: config.CSVTimestampFormat,
Timezone: config.CSVTimezone,
DefaultTags: config.DefaultTags,
SkipValues: config.CSVSkipValues,
}

return csv.NewParser(config)
Expand Down

0 comments on commit 3b87438

Please sign in to comment.