diff --git a/config/config.go b/config/config.go index f86692835afb4..b395f7df1c725 100644 --- a/config/config.go +++ b/config/config.go @@ -1320,6 +1320,7 @@ func (c *Config) getParserConfig(name string, tbl *ast.Table) (*parsers.Config, c.getFieldInt(tbl, "csv_skip_rows", &pc.CSVSkipRows) c.getFieldInt(tbl, "csv_skip_columns", &pc.CSVSkipColumns) c.getFieldBool(tbl, "csv_trim_space", &pc.CSVTrimSpace) + c.getFieldStringSlice(tbl, "csv_skip_values", &pc.CSVSkipValues) c.getFieldStringSlice(tbl, "form_urlencoded_tag_keys", &pc.FormUrlencodedTagKeys) @@ -1413,7 +1414,7 @@ func (c *Config) missingTomlField(typ reflect.Type, key string) error { "collectd_security_level", "collectd_typesdb", "collection_jitter", "csv_column_names", "csv_column_types", "csv_comment", "csv_delimiter", "csv_header_row_count", "csv_measurement_column", "csv_skip_columns", "csv_skip_rows", "csv_tag_columns", - "csv_timestamp_column", "csv_timestamp_format", "csv_timezone", "csv_trim_space", + "csv_timestamp_column", "csv_timestamp_format", "csv_timezone", "csv_trim_space", "csv_skip_values", "data_format", "data_type", "delay", "drop", "drop_original", "dropwizard_metric_registry_path", "dropwizard_tag_paths", "dropwizard_tags_path", "dropwizard_time_format", "dropwizard_time_path", "fielddrop", "fieldpass", "flush_interval", "flush_jitter", "form_urlencoded_tag_keys", diff --git a/plugins/parsers/csv/README.md b/plugins/parsers/csv/README.md index b44d2fc2d2576..220ac60686636 100644 --- a/plugins/parsers/csv/README.md +++ b/plugins/parsers/csv/README.md @@ -73,6 +73,10 @@ values. ## in case of there is no timezone information. ## It follows the IANA Time Zone database. csv_timezone = "" + + ## Values to skip, such as the empty string "". ## A field whose value matches any of the values listed here is skipped entirely. 
+ csv_skip_values = [] ``` #### csv_timestamp_column, csv_timestamp_format diff --git a/plugins/parsers/csv/parser.go b/plugins/parsers/csv/parser.go index 1c3d511ef43eb..3f370b507dc4f 100644 --- a/plugins/parsers/csv/parser.go +++ b/plugins/parsers/csv/parser.go @@ -31,6 +31,7 @@ type Config struct { TimestampFormat string `toml:"csv_timestamp_format"` Timezone string `toml:"csv_timezone"` TrimSpace bool `toml:"csv_trim_space"` + SkipValues []string `toml:"csv_skip_values"` gotColumnNames bool @@ -197,6 +198,13 @@ outer: value = strings.Trim(value, " ") } + // don't record fields where the value matches a skip value + for _, s := range p.SkipValues { + if value == s { + continue outer + } + } + for _, tagName := range p.TagColumns { if tagName == fieldName { tags[tagName] = value diff --git a/plugins/parsers/csv/parser_test.go b/plugins/parsers/csv/parser_test.go index 31fd4b02a0966..f942eb0716346 100644 --- a/plugins/parsers/csv/parser_test.go +++ b/plugins/parsers/csv/parser_test.go @@ -613,3 +613,57 @@ func TestStaticMeasurementName(t *testing.T) { } testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime()) } + +func TestSkipEmptyStringValue(t *testing.T) { + p, err := NewParser( + &Config{ + MetricName: "csv", + HeaderRowCount: 1, + ColumnNames: []string{"a", "b"}, + SkipValues: []string{""}, + }, + ) + require.NoError(t, err) + testCSV := `a,b +1,""` + metrics, err := p.Parse([]byte(testCSV)) + require.NoError(t, err) + + expected := []telegraf.Metric{ + testutil.MustMetric("csv", + map[string]string{}, + map[string]interface{}{ + "a": 1, + }, + time.Unix(0, 0), + ), + } + testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime()) +} + +func TestSkipSpecifiedStringValue(t *testing.T) { + p, err := NewParser( + &Config{ + MetricName: "csv", + HeaderRowCount: 1, + ColumnNames: []string{"a", "b"}, + SkipValues: []string{"MM"}, + }, + ) + require.NoError(t, err) + testCSV := `a,b +1,MM` + metrics, err := p.Parse([]byte(testCSV)) + 
require.NoError(t, err) + + expected := []telegraf.Metric{ + testutil.MustMetric("csv", + map[string]string{}, + map[string]interface{}{ + "a": 1, + }, + time.Unix(0, 0), + ), + } + testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime()) +} diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index ac31a374dd75d..54edf3300b612 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -146,6 +146,7 @@ type Config struct { CSVTimestampFormat string `toml:"csv_timestamp_format"` CSVTimezone string `toml:"csv_timezone"` CSVTrimSpace bool `toml:"csv_trim_space"` + CSVSkipValues []string `toml:"csv_skip_values"` // FormData configuration FormUrlencodedTagKeys []string `toml:"form_urlencoded_tag_keys"` @@ -222,6 +223,7 @@ func NewParser(config *Config) (Parser, error) { TimestampFormat: config.CSVTimestampFormat, Timezone: config.CSVTimezone, DefaultTags: config.DefaultTags, + SkipValues: config.CSVSkipValues, } return csv.NewParser(config)