Skip to content

Commit

Permalink
enhancement(regex_parser transform): Add RegexSet support to regex (vectordotdev#2493)
Browse files Browse the repository at this point in the history

Signed-off-by: Brian Menges <[email protected]>
  • Loading branch information
mre authored and Brian Menges committed Dec 9, 2020
1 parent d8f7de2 commit c635db1
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 55 deletions.
8 changes: 4 additions & 4 deletions .meta/transforms/regex_parser.toml.erb
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ If `target_field` is set and the log contains a field of the same name \
as the target, it will only be overwritten if this is set to `true`.\
"""

[transforms.regex_parser.options.regex]
[transforms.regex_parser.options.patterns]
type = "string"
common = true
examples = [
"""\
^(?P<timestamp>[\\w\\-:\\+]+) (?P<level>\\w+) (?P<message>.*)$\
['^(?P<timestamp>[\\w\\-:\\+]+) (?P<level>\\w+) (?P<message>.*)$']\
"""
]
required = true
description = """\
The Regular Expression to apply. Do not include the leading or trailing `/`.\
The Regular Expressions to apply. Do not include the leading or trailing `/` in any of the expressions.\
"""

[transforms.regex_parser.options.target_field]
Expand Down Expand Up @@ -85,7 +85,7 @@ And the following configuration:
[transforms.<transform-id>]
type = "regex_parser"
field = "message"
regex = '^(?P<host>[\w\.]+) - (?P<user>[\w]+) (?P<bytes_in>[\d]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$'
patterns = ['^(?P<host>[\w\.]+) - (?P<user>[\w]+) (?P<bytes_in>[\d]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$']

[transforms.<transform-id>.types]
bytes_in = "int"
Expand Down
6 changes: 3 additions & 3 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ fn benchmark_transforms(c: &mut Criterion) {
"parser",
&["in"],
transforms::regex_parser::RegexParserConfig {
regex: r"status=(?P<status>\d+)".to_string(),
patterns: vec![r"status=(?P<status>\d+)".to_string()],
field: None,
..Default::default()
},
Expand Down Expand Up @@ -410,7 +410,7 @@ fn benchmark_regex(c: &mut Criterion) {
let rt = vector::runtime::Runtime::single_threaded().unwrap();
let parser =transforms::regex_parser::RegexParserConfig {
// Many captures to stress the regex parser
regex: r#"^(?P<addr>\d+\.\d+\.\d+\.\d+) (?P<user>\S+) (?P<auth>\S+) \[(?P<date>\d+/[A-Za-z]+/\d+:\d+:\d+:\d+ [+-]\d{4})\] "(?P<method>[A-Z]+) (?P<uri>[^"]+) HTTP/\d\.\d" (?P<code>\d+) (?P<size>\d+) "(?P<referrer>[^"]+)" "(?P<browser>[^"]+)""#.into(),
patterns: vec![r#"^(?P<addr>\d+\.\d+\.\d+\.\d+) (?P<user>\S+) (?P<auth>\S+) \[(?P<date>\d+/[A-Za-z]+/\d+:\d+:\d+:\d+ [+-]\d{4})\] "(?P<method>[A-Z]+) (?P<uri>[^"]+) HTTP/\d\.\d" (?P<code>\d+) (?P<size>\d+) "(?P<referrer>[^"]+)" "(?P<browser>[^"]+)""#.into()],
field: None,
drop_failed: true,
..Default::default()
Expand Down Expand Up @@ -465,7 +465,7 @@ fn benchmark_complex(c: &mut Criterion) {
"parser",
&["in1", "in2"],
transforms::regex_parser::RegexParserConfig {
regex: r"status=(?P<status>\d+)".to_string(),
patterns: vec![r"status=(?P<status>\d+)".to_string()],
field: None,
..Default::default()
},
Expand Down
2 changes: 1 addition & 1 deletion config/examples/docs_example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ data_dir = "/var/lib/vector"
[transforms.apache_parser]
inputs = ["apache_logs"]
type = "regex_parser" # fast/powerful regex
regex = '^(?P<host>[w.]+) - (?P<user>[w]+) (?P<bytes_in>[d]+) [(?P<timestamp>.*)] "(?P<method>[w]+) (?P<path>.*)" (?P<status>[d]+) (?P<bytes_out>[d]+)$'
patterns = ['^(?P<host>[w.]+) - (?P<user>[w]+) (?P<bytes_in>[d]+) [(?P<timestamp>.*)] "(?P<method>[w]+) (?P<path>.*)" (?P<status>[d]+) (?P<bytes_out>[d]+)$']

# Sample the data to save on cost
[transforms.apache_sampler]
Expand Down
2 changes: 1 addition & 1 deletion config/examples/file_to_cloudwatch_metrics.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ start_at_beginning = true
[transforms.regex_parser]
inputs = ["file"]
type = "regex_parser"
regex = '^(?P<host>[\w\.]+) - (?P<user>[\w-]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$'
patterns = ['^(?P<host>[\w\.]+) - (?P<user>[\w-]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$']

# Transform into metrics
[transforms.log_to_metric]
Expand Down
2 changes: 1 addition & 1 deletion config/examples/file_to_prometheus.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ start_at_beginning = true
[transforms.regex_parser]
inputs = ["file"]
type = "regex_parser"
regex = '^(?P<host>[\w\.]+) - (?P<user>[\w-]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$'
patterns = ['^(?P<host>[\w\.]+) - (?P<user>[\w-]+) \[(?P<timestamp>.*)\] "(?P<method>[\w]+) (?P<path>.*)" (?P<status>[\d]+) (?P<bytes_out>[\d]+)$']

# Transform into metrics
[transforms.log_to_metric]
Expand Down
Loading

0 comments on commit c635db1

Please sign in to comment.