-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use filestream input as default for hints autodiscover. #36950
Changes from all commits
572d2de
621ee6b
b417380
f431c6b
988c850
85d658d
1f97bbf
40c2957
d0ece70
2a3b5b6
9d13102
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,9 +112,16 @@ metadata: | |
data: | ||
filebeat.yml: |- | ||
filebeat.inputs: | ||
- type: container | ||
- type: filestream | ||
paths: | ||
- /var/log/containers/*.log | ||
parsers: | ||
- container: ~ | ||
prospector: | ||
scanner: | ||
fingerprint.enabled: true | ||
symlinks: true | ||
file_identity.fingerprint: ~ | ||
rdner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
processors: | ||
- add_kubernetes_metadata: | ||
host: ${NODE_NAME} | ||
|
@@ -123,15 +130,23 @@ data: | |
logs_path: "/var/log/containers/" | ||
|
||
# To enable hints based autodiscover, remove `filebeat.inputs` configuration and uncomment this: | ||
#filebeat.autodiscover: | ||
# filebeat.autodiscover: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this extra space intentional? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. By default, when I commented the autodiscover block in and out, the editor added this space. TBH it looks more readable. |
||
# providers: | ||
# - type: kubernetes | ||
# node: ${NODE_NAME} | ||
# hints.enabled: true | ||
# hints.default_config: | ||
# type: container | ||
# type: filestream | ||
# id: kubernetes-container-logs-${data.kubernetes.pod.name}-${data.kubernetes.container.id} | ||
# paths: | ||
# - /var/log/containers/*${data.kubernetes.container.id}.log | ||
# - /var/log/containers/*-${data.kubernetes.container.id}.log | ||
# parsers: | ||
# - container: ~ | ||
# prospector: | ||
# scanner: | ||
# fingerprint.enabled: true | ||
# symlinks: true | ||
# file_identity.fingerprint: ~ | ||
|
||
processors: | ||
- add_cloud_metadata: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,9 +8,16 @@ metadata: | |
data: | ||
filebeat.yml: |- | ||
filebeat.inputs: | ||
- type: container | ||
- type: filestream | ||
paths: | ||
- /var/log/containers/*.log | ||
parsers: | ||
- container: ~ | ||
prospector: | ||
scanner: | ||
fingerprint.enabled: true | ||
symlinks: true | ||
file_identity.fingerprint: ~ | ||
processors: | ||
- add_kubernetes_metadata: | ||
host: ${NODE_NAME} | ||
|
@@ -19,15 +26,23 @@ data: | |
logs_path: "/var/log/containers/" | ||
|
||
# To enable hints based autodiscover, remove `filebeat.inputs` configuration and uncomment this: | ||
#filebeat.autodiscover: | ||
# filebeat.autodiscover: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same: is this needed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would say remove the spaces, because otherwise you can end up uncommenting this block and it will not have the correct spacing. |
||
# providers: | ||
# - type: kubernetes | ||
# node: ${NODE_NAME} | ||
# hints.enabled: true | ||
# hints.default_config: | ||
# type: container | ||
# type: filestream | ||
# id: kubernetes-container-logs-${data.kubernetes.pod.name}-${data.kubernetes.container.id} | ||
# paths: | ||
# - /var/log/containers/*${data.kubernetes.container.id}.log | ||
# - /var/log/containers/*-${data.kubernetes.container.id}.log | ||
# parsers: | ||
# - container: ~ | ||
# prospector: | ||
# scanner: | ||
# fingerprint.enabled: true | ||
# symlinks: true | ||
# file_identity.fingerprint: ~ | ||
|
||
processors: | ||
- add_cloud_metadata: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,8 @@ const ( | |
processors = "processors" | ||
json = "json" | ||
pipeline = "pipeline" | ||
ndjson = "ndjson" | ||
parsers = "parsers" | ||
) | ||
|
||
// validModuleNames to sanitize user input | ||
|
@@ -115,10 +117,20 @@ func (l *logHints) CreateConfig(event bus.Event, options ...ucfg.Option) []*conf | |
continue | ||
} | ||
|
||
inputType, _ := config.String("type", -1) | ||
tempCfg := mapstr.M{} | ||
mline := l.getMultiline(h) | ||
if len(mline) != 0 { | ||
kubernetes.ShouldPut(tempCfg, multiline, mline, l.log) | ||
|
||
if mline := l.getMultiline(h); len(mline) != 0 { | ||
if inputType == harvester.FilestreamType { | ||
// multiline options should be under multiline parser in filestream input | ||
parsersTempCfg := []mapstr.M{} | ||
mlineTempCfg := mapstr.M{} | ||
kubernetes.ShouldPut(mlineTempCfg, multiline, mline, l.log) | ||
parsersTempCfg = append(parsersTempCfg, mlineTempCfg) | ||
kubernetes.ShouldPut(tempCfg, parsers, parsersTempCfg, l.log) | ||
} else { | ||
kubernetes.ShouldPut(tempCfg, multiline, mline, l.log) | ||
} | ||
} | ||
if ilines := l.getIncludeLines(h); len(ilines) != 0 { | ||
kubernetes.ShouldPut(tempCfg, includeLines, ilines, l.log) | ||
|
@@ -136,15 +148,24 @@ func (l *logHints) CreateConfig(event bus.Event, options ...ucfg.Option) []*conf | |
} | ||
|
||
if jsonOpts := l.getJSONOptions(h); len(jsonOpts) != 0 { | ||
kubernetes.ShouldPut(tempCfg, json, jsonOpts, l.log) | ||
if inputType == harvester.FilestreamType { | ||
// json options should be under ndjson parser in filestream input | ||
parsersTempCfg := []mapstr.M{} | ||
ndjsonTempCfg := mapstr.M{} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ignore this. I just realised that those are empty mapstr.M |
||
kubernetes.ShouldPut(ndjsonTempCfg, ndjson, jsonOpts, l.log) | ||
parsersTempCfg = append(parsersTempCfg, ndjsonTempCfg) | ||
kubernetes.ShouldPut(tempCfg, parsers, parsersTempCfg, l.log) | ||
} else { | ||
kubernetes.ShouldPut(tempCfg, json, jsonOpts, l.log) | ||
} | ||
|
||
} | ||
// Merge config template with the configs from the annotations | ||
// AppendValues option is used to append arrays from annotations to existing arrays while merging | ||
if err := config.MergeWithOpts(tempCfg, ucfg.AppendValues); err != nil { | ||
logp.Debug("hints.builder", "config merge failed with error: %v", err) | ||
l.log.Debugf("hints.builder", "config merge failed with error: %v", err) | ||
continue | ||
} | ||
|
||
module := l.getModule(hints) | ||
if module != "" { | ||
moduleConf := map[string]interface{}{ | ||
|
@@ -154,24 +175,31 @@ func (l *logHints) CreateConfig(event bus.Event, options ...ucfg.Option) []*conf | |
filesets := l.getFilesets(hints, module) | ||
for fileset, cfg := range filesets { | ||
filesetConf, _ := conf.NewConfigFrom(config) | ||
|
||
if inputType, _ := filesetConf.String("type", -1); inputType == harvester.ContainerType { | ||
if inputType == harvester.ContainerType { | ||
_ = filesetConf.SetString("stream", -1, cfg.Stream) | ||
} else if inputType == harvester.FilestreamType { | ||
filestreamContainerParser := map[string]interface{}{ | ||
"container": map[string]interface{}{ | ||
"stream": cfg.Stream, | ||
"format": "auto", | ||
}, | ||
} | ||
parserCfg, _ := conf.NewConfigFrom(filestreamContainerParser) | ||
_ = filesetConf.SetChild("parsers", 0, parserCfg) | ||
} else { | ||
_ = filesetConf.SetString("containers.stream", -1, cfg.Stream) | ||
} | ||
|
||
moduleConf[fileset+".enabled"] = cfg.Enabled | ||
moduleConf[fileset+".input"] = filesetConf | ||
|
||
logp.Debug("hints.builder", "generated config %+v", moduleConf) | ||
l.log.Debugf("hints.builder", "generated config %+v", moduleConf) | ||
} | ||
config, _ = conf.NewConfigFrom(moduleConf) | ||
} | ||
logp.Debug("hints.builder", "generated config %+v", config) | ||
l.log.Debugf("hints.builder", "generated config %+v of logHints %+v", config, l) | ||
configs = append(configs, config) | ||
} | ||
|
||
// Apply information in event to the template to generate the final config | ||
return template.ApplyConfigTemplate(event, configs) | ||
} | ||
|
@@ -222,7 +250,7 @@ func (l *logHints) getFilesets(hints mapstr.M, module string) map[string]*filese | |
|
||
moduleFilesets, err := l.registry.ModuleAvailableFilesets(module) | ||
if err != nil { | ||
logp.Err("Error retrieving module filesets: %+v", err) | ||
l.log.Errorf("Error retrieving module filesets: %+v", err) | ||
return nil | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@MichaelKatsoulis
Shouldn't an `id` be defined in the input here? Or will it be generated automatically in that case? doc:
So for all files matching `/var/log/containers/*.log` we have one filestream with a unique id, correct? Do you know what that implies in comparison to the autodiscover case, where a dedicated filestream is created per container?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tetianakravchenko Yes, an id will be generated automatically. When `filebeat.inputs` is used instead of autodiscover, there will be one stream of the filestream input looking at all files in the path. When autodiscover is used, there will be one stream for each discovered container, looking at one log file only.
For the metadata in the first scenario, the processor is used, which requires the matcher's log path (`logs_path`) so it can extract the container id from the log file name and add the metadata of that container.
In the autodiscover case the metadata is enriched by the kubernetes provider.
So yes, we have one filestream with one id for all the log collection. Both options work just fine, but with the first approach we cannot enable hints.