From 4071317cea35523723215700fe8f45c25fd3dba3 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Sun, 14 Jul 2019 00:46:03 +1000 Subject: [PATCH 01/50] first cut of structured logging --- .gitignore | 1 + synapse/app/_base.py | 12 +- synapse/config/logger.py | 99 +++++++++------ synapse/logging/_structured.py | 217 +++++++++++++++++++++++++++++++++ 4 files changed, 284 insertions(+), 45 deletions(-) create mode 100644 synapse/logging/_structured.py diff --git a/.gitignore b/.gitignore index a84c41b0c95a..dd6c6cf79e09 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ _trial_temp*/ /homeserver*.yaml /media_store/ /uploads +/logs # IDEs /.idea/ diff --git a/synapse/app/_base.py b/synapse/app/_base.py index bd285122eab9..45ac2881f34c 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -34,18 +34,20 @@ logger = logging.getLogger(__name__) +# list of tuples of function, args list, kwargs dict _sighup_callbacks = [] -def register_sighup(func): +def register_sighup(func, *args, **kwargs): """ Register a function to be called when a SIGHUP occurs. Args: func (function): Function to be called when sent a SIGHUP signal. - Will be called with a single argument, the homeserver. + Will be called with a single default argument, the homeserver. + *args, **kwargs: args and kwargs to be passed to the target function. """ - _sighup_callbacks.append(func) + _sighup_callbacks.append((func, args, kwargs)) def start_worker_reactor(appname, config): @@ -233,8 +235,8 @@ def start(hs, listeners=None): if hasattr(signal, "SIGHUP"): def handle_sighup(*args, **kwargs): - for i in _sighup_callbacks: - i(hs) + for i, args, kwargs in _sighup_callbacks: + i(hs, *args, **kwargs) signal.signal(signal.SIGHUP, handle_sighup) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 0f5554211c6a..1e7fa078da2f 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import logging import logging.config import os @@ -21,9 +22,14 @@ import yaml from twisted.logger import STDLibLogObserver, globalLogBeginner +from twisted.python.filepath import FilePath import synapse from synapse.app import _base as appbase +from synapse.logging._structured import ( + reload_structured_logging, + setup_structured_logging, +) from synapse.logging.context import LoggingContextFilter from synapse.util.versionstring import get_version_string @@ -145,22 +151,10 @@ def generate_files(self, config, config_dir_path): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def setup_logging(config, use_worker_options=False): - """ Set up python logging - - Args: - config (LoggingConfig | synapse.config.workers.WorkerConfig): - configuration data - - use_worker_options (bool): True to use 'worker_log_config' and - 'worker_log_file' options instead of 'log_config' and 'log_file'. - - register_sighup (func | None): Function to call to register a - sighup handler. +def _setup_stdlib_logging(log_config, log_file): + """ + Set up Python stdlib logging. 
""" - log_config = config.worker_log_config if use_worker_options else config.log_config - log_file = config.worker_log_file if use_worker_options else config.log_file - log_format = ( "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" " - %(message)s" @@ -189,42 +183,20 @@ def setup_logging(config, use_worker_options=False): log_file, maxBytes=(1000 * 1000 * 100), backupCount=3, encoding="utf8" ) - def sighup(signum, stack): + def sighup(*args): logger.info("Closing log file due to SIGHUP") handler.doRollover() logger.info("Opened new log file due to SIGHUP") + appbase.register_sighup(sighup) else: handler = logging.StreamHandler() - def sighup(*args): - pass - handler.setFormatter(formatter) - handler.addFilter(LoggingContextFilter(request="")) - logger.addHandler(handler) else: - - def load_log_config(): - with open(log_config, "r") as f: - logging.config.dictConfig(yaml.safe_load(f)) - - def sighup(*args): - # it might be better to use a file watcher or something for this. - load_log_config() - logging.info("Reloaded log config from %s due to SIGHUP", log_config) - - load_log_config() - - appbase.register_sighup(sighup) - - # make sure that the first thing we log is a thing we can grep backwards - # for - logging.warn("***** STARTING SERVER *****") - logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) - logging.info("Server hostname: %s", config.server_name) + logging.config.dictConfig(log_config) # It's critical to point twisted's internal logging somewhere, otherwise it # stacks up and leaks kup to 64K object; @@ -257,3 +229,50 @@ def _log(event): ) if not config.no_redirect_stdio: print("Redirected stdout/stderr to logs") + + +def _reload_stdlib_logging(*args, log_config=None): + if not log_config: + logger.warn("Reloaded a blank config?") + + logging.config.dictConfig(log_config) + + +def setup_logging(config, use_worker_options=False): + """ Set up python logging + + Args: + config (LoggingConfig | synapse.config.workers.WorkerConfig): + configuration data + + use_worker_options (bool): True to use 'worker_log_config' and + 'worker_log_file' options instead of 'log_config' and 'log_file'. + + register_sighup (func | None): Function to call to register a + sighup handler. 
+ """ + log_config = config.worker_log_config if use_worker_options else config.log_config + log_file = config.worker_log_file if use_worker_options else config.log_file + + def read_config(*args, callback=None): + log_config_body = yaml.safe_load(FilePath(log_config).getContent()) + if args: + logging.info("Reloaded log config from %s due to SIGHUP", log_config) + if callback: + callback(log_config=log_config_body) + return log_config_body + + log_config_body = read_config() + + if log_config_body.get("version") == 2: + setup_structured_logging(log_config_body) + appbase.register_sighup(read_config, callback=reload_structured_logging) + else: + _setup_stdlib_logging(log_config_body, log_file) + appbase.register_sighup(read_config, callback=_reload_stdlib_logging) + + # make sure that the first thing we log is a thing we can grep backwards + # for + logging.warn("***** STARTING SERVER *****") + logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) + logging.info("Server hostname: %s", config.server_name) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py new file mode 100644 index 000000000000..36dbafc42e78 --- /dev/null +++ b/synapse/logging/_structured.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import sys + +import attr +from constantly import NamedConstant, Names, ValueConstant, Values +from zope.interface import implementer + +from twisted.logger import ( + FileLogObserver, + FilteringLogObserver, + ILogObserver, + Logger, + LogLevel, + LogLevelFilterPredicate, + LogPublisher, + eventAsText, + globalLogBeginner, + jsonFileLogObserver, +) + +from synapse.config._base import ConfigError +from synapse.logging.context import LoggingContext + + +def stdlib_log_level_to_twisted(level): + """ + Convert a stdlib log level to Twisted's log level. + """ + lvl = level.lower().replace("warning", "warn") + return LogLevel.levelWithName(lvl) + + +@attr.s +@implementer(ILogObserver) +class LogContextObserver(object): + observer = attr.ib() + + def __call__(self, event): + + # Filter out some useless events that Twisted outputs + if "log_text" in event: + if event["log_text"].startswith("DNSDatagramProtocol starting on "): + return + + if event["log_text"].startswith("(UDP Port "): + return + + if event["log_text"].startswith("Timing out client"): + return + + event["request"] = "None" + event["scope"] = None + + context = LoggingContext.current_context() + if context is not None and isinstance(context, LoggingContext): + event["request"] = context.request + event["scope"] = context.scope + + return self.observer(event) + + +class PythonStdlibToTwistedLogger(logging.Handler): + """ + Transform a Python stdlib log message into a Twisted one. 
+ """ + + def __init__(self, observer, *args, **kwargs): + self.observer = observer + super().__init__(*args, **kwargs) + + def emit(self, record): + + self.observer( + { + "log_time": record.created, + "log_text": record.getMessage(), + "log_format": "{log_text}", + "log_namespace": record.name, + "log_level": stdlib_log_level_to_twisted(record.levelname), + } + ) + + +def SynapseFileLogObserver(outFile): + """ + A log observer that formats events like Synapse wants and sends them to + `outFile`. + """ + + def formatEvent(_event): + event = _event.copy() + event["log_level"] = event["log_level"].name.upper() + event["log_format"] = ( + "- {log_namespace} - {log_level} - {request} - " + + event.get("log_format", "{event_text}") + ) + return eventAsText(event, includeSystem=False) + "\n" + + return FileLogObserver(outFile, formatEvent) + + +class LoggingOutputType(Names): + CONSOLE = NamedConstant() + JSON = NamedConstant() + FLUENTD = NamedConstant() + + +class OutputPipeType(Values): + stdout = ValueConstant(sys.__stdout__) + stderr = ValueConstant(sys.__stderr__) + + +@attr.s +class HandlerConfiguration(object): + name = attr.ib() + type = attr.ib() + location = attr.ib() + + +def parse_handler_configs(config): + """ + Parse the logging format version 2 handler section. + """ + for name, config in config.get("handlers").items(): + logging_type = None + + if "type" not in config and "class" in config: + # Handle the old "class": style. + if config["class"] == "logging.StreamHandler": + yield ( + HandlerConfiguration( + name=name, + type=LoggingOutputType.CONSOLE, + location=sys.__stdout__, + ) + ) + else: + raise ConfigError( + "The logging class %s is not supported in logging format 2." + % (config["class"],) + ) + elif "type" in config: + logging_type = LoggingOutputType.lookupByName(config["type"].upper()) + + if logging_type in [LoggingOutputType.CONSOLE, LoggingOutputType.JSON]: + location = config.get("location") + if location is None or location not in ["stdout", "stderr"]: + raise ConfigError( + ( + "The %s handler needs the 'location' key set to " + "either 'stdout' or 'stderr'." + ) + ) + + yield HandlerConfiguration( + name=name, + type=logging_type, + location=OutputPipeType.lookupByName(location).value, + ) + else: + raise ConfigError("Handlers need to have either a 'type' or 'class' key.") + + +def setup_structured_logging(config): + """ + Set up Twisted's structured logging system to output. + """ + logger = Logger() + + observers = [] + + for observer in parse_handler_configs(config): + if observer.type == LoggingOutputType.CONSOLE: + logger.debug("Starting up the {name} console logger", name=observer.name) + observers.append(SynapseFileLogObserver(observer.location)) + if observer.type == LoggingOutputType.JSON: + logger.debug("Starting up the {name} JSON logger", name=observer.name) + observers.append(jsonFileLogObserver(observer.location)) + + publisher = LogPublisher(*observers) + log_filter = LogLevelFilterPredicate() + + for namespace, config in config.get("loggers", {}).items(): + log_filter.setLogLevelForNamespace( + namespace, stdlib_log_level_to_twisted(config.get("level", "INFO")) + ) + + f = FilteringLogObserver(publisher, [log_filter]) + lco = LogContextObserver(f) + stuff_into_twisted = PythonStdlibToTwistedLogger(lco) + + stdliblogger = logging.getLogger("") + stdliblogger.setLevel(logging.DEBUG) + stdliblogger.addHandler(stuff_into_twisted) + + # Redirecting stdio is important here, especially if there's a JSON + # outputter! 
+ globalLogBeginner.beginLoggingTo([lco], redirectStandardIO=True) + + +def reload_structured_logging(*args, log_config=None): + pass From 0ea4fb1b857ae7091117f7b69e778ebcbd6e8455 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Sun, 14 Jul 2019 01:06:29 +1000 Subject: [PATCH 02/50] remove deprecated options, command line options, and simplify the logging code in preparation for structured logging changes --- changelog.d/5678.misc | 1 + changelog.d/5678.removal | 1 + synapse/config/logger.py | 100 +++++---------------------------------- 3 files changed, 13 insertions(+), 89 deletions(-) create mode 100644 changelog.d/5678.misc create mode 100644 changelog.d/5678.removal diff --git a/changelog.d/5678.misc b/changelog.d/5678.misc new file mode 100644 index 000000000000..2d9a19ad23bf --- /dev/null +++ b/changelog.d/5678.misc @@ -0,0 +1 @@ +Synapse will now always redirect standard I/O to the logging system. diff --git a/changelog.d/5678.removal b/changelog.d/5678.removal new file mode 100644 index 000000000000..2ad86a5e701c --- /dev/null +++ b/changelog.d/5678.removal @@ -0,0 +1 @@ +Synapse now no longer accepts the `-v`/`--verbose`, `-f`/`--log-file`, `-n`/`--no-redirect-stdio`, or `--log-config` command line flags, and removes the deprecated `verbose`, `no_redirect_stdio`, and `log_file` configuration file options. Users of these options should migrate their options into the dedicated log configuration. diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 0f5554211c6a..a21d9fcba8bd 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -75,10 +75,7 @@ class LoggingConfig(Config): def read_config(self, config, **kwargs): - self.verbosity = config.get("verbose", 0) - self.no_redirect_stdio = config.get("no_redirect_stdio", False) self.log_config = self.abspath(config.get("log_config")) - self.log_file = self.abspath(config.get("log_file")) def generate_config_section(self, config_dir_path, server_name, **kwargs): log_config = os.path.join(config_dir_path, server_name + ".log.config") @@ -93,46 +90,6 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): % locals() ) - def read_arguments(self, args): - if args.verbose is not None: - self.verbosity = args.verbose - if args.no_redirect_stdio is not None: - self.no_redirect_stdio = args.no_redirect_stdio - if args.log_config is not None: - self.log_config = args.log_config - if args.log_file is not None: - self.log_file = args.log_file - - def add_arguments(cls, parser): - logging_group = parser.add_argument_group("logging") - logging_group.add_argument( - "-v", - "--verbose", - dest="verbose", - action="count", - help="The verbosity level. Specify multiple times to increase " - "verbosity. (Ignored if --log-config is specified.)", - ) - logging_group.add_argument( - "-f", - "--log-file", - dest="log_file", - help="File to log to. 
(Ignored if --log-config is specified.)", - ) - logging_group.add_argument( - "--log-config", - dest="log_config", - default=None, - help="Python logging config file", - ) - logging_group.add_argument( - "-n", - "--no-redirect-stdio", - action="store_true", - default=None, - help="Do not redirect stdout/stderr to the log", - ) - def generate_files(self, config, config_dir_path): log_config = config.get("log_config") if log_config and not os.path.exists(log_config): @@ -152,61 +109,31 @@ def setup_logging(config, use_worker_options=False): config (LoggingConfig | synapse.config.workers.WorkerConfig): configuration data - use_worker_options (bool): True to use 'worker_log_config' and - 'worker_log_file' options instead of 'log_config' and 'log_file'. + use_worker_options (bool): True to use the 'worker_log_config' option + instead of 'log_config'. register_sighup (func | None): Function to call to register a sighup handler. """ log_config = config.worker_log_config if use_worker_options else config.log_config - log_file = config.worker_log_file if use_worker_options else config.log_file - - log_format = ( - "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" - " - %(message)s" - ) if log_config is None: - # We don't have a logfile, so fall back to the 'verbosity' param from - # the config or cmdline. (Note that we generate a log config for new - # installs, so this will be an unusual case) - level = logging.INFO - level_for_storage = logging.INFO - if config.verbosity: - level = logging.DEBUG - if config.verbosity > 1: - level_for_storage = logging.DEBUG + log_format = ( + "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" + " - %(message)s" + ) logger = logging.getLogger("") - logger.setLevel(level) - - logging.getLogger("synapse.storage.SQL").setLevel(level_for_storage) + logger.setLevel(logging.INFO) + logging.getLogger("synapse.storage.SQL").setLevel(logging.INFO) formatter = logging.Formatter(log_format) - if log_file: - # TODO: Customisable file size / backup count - handler = logging.handlers.RotatingFileHandler( - log_file, maxBytes=(1000 * 1000 * 100), backupCount=3, encoding="utf8" - ) - - def sighup(signum, stack): - logger.info("Closing log file due to SIGHUP") - handler.doRollover() - logger.info("Opened new log file due to SIGHUP") - - else: - handler = logging.StreamHandler() - - def sighup(*args): - pass + handler = logging.StreamHandler() handler.setFormatter(formatter) - handler.addFilter(LoggingContextFilter(request="")) - logger.addHandler(handler) else: - def load_log_config(): with open(log_config, "r") as f: logging.config.dictConfig(yaml.safe_load(f)) @@ -217,8 +144,7 @@ def sighup(*args): logging.info("Reloaded log config from %s due to SIGHUP", log_config) load_log_config() - - appbase.register_sighup(sighup) + appbase.register_sighup(sighup) # make sure that the first thing we log is a thing we can grep backwards # for @@ -252,8 +178,4 @@ def _log(event): return observer(event) - globalLogBeginner.beginLoggingTo( - [_log], redirectStandardIO=not config.no_redirect_stdio - ) - if not config.no_redirect_stdio: - print("Redirected stdout/stderr to logs") + globalLogBeginner.beginLoggingTo([_log], redirectStandardIO=False) From af7bc1c7f4d3155b71663d3f24aa1be7aad06e74 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Sun, 14 Jul 2019 01:12:45 +1000 Subject: [PATCH 03/50] this should be true --- synapse/config/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index a21d9fcba8bd..8c54627e3b0a 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -178,4 +178,4 @@ def _log(event): return observer(event) - globalLogBeginner.beginLoggingTo([_log], redirectStandardIO=False) + globalLogBeginner.beginLoggingTo([_log], redirectStandardIO=True) From 83bf10c7b0ecad7bc26121f6c367a88ac169e270 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Sun, 14 Jul 2019 01:13:08 +1000 Subject: [PATCH 04/50] cleanup --- synapse/config/logger.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 8c54627e3b0a..2a326776fb25 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -134,6 +134,7 @@ def setup_logging(config, use_worker_options=False): handler.addFilter(LoggingContextFilter(request="")) logger.addHandler(handler) else: + def load_log_config(): with open(log_config, "r") as f: logging.config.dictConfig(yaml.safe_load(f)) From 9c19dc3204bc50122aa78b2d2e83b79c139c4778 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Sun, 14 Jul 2019 01:21:17 +1000 Subject: [PATCH 05/50] remove disused arguments from workers too --- synapse/config/workers.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 3b75471d8585..246d72cd611b 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -31,8 +31,6 @@ def read_config(self, config, **kwargs): self.worker_listeners = config.get("worker_listeners", []) self.worker_daemonize = config.get("worker_daemonize") self.worker_pid_file = config.get("worker_pid_file") - self.worker_log_file = config.get("worker_log_file") - self.worker_log_config = config.get("worker_log_config") # The host used to connect to the main synapse self.worker_replication_host = config.get("worker_replication_host", None) @@ -78,9 +76,5 @@ def read_arguments(self, args): if args.daemonize is not None: self.worker_daemonize = args.daemonize - if args.log_config is not None: - self.worker_log_config = args.log_config - if args.log_file is not None: - self.worker_log_file = args.log_file if args.manhole is not None: self.worker_manhole = args.worker_manhole From 32b9dd0fe04c33b5d8a3bebfe80aee5d850ed297 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Sun, 14 Jul 2019 02:37:46 +1000 Subject: [PATCH 06/50] fix and push up --- changelog.d/5680.feature | 1 + synapse/logging/_structured.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5680.feature diff --git a/changelog.d/5680.feature b/changelog.d/5680.feature new file mode 100644 index 000000000000..ce0953a0350d --- /dev/null +++ b/changelog.d/5680.feature @@ -0,0 +1 @@ +[ tba ] diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 36dbafc42e78..cdbd0509ce14 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -107,7 +107,7 @@ def formatEvent(_event): event["log_level"] = event["log_level"].name.upper() event["log_format"] = ( "- {log_namespace} - {log_level} - {request} - " - + event.get("log_format", "{event_text}") + + event.get("log_format", "{log_text}") ) return eventAsText(event, includeSystem=False) + "\n" From 83507eb39668b1e480f2c47c1128ccb178839145 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Sun, 14 Jul 2019 03:57:24 +1000 Subject: [PATCH 07/50] fix --- changelog.d/5680.feature | 2 +- synapse/config/logger.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/changelog.d/5680.feature b/changelog.d/5680.feature index ce0953a0350d..8f85552c4a8b 100644 --- a/changelog.d/5680.feature +++ b/changelog.d/5680.feature @@ -1 +1 @@ -[ tba ] +tba. diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 2cbfff5e80f5..ed3aa6c3ce3f 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -153,6 +153,8 @@ def _log(event): def _reload_stdlib_logging(*args, log_config=None): + logger = logging.getLogger("") + if not log_config: logger.warn("Reloaded a blank config?") From 5cec494329dacd7b1bc92241d9f68e787419cd97 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Sun, 14 Jul 2019 04:41:57 +1000 Subject: [PATCH 08/50] update to use 18.9 --- synapse/python_dependencies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index e7618057be6d..f5289520a9c7 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -47,9 +47,9 @@ "idna>=2.5", # validating SSL certs for IP addresses requires service_identity 18.1. "service_identity>=18.1.0", - # our logcontext handling relies on the ability to cancel inlineCallbacks - # (https://twistedmatrix.com/trac/ticket/4632) which landed in Twisted 18.7. - "Twisted>=18.7.0", + # Twisted 18.9 introduces some logger improvements that the structured + # logger utilises + "Twisted>=18.9.0", "treq>=15.1", # Twisted has required pyopenssl 16.0 since about Twisted 16.6. "pyopenssl>=16.0.0", From 69cd070c8fa49adc1a3a83ae4d4fbbd366fcacf5 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Mon, 15 Jul 2019 20:15:56 +1000 Subject: [PATCH 09/50] fix sytests? --- synapse/config/logger.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index ed3aa6c3ce3f..98c42c229653 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -177,7 +177,12 @@ def setup_logging(config, use_worker_options=False): log_config = config.worker_log_config if use_worker_options else config.log_config def read_config(*args, callback=None): - log_config_body = yaml.safe_load(FilePath(log_config).getContent()) + log_config_body = FilePath(log_config).getContent() + + if not log_config_body: + return + + log_config_body = yaml.safe_load(log_config_body) if args: logging.info("Reloaded log config from %s due to SIGHUP", log_config) if callback: @@ -186,7 +191,7 @@ def read_config(*args, callback=None): log_config_body = read_config() - if log_config_body.get("version") == 2: + if log_config_body and log_config_body.get("version") == 2: setup_structured_logging(log_config_body) appbase.register_sighup(read_config, callback=reload_structured_logging) else: From 51048fb4d9d6a89036c5def5329c927a3404d362 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Mon, 15 Jul 2019 21:21:16 +1000 Subject: [PATCH 10/50] fix sytests? 
--- synapse/config/logger.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 98c42c229653..7d876e2171ce 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -177,12 +177,10 @@ def setup_logging(config, use_worker_options=False): log_config = config.worker_log_config if use_worker_options else config.log_config def read_config(*args, callback=None): - log_config_body = FilePath(log_config).getContent() + if log_config is None: + return None - if not log_config_body: - return - - log_config_body = yaml.safe_load(log_config_body) + log_config_body = yaml.safe_load(FilePath(log_config).getContent()) if args: logging.info("Reloaded log config from %s due to SIGHUP", log_config) if callback: From 0b76783506174dfeda1a434b790bb9076ff3d109 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Tue, 16 Jul 2019 05:01:35 +1000 Subject: [PATCH 11/50] make a misc --- changelog.d/5680.feature | 1 - changelog.d/5680.misc | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 changelog.d/5680.feature create mode 100644 changelog.d/5680.misc diff --git a/changelog.d/5680.feature b/changelog.d/5680.feature deleted file mode 100644 index 8f85552c4a8b..000000000000 --- a/changelog.d/5680.feature +++ /dev/null @@ -1 +0,0 @@ -tba. diff --git a/changelog.d/5680.misc b/changelog.d/5680.misc new file mode 100644 index 000000000000..46a403a188ed --- /dev/null +++ b/changelog.d/5680.misc @@ -0,0 +1 @@ +Lay the groundwork for structured logging output. From 47820c5b238de1c06cc40d560147f28dcfab5488 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 19 Jul 2019 01:44:55 +1000 Subject: [PATCH 12/50] fix --- changelog.d/5678.misc | 1 - synapse/config/logger.py | 33 +++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 15 deletions(-) delete mode 100644 changelog.d/5678.misc diff --git a/changelog.d/5678.misc b/changelog.d/5678.misc deleted file mode 100644 index 2d9a19ad23bf..000000000000 --- a/changelog.d/5678.misc +++ /dev/null @@ -1 +0,0 @@ -Synapse will now always redirect standard I/O to the logging system. 
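The `setup_logging()` rework that the following hunks continue is easiest to read against a sketch of the overall dispatch: the YAML log config is loaded, and a marker in it (`version: 2` at this point in the series, `structured: true` by the end) selects either the Twisted-based structured pipeline or plain stdlib `dictConfig`. The example below is a simplified, hypothetical illustration rather than the patched function itself.

```python
# Simplified sketch of the dispatch performed once the YAML log config has
# been read: a marker selects the structured pipeline, anything else falls
# back to stdlib logging.config.
import logging.config

import yaml

example_yaml = """
version: 1
formatters:
  precise:
    format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
handlers:
  console:
    class: logging.StreamHandler
    formatter: precise
root:
  level: INFO
  handlers: [console]
"""


def dispatch_logging_setup(yaml_text):
    log_config = yaml.safe_load(yaml_text)

    if log_config and log_config.get("structured"):
        # A structured ("drains") config is handed to
        # synapse.logging._structured.setup_structured_logging().
        return "structured"

    # Anything else is treated as an ordinary stdlib dictConfig schema.
    logging.config.dictConfig(log_config)
    return "stdlib"


print(dispatch_logging_setup(example_yaml))
```

Keeping the stdlib path as the default means existing `version: 1` log configs keep working unchanged, with the structured pipeline strictly opt-in.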
diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 7e7fb2fc0202..bc00344a4d32 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -82,6 +82,7 @@ class LoggingConfig(Config): def read_config(self, config, **kwargs): self.log_config = self.abspath(config.get("log_config")) + self.no_redirect_stdio = config.get("no_redirect_stdio", False) def generate_config_section(self, config_dir_path, server_name, **kwargs): log_config = os.path.join(config_dir_path, server_name + ".log.config") @@ -96,6 +97,21 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): % locals() ) + def read_arguments(self, args): + if args.no_redirect_stdio is not None: + self.no_redirect_stdio = args.no_redirect_stdio + + @staticmethod + def add_arguments(parser): + logging_group = parser.add_argument_group("logging") + logging_group.add_argument( + "-n", + "--no-redirect-stdio", + action="store_true", + default=None, + help="Do not redirect stdout/stderr to the log", + ) + def generate_files(self, config, config_dir_path): log_config = config.get("log_config") if log_config and not os.path.exists(log_config): @@ -108,21 +124,10 @@ def generate_files(self, config, config_dir_path): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def setup_logging(config, use_worker_options=False): - """ Set up python logging - - Args: - config (LoggingConfig | synapse.config.workers.WorkerConfig): - configuration data - - use_worker_options (bool): True to use the 'worker_log_config' option - instead of 'log_config'. - - register_sighup (func | None): Function to call to register a - sighup handler. +def _setup_stdlib_logging(log_config): + """ + Set up Python stdlib logging. """ - log_config = config.worker_log_config if use_worker_options else config.log_config - if log_config is None: log_format = ( "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" From 5260a8bde25c24777c382540e11c30dd7b7398d4 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 19 Jul 2019 01:47:13 +1000 Subject: [PATCH 13/50] fix --- synapse/config/logger.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index bc00344a4d32..0092ed2a8fa2 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -178,7 +178,8 @@ def _reload_stdlib_logging(*args, log_config=None): def setup_logging(config, use_worker_options=False): - """ Set up python logging + """ + Set up the logging subsystem. Args: config (LoggingConfig | synapse.config.workers.WorkerConfig): From 586fd94f66d166a73d5863897c98b9b597e6b0c9 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 19 Jul 2019 01:53:07 +1000 Subject: [PATCH 14/50] fix --- synapse/config/logger.py | 8 ++++++-- synapse/logging/_structured.py | 15 ++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 0092ed2a8fa2..0f287f2ea452 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -124,7 +124,7 @@ def generate_files(self, config, config_dir_path): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def _setup_stdlib_logging(log_config): +def _setup_stdlib_logging(config, log_config): """ Set up Python stdlib logging. 
""" @@ -165,7 +165,11 @@ def _log(event): return observer(event) - globalLogBeginner.beginLoggingTo([_log], redirectStandardIO=True) + globalLogBeginner.beginLoggingTo( + [_log], redirectStandardIO=not config.no_redirect_stdio + ) + if not config.no_redirect_stdio: + print("Redirected stdout/stderr to logs") def _reload_stdlib_logging(*args, log_config=None): diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index cdbd0509ce14..4b64f4a36b76 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -176,15 +176,20 @@ def parse_handler_configs(config): raise ConfigError("Handlers need to have either a 'type' or 'class' key.") -def setup_structured_logging(config): +def setup_structured_logging(config, log_config): """ - Set up Twisted's structured logging system to output. + Set up Twisted's structured logging system. """ + if config.no_redirect_stdio: + raise ConfigError( + "no_redirect_stdio cannot be defined using log_config version 2." + ) + logger = Logger() observers = [] - for observer in parse_handler_configs(config): + for observer in parse_handler_configs(log_config): if observer.type == LoggingOutputType.CONSOLE: logger.debug("Starting up the {name} console logger", name=observer.name) observers.append(SynapseFileLogObserver(observer.location)) @@ -195,9 +200,9 @@ def setup_structured_logging(config): publisher = LogPublisher(*observers) log_filter = LogLevelFilterPredicate() - for namespace, config in config.get("loggers", {}).items(): + for namespace, config in log_config.get("loggers", {}).items(): log_filter.setLogLevelForNamespace( - namespace, stdlib_log_level_to_twisted(config.get("level", "INFO")) + namespace, stdlib_log_level_to_twisted(log_config.get("level", "INFO")) ) f = FilteringLogObserver(publisher, [log_filter]) From 50fcd6089b3ab56e8c06c0328c4f9e3c65eac804 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 19 Jul 2019 02:00:36 +1000 Subject: [PATCH 15/50] fix --- synapse/config/logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 0f287f2ea452..4c68d6c4e9bc 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -211,10 +211,10 @@ def read_config(*args, callback=None): log_config_body = read_config() if log_config_body and log_config_body.get("version") == 2: - setup_structured_logging(log_config_body) + setup_structured_logging(config, log_config_body) appbase.register_sighup(read_config, callback=reload_structured_logging) else: - _setup_stdlib_logging(log_config_body) + _setup_stdlib_logging(config, log_config_body) appbase.register_sighup(read_config, callback=_reload_stdlib_logging) # make sure that the first thing we log is a thing we can grep backwards From ae34f258d56f771c39d3453e0021aeee987440ee Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Wed, 24 Jul 2019 02:29:52 +1000 Subject: [PATCH 16/50] review comments --- .gitignore | 2 +- synapse/config/logger.py | 7 ++-- synapse/logging/_structured.py | 61 +++++++++++++++++++++++++++++----- synapse/logging/context.py | 11 ++++++ 4 files changed, 67 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index dd6c6cf79e09..8374a7d0afb2 100644 --- a/.gitignore +++ b/.gitignore @@ -19,9 +19,9 @@ _trial_temp*/ /*.signing.key /env/ /homeserver*.yaml +/logs /media_store/ /uploads -/logs # IDEs /.idea/ diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 4c68d6c4e9bc..6807d7605690 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -191,9 +191,6 @@ def setup_logging(config, use_worker_options=False): use_worker_options (bool): True to use the 'worker_log_config' option instead of 'log_config'. - - register_sighup (func | None): Function to call to register a - sighup handler. """ log_config = config.worker_log_config if use_worker_options else config.log_config @@ -202,10 +199,10 @@ def read_config(*args, callback=None): return None log_config_body = yaml.safe_load(FilePath(log_config).getContent()) - if args: - logging.info("Reloaded log config from %s due to SIGHUP", log_config) if callback: callback(log_config=log_config_body) + logging.info("Reloaded log config from %s due to SIGHUP", log_config) + return log_config_body log_config_body = read_config() diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 4b64f4a36b76..9fa1306bb22a 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -48,10 +48,22 @@ def stdlib_log_level_to_twisted(level): @attr.s @implementer(ILogObserver) class LogContextObserver(object): + """ + An ILogObserver which adds Synapse-specific log context information. + + Attributes: + observer (ILogObserver): The target parent observer. + """ observer = attr.ib() def __call__(self, event): + """ + Consume a log event and emit it to the parent observer after filtering + and adding log context information. + Args: + event (dict) + """ # Filter out some useless events that Twisted outputs if "log_text" in event: if event["log_text"].startswith("DNSDatagramProtocol starting on "): @@ -63,13 +75,17 @@ def __call__(self, event): if event["log_text"].startswith("Timing out client"): return - event["request"] = "None" - event["scope"] = None - context = LoggingContext.current_context() - if context is not None and isinstance(context, LoggingContext): - event["request"] = context.request - event["scope"] = context.scope + + # Copy the context information to the log event. + if context is not None: + context.copy_to_twisted_log_entry(event) + else: + # If there's no logging context, not even the root one, we might be + # starting up or it might be from non-Synapse code. Log it as if it + # came from the root logger. + event["request"] = None + event["scope"] = None return self.observer(event) @@ -80,10 +96,21 @@ class PythonStdlibToTwistedLogger(logging.Handler): """ def __init__(self, observer, *args, **kwargs): + """ + Args: + observer (ILogObserver): A Twisted logging observer. + *args, **kwargs: Args/kwargs to be passed to logging.Handler. + """ self.observer = observer super().__init__(*args, **kwargs) def emit(self, record): + """ + Emit a record to Twisted's observer. 
+ + Args: + record (logging.LogRecord) + """ self.observer( { @@ -98,8 +125,11 @@ def emit(self, record): def SynapseFileLogObserver(outFile): """ - A log observer that formats events like Synapse wants and sends them to - `outFile`. + A log observer that formats events like the traditional log formatter and + sends them to `outFile`. + + Args: + outFile (file object): The file object to write to. """ def formatEvent(_event): @@ -135,6 +165,15 @@ class HandlerConfiguration(object): def parse_handler_configs(config): """ Parse the logging format version 2 handler section. + + Args: + config (dict): A logging format v2 dictionary. + + Yields: + HandlerConfiguration instances. + + Raises: + ConfigError: If any of the handler configuration items are invalid. """ for name, config in config.get("handlers").items(): logging_type = None @@ -179,6 +218,10 @@ def parse_handler_configs(config): def setup_structured_logging(config, log_config): """ Set up Twisted's structured logging system. + + Args: + config (HomeserverConfig): The configuration of the Synapse homeserver. + log_config (dict): The log configuration to use. """ if config.no_redirect_stdio: raise ConfigError( @@ -219,4 +262,6 @@ def setup_structured_logging(config, log_config): def reload_structured_logging(*args, log_config=None): + # TODO: Reload the structured logging system. Since we don't implement any + # sort of file rotation, we don't need to worry about doing that here. pass diff --git a/synapse/logging/context.py b/synapse/logging/context.py index b456c31f7071..2cf6af11cebd 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -202,6 +202,10 @@ def __str__(self): def copy_to(self, record): pass + def copy_to_twisted_log_entry(record): + record["request"] = None + record["scope"] = None + def start(self): pass @@ -330,6 +334,13 @@ def copy_to(self, record): # we also track the current scope: record.scope = self.scope + def copy_to_twisted_log_entry(self, record): + """ + Copy logging fields from this context to a Twisted log record. + """ + record["request"] = self.request + record["scope"] = self.scope + def start(self): if get_thread_id() != self.main_thread: logger.warning("Started logcontext %s on different thread", self) From 34b3fcebe2b732de93d9005e8097a08cabd4ba91 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 24 Jul 2019 03:33:37 +1000 Subject: [PATCH 17/50] review comments --- synapse/logging/_structured.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 9fa1306bb22a..d9c12a66e6c7 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -54,6 +54,7 @@ class LogContextObserver(object): Attributes: observer (ILogObserver): The target parent observer. """ + observer = attr.ib() def __call__(self, event): From ba5b355f2ce541b95f5bc8fe0534b4c8051da8c2 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Wed, 24 Jul 2019 03:57:16 +1000 Subject: [PATCH 18/50] use native python read --- synapse/config/logger.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 6807d7605690..21707692f98d 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -22,7 +22,6 @@ import yaml from twisted.logger import STDLibLogObserver, globalLogBeginner -from twisted.python.filepath import FilePath import synapse from synapse.app import _base as appbase @@ -198,7 +197,9 @@ def read_config(*args, callback=None): if log_config is None: return None - log_config_body = yaml.safe_load(FilePath(log_config).getContent()) + with open(log_config, "rb") as f: + log_config_body = yaml.safe_load(f.read()) + if callback: callback(log_config=log_config_body) logging.info("Reloaded log config from %s due to SIGHUP", log_config) From d1fc0ff648a951e79848ad213c12a0889846f495 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 26 Jul 2019 23:46:08 +1000 Subject: [PATCH 19/50] set the log level on the namespace too --- synapse/logging/_structured.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index d9c12a66e6c7..cfc3d92606e0 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -245,10 +245,15 @@ def setup_structured_logging(config, log_config): log_filter = LogLevelFilterPredicate() for namespace, config in log_config.get("loggers", {}).items(): + # Set the log level for twisted.logger.Logger namespaces log_filter.setLogLevelForNamespace( namespace, stdlib_log_level_to_twisted(log_config.get("level", "INFO")) ) + # Also set the log levels for the stdlib logger namespaces, to prevent + # them getting to PythonStdlibToTwistedLogger and having to be formatted + logging.getLogger(namespace).setLevel(log_config.get("level", "NOTSET")) + f = FilteringLogObserver(publisher, [log_filter]) lco = LogContextObserver(f) stuff_into_twisted = PythonStdlibToTwistedLogger(lco) From fa764c6c0f69068e866bd3ced666a39d9dc6efee Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 26 Jul 2019 23:50:50 +1000 Subject: [PATCH 20/50] review comments --- synapse/logging/_structured.py | 44 +++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index cfc3d92606e0..bca35563c0f9 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -179,23 +179,13 @@ def parse_handler_configs(config): for name, config in config.get("handlers").items(): logging_type = None - if "type" not in config and "class" in config: - # Handle the old "class": style. - if config["class"] == "logging.StreamHandler": - yield ( - HandlerConfiguration( - name=name, - type=LoggingOutputType.CONSOLE, - location=sys.__stdout__, - ) - ) - else: + if "type" in config: + try: + logging_type = LoggingOutputType.lookupByName(config["type"].upper()) + except ValueError: raise ConfigError( - "The logging class %s is not supported in logging format 2." - % (config["class"],) + "%s is not a known logging handler type." 
% (config["type"],) ) - elif "type" in config: - logging_type = LoggingOutputType.lookupByName(config["type"].upper()) if logging_type in [LoggingOutputType.CONSOLE, LoggingOutputType.JSON]: location = config.get("location") @@ -212,6 +202,26 @@ def parse_handler_configs(config): type=logging_type, location=OutputPipeType.lookupByName(location).value, ) + else: + raise ConfigError( + "The %s logging handler type is currently not implemented." + % (config["type"].upper(),) + ) + elif "class" in config: + # Handle the old "class": style. + if config["class"] == "logging.StreamHandler": + yield ( + HandlerConfiguration( + name=name, + type=LoggingOutputType.CONSOLE, + location=sys.__stdout__, + ) + ) + else: + raise ConfigError( + "The logging class %s is not supported in logging format 2." + % (config["class"],) + ) else: raise ConfigError("Handlers need to have either a 'type' or 'class' key.") @@ -247,12 +257,12 @@ def setup_structured_logging(config, log_config): for namespace, config in log_config.get("loggers", {}).items(): # Set the log level for twisted.logger.Logger namespaces log_filter.setLogLevelForNamespace( - namespace, stdlib_log_level_to_twisted(log_config.get("level", "INFO")) + namespace, stdlib_log_level_to_twisted(config.get("level", "INFO")) ) # Also set the log levels for the stdlib logger namespaces, to prevent # them getting to PythonStdlibToTwistedLogger and having to be formatted - logging.getLogger(namespace).setLevel(log_config.get("level", "NOTSET")) + logging.getLogger(namespace).setLevel(config.get("level", "NOTSET")) f = FilteringLogObserver(publisher, [log_filter]) lco = LogContextObserver(f) From aafc342aba8576db8664b49538bd83f7a5e8c81f Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 1 Aug 2019 00:10:38 +1000 Subject: [PATCH 21/50] fix --- synapse/logging/_structured.py | 136 ++++++++++++++++++--------------- synapse/logging/context.py | 2 +- 2 files changed, 77 insertions(+), 61 deletions(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index bca35563c0f9..ff732a73af3e 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -145,9 +145,11 @@ def formatEvent(_event): return FileLogObserver(outFile, formatEvent) -class LoggingOutputType(Names): +class DrainType(Names): CONSOLE = NamedConstant() - JSON = NamedConstant() + CONSOLE_JSON = NamedConstant() + FILE = NamedConstant() + FILE_JSON = NamedConstant() FLUENTD = NamedConstant() @@ -157,76 +159,68 @@ class OutputPipeType(Values): @attr.s -class HandlerConfiguration(object): +class DrainConfiguration(object): name = attr.ib() type = attr.ib() location = attr.ib() -def parse_handler_configs(config): +def parse_drain_configs(drains): """ - Parse the logging format version 2 handler section. + Parse the drain configurations. Args: - config (dict): A logging format v2 dictionary. + drains (list): A list of drain configurations. Yields: - HandlerConfiguration instances. + DrainConfiguration instances. Raises: - ConfigError: If any of the handler configuration items are invalid. + ConfigError: If any of the drain configuration items are invalid. 
""" - for name, config in config.get("handlers").items(): - logging_type = None - - if "type" in config: - try: - logging_type = LoggingOutputType.lookupByName(config["type"].upper()) - except ValueError: + for name, config in drains.items(): + if "type" not in config: + raise ConfigError("Logging drains require a 'type' key.") + + try: + logging_type = DrainType.lookupByName(config["type"].upper()) + except ValueError: + raise ConfigError( + "%s is not a known logging drain type." % (config["type"],) + ) + + if logging_type in [DrainType.CONSOLE, DrainType.CONSOLE_JSON]: + location = config.get("location") + if location is None or location not in ["stdout", "stderr"]: raise ConfigError( - "%s is not a known logging handler type." % (config["type"],) + ( + "The %s drain needs the 'location' key set to " + "either 'stdout' or 'stderr'." + ) + % (logging_type,) ) - if logging_type in [LoggingOutputType.CONSOLE, LoggingOutputType.JSON]: - location = config.get("location") - if location is None or location not in ["stdout", "stderr"]: - raise ConfigError( - ( - "The %s handler needs the 'location' key set to " - "either 'stdout' or 'stderr'." - ) - ) + pipe = OutputPipeType.lookupByName(location).value - yield HandlerConfiguration( - name=name, - type=logging_type, - location=OutputPipeType.lookupByName(location).value, - ) - else: - raise ConfigError( - "The %s logging handler type is currently not implemented." - % (config["type"].upper(),) - ) - elif "class" in config: - # Handle the old "class": style. - if config["class"] == "logging.StreamHandler": - yield ( - HandlerConfiguration( - name=name, - type=LoggingOutputType.CONSOLE, - location=sys.__stdout__, - ) - ) - else: + yield DrainConfiguration(name=name, type=logging_type, location=pipe) + + elif logging_type in [DrainType.FILE, DrainType.FILE_JSON]: + if "location" not in config: raise ConfigError( - "The logging class %s is not supported in logging format 2." - % (config["class"],) + "The %s drain needs the 'location' key set." % (logging_type,) ) + + location = config.get("location") + yield DrainConfiguration(name=name, type=logging_type, location=location) + else: - raise ConfigError("Handlers need to have either a 'type' or 'class' key.") + raise ConfigError( + "The %s drain type is currently not implemented." + % (config["type"].upper(),) + ) -def setup_structured_logging(config, log_config): +def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): """ Set up Twisted's structured logging system. @@ -236,33 +230,55 @@ def setup_structured_logging(config, log_config): """ if config.no_redirect_stdio: raise ConfigError( - "no_redirect_stdio cannot be defined using log_config version 2." + "no_redirect_stdio cannot be defined using structured logging." 
) logger = Logger() + if not "drains" in log_config: + raise ConfigError("The logging configuration requires a list of drains.") + observers = [] - for observer in parse_handler_configs(log_config): - if observer.type == LoggingOutputType.CONSOLE: - logger.debug("Starting up the {name} console logger", name=observer.name) + for observer in parse_drain_configs(log_config["drains"]): + # Pipe drains + if observer.type == DrainType.CONSOLE: + logger.debug( + "Starting up the {name} console logger drain", name=observer.name + ) observers.append(SynapseFileLogObserver(observer.location)) - if observer.type == LoggingOutputType.JSON: - logger.debug("Starting up the {name} JSON logger", name=observer.name) + if observer.type == DrainType.CONSOLE_JSON: + logger.debug( + "Starting up the {name} JSON console logger drain", name=observer.name + ) observers.append(jsonFileLogObserver(observer.location)) + # File drains + if observer.type == DrainType.FILE: + logger.debug("Starting up the {name} file logger drain", name=observer.name) + log_file = open(observer.location, "at", buffering=1, encoding="utf8") + observers.append(SynapseFileLogObserver(log_file)) + if observer.type == DrainType.FILE_JSON: + logger.debug( + "Starting up the {name} JSON file logger drain", name=observer.name + ) + log_file = open(observer.location, "at", buffering=1, encoding="utf8") + observers.append(jsonFileLogObserver(log_file)) + publisher = LogPublisher(*observers) log_filter = LogLevelFilterPredicate() - for namespace, config in log_config.get("loggers", {}).items(): + for namespace, namespace_config in log_config.get("loggers", {}).items(): # Set the log level for twisted.logger.Logger namespaces log_filter.setLogLevelForNamespace( - namespace, stdlib_log_level_to_twisted(config.get("level", "INFO")) + namespace, + stdlib_log_level_to_twisted(namespace_config.get("level", "INFO")), ) # Also set the log levels for the stdlib logger namespaces, to prevent # them getting to PythonStdlibToTwistedLogger and having to be formatted - logging.getLogger(namespace).setLevel(config.get("level", "NOTSET")) + if "level" in namespace_config: + logging.getLogger(namespace).setLevel(namespace_config.get("level")) f = FilteringLogObserver(publisher, [log_filter]) lco = LogContextObserver(f) @@ -274,7 +290,7 @@ def setup_structured_logging(config, log_config): # Redirecting stdio is important here, especially if there's a JSON # outputter! - globalLogBeginner.beginLoggingTo([lco], redirectStandardIO=True) + logBeginner.beginLoggingTo([lco], redirectStandardIO=True) def reload_structured_logging(*args, log_config=None): diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 2cf6af11cebd..df16cf722119 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -202,7 +202,7 @@ def __str__(self): def copy_to(self, record): pass - def copy_to_twisted_log_entry(record): + def copy_to_twisted_log_entry(self, record): record["request"] = None record["scope"] = None From 755d7e8f15e0aa9e215313ee946d73c767235af8 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 1 Aug 2019 03:43:13 +1000 Subject: [PATCH 22/50] more test --- synapse/logging/_structured.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index ff732a73af3e..6edc4f7bed7e 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -78,9 +78,12 @@ def __call__(self, event): context = LoggingContext.current_context() + print(context) + # Copy the context information to the log event. if context is not None: context.copy_to_twisted_log_entry(event) + print(event) else: # If there's no logging context, not even the root one, we might be # starting up or it might be from non-Synapse code. Log it as if it @@ -292,6 +295,8 @@ def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): # outputter! logBeginner.beginLoggingTo([lco], redirectStandardIO=True) + return publisher + def reload_structured_logging(*args, log_config=None): # TODO: Reload the structured logging system. Since we don't implement any From 680fdee71aaac720d6e9cd799794d73867327179 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 7 Aug 2019 22:16:32 +1000 Subject: [PATCH 23/50] fix --- synapse/logging/_structured.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 6edc4f7bed7e..cadeb75d3cfb 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -238,7 +238,7 @@ def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): logger = Logger() - if not "drains" in log_config: + if "drains" not in log_config: raise ConfigError("The logging configuration requires a list of drains.") observers = [] From db1b1b373c353b93b6742d0547f562b82b27503e Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 7 Aug 2019 22:16:43 +1000 Subject: [PATCH 24/50] tests --- tests/logging/__init__.py | 0 tests/logging/test_structured.py | 132 +++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 tests/logging/__init__.py create mode 100644 tests/logging/test_structured.py diff --git a/tests/logging/__init__.py b/tests/logging/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py new file mode 100644 index 000000000000..faaa7a04c911 --- /dev/null +++ b/tests/logging/test_structured.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import os.path +import shutil + +from twisted.logger import Logger, eventAsText, eventsFromJSONLogFile + +from synapse.logging._structured import setup_structured_logging +from synapse.logging.context import LoggingContext + +from tests.unittest import HomeserverTestCase + + +class FakeBeginner(object): + def beginLoggingTo(self, observers, **kwargs): + self.observers = observers + + +class StructuredLoggingTestCase(HomeserverTestCase): + """ + Tests for Synapse's structured logging support. + """ + + def test_output_to_json_round_trip(self): + """ + Synapse logs can be outputted to JSON and then read back again. + """ + temp_dir = self.mktemp() + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + json_log_file = os.path.join(temp_dir, "out.json") + + log_config = { + "drains": {"jsonfile": {"type": "file_json", "location": json_log_file}} + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging(self.hs.config, log_config, logBeginner=beginner) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Read the log file and check it has the event we sent + with open(json_log_file, "r") as f: + logged_events = list(eventsFromJSONLogFile(f)) + self.assertEqual(len(logged_events), 1) + + # The event pulled from the file should render fine + self.assertEqual( + eventAsText(logged_events[0], includeTimestamp=False), + "[tests.logging.test_structured#info] Hello there, wally!", + ) + + def test_output_to_text(self): + """ + Synapse logs can be outputted to text. + """ + temp_dir = self.mktemp() + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + log_file = os.path.join(temp_dir, "out.log") + + log_config = {"drains": {"file": {"type": "file", "location": log_file}}} + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging(self.hs.config, log_config, logBeginner=beginner) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Read the log file and check it has the event we sent + with open(log_file, "r") as f: + logged_events = f.read().strip().split("\n") + self.assertEqual(len(logged_events), 1) + + # The event pulled from the file should render fine + self.assertTrue( + logged_events[0].endswith( + " - tests.logging.test_structured - INFO - None - Hello there, wally!" + ) + ) + + def test_collects_logcontext(self): + """ + Test that log outputs have the attached logging context. + """ + log_config = {"drains": {}} + + # Begin the logger with our config + beginner = FakeBeginner() + publisher = setup_structured_logging( + self.hs.config, log_config, logBeginner=beginner + ) + + logs = [] + + publisher.addObserver(logs.append) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + + with LoggingContext("testcontext", request="somereq"): + logger.info("Hello there, {name}!", name="steve") + + self.assertEqual(len(logs), 1) + self.assertEqual(logs[0]["request"], "somereq") From 92289a79c2567409f769bd2ce27a9e58b97b9433 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Wed, 7 Aug 2019 22:17:47 +1000 Subject: [PATCH 25/50] fix --- synapse/logging/_structured.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index cadeb75d3cfb..f8005a0f5ee1 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -78,12 +78,9 @@ def __call__(self, event): context = LoggingContext.current_context() - print(context) - # Copy the context information to the log event. if context is not None: context.copy_to_twisted_log_entry(event) - print(event) else: # If there's no logging context, not even the root one, we might be # starting up or it might be from non-Synapse code. Log it as if it From e7c62b565c337f4c81c6d9c4db24b8021e161f8d Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 7 Aug 2019 22:38:46 +1000 Subject: [PATCH 26/50] pass in a proper lang --- .buildkite/pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index b75269a15594..ec4a8157e402 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -55,6 +55,7 @@ steps: label: ":python: 3.5 / SQLite / Old Deps" env: TRIAL_FLAGS: "-j 2" + LANG: "C.UTF-8" plugins: - docker#v3.0.1: image: "ubuntu:xenial" # We use xenail to get an old sqlite and python From 55391d2a13ff4084d879dd6e313e57589b044dec Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 7 Aug 2019 22:52:48 +1000 Subject: [PATCH 27/50] try this to see if it'll go faster --- .buildkite/docker-compose.py35.pg95.yaml | 1 + .buildkite/docker-compose.py37.pg11.yaml | 1 + .buildkite/docker-compose.py37.pg95.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/.buildkite/docker-compose.py35.pg95.yaml b/.buildkite/docker-compose.py35.pg95.yaml index 2f14387fbc37..aaea33006b2d 100644 --- a/.buildkite/docker-compose.py35.pg95.yaml +++ b/.buildkite/docker-compose.py35.pg95.yaml @@ -6,6 +6,7 @@ services: image: postgres:9.5 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.5 diff --git a/.buildkite/docker-compose.py37.pg11.yaml b/.buildkite/docker-compose.py37.pg11.yaml index f3eec05ceb52..1b32675e7847 100644 --- a/.buildkite/docker-compose.py37.pg11.yaml +++ b/.buildkite/docker-compose.py37.pg11.yaml @@ -6,6 +6,7 @@ services: image: postgres:11 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.7 diff --git a/.buildkite/docker-compose.py37.pg95.yaml b/.buildkite/docker-compose.py37.pg95.yaml index 2a41db8eba90..7679f6508d12 100644 --- a/.buildkite/docker-compose.py37.pg95.yaml +++ b/.buildkite/docker-compose.py37.pg95.yaml @@ -6,6 +6,7 @@ services: image: postgres:9.5 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.7 From b2fa4ade05b9f2d99f174867b405958e31cdb0ba Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 8 Aug 2019 00:19:11 +1000 Subject: [PATCH 28/50] docs: --- docs/structured_logging.md | 69 ++++++++++++++++++++++++++++++++++ synapse/config/logger.py | 2 +- synapse/logging/_structured.py | 5 +-- 3 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 docs/structured_logging.md diff --git a/docs/structured_logging.md b/docs/structured_logging.md new file mode 100644 index 000000000000..df6039722552 --- /dev/null +++ b/docs/structured_logging.md @@ -0,0 +1,69 @@ +Structured Logging +================== + +A structured logging system can be useful when your logs are destined for a machine to parse and process. 
By maintaining its machine-readable characteristics, it enables more efficient searching and aggregations when consumed by software such as the "ELK stack". + +Synapse's structured logging system is configured via the file that Synapse's `log_config` config option points to. The file must be YAML and contain `structured: true`. It must contain a list of "drains" (places where logs go to). + +A structured logging configuration looks similar to the following: + +```yaml +structured: true + +loggers: + synapse: + level: INFO + synapse.storage.SQL: + level: WARNING + +drains: + console: + type: console + location: stdout + file: + type: file_json + location: homeserver.log +``` + +The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will have two logging drains (to the console and to a file, stored as JSON). + +Drain Types +----------- + +Drain types can be specified by the `type` key. + +`console` +~~~~~~~~~ + +Outputs human-readable logs to the console. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +`console_json` +~~~~~~~~~~~~~~ + +Outputs machine-readable JSON logs to the console. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +`file` +~~~~~~ + +Outputs human-readable logs to a file. + +Arguments: + +- `location`: A relative path to a file. + +`file_json` +~~~~~~ + +Outputs machine-readable logs to a file. + +Arguments: + +- `location`: A relative path to a file. \ No newline at end of file diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 21707692f98d..e8afac2ba7b0 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -208,7 +208,7 @@ def read_config(*args, callback=None): log_config_body = read_config() - if log_config_body and log_config_body.get("version") == 2: + if log_config_body and log_config_body.get("structured") == True: setup_structured_logging(config, log_config_body) appbase.register_sighup(read_config, callback=reload_structured_logging) else: diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index f8005a0f5ee1..60e5bf24e583 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -285,11 +285,10 @@ def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): stuff_into_twisted = PythonStdlibToTwistedLogger(lco) stdliblogger = logging.getLogger("") - stdliblogger.setLevel(logging.DEBUG) stdliblogger.addHandler(stuff_into_twisted) - # Redirecting stdio is important here, especially if there's a JSON - # outputter! + # Always redirect standard I/O, otherwise other logging outputs might miss + # it. logBeginner.beginLoggingTo([lco], redirectStandardIO=True) return publisher From 9cf3c286d580ec4db4f60b667f88c3d0491cb0eb Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 8 Aug 2019 00:22:56 +1000 Subject: [PATCH 29/50] docs --- synapse/config/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index e8afac2ba7b0..a7b734f02108 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -208,7 +208,7 @@ def read_config(*args, callback=None): log_config_body = read_config() - if log_config_body and log_config_body.get("structured") == True: + if log_config_body and log_config_body.get("structured") is True: setup_structured_logging(config, log_config_body) appbase.register_sighup(read_config, callback=reload_structured_logging) else: From 673d380e7cff4d396d2b00222af94777f1090fd5 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 8 Aug 2019 00:24:30 +1000 Subject: [PATCH 30/50] docs --- docs/structured_logging.md | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/structured_logging.md b/docs/structured_logging.md index df6039722552..e326333d84c0 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -1,5 +1,4 @@ -Structured Logging -================== +# Structured Logging A structured logging system can be useful when your logs are destined for a machine to parse and process. By maintaining its machine-readable characteristics, it enables more efficient searching and aggregations when consumed by software such as the "ELK stack". @@ -27,13 +26,11 @@ drains: The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will have two logging drains (to the console and to a file, stored as JSON). -Drain Types ------------ +## Drain Types Drain types can be specified by the `type` key. -`console` -~~~~~~~~~ +### `console` Outputs human-readable logs to the console. @@ -41,8 +38,7 @@ Arguments: - `location`: Either `stdout` or `stderr`. -`console_json` -~~~~~~~~~~~~~~ +### `console_json` Outputs machine-readable JSON logs to the console. @@ -50,8 +46,7 @@ Arguments: - `location`: Either `stdout` or `stderr`. -`file` -~~~~~~ +### `file` Outputs human-readable logs to a file. @@ -59,8 +54,7 @@ Arguments: - `location`: A relative path to a file. -`file_json` -~~~~~~ +### `file_json` Outputs machine-readable logs to a file. From aae792dda50aa246e80ce8665d736f5737d21f51 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 8 Aug 2019 01:37:41 +1000 Subject: [PATCH 31/50] more test --- docs/structured_logging.md | 4 +- synapse/logging/_structured.py | 6 +++ tests/logging/test_structured.py | 63 +++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/docs/structured_logging.md b/docs/structured_logging.md index e326333d84c0..8ff3bec9d7d3 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -52,7 +52,7 @@ Outputs human-readable logs to a file. Arguments: -- `location`: A relative path to a file. +- `location`: An absolute path to the file to log to. ### `file_json` @@ -60,4 +60,4 @@ Outputs machine-readable logs to a file. Arguments: -- `location`: A relative path to a file. \ No newline at end of file +- `location`: An absolute path to the file to log to. \ No newline at end of file diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 60e5bf24e583..18980b2819aa 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -14,6 +14,7 @@ # limitations under the License. 
import logging +import os.path import sys import attr @@ -211,6 +212,11 @@ def parse_drain_configs(drains): ) location = config.get("location") + if os.path.abspath(location) != location: + raise ConfigError( + "File paths need to be absolute, '%s' is a relative path" + % (location,) + ) yield DrainConfiguration(name=name, type=logging_type, location=location) else: diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index faaa7a04c911..9c771ce1c5d8 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -16,13 +16,16 @@ import os import os.path import shutil +import sys +import textwrap from twisted.logger import Logger, eventAsText, eventsFromJSONLogFile +from synapse.config.logger import setup_logging from synapse.logging._structured import setup_structured_logging from synapse.logging.context import LoggingContext -from tests.unittest import HomeserverTestCase +from tests.unittest import DEBUG, HomeserverTestCase class FakeBeginner(object): @@ -130,3 +133,61 @@ def test_collects_logcontext(self): self.assertEqual(len(logs), 1) self.assertEqual(logs[0]["request"], "somereq") + + +class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + + tempdir = self.mktemp() + os.mkdir(tempdir) + log_config_file = os.path.abspath(os.path.join(tempdir, "log.config.yaml")) + self.homeserver_log = os.path.abspath(os.path.join(tempdir, "homeserver.log")) + + config = self.default_config() + config["log_config"] = log_config_file + + with open(log_config_file, "w") as f: + f.write( + textwrap.dedent( + """\ + structured: true + + drains: + file: + type: file_json + location: %s + """ + % (self.homeserver_log,) + ) + ) + + self.addCleanup(self._sys_cleanup) + + return self.setup_test_homeserver(config=config) + + def _sys_cleanup(self): + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + # Do not remove! We need the logging system to be set other than WARNING. + @DEBUG + def test_log_output(self): + """ + When a structured logging config is given, Synapse will use it. + """ + setup_logging(self.hs.config) + + # Make a logger and send an event + logger = Logger(namespace="tests.logging.test_structured") + + with LoggingContext("testcontext", request="somereq"): + logger.info("Hello there, {name}!", name="steve") + + with open(self.homeserver_log, "r") as f: + logged_events = [ + eventAsText(x, includeTimestamp=False) for x in eventsFromJSONLogFile(f) + ] + + logs = "\n".join(logged_events) + self.assertTrue("***** STARTING SERVER *****" in logs) + self.assertTrue("Hello there, steve!" in logs) From b50d3a9767406b8460faba5184a630dbace6a565 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 8 Aug 2019 01:44:41 +1000 Subject: [PATCH 32/50] more test --- tests/logging/test_structured.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index 9c771ce1c5d8..a52f3cbceb2b 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -46,7 +46,7 @@ def test_output_to_json_round_trip(self): os.mkdir(temp_dir) self.addCleanup(shutil.rmtree, temp_dir) - json_log_file = os.path.join(temp_dir, "out.json") + json_log_file = os.path.abspath(os.path.join(temp_dir, "out.json")) log_config = { "drains": {"jsonfile": {"type": "file_json", "location": json_log_file}} @@ -81,7 +81,7 @@ def test_output_to_text(self): os.mkdir(temp_dir) self.addCleanup(shutil.rmtree, temp_dir) - log_file = os.path.join(temp_dir, "out.log") + log_file = os.path.abspath(os.path.join(temp_dir, "out.log")) log_config = {"drains": {"file": {"type": "file", "location": log_file}}} From 03ac5b49c575894aaad25685e1952c8a8c5b5467 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 8 Aug 2019 21:31:36 +1000 Subject: [PATCH 33/50] fluentd log to export on console --- synapse/logging/_fluentd.py | 92 ++++++++++++++++++++++++++++++++++ synapse/logging/_structured.py | 32 +++++++++--- 2 files changed, 117 insertions(+), 7 deletions(-) create mode 100644 synapse/logging/_fluentd.py diff --git a/synapse/logging/_fluentd.py b/synapse/logging/_fluentd.py new file mode 100644 index 000000000000..4e8669d4ed98 --- /dev/null +++ b/synapse/logging/_fluentd.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +FluentD log formatter. +""" + +import sys + +from simplejson import dumps + +from twisted.logger import FileLogObserver + + +def flatten_event(_event, metadata): + """ + Flatten a Twisted logging event to something that makes more sense to go + into a structured logging aggregation system. 
+ + The format is vastly simplified and + """ + event = {} + + keys_to_delete = [ + "isError", + "log_failure", + "log_format", + "log_level", + "log_logger", + "log_source", + "log_system", + "log_time", + "log_text", + "observer", + "warning", + ] + + if _event.get("log_namespace") == "log_legacy": + keys_to_delete.extend(["message", "system", "time"]) + + if "log_failure" in _event: + event["log_failure"] = _event["log_failure"].getTraceback() + + if "warning" in _event: + event["warning"] = str(_event["warning"]) + + if "log_text" in _event: + event["log"] = _event["log_text"] + else: + event["log"] = _event["log_format"] + + event["level"] = _event["log_level"].name + + for key in _event.keys(): + + if key in keys_to_delete: + continue + + if isinstance(_event[key], (str, int, bool, float)) or _event[key] is None: + event[key] = _event[key] + else: + event[key] = str(_event[key]) + + return {**event, **metadata} + + +def FluentDToConsoleLogObserver(outFile, metadata={}): + """ + A log observer that formats events to a flattened JSON representation. + + Args: + outFile (file object): The file object to write to. + metadata (dict): Metadata to be added to the log file. + """ + + def formatEvent(_event): + flattened = flatten_event(_event, metadata) + return dumps(flattened, ensure_ascii=False, separators=(",", ":")) + "\n" + + return FileLogObserver(outFile, formatEvent) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 18980b2819aa..00928476be72 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -35,6 +35,7 @@ ) from synapse.config._base import ConfigError +from synapse.logging._fluentd import FluentDToConsoleLogObserver from synapse.logging.context import LoggingContext @@ -74,7 +75,9 @@ def __call__(self, event): if event["log_text"].startswith("(UDP Port "): return - if event["log_text"].startswith("Timing out client"): + if event["log_text"].startswith("Timing out client") or event[ + "log_format" + ].startswith("Timing out client"): return context = LoggingContext.current_context() @@ -135,11 +138,10 @@ def SynapseFileLogObserver(outFile): """ def formatEvent(_event): - event = _event.copy() + event = dict(_event) event["log_level"] = event["log_level"].name.upper() - event["log_format"] = ( - "- {log_namespace} - {log_level} - {request} - " - + event.get("log_format", "{log_text}") + event["log_format"] = "- {log_namespace} - {log_level} - {request} - " + ( + event.get("log_format", "{log_text}") or "{log_text}" ) return eventAsText(event, includeSystem=False) + "\n" @@ -166,6 +168,9 @@ class DrainConfiguration(object): location = attr.ib() +DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} + + def parse_drain_configs(drains): """ Parse the drain configurations. @@ -190,7 +195,11 @@ def parse_drain_configs(drains): "%s is not a known logging drain type." 
% (config["type"],) ) - if logging_type in [DrainType.CONSOLE, DrainType.CONSOLE_JSON]: + if logging_type in [ + DrainType.CONSOLE, + DrainType.CONSOLE_JSON, + DrainType.FLUENTD, + ]: location = config.get("location") if location is None or location not in ["stdout", "stderr"]: raise ConfigError( @@ -271,10 +280,19 @@ def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): log_file = open(observer.location, "at", buffering=1, encoding="utf8") observers.append(jsonFileLogObserver(log_file)) + if observer.type == DrainType.FLUENTD: + logger.debug( + "Starting up the {name} FluentD console logger drain", + name=observer.name, + ) + observers.append(FluentDToConsoleLogObserver(observer.location)) + publisher = LogPublisher(*observers) log_filter = LogLevelFilterPredicate() - for namespace, namespace_config in log_config.get("loggers", {}).items(): + for namespace, namespace_config in log_config.get( + "loggers", DEFAULT_LOGGERS + ).items(): # Set the log level for twisted.logger.Logger namespaces log_filter.setLogLevelForNamespace( namespace, From d10b9b8e429b57d2762a61d29e69061fc866984f Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 8 Aug 2019 21:51:57 +1000 Subject: [PATCH 34/50] fix --- synapse/logging/_structured.py | 19 +++++++++---------- .../logging/{_fluentd.py => _terse_json.py} | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) rename synapse/logging/{_fluentd.py => _terse_json.py} (96%) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 00928476be72..50d1f6bfd2fa 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -35,7 +35,7 @@ ) from synapse.config._base import ConfigError -from synapse.logging._fluentd import FluentDToConsoleLogObserver +from synapse.logging._terse_json import TerseJSONToConsoleLogObserver from synapse.logging.context import LoggingContext @@ -151,9 +151,9 @@ def formatEvent(_event): class DrainType(Names): CONSOLE = NamedConstant() CONSOLE_JSON = NamedConstant() + CONSOLE_JSON_TERSE = NamedConstant() FILE = NamedConstant() FILE_JSON = NamedConstant() - FLUENTD = NamedConstant() class OutputPipeType(Values): @@ -198,7 +198,7 @@ def parse_drain_configs(drains): if logging_type in [ DrainType.CONSOLE, DrainType.CONSOLE_JSON, - DrainType.FLUENTD, + DrainType.CONSOLE_JSON_TERSE, ]: location = config.get("location") if location is None or location not in ["stdout", "stderr"]: @@ -267,6 +267,12 @@ def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): "Starting up the {name} JSON console logger drain", name=observer.name ) observers.append(jsonFileLogObserver(observer.location)) + if observer.type == DrainType.CONSOLE_JSON_TERSE: + logger.debug( + "Starting up the {name} terse JSON console logger drain", + name=observer.name, + ) + observers.append(TerseJSONToConsoleLogObserver(observer.location)) # File drains if observer.type == DrainType.FILE: @@ -280,13 +286,6 @@ def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): log_file = open(observer.location, "at", buffering=1, encoding="utf8") observers.append(jsonFileLogObserver(log_file)) - if observer.type == DrainType.FLUENTD: - logger.debug( - "Starting up the {name} FluentD console logger drain", - name=observer.name, - ) - observers.append(FluentDToConsoleLogObserver(observer.location)) - publisher = LogPublisher(*observers) log_filter = LogLevelFilterPredicate() diff --git a/synapse/logging/_fluentd.py b/synapse/logging/_terse_json.py similarity index 96% 
rename from synapse/logging/_fluentd.py rename to synapse/logging/_terse_json.py index 4e8669d4ed98..fad05027e284 100644 --- a/synapse/logging/_fluentd.py +++ b/synapse/logging/_terse_json.py @@ -14,7 +14,7 @@ # limitations under the License. """ -FluentD log formatter. +Log formatters that output terse JSON. """ import sys @@ -76,7 +76,7 @@ def flatten_event(_event, metadata): return {**event, **metadata} -def FluentDToConsoleLogObserver(outFile, metadata={}): +def TerseJSONToConsoleLogObserver(outFile, metadata={}): """ A log observer that formats events to a flattened JSON representation. From 51b3e81b049d56e5f1356bf99d536e98f9fc45fb Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 8 Aug 2019 23:36:10 +1000 Subject: [PATCH 35/50] fix --- synapse/app/homeserver.py | 4 +-- synapse/config/logger.py | 4 +-- synapse/logging/_structured.py | 23 +++++++++++-- synapse/logging/_terse_json.py | 63 +++++++++++++++++++++++++++++++++- 4 files changed, 87 insertions(+), 7 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7d6b51b5bc09..3a3c94eaa0e2 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -341,8 +341,6 @@ def setup(config_options): # generating config files and shouldn't try to continue. sys.exit(0) - synapse.config.logger.setup_logging(config, use_worker_options=False) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -356,6 +354,8 @@ def setup(config_options): database_engine=database_engine, ) + synapse.config.logger.setup_logging(hs, config, use_worker_options=False) + logger.info("Preparing database: %s...", config.database_config["name"]) try: diff --git a/synapse/config/logger.py b/synapse/config/logger.py index a7b734f02108..981df5a10c27 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -180,7 +180,7 @@ def _reload_stdlib_logging(*args, log_config=None): logging.config.dictConfig(log_config) -def setup_logging(config, use_worker_options=False): +def setup_logging(hs, config, use_worker_options=False): """ Set up the logging subsystem. 
@@ -209,7 +209,7 @@ def read_config(*args, callback=None): log_config_body = read_config() if log_config_body and log_config_body.get("structured") is True: - setup_structured_logging(config, log_config_body) + setup_structured_logging(hs, config, log_config_body) appbase.register_sighup(read_config, callback=reload_structured_logging) else: _setup_stdlib_logging(config, log_config_body) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 50d1f6bfd2fa..7e864dced1c5 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -35,7 +35,10 @@ ) from synapse.config._base import ConfigError -from synapse.logging._terse_json import TerseJSONToConsoleLogObserver +from synapse.logging._terse_json import ( + TerseJSONToConsoleLogObserver, + TerseJSONToTCPLogObserver, +) from synapse.logging.context import LoggingContext @@ -154,6 +157,7 @@ class DrainType(Names): CONSOLE_JSON_TERSE = NamedConstant() FILE = NamedConstant() FILE_JSON = NamedConstant() + NETWORK_JSON_TERSE = NamedConstant() class OutputPipeType(Values): @@ -228,6 +232,13 @@ def parse_drain_configs(drains): ) yield DrainConfiguration(name=name, type=logging_type, location=location) + elif logging_type in [DrainType.NETWORK_JSON_TERSE]: + host = config.get("host") + port = config.get("port") + yield DrainConfiguration( + name=name, type=logging_type, location=(host, port) + ) + else: raise ConfigError( "The %s drain type is currently not implemented." @@ -235,7 +246,7 @@ def parse_drain_configs(drains): ) -def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): +def setup_structured_logging(hs, config, log_config, logBeginner=globalLogBeginner): """ Set up Twisted's structured logging system. @@ -286,6 +297,14 @@ def setup_structured_logging(config, log_config, logBeginner=globalLogBeginner): log_file = open(observer.location, "at", buffering=1, encoding="utf8") observers.append(jsonFileLogObserver(log_file)) + if observer.type == DrainType.NETWORK_JSON_TERSE: + metadata = {"server_name": hs.config.server_name} + observer = TerseJSONToTCPLogObserver( + hs, observer.location[0], observer.location[1], metadata + ) + observer.start() + observers.append(observer) + publisher = LogPublisher(*observers) log_filter = LogLevelFilterPredicate() diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index fad05027e284..3bd995575f46 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -18,13 +18,19 @@ """ import sys +from collections import deque +import attr from simplejson import dumps +from twisted.application.internet import ClientService +from twisted.internet.endpoints import HostnameEndpoint +from twisted.internet.protocol import Factory, Protocol from twisted.logger import FileLogObserver +from twisted.python.failure import Failure -def flatten_event(_event, metadata): +def flatten_event(_event, metadata, include_time=False): """ Flatten a Twisted logging event to something that makes more sense to go into a structured logging aggregation system. 
@@ -61,6 +67,9 @@ def flatten_event(_event, metadata): else: event["log"] = _event["log_format"] + if include_time: + event["time"] = _event["log_time"] + event["level"] = _event["log_level"].name for key in _event.keys(): @@ -90,3 +99,55 @@ def formatEvent(_event): return dumps(flattened, ensure_ascii=False, separators=(",", ":")) + "\n" return FileLogObserver(outFile, formatEvent) + + +@attr.s +class TerseJSONToTCPLogObserver(object): + + hs = attr.ib() + host = attr.ib() + port = attr.ib() + metadata = attr.ib() + retry_time = attr.ib(default=5) + _buffer = attr.ib(default=attr.Factory(deque)) + _writer = attr.ib(default=None) + + def start(self): + endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port) + factory = Factory.forProtocol(Protocol) + self._service = ClientService(endpoint, factory) + self._service.startService() + + def _write_loop(self): + """ + Implement the write loop. + """ + if self._writer: + return + + self._writer = self._service.whenConnected() + + @self._writer.addBoth + def _(r): + if isinstance(r, Failure): + r.printTraceback(file=sys.__stderr__) + self._writer = None + self.hs.get_reactor().callLater(self.retry_time, self._write_loop) + return + + try: + r.transport.write(b"\n".join(reversed(self._buffer)) + b"\n") + self._buffer.clear() + except Exception as e: + sys.__stderr__.write("Failed writing out logs with %s\n" % (str(e),)) + + self._writer = False + self.hs.get_reactor().callLater(self.retry_time, self._write_loop) + + def __call__(self, _event): + flattened = flatten_event(_event, self.metadata, include_time=True) + self._buffer.append( + dumps(flattened, ensure_ascii=False, separators=(",", ":")).encode("utf8") + ) + # Try and write immediately + self._write_loop() From 232064eea1d4c61ebd4a438cac72feb15de6f005 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 8 Aug 2019 23:39:50 +1000 Subject: [PATCH 36/50] cleanups --- synapse/app/admin_cmd.py | 4 ++-- synapse/app/appservice.py | 4 ++-- synapse/app/client_reader.py | 4 ++-- synapse/app/event_creator.py | 4 ++-- synapse/app/federation_reader.py | 4 ++-- synapse/app/federation_sender.py | 4 ++-- synapse/app/frontend_proxy.py | 4 ++-- synapse/app/media_repository.py | 4 ++-- synapse/app/pusher.py | 4 ++-- synapse/app/synchrotron.py | 4 ++-- synapse/app/user_dir.py | 4 ++-- tests/logging/test_structured.py | 8 ++++---- 12 files changed, 26 insertions(+), 26 deletions(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 1fd52a552693..04751a6a5e29 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -227,8 +227,6 @@ def start(config_options): config.start_pushers = False config.send_federation = False - setup_logging(config, use_worker_options=True) - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -241,6 +239,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() # We use task.react as the basic run command as it correctly handles tearing diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 54bb114dec74..767b87d2db14 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -141,8 +141,6 @@ def start(config_options): assert config.worker_app == "synapse.app.appservice" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -167,6 +165,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ps, config, use_worker_options=True) + ps.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ps, config.worker_listeners diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 721bb5b119f3..86193d35a8b0 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -179,8 +179,6 @@ def start(config_options): assert config.worker_app == "synapse.app.client_reader" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -193,6 +191,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index 473c8895d0fe..c67fe69a5009 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -175,8 +175,6 @@ def start(config_options): assert config.worker_replication_http_port is not None - setup_logging(config, use_worker_options=True) - # This should only be done on the user directory worker or the master config.update_user_directory = False @@ -192,6 +190,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 5255d9e8ccef..1ef027a88cb4 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -160,8 +160,6 @@ def start(config_options): assert 
config.worker_app == "synapse.app.federation_reader" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -174,6 +172,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index c5a2880e6995..04fbb407af56 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -171,8 +171,6 @@ def start(config_options): assert config.worker_app == "synapse.app.federation_sender" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -197,6 +195,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index e2822ca848b3..611d2854211d 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -232,8 +232,6 @@ def start(config_options): assert config.worker_main_http_uri is not None - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -246,6 +244,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index ea26f29acb88..4e8468522f15 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -146,8 +146,6 @@ def start(config_options): "Please add ``enable_media_repo: false`` to the main config\n" ) - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -160,6 +158,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 692ffa2f0482..d84732ee3ca1 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -184,8 +184,6 @@ def start(config_options): assert config.worker_app == "synapse.app.pusher" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts if config.start_pushers: @@ -210,6 +208,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ps, config, use_worker_options=True) + ps.setup() def start(): diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index a1c3b162f7ed..473026fce513 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -435,8 +435,6 @@ def start(config_options): assert config.worker_app == "synapse.app.synchrotron" - setup_logging(config, use_worker_options=True) - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -450,6 +448,8 @@ def 
start(config_options): application_service_handler=SynchrotronApplicationService(), ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index cb29a1afabec..e01afb39f2b8 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -197,8 +197,6 @@ def start(config_options): assert config.worker_app == "synapse.app.user_dir" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -223,6 +221,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index a52f3cbceb2b..0c787c0f652b 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -54,7 +54,7 @@ def test_output_to_json_round_trip(self): # Begin the logger with our config beginner = FakeBeginner() - setup_structured_logging(self.hs.config, log_config, logBeginner=beginner) + setup_structured_logging(self.hs, self.hs.config, log_config, logBeginner=beginner) # Make a logger and send an event logger = Logger( @@ -87,7 +87,7 @@ def test_output_to_text(self): # Begin the logger with our config beginner = FakeBeginner() - setup_structured_logging(self.hs.config, log_config, logBeginner=beginner) + setup_structured_logging(self.hs, self.hs.config, log_config, logBeginner=beginner) # Make a logger and send an event logger = Logger( @@ -116,7 +116,7 @@ def test_collects_logcontext(self): # Begin the logger with our config beginner = FakeBeginner() publisher = setup_structured_logging( - self.hs.config, log_config, logBeginner=beginner + self.hs, self.hs.config, log_config, logBeginner=beginner ) logs = [] @@ -175,7 +175,7 @@ def test_log_output(self): """ When a structured logging config is given, Synapse will use it. """ - setup_logging(self.hs.config) + setup_logging(self.hs, self.hs.config) # Make a logger and send an event logger = Logger(namespace="tests.logging.test_structured") From d004e93ecaaf0383e34afd5bcc95042785e98c0a Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 8 Aug 2019 23:42:51 +1000 Subject: [PATCH 37/50] style fix --- tests/logging/test_structured.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py index 0c787c0f652b..a786de0233d2 100644 --- a/tests/logging/test_structured.py +++ b/tests/logging/test_structured.py @@ -54,7 +54,9 @@ def test_output_to_json_round_trip(self): # Begin the logger with our config beginner = FakeBeginner() - setup_structured_logging(self.hs, self.hs.config, log_config, logBeginner=beginner) + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) # Make a logger and send an event logger = Logger( @@ -87,7 +89,9 @@ def test_output_to_text(self): # Begin the logger with our config beginner = FakeBeginner() - setup_structured_logging(self.hs, self.hs.config, log_config, logBeginner=beginner) + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) # Make a logger and send an event logger = Logger( From 5bbc8e763beee8af4d50c185ab5008a279753fbd Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 9 Aug 2019 21:15:05 +1000 Subject: [PATCH 38/50] docs and typing --- synapse/logging/_structured.py | 27 +++++--- synapse/logging/_terse_json.py | 114 +++++++++++++++++++++++---------- 2 files changed, 97 insertions(+), 44 deletions(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 7e864dced1c5..a3ecfd29b0e0 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -16,6 +16,7 @@ import logging import os.path import sys +import typing import attr from constantly import NamedConstant, Names, ValueConstant, Values @@ -25,6 +26,7 @@ FileLogObserver, FilteringLogObserver, ILogObserver, + LogBeginner, Logger, LogLevel, LogLevelFilterPredicate, @@ -42,7 +44,7 @@ from synapse.logging.context import LoggingContext -def stdlib_log_level_to_twisted(level): +def stdlib_log_level_to_twisted(level: str) -> LogLevel: """ Convert a stdlib log level to Twisted's log level. """ @@ -62,7 +64,7 @@ class LogContextObserver(object): observer = attr.ib() - def __call__(self, event): + def __call__(self, event: dict) -> None: """ Consume a log event and emit it to the parent observer after filtering and adding log context information. @@ -95,7 +97,7 @@ def __call__(self, event): event["request"] = None event["scope"] = None - return self.observer(event) + self.observer(event) class PythonStdlibToTwistedLogger(logging.Handler): @@ -112,7 +114,7 @@ def __init__(self, observer, *args, **kwargs): self.observer = observer super().__init__(*args, **kwargs) - def emit(self, record): + def emit(self, record: logging.LogRecord) -> None: """ Emit a record to Twisted's observer. @@ -131,7 +133,7 @@ def emit(self, record): ) -def SynapseFileLogObserver(outFile): +def SynapseFileLogObserver(outFile: typing.io.TextIO) -> FileLogObserver: """ A log observer that formats events like the traditional log formatter and sends them to `outFile`. @@ -140,7 +142,7 @@ def SynapseFileLogObserver(outFile): outFile (file object): The file object to write to. 
""" - def formatEvent(_event): + def formatEvent(_event: dict) -> str: event = dict(_event) event["log_level"] = event["log_level"].name.upper() event["log_format"] = "- {log_namespace} - {log_level} - {request} - " + ( @@ -175,7 +177,7 @@ class DrainConfiguration(object): DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} -def parse_drain_configs(drains): +def parse_drain_configs(drains: list) -> typing.List[DrainConfiguration]: """ Parse the drain configurations. @@ -246,11 +248,14 @@ def parse_drain_configs(drains): ) -def setup_structured_logging(hs, config, log_config, logBeginner=globalLogBeginner): +def setup_structured_logging( + hs, config, log_config: dict, logBeginner: LogBeginner = globalLogBeginner +) -> LogPublisher: """ Set up Twisted's structured logging system. Args: + hs: The homeserver to use. config (HomeserverConfig): The configuration of the Synapse homeserver. log_config (dict): The log configuration to use. """ @@ -283,7 +288,9 @@ def setup_structured_logging(hs, config, log_config, logBeginner=globalLogBeginn "Starting up the {name} terse JSON console logger drain", name=observer.name, ) - observers.append(TerseJSONToConsoleLogObserver(observer.location)) + observers.append( + TerseJSONToConsoleLogObserver(observer.location, metadata={}) + ) # File drains if observer.type == DrainType.FILE: @@ -336,7 +343,7 @@ def setup_structured_logging(hs, config, log_config, logBeginner=globalLogBeginn return publisher -def reload_structured_logging(*args, log_config=None): +def reload_structured_logging(*args, log_config=None) -> None: # TODO: Reload the structured logging system. Since we don't implement any # sort of file rotation, we don't need to worry about doing that here. pass diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 3bd995575f46..6542a1c05a9b 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -19,6 +19,7 @@ import sys from collections import deque +from typing.io import TextIO import attr from simplejson import dumps @@ -30,71 +31,108 @@ from twisted.python.failure import Failure -def flatten_event(_event, metadata, include_time=False): +def flatten_event(_event: dict, metadata: dict, include_time: bool = False): """ - Flatten a Twisted logging event to something that makes more sense to go - into a structured logging aggregation system. + Flatten a Twisted logging event to an dictionary capable of being sent + as a log event to a logging aggregation system. - The format is vastly simplified and + The format is vastly simplified and is not designed to be a "human readable + string" in the sense that traditional logs are. Instead, the structure is + optimised for searchability and filtering, with human-understandable log + keys. + + Args: + _event (dict): The Twisted logging event we are flattening. + metadata (dict): Additional data to include with each log message. This + can be information like the server name. Since the target log + consumer does not know who we are other than by host IP, this + allows us to forward through static information. + include_time (bool): Should we include the `time` key? If False, the + event time is stripped from the event. """ event = {} - keys_to_delete = [ - "isError", - "log_failure", - "log_format", - "log_level", - "log_logger", - "log_source", - "log_system", - "log_time", - "log_text", - "observer", - "warning", - ] - + # If it's from the Twisted legacy logger (twisted.python.log), it adds some + # more keys we want to purge. 
if _event.get("log_namespace") == "log_legacy": keys_to_delete.extend(["message", "system", "time"]) + # If it's a failure, make the new event's log_failure be the traceback text. if "log_failure" in _event: event["log_failure"] = _event["log_failure"].getTraceback() + # If it's a warning, copy over a string representation of the warning. if "warning" in _event: event["warning"] = str(_event["warning"]) + # Stdlib logging events have "log_text" as their human-readable portion, + # Twisted ones have "log_format". For now, include the log_format, so that + # context only given in the log format (e.g. what is being logged) is + # available. if "log_text" in _event: event["log"] = _event["log_text"] else: event["log"] = _event["log_format"] + # We want to include the timestamp when forwarding over the network, but + # exclude it when we are writing to stdout. This is because the log ingester + # (e.g. logstash, fluentd) can add its own timestamp. if include_time: event["time"] = _event["log_time"] - event["level"] = _event["log_level"].name + # Convert the log level to a textual representation. + event["level"] = _event["log_level"].name.upper() + # Ignore these keys, and do not transfer them over to the new log object. + # They are either useless (isError), transferred manually above (log_time, + # log_level, etc), or contain Python objects which are not useful for output + # (log_logger, log_source). + keys_to_delete = [ + "isError", + "log_failure", + "log_format", + "log_level", + "log_logger", + "log_source", + "log_system", + "log_time", + "log_text", + "observer", + "warning", + ] + + # Rather than modify the dictionary in place, construct a new one with only + # the content we want. The original event should be considered 'frozen'. for key in _event.keys(): if key in keys_to_delete: continue if isinstance(_event[key], (str, int, bool, float)) or _event[key] is None: + # If it's a plain type, include it as is. event[key] = _event[key] else: + # If it's not one of those basic types, write out a string + # representation. This should probably be a warning in development, + # so that we are sure we are only outputting useful data. event[key] = str(_event[key]) - return {**event, **metadata} + # Add the metadata information to the event (e.g. the server_name). + event.update(metadata) + + return event -def TerseJSONToConsoleLogObserver(outFile, metadata={}): +def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver: """ A log observer that formats events to a flattened JSON representation. Args: - outFile (file object): The file object to write to. - metadata (dict): Metadata to be added to the log file. + outFile: The file object to write to. + metadata: Metadata to be added to each log object. """ - def formatEvent(_event): + def formatEvent(_event: dict) -> str: flattened = flatten_event(_event, metadata) return dumps(flattened, ensure_ascii=False, separators=(",", ":")) + "\n" @@ -103,22 +141,30 @@ def formatEvent(_event): @attr.s class TerseJSONToTCPLogObserver(object): + """ + An IObserver that writes JSON logs to a TCP target. + + Args: + hs (HomeServer): The Homeserver that is being logged for. + host: The host of the logging target. + port: The logging target's port. + metadata: Metadata to be added to each log entry. 
+ """ hs = attr.ib() - host = attr.ib() - port = attr.ib() - metadata = attr.ib() - retry_time = attr.ib(default=5) - _buffer = attr.ib(default=attr.Factory(deque)) + host = attr.ib(type=str) + port = attr.ib(type=int) + metadata = attr.ib(type=dict) + _buffer = attr.ib(default=attr.Factory(deque), type=deque) _writer = attr.ib(default=None) - def start(self): + def start(self) -> None: endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port) factory = Factory.forProtocol(Protocol) self._service = ClientService(endpoint, factory) self._service.startService() - def _write_loop(self): + def _write_loop(self) -> None: """ Implement the write loop. """ @@ -132,7 +178,7 @@ def _(r): if isinstance(r, Failure): r.printTraceback(file=sys.__stderr__) self._writer = None - self.hs.get_reactor().callLater(self.retry_time, self._write_loop) + self.hs.get_reactor().callLater(1, self._write_loop) return try: @@ -142,9 +188,9 @@ def _(r): sys.__stderr__.write("Failed writing out logs with %s\n" % (str(e),)) self._writer = False - self.hs.get_reactor().callLater(self.retry_time, self._write_loop) + self.hs.get_reactor().callLater(1, self._write_loop) - def __call__(self, _event): + def __call__(self, _event: dict) -> None: flattened = flatten_event(_event, self.metadata, include_time=True) self._buffer.append( dumps(flattened, ensure_ascii=False, separators=(",", ":")).encode("utf8") From b611e53d69510ae6f5e85d75fdcbd7fb7241cb2e Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 9 Aug 2019 21:24:55 +1000 Subject: [PATCH 39/50] run mypy and check for errors --- .gitignore | 4 ++-- synapse/handlers/federation.py | 3 +-- synapse/logging/_terse_json.py | 10 +++++----- tox.ini | 8 ++++++++ 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 8374a7d0afb2..eb38ffcb338a 100644 --- a/.gitignore +++ b/.gitignore @@ -29,8 +29,9 @@ _trial_temp*/ /.vscode/ # build products -/.coverage* !/.coveragerc +/.coverage* +/.mypy_cache/ /.tox /build/ /coverage.* @@ -38,4 +39,3 @@ _trial_temp*/ /docs/build/ /htmlcov /pip-wheel-metadata/ - diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c86903b98bba..79cf8727aa04 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -326,8 +326,7 @@ def on_receive_pdu(self, origin, pdu, sent_to_us_directly=False): ours = yield self.store.get_state_groups_ids(room_id, seen) # state_maps is a list of mappings from (type, state_key) to event_id - # type: list[dict[tuple[str, str], str]] - state_maps = list(ours.values()) + state_maps = list(ours.values()) # type: list[dict[tuple[str, str], str]] # we don't need this any more, let's delete it. del ours diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 6542a1c05a9b..3a50394b407c 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -52,11 +52,6 @@ def flatten_event(_event: dict, metadata: dict, include_time: bool = False): """ event = {} - # If it's from the Twisted legacy logger (twisted.python.log), it adds some - # more keys we want to purge. - if _event.get("log_namespace") == "log_legacy": - keys_to_delete.extend(["message", "system", "time"]) - # If it's a failure, make the new event's log_failure be the traceback text. 
if "log_failure" in _event: event["log_failure"] = _event["log_failure"].getTraceback() @@ -101,6 +96,11 @@ def flatten_event(_event: dict, metadata: dict, include_time: bool = False): "warning", ] + # If it's from the Twisted legacy logger (twisted.python.log), it adds some + # more keys we want to purge. + if _event.get("log_namespace") == "log_legacy": + keys_to_delete.extend(["message", "system", "time"]) + # Rather than modify the dictionary in place, construct a new one with only # the content we want. The original event should be considered 'frozen'. for key in _event.keys(): diff --git a/tox.ini b/tox.ini index 09b4b8fc3ca7..8c08314bd1e9 100644 --- a/tox.ini +++ b/tox.ini @@ -146,3 +146,11 @@ commands = coverage combine coverage xml codecov -X gcov + +[testenv:mypy] +basepython = python3.5 +deps = + {[base]deps} + mypy +extras = all +commands = mypy --ignore-missing-imports synapse/ \ No newline at end of file From 77f23348701a514274be9539578dc2028b0b583d Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 9 Aug 2019 21:31:45 +1000 Subject: [PATCH 40/50] run mypy and check for errors --- synapse/handlers/federation.py | 4 +++- synapse/logging/_structured.py | 12 +++++++----- synapse/logging/context.py | 3 ++- tox.ini | 4 +++- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 79cf8727aa04..94306c94a963 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -326,7 +326,9 @@ def on_receive_pdu(self, origin, pdu, sent_to_us_directly=False): ours = yield self.store.get_state_groups_ids(room_id, seen) # state_maps is a list of mappings from (type, state_key) to event_id - state_maps = list(ours.values()) # type: list[dict[tuple[str, str], str]] + state_maps = list( + ours.values() + ) # type: list[dict[tuple[str, str], str]] # we don't need this any more, let's delete it. del ours diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index a3ecfd29b0e0..93a5f5162070 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -177,12 +177,14 @@ class DrainConfiguration(object): DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} -def parse_drain_configs(drains: list) -> typing.List[DrainConfiguration]: +def parse_drain_configs( + drains: dict +) -> typing.Generator[DrainConfiguration, None, None]: """ Parse the drain configurations. Args: - drains (list): A list of drain configurations. + drains (dict): A list of drain configurations. Yields: DrainConfiguration instances. 
@@ -306,11 +308,11 @@ def setup_structured_logging( if observer.type == DrainType.NETWORK_JSON_TERSE: metadata = {"server_name": hs.config.server_name} - observer = TerseJSONToTCPLogObserver( + log_observer = TerseJSONToTCPLogObserver( hs, observer.location[0], observer.location[1], metadata ) - observer.start() - observers.append(observer) + log_observer.start() + observers.append(log_observer) publisher = LogPublisher(*observers) log_filter = LogLevelFilterPredicate() diff --git a/synapse/logging/context.py b/synapse/logging/context.py index df16cf722119..63379bfb9378 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -25,6 +25,7 @@ import logging import threading import types +from typing import Any, List from twisted.internet import defer, threads @@ -194,7 +195,7 @@ class LoggingContext(object): class Sentinel(object): """Sentinel to represent the root context""" - __slots__ = [] + __slots__ = [] # type: List[Any] def __str__(self): return "sentinel" diff --git a/tox.ini b/tox.ini index 8c08314bd1e9..f9a3b7e49ac6 100644 --- a/tox.ini +++ b/tox.ini @@ -153,4 +153,6 @@ deps = {[base]deps} mypy extras = all -commands = mypy --ignore-missing-imports synapse/ \ No newline at end of file +commands = mypy --ignore-missing-imports \ + synapse/logging/_structured.py \ + synapse/logging/_terse_json.py \ No newline at end of file From eb3c75d189e000e465261a2de51ac4e62ad53db2 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 9 Aug 2019 21:32:42 +1000 Subject: [PATCH 41/50] add it to CI --- .buildkite/pipeline.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index ec4a8157e402..d9327227ed78 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -45,8 +45,15 @@ steps: - docker#v3.0.1: image: "python:3.6" - - wait + - command: + - "python -m pip install tox" + - "tox -e mypy" + label: ":mypy: mypy" + plugins: + - docker#v3.0.1: + image: "python:3.5" + - wait - command: - "apt-get update && apt-get install -y python3.5 python3.5-dev python3-pip libxml2-dev libxslt-dev zlib1g-dev" From 71d695a3c9811e5838f232d8a006e812c8b3f8d8 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 9 Aug 2019 21:36:50 +1000 Subject: [PATCH 42/50] docs --- docs/structured_logging.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/structured_logging.md b/docs/structured_logging.md index 8ff3bec9d7d3..decec9b8fa1e 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -46,6 +46,16 @@ Arguments: - `location`: Either `stdout` or `stderr`. +### `console_json_terse` + +Outputs machine-readable JSON logs to the console, separated by newlines. This +format is not designed to be read and re-formatted into human-readable text, but +is optimal for a logging aggregation system. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + ### `file` Outputs human-readable logs to a file. @@ -60,4 +70,14 @@ Outputs machine-readable logs to a file. Arguments: -- `location`: An absolute path to the file to log to. \ No newline at end of file +- `location`: An absolute path to the file to log to. + +### `network_json_terse` + +Delivers machine-readable JSON logs to a log aggregator over TCP. This is +compatible with LogStash's TCP input with the codec set to `json_lines`. + +Arguments: + +- `host`: Hostname or IP address of the log aggregator. +- `port`: Numerical port to contact on the host. 
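As a sketch of how this drain might be declared in practice (the drain name and the aggregator address below are placeholders, not defaults), it sits alongside any other drains in the structured log config:

```yaml
structured: true

drains:
  logstash:
    type: network_json_terse
    host: logstash.example.com
    port: 5000
```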
\ No newline at end of file From 04f084cb221bd61562ef0ccb872c0db8728affa6 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 27 Aug 2019 17:07:09 +1000 Subject: [PATCH 43/50] some introductory logs --- synapse/logging/_structured.py | 35 ++++++++----- synapse/logging/_terse_json.py | 85 +++++++++++++++++++++----------- tests/logging/test_terse_json.py | 84 +++++++++++++++++++++++++++++++ tests/server.py | 24 ++++++++- 4 files changed, 186 insertions(+), 42 deletions(-) create mode 100644 tests/logging/test_terse_json.py diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 93a5f5162070..79c4fbca6921 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -17,6 +17,7 @@ import os.path import sys import typing +import warnings import attr from constantly import NamedConstant, Names, ValueConstant, Values @@ -172,6 +173,7 @@ class DrainConfiguration(object): name = attr.ib() type = attr.ib() location = attr.ib() + options = attr.ib(default=attr.Factory(dict)) DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} @@ -239,8 +241,12 @@ def parse_drain_configs( elif logging_type in [DrainType.NETWORK_JSON_TERSE]: host = config.get("host") port = config.get("port") + maximum_buffer = config.get("maximum_buffer", 1000) yield DrainConfiguration( - name=name, type=logging_type, location=(host, port) + name=name, + type=logging_type, + location=(host, port), + options={"maximum_buffer": maximum_buffer}, ) else: @@ -280,12 +286,12 @@ def setup_structured_logging( "Starting up the {name} console logger drain", name=observer.name ) observers.append(SynapseFileLogObserver(observer.location)) - if observer.type == DrainType.CONSOLE_JSON: + elif observer.type == DrainType.CONSOLE_JSON: logger.debug( "Starting up the {name} JSON console logger drain", name=observer.name ) observers.append(jsonFileLogObserver(observer.location)) - if observer.type == DrainType.CONSOLE_JSON_TERSE: + elif observer.type == DrainType.CONSOLE_JSON_TERSE: logger.debug( "Starting up the {name} terse JSON console logger drain", name=observer.name, @@ -295,24 +301,31 @@ def setup_structured_logging( ) # File drains - if observer.type == DrainType.FILE: + elif observer.type == DrainType.FILE: logger.debug("Starting up the {name} file logger drain", name=observer.name) log_file = open(observer.location, "at", buffering=1, encoding="utf8") observers.append(SynapseFileLogObserver(log_file)) - if observer.type == DrainType.FILE_JSON: + elif observer.type == DrainType.FILE_JSON: logger.debug( "Starting up the {name} JSON file logger drain", name=observer.name ) log_file = open(observer.location, "at", buffering=1, encoding="utf8") observers.append(jsonFileLogObserver(log_file)) - if observer.type == DrainType.NETWORK_JSON_TERSE: + elif observer.type == DrainType.NETWORK_JSON_TERSE: metadata = {"server_name": hs.config.server_name} log_observer = TerseJSONToTCPLogObserver( - hs, observer.location[0], observer.location[1], metadata + hs, + observer.location[0], + observer.location[1], + metadata, + **observer.options ) log_observer.start() observers.append(log_observer) + else: + # We should never get here, but, just in case, throw an error. 
+ raise ConfigError("%s drain type cannot be configured" % (observer.type,)) publisher = LogPublisher(*observers) log_filter = LogLevelFilterPredicate() @@ -335,7 +348,7 @@ def setup_structured_logging( lco = LogContextObserver(f) stuff_into_twisted = PythonStdlibToTwistedLogger(lco) - stdliblogger = logging.getLogger("") + stdliblogger = logging.getLogger() stdliblogger.addHandler(stuff_into_twisted) # Always redirect standard I/O, otherwise other logging outputs might miss @@ -346,6 +359,6 @@ def setup_structured_logging( def reload_structured_logging(*args, log_config=None) -> None: - # TODO: Reload the structured logging system. Since we don't implement any - # sort of file rotation, we don't need to worry about doing that here. - pass + warnings.warn( + "Currently the structured logging system can not be reloaded, doing nothing" + ) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 3a50394b407c..31a1eb8f85a4 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -19,19 +19,24 @@ import sys from collections import deque +from ipaddress import IPv4Address, IPv6Address, ip_address from typing.io import TextIO import attr from simplejson import dumps from twisted.application.internet import ClientService -from twisted.internet.endpoints import HostnameEndpoint +from twisted.internet.endpoints import ( + HostnameEndpoint, + TCP4ClientEndpoint, + TCP6ClientEndpoint, +) from twisted.internet.protocol import Factory, Protocol from twisted.logger import FileLogObserver from twisted.python.failure import Failure -def flatten_event(_event: dict, metadata: dict, include_time: bool = False): +def flatten_event(event: dict, metadata: dict, include_time: bool = False): """ Flatten a Twisted logging event to an dictionary capable of being sent as a log event to a logging aggregation system. @@ -42,7 +47,7 @@ def flatten_event(_event: dict, metadata: dict, include_time: bool = False): keys. Args: - _event (dict): The Twisted logging event we are flattening. + event (dict): The Twisted logging event we are flattening. metadata (dict): Additional data to include with each log message. This can be information like the server name. Since the target log consumer does not know who we are other than by host IP, this @@ -50,33 +55,33 @@ def flatten_event(_event: dict, metadata: dict, include_time: bool = False): include_time (bool): Should we include the `time` key? If False, the event time is stripped from the event. """ - event = {} + new_event = {} # If it's a failure, make the new event's log_failure be the traceback text. - if "log_failure" in _event: - event["log_failure"] = _event["log_failure"].getTraceback() + if "log_failure" in event: + new_event["log_failure"] = event["log_failure"].getTraceback() # If it's a warning, copy over a string representation of the warning. - if "warning" in _event: - event["warning"] = str(_event["warning"]) + if "warning" in event: + new_event["warning"] = str(event["warning"]) # Stdlib logging events have "log_text" as their human-readable portion, # Twisted ones have "log_format". For now, include the log_format, so that # context only given in the log format (e.g. what is being logged) is # available. 
- if "log_text" in _event: - event["log"] = _event["log_text"] + if "log_text" in event: + new_event["log"] = event["log_text"] else: - event["log"] = _event["log_format"] + new_event["log"] = event["log_format"] # We want to include the timestamp when forwarding over the network, but # exclude it when we are writing to stdout. This is because the log ingester # (e.g. logstash, fluentd) can add its own timestamp. if include_time: - event["time"] = _event["log_time"] + new_event["time"] = round(event["log_time"], 2) # Convert the log level to a textual representation. - event["level"] = _event["log_level"].name.upper() + new_event["level"] = event["log_level"].name.upper() # Ignore these keys, and do not transfer them over to the new log object. # They are either useless (isError), transferred manually above (log_time, @@ -98,29 +103,29 @@ def flatten_event(_event: dict, metadata: dict, include_time: bool = False): # If it's from the Twisted legacy logger (twisted.python.log), it adds some # more keys we want to purge. - if _event.get("log_namespace") == "log_legacy": + if event.get("log_namespace") == "log_legacy": keys_to_delete.extend(["message", "system", "time"]) # Rather than modify the dictionary in place, construct a new one with only # the content we want. The original event should be considered 'frozen'. - for key in _event.keys(): + for key in event.keys(): if key in keys_to_delete: continue - if isinstance(_event[key], (str, int, bool, float)) or _event[key] is None: + if isinstance(event[key], (str, int, bool, float)) or event[key] is None: # If it's a plain type, include it as is. - event[key] = _event[key] + new_event[key] = event[key] else: # If it's not one of those basic types, write out a string # representation. This should probably be a warning in development, # so that we are sure we are only outputting useful data. - event[key] = str(_event[key]) + new_event[key] = str(event[key]) # Add the metadata information to the event (e.g. the server_name). - event.update(metadata) + new_event.update(metadata) - return event + return new_event def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver: @@ -155,13 +160,28 @@ class TerseJSONToTCPLogObserver(object): host = attr.ib(type=str) port = attr.ib(type=int) metadata = attr.ib(type=dict) + maximum_buffer = attr.ib(type=int) _buffer = attr.ib(default=attr.Factory(deque), type=deque) _writer = attr.ib(default=None) def start(self) -> None: - endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port) + + # Connect without DNS lookups if it's a direct IP. 
+ try: + ip = ip_address(self.host) + if isinstance(ip, IPv4Address): + endpoint = TCP4ClientEndpoint( + self.hs.get_reactor(), self.host, self.port + ) + elif isinstance(ip, IPv6Address): + endpoint = TCP6ClientEndpoint( + self.hs.get_reactor(), self.host, self.port + ) + except ValueError: + endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port) + factory = Factory.forProtocol(Protocol) - self._service = ClientService(endpoint, factory) + self._service = ClientService(endpoint, factory, clock=self.hs.get_reactor()) self._service.startService() def _write_loop(self) -> None: @@ -174,7 +194,7 @@ def _write_loop(self) -> None: self._writer = self._service.whenConnected() @self._writer.addBoth - def _(r): + def writer(r): if isinstance(r, Failure): r.printTraceback(file=sys.__stderr__) self._writer = None @@ -182,7 +202,13 @@ def _(r): return try: - r.transport.write(b"\n".join(reversed(self._buffer)) + b"\n") + for event in reversed(self._buffer): + r.transport.write( + dumps(event, ensure_ascii=False, separators=(",", ":")).encode( + "utf8" + ) + ) + r.transport.write(b"\n") self._buffer.clear() except Exception as e: sys.__stderr__.write("Failed writing out logs with %s\n" % (str(e),)) @@ -190,10 +216,11 @@ def _(r): self._writer = False self.hs.get_reactor().callLater(1, self._write_loop) - def __call__(self, _event: dict) -> None: - flattened = flatten_event(_event, self.metadata, include_time=True) - self._buffer.append( - dumps(flattened, ensure_ascii=False, separators=(",", ":")).encode("utf8") - ) + def _handle_pressure(self) -> None: + pass + + def __call__(self, event: dict) -> None: + flattened = flatten_event(event, self.metadata, include_time=True) + self._buffer.append(flattened) # Try and write immediately self._write_loop() diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py new file mode 100644 index 000000000000..e96b362d5d35 --- /dev/null +++ b/tests/logging/test_terse_json.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json + +from twisted.logger import Logger + +from synapse.logging._structured import setup_structured_logging + +from tests.server import connect_client +from tests.unittest import HomeserverTestCase + +from .test_structured import FakeBeginner + + +class TerseJSONTCPTestCase(HomeserverTestCase): + def test_write_loop(self): + """ + """ + log_config = { + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + } + } + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + logger = Logger( + namespace="tests.logging.test_terse_json", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Trigger the connection + self.pump() + + server = connect_client(self.reactor, self.reactor.tcpClients[0][2]) + + # Trigger data being sent + self.pump() + + # One log message, with a single trailing newline + logs = b"\n".split(server.data) + self.assertEqual(len(logs), 1) + self.assertEqual(server.data.count(b"\n")) + + log = json.loads(logs[0]) + + expected_log_keys = [ + "log", + "time", + "level", + "log_namespace", + "request", + "scope", + "server_name", + "name", + ] + + self.assertEqual(set(log.keys()), set(expected_log_keys)) + + # It contains the data we expect. + self.assertEqual(log["name"], "wally") + + print(server.data) diff --git a/tests/server.py b/tests/server.py index e573c4e4c5ce..d155e77c075e 100644 --- a/tests/server.py +++ b/tests/server.py @@ -11,9 +11,13 @@ from twisted.internet._resolver import SimpleResolverComplexifier from twisted.internet.defer import Deferred, fail, succeed from twisted.internet.error import DNSLookupError -from twisted.internet.interfaces import IReactorPluggableNameResolver, IResolverSimple +from twisted.internet.interfaces import ( + IReactorPluggableNameResolver, + IReactorTCP, + IResolverSimple, +) from twisted.python.failure import Failure -from twisted.test.proto_helpers import MemoryReactorClock +from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock from twisted.web.http import unquote from twisted.web.http_headers import Headers @@ -465,3 +469,19 @@ def flush(self, maxbytes=None): self.buffer = self.buffer[len(to_write) :] if self.buffer and self.autoflush: self._reactor.callLater(0.0, self.flush) + + +def connect_client(reactor: IReactorTCP, factory) -> AccumulatingProtocol: + """ + Connect a client to a fake TCP transport. + + Args: + reactor + factory: The connecting factory to build. 
+ """ + client = factory.buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, reactor)) + client.makeConnection(FakeTransport(server, reactor)) + + return server From 8697d51f172197c649e9d9cf2bf5be49cd6ecde5 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 27 Aug 2019 23:36:45 +1000 Subject: [PATCH 44/50] failing test --- synapse/logging/_structured.py | 13 +++-- tests/logging/test_terse_json.py | 81 +++++++++++++++++++++++++++++--- tests/server.py | 7 ++- 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 79c4fbca6921..2b2675996253 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -257,7 +257,11 @@ def parse_drain_configs( def setup_structured_logging( - hs, config, log_config: dict, logBeginner: LogBeginner = globalLogBeginner + hs, + config, + log_config: dict, + logBeginner: LogBeginner = globalLogBeginner, + redirect_stdlib_logging: bool = True, ) -> LogPublisher: """ Set up Twisted's structured logging system. @@ -346,10 +350,11 @@ def setup_structured_logging( f = FilteringLogObserver(publisher, [log_filter]) lco = LogContextObserver(f) - stuff_into_twisted = PythonStdlibToTwistedLogger(lco) - stdliblogger = logging.getLogger() - stdliblogger.addHandler(stuff_into_twisted) + if redirect_stdlib_logging: + stuff_into_twisted = PythonStdlibToTwistedLogger(lco) + stdliblogger = logging.getLogger() + stdliblogger.addHandler(stuff_into_twisted) # Always redirect standard I/O, otherwise other logging outputs might miss # it. diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index e96b362d5d35..65f248a79bb9 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -26,8 +26,9 @@ class TerseJSONTCPTestCase(HomeserverTestCase): - def test_write_loop(self): + def test_log_output(self): """ + The Terse JSON outputter delivers simplified structured logs over TCP. """ log_config = { "drains": { @@ -53,18 +54,19 @@ def test_write_loop(self): # Trigger the connection self.pump() - server = connect_client(self.reactor, self.reactor.tcpClients[0][2]) + _, server = connect_client(self.reactor, 0) # Trigger data being sent self.pump() # One log message, with a single trailing newline - logs = b"\n".split(server.data) + logs = server.data.splitlines() self.assertEqual(len(logs), 1) - self.assertEqual(server.data.count(b"\n")) + self.assertEqual(server.data.count(b"\n"), 1) log = json.loads(logs[0]) + # The terse logger should give us these keys. expected_log_keys = [ "log", "time", @@ -75,10 +77,77 @@ def test_write_loop(self): "server_name", "name", ] - self.assertEqual(set(log.keys()), set(expected_log_keys)) # It contains the data we expect. 
self.assertEqual(log["name"], "wally") - print(server.data) + def test_log_backpressure(self): + + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Trigger the connection + self.pump() + + client, server = connect_client(self.reactor, 0) + + # Trigger data being sent + self.pump() + + # One log message, with a single trailing newline + logs = server.data.splitlines() + self.assertEqual(len(logs), 1) + self.assertEqual(server.data.count(b"\n"), 1) + server.data = b"" + + # Disconnect the server + client.connectionLost(None) + + self.pump() + + # Send a bunch of messages that hits the maximum limit + logger.debug("debug 1") + logger.debug("debug 2") + + for i in range(0, 7): + logger.info("test message") + + logger.debug("debug 3") + logger.debug("debug 4") + + # Assert they're not sent + self.pump(1) + self.assertEqual(server.data, b"") + + # Reconnect + client, server = connect_client(self.reactor, 0) + self.pump() + + # Only the 7 infos made it through, the debugs were elided + logs = server.data.splitlines() + self.assertEqual(len(logs), 7) diff --git a/tests/server.py b/tests/server.py index d155e77c075e..c8269619b1fd 100644 --- a/tests/server.py +++ b/tests/server.py @@ -471,7 +471,7 @@ def flush(self, maxbytes=None): self._reactor.callLater(0.0, self.flush) -def connect_client(reactor: IReactorTCP, factory) -> AccumulatingProtocol: +def connect_client(reactor: IReactorTCP, client_id: int) -> AccumulatingProtocol: """ Connect a client to a fake TCP transport. @@ -479,9 +479,12 @@ def connect_client(reactor: IReactorTCP, factory) -> AccumulatingProtocol: reactor factory: The connecting factory to build. 
""" + factory = reactor.tcpClients[client_id][2] client = factory.buildProtocol(None) server = AccumulatingProtocol() server.makeConnection(FakeTransport(client, reactor)) client.makeConnection(FakeTransport(server, reactor)) - return server + reactor.tcpClients.pop(client_id) + + return client, server From 4477c69d5c0fa09493a8e676e23f3497779557f2 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 27 Aug 2019 23:37:59 +1000 Subject: [PATCH 45/50] failing test --- tests/logging/test_terse_json.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 65f248a79bb9..0d53df3d8fec 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -131,20 +131,20 @@ def test_log_backpressure(self): self.pump() # Send a bunch of messages that hits the maximum limit - logger.debug("debug 1") - logger.debug("debug 2") + for i in range(0, 5): + logger.debug("debug") for i in range(0, 7): logger.info("test message") - logger.debug("debug 3") - logger.debug("debug 4") + for i in range(0, 5): + logger.debug("debug") # Assert they're not sent self.pump(1) self.assertEqual(server.data, b"") - # Reconnect + # Allow the reconnection client, server = connect_client(self.reactor, 0) self.pump() From 98642205ee94f3bbdf93beef346b23c0730dbdf8 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 28 Aug 2019 16:21:52 +1000 Subject: [PATCH 46/50] fix the test --- synapse/logging/_terse_json.py | 54 ++++++++++++- tests/logging/test_terse_json.py | 133 +++++++++++++++++++++++++------ 2 files changed, 159 insertions(+), 28 deletions(-) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 31a1eb8f85a4..eee5b501a0a4 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -20,6 +20,7 @@ import sys from collections import deque from ipaddress import IPv4Address, IPv6Address, ip_address +from math import floor from typing.io import TextIO import attr @@ -202,7 +203,7 @@ def writer(r): return try: - for event in reversed(self._buffer): + for event in self._buffer: r.transport.write( dumps(event, ensure_ascii=False, separators=(",", ":")).encode( "utf8" @@ -217,10 +218,59 @@ def writer(r): self.hs.get_reactor().callLater(1, self._write_loop) def _handle_pressure(self) -> None: - pass + """ + Handle backpressure by shedding events. + + The buffer will, in this order, until the buffer is below the maximum: + - Shed DEBUG events + - Shed INFO events + - Shed the middle 50% of the events. + """ + if len(self._buffer) <= self.maximum_buffer: + return + + # Strip out DEBUGs + self._buffer = deque( + filter(lambda event: event["level"] != "DEBUG", self._buffer) + ) + + if len(self._buffer) <= self.maximum_buffer: + return + + # Strip out INFOs + self._buffer = deque( + filter(lambda event: event["level"] != "INFO", self._buffer) + ) + + if len(self._buffer) <= self.maximum_buffer: + return + + # Cut the middle entries out + buffer_split = floor(self.maximum_buffer / 2) + + old_buffer = self._buffer + self._buffer = deque() + + for i in range(buffer_split): + self._buffer.append(old_buffer.popleft()) + + end_buffer = [] + for i in range(buffer_split): + end_buffer.append(old_buffer.pop()) + + self._buffer.extend(reversed(end_buffer)) def __call__(self, event: dict) -> None: flattened = flatten_event(event, self.metadata, include_time=True) self._buffer.append(flattened) + + # Handle backpressure, if it exists. 
+ try: + self._handle_pressure() + except Exception as e: + print(e) + finally: + pass + # Try and write immediately self._write_loop() diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 0d53df3d8fec..1216c6e518e8 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -14,6 +14,7 @@ # limitations under the License. import json +from collections import Counter from twisted.logger import Logger @@ -82,8 +83,10 @@ def test_log_output(self): # It contains the data we expect. self.assertEqual(log["name"], "wally") - def test_log_backpressure(self): - + def test_log_backpressure_debug(self): + """ + When backpressure is hit, DEBUG logs will be shed. + """ log_config = { "loggers": {"synapse": {"level": "DEBUG"}}, "drains": { @@ -109,45 +112,123 @@ def test_log_backpressure(self): logger = Logger( namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] ) - logger.info("Hello there, {name}!", name="wally") - # Trigger the connection - self.pump() + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) - client, server = connect_client(self.reactor, 0) + # Send a bunch of useful messages + for i in range(0, 7): + logger.info("test message %s" % (i,)) - # Trigger data being sent + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + client, server = connect_client(self.reactor, 0) self.pump() - # One log message, with a single trailing newline + # Only the 7 infos made it through, the debugs were elided logs = server.data.splitlines() - self.assertEqual(len(logs), 1) - self.assertEqual(server.data.count(b"\n"), 1) - server.data = b"" + self.assertEqual(len(logs), 7) - # Disconnect the server - client.connectionLost(None) + def test_log_backpressure_info(self): + """ + When backpressure is hit, DEBUG and INFO logs will be shed. + """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) + + # Send a bunch of useful messages + for i in range(0, 10): + logger.warn("test warn %s" % (i,)) + + # Send a bunch of info messages + for i in range(0, 3): + logger.info("test message %s" % (i,)) + + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + client, server = connect_client(self.reactor, 0) self.pump() - # Send a bunch of messages that hits the maximum limit - for i in range(0, 5): - logger.debug("debug") + # The 10 warnings made it through, the debugs and infos were elided + logs = list(map(json.loads, server.data.splitlines())) + self.assertEqual(len(logs), 10) - for i in range(0, 7): - logger.info("test message") + self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) + + def test_log_backpressure_cut_middle(self): + """ + When backpressure is hit, and DEBUG and INFOs cannot be culled, it will + cut the middle 50% out. 
+ """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) - for i in range(0, 5): - logger.debug("debug") + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) - # Assert they're not sent - self.pump(1) - self.assertEqual(server.data, b"") + # Send a bunch of useful messages + for i in range(0, 20): + logger.warn("test warn", num=i) # Allow the reconnection client, server = connect_client(self.reactor, 0) self.pump() - # Only the 7 infos made it through, the debugs were elided - logs = server.data.splitlines() - self.assertEqual(len(logs), 7) + # The first five and last five warnings made it through, the debugs and + # infos were elided + logs = list(map(json.loads, server.data.splitlines())) + self.assertEqual(len(logs), 10) + self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) + self.assertEqual([0, 1, 2, 3, 4, 15, 16, 17, 18, 19], [x["num"] for x in logs]) From c400d155e089c05df4511bd8f4be6acef6f63bbb Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 28 Aug 2019 16:23:05 +1000 Subject: [PATCH 47/50] fix the tests --- tests/logging/test_terse_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 1216c6e518e8..ee68b705df30 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -189,8 +189,8 @@ def test_log_backpressure_info(self): def test_log_backpressure_cut_middle(self): """ - When backpressure is hit, and DEBUG and INFOs cannot be culled, it will - cut the middle 50% out. + When backpressure is hit, and no more DEBUG and INFOs cannot be culled, + it will cut the middle messages out. 
""" log_config = { "loggers": {"synapse": {"level": "DEBUG"}}, From 267501a334607e2e99ac54aab68bb1684378f20d Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 28 Aug 2019 16:29:47 +1000 Subject: [PATCH 48/50] fix mypy --- synapse/logging/_structured.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 2b2675996253..0367d6dfc4b3 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -173,7 +173,12 @@ class DrainConfiguration(object): name = attr.ib() type = attr.ib() location = attr.ib() - options = attr.ib(default=attr.Factory(dict)) + options = attr.ib(default=None) + + +@attr.s +class NetworkJSONTerseOptions(object): + maximum_buffer = attr.ib(type=int) DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} @@ -246,7 +251,7 @@ def parse_drain_configs( name=name, type=logging_type, location=(host, port), - options={"maximum_buffer": maximum_buffer}, + options=NetworkJSONTerseOptions(maximum_buffer=maximum_buffer), ) else: @@ -319,11 +324,11 @@ def setup_structured_logging( elif observer.type == DrainType.NETWORK_JSON_TERSE: metadata = {"server_name": hs.config.server_name} log_observer = TerseJSONToTCPLogObserver( - hs, - observer.location[0], - observer.location[1], - metadata, - **observer.options + hs=hs, + host=observer.location[0], + port=observer.location[1], + metadata=metadata, + maximum_buffer=observer.options.maximum_buffer, ) log_observer.start() observers.append(log_observer) From 518e48e046c9f6b7ff3e7bcf69e23307a24628ec Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 28 Aug 2019 16:44:34 +1000 Subject: [PATCH 49/50] fix old tests --- synapse/logging/_terse_json.py | 14 ++++++++------ tests/logging/test_terse_json.py | 8 ++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index eee5b501a0a4..7f1e8f23fe89 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -33,7 +33,7 @@ TCP6ClientEndpoint, ) from twisted.internet.protocol import Factory, Protocol -from twisted.logger import FileLogObserver +from twisted.logger import FileLogObserver, Logger from twisted.python.failure import Failure @@ -164,6 +164,7 @@ class TerseJSONToTCPLogObserver(object): maximum_buffer = attr.ib(type=int) _buffer = attr.ib(default=attr.Factory(deque), type=deque) _writer = attr.ib(default=None) + _logger = attr.ib(default=attr.Factory(Logger)) def start(self) -> None: @@ -267,10 +268,11 @@ def __call__(self, event: dict) -> None: # Handle backpressure, if it exists. try: self._handle_pressure() - except Exception as e: - print(e) - finally: - pass + except Exception: + # If handling backpressure fails,clear the buffer and log the + # exception. + self._buffer.clear() + self._logger.failure("Failed clearing backpressure") - # Try and write immediately + # Try and write immediately. 
self._write_loop() diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index ee68b705df30..f58b488139e0 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -61,7 +61,7 @@ def test_log_output(self): self.pump() # One log message, with a single trailing newline - logs = server.data.splitlines() + logs = server.data.decode('utf8').splitlines() self.assertEqual(len(logs), 1) self.assertEqual(server.data.count(b"\n"), 1) @@ -125,7 +125,7 @@ def test_log_backpressure_debug(self): logger.debug("too much debug") # Allow the reconnection - client, server = connect_client(self.reactor, 0) + _, server = connect_client(self.reactor, 0) self.pump() # Only the 7 infos made it through, the debugs were elided @@ -182,7 +182,7 @@ def test_log_backpressure_info(self): self.pump() # The 10 warnings made it through, the debugs and infos were elided - logs = list(map(json.loads, server.data.splitlines())) + logs = list(map(json.loads, server.data.decode('utf8').splitlines())) self.assertEqual(len(logs), 10) self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) @@ -228,7 +228,7 @@ def test_log_backpressure_cut_middle(self): # The first five and last five warnings made it through, the debugs and # infos were elided - logs = list(map(json.loads, server.data.splitlines())) + logs = list(map(json.loads, server.data.decode('utf8').splitlines())) self.assertEqual(len(logs), 10) self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) self.assertEqual([0, 1, 2, 3, 4, 15, 16, 17, 18, 19], [x["num"] for x in logs]) From 0a0845f3789995d7e1fa624c914495c9fe599610 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 28 Aug 2019 16:51:00 +1000 Subject: [PATCH 50/50] fix old tests --- tests/logging/test_terse_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index f58b488139e0..514282591d8a 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -61,7 +61,7 @@ def test_log_output(self): self.pump() # One log message, with a single trailing newline - logs = server.data.decode('utf8').splitlines() + logs = server.data.decode("utf8").splitlines() self.assertEqual(len(logs), 1) self.assertEqual(server.data.count(b"\n"), 1) @@ -182,7 +182,7 @@ def test_log_backpressure_info(self): self.pump() # The 10 warnings made it through, the debugs and infos were elided - logs = list(map(json.loads, server.data.decode('utf8').splitlines())) + logs = list(map(json.loads, server.data.decode("utf8").splitlines())) self.assertEqual(len(logs), 10) self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) @@ -228,7 +228,7 @@ def test_log_backpressure_cut_middle(self): # The first five and last five warnings made it through, the debugs and # infos were elided - logs = list(map(json.loads, server.data.decode('utf8').splitlines())) + logs = list(map(json.loads, server.data.decode("utf8").splitlines())) self.assertEqual(len(logs), 10) self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) self.assertEqual([0, 1, 2, 3, 4, 15, 16, 17, 18, 19], [x["num"] for x in logs])