Skip to content

Commit

Permalink
Revert "Revert "Global logging format changes" (ray-project#34126)" (r…
Browse files Browse the repository at this point in the history
…ay-project#34182)

Attempts to consolidate logging configuration by introducing reasonable defaults in ray/log.py.
This new logging configuration is done once in ray/__init__.py at the top of the module. Subsequent calls to the configuration are ignored.
A logger for ray.rllib is configured at the WARN level, to address Revert "Simplify logging configuration. (ray-project#30863)" ray-project#31858. With this change,
Revert "Simplify logging configuration. (ray-project#30863)" ray-project#31858 can be reverted, again simplifying and consolidating logging configuration.
Modified test_output.py::test_logger_config to test only the logger config, not launch a ray cluster. The test was failing intermittently, I think due to a race condition between the launch of the cluster and the reading of the subprocess's stdout, and anyway it wasn't necessary to call ray.init here to check that logging was configured correctly.
Modified python/ray/tune/tests/test_commands.py::test_ls_with_cfg to test the underlying data, not what gets printed to stdout (which has changed with the new logging system).
Modified a logging message in ray.tune.automl.search_policy.AutoMLSearcher.on_trial_complete, which in certain cases emits a logging message which tries to format a NoneType into a %f during log message formatting. This was a previously-undetected bug which showed up because the default log level is now INFO. This fixes a test that was failing in test_automl_searcher.py::AutoMLSearcherTest.

Signed-off-by: e428265 <[email protected]>
  • Loading branch information
peytondmurray authored and arvind-chandra committed Aug 31, 2023
1 parent b745551 commit 360f405
Show file tree
Hide file tree
Showing 34 changed files with 537 additions and 192 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
.jar
/dashboard/client/build

# Kuberay config lives in a separate repository
python/ray/autoscaler/kuberay/config

# Files generated by flatc should be ignored
/src/ray/gcs/format/*_generated.h
/src/ray/object_manager/format/*_generated.h
Expand Down
8 changes: 7 additions & 1 deletion ci/lint/check_api_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,13 @@ def verify(symbol, scanned, ok, output, prefix=None, ignore=None):
verify(ray.air, set(), ok, output)
verify(ray.train, set(), ok, output)
verify(ray.tune, set(), ok, output)
verify(ray, set(), ok, output, ignore=["ray.workflow", "ray.tune", "ray.serve"])
verify(
ray,
set(),
ok,
output,
ignore=["ray.workflow", "ray.tune", "ray.serve"],
)
verify(ray.serve, set(), ok, output)
assert len(ok) >= 500, len(ok)
# TODO(ekl) enable it for all modules.
Expand Down
18 changes: 17 additions & 1 deletion dashboard/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import logging.handlers
import os
import pathlib
import sys
import signal

Expand All @@ -18,7 +19,10 @@
from ray._private.gcs_pubsub import GcsAioPublisher
from ray._raylet import GcsClient
from ray._private.gcs_utils import GcsAioClient
from ray._private.ray_logging import setup_component_logger
from ray._private.ray_logging import (
setup_component_logger,
configure_log_file,
)
from ray.core.generated import agent_manager_pb2, agent_manager_pb2_grpc
from ray.experimental.internal_kv import (
_initialize_internal_kv,
Expand Down Expand Up @@ -341,6 +345,14 @@ async def _check_parent():
await self.http_server.cleanup()


def open_capture_files(log_dir):
filename = f"agent-{args.agent_id}"
return (
ray._private.utils.open_log(pathlib.Path(log_dir) / f"{filename}.out"),
ray._private.utils.open_log(pathlib.Path(log_dir) / f"{filename}.err"),
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Dashboard agent.")
parser.add_argument(
Expand Down Expand Up @@ -507,6 +519,10 @@ async def _check_parent():
# w.r.t grpc server init in the DashboardAgent initializer.
loop = ray._private.utils.get_or_create_event_loop()

# Setup stdout/stderr redirect files
out_file, err_file = open_capture_files(args.log_dir)
configure_log_file(out_file, err_file)

agent = DashboardAgent(
args.node_ip_address,
args.dashboard_agent_port,
Expand Down
71 changes: 54 additions & 17 deletions doc/source/ray-observability/ray-logging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,43 @@

Logging
=======
This document will explain Ray's logging system and its best practices.
This document explains Ray's logging system and related best practices.

Internal Ray Logging Configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When ``import ray`` is executed, Ray's logger is initialized, generating a sensible configuration given in ``python/ray/_private/log.py``. The default logging level is ``logging.INFO``.

All ray loggers are automatically configured in ``ray._private.ray_logging``. To change the Ray library logging configuration:

.. code-block:: python
import logging
logger = logging.getLogger("ray")
logger # Modify the ray logging config
Similarly, to modify the logging configuration for any Ray subcomponent, specify the appropriate logger name:

.. code-block:: python
import logging
# First, get the handle for the logger you want to modify
ray_data_logger = logging.getLogger("ray.data")
ray_tune_logger = logging.getLogger("ray.tune")
ray_rllib_logger = logging.getLogger("ray.rllib")
ray_air_logger = logging.getLogger("ray.air")
ray_train_logger = logging.getLogger("ray.train")
ray_workflow_logger = logging.getLogger("ray.workflow")
# Modify the ray.data logging level
ray_data_logger.setLevel(logging.WARNING)
# Other loggers can be modified similarly.
# Here's how to add an aditional file handler for ray tune:
ray_tune_logger.addHandler(logging.FileHandler("extra_ray_tune_log.log"))
For more information about logging in workers, see :ref:`Customizing worker loggers`.

Driver logs
~~~~~~~~~~~
Expand All @@ -16,12 +52,12 @@ The log file consists of the stdout of the entrypoint command of the job. For t

.. _ray-worker-logs:

Worker logs
Worker stdout and stderr
~~~~~~~~~~~
Ray's tasks or actors are executed remotely within Ray's worker processes. Ray has special support to improve the visibility of logs produced by workers.
Ray's tasks or actors are executed remotely within Ray's worker processes. Ray has special support to improve the visibility of stdout and stderr produced by workers.

- By default, all of the tasks/actors stdout and stderr are redirected to the worker log files. Check out :ref:`Logging directory structure <logging-directory-structure>` to learn how Ray's logging directory is structured.
- By default, all of the tasks/actors stdout and stderr that is redirected to worker log files are published to the driver. Drivers display logs generated from its tasks/actors to its stdout and stderr.
- By default, stdout and stderr from all tasks and actors are redirected to the worker log files, including any log messages generated by the worker. See :ref:`Logging directory structure <logging-directory-structure>` to understand the structure of the Ray logging directory.
- By default, the driver reads the worker log files to which the stdout and stderr for all tasks and actors are redirected. Drivers display all stdout and stderr generated from their tasks or actors to their own stdout and stderr.

Let's look at a code example to see how this works.

Expand All @@ -37,7 +73,7 @@ Let's look at a code example to see how this works.
ray.get(task.remote())
You should be able to see the string `task` from your driver stdout.
You should be able to see the string `task` from your driver stdout.

When logs are printed, the process id (pid) and an IP address of the node that executes tasks/actors are printed together. Check out the output below.

Expand Down Expand Up @@ -139,10 +175,9 @@ Limitations:
By default, the builtin print will also be patched to use `ray.experimental.tqdm_ray.safe_print` when `tqdm_ray` is used.
This avoids progress bar corruption on driver print statements. To disable this, set `RAY_TQDM_PATCH_PRINT=0`.

How to set up loggers
Customizing Worker Loggers
~~~~~~~~~~~~~~~~~~~~~
When using ray, all of the tasks and actors are executed remotely in Ray's worker processes.
Since Python logger module creates a singleton logger per process, loggers should be configured on per task/actor basis.
When using Ray, all tasks and actors are executed remotely in Ray's worker processes.

.. note::

Expand All @@ -164,22 +199,24 @@ Since Python logger module creates a singleton logger per process, loggers shoul
logging.basicConfig(level=logging.INFO)
def log(self, msg):
logging.info(msg)
logger = logging.getLogger(__name__)
logger.info(msg)
actor = Actor.remote()
ray.get(actor.log.remote("A log message for an actor."))
@ray.remote
def f(msg):
logging.basicConfig(level=logging.INFO)
logging.info(msg)
logger = logging.getLogger(__name__)
logger.info(msg)
ray.get(f.remote("A log message for a task"))
ray.get(f.remote("A log message for a task."))
.. code-block:: bash
(pid=95193) INFO:root:A log message for a task
(pid=95192) INFO:root:A log message for an actor.
(Actor pid=179641) INFO:__main__:A log message for an actor.
(f pid=177572) INFO:__main__:A log message for a task.
How to use structured logging
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -206,11 +243,11 @@ Logging directory structure
---------------------------
.. _logging-directory-structure:

By default, Ray logs are stored in a ``/tmp/ray/session_*/logs`` directory.
By default, Ray logs are stored in a ``/tmp/ray/session_*/logs`` directory.

.. note::

The default temp directory is ``/tmp/ray`` (for Linux and Mac OS). If you'd like to change the temp directory, you can specify it when ``ray start`` or ``ray.init()`` is called.
The default temp directory is ``/tmp/ray`` (for Linux and MacOS). To change the temp directory, specify it when you call ``ray start`` or ``ray.init()``.

A new Ray instance creates a new session ID to the temp directory. The latest session ID is symlinked to ``/tmp/ray/session_latest``.

Expand All @@ -235,7 +272,7 @@ Here's a Ray log directory structure. Note that ``.out`` is logs from stdout/std
For the logs of the actual installations (including e.g. ``pip install`` logs), see the ``runtime_env_setup-[job_id].log`` file (see below).
- ``runtime_env_setup-[job_id].log``: Logs from installing :ref:`runtime environments <runtime-environments>` for a task, actor or job. This file will only be present if a runtime environment is installed.
- ``runtime_env_setup-ray_client_server_[port].log``: Logs from installing :ref:`runtime environments <runtime-environments>` for a job when connecting via :ref:`Ray Client <ray-client-ref>`.
- ``worker-[worker_id]-[job_id]-[pid].[out|err]``: Python/Java part of Ray drivers and workers. All of stdout and stderr from tasks/actors are streamed here. Note that job_id is an id of the driver.-
- ``worker-[worker_id]-[job_id]-[pid].[out|err]``: Python or Java part of Ray drivers and workers. All of stdout and stderr from tasks or actors are streamed here. Note that job_id is an id of the driver.-

.. _ray-log-rotation:

Expand Down
2 changes: 2 additions & 0 deletions python/ray/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# isort: skip_file
from ray._private import log # isort: skip # noqa: F401
import logging
import os
import sys

log.generate_logging_config()
logger = logging.getLogger(__name__)


Expand Down
136 changes: 136 additions & 0 deletions python/ray/_private/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import logging
import re
from logging.config import dictConfig
import threading


class ContextFilter(logging.Filter):
"""A filter that adds ray context info to log records.
This filter adds a package name to append to the message as well as information
about what worker emitted the message, if applicable.
"""

logger_regex = re.compile(r"ray(\.(?P<subpackage>\w+))?(\..*)?")
package_message_names = {
"air": "AIR",
"data": "Data",
"rllib": "RLlib",
"serve": "Serve",
"train": "Train",
"tune": "Tune",
"workflow": "Workflow",
}

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def filter(self, record: logging.LogRecord) -> bool:
"""Add context information to the log record.
This filter adds a package name from where the message was generated as
well as the worker IP address, if applicable.
Args:
record: Record to be filtered
Returns:
True if the record is to be logged, False otherwise. (This filter only
adds context, so records are always logged.)
"""
match = self.logger_regex.search(record.name)
if match and match["subpackage"] in self.package_message_names:
record.package = f"[Ray {self.package_message_names[match['subpackage']]}]"
else:
record.package = ""

return True


class PlainRayHandler(logging.StreamHandler):
"""A plain log handler.
This handler writes to whatever sys.stderr points to at emit-time,
not at instantiation time. See docs for logging._StderrHandler.
"""

def __init__(self):
super().__init__()
self.plain_handler = logging._StderrHandler()
self.plain_handler.level = self.level
self.plain_handler.formatter = logging.Formatter(fmt="%(message)s")

def emit(self, record: logging.LogRecord):
"""Emit the log message.
If this is a worker, bypass fancy logging and just emit the log record.
If this is the driver, emit the message using the appropriate console handler.
Args:
record: Log record to be emitted
"""
import ray

if (
hasattr(ray, "_private")
and ray._private.worker.global_worker.mode
== ray._private.worker.WORKER_MODE
):
self.plain_handler.emit(record)
else:
logging._StderrHandler.emit(self, record)


logger_initialized = False
logging_config_lock = threading.Lock()


def generate_logging_config():
"""Generate the default Ray logging configuration."""
with logging_config_lock:
global logger_initialized
if logger_initialized:
return
logger_initialized = True

formatters = {
"plain": {
"datefmt": "[%Y-%m-%d %H:%M:%S]",
"format": "%(asctime)s %(package)s %(levelname)s %(name)s::%(message)s",
},
}
filters = {"context_filter": {"()": ContextFilter}}
handlers = {
"default": {
"()": PlainRayHandler,
"formatter": "plain",
"filters": ["context_filter"],
}
}

loggers = {
# Default ray logger; any log message that gets propagated here will be
# logged to the console. Disable propagation, as many users will use
# basicConfig to set up a default handler. If so, logs will be
# printed twice unless we prevent propagation here.
"ray": {
"level": "INFO",
"handlers": ["default"],
"propagate": False,
},
# Special handling for ray.rllib: only warning-level messages passed through
# See https://github.com/ray-project/ray/pull/31858 for related PR
"ray.rllib": {
"level": "WARN",
},
}

dictConfig(
{
"version": 1,
"formatters": formatters,
"filters": filters,
"handlers": handlers,
"loggers": loggers,
}
)
Loading

0 comments on commit 360f405

Please sign in to comment.