Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Revert "Global logging format changes" (#34126)" #34182

Merged
merged 3 commits into from
May 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
.jar
/dashboard/client/build

# Kuberay config lives in a separate repository
python/ray/autoscaler/kuberay/config

# Files generated by flatc should be ignored
/src/ray/gcs/format/*_generated.h
/src/ray/object_manager/format/*_generated.h
Expand Down
8 changes: 7 additions & 1 deletion ci/lint/check_api_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,13 @@ def verify(symbol, scanned, ok, output, prefix=None, ignore=None):
verify(ray.air, set(), ok, output)
verify(ray.train, set(), ok, output)
verify(ray.tune, set(), ok, output)
verify(ray, set(), ok, output, ignore=["ray.workflow", "ray.tune", "ray.serve"])
verify(
ray,
set(),
ok,
output,
ignore=["ray.workflow", "ray.tune", "ray.serve"],
)
verify(ray.serve, set(), ok, output)
assert len(ok) >= 500, len(ok)
# TODO(ekl) enable it for all modules.
Expand Down
18 changes: 17 additions & 1 deletion dashboard/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import logging.handlers
import os
import pathlib
import sys
import signal

Expand All @@ -18,7 +19,10 @@
from ray._private.gcs_pubsub import GcsAioPublisher
from ray._raylet import GcsClient
from ray._private.gcs_utils import GcsAioClient
from ray._private.ray_logging import setup_component_logger
from ray._private.ray_logging import (
setup_component_logger,
configure_log_file,
)
from ray.core.generated import agent_manager_pb2, agent_manager_pb2_grpc
from ray.experimental.internal_kv import (
_initialize_internal_kv,
Expand Down Expand Up @@ -341,6 +345,14 @@ async def _check_parent():
await self.http_server.cleanup()


def open_capture_files(log_dir):
filename = f"agent-{args.agent_id}"
return (
ray._private.utils.open_log(pathlib.Path(log_dir) / f"{filename}.out"),
ray._private.utils.open_log(pathlib.Path(log_dir) / f"{filename}.err"),
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Dashboard agent.")
parser.add_argument(
Expand Down Expand Up @@ -507,6 +519,10 @@ async def _check_parent():
# w.r.t grpc server init in the DashboardAgent initializer.
loop = ray._private.utils.get_or_create_event_loop()

# Setup stdout/stderr redirect files
out_file, err_file = open_capture_files(args.log_dir)
configure_log_file(out_file, err_file)

agent = DashboardAgent(
args.node_ip_address,
args.dashboard_agent_port,
Expand Down
71 changes: 54 additions & 17 deletions doc/source/ray-observability/ray-logging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,43 @@

Logging
=======
This document will explain Ray's logging system and its best practices.
This document explains Ray's logging system and related best practices.

Internal Ray Logging Configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When ``import ray`` is executed, Ray's logger is initialized, generating a sensible configuration given in ``python/ray/_private/log.py``. The default logging level is ``logging.INFO``.

All ray loggers are automatically configured in ``ray._private.ray_logging``. To change the Ray library logging configuration:

.. code-block:: python

import logging

logger = logging.getLogger("ray")
logger # Modify the ray logging config

Similarly, to modify the logging configuration for any Ray subcomponent, specify the appropriate logger name:

.. code-block:: python

import logging

# First, get the handle for the logger you want to modify
ray_data_logger = logging.getLogger("ray.data")
ray_tune_logger = logging.getLogger("ray.tune")
ray_rllib_logger = logging.getLogger("ray.rllib")
ray_air_logger = logging.getLogger("ray.air")
ray_train_logger = logging.getLogger("ray.train")
ray_workflow_logger = logging.getLogger("ray.workflow")

# Modify the ray.data logging level
ray_data_logger.setLevel(logging.WARNING)

# Other loggers can be modified similarly.
# Here's how to add an aditional file handler for ray tune:
ray_tune_logger.addHandler(logging.FileHandler("extra_ray_tune_log.log"))

For more information about logging in workers, see :ref:`Customizing worker loggers`.

Driver logs
~~~~~~~~~~~
Expand All @@ -16,12 +52,12 @@ The log file consists of the stdout of the entrypoint command of the job. For t

.. _ray-worker-logs:

Worker logs
Worker stdout and stderr
~~~~~~~~~~~
Ray's tasks or actors are executed remotely within Ray's worker processes. Ray has special support to improve the visibility of logs produced by workers.
Ray's tasks or actors are executed remotely within Ray's worker processes. Ray has special support to improve the visibility of stdout and stderr produced by workers.

- By default, all of the tasks/actors stdout and stderr are redirected to the worker log files. Check out :ref:`Logging directory structure <logging-directory-structure>` to learn how Ray's logging directory is structured.
- By default, all of the tasks/actors stdout and stderr that is redirected to worker log files are published to the driver. Drivers display logs generated from its tasks/actors to its stdout and stderr.
- By default, stdout and stderr from all tasks and actors are redirected to the worker log files, including any log messages generated by the worker. See :ref:`Logging directory structure <logging-directory-structure>` to understand the structure of the Ray logging directory.
- By default, the driver reads the worker log files to which the stdout and stderr for all tasks and actors are redirected. Drivers display all stdout and stderr generated from their tasks or actors to their own stdout and stderr.

Let's look at a code example to see how this works.

Expand All @@ -37,7 +73,7 @@ Let's look at a code example to see how this works.

ray.get(task.remote())

You should be able to see the string `task` from your driver stdout.
You should be able to see the string `task` from your driver stdout.

When logs are printed, the process id (pid) and an IP address of the node that executes tasks/actors are printed together. Check out the output below.

Expand Down Expand Up @@ -139,10 +175,9 @@ Limitations:
By default, the builtin print will also be patched to use `ray.experimental.tqdm_ray.safe_print` when `tqdm_ray` is used.
This avoids progress bar corruption on driver print statements. To disable this, set `RAY_TQDM_PATCH_PRINT=0`.

How to set up loggers
Customizing Worker Loggers
~~~~~~~~~~~~~~~~~~~~~
When using ray, all of the tasks and actors are executed remotely in Ray's worker processes.
Since Python logger module creates a singleton logger per process, loggers should be configured on per task/actor basis.
When using Ray, all tasks and actors are executed remotely in Ray's worker processes.

.. note::

Expand All @@ -164,22 +199,24 @@ Since Python logger module creates a singleton logger per process, loggers shoul
logging.basicConfig(level=logging.INFO)

def log(self, msg):
logging.info(msg)
logger = logging.getLogger(__name__)
logger.info(msg)

actor = Actor.remote()
ray.get(actor.log.remote("A log message for an actor."))

@ray.remote
def f(msg):
logging.basicConfig(level=logging.INFO)
logging.info(msg)
logger = logging.getLogger(__name__)
logger.info(msg)

ray.get(f.remote("A log message for a task"))
ray.get(f.remote("A log message for a task."))

.. code-block:: bash

(pid=95193) INFO:root:A log message for a task
(pid=95192) INFO:root:A log message for an actor.
(Actor pid=179641) INFO:__main__:A log message for an actor.
(f pid=177572) INFO:__main__:A log message for a task.

How to use structured logging
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -206,11 +243,11 @@ Logging directory structure
---------------------------
.. _logging-directory-structure:

By default, Ray logs are stored in a ``/tmp/ray/session_*/logs`` directory.
By default, Ray logs are stored in a ``/tmp/ray/session_*/logs`` directory.

.. note::

The default temp directory is ``/tmp/ray`` (for Linux and Mac OS). If you'd like to change the temp directory, you can specify it when ``ray start`` or ``ray.init()`` is called.
The default temp directory is ``/tmp/ray`` (for Linux and MacOS). To change the temp directory, specify it when you call ``ray start`` or ``ray.init()``.

A new Ray instance creates a new session ID to the temp directory. The latest session ID is symlinked to ``/tmp/ray/session_latest``.

Expand All @@ -235,7 +272,7 @@ Here's a Ray log directory structure. Note that ``.out`` is logs from stdout/std
For the logs of the actual installations (including e.g. ``pip install`` logs), see the ``runtime_env_setup-[job_id].log`` file (see below).
- ``runtime_env_setup-[job_id].log``: Logs from installing :ref:`runtime environments <runtime-environments>` for a task, actor or job. This file will only be present if a runtime environment is installed.
- ``runtime_env_setup-ray_client_server_[port].log``: Logs from installing :ref:`runtime environments <runtime-environments>` for a job when connecting via :ref:`Ray Client <ray-client-ref>`.
- ``worker-[worker_id]-[job_id]-[pid].[out|err]``: Python/Java part of Ray drivers and workers. All of stdout and stderr from tasks/actors are streamed here. Note that job_id is an id of the driver.-
- ``worker-[worker_id]-[job_id]-[pid].[out|err]``: Python or Java part of Ray drivers and workers. All of stdout and stderr from tasks or actors are streamed here. Note that job_id is an id of the driver.-

.. _ray-log-rotation:

Expand Down
2 changes: 2 additions & 0 deletions python/ray/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# isort: skip_file
from ray._private import log # isort: skip # noqa: F401
import logging
import os
import sys

log.generate_logging_config()
logger = logging.getLogger(__name__)


Expand Down
136 changes: 136 additions & 0 deletions python/ray/_private/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import logging
import re
from logging.config import dictConfig
import threading


class ContextFilter(logging.Filter):
"""A filter that adds ray context info to log records.

This filter adds a package name to append to the message as well as information
about what worker emitted the message, if applicable.
"""

logger_regex = re.compile(r"ray(\.(?P<subpackage>\w+))?(\..*)?")
package_message_names = {
"air": "AIR",
"data": "Data",
"rllib": "RLlib",
"serve": "Serve",
"train": "Train",
"tune": "Tune",
"workflow": "Workflow",
}

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def filter(self, record: logging.LogRecord) -> bool:
"""Add context information to the log record.

This filter adds a package name from where the message was generated as
well as the worker IP address, if applicable.

Args:
record: Record to be filtered

Returns:
True if the record is to be logged, False otherwise. (This filter only
adds context, so records are always logged.)
"""
match = self.logger_regex.search(record.name)
if match and match["subpackage"] in self.package_message_names:
record.package = f"[Ray {self.package_message_names[match['subpackage']]}]"
else:
record.package = ""

return True


class PlainRayHandler(logging.StreamHandler):
"""A plain log handler.

This handler writes to whatever sys.stderr points to at emit-time,
not at instantiation time. See docs for logging._StderrHandler.
"""

def __init__(self):
super().__init__()
self.plain_handler = logging._StderrHandler()
self.plain_handler.level = self.level
self.plain_handler.formatter = logging.Formatter(fmt="%(message)s")

def emit(self, record: logging.LogRecord):
"""Emit the log message.

If this is a worker, bypass fancy logging and just emit the log record.
If this is the driver, emit the message using the appropriate console handler.

Args:
record: Log record to be emitted
"""
import ray

if (
hasattr(ray, "_private")
and ray._private.worker.global_worker.mode
== ray._private.worker.WORKER_MODE
):
self.plain_handler.emit(record)
else:
logging._StderrHandler.emit(self, record)


logger_initialized = False
logging_config_lock = threading.Lock()


def generate_logging_config():
"""Generate the default Ray logging configuration."""
with logging_config_lock:
global logger_initialized
if logger_initialized:
return
logger_initialized = True

formatters = {
"plain": {
"datefmt": "[%Y-%m-%d %H:%M:%S]",
"format": "%(asctime)s %(package)s %(levelname)s %(name)s::%(message)s",
},
}
filters = {"context_filter": {"()": ContextFilter}}
handlers = {
"default": {
"()": PlainRayHandler,
"formatter": "plain",
"filters": ["context_filter"],
}
}

loggers = {
# Default ray logger; any log message that gets propagated here will be
# logged to the console. Disable propagation, as many users will use
# basicConfig to set up a default handler. If so, logs will be
# printed twice unless we prevent propagation here.
"ray": {
"level": "INFO",
"handlers": ["default"],
"propagate": False,
},
# Special handling for ray.rllib: only warning-level messages passed through
# See https://github.com/ray-project/ray/pull/31858 for related PR
"ray.rllib": {
"level": "WARN",
},
}

dictConfig(
{
"version": 1,
"formatters": formatters,
"filters": filters,
"handlers": handlers,
"loggers": loggers,
}
)
Loading