Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data] Truncate progress bar description #46801

Merged
merged 5 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion python/ray/data/_internal/progress_bar.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import logging
import threading
from typing import Any, List, Optional

import ray
from ray.experimental import tqdm_ray
from ray.types import ObjectRef
from ray.util.annotations import Deprecated
from ray.util.debug import log_once

logger = logging.getLogger(__name__)

try:
import tqdm
Expand Down Expand Up @@ -44,6 +48,10 @@ class ProgressBar:
because no tasks have finished yet), doesn't display the full
progress bar. Still displays basic progress stats from tqdm."""

# If the name/description of the progress bar exceeds this length,
# it will be truncated.
MAX_NAME_LENGTH = 100

def __init__(
self,
name: str,
Expand All @@ -52,7 +60,7 @@ def __init__(
position: int = 0,
enabled: Optional[bool] = None,
):
self._desc = name
self._desc = self._truncate_name(name)
self._progress = 0
# Prepend a space to the unit for better formatting.
if unit[0] != " ":
Expand Down Expand Up @@ -83,6 +91,34 @@ def __init__(
needs_warning = False
self._bar = None

def _truncate_name(self, name: str) -> str:
    """Shorten a long ``->``-separated operator chain for display.

    The name is returned unchanged when truncation is disabled on the
    current ``DataContext`` or when it already fits within
    ``MAX_NAME_LENGTH``. Otherwise the first and last operator names are
    always kept (so the DAG stays easy to identify), as many of the
    operators in between as fit are kept in order, and the dropped
    remainder is replaced by a single ``...`` entry.

    Args:
        name: Progress bar name/description, typically operator names
            joined by ``->``.

    Returns:
        The possibly truncated name. It can still exceed
        ``MAX_NAME_LENGTH`` when the first and last operator names alone
        are too long, because those two are never dropped.
    """
    ctx = ray.data.context.DataContext.get_current()
    if (
        not ctx.enable_progress_bar_name_truncation
        or len(name) <= self.MAX_NAME_LENGTH
    ):
        return name

    # Warn only once: users would otherwise not know that the name is being
    # shortened or how to turn the behavior off, and repeating the message
    # for every progress bar would just be noise.
    if log_once("ray_data_truncate_operator_name"):
        logger.warning(
            f"Truncating long operator name to {self.MAX_NAME_LENGTH} "
            "characters. To disable this behavior, set "
            "`ray.data.DataContext.get_current()."
            "enable_progress_bar_name_truncation = False`."
        )

    separator = "->"
    placeholder = "..."
    op_names = name.split(separator)
    if len(op_names) == 1:
        # A single operator name has no middle section that could be dropped.
        return name

    first, *middle, last = op_names
    kept = [first]
    used = len(first)
    # The full name is known to be too long, so whenever there are middle
    # operators at least one of them is dropped. Reserve room up front for
    # the "->...->last" tail that will follow the kept operators.
    tail_cost = len(separator) + len(placeholder) + len(separator) + len(last)
    for op_name in middle:
        op_cost = len(separator) + len(op_name)
        if used + op_cost + tail_cost > self.MAX_NAME_LENGTH:
            break
        kept.append(op_name)
        used += op_cost

    # `kept` holds `first` plus the retained middle operators.
    if len(kept) - 1 < len(middle):
        kept.append(placeholder)
    kept.append(last)
    return separator.join(kept)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Not a big deal, but I think there are some edge cases where the truncated name can exceed MAX_NAME_LENGTH because we don't account for the last name or the additional "->"s.

Suggested change
op_names = name.split("->")
# Include as many operators as possible without exceeding `MAX_NAME_LENGTH`.
# Always include the first and last operator names so
# it is easy to identify the DAG.
truncated_op_names = [op_names[0]]
for i, op_name in enumerate(op_names[1:-1]):
if len("->".join(truncated_op_names)) + len(op_name) > self.MAX_NAME_LENGTH:
truncated_op_names.append("...")
break
truncated_op_names.append(op_name)
if len(op_names) > 1:
truncated_op_names.append(op_names[-1])
return "->".join(truncated_op_names)
op_names = name.split("->")
if len(op_names) == 1:
return op_names[0]
else:
# Include as many operators as possible without exceeding `MAX_NAME_LENGTH`.
# Always include the first and last operator names so
# it is easy to identify the DAG.
truncated_op_names = [op_names[0]]
for op_name in op_names[1:-1]:
if len("->".join(truncated_op_names)) + len("->") + len(op_name) + len("->") + len(op_names[-1]) > self.MAX_NAME_LENGTH:
truncated_op_names.append("...")
break
truncated_op_names.append(op_name)
truncated_op_names.append(op_names[-1])
return "->".join(truncated_op_names)


def block_until_complete(self, remaining: List[ObjectRef]) -> None:
t = threading.current_thread()
while remaining:
Expand Down Expand Up @@ -117,6 +153,7 @@ def fetch_until_complete(self, refs: List[ObjectRef]) -> List[Any]:
return [ref_to_result[ref] for ref in refs]

def set_description(self, name: str) -> None:
    """Set a new description on the progress bar.

    The name is first passed through ``self._truncate_name``. Nothing
    happens when ``self._bar`` is falsy or when the resulting description
    equals the one already stored in ``self._desc``.

    Args:
        name: The new (untruncated) description.
    """
    shortened = self._truncate_name(name)
    if not self._bar or shortened == self._desc:
        return
    self._desc = shortened
    self._bar.set_description(self._desc)
Expand Down
9 changes: 9 additions & 0 deletions python/ray/data/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@
# Progress bars default to enabled; the value is the negation of the
# RAY_DATA_DISABLE_PROGRESS_BARS setting (fallback 0).
# NOTE(review): presumably `env_integer` reads an environment variable as an
# int -- confirm against its definition.
DEFAULT_ENABLE_PROGRESS_BARS = not bool(
env_integer("RAY_DATA_DISABLE_PROGRESS_BARS", 0)
)
# Default for `DataContext.enable_progress_bar_name_truncation`, taken from
# RAY_DATA_ENABLE_PROGRESS_BAR_NAME_TRUNCATION with a fallback of True.
# NOTE(review): presumably `env_bool` parses an environment variable as a
# boolean -- confirm against its definition.
DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION = env_bool(
"RAY_DATA_ENABLE_PROGRESS_BAR_NAME_TRUNCATION", True
)

DEFAULT_ENABLE_GET_OBJECT_LOCATIONS_FOR_METRICS = False

Expand Down Expand Up @@ -209,6 +212,9 @@ class DataContext:
to use.
use_ray_tqdm: Whether to enable distributed tqdm.
enable_progress_bars: Whether to enable progress bars.
enable_progress_bar_name_truncation: If True, the name of the progress bar
(often the operator name) will be truncated if it exceeds
`ProgressBar.MAX_NAME_LENGTH`. Otherwise, the full operator name is shown.
enable_get_object_locations_for_metrics: Whether to enable
``get_object_locations`` for metrics.
write_file_retry_on_errors: A list of substrings of error messages that should
Expand Down Expand Up @@ -271,6 +277,9 @@ class DataContext:
)
use_ray_tqdm: bool = DEFAULT_USE_RAY_TQDM
enable_progress_bars: bool = DEFAULT_ENABLE_PROGRESS_BARS
enable_progress_bar_name_truncation: bool = (
DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION
)
enable_get_object_locations_for_metrics: bool = (
DEFAULT_ENABLE_GET_OBJECT_LOCATIONS_FOR_METRICS
)
Expand Down