From 58163729d912d14761139cccbcb9c155e4736694 Mon Sep 17 00:00:00 2001 From: Scott Lee Date: Thu, 25 Jul 2024 12:30:06 -0700 Subject: [PATCH 1/5] truncate progress bar description Signed-off-by: Scott Lee --- python/ray/data/_internal/progress_bar.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/ray/data/_internal/progress_bar.py b/python/ray/data/_internal/progress_bar.py index e117dc1be481..0b6d7efc9db2 100644 --- a/python/ray/data/_internal/progress_bar.py +++ b/python/ray/data/_internal/progress_bar.py @@ -44,6 +44,10 @@ class ProgressBar: because no tasks have finished yet), doesn't display the full progress bar. Still displays basic progress stats from tqdm.""" + # If the name/description of the progress bar exceeds this length, + # it will be truncated. + MAX_NAME_LENGTH = 100 + def __init__( self, name: str, @@ -51,8 +55,11 @@ def __init__( unit: str, position: int = 0, enabled: Optional[bool] = None, + display_full_name: bool = False, ): - self._desc = name + # If True, disables name trunctating. + self._display_full_name = display_full_name + self._desc = self._truncate_name(name) self._progress = 0 # Prepend a space to the unit for better formatting. if unit[0] != " ": @@ -83,6 +90,11 @@ def __init__( needs_warning = False self._bar = None + def _truncate_name(self, name: str) -> str: + if not self._display_full_name and len(name) > self.MAX_NAME_LENGTH: + return name[: self.MAX_NAME_LENGTH - 3] + "..." 
+ return name + def block_until_complete(self, remaining: List[ObjectRef]) -> None: t = threading.current_thread() while remaining: @@ -117,6 +129,7 @@ def fetch_until_complete(self, refs: List[ObjectRef]) -> List[Any]: return [ref_to_result[ref] for ref in refs] def set_description(self, name: str) -> None: + name = self._truncate_name(name) if self._bar and name != self._desc: self._desc = name self._bar.set_description(self._desc) From 670bb30519eaa6d4324f0d770277eba1148bef28 Mon Sep 17 00:00:00 2001 From: Scott Lee Date: Thu, 25 Jul 2024 20:39:08 -0700 Subject: [PATCH 2/5] add context var Signed-off-by: Scott Lee --- python/ray/data/_internal/progress_bar.py | 6 ++---- python/ray/data/context.py | 9 +++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/python/ray/data/_internal/progress_bar.py b/python/ray/data/_internal/progress_bar.py index 0b6d7efc9db2..a9ccdb1d72c5 100644 --- a/python/ray/data/_internal/progress_bar.py +++ b/python/ray/data/_internal/progress_bar.py @@ -55,10 +55,7 @@ def __init__( unit: str, position: int = 0, enabled: Optional[bool] = None, - display_full_name: bool = False, ): - # If True, disables name trunctating. - self._display_full_name = display_full_name self._desc = self._truncate_name(name) self._progress = 0 # Prepend a space to the unit for better formatting. @@ -91,7 +88,8 @@ def __init__( self._bar = None def _truncate_name(self, name: str) -> str: - if not self._display_full_name and len(name) > self.MAX_NAME_LENGTH: + ctx = ray.data.context.DataContext.get_current() + if ctx.enable_progress_bar_name_truncation and len(name) > self.MAX_NAME_LENGTH: return name[: self.MAX_NAME_LENGTH - 3] + "..." 
return name diff --git a/python/ray/data/context.py b/python/ray/data/context.py index 4f1c7d508c50..ad266336eb9b 100644 --- a/python/ray/data/context.py +++ b/python/ray/data/context.py @@ -83,6 +83,9 @@ DEFAULT_ENABLE_PROGRESS_BARS = not bool( env_integer("RAY_DATA_DISABLE_PROGRESS_BARS", 0) ) +DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION = env_bool( + "RAY_DATA_ENABLE_PROGRESS_BAR_NAME_TRUNCATION", True +) DEFAULT_ENABLE_GET_OBJECT_LOCATIONS_FOR_METRICS = False @@ -209,6 +212,9 @@ class DataContext: to use. use_ray_tqdm: Whether to enable distributed tqdm. enable_progress_bars: Whether to enable progress bars. + enable_progress_bar_name_truncation: If True, the name of the progress bar + (often the operator name) will be truncated if it exceeds + `ProgressBar.MAX_NAME_LENGTH`. Otherwise, the full operator name is shown. enable_get_object_locations_for_metrics: Whether to enable ``get_object_locations`` for metrics. write_file_retry_on_errors: A list of substrings of error messages that should @@ -271,6 +277,9 @@ class DataContext: ) use_ray_tqdm: bool = DEFAULT_USE_RAY_TQDM enable_progress_bars: bool = DEFAULT_ENABLE_PROGRESS_BARS + enable_progress_bar_name_truncation: bool = ( + DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION + ) enable_get_object_locations_for_metrics: bool = ( DEFAULT_ENABLE_GET_OBJECT_LOCATIONS_FOR_METRICS ) From dc0095aee4fd845ecb5860b35d1777d104e418b7 Mon Sep 17 00:00:00 2001 From: Scott Lee Date: Mon, 29 Jul 2024 11:07:28 -0700 Subject: [PATCH 3/5] include first/last op Signed-off-by: Scott Lee --- python/ray/data/_internal/progress_bar.py | 31 ++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/python/ray/data/_internal/progress_bar.py b/python/ray/data/_internal/progress_bar.py index a9ccdb1d72c5..64efcb83a569 100644 --- a/python/ray/data/_internal/progress_bar.py +++ b/python/ray/data/_internal/progress_bar.py @@ -1,3 +1,4 @@ +import logging import threading from typing import Any, List, Optional @@ -5,6 
+6,9 @@ from ray.experimental import tqdm_ray from ray.types import ObjectRef from ray.util.annotations import Deprecated +from ray.util.debug import log_once + +logger = logging.getLogger(__name__) try: import tqdm @@ -89,9 +93,30 @@ def __init__( def _truncate_name(self, name: str) -> str: ctx = ray.data.context.DataContext.get_current() - if ctx.enable_progress_bar_name_truncation and len(name) > self.MAX_NAME_LENGTH: - return name[: self.MAX_NAME_LENGTH - 3] + "..." - return name + if ( + not ctx.enable_progress_bar_name_truncation + or len(name) <= self.MAX_NAME_LENGTH + ): + return name + + if log_once("ray_data_truncate_operator_name"): + logger.warning( + f"Truncating long operator name to {self.MAX_NAME_LENGTH} characters." + "To disable this behavior, set `ray.data.DataContext.get_current()." + "DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION = False`." + ) + op_names = name.split("->") + # Include as many operators as possible without exceeding `MAX_NAME_LENGTH`. + # Always include the first and last operator names so + # it is easy to identify the DAG. 
+ truncated_op_names = [op_names[0]] + for i, op_name in enumerate(op_names[1:-1]): + if len("->".join(truncated_op_names)) + len(op_name) > self.MAX_NAME_LENGTH: + truncated_op_names.append("...") + break + truncated_op_names.append(op_name) + truncated_op_names.append(op_names[-1]) + return "->".join(truncated_op_names) def block_until_complete(self, remaining: List[ObjectRef]) -> None: t = threading.current_thread() From a963e89202b109f5f5a7d1075004758694f60b00 Mon Sep 17 00:00:00 2001 From: Scott Lee Date: Mon, 29 Jul 2024 11:20:29 -0700 Subject: [PATCH 4/5] handle last op Signed-off-by: Scott Lee --- python/ray/data/_internal/progress_bar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/ray/data/_internal/progress_bar.py b/python/ray/data/_internal/progress_bar.py index 64efcb83a569..7bfc01e326af 100644 --- a/python/ray/data/_internal/progress_bar.py +++ b/python/ray/data/_internal/progress_bar.py @@ -115,7 +115,8 @@ def _truncate_name(self, name: str) -> str: truncated_op_names.append("...") break truncated_op_names.append(op_name) - truncated_op_names.append(op_names[-1]) + if len(op_names) > 1: + truncated_op_names.append(op_names[-1]) return "->".join(truncated_op_names) def block_until_complete(self, remaining: List[ObjectRef]) -> None: From 02a2b307888c132ee8385f9e50263c152e9098f4 Mon Sep 17 00:00:00 2001 From: Scott Lee Date: Tue, 30 Jul 2024 10:04:11 -0700 Subject: [PATCH 5/5] comments Signed-off-by: Scott Lee --- python/ray/data/_internal/progress_bar.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/python/ray/data/_internal/progress_bar.py b/python/ray/data/_internal/progress_bar.py index 7bfc01e326af..e7cfa667010a 100644 --- a/python/ray/data/_internal/progress_bar.py +++ b/python/ray/data/_internal/progress_bar.py @@ -106,17 +106,25 @@ def _truncate_name(self, name: str) -> str: "DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION = False`." 
+ ) op_names = name.split("->") - # Include as many operators as possible without exceeding `MAX_NAME_LENGTH`. - # Always include the first and last operator names so - # it is easy to identify the DAG. + if len(op_names) == 1: + return op_names[0] + + # Include as many operators as possible while staying approximately + # within `MAX_NAME_LENGTH`. Always include the first and + # last operator names so it is easy to identify the DAG. truncated_op_names = [op_names[0]] - for i, op_name in enumerate(op_names[1:-1]): - if len("->".join(truncated_op_names)) + len(op_name) > self.MAX_NAME_LENGTH: + for op_name in op_names[1:-1]: + if ( + len("->".join(truncated_op_names)) + + len("->") + + len(op_name) + + len("->") + + len(op_names[-1]) + ) > self.MAX_NAME_LENGTH: truncated_op_names.append("...") break truncated_op_names.append(op_name) - if len(op_names) > 1: - truncated_op_names.append(op_names[-1]) + truncated_op_names.append(op_names[-1]) return "->".join(truncated_op_names) def block_until_complete(self, remaining: List[ObjectRef]) -> None: