From ee7d0fbdd630fc06c4202edb5e824d87882a3eda Mon Sep 17 00:00:00 2001 From: rmorotti Date: Fri, 28 Jun 2024 17:11:01 +0100 Subject: [PATCH] PERF: download in chunks of 256 kB + limit progress bar to 5 refresh/sec --- news/12810.feature.rst | 5 +++++ src/pip/_internal/cli/progress_bars.py | 2 +- src/pip/_internal/network/download.py | 6 +++--- src/pip/_internal/network/utils.py | 6 ++++-- src/pip/_internal/utils/misc.py | 12 +++++------- 5 files changed, 18 insertions(+), 13 deletions(-) create mode 100644 news/12810.feature.rst diff --git a/news/12810.feature.rst b/news/12810.feature.rst new file mode 100644 index 00000000000..fd236947e4d --- /dev/null +++ b/news/12810.feature.rst @@ -0,0 +1,5 @@ +Improve download performance. Download packages and update the +progress bar in larger chunks of 256 kB, up from 10 kB. +Limit the progress bar to 5 refreshes per second. +Improve hash performance. Read package files in larger chunks of 1 MB, +up from 8192 bytes. diff --git a/src/pip/_internal/cli/progress_bars.py b/src/pip/_internal/cli/progress_bars.py index b842b1b316a..883359c9ce7 100644 --- a/src/pip/_internal/cli/progress_bars.py +++ b/src/pip/_internal/cli/progress_bars.py @@ -49,7 +49,7 @@ def _rich_progress_bar( TimeRemainingColumn(), ) - progress = Progress(*columns, refresh_per_second=30) + progress = Progress(*columns, refresh_per_second=5) task_id = progress.add_task(" " * (get_indentation() + 2), total=total) with progress: for chunk in iterable: diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index 032fdd0314f..5c3bce3d2fd 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -7,7 +7,7 @@ import os from typing import Iterable, Optional, Tuple -from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response +from pip._vendor.requests.models import Response from pip._internal.cli.progress_bars import get_download_progress_renderer from pip._internal.exceptions import 
NetworkConnectionError @@ -56,12 +56,12 @@ def _prepare_download( show_progress = False elif not total_length: show_progress = True - elif total_length > (40 * 1000): + elif total_length > (512 * 1024): show_progress = True else: show_progress = False - chunks = response_chunks(resp, CONTENT_CHUNK_SIZE) + chunks = response_chunks(resp) if not show_progress: return chunks diff --git a/src/pip/_internal/network/utils.py b/src/pip/_internal/network/utils.py index 134848ae526..bba4c265e89 100644 --- a/src/pip/_internal/network/utils.py +++ b/src/pip/_internal/network/utils.py @@ -1,6 +1,6 @@ from typing import Dict, Generator -from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response +from pip._vendor.requests.models import Response from pip._internal.exceptions import NetworkConnectionError @@ -25,6 +25,8 @@ # possible to make this work. HEADERS: Dict[str, str] = {"Accept-Encoding": "identity"} +DOWNLOAD_CHUNK_SIZE = 256 * 1024 + def raise_for_status(resp: Response) -> None: http_error_msg = "" @@ -55,7 +57,7 @@ def raise_for_status(resp: Response) -> None: def response_chunks( - response: Response, chunk_size: int = CONTENT_CHUNK_SIZE + response: Response, chunk_size: int = DOWNLOAD_CHUNK_SIZE ) -> Generator[bytes, None, None]: """Given a requests Response, provide the data chunks.""" try: diff --git a/src/pip/_internal/utils/misc.py b/src/pip/_internal/utils/misc.py index 16d5a1cee10..3707e872684 100644 --- a/src/pip/_internal/utils/misc.py +++ b/src/pip/_internal/utils/misc.py @@ -1,7 +1,6 @@ import errno import getpass import hashlib -import io import logging import os import posixpath @@ -70,6 +69,8 @@ OnExc = Callable[[FunctionType, Path, BaseException], Any] OnErr = Callable[[FunctionType, Path, ExcInfo], Any] +FILE_CHUNK_SIZE = 1024 * 1024 + def get_pip_version() -> str: pip_pkg_dir = os.path.join(os.path.dirname(__file__), "..", "..") @@ -122,9 +123,7 @@ def get_prog() -> str: # Retry every half second for up to 3 seconds @retry(stop_after_delay=3, 
wait=0.5) def rmtree( - dir: str, - ignore_errors: bool = False, - onexc: Optional[OnExc] = None, + dir: str, ignore_errors: bool = False, onexc: Optional[OnExc] = None ) -> None: if ignore_errors: onexc = _onerror_ignore @@ -313,7 +312,7 @@ def is_installable_dir(path: str) -> bool: def read_chunks( - file: BinaryIO, size: int = io.DEFAULT_BUFFER_SIZE + file: BinaryIO, size: int = FILE_CHUNK_SIZE ) -> Generator[bytes, None, None]: """Yield pieces of data from a file-like object until EOF.""" while True: @@ -643,8 +642,7 @@ def pairwise(iterable: Iterable[Any]) -> Iterator[Tuple[Any, Any]]: def partition( - pred: Callable[[T], bool], - iterable: Iterable[T], + pred: Callable[[T], bool], iterable: Iterable[T] ) -> Tuple[Iterable[T], Iterable[T]]: """ Use a predicate to partition entries into false entries and true entries,