Skip to content

Commit

Permalink
Merge pull request #12810 from man-group/perf-download
Browse files Browse the repository at this point in the history
PERF: download in chunks of 256 kB and compute hashes in chunks of 1 MB; did you know the progress bar was 30% of the runtime?
  • Loading branch information
pfmoore authored Jul 17, 2024
2 parents 7ec5fc3 + ee7d0fb commit 5fb46a3
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 13 deletions.
5 changes: 5 additions & 0 deletions news/12810.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Improve download performance. Download packages and update the
progress bar in larger chunks of 256 kB, up from 10 kB.
Limit the progress bar to 5 refreshes per second.
Improve hash performance. Read package files in larger chunks of 1 MB,
up from 8192 bytes.
2 changes: 1 addition & 1 deletion src/pip/_internal/cli/progress_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _rich_progress_bar(
TimeRemainingColumn(),
)

progress = Progress(*columns, refresh_per_second=30)
progress = Progress(*columns, refresh_per_second=5)
task_id = progress.add_task(" " * (get_indentation() + 2), total=total)
with progress:
for chunk in iterable:
Expand Down
6 changes: 3 additions & 3 deletions src/pip/_internal/network/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
from typing import Iterable, Optional, Tuple

from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._vendor.requests.models import Response

from pip._internal.cli.progress_bars import get_download_progress_renderer
from pip._internal.exceptions import NetworkConnectionError
Expand Down Expand Up @@ -56,12 +56,12 @@ def _prepare_download(
show_progress = False
elif not total_length:
show_progress = True
elif total_length > (40 * 1000):
elif total_length > (512 * 1024):
show_progress = True
else:
show_progress = False

chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
chunks = response_chunks(resp)

if not show_progress:
return chunks
Expand Down
6 changes: 4 additions & 2 deletions src/pip/_internal/network/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, Generator

from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._vendor.requests.models import Response

from pip._internal.exceptions import NetworkConnectionError

Expand All @@ -25,6 +25,8 @@
# possible to make this work.
HEADERS: Dict[str, str] = {"Accept-Encoding": "identity"}

DOWNLOAD_CHUNK_SIZE = 256 * 1024


def raise_for_status(resp: Response) -> None:
http_error_msg = ""
Expand Down Expand Up @@ -55,7 +57,7 @@ def raise_for_status(resp: Response) -> None:


def response_chunks(
response: Response, chunk_size: int = CONTENT_CHUNK_SIZE
response: Response, chunk_size: int = DOWNLOAD_CHUNK_SIZE
) -> Generator[bytes, None, None]:
"""Given a requests Response, provide the data chunks."""
try:
Expand Down
12 changes: 5 additions & 7 deletions src/pip/_internal/utils/misc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import errno
import getpass
import hashlib
import io
import logging
import os
import posixpath
Expand Down Expand Up @@ -70,6 +69,8 @@
OnExc = Callable[[FunctionType, Path, BaseException], Any]
OnErr = Callable[[FunctionType, Path, ExcInfo], Any]

FILE_CHUNK_SIZE = 1024 * 1024


def get_pip_version() -> str:
pip_pkg_dir = os.path.join(os.path.dirname(__file__), "..", "..")
Expand Down Expand Up @@ -122,9 +123,7 @@ def get_prog() -> str:
# Retry every half second for up to 3 seconds
@retry(stop_after_delay=3, wait=0.5)
def rmtree(
dir: str,
ignore_errors: bool = False,
onexc: Optional[OnExc] = None,
dir: str, ignore_errors: bool = False, onexc: Optional[OnExc] = None
) -> None:
if ignore_errors:
onexc = _onerror_ignore
Expand Down Expand Up @@ -313,7 +312,7 @@ def is_installable_dir(path: str) -> bool:


def read_chunks(
file: BinaryIO, size: int = io.DEFAULT_BUFFER_SIZE
file: BinaryIO, size: int = FILE_CHUNK_SIZE
) -> Generator[bytes, None, None]:
"""Yield pieces of data from a file-like object until EOF."""
while True:
Expand Down Expand Up @@ -643,8 +642,7 @@ def pairwise(iterable: Iterable[Any]) -> Iterator[Tuple[Any, Any]]:


def partition(
pred: Callable[[T], bool],
iterable: Iterable[T],
pred: Callable[[T], bool], iterable: Iterable[T]
) -> Tuple[Iterable[T], Iterable[T]]:
"""
Use a predicate to partition entries into false entries and true entries,
Expand Down

0 comments on commit 5fb46a3

Please sign in to comment.