Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor requirement preparer to remove duplicated code paths for metadata-only dists #12871

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/12863.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Cache "concrete" dists by ``Distribution`` instead of ``InstallRequirement``.
1 change: 1 addition & 0 deletions news/12871.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor much of ``RequirementPreparer`` to avoid duplicated code paths for metadata-only requirements.
14 changes: 13 additions & 1 deletion src/pip/_internal/distributions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.distributions.sdist import SourceDistribution
from pip._internal.distributions.wheel import WheelDistribution
from pip._internal.req.req_install import InstallRequirement
Expand All @@ -7,7 +8,18 @@
def make_distribution_for_install_requirement(
install_req: InstallRequirement,
) -> AbstractDistribution:
"""Returns a Distribution for the given InstallRequirement"""
"""Returns an AbstractDistribution for the given InstallRequirement.

As AbstractDistribution only covers installable artifacts, this method may only be
invoked at the conclusion of a resolve, when the RequirementPreparer has downloaded
the file corresponding to the resolved dist. Commands which intend to consume
metadata-only resolves without downloading should not call this method or
consume AbstractDistribution objects.
"""
# Only pre-installed requirements will have a .satisfied_by dist.
if install_req.satisfied_by:
return InstalledDistribution(install_req)

# Editable requirements will always be source distributions. They use the
# legacy logic until we create a modern standard for them.
if install_req.editable:
Expand Down
19 changes: 16 additions & 3 deletions src/pip/_internal/distributions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,17 @@ def build_tracker_id(self) -> Optional[str]:

If None, then this dist has no work to do in the build tracker, and
``.prepare_distribution_metadata()`` will not be called."""
raise NotImplementedError()
...

@abc.abstractmethod
def get_metadata_distribution(self) -> BaseDistribution:
raise NotImplementedError()
"""Generate a concrete ``BaseDistribution`` instance for this artifact.

The implementation should also cache the result with
``self.req.cache_concrete_dist()`` so the distribution is available to other
users of the ``InstallRequirement``. This method is not called within the build
tracker context, so it should not identify any new setup requirements."""
...

@abc.abstractmethod
def prepare_distribution_metadata(
Expand All @@ -50,4 +56,11 @@ def prepare_distribution_metadata(
build_isolation: bool,
check_build_deps: bool,
) -> None:
raise NotImplementedError()
"""Generate the information necessary to extract metadata from the artifact.

This method will be executed within the context of ``BuildTracker#track()``, so
it needs to fully identify any setup requirements so they can be added to the
same active set of tracked builds, while ``.get_metadata_distribution()`` takes
care of generating and caching the ``BaseDistribution`` to expose to the rest of
the resolve."""
...
14 changes: 9 additions & 5 deletions src/pip/_internal/distributions/installed.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from typing import Optional
from typing import TYPE_CHECKING, Optional

from pip._internal.distributions.base import AbstractDistribution
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution

if TYPE_CHECKING:
from pip._internal.index.package_finder import PackageFinder


class InstalledDistribution(AbstractDistribution):
"""Represents an installed package.
Expand All @@ -17,12 +19,14 @@ def build_tracker_id(self) -> Optional[str]:
return None

def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.satisfied_by is not None, "not actually installed"
return self.req.satisfied_by
dist = self.req.satisfied_by
assert dist is not None, "not actually installed"
self.req.cache_concrete_dist(dist)
return dist

def prepare_distribution_metadata(
self,
finder: PackageFinder,
finder: "PackageFinder",
build_isolation: bool,
check_build_deps: bool,
) -> None:
Expand Down
20 changes: 15 additions & 5 deletions src/pip/_internal/distributions/sdist.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
from typing import TYPE_CHECKING, Iterable, Optional, Set, Tuple
from typing import TYPE_CHECKING, Iterable, Set, Tuple

from pip._internal.build_env import BuildEnvironment
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.exceptions import InstallationError
from pip._internal.metadata import BaseDistribution
from pip._internal.metadata import BaseDistribution, get_directory_distribution
from pip._internal.utils.subprocess import runner_with_spinner_message

if TYPE_CHECKING:
Expand All @@ -21,13 +21,19 @@ class SourceDistribution(AbstractDistribution):
"""

@property
def build_tracker_id(self) -> Optional[str]:
def build_tracker_id(self) -> str:
"""Identify this requirement uniquely by its link."""
assert self.req.link
return self.req.link.url_without_fragment

def get_metadata_distribution(self) -> BaseDistribution:
return self.req.get_dist()
assert (
self.req.metadata_directory
), "Set as part of .prepare_distribution_metadata()"
dist = get_directory_distribution(self.req.metadata_directory)
self.req.cache_concrete_dist(dist)
self.req.validate_sdist_metadata()
return dist

def prepare_distribution_metadata(
self,
Expand Down Expand Up @@ -66,7 +72,11 @@ def prepare_distribution_metadata(
self._raise_conflicts("the backend dependencies", conflicting)
if missing:
self._raise_missing_reqs(missing)
self.req.prepare_metadata()

# NB: we must still call .cache_concrete_dist() and .validate_sdist_metadata()
# before the InstallRequirement itself has been updated with the metadata from
# this directory!
self.req.prepare_metadata_directory()

def _prepare_build_backend(self, finder: "PackageFinder") -> None:
# Isolate in a BuildEnvironment and install the build-time
Expand Down
4 changes: 3 additions & 1 deletion src/pip/_internal/distributions/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.local_file_path, "Set as part of preparation during download"
assert self.req.name, "Wheels are never unnamed"
wheel = FilesystemWheel(self.req.local_file_path)
return get_wheel_distribution(wheel, canonicalize_name(self.req.name))
dist = get_wheel_distribution(wheel, canonicalize_name(self.req.name))
self.req.cache_concrete_dist(dist)
return dist

def prepare_distribution_metadata(
self,
Expand Down
21 changes: 21 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,15 @@ class RequiresEntry(NamedTuple):


class BaseDistribution(Protocol):
@property
def is_concrete(self) -> bool:
"""Whether the distribution really exists somewhere on disk.

If this is false, it has been synthesized from metadata, e.g. via
``.from_metadata_file_contents()``, or ``.from_wheel()`` against
a ``MemoryWheel``."""
raise NotImplementedError()

@classmethod
def from_directory(cls, directory: str) -> "BaseDistribution":
"""Load the distribution from a metadata directory.
Expand Down Expand Up @@ -667,6 +676,10 @@ def iter_installed_distributions(
class Wheel(Protocol):
location: str

@property
def is_concrete(self) -> bool:
raise NotImplementedError()

def as_zipfile(self) -> zipfile.ZipFile:
raise NotImplementedError()

Expand All @@ -675,6 +688,10 @@ class FilesystemWheel(Wheel):
def __init__(self, location: str) -> None:
self.location = location

@property
def is_concrete(self) -> bool:
return True

def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.location, allowZip64=True)

Expand All @@ -684,5 +701,9 @@ def __init__(self, location: str, stream: IO[bytes]) -> None:
self.location = location
self.stream = stream

@property
def is_concrete(self) -> bool:
return False

def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.stream, allowZip64=True)
19 changes: 16 additions & 3 deletions src/pip/_internal/metadata/importlib/_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,22 @@ def __init__(
dist: importlib.metadata.Distribution,
info_location: Optional[BasePath],
installed_location: Optional[BasePath],
concrete: bool,
) -> None:
self._dist = dist
self._info_location = info_location
self._installed_location = installed_location
self._concrete = concrete

@property
def is_concrete(self) -> bool:
return self._concrete

@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
info_location = pathlib.Path(directory)
dist = importlib.metadata.Distribution.at(info_location)
return cls(dist, info_location, info_location.parent)
return cls(dist, info_location, info_location.parent, concrete=True)

@classmethod
def from_metadata_file_contents(
Expand All @@ -128,7 +134,7 @@ def from_metadata_file_contents(
metadata_path.write_bytes(metadata_contents)
# Construct dist pointing to the newly created directory.
dist = importlib.metadata.Distribution.at(metadata_path.parent)
return cls(dist, metadata_path.parent, None)
return cls(dist, metadata_path.parent, None, concrete=False)

@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
Expand All @@ -137,7 +143,14 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
dist = WheelDistribution.from_zipfile(zf, name, wheel.location)
except zipfile.BadZipFile as e:
raise InvalidWheel(wheel.location, name) from e
return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location))
except UnsupportedWheel as e:
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
return cls(
dist,
dist.info_location,
pathlib.PurePosixPath(wheel.location),
concrete=wheel.is_concrete,
)

@property
def location(self) -> Optional[str]:
Expand Down
8 changes: 4 additions & 4 deletions src/pip/_internal/metadata/importlib/_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def find(self, location: str) -> Iterator[BaseDistribution]:
installed_location: Optional[BasePath] = None
else:
installed_location = info_location.parent
yield Distribution(dist, info_location, installed_location)
yield Distribution(dist, info_location, installed_location, concrete=True)

def find_linked(self, location: str) -> Iterator[BaseDistribution]:
"""Read location in egg-link files and return distributions in there.
Expand All @@ -104,7 +104,7 @@ def find_linked(self, location: str) -> Iterator[BaseDistribution]:
continue
target_location = str(path.joinpath(target_rel))
for dist, info_location in self._find_impl(target_location):
yield Distribution(dist, info_location, path)
yield Distribution(dist, info_location, path, concrete=True)

def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]:
from pip._vendor.pkg_resources import find_distributions
Expand All @@ -116,7 +116,7 @@ def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]:
if not entry.name.endswith(".egg"):
continue
for dist in find_distributions(entry.path):
yield legacy.Distribution(dist)
yield legacy.Distribution(dist, concrete=True)

def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]:
from pip._vendor.pkg_resources import find_eggs_in_zip
Expand All @@ -128,7 +128,7 @@ def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]:
except zipimport.ZipImportError:
return
for dist in find_eggs_in_zip(importer, location):
yield legacy.Distribution(dist)
yield legacy.Distribution(dist, concrete=True)

def find_eggs(self, location: str) -> Iterator[BaseDistribution]:
"""Find eggs in a location.
Expand Down
15 changes: 10 additions & 5 deletions src/pip/_internal/metadata/pkg_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ def run_script(self, script_name: str, namespace: str) -> None:


class Distribution(BaseDistribution):
def __init__(self, dist: pkg_resources.Distribution) -> None:
def __init__(self, dist: pkg_resources.Distribution, concrete: bool) -> None:
self._dist = dist
self._concrete = concrete
# This is populated lazily, to avoid loading metadata for all possible
# distributions eagerly.
self.__extra_mapping: Optional[Mapping[NormalizedName, str]] = None
Expand All @@ -96,6 +97,10 @@ def _extra_mapping(self) -> Mapping[NormalizedName, str]:

return self.__extra_mapping

@property
def is_concrete(self) -> bool:
return self._concrete

@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
dist_dir = directory.rstrip(os.sep)
Expand All @@ -114,7 +119,7 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0]

dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
return cls(dist)
return cls(dist, concrete=True)

@classmethod
def from_metadata_file_contents(
Expand All @@ -131,7 +136,7 @@ def from_metadata_file_contents(
metadata=InMemoryMetadata(metadata_dict, filename),
project_name=project_name,
)
return cls(dist)
return cls(dist, concrete=False)

@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
Expand All @@ -152,7 +157,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
metadata=InMemoryMetadata(metadata_dict, wheel.location),
project_name=name,
)
return cls(dist)
return cls(dist, concrete=wheel.is_concrete)

@property
def location(self) -> Optional[str]:
Expand Down Expand Up @@ -264,7 +269,7 @@ def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment:

def _iter_distributions(self) -> Iterator[BaseDistribution]:
for dist in self._ws:
yield Distribution(dist)
yield Distribution(dist, concrete=True)

def _search_distribution(self, name: str) -> Optional[BaseDistribution]:
"""Find a distribution matching the ``name`` in the environment.
Expand Down
Loading