From e1c41a01a158458af7201451f7b40a0a7b91119c Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 30 Jan 2023 12:59:45 -0500 Subject: [PATCH 1/5] pypi_provider: better invalid requirement handling Signed-off-by: William Woodruff --- .../resolvelib/pypi_provider.py | 89 ++++++++++--------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index 683d861a..cef6940b 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -26,7 +26,7 @@ import html5lib import requests from cachecontrol import CacheControl -from packaging.requirements import Requirement +from packaging.requirements import InvalidRequirement, Requirement from packaging.specifiers import InvalidSpecifier, SpecifierSet from packaging.utils import canonicalize_name, parse_sdist_filename, parse_wheel_filename from packaging.version import Version @@ -34,6 +34,7 @@ from resolvelib.resolvers import RequirementInformation from pip_audit._dependency_source import RequirementHashes, UnsupportedHashAlgorithm +from pip_audit._dependency_source.interface import DependencySourceError from pip_audit._service import SkippedDependency from pip_audit._state import AuditState from pip_audit._util import python_version @@ -134,7 +135,14 @@ def _get_dependencies(self) -> Iterator[Requirement]: extras = self.extras if self.extras else [""] for d in deps: - r = Requirement(d) + # NOTE: packaging 22 and later are more strict about PEP 440 requirements, + # meaning that pre-existing packages might have invalid requirements + # specifiers in their dependency sets (e.g. `pytz (>dev)`). + try: + r = Requirement(d) + except InvalidRequirement as exc: + raise DependencySourceError(f"{self.name} has invalid requirement {d}: {exc}") + if r.marker is None: yield r else: @@ -440,49 +448,48 @@ def find_matches( for r in requirements: extras |= r.extras - # Need to pass the extras to the search, so they - # are added to the candidate at creation - we - # treat candidates as immutable once created. - all_candidates = get_project_from_indexes( - self.index_urls, - self.session, - identifier, - requirements, - extras, - self.req_hashes, - self.timeout, - self._state, - ) - - candidates: list[ResolvedCandidate] try: - candidates = sorted( - [ - candidate - for candidate in all_candidates - if candidate.version not in bad_versions - # NOTE(ww): We use `filter(...)` instead of checking - # `candidate.version in r.specifier` because the former has subtle (and PEP 440 - # mandated) behavior around prereleases. Specifically, `filter(...)` - # returns prereleases even if not explicitly configured, but only if - # there are no non-prereleases. - # See: https://github.com/pypa/pip-audit/issues/472 - and all([any(r.specifier.filter((candidate.version,))) for r in requirements]) - # HACK(ww): Additionally check that each candidate's name matches the - # expected project name (identifier). - # This technically shouldn't be required, but parsing distribution names - # from package indices is imprecise/unreliable when distribution filenames - # are PEP 440 compliant but not normalized. - # See: https://github.com/pypa/packaging/issues/527 - and candidate.name == identifier - ], - key=attrgetter("version", "is_wheel"), - reverse=True, + # Need to pass the extras to the search, so they + # are added to the candidate at creation - we + # treat candidates as immutable once created. + all_candidates = get_project_from_indexes( + self.index_urls, + self.session, + identifier, + requirements, + extras, + self.req_hashes, + self.timeout, + self._state, ) - except PyPINotFoundError as e: - yield SkippedCandidate(name=identifier, skip_reason=str(e)) + except PyPINotFoundError as exc: + yield SkippedCandidate(name=identifier, skip_reason=str(exc)) return + candidates: list[ResolvedCandidate] = sorted( + [ + candidate + for candidate in all_candidates + if candidate.version not in bad_versions + # NOTE(ww): We use `filter(...)` instead of checking + # `candidate.version in r.specifier` because the former has subtle (and PEP 440 + # mandated) behavior around prereleases. Specifically, `filter(...)` + # returns prereleases even if not explicitly configured, but only if + # there are no non-prereleases. + # See: https://github.com/pypa/pip-audit/issues/472 + and all([any(r.specifier.filter((candidate.version,))) for r in requirements]) + # HACK(ww): Additionally check that each candidate's name matches the + # expected project name (identifier). + # This technically shouldn't be required, but parsing distribution names + # from package indices is imprecise/unreliable when distribution filenames + # are PEP 440 compliant but not normalized. + # See: https://github.com/pypa/packaging/issues/527 + and candidate.name == identifier + ], + key=attrgetter("version", "is_wheel"), + reverse=True, + ) + logger.debug(f"{identifier} has candidates: {candidates}") # If we have multiple candidates for a single version and some are wheels, From 6c4163c8aa28d00168e8afd8370623c0bb211466 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 30 Jan 2023 14:52:56 -0500 Subject: [PATCH 2/5] exception plumbing, bring cov back up Signed-off-by: William Woodruff --- pip_audit/_dependency_source/__init__.py | 2 + pip_audit/_dependency_source/interface.py | 7 ++++ .../resolvelib/pypi_provider.py | 38 +++++++++++-------- .../resolvelib/test_pypi_provider.py | 28 +++++++++++++- 4 files changed, 59 insertions(+), 16 deletions(-) diff --git a/pip_audit/_dependency_source/__init__.py b/pip_audit/_dependency_source/__init__.py index 97f5db2c..32fa4818 100644 --- a/pip_audit/_dependency_source/__init__.py +++ b/pip_audit/_dependency_source/__init__.py @@ -10,6 +10,7 @@ DependencySourceError, HashMismatchError, HashMissingError, + InvalidRequirementSpecifier, RequirementHashes, UnsupportedHashAlgorithm, ) @@ -27,6 +28,7 @@ "DependencySourceError", "HashMismatchError", "HashMissingError", + "InvalidRequirementSpecifier", "PipSource", "PipSourceError", "PyProjectSource", diff --git a/pip_audit/_dependency_source/interface.py b/pip_audit/_dependency_source/interface.py index 337f83e5..b4efa550 100644 --- a/pip_audit/_dependency_source/interface.py +++ b/pip_audit/_dependency_source/interface.py @@ -86,6 +86,13 @@ class UnsupportedHashAlgorithm(Exception): pass +class InvalidRequirementSpecifier(DependencySourceError): + """ + A `DependencySourceError` specialized for the case of a non-PEP 440 requirements + specifier. + """ + + class RequirementHashes: """ Represents the hashes contained within a requirements file. diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index cef6940b..f5bc2639 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -33,8 +33,11 @@ from resolvelib.providers import AbstractProvider from resolvelib.resolvers import RequirementInformation -from pip_audit._dependency_source import RequirementHashes, UnsupportedHashAlgorithm -from pip_audit._dependency_source.interface import DependencySourceError +from pip_audit._dependency_source import ( + InvalidRequirementSpecifier, + RequirementHashes, + UnsupportedHashAlgorithm, +) from pip_audit._service import SkippedDependency from pip_audit._state import AuditState from pip_audit._util import python_version @@ -141,7 +144,9 @@ def _get_dependencies(self) -> Iterator[Requirement]: try: r = Requirement(d) except InvalidRequirement as exc: - raise DependencySourceError(f"{self.name} has invalid requirement {d}: {exc}") + raise InvalidRequirementSpecifier( + f"{self.name} has invalid requirement '{d}': {exc}" + ) if r.marker is None: yield r @@ -449,18 +454,21 @@ def find_matches( extras |= r.extras try: - # Need to pass the extras to the search, so they - # are added to the candidate at creation - we - # treat candidates as immutable once created. - all_candidates = get_project_from_indexes( - self.index_urls, - self.session, - identifier, - requirements, - extras, - self.req_hashes, - self.timeout, - self._state, + # Need to pass the extras to the search, so that they are added to the candidate at + # creation, since we treat candidates as immutable once created. + # We also eagerly collect the candidate list here, to pull any "not found" + # exceptions out before doing candidate filtering below. + all_candidates = list( + get_project_from_indexes( + self.index_urls, + self.session, + identifier, + requirements, + extras, + self.req_hashes, + self.timeout, + self._state, + ) ) except PyPINotFoundError as exc: yield SkippedCandidate(name=identifier, skip_reason=str(exc)) diff --git a/test/dependency_source/resolvelib/test_pypi_provider.py b/test/dependency_source/resolvelib/test_pypi_provider.py index 171e7e9a..608de45e 100644 --- a/test/dependency_source/resolvelib/test_pypi_provider.py +++ b/test/dependency_source/resolvelib/test_pypi_provider.py @@ -1,3 +1,4 @@ +from email.message import Message from pathlib import Path from subprocess import CalledProcessError @@ -6,12 +7,13 @@ from packaging.version import Version from pip_audit._dependency_source import RequirementHashes +from pip_audit._dependency_source.interface import InvalidRequirementSpecifier from pip_audit._dependency_source.resolvelib import pypi_provider from pip_audit._dependency_source.resolvelib.pypi_provider import ResolvedCandidate from pip_audit._virtual_env import VirtualEnvError -class TestCandidate: +class TestResolvedCandidate: def test_get_metadata_for_sdist_venv_create_fails(self, monkeypatch): virtualenv_obj = pretend.stub( create=pretend.call_recorder( @@ -53,6 +55,30 @@ def test_get_metadata_for_sdist_venv_create_fails(self, monkeypatch): "Installing source distribution in isolated environment for fakepkg (1.0.0)" ) + @pytest.mark.parametrize("invalid", ["pytz (>dev)", "fakedep>=3.*"]) + def test_get_dependencies_invalid_req_specifer(self, invalid, monkeypatch): + candidate = ResolvedCandidate( + "fakepkg", + "fakepkg", + Path("fakepath"), + Version("1.0.0"), + url="hxxps://fake.url", + extras=set(), + is_wheel=False, + reqs=[], + session=pretend.stub(), + timeout=None, + state=pretend.stub(), + req_hashes=RequirementHashes(), + ) + + metadata = Message() + metadata["Requires-Dist"] = invalid + monkeypatch.setattr(candidate, "_metadata", metadata) + + with pytest.raises(InvalidRequirementSpecifier): + list(candidate._get_dependencies()) + def test_get_project_from_index_relative_url(): data = """ From 9fae3599830611db7522fdfbd1730291d4ba322d Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 30 Jan 2023 15:04:08 -0500 Subject: [PATCH 3/5] requirement: better message on invalid requirement Signed-off-by: William Woodruff --- pip_audit/_dependency_source/requirement.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pip_audit/_dependency_source/requirement.py b/pip_audit/_dependency_source/requirement.py index 97cf60c8..5557b94e 100644 --- a/pip_audit/_dependency_source/requirement.py +++ b/pip_audit/_dependency_source/requirement.py @@ -26,6 +26,7 @@ DependencySource, DependencySourceError, RequirementHashes, + InvalidRequirementSpecifier, ) from pip_audit._fix import ResolvedFixVersion from pip_audit._service import Dependency @@ -89,10 +90,11 @@ def collect(self) -> Iterator[Dependency]: for filename in self._filenames: try: rf = RequirementsFile.from_file(filename) - if rf.invalid_lines: - raise RequirementSourceError( - f"requirement file {filename} contains invalid lines: " - f"{str(rf.invalid_lines)}" + if len(rf.invalid_lines) > 0: + invalid = rf.invalid_lines[0] + raise InvalidRequirementSpecifier( + f"requirement file {filename} contains invalid specifier at " + f"line {invalid.line_number}: {invalid.error_message}" ) reqs: list[InstallRequirement] = [] From 0d9c3d60c891d311256162c4b434a5a73bad1e44 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 30 Jan 2023 15:05:45 -0500 Subject: [PATCH 4/5] requirement: lintage Signed-off-by: William Woodruff --- pip_audit/_dependency_source/requirement.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pip_audit/_dependency_source/requirement.py b/pip_audit/_dependency_source/requirement.py index 5557b94e..a0b19149 100644 --- a/pip_audit/_dependency_source/requirement.py +++ b/pip_audit/_dependency_source/requirement.py @@ -25,8 +25,8 @@ DependencyResolverError, DependencySource, DependencySourceError, - RequirementHashes, InvalidRequirementSpecifier, + RequirementHashes, ) from pip_audit._fix import ResolvedFixVersion from pip_audit._service import Dependency From 2affce1133be9e34faa93f1e0b543175ca4b9b42 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 30 Jan 2023 15:07:21 -0500 Subject: [PATCH 5/5] CHANGELOG: record changes Signed-off-by: William Woodruff --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d7ae396..f11698f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,12 @@ All versions prior to 0.0.9 are untracked. ## [Unreleased] +### Changed + +* Improved error messaging when a requirements input or indirect dependency + has an invalid (non-PEP 440) requirements specifier + ([#507](https://github.com/pypa/pip-audit/pull/507)) + ### Fixed * Fixed an issue where hash checking would fail when using third-party indices