From 618af7aec63d45316e29a715fe6f8582eabf1e43 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sun, 30 Jun 2024 20:39:58 +1000 Subject: [PATCH 01/26] add repository information parsing --- news/11784.feature.rst | 1 + src/pip/_internal/index/collector.py | 27 +++++++++++++++++++++++++-- src/pip/_internal/models/link.py | 20 ++++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 news/11784.feature.rst diff --git a/news/11784.feature.rst b/news/11784.feature.rst new file mode 100644 index 00000000000..c2a0ce56040 --- /dev/null +++ b/news/11784.feature.rst @@ -0,0 +1 @@ +Introduce repository alternate locations and project tracking, as per PEP 708. \ No newline at end of file diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index 5f8fdee3d46..bedb0f6638f 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -230,7 +230,12 @@ def parse_links(page: "IndexContent") -> Iterable[Link]: if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): data = json.loads(page.content) for file in data.get("files", []): - link = Link.from_json(file, page.url) + link = Link.from_json( + file, + page_url=page.url, + project_track_urls=data.get("meta", {}).get('tracks', []), + repo_alt_urls=data.get("alternate-locations", []), + ) if link is None: continue yield link @@ -243,7 +248,13 @@ def parse_links(page: "IndexContent") -> Iterable[Link]: url = page.url base_url = parser.base_url or url for anchor in parser.anchors: - link = Link.from_element(anchor, page_url=url, base_url=base_url) + link = Link.from_element( + anchor, + page_url=url, + base_url=base_url, + project_track_urls=parser.project_track_urls, + repo_alt_urls=parser.repo_alt_urls, + ) if link is None: continue yield link @@ -282,6 +293,8 @@ def __init__(self, url: str) -> None: self.url: str = url self.base_url: Optional[str] = None self.anchors: List[Dict[str, Optional[str]]] = [] + self.project_track_urls: List[str] = [] + self.repo_alt_urls: List[str] = [] def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: if tag == "base" and self.base_url is None: @@ -290,6 +303,16 @@ def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> N self.base_url = href elif tag == "a": self.anchors.append(dict(attrs)) + elif tag == "meta": + for name, value in attrs: + if not value or not value.strip(): + continue + url = value.strip() + # PEP 708 + if name == "pypi:tracks" and url not in self.project_track_urls: + self.project_track_urls.append(url) + elif name == "pypi:alternate-locations" and url not in self.repo_alt_urls: + self.repo_alt_urls.append(url) def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: for name, value in attrs: diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 2f41f2f6a09..5f7d327fdf5 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -192,6 +192,8 @@ class Link: "metadata_file_data", "cache_link_parsing", "egg_fragment", + "project_track_urls", + "repo_alt_urls", ] def __init__( @@ -203,6 +205,8 @@ def __init__( metadata_file_data: Optional[MetadataFile] = None, cache_link_parsing: bool = True, hashes: Optional[Mapping[str, str]] = None, + project_track_urls: Optional[List[str]] = None, + repo_alt_urls: Optional[List[str]] = None, ) -> None: """ :param url: url of the resource pointed to (href of the link) @@ -227,6 +231,10 @@ def __init__( URLs should generally have this set to False, for example. :param hashes: A mapping of hash names to digests to allow us to determine the validity of a download. + :param project_track_urls: An optional list of urls pointing to the same + project in other repositories. Defined by the repository operators. + :param repo_alt_urls: An optional list of urls pointing to alternate + locations for the project. Defined by the project owners. """ # The comes_from, requires_python, and metadata_file_data arguments are @@ -257,11 +265,17 @@ def __init__( self.cache_link_parsing = cache_link_parsing self.egg_fragment = self._egg_fragment() + # PEP 708 + self.project_track_urls = project_track_urls or [] + self.repo_alt_urls = repo_alt_urls or [] + @classmethod def from_json( cls, file_data: Dict[str, Any], page_url: str, + project_track_urls: Optional[List[str]] = None, + repo_alt_urls: Optional[List[str]] = None, ) -> Optional["Link"]: """ Convert an pypi json document from a simple repository page into a Link. @@ -306,6 +320,8 @@ def from_json( yanked_reason=yanked_reason, hashes=hashes, metadata_file_data=metadata_file_data, + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, ) @classmethod @@ -314,6 +330,8 @@ def from_element( anchor_attribs: Dict[str, Optional[str]], page_url: str, base_url: str, + project_track_urls: Optional[List[str]] = None, + repo_alt_urls: Optional[List[str]] = None, ) -> Optional["Link"]: """ Convert an anchor element's attributes in a simple repository page to a Link. @@ -358,6 +376,8 @@ def from_element( requires_python=pyrequire, yanked_reason=yanked_reason, metadata_file_data=metadata_file_data, + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, ) def __str__(self) -> str: From 02c5f4afcabf529b8533f2793d2aa311898a8196 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sat, 27 Jul 2024 17:17:31 +1000 Subject: [PATCH 02/26] add tests for parsing repository details --- src/pip/_internal/index/collector.py | 58 ++++++++++----- src/pip/_internal/models/link.py | 21 +++--- .../repository-alternate-01/simple/index.html | 11 +++ .../repository-alternate-02/simple/index.html | 10 +++ .../repository-tracks-01/simple/index.html | 10 +++ tests/unit/test_collector.py | 71 ++++++++++++++++++- 6 files changed, 153 insertions(+), 28 deletions(-) create mode 100644 tests/data/indexes/repository-alternate-01/simple/index.html create mode 100644 tests/data/indexes/repository-alternate-02/simple/index.html create mode 100644 tests/data/indexes/repository-tracks-01/simple/index.html diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index bedb0f6638f..69bb812f5c9 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -24,6 +24,7 @@ Optional, Protocol, Sequence, + Set, Tuple, Union, ) @@ -33,7 +34,12 @@ from pip._vendor.requests.exceptions import RetryError, SSLError from pip._internal.exceptions import NetworkConnectionError -from pip._internal.models.link import Link +from pip._internal.models.link import ( + Link, + HEAD_META_PREFIX, + HEAD_META_ALTERNATE_LOCATIONS, + HEAD_META_TRACKS, +) from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession from pip._internal.network.utils import raise_for_status @@ -224,17 +230,21 @@ def wrapper_wrapper(page: "IndexContent") -> List[Link]: def parse_links(page: "IndexContent") -> Iterable[Link]: """ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. + Includes known metadata from the HTML header. """ - + url = page.url content_type_l = page.content_type.lower() if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): data = json.loads(page.content) + project_track_urls = set(data.get("meta", {}).get("tracks", [])) + repo_alt_urls = set(data.get("alternate-locations", [])) + repo_alt_urls.add(page.url) for file in data.get("files", []): link = Link.from_json( file, page_url=page.url, - project_track_urls=data.get("meta", {}).get('tracks', []), - repo_alt_urls=data.get("alternate-locations", []), + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, ) if link is None: continue @@ -245,15 +255,16 @@ def parse_links(page: "IndexContent") -> Iterable[Link]: encoding = page.encoding or "utf-8" parser.feed(page.content.decode(encoding)) - url = page.url base_url = parser.base_url or url for anchor in parser.anchors: + repo_alt_urls = parser.repo_alt_urls or set() + repo_alt_urls.add(page.url) link = Link.from_element( anchor, page_url=url, base_url=base_url, project_track_urls=parser.project_track_urls, - repo_alt_urls=parser.repo_alt_urls, + repo_alt_urls=repo_alt_urls, ) if link is None: continue @@ -293,8 +304,8 @@ def __init__(self, url: str) -> None: self.url: str = url self.base_url: Optional[str] = None self.anchors: List[Dict[str, Optional[str]]] = [] - self.project_track_urls: List[str] = [] - self.repo_alt_urls: List[str] = [] + self.project_track_urls: Set[str] = set() + self.repo_alt_urls: Set[str] = set() def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: if tag == "base" and self.base_url is None: @@ -304,15 +315,20 @@ def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> N elif tag == "a": self.anchors.append(dict(attrs)) elif tag == "meta": - for name, value in attrs: - if not value or not value.strip(): - continue - url = value.strip() - # PEP 708 - if name == "pypi:tracks" and url not in self.project_track_urls: - self.project_track_urls.append(url) - elif name == "pypi:alternate-locations" and url not in self.repo_alt_urls: - self.repo_alt_urls.append(url) + meta_attrs = dict(attrs) + meta_key = meta_attrs.get("name", "").strip() + meta_val = meta_attrs.get("content", "").strip() + if meta_key and meta_val: + if ( + meta_key == self._meta_key_tracks + and meta_val not in self.project_track_urls + ): + self.project_track_urls.add(meta_val) + elif ( + meta_key == self._meta_key_alternate_locations + and meta_val not in self.repo_alt_urls + ): + self.repo_alt_urls.add(meta_val) def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: for name, value in attrs: @@ -320,6 +336,14 @@ def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: return value return None + @functools.cached_property + def _meta_key_tracks(self): + return f"{HEAD_META_PREFIX}:{HEAD_META_TRACKS}" + + @functools.cached_property + def _meta_key_alternate_locations(self): + return f"{HEAD_META_PREFIX}:{HEAD_META_ALTERNATE_LOCATIONS}" + def _handle_get_simple_fail( link: Link, diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 5f7d327fdf5..2bdbb827029 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -14,6 +14,7 @@ Mapping, NamedTuple, Optional, + Set, Tuple, Union, ) @@ -39,6 +40,10 @@ # we will pick to use. _SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5") +HEAD_META_PREFIX = "pypi" +HEAD_META_ALTERNATE_LOCATIONS = "alternate-locations" +HEAD_META_TRACKS = "tracks" + @dataclass(frozen=True) class LinkHash: @@ -205,8 +210,8 @@ def __init__( metadata_file_data: Optional[MetadataFile] = None, cache_link_parsing: bool = True, hashes: Optional[Mapping[str, str]] = None, - project_track_urls: Optional[List[str]] = None, - repo_alt_urls: Optional[List[str]] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, ) -> None: """ :param url: url of the resource pointed to (href of the link) @@ -266,16 +271,16 @@ def __init__( self.egg_fragment = self._egg_fragment() # PEP 708 - self.project_track_urls = project_track_urls or [] - self.repo_alt_urls = repo_alt_urls or [] + self.project_track_urls = project_track_urls or set() + self.repo_alt_urls = repo_alt_urls or set() @classmethod def from_json( cls, file_data: Dict[str, Any], page_url: str, - project_track_urls: Optional[List[str]] = None, - repo_alt_urls: Optional[List[str]] = None, + project_track_urls: Optional[set[str]] = None, + repo_alt_urls: Optional[set[str]] = None, ) -> Optional["Link"]: """ Convert an pypi json document from a simple repository page into a Link. @@ -330,8 +335,8 @@ def from_element( anchor_attribs: Dict[str, Optional[str]], page_url: str, base_url: str, - project_track_urls: Optional[List[str]] = None, - repo_alt_urls: Optional[List[str]] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, ) -> Optional["Link"]: """ Convert an anchor element's attributes in a simple repository page to a Link. diff --git a/tests/data/indexes/repository-alternate-01/simple/index.html b/tests/data/indexes/repository-alternate-01/simple/index.html new file mode 100644 index 00000000000..e9174877f7d --- /dev/null +++ b/tests/data/indexes/repository-alternate-01/simple/index.html @@ -0,0 +1,11 @@ + + + + + + + + +simple-1.0.tar.gz + + diff --git a/tests/data/indexes/repository-alternate-02/simple/index.html b/tests/data/indexes/repository-alternate-02/simple/index.html new file mode 100644 index 00000000000..c5e6f02039c --- /dev/null +++ b/tests/data/indexes/repository-alternate-02/simple/index.html @@ -0,0 +1,10 @@ + + + + + + + +simple-1.0.tar.gz + + diff --git a/tests/data/indexes/repository-tracks-01/simple/index.html b/tests/data/indexes/repository-tracks-01/simple/index.html new file mode 100644 index 00000000000..5b7aa5ac26f --- /dev/null +++ b/tests/data/indexes/repository-tracks-01/simple/index.html @@ -0,0 +1,10 @@ + + + + + + + +simple-1.0.tar.gz + + diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 89da25b73d8..7a8bc9efa5e 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -482,10 +482,18 @@ def test_parse_links__requires_python( # in test_download.py execute over both html and json indices with # a pytest.mark.parameterize decorator to ensure nothing slips through the cracks. def test_parse_links_json() -> None: + meta_tracks_alt_repos = [ + "https://example.com/simple/holygrail/", + "https://example.net/simple/holygrail/", + ] json_bytes = json.dumps( { - "meta": {"api-version": "1.0"}, + "meta": { + "api-version": "1.0", + "tracks": list(meta_tracks_alt_repos), + }, "name": "holygrail", + "alternate-locations": list(meta_tracks_alt_repos), "files": [ { "filename": "holygrail-1.0.tar.gz", @@ -529,23 +537,26 @@ def test_parse_links_json() -> None: ], } ).encode("utf8") + url = f"https://example.com/simple-{uuid.uuid4()}/" page = IndexContent( json_bytes, "application/vnd.pypi.simple.v1+json", encoding=None, # parse_links() is cached by url, so we inject a random uuid to ensure # the page content isn't cached. - url=f"https://example.com/simple-{uuid.uuid4()}/", + url=url, ) links = list(parse_links(page)) - assert links == [ + expected = [ Link( "https://example.com/files/holygrail-1.0.tar.gz", comes_from=page.url, requires_python=">=3.7", yanked_reason="Had a vulnerability", hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -553,6 +564,8 @@ def test_parse_links_json() -> None: requires_python=">=3.7", yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -561,6 +574,8 @@ def test_parse_links_json() -> None: yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -569,6 +584,8 @@ def test_parse_links_json() -> None: yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -577,9 +594,17 @@ def test_parse_links_json() -> None: yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), ] + def _convert_links(link_item): + return {key: getattr(link_item, key, None) for key in link_item.__slots__} + + for index, link in enumerate(links): + assert _convert_links(link) == _convert_links(expected[index]) + # Ensure the metadata info can be parsed into the correct link. metadata_link = links[2].metadata_link() assert metadata_link is not None @@ -721,6 +746,46 @@ def test_parse_links_caches_same_page_by_url() -> None: assert "pkg2" in parsed_links_3[0].url +@pytest.mark.parametrize( + ("index_name", "expected_project_track_urls", "expected_repo_alt_urls"), + [ + ( + "repository-alternate-01", + set(), + { + "../../repository-alternate-02/simple/", + "../../repository-tracks-01/simple/", + }, + ), + ("repository-alternate-02", set(), {"../../repository-alternate-01/simple/"}), + ("repository-tracks-01", {"../../repository-alternate-01/simple/"}, set()), + ], +) +def test_parse_links__alternate_locations_and_tracks( + index_name: str, + expected_project_track_urls: list[str], + expected_repo_alt_urls: list[str], + data: TestData, +) -> None: + package_name = "simple" + html_path = data.indexes / index_name / package_name / "index.html" + html = html_path.read_text() + html_bytes = html.encode("utf-8") + url = f"https://example.com/simple-{uuid.uuid4()}/" + page = IndexContent( + html_bytes, + "text/html", + encoding=None, + # parse_links() is cached by url, so we inject a random uuid to ensure + # the page content isn't cached. + url=url, + ) + links = list(parse_links(page)) + for link in links: + assert link.project_track_urls == expected_project_track_urls + assert link.repo_alt_urls == {*expected_repo_alt_urls, url} + + @mock.patch("pip._internal.index.collector.raise_for_status") def test_request_http_error( mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture From b36be68e531191c340f50665a6be4758aa4716d8 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sat, 3 Aug 2024 15:18:48 +1000 Subject: [PATCH 03/26] add initial checks from specification Add exceptions. Fix some lint issues. --- news/11784.feature.rst | 2 +- src/pip/_internal/exceptions.py | 102 ++++++++++ src/pip/_internal/index/collector.py | 8 +- src/pip/_internal/index/package_finder.py | 225 ++++++++++++++++++++++ tests/unit/test_collector.py | 6 +- tests/unit/test_index.py | 4 + 6 files changed, 339 insertions(+), 8 deletions(-) diff --git a/news/11784.feature.rst b/news/11784.feature.rst index c2a0ce56040..6d9c77d23d8 100644 --- a/news/11784.feature.rst +++ b/news/11784.feature.rst @@ -1 +1 @@ -Introduce repository alternate locations and project tracking, as per PEP 708. \ No newline at end of file +Introduce repository alternate locations and project tracking, as per PEP 708. diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 2587740f73a..2dd4c37145b 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -775,3 +775,105 @@ def __init__(self, *, distribution: "BaseDistribution") -> None: ), hint_stmt=None, ) + + +class InvalidMultipleRemoteRepositories(DiagnosticPipError): + """Common error for issues with multiple remote repositories.""" + + reference = "invalid-multiple-remote-repositories" + + +class InvalidTracksUrl(InvalidMultipleRemoteRepositories): + """There was an issue with a Tracks metadata url. + + Tracks urls must point to the actual URLs for that project, + point to the repositories that own the namespaces, and + point to a project with the exact same name (after normalization). + """ + + reference = "invalid-tracks-url" + + +class InvalidAlternativeLocationsUrl(InvalidMultipleRemoteRepositories): + """The list of Alternate Locations for each repository do not match. + + In order for this metadata to be trusted, there MUST be agreement between + all locations where that project is found as to what the alternate locations are. + """ + + reference = "invalid-alternative-locations" + + def __init__( + self, + *, + package: str, + remote_repositories: set[str], + invalid_locations: set[str], + ) -> None: + super().__init__( + kind="error", + message=Text( + f"One or more Alternate Locations for {escape(package)} " + "were different among the remote repositories. " + "The remote repositories are " + f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." + "The alternate locations not not agreed by all remote " + "repository are " + f"{'; '.join(sorted(escape(r) for r in invalid_locations))}." + ), + context=Text( + "To be able to trust the remote repository Alternate Locations, " + "all remote repositories must agree on the list of Locations." + ), + hint_stmt=None, + note_stmt=Text( + "The way to resolve this error is to contact the owners of the package " + "at each remote repository, and ask that the list of " + "Alternate Locations at each is set to the same list. " + "See PEP 708 for the specification. " + "You can override this check, which will disable the security " + "protection it provides from dependency confusion attacks, " + "by passing --insecure-multiple-remote-repositories." + ), + ) + + +class UnsafeMultipleRemoteRepositories(InvalidMultipleRemoteRepositories): + """More than one remote repository was provided for a package, + with no indication that the remote repositories can be safely merged. + + The repositories, packages, or user did not indicate that + it is safe to merge remote repositories. + + Multiple remote repositories are not merged by default + to reduce the risk of dependency confusion attacks.""" + + reference = "unsafe-multiple-remote-repositories" + + def __init__(self, *, package: str, remote_repositories: set[str]) -> None: + super().__init__( + kind="error", + message=Text( + f"More than one remote repository was found for {escape(package)}, " + "with no indication that the remote repositories can be safely merged. " + f"The repositories are {'; '.join(sorted(escape(r) for r in remote_repositories))}." + ), + context=Text( + "Multiple remote repositories are not merged by default " + "to reduce the risk of dependency confusion attacks." + ), + hint_stmt=Text( + "Remote repositories can be specified or discovered using " + "--index-url, --extra-index-url, and --find-links. " + "Please check the pip command to see if these are in use." + ), + note_stmt=Text( + "The way to resolve this error is to contact the remote repositories " + "and package owners, and ask if it makes sense to configure them to " + "merge namespaces. " + "See PEP 708 for the specification. " + "You can override this check, which will disable the security " + "protection it provides from dependency confusion attacks, " + "by passing --insecure-multiple-remote-repositories." + ), + ) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index 69bb812f5c9..8a31d442450 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -35,10 +35,10 @@ from pip._internal.exceptions import NetworkConnectionError from pip._internal.models.link import ( - Link, - HEAD_META_PREFIX, HEAD_META_ALTERNATE_LOCATIONS, + HEAD_META_PREFIX, HEAD_META_TRACKS, + Link, ) from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession @@ -337,11 +337,11 @@ def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: return None @functools.cached_property - def _meta_key_tracks(self): + def _meta_key_tracks(self) -> str: return f"{HEAD_META_PREFIX}:{HEAD_META_TRACKS}" @functools.cached_property - def _meta_key_alternate_locations(self): + def _meta_key_alternate_locations(self) -> str: return f"{HEAD_META_PREFIX}:{HEAD_META_ALTERNATE_LOCATIONS}" diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 0d65ce35f37..4e77ec395fc 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -17,7 +17,9 @@ from pip._internal.exceptions import ( BestVersionAlreadyInstalled, DistributionNotFound, + InvalidAlternativeLocationsUrl, InvalidWheelFilename, + UnsafeMultipleRemoteRepositories, UnsupportedWheel, ) from pip._internal.index.collector import LinkCollector, parse_links @@ -475,6 +477,11 @@ def get_applicable_candidates( project_name=self._project_name, ) + check_multiple_remote_repositories( + candidates=filtered_applicable_candidates, + project_name=self._project_name, + ) + return sorted(filtered_applicable_candidates, key=self._sort_key) def _sort_key(self, candidate: InstallationCandidate) -> CandidateSortingKey: @@ -1018,3 +1025,221 @@ def _extract_version_from_fragment(fragment: str, canonical_name: str) -> Option if not version: return None return version + + +def check_multiple_remote_repositories( + candidates: List[InstallationCandidate], project_name: str +) -> None: + """ + Check whether two or more different namespaces can be flattened into one. + + This check will raise an error when packages from different sources will be merged, + without clear configuration to indicate the namespace merge is allowed. + See `PEP 708`_. + + This approach allows safely merging separate namespaces that are actually one + logical namespace, while enforcing a more secure default. + + Returns None if checks pass, otherwise will raise an error with details of the + failed checks. + """ + + # NOTE: The checks in this function must occur after: + # Specification: Filter out any files that do not match known hashes from a + # lockfile or requirements file. + + # NOTE: The checks in this function must occur before: + # Specification: Filter out any files that do not match the current platform, + # Python version, etc. We check for the metadata in this PEP before filtering out + # based on platform, Python version, etc., because we don’t want errors that only + # show up on certain platforms, Python versions, etc. + + # NOTE: Implemented in 'filter_unallowed_hashes': + # Specification: Users who are using lock files or requirements files that include + # specific hashes of artifacts that are “valid” are assumed to be protected by + # nature of those hashes, since the rest of these recommendations would apply + # during hash generation. Thus, we filter out unknown hashes up front. + + # NOTE: Implemented in 'collector.py' 'parse_links': + # Specification: When using alternate locations, clients MUST implicitly assume + # that the url the response was fetched from was included in the list. + + # NOTE: Implemented in 'collector.py' 'parse_links': + # Specification: Order of the elements within the array does not have any + # particular meaning. + + # NOTE: Implemented by this function, by not checking all repo tracks metadata is + # the exact same. + # Specification: Mixed use repositories where some names track a repository and + # some names do not are explicitly allowed. + + # TODO: This requirement doesn't look like something that pip can do anything about. + # Specification: All [Tracks metadata] URLs MUST represent the same “project” as + # the project in the extending repository. This does not mean that they need to + # serve the same files. It is valid for them to include binaries built on + # different platforms, copies with local patches being applied, etc. This is + # purposefully left vague as it’s ultimately up to the expectations that the + # users have of the repository and its operators what exactly constitutes + # the “same” project. + + # TODO: This requirement doesn't look like something that pip can do anything about. + # Specification: It [Tracks metadata] is NOT required that every name in a + # repository tracks the same repository, or that they all track a repository at all. + + # TODO: This requirement doesn't look like something that pip can do anything about. + # Specification: It [repository Tracks metadata] MUST be under the control of the + # repository operators themselves, not any individual publisher using that + # repository. “Repository Operator” can also include anyone who managed the overall + # namespace for a particular repository, which may be the case in situations like + # hosted repository services where one entity operates the software but another + # owns/manages the entire namespace of that repository. + + # TODO: Consider making requests to repositories revealed via Alternate Locations + # or Tracks metadata, to assess the metadata of those additional repositories. + # Specification: When an installer encounters a project that is using the + # alternate locations metadata it SHOULD consider that all repositories named are + # extending the same namespace across multiple repositories. + + # TODO: Consider whether pip already has, or should add, ways to indicate exactly + # which individual projects to get from which individual repositories. + # Specification: If the user has explicitly told the installer that it wants to + # fetch a project from a certain set of repositories, then there is no reason + # to question that and we assume that they’ve made sure it is safe to merge + # those namespaces. If the end user has explicitly told the installer to fetch + # the project from specific repositories, filter out all other repositories. + + # When no candidates are provided, then no checks are relevant, so just return. + if candidates is None or len(candidates) == 0: + logger.debug("No candidates given to multiple remote repository checks") + return + + # Pre-calculate the canonical name for later comparisons. + canonical_name = canonicalize_name(project_name) + + # Specification: Look to see if the discovered files span multiple repositories; + # if they do then determine if either “Tracks” or “Alternate Locations” metadata + # allows safely merging ALL the repositories where files were discovered together. + scheme_file = "file://" + remote_candidates = [] + all_remote_urls = set() + all_project_track_urls = [] + all_repo_alt_urls = [] + remote_repositories = set() + for candidate in candidates: + candidate_name = candidate.name + candidate_canonical_name = canonicalize_name(candidate_name) + + file_path = None + try: + file_path = candidate.link.file_path + except Exception: + pass + url = candidate.link.url + + project_track_urls = candidate.link.project_track_urls or set() + all_project_track_urls.append(project_track_urls) + remote_repositories.update(project_track_urls) + + repo_alt_urls = candidate.link.repo_alt_urls or set() + all_repo_alt_urls.append(repo_alt_urls) + remote_repositories.update(repo_alt_urls) + + other_repo_urls = {*project_track_urls, *repo_alt_urls} + has_other_repo_urls = len(other_repo_urls) > 0 + + is_local_url = url and url.startswith(scheme_file) + if url and not is_local_url: + all_remote_urls.add(url) + + # Specification: Repositories that exist on the local filesystem SHOULD always + # be implicitly allowed to be merged to any remote repository. + is_local_file = file_path or is_local_url + has_remote_repo = has_other_repo_urls and any( + not i.startswith(scheme_file) for i in other_repo_urls + ) + if is_local_file and not has_remote_repo: + # Ignore any local candidates in later comparisons. + logger.debug( + "Ignore local candidate %s in multiple remote repository checks", + candidate, + ) + continue + + remote_candidates.append( + { + "name": candidate_name, + "canonical_name": candidate_canonical_name, + "file_path": file_path, + "url": url, + "project_track_urls": project_track_urls, + "repo_alt_urls": repo_alt_urls, + "other_repo_urls": other_repo_urls, + "is_local_file": is_local_file, + "has_remote_repo": has_remote_repo, + } + ) + + # If there are no remote candidates, then allow merging repositories. + if len(remote_candidates) == 0: + logger.debug("No remote candidates for multiple remote repository checks") + return + + # TODO + if logger.isEnabledFor(logging.INFO): + logger.info("Remote candidates for multiple remote repository checks:") + for candidate in remote_candidates: + logger.info(candidate) + + # TODO: This checks the list of Alternate Locations for the candidates that were + # retrieved. It does not request the metadata from any additional repositories + # revealed via the lists of Alternate Locations. + # Specification: In order for this metadata to be trusted, there MUST be agreement + # between all locations where that project is found as to what the alternate + # locations are. + if len(all_repo_alt_urls) > 1: + match_alt_locs = set(all_repo_alt_urls[0]) + invalid_locations = set() + + for item in all_repo_alt_urls[1:]: + match_alt_locs.intersection_update(item) + invalid_locations.update(match_alt_locs.symmetric_difference(item)) + + if len(invalid_locations) > 0: + raise InvalidAlternativeLocationsUrl( + package=project_name, + remote_repositories=remote_repositories, + invalid_locations=invalid_locations, + ) + + # TODO + # Specification: Otherwise [if metadata allows] we merge the namespaces, + # and continue on. + + # TODO + # Specification: It [Tracks metadata] MUST point to the actual URLs for that + # project, not the base URL for the extended repositories. + # raise InvalidTracksUrl + + # TODO + # Specification: It [Tracks metadata] MUST point to the repositories that “own” + # the namespaces, not another repository that is also tracking that namespace. + # raise InvalidTracksUrl + + # TODO + # Specification: It [Tracks metadata] MUST point to a project with the exact same + # name (after normalization). + # raise InvalidTracksUrl + + # TODO + # Specification: If the project in question only comes from a single repository, + # then there is no chance of dependency confusion, so there’s no reason to do + # anything but allow. + + # Specification: If nothing tells us merging the namespaces is safe, we refuse to + # implicitly assume it is, and generate an error instead. + # Specification: If that metadata does NOT allow [merging namespaces], then + # generate an error. + error = UnsafeMultipleRemoteRepositories( + package=project_name, remote_repositories=remote_repositories + ) + raise error diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 7a8bc9efa5e..45c0f138f71 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -599,7 +599,7 @@ def test_parse_links_json() -> None: ), ] - def _convert_links(link_item): + def _convert_links(link_item: Link) -> dict[str, Optional[str]]: return {key: getattr(link_item, key, None) for key in link_item.__slots__} for index, link in enumerate(links): @@ -763,8 +763,8 @@ def test_parse_links_caches_same_page_by_url() -> None: ) def test_parse_links__alternate_locations_and_tracks( index_name: str, - expected_project_track_urls: list[str], - expected_repo_alt_urls: list[str], + expected_project_track_urls: set[str], + expected_repo_alt_urls: set[str], data: TestData, ) -> None: package_name = "simple" diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 78837b94e8b..884f2f8beec 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -901,3 +901,7 @@ def test_extract_version_from_fragment( ) -> None: version = _extract_version_from_fragment(fragment, canonical_name) assert version == expected + + +class TestCheckMultipleRemoteRepositories: + pass From 0c5a3ef74af3aad814d9886f5b92337bab215ecd Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Thu, 19 Sep 2024 18:56:42 +1000 Subject: [PATCH 04/26] work in progress --- src/pip/_internal/exceptions.py | 50 ++++-- src/pip/_internal/index/package_finder.py | 154 +++++++++-------- src/pip/_internal/models/candidate.py | 18 ++ src/pip/_internal/models/link.py | 28 ++++ tests/unit/test_collector.py | 8 +- tests/unit/test_index.py | 192 +++++++++++++++++++++- 6 files changed, 363 insertions(+), 87 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 2dd4c37145b..8e3654baf2a 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -781,6 +781,12 @@ class InvalidMultipleRemoteRepositories(DiagnosticPipError): """Common error for issues with multiple remote repositories.""" reference = "invalid-multiple-remote-repositories" + _note_suffix = ( + "See PEP 708 for the specification. " + "You can override this check, which will disable the security " + "protection it provides from dependency confusion attacks, " + "by passing --insecure-multiple-remote-repositories." + ) class InvalidTracksUrl(InvalidMultipleRemoteRepositories): @@ -793,6 +799,36 @@ class InvalidTracksUrl(InvalidMultipleRemoteRepositories): reference = "invalid-tracks-url" + def __init__( + self, + *, + package: str, + remote_repositories: set[str], + invalid_tracks: set[str], + ) -> None: + super().__init__( + kind="error", + message=Text( + f"One or more Tracks for {escape(package)} " + "were not valid. " + "The remote repositories are " + f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." + "The invalid tracks are " + f"{'; '.join(sorted(escape(r) for r in invalid_tracks))}." + ), + context=Text( + "Tracks urls must point to the actual URLs for a project, " + "point to the repositories that own the namespaces, and " + "point to a project with the exact same normalized name." + ), + hint_stmt=None, + note_stmt=Text( + "The way to resolve this error is to contact the owners of " + "each remote repository, and ask if it makes sense to " + "configure them to merge namespaces. " + self._note_suffix + ), + ) + class InvalidAlternativeLocationsUrl(InvalidMultipleRemoteRepositories): """The list of Alternate Locations for each repository do not match. @@ -828,12 +864,8 @@ def __init__( hint_stmt=None, note_stmt=Text( "The way to resolve this error is to contact the owners of the package " - "at each remote repository, and ask that the list of " - "Alternate Locations at each is set to the same list. " - "See PEP 708 for the specification. " - "You can override this check, which will disable the security " - "protection it provides from dependency confusion attacks, " - "by passing --insecure-multiple-remote-repositories." + "at each remote repository, and ask if it makes sense to " + "configure them to merge namespaces. " + self._note_suffix ), ) @@ -870,10 +902,6 @@ def __init__(self, *, package: str, remote_repositories: set[str]) -> None: note_stmt=Text( "The way to resolve this error is to contact the remote repositories " "and package owners, and ask if it makes sense to configure them to " - "merge namespaces. " - "See PEP 708 for the specification. " - "You can override this check, which will disable the security " - "protection it provides from dependency confusion attacks, " - "by passing --insecure-multiple-remote-repositories." + "merge namespaces. " + self._note_suffix ), ) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 4e77ec395fc..75a588e100f 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -4,7 +4,9 @@ import functools import itertools import logging +import pathlib import re +import urllib.parse from dataclasses import dataclass from typing import TYPE_CHECKING, FrozenSet, Iterable, List, Optional, Set, Tuple, Union @@ -19,6 +21,7 @@ DistributionNotFound, InvalidAlternativeLocationsUrl, InvalidWheelFilename, + InvalidTracksUrl, UnsafeMultipleRemoteRepositories, UnsupportedWheel, ) @@ -1099,9 +1102,13 @@ def check_multiple_remote_repositories( # Specification: When an installer encounters a project that is using the # alternate locations metadata it SHOULD consider that all repositories named are # extending the same namespace across multiple repositories. + # Implementation: It is not possible to enforce this requirement without access + # to the metadata for all of the remote repositories, as the Tracks metadata + # might point at a repository that does not exist, or at a repository that is + # Tracking another repository. # TODO: Consider whether pip already has, or should add, ways to indicate exactly - # which individual projects to get from which individual repositories. + # which individual projects to get from exactly which repositories. # Specification: If the user has explicitly told the installer that it wants to # fetch a project from a certain set of repositories, then there is no reason # to question that and we assume that they’ve made sure it is safe to merge @@ -1113,77 +1120,62 @@ def check_multiple_remote_repositories( logger.debug("No candidates given to multiple remote repository checks") return - # Pre-calculate the canonical name for later comparisons. + # Calculate the canonical name for later comparisons. canonical_name = canonicalize_name(project_name) # Specification: Look to see if the discovered files span multiple repositories; # if they do then determine if either “Tracks” or “Alternate Locations” metadata - # allows safely merging ALL the repositories where files were discovered together. - scheme_file = "file://" + # allows safely merging together ALL the repositories where files were discovered. remote_candidates = [] - all_remote_urls = set() - all_project_track_urls = [] - all_repo_alt_urls = [] remote_repositories = set() + for candidate in candidates: candidate_name = candidate.name - candidate_canonical_name = canonicalize_name(candidate_name) - - file_path = None - try: - file_path = candidate.link.file_path - except Exception: - pass - url = candidate.link.url - - project_track_urls = candidate.link.project_track_urls or set() - all_project_track_urls.append(project_track_urls) - remote_repositories.update(project_track_urls) - - repo_alt_urls = candidate.link.repo_alt_urls or set() - all_repo_alt_urls.append(repo_alt_urls) - remote_repositories.update(repo_alt_urls) - - other_repo_urls = {*project_track_urls, *repo_alt_urls} - has_other_repo_urls = len(other_repo_urls) > 0 + link = candidate.link + comes_from = link.comes_from - is_local_url = url and url.startswith(scheme_file) - if url and not is_local_url: - all_remote_urls.add(url) + candidate_canonical_name = canonicalize_name(candidate_name) # Specification: Repositories that exist on the local filesystem SHOULD always # be implicitly allowed to be merged to any remote repository. - is_local_file = file_path or is_local_url - has_remote_repo = has_other_repo_urls and any( - not i.startswith(scheme_file) for i in other_repo_urls - ) - if is_local_file and not has_remote_repo: + if link.is_local_only: # Ignore any local candidates in later comparisons. logger.debug( - "Ignore local candidate %s in multiple remote repository checks", + "Ignoring local candidate %s in multiple remote repository checks", candidate, ) continue - remote_candidates.append( - { - "name": candidate_name, - "canonical_name": candidate_canonical_name, - "file_path": file_path, - "url": url, - "project_track_urls": project_track_urls, - "repo_alt_urls": repo_alt_urls, - "other_repo_urls": other_repo_urls, - "is_local_file": is_local_file, - "has_remote_repo": has_remote_repo, - } - ) + try: + page_url = comes_from.url.lstrip() + except AttributeError: + page_url = comes_from.lstrip() + + item = { + "candidate": candidate, + "canonical_name": candidate_canonical_name, + } + remote_candidates.append(item) + + remote_repositories.add(page_url) + remote_repositories.update(link.project_track_urls) + remote_repositories.update(link.repo_alt_urls) # If there are no remote candidates, then allow merging repositories. if len(remote_candidates) == 0: logger.debug("No remote candidates for multiple remote repository checks") return + # Specification: If the project in question only comes from a single repository, + # then there is no chance of dependency confusion, so there’s no reason to do + # anything but allow. + if len(remote_repositories) < 2: + logger.debug( + "No chance for dependency confusion when there is only " + "one remote candidate for multiple remote repository checks" + ) + return + # TODO if logger.isEnabledFor(logging.INFO): logger.info("Remote candidates for multiple remote repository checks:") @@ -1192,10 +1184,11 @@ def check_multiple_remote_repositories( # TODO: This checks the list of Alternate Locations for the candidates that were # retrieved. It does not request the metadata from any additional repositories - # revealed via the lists of Alternate Locations. + # revealed via the lists of Alternate Locations urls or Tracks urls. # Specification: In order for this metadata to be trusted, there MUST be agreement # between all locations where that project is found as to what the alternate # locations are. + all_repo_alt_urls = list(map_alt_urls.keys()) if len(all_repo_alt_urls) > 1: match_alt_locs = set(all_repo_alt_urls[0]) invalid_locations = set() @@ -1204,6 +1197,9 @@ def check_multiple_remote_repositories( match_alt_locs.intersection_update(item) invalid_locations.update(match_alt_locs.symmetric_difference(item)) + logger.debug(match_alt_locs) + logger.debug(invalid_locations) + if len(invalid_locations) > 0: raise InvalidAlternativeLocationsUrl( package=project_name, @@ -1211,29 +1207,49 @@ def check_multiple_remote_repositories( invalid_locations=invalid_locations, ) - # TODO - # Specification: Otherwise [if metadata allows] we merge the namespaces, - # and continue on. - - # TODO - # Specification: It [Tracks metadata] MUST point to the actual URLs for that - # project, not the base URL for the extended repositories. - # raise InvalidTracksUrl - - # TODO - # Specification: It [Tracks metadata] MUST point to the repositories that “own” - # the namespaces, not another repository that is also tracking that namespace. - # raise InvalidTracksUrl + for remote_candidate in remote_candidates: + project_track_urls = remote_candidate.get("candidate").link.project_track_urls + project_track_urls = remote_candidate.get("candidate").link.project_track_urls + page_url = remote_candidate.get("url") + # url_parts = pathlib.Path(urllib.parse.urlsplit(url).path).parts + for project_track_url in project_track_urls: + parts = pathlib.Path(urllib.parse.urlsplit(project_track_url).path).parts + + # Specification: It [Tracks metadata] MUST point to the actual URLs + # for that project, not the base URL for the extended repositories. + # Specification: It [Tracks metadata] MUST point to a project with + # the exact same normalized name. + # Implementation: The normalised project name must be present in one + # of the Tracks url path parts. + # TODO: This assumption about the structure of the url may not hold true + # for all remote repositories. + if not parts or not any( + [canonical_name == canonicalize_name(p) for p in parts] + ): + raise InvalidTracksUrl( + package=project_name, + remote_repositories={page_url}, + invalid_tracks={project_track_url}, + ) - # TODO - # Specification: It [Tracks metadata] MUST point to a project with the exact same - # name (after normalization). - # raise InvalidTracksUrl + # Specification: It [Tracks metadata] MUST point to the repositories + # that “own” the namespaces, not another repository that is also + # tracking that namespace. + # Implementation: An 'owner' repository is one that does not Track the + # same namespace. + # TODO: Without requesting all repositories revealed by metadata, this + # check might pass with incomplete metadata, + # when it would fail with complete metadata. + if project_track_url in map_track_urls: + raise InvalidTracksUrl( + package=project_name, + remote_repositories={page_url}, + invalid_tracks={project_track_url}, + ) # TODO - # Specification: If the project in question only comes from a single repository, - # then there is no chance of dependency confusion, so there’s no reason to do - # anything but allow. + # Specification: Otherwise [if metadata allows] we merge the namespaces, + # and continue on. # Specification: If nothing tells us merging the namespaces is safe, we refuse to # implicitly assume it is, and generate an error instead. diff --git a/src/pip/_internal/models/candidate.py b/src/pip/_internal/models/candidate.py index f27f283154a..5c6dbec6fe5 100644 --- a/src/pip/_internal/models/candidate.py +++ b/src/pip/_internal/models/candidate.py @@ -23,3 +23,21 @@ def __init__(self, name: str, version: str, link: Link) -> None: def __str__(self) -> str: return f"{self.name!r} candidate (version {self.version} at {self.link})" + + +@dataclass(frozen=True) +class RemoteInstallationCandidate: + """Represents a potential "candidate" for installation.""" + + __slots__ = ["canonical_name", "candidate"] + + canonical_name: str + candidate: InstallationCandidate + + def __init__(self, candidate: InstallationCandidate) -> None: + object.__setattr__(self, "name", name) + object.__setattr__(self, "version", parse_version(version)) + object.__setattr__(self, "link", link) + + def __str__(self) -> str: + return f"{self.name!r} candidate (version {self.version} at {self.link})" diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 2bdbb827029..78fcbae778f 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -318,6 +318,9 @@ def from_json( elif not yanked_reason: yanked_reason = None + project_track_urls = {i.strip() for i in project_track_urls if i and i.strip()} + repo_alt_urls = {i.strip() for i in repo_alt_urls if i and i.strip()} + return cls( url, comes_from=page_url, @@ -375,6 +378,9 @@ def from_element( ) metadata_file_data = MetadataFile(None) + project_track_urls = {i.strip() for i in project_track_urls if i and i.strip()} + repo_alt_urls = {i.strip() for i in repo_alt_urls if i and i.strip()} + return cls( url, comes_from=page_url, @@ -551,6 +557,28 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: return False return any(hashes.is_hash_allowed(k, v) for k, v in self._hashes.items()) + @property + def is_local_only(self): + """ + Is this link entirely local, with no metadata pointing to a remote url? + """ + logger.debug( + { + "is_file": self.is_file, + "file_path": self.file_path if self.is_file else None, + "comes_from": self.comes_from, + "metadata_urls": {*self.project_track_urls, *self.repo_alt_urls}, + } + ) + return ( + (self.is_file and self.file_path) + and not self.comes_from + and not any( + not i.startswith("file:") + for i in {*self.project_track_urls, *self.repo_alt_urls} + ) + ) + class _CleanResult(NamedTuple): """Convert link for equivalency check. diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 45c0f138f71..40ee2fc274f 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -537,13 +537,13 @@ def test_parse_links_json() -> None: ], } ).encode("utf8") + # parse_links() is cached by url, so we inject a random uuid to ensure + # the page content isn't cached. url = f"https://example.com/simple-{uuid.uuid4()}/" page = IndexContent( json_bytes, "application/vnd.pypi.simple.v1+json", encoding=None, - # parse_links() is cached by url, so we inject a random uuid to ensure - # the page content isn't cached. url=url, ) links = list(parse_links(page)) @@ -771,13 +771,13 @@ def test_parse_links__alternate_locations_and_tracks( html_path = data.indexes / index_name / package_name / "index.html" html = html_path.read_text() html_bytes = html.encode("utf-8") + # parse_links() is cached by url, so we inject a random uuid to ensure + # the page content isn't cached. url = f"https://example.com/simple-{uuid.uuid4()}/" page = IndexContent( html_bytes, "text/html", encoding=None, - # parse_links() is cached by url, so we inject a random uuid to ensure - # the page content isn't cached. url=url, ) links = list(parse_links(page)) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 884f2f8beec..df9ffbe093a 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,11 +1,19 @@ import logging +import uuid from typing import FrozenSet, List, Optional, Set, Tuple import pytest +from pip._internal.models.candidate import InstallationCandidate from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.tags import Tag -from pip._internal.index.collector import LinkCollector +from pip._internal.index.collector import LinkCollector, IndexContent +from pip._internal.exceptions import ( + InvalidAlternativeLocationsUrl, + InvalidMultipleRemoteRepositories, + InvalidTracksUrl, + UnsafeMultipleRemoteRepositories, +) from pip._internal.index.package_finder import ( CandidateEvaluator, CandidatePreferences, @@ -16,6 +24,7 @@ _check_link_requires_python, _extract_version_from_fragment, _find_name_version_sep, + check_multiple_remote_repositories, filter_unallowed_hashes, ) from pip._internal.models.link import Link @@ -903,5 +912,182 @@ def test_extract_version_from_fragment( assert version == expected -class TestCheckMultipleRemoteRepositories: - pass +def _make_mock_candidate_check_remote_repo( + candidate_name: Optional[str] = None, + version: Optional[str] = None, + comes_from_url: Optional[str] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, +) -> InstallationCandidate: + if candidate_name is None: + candidate_name = "mypackage" + + if version is None: + version = "1.0" + + comes_from = None + if comes_from_url is not None: + comes_from = IndexContent( + b"", + "text/html", + encoding=None, + url=comes_from_url, + ) + + url = f"https://example.com/pkg-{version}.tar.gz" + + link = Link( + url, + comes_from=comes_from, + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, + ) + candidate = InstallationCandidate(candidate_name, version, link) + + return candidate + + +@pytest.mark.parametrize( + "candidates, project_name, expected", + [ + # checks pass when no candidates + ([], "my_package", None), + # checks pass when only one candidate + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ) + ], + "mypackage", + None, + ), + # checks fail when two candidates with different remotes + # and no metadata to enable merging namespaces + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + ), + ], + "mypackage", + UnsafeMultipleRemoteRepositories, + ), + # checks pass when only one candidate with tracks url + # TODO: not making requests to repos revealed via metadata + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + project_track_urls={"https://a.example.com/simple/mypackage"}, + ), + ], + "mypackage", + None, + ), + # checks pass when ony one candidate with alt loc url + # TODO: not making requests to repos revealed via metadata + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + repo_alt_urls={"https://a.example.com/simple/mypackage"}, + ), + ], + "mypackage", + None, + ), + # checks fail when alternate location urls do not agree + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + repo_alt_urls={"https://b.example.com/simple/mypackage"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + repo_alt_urls={"https://c.example.com/simple/mypackage"}, + ), + ], + "mypackage", + InvalidAlternativeLocationsUrl, + ), + # checks fails when track url points to the base url instead of + # the project url + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + project_track_urls={"https://b.example.com"}, + ), + ], + "mypackage", + InvalidTracksUrl, + ), + # checks fail when track url points to another tracker instead of + # the 'owner' project url + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + project_track_urls={"https://b.example.com/simple/mypackage"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + project_track_urls={"https://c.example.com/simple/mypackage"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://c.example.com/simple/mypackage", + ), + ], + "mypackage", + InvalidTracksUrl, + ), + # checks fail when track url points to different name instead of + # the project url with same name + ( + [ + _make_mock_candidate_check_remote_repo( + candidate_name="othername", + comes_from_url=f"https://a.example.com/simple/othername", + project_track_urls={"https://b.example.com/simple/othername"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + ), + ], + "mypackage", + InvalidTracksUrl, + ), + # checks pass when track url points to same normalized name + ( + [ + _make_mock_candidate_check_remote_repo( + candidate_name="a.b-c_d", + comes_from_url=f"https://a.example.com/simple/mypackage", + project_track_urls={"https://b.example.com/simple/a_b.c-d"}, + ), + ], + "a-b_c.d", + None, + ), + ], +) +def test_check_multiple_remote_repositories( + caplog, candidates: List[InstallationCandidate], project_name: str, expected +): + caplog.set_level(logging.DEBUG) + if expected: + with pytest.raises(expected): + check_multiple_remote_repositories(candidates, project_name) + else: + assert check_multiple_remote_repositories(candidates, project_name) is None From 5f1c5c64d8c4c638015551e5583a02b1909f2f52 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Thu, 19 Sep 2024 20:02:58 +1000 Subject: [PATCH 05/26] fix lints --- src/pip/_internal/exceptions.py | 15 ++++++----- src/pip/_internal/index/package_finder.py | 2 +- src/pip/_internal/models/link.py | 4 +-- tests/unit/test_collector.py | 6 ++--- tests/unit/test_index.py | 32 +++++++++++------------ 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 60f5cff78b8..fd8f594b2b5 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -13,7 +13,7 @@ import re import sys from itertools import chain, groupby, repeat -from typing import TYPE_CHECKING, Dict, Iterator, List, Literal, Optional, Union +from typing import TYPE_CHECKING, Dict, Iterator, List, Literal, Optional, Set, Union from pip._vendor.rich.console import Console, ConsoleOptions, RenderResult from pip._vendor.rich.markup import escape @@ -803,8 +803,8 @@ def __init__( self, *, package: str, - remote_repositories: set[str], - invalid_tracks: set[str], + remote_repositories: Set[str], + invalid_tracks: Set[str], ) -> None: super().__init__( kind="error", @@ -843,8 +843,8 @@ def __init__( self, *, package: str, - remote_repositories: set[str], - invalid_locations: set[str], + remote_repositories: Set[str], + invalid_locations: Set[str], ) -> None: super().__init__( kind="error", @@ -882,13 +882,14 @@ class UnsafeMultipleRemoteRepositories(InvalidMultipleRemoteRepositories): reference = "unsafe-multiple-remote-repositories" - def __init__(self, *, package: str, remote_repositories: set[str]) -> None: + def __init__(self, *, package: str, remote_repositories: Set[str]) -> None: super().__init__( kind="error", message=Text( f"More than one remote repository was found for {escape(package)}, " "with no indication that the remote repositories can be safely merged. " - f"The repositories are {'; '.join(sorted(escape(r) for r in remote_repositories))}." + "The repositories are " + f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." ), context=Text( "Multiple remote repositories are not merged by default " diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 75a588e100f..e4d2ce8b39a 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1224,7 +1224,7 @@ def check_multiple_remote_repositories( # TODO: This assumption about the structure of the url may not hold true # for all remote repositories. if not parts or not any( - [canonical_name == canonicalize_name(p) for p in parts] + canonical_name == canonicalize_name(p) for p in parts ): raise InvalidTracksUrl( package=project_name, diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 78fcbae778f..309ab87cd1f 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -279,8 +279,8 @@ def from_json( cls, file_data: Dict[str, Any], page_url: str, - project_track_urls: Optional[set[str]] = None, - repo_alt_urls: Optional[set[str]] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, ) -> Optional["Link"]: """ Convert an pypi json document from a simple repository page into a Link. diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index c5d3658fa40..1c7d1ceae42 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -6,7 +6,7 @@ import uuid from pathlib import Path from textwrap import dedent -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Set from unittest import mock import pytest @@ -763,8 +763,8 @@ def test_parse_links_caches_same_page_by_url() -> None: ) def test_parse_links__alternate_locations_and_tracks( index_name: str, - expected_project_track_urls: set[str], - expected_repo_alt_urls: set[str], + expected_project_track_urls: Set[str], + expected_repo_alt_urls: Set[str], data: TestData, ) -> None: package_name = "simple" diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 4a6719ed1b9..ba9f8b21ac0 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -956,7 +956,7 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ) ], "mypackage", @@ -967,10 +967,10 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", ), ], "mypackage", @@ -981,10 +981,10 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", project_track_urls={"https://a.example.com/simple/mypackage"}, ), ], @@ -996,10 +996,10 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", repo_alt_urls={"https://a.example.com/simple/mypackage"}, ), ], @@ -1010,11 +1010,11 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", repo_alt_urls={"https://b.example.com/simple/mypackage"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", repo_alt_urls={"https://c.example.com/simple/mypackage"}, ), ], @@ -1026,7 +1026,7 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", project_track_urls={"https://b.example.com"}, ), ], @@ -1038,15 +1038,15 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", project_track_urls={"https://b.example.com/simple/mypackage"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", project_track_urls={"https://c.example.com/simple/mypackage"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://c.example.com/simple/mypackage", + comes_from_url="https://c.example.com/simple/mypackage", ), ], "mypackage", @@ -1058,11 +1058,11 @@ def _make_mock_candidate_check_remote_repo( [ _make_mock_candidate_check_remote_repo( candidate_name="othername", - comes_from_url=f"https://a.example.com/simple/othername", + comes_from_url="https://a.example.com/simple/othername", project_track_urls={"https://b.example.com/simple/othername"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", ), ], "mypackage", @@ -1073,7 +1073,7 @@ def _make_mock_candidate_check_remote_repo( [ _make_mock_candidate_check_remote_repo( candidate_name="a.b-c_d", - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", project_track_urls={"https://b.example.com/simple/a_b.c-d"}, ), ], From 1f77352a7f0b7a996611ffd470dac78ee3a4457a Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sat, 28 Sep 2024 19:00:05 +1000 Subject: [PATCH 06/26] work in progress --- src/pip/_internal/exceptions.py | 2 +- src/pip/_internal/index/package_finder.py | 181 +++++++++++++--------- src/pip/_internal/models/candidate.py | 56 ++++++- src/pip/_internal/models/link.py | 1 - 4 files changed, 160 insertions(+), 80 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index fd8f594b2b5..8aac5da072e 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -853,7 +853,7 @@ def __init__( "were different among the remote repositories. " "The remote repositories are " f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." - "The alternate locations not not agreed by all remote " + "The alternate locations not agreed by all remote " "repository are " f"{'; '.join(sorted(escape(r) for r in invalid_locations))}." ), diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index e4d2ce8b39a..fb280453cab 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -26,7 +26,8 @@ UnsupportedWheel, ) from pip._internal.index.collector import LinkCollector, parse_links -from pip._internal.models.candidate import InstallationCandidate +from pip._internal.models.candidate import InstallationCandidate, \ + RemoteInstallationCandidate from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope @@ -1044,7 +1045,7 @@ def check_multiple_remote_repositories( logical namespace, while enforcing a more secure default. Returns None if checks pass, otherwise will raise an error with details of the - failed checks. + first failed check. """ # NOTE: The checks in this function must occur after: @@ -1077,6 +1078,7 @@ def check_multiple_remote_repositories( # some names do not are explicitly allowed. # TODO: This requirement doesn't look like something that pip can do anything about. + # How would pip be able to tell whether two urls are the 'same project'? # Specification: All [Tracks metadata] URLs MUST represent the same “project” as # the project in the extending repository. This does not mean that they need to # serve the same files. It is valid for them to include binaries built on @@ -1086,10 +1088,14 @@ def check_multiple_remote_repositories( # the “same” project. # TODO: This requirement doesn't look like something that pip can do anything about. + # Tracks metadata is not required, so anything that uses tracks meta + # must deal with it not being available. # Specification: It [Tracks metadata] is NOT required that every name in a # repository tracks the same repository, or that they all track a repository at all. # TODO: This requirement doesn't look like something that pip can do anything about. + # How would pip be able to tell whether the metadata is under the control of + # the repository operators and not any individual publisher using the repository? # Specification: It [repository Tracks metadata] MUST be under the control of the # repository operators themselves, not any individual publisher using that # repository. “Repository Operator” can also include anyone who managed the overall @@ -1098,7 +1104,9 @@ def check_multiple_remote_repositories( # owns/manages the entire namespace of that repository. # TODO: Consider making requests to repositories revealed via Alternate Locations - # or Tracks metadata, to assess the metadata of those additional repositories. + # or Tracks metadata, to assess the metadata of those additional repositories. + # This would be a fair bit more functionality to include. + # Maybe a subsequent PR, if this functionality is desirable? # Specification: When an installer encounters a project that is using the # alternate locations metadata it SHOULD consider that all repositories named are # extending the same namespace across multiple repositories. @@ -1109,6 +1117,7 @@ def check_multiple_remote_repositories( # TODO: Consider whether pip already has, or should add, ways to indicate exactly # which individual projects to get from exactly which repositories. + # This seems to be separate functionality, that could be added in a separate PR. # Specification: If the user has explicitly told the installer that it wants to # fetch a project from a certain set of repositories, then there is no reason # to question that and we assume that they’ve made sure it is safe to merge @@ -1118,7 +1127,7 @@ def check_multiple_remote_repositories( # When no candidates are provided, then no checks are relevant, so just return. if candidates is None or len(candidates) == 0: logger.debug("No candidates given to multiple remote repository checks") - return + return None # Calculate the canonical name for later comparisons. canonical_name = canonicalize_name(project_name) @@ -1127,14 +1136,19 @@ def check_multiple_remote_repositories( # if they do then determine if either “Tracks” or “Alternate Locations” metadata # allows safely merging together ALL the repositories where files were discovered. remote_candidates = [] - remote_repositories = set() - + # all known remote repositories + known_remote_repo_urls = set() + # all known alternate location urls + known_alternate_urls = set() + # the alternate location urls that are not present in all remote candidates + mismatch_alternate_urls = set() + # all known remote repositories that do not track any other repos + known_owner_repo_urls = set() + # all known remote repositories that do track other repos + known_tracker_repo_urls = set() for candidate in candidates: candidate_name = candidate.name link = candidate.link - comes_from = link.comes_from - - candidate_canonical_name = canonicalize_name(candidate_name) # Specification: Repositories that exist on the local filesystem SHOULD always # be implicitly allowed to be merged to any remote repository. @@ -1146,83 +1160,88 @@ def check_multiple_remote_repositories( ) continue - try: - page_url = comes_from.url.lstrip() - except AttributeError: - page_url = comes_from.lstrip() + remote_candidate = RemoteInstallationCandidate( + canonical_name=canonicalize_name(candidate_name), + candidate=candidate, + ) + remote_candidates.append(remote_candidate) - item = { - "candidate": candidate, - "canonical_name": candidate_canonical_name, - } - remote_candidates.append(item) + # populate the known urls + known_remote_repo_urls.update(remote_candidate.remote_repository_urls) + known_alternate_urls.update(remote_candidate.alternate_location_urls) + if len(remote_candidate.project_track_urls) > 0: + known_tracker_repo_urls.update(remote_candidate.url) + else: + known_owner_repo_urls.update(remote_candidate.url) - remote_repositories.add(page_url) - remote_repositories.update(link.project_track_urls) - remote_repositories.update(link.repo_alt_urls) + # Update the set, keeping only elements found in either set, but not in both. + # This should allow all the items that are not present for all remote candidates + # to be tracked. + mismatch_alternate_urls.symmetric_difference_update( + remote_candidate.alternate_location_urls + ) # If there are no remote candidates, then allow merging repositories. if len(remote_candidates) == 0: logger.debug("No remote candidates for multiple remote repository checks") - return + return None # Specification: If the project in question only comes from a single repository, # then there is no chance of dependency confusion, so there’s no reason to do # anything but allow. - if len(remote_repositories) < 2: + if len(known_remote_repo_urls) == 1: logger.debug( "No chance for dependency confusion when there is only " "one remote candidate for multiple remote repository checks" ) - return - - # TODO - if logger.isEnabledFor(logging.INFO): - logger.info("Remote candidates for multiple remote repository checks:") - for candidate in remote_candidates: - logger.info(candidate) - - # TODO: This checks the list of Alternate Locations for the candidates that were - # retrieved. It does not request the metadata from any additional repositories - # revealed via the lists of Alternate Locations urls or Tracks urls. + return None + if len(known_remote_repo_urls) == 0: + msg = ("Unexpected situation where there are remote candidates, " + "but no remote repositories") + logger.warning(msg) + raise ValueError(msg) + + # This checks the list of Alternate Locations for the candidates that were + # retrieved. Each remote candidate may define a different list of Tracks and + # Alternate Location metadata. + # TODO: The Alternate Locations and Tracks urls revealed by the candidate metadata + # are not requested to check that their metadata matches the candidate metadata. + # This means that the known candidate metadata might agree and pass this check, + # while retrieving the metadata from additional urls would not agree and would + # fail this check. # Specification: In order for this metadata to be trusted, there MUST be agreement # between all locations where that project is found as to what the alternate # locations are. - all_repo_alt_urls = list(map_alt_urls.keys()) - if len(all_repo_alt_urls) > 1: - match_alt_locs = set(all_repo_alt_urls[0]) - invalid_locations = set() - - for item in all_repo_alt_urls[1:]: - match_alt_locs.intersection_update(item) - invalid_locations.update(match_alt_locs.symmetric_difference(item)) - - logger.debug(match_alt_locs) - logger.debug(invalid_locations) - - if len(invalid_locations) > 0: - raise InvalidAlternativeLocationsUrl( - package=project_name, - remote_repositories=remote_repositories, - invalid_locations=invalid_locations, - ) + if len(mismatch_alternate_urls) > 0: + raise InvalidAlternativeLocationsUrl( + package=project_name, + remote_repositories=known_remote_repo_urls, + invalid_locations=mismatch_alternate_urls, + ) + # Check the Tracks metadata. for remote_candidate in remote_candidates: - project_track_urls = remote_candidate.get("candidate").link.project_track_urls - project_track_urls = remote_candidate.get("candidate").link.project_track_urls - page_url = remote_candidate.get("url") - # url_parts = pathlib.Path(urllib.parse.urlsplit(url).path).parts + project_track_urls = remote_candidate.project_track_urls + page_url = remote_candidate.url for project_track_url in project_track_urls: parts = pathlib.Path(urllib.parse.urlsplit(project_track_url).path).parts + parts = list(parts) + parts.reverse() # Specification: It [Tracks metadata] MUST point to the actual URLs # for that project, not the base URL for the extended repositories. + # Implementation Note: Assume that the actual url for the project will + # contain the canonical project name as a path part, and the base URL + # will not contain the canonical project name as a path part. + # Specification: It [Tracks metadata] MUST point to a project with # the exact same normalized name. - # Implementation: The normalised project name must be present in one - # of the Tracks url path parts. - # TODO: This assumption about the structure of the url may not hold true - # for all remote repositories. + # Implementation Note: Assume that projects that are configured to be the + # 'exact same' will have a Tracks url path part with the canonical + # project name, usually the last path part. + + # Note: These assumptions about the structure of the Tracks url may not + # hold true for all remote repositories. if not parts or not any( canonical_name == canonicalize_name(p) for p in parts ): @@ -1235,27 +1254,43 @@ def check_multiple_remote_repositories( # Specification: It [Tracks metadata] MUST point to the repositories # that “own” the namespaces, not another repository that is also # tracking that namespace. - # Implementation: An 'owner' repository is one that does not Track the - # same namespace. + # Implementation Note: An 'owner' repository is one that has no Tracks + # metadata. # TODO: Without requesting all repositories revealed by metadata, this - # check might pass with incomplete metadata, - # when it would fail with complete metadata. - if project_track_url in map_track_urls: + # check might pass with incomplete knowledge of all metadata, + # when it would fail after retrieving all metadata. + if project_track_url not in known_owner_repo_urls: raise InvalidTracksUrl( package=project_name, remote_repositories={page_url}, invalid_tracks={project_track_url}, ) - # TODO - # Specification: Otherwise [if metadata allows] we merge the namespaces, - # and continue on. - # Specification: If nothing tells us merging the namespaces is safe, we refuse to # implicitly assume it is, and generate an error instead. # Specification: If that metadata does NOT allow [merging namespaces], then # generate an error. - error = UnsafeMultipleRemoteRepositories( - package=project_name, remote_repositories=remote_repositories - ) - raise error + # Implementation Note: If there are two or more remote candidates, and any of them + # don't have valid Alternate Locations and/or Tracks metadata, then fail. + for remote_candidate in remote_candidates: + candidate_alt_urls = remote_candidate.alternate_location_urls + + invalid_alt_urls = known_alternate_urls - candidate_alt_urls + has_alts = len(candidate_alt_urls) > 0 + has_tracks = len(remote_candidate.project_track_urls) > 0 + + is_invalid = any([ + not has_alts and not has_tracks, + not has_tracks and invalid_alt_urls, + ]) + + if is_invalid: + error = UnsafeMultipleRemoteRepositories( + package=project_name, + remote_repositories=known_remote_repo_urls, + ) + raise error + + # Specification: Otherwise [if metadata allows] we merge the namespaces, + # and continue on. + return None diff --git a/src/pip/_internal/models/candidate.py b/src/pip/_internal/models/candidate.py index 5c6dbec6fe5..f53b05f20f7 100644 --- a/src/pip/_internal/models/candidate.py +++ b/src/pip/_internal/models/candidate.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import Set, Optional from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version @@ -34,10 +35,55 @@ class RemoteInstallationCandidate: canonical_name: str candidate: InstallationCandidate - def __init__(self, candidate: InstallationCandidate) -> None: - object.__setattr__(self, "name", name) - object.__setattr__(self, "version", parse_version(version)) - object.__setattr__(self, "link", link) + def __init__(self, canonical_name:str, candidate: InstallationCandidate) -> None: + object.__setattr__(self, "canonical_name", canonical_name) + object.__setattr__(self, "candidate", candidate) def __str__(self) -> str: - return f"{self.name!r} candidate (version {self.version} at {self.link})" + return f"{self.canonical_name!r} candidate" + + @property + def _link(self) -> Optional[Link]: + if not self.candidate: + return None + if not self.candidate.link: + return None + return self.candidate.link + + @property + def url(self) -> Optional[str]: + """The remote url that contains the metadata for this installation candidate.""" + link = self._link + if not link: + return None + if not link.comes_from: + return None + if hasattr(link.comes_from, 'url') and link.comes_from.url: + return self.candidate.link.comes_from.url.lstrip() + if link.comes_from: + return self.candidate.link.comes_from.lstrip() + return None + + + @property + def remote_repository_urls(self) -> Set[str]: + """Remote repository urls from Tracks and Alternate Locations metadata.""" + return {*self.project_track_urls, *self.alternate_location_urls} + + @property + def project_track_urls(self) -> Set[str]: + """Remote repository urls from Tracks metadata.""" + result = {} + link = self._link + if link and link.project_track_urls: + result.update(link.project_track_urls) + return {i for i in result if i} + + @property + def alternate_location_urls(self) -> Set[str]: + """Remote repository urls from Alternate Locations metadata.""" + result = {self.url} + link = self._link + if link and link.repo_alt_urls: + result.update(link.repo_alt_urls) + return {i for i in result if i} diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 309ab87cd1f..9f008a27c7b 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -270,7 +270,6 @@ def __init__( self.cache_link_parsing = cache_link_parsing self.egg_fragment = self._egg_fragment() - # PEP 708 self.project_track_urls = project_track_urls or set() self.repo_alt_urls = repo_alt_urls or set() From 6cdb2fd234b939b77c573ea3d938951334e2ac27 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sun, 30 Jun 2024 20:39:58 +1000 Subject: [PATCH 07/26] add repository information parsing --- news/11784.feature.rst | 1 + src/pip/_internal/index/collector.py | 35 +++++++++++++++++++++++----- src/pip/_internal/models/link.py | 26 ++++++++++++++++++--- 3 files changed, 53 insertions(+), 9 deletions(-) create mode 100644 news/11784.feature.rst diff --git a/news/11784.feature.rst b/news/11784.feature.rst new file mode 100644 index 00000000000..c2a0ce56040 --- /dev/null +++ b/news/11784.feature.rst @@ -0,0 +1 @@ +Introduce repository alternate locations and project tracking, as per PEP 708. \ No newline at end of file diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index a9a8dde8ba4..aacd1e69dab 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -225,7 +225,12 @@ def parse_links(page: IndexContent) -> Iterable[Link]: if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): data = json.loads(page.content) for file in data.get("files", []): - link = Link.from_json(file, page.url) + link = Link.from_json( + file, + page_url=page.url, + project_track_urls=data.get("meta", {}).get('tracks', []), + repo_alt_urls=data.get("alternate-locations", []), + ) if link is None: continue yield link @@ -238,7 +243,13 @@ def parse_links(page: IndexContent) -> Iterable[Link]: url = page.url base_url = parser.base_url or url for anchor in parser.anchors: - link = Link.from_element(anchor, page_url=url, base_url=base_url) + link = Link.from_element( + anchor, + page_url=url, + base_url=base_url, + project_track_urls=parser.project_track_urls, + repo_alt_urls=parser.repo_alt_urls, + ) if link is None: continue yield link @@ -275,8 +286,10 @@ def __init__(self, url: str) -> None: super().__init__(convert_charrefs=True) self.url: str = url - self.base_url: str | None = None - self.anchors: list[dict[str, str | None]] = [] + self.base_url: Optional[str] = None + self.anchors: List[Dict[str, Optional[str]]] = [] + self.project_track_urls: List[str] = [] + self.repo_alt_urls: List[str] = [] def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: if tag == "base" and self.base_url is None: @@ -285,8 +298,18 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None self.base_url = href elif tag == "a": self.anchors.append(dict(attrs)) - - def get_href(self, attrs: list[tuple[str, str | None]]) -> str | None: + elif tag == "meta": + for name, value in attrs: + if not value or not value.strip(): + continue + url = value.strip() + # PEP 708 + if name == "pypi:tracks" and url not in self.project_track_urls: + self.project_track_urls.append(url) + elif name == "pypi:alternate-locations" and url not in self.repo_alt_urls: + self.repo_alt_urls.append(url) + + def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: for name, value in attrs: if name == "href": return value diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 295035fc8bf..a6055c302f5 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -209,6 +209,8 @@ class Link: "metadata_file_data", "cache_link_parsing", "egg_fragment", + "project_track_urls", + "repo_alt_urls", ] def __init__( @@ -219,7 +221,9 @@ def __init__( yanked_reason: str | None = None, metadata_file_data: MetadataFile | None = None, cache_link_parsing: bool = True, - hashes: Mapping[str, str] | None = None, + hashes: Optional[Mapping[str, str]] = None, + project_track_urls: Optional[List[str]] = None, + repo_alt_urls: Optional[List[str]] = None, ) -> None: """ :param url: url of the resource pointed to (href of the link) @@ -244,6 +248,10 @@ def __init__( URLs should generally have this set to False, for example. :param hashes: A mapping of hash names to digests to allow us to determine the validity of a download. + :param project_track_urls: An optional list of urls pointing to the same + project in other repositories. Defined by the repository operators. + :param repo_alt_urls: An optional list of urls pointing to alternate + locations for the project. Defined by the project owners. """ # The comes_from, requires_python, and metadata_file_data arguments are @@ -276,12 +284,18 @@ def __init__( self.cache_link_parsing = cache_link_parsing self.egg_fragment = self._egg_fragment() + # PEP 708 + self.project_track_urls = project_track_urls or [] + self.repo_alt_urls = repo_alt_urls or [] + @classmethod def from_json( cls, file_data: dict[str, Any], page_url: str, - ) -> Link | None: + project_track_urls: Optional[List[str]] = None, + repo_alt_urls: Optional[List[str]] = None, + ) -> Optional["Link"]: """ Convert an pypi json document from a simple repository page into a Link. """ @@ -325,6 +339,8 @@ def from_json( yanked_reason=yanked_reason, hashes=hashes, metadata_file_data=metadata_file_data, + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, ) @classmethod @@ -333,7 +349,9 @@ def from_element( anchor_attribs: dict[str, str | None], page_url: str, base_url: str, - ) -> Link | None: + project_track_urls: Optional[List[str]] = None, + repo_alt_urls: Optional[List[str]] = None, + ) -> Optional["Link"]: """ Convert an anchor element's attributes in a simple repository page to a Link. """ @@ -377,6 +395,8 @@ def from_element( requires_python=pyrequire, yanked_reason=yanked_reason, metadata_file_data=metadata_file_data, + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, ) def __str__(self) -> str: From 7059db3fad820cef3dfff551b58df69c62324d7f Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sat, 27 Jul 2024 17:17:31 +1000 Subject: [PATCH 08/26] add tests for parsing repository details --- src/pip/_internal/index/collector.py | 61 +++++++++++----- src/pip/_internal/models/link.py | 24 ++++--- .../repository-alternate-01/simple/index.html | 11 +++ .../repository-alternate-02/simple/index.html | 10 +++ .../repository-tracks-01/simple/index.html | 10 +++ tests/unit/test_collector.py | 71 ++++++++++++++++++- 6 files changed, 159 insertions(+), 28 deletions(-) create mode 100644 tests/data/indexes/repository-alternate-01/simple/index.html create mode 100644 tests/data/indexes/repository-alternate-02/simple/index.html create mode 100644 tests/data/indexes/repository-tracks-01/simple/index.html diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index aacd1e69dab..5b4a69f099d 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -20,6 +20,10 @@ Callable, NamedTuple, Protocol, + Sequence, + Set, + Tuple, + Union, ) from pip._vendor import requests @@ -27,7 +31,12 @@ from pip._vendor.requests.exceptions import RetryError, SSLError from pip._internal.exceptions import NetworkConnectionError -from pip._internal.models.link import Link +from pip._internal.models.link import ( + Link, + HEAD_META_PREFIX, + HEAD_META_ALTERNATE_LOCATIONS, + HEAD_META_TRACKS, +) from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession from pip._internal.network.utils import raise_for_status @@ -219,17 +228,21 @@ def wrapper_wrapper(page: IndexContent) -> list[Link]: def parse_links(page: IndexContent) -> Iterable[Link]: """ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. + Includes known metadata from the HTML header. """ - + url = page.url content_type_l = page.content_type.lower() if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): data = json.loads(page.content) + project_track_urls = set(data.get("meta", {}).get("tracks", [])) + repo_alt_urls = set(data.get("alternate-locations", [])) + repo_alt_urls.add(page.url) for file in data.get("files", []): link = Link.from_json( file, page_url=page.url, - project_track_urls=data.get("meta", {}).get('tracks', []), - repo_alt_urls=data.get("alternate-locations", []), + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, ) if link is None: continue @@ -240,15 +253,16 @@ def parse_links(page: IndexContent) -> Iterable[Link]: encoding = page.encoding or "utf-8" parser.feed(page.content.decode(encoding)) - url = page.url base_url = parser.base_url or url for anchor in parser.anchors: + repo_alt_urls = parser.repo_alt_urls or set() + repo_alt_urls.add(page.url) link = Link.from_element( anchor, page_url=url, base_url=base_url, project_track_urls=parser.project_track_urls, - repo_alt_urls=parser.repo_alt_urls, + repo_alt_urls=repo_alt_urls, ) if link is None: continue @@ -288,8 +302,8 @@ def __init__(self, url: str) -> None: self.url: str = url self.base_url: Optional[str] = None self.anchors: List[Dict[str, Optional[str]]] = [] - self.project_track_urls: List[str] = [] - self.repo_alt_urls: List[str] = [] + self.project_track_urls: Set[str] = set() + self.repo_alt_urls: Set[str] = set() def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: if tag == "base" and self.base_url is None: @@ -299,15 +313,20 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None elif tag == "a": self.anchors.append(dict(attrs)) elif tag == "meta": - for name, value in attrs: - if not value or not value.strip(): - continue - url = value.strip() - # PEP 708 - if name == "pypi:tracks" and url not in self.project_track_urls: - self.project_track_urls.append(url) - elif name == "pypi:alternate-locations" and url not in self.repo_alt_urls: - self.repo_alt_urls.append(url) + meta_attrs = dict(attrs) + meta_key = meta_attrs.get("name", "").strip() + meta_val = meta_attrs.get("content", "").strip() + if meta_key and meta_val: + if ( + meta_key == self._meta_key_tracks + and meta_val not in self.project_track_urls + ): + self.project_track_urls.add(meta_val) + elif ( + meta_key == self._meta_key_alternate_locations + and meta_val not in self.repo_alt_urls + ): + self.repo_alt_urls.add(meta_val) def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: for name, value in attrs: @@ -315,6 +334,14 @@ def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: return value return None + @functools.cached_property + def _meta_key_tracks(self): + return f"{HEAD_META_PREFIX}:{HEAD_META_TRACKS}" + + @functools.cached_property + def _meta_key_alternate_locations(self): + return f"{HEAD_META_PREFIX}:{HEAD_META_ALTERNATE_LOCATIONS}" + def _handle_get_simple_fail( link: Link, diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index a6055c302f5..1771c9af730 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -13,6 +13,10 @@ TYPE_CHECKING, Any, NamedTuple, + Optional, + Set, + Tuple, + Union, ) from pip._internal.utils.deprecation import deprecated @@ -36,6 +40,10 @@ # we will pick to use. _SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5") +HEAD_META_PREFIX = "pypi" +HEAD_META_ALTERNATE_LOCATIONS = "alternate-locations" +HEAD_META_TRACKS = "tracks" + @dataclass(frozen=True) class LinkHash: @@ -222,8 +230,8 @@ def __init__( metadata_file_data: MetadataFile | None = None, cache_link_parsing: bool = True, hashes: Optional[Mapping[str, str]] = None, - project_track_urls: Optional[List[str]] = None, - repo_alt_urls: Optional[List[str]] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, ) -> None: """ :param url: url of the resource pointed to (href of the link) @@ -285,16 +293,16 @@ def __init__( self.egg_fragment = self._egg_fragment() # PEP 708 - self.project_track_urls = project_track_urls or [] - self.repo_alt_urls = repo_alt_urls or [] + self.project_track_urls = project_track_urls or set() + self.repo_alt_urls = repo_alt_urls or set() @classmethod def from_json( cls, file_data: dict[str, Any], page_url: str, - project_track_urls: Optional[List[str]] = None, - repo_alt_urls: Optional[List[str]] = None, + project_track_urls: Optional[set[str]] = None, + repo_alt_urls: Optional[set[str]] = None, ) -> Optional["Link"]: """ Convert an pypi json document from a simple repository page into a Link. @@ -349,8 +357,8 @@ def from_element( anchor_attribs: dict[str, str | None], page_url: str, base_url: str, - project_track_urls: Optional[List[str]] = None, - repo_alt_urls: Optional[List[str]] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, ) -> Optional["Link"]: """ Convert an anchor element's attributes in a simple repository page to a Link. diff --git a/tests/data/indexes/repository-alternate-01/simple/index.html b/tests/data/indexes/repository-alternate-01/simple/index.html new file mode 100644 index 00000000000..e9174877f7d --- /dev/null +++ b/tests/data/indexes/repository-alternate-01/simple/index.html @@ -0,0 +1,11 @@ + + + + + + + + +simple-1.0.tar.gz + + diff --git a/tests/data/indexes/repository-alternate-02/simple/index.html b/tests/data/indexes/repository-alternate-02/simple/index.html new file mode 100644 index 00000000000..c5e6f02039c --- /dev/null +++ b/tests/data/indexes/repository-alternate-02/simple/index.html @@ -0,0 +1,10 @@ + + + + + + + +simple-1.0.tar.gz + + diff --git a/tests/data/indexes/repository-tracks-01/simple/index.html b/tests/data/indexes/repository-tracks-01/simple/index.html new file mode 100644 index 00000000000..5b7aa5ac26f --- /dev/null +++ b/tests/data/indexes/repository-tracks-01/simple/index.html @@ -0,0 +1,10 @@ + + + + + + + +simple-1.0.tar.gz + + diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index fa688f8e42f..31c01423e2b 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -478,10 +478,18 @@ def test_parse_links__requires_python(anchor_html: str, expected: str | None) -> # in test_download.py execute over both html and json indices with # a pytest.mark.parameterize decorator to ensure nothing slips through the cracks. def test_parse_links_json() -> None: + meta_tracks_alt_repos = [ + "https://example.com/simple/holygrail/", + "https://example.net/simple/holygrail/", + ] json_bytes = json.dumps( { - "meta": {"api-version": "1.0"}, + "meta": { + "api-version": "1.0", + "tracks": list(meta_tracks_alt_repos), + }, "name": "holygrail", + "alternate-locations": list(meta_tracks_alt_repos), "files": [ { "filename": "holygrail-1.0.tar.gz", @@ -525,23 +533,26 @@ def test_parse_links_json() -> None: ], } ).encode("utf8") + url = f"https://example.com/simple-{uuid.uuid4()}/" page = IndexContent( json_bytes, "application/vnd.pypi.simple.v1+json", encoding=None, # parse_links() is cached by url, so we inject a random uuid to ensure # the page content isn't cached. - url=f"https://example.com/simple-{uuid.uuid4()}/", + url=url, ) links = list(parse_links(page)) - assert links == [ + expected = [ Link( "https://example.com/files/holygrail-1.0.tar.gz", comes_from=page.url, requires_python=">=3.7", yanked_reason="Had a vulnerability", hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -549,6 +560,8 @@ def test_parse_links_json() -> None: requires_python=">=3.7", yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -557,6 +570,8 @@ def test_parse_links_json() -> None: yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -565,6 +580,8 @@ def test_parse_links_json() -> None: yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), Link( "https://example.com/files/holygrail-1.0-py3-none-any.whl", @@ -573,9 +590,17 @@ def test_parse_links_json() -> None: yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + project_track_urls=set(meta_tracks_alt_repos), + repo_alt_urls={*meta_tracks_alt_repos, url}, ), ] + def _convert_links(link_item): + return {key: getattr(link_item, key, None) for key in link_item.__slots__} + + for index, link in enumerate(links): + assert _convert_links(link) == _convert_links(expected[index]) + # Ensure the metadata info can be parsed into the correct link. metadata_link = links[2].metadata_link() assert metadata_link is not None @@ -717,6 +742,46 @@ def test_parse_links_caches_same_page_by_url() -> None: assert "pkg2" in parsed_links_3[0].url +@pytest.mark.parametrize( + ("index_name", "expected_project_track_urls", "expected_repo_alt_urls"), + [ + ( + "repository-alternate-01", + set(), + { + "../../repository-alternate-02/simple/", + "../../repository-tracks-01/simple/", + }, + ), + ("repository-alternate-02", set(), {"../../repository-alternate-01/simple/"}), + ("repository-tracks-01", {"../../repository-alternate-01/simple/"}, set()), + ], +) +def test_parse_links__alternate_locations_and_tracks( + index_name: str, + expected_project_track_urls: list[str], + expected_repo_alt_urls: list[str], + data: TestData, +) -> None: + package_name = "simple" + html_path = data.indexes / index_name / package_name / "index.html" + html = html_path.read_text() + html_bytes = html.encode("utf-8") + url = f"https://example.com/simple-{uuid.uuid4()}/" + page = IndexContent( + html_bytes, + "text/html", + encoding=None, + # parse_links() is cached by url, so we inject a random uuid to ensure + # the page content isn't cached. + url=url, + ) + links = list(parse_links(page)) + for link in links: + assert link.project_track_urls == expected_project_track_urls + assert link.repo_alt_urls == {*expected_repo_alt_urls, url} + + @mock.patch("pip._internal.index.collector.raise_for_status") def test_request_http_error( mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture From 30f8af0f0f7d1333b334e00bc5c60917f8de5330 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sat, 3 Aug 2024 15:18:48 +1000 Subject: [PATCH 09/26] add initial checks from specification Add exceptions. Fix some lint issues. --- news/11784.feature.rst | 2 +- src/pip/_internal/exceptions.py | 154 +++++++-------- src/pip/_internal/index/collector.py | 8 +- src/pip/_internal/index/package_finder.py | 225 ++++++++++++++++++++++ tests/unit/test_collector.py | 6 +- tests/unit/test_index.py | 4 + 6 files changed, 316 insertions(+), 83 deletions(-) diff --git a/news/11784.feature.rst b/news/11784.feature.rst index c2a0ce56040..6d9c77d23d8 100644 --- a/news/11784.feature.rst +++ b/news/11784.feature.rst @@ -1 +1 @@ -Introduce repository alternate locations and project tracking, as per PEP 708. \ No newline at end of file +Introduce repository alternate locations and project tracking, as per PEP 708. diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 8bcb46212f8..0726d949f3d 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -800,99 +800,103 @@ def __init__(self, *, distribution: BaseDistribution) -> None: ) -class InvalidInstalledPackage(DiagnosticPipError): - reference = "invalid-installed-package" +class InvalidMultipleRemoteRepositories(DiagnosticPipError): + """Common error for issues with multiple remote repositories.""" + + reference = "invalid-multiple-remote-repositories" + + +class InvalidTracksUrl(InvalidMultipleRemoteRepositories): + """There was an issue with a Tracks metadata url. + + Tracks urls must point to the actual URLs for that project, + point to the repositories that own the namespaces, and + point to a project with the exact same name (after normalization). + """ + + reference = "invalid-tracks-url" + + +class InvalidAlternativeLocationsUrl(InvalidMultipleRemoteRepositories): + """The list of Alternate Locations for each repository do not match. + + In order for this metadata to be trusted, there MUST be agreement between + all locations where that project is found as to what the alternate locations are. + """ + + reference = "invalid-alternative-locations" def __init__( self, *, - dist: BaseDistribution, - invalid_exc: InvalidRequirement | InvalidVersion, + package: str, + remote_repositories: set[str], + invalid_locations: set[str], ) -> None: - installed_location = dist.installed_location - - if isinstance(invalid_exc, InvalidRequirement): - invalid_type = "requirement" - else: - invalid_type = "version" - super().__init__( + kind="error", message=Text( - f"Cannot process installed package {dist} " - + (f"in {installed_location!r} " if installed_location else "") - + f"because it has an invalid {invalid_type}:\n{invalid_exc.args[0]}" + f"One or more Alternate Locations for {escape(package)} " + "were different among the remote repositories. " + "The remote repositories are " + f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." + "The alternate locations not not agreed by all remote " + "repository are " + f"{'; '.join(sorted(escape(r) for r in invalid_locations))}." ), - context=( - "Starting with pip 24.1, packages with invalid " - f"{invalid_type}s can not be processed." + context=Text( + "To be able to trust the remote repository Alternate Locations, " + "all remote repositories must agree on the list of Locations." + ), + hint_stmt=None, + note_stmt=Text( + "The way to resolve this error is to contact the owners of the package " + "at each remote repository, and ask that the list of " + "Alternate Locations at each is set to the same list. " + "See PEP 708 for the specification. " + "You can override this check, which will disable the security " + "protection it provides from dependency confusion attacks, " + "by passing --insecure-multiple-remote-repositories." ), - hint_stmt="To proceed this package must be uninstalled.", ) -class IncompleteDownloadError(DiagnosticPipError): - """Raised when the downloader receives fewer bytes than advertised - in the Content-Length header.""" - - reference = "incomplete-download" - - def __init__(self, download: _FileDownload) -> None: - # Dodge circular import. - from pip._internal.utils.misc import format_size +class UnsafeMultipleRemoteRepositories(InvalidMultipleRemoteRepositories): + """More than one remote repository was provided for a package, + with no indication that the remote repositories can be safely merged. - assert download.size is not None - download_status = ( - f"{format_size(download.bytes_received)}/{format_size(download.size)}" - ) - if download.reattempts: - retry_status = f"after {download.reattempts + 1} attempts " - hint = "Use --resume-retries to configure resume attempt limit." - else: - # Download retrying is not enabled. - retry_status = "" - hint = "Consider using --resume-retries to enable download resumption." - message = Text( - f"Download failed {retry_status}because not enough bytes " - f"were received ({download_status})" - ) + The repositories, packages, or user did not indicate that + it is safe to merge remote repositories. - super().__init__( - message=message, - context=f"URL: {download.link.redacted_url}", - hint_stmt=hint, - note_stmt="This is an issue with network connectivity, not pip.", - ) - - -class ResolutionTooDeepError(DiagnosticPipError): - """Raised when the dependency resolver exceeds the maximum recursion depth.""" + Multiple remote repositories are not merged by default + to reduce the risk of dependency confusion attacks.""" - reference = "resolution-too-deep" + reference = "unsafe-multiple-remote-repositories" - def __init__(self) -> None: + def __init__(self, *, package: str, remote_repositories: set[str]) -> None: super().__init__( - message="Dependency resolution exceeded maximum depth", - context=( - "Pip cannot resolve the current dependencies as the dependency graph " - "is too complex for pip to solve efficiently." + kind="error", + message=Text( + f"More than one remote repository was found for {escape(package)}, " + "with no indication that the remote repositories can be safely merged. " + f"The repositories are {'; '.join(sorted(escape(r) for r in remote_repositories))}." ), - hint_stmt=( - "Try adding lower bounds to constrain your dependencies, " - "for example: 'package>=2.0.0' instead of just 'package'. " + context=Text( + "Multiple remote repositories are not merged by default " + "to reduce the risk of dependency confusion attacks." ), - link="https://pip.pypa.io/en/stable/topics/dependency-resolution/#handling-resolution-too-deep-errors", - ) - - -class InstallWheelBuildError(DiagnosticPipError): - reference = "failed-wheel-build-for-install" - - def __init__(self, failed: list[InstallRequirement]) -> None: - super().__init__( - message=( - "Failed to build installable wheels for some " - "pyproject.toml based projects" + hint_stmt=Text( + "Remote repositories can be specified or discovered using " + "--index-url, --extra-index-url, and --find-links. " + "Please check the pip command to see if these are in use." + ), + note_stmt=Text( + "The way to resolve this error is to contact the remote repositories " + "and package owners, and ask if it makes sense to configure them to " + "merge namespaces. " + "See PEP 708 for the specification. " + "You can override this check, which will disable the security " + "protection it provides from dependency confusion attacks, " + "by passing --insecure-multiple-remote-repositories." ), - context=", ".join(r.name for r in failed), # type: ignore - hint_stmt=None, ) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index 5b4a69f099d..efa64204f37 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -32,10 +32,10 @@ from pip._internal.exceptions import NetworkConnectionError from pip._internal.models.link import ( - Link, - HEAD_META_PREFIX, HEAD_META_ALTERNATE_LOCATIONS, + HEAD_META_PREFIX, HEAD_META_TRACKS, + Link, ) from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession @@ -335,11 +335,11 @@ def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: return None @functools.cached_property - def _meta_key_tracks(self): + def _meta_key_tracks(self) -> str: return f"{HEAD_META_PREFIX}:{HEAD_META_TRACKS}" @functools.cached_property - def _meta_key_alternate_locations(self): + def _meta_key_alternate_locations(self) -> str: return f"{HEAD_META_PREFIX}:{HEAD_META_ALTERNATE_LOCATIONS}" diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index a9f039ad1f3..7a832e8eafb 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -24,7 +24,9 @@ from pip._internal.exceptions import ( BestVersionAlreadyInstalled, DistributionNotFound, + InvalidAlternativeLocationsUrl, InvalidWheelFilename, + UnsafeMultipleRemoteRepositories, UnsupportedWheel, ) from pip._internal.index.collector import LinkCollector, parse_links @@ -486,6 +488,11 @@ def get_applicable_candidates( project_name=self._project_name, ) + check_multiple_remote_repositories( + candidates=filtered_applicable_candidates, + project_name=self._project_name, + ) + return sorted(filtered_applicable_candidates, key=self._sort_key) def _sort_key(self, candidate: InstallationCandidate) -> CandidateSortingKey: @@ -1063,3 +1070,221 @@ def _extract_version_from_fragment(fragment: str, canonical_name: str) -> str | if not version: return None return version + + +def check_multiple_remote_repositories( + candidates: List[InstallationCandidate], project_name: str +) -> None: + """ + Check whether two or more different namespaces can be flattened into one. + + This check will raise an error when packages from different sources will be merged, + without clear configuration to indicate the namespace merge is allowed. + See `PEP 708`_. + + This approach allows safely merging separate namespaces that are actually one + logical namespace, while enforcing a more secure default. + + Returns None if checks pass, otherwise will raise an error with details of the + failed checks. + """ + + # NOTE: The checks in this function must occur after: + # Specification: Filter out any files that do not match known hashes from a + # lockfile or requirements file. + + # NOTE: The checks in this function must occur before: + # Specification: Filter out any files that do not match the current platform, + # Python version, etc. We check for the metadata in this PEP before filtering out + # based on platform, Python version, etc., because we don’t want errors that only + # show up on certain platforms, Python versions, etc. + + # NOTE: Implemented in 'filter_unallowed_hashes': + # Specification: Users who are using lock files or requirements files that include + # specific hashes of artifacts that are “valid” are assumed to be protected by + # nature of those hashes, since the rest of these recommendations would apply + # during hash generation. Thus, we filter out unknown hashes up front. + + # NOTE: Implemented in 'collector.py' 'parse_links': + # Specification: When using alternate locations, clients MUST implicitly assume + # that the url the response was fetched from was included in the list. + + # NOTE: Implemented in 'collector.py' 'parse_links': + # Specification: Order of the elements within the array does not have any + # particular meaning. + + # NOTE: Implemented by this function, by not checking all repo tracks metadata is + # the exact same. + # Specification: Mixed use repositories where some names track a repository and + # some names do not are explicitly allowed. + + # TODO: This requirement doesn't look like something that pip can do anything about. + # Specification: All [Tracks metadata] URLs MUST represent the same “project” as + # the project in the extending repository. This does not mean that they need to + # serve the same files. It is valid for them to include binaries built on + # different platforms, copies with local patches being applied, etc. This is + # purposefully left vague as it’s ultimately up to the expectations that the + # users have of the repository and its operators what exactly constitutes + # the “same” project. + + # TODO: This requirement doesn't look like something that pip can do anything about. + # Specification: It [Tracks metadata] is NOT required that every name in a + # repository tracks the same repository, or that they all track a repository at all. + + # TODO: This requirement doesn't look like something that pip can do anything about. + # Specification: It [repository Tracks metadata] MUST be under the control of the + # repository operators themselves, not any individual publisher using that + # repository. “Repository Operator” can also include anyone who managed the overall + # namespace for a particular repository, which may be the case in situations like + # hosted repository services where one entity operates the software but another + # owns/manages the entire namespace of that repository. + + # TODO: Consider making requests to repositories revealed via Alternate Locations + # or Tracks metadata, to assess the metadata of those additional repositories. + # Specification: When an installer encounters a project that is using the + # alternate locations metadata it SHOULD consider that all repositories named are + # extending the same namespace across multiple repositories. + + # TODO: Consider whether pip already has, or should add, ways to indicate exactly + # which individual projects to get from which individual repositories. + # Specification: If the user has explicitly told the installer that it wants to + # fetch a project from a certain set of repositories, then there is no reason + # to question that and we assume that they’ve made sure it is safe to merge + # those namespaces. If the end user has explicitly told the installer to fetch + # the project from specific repositories, filter out all other repositories. + + # When no candidates are provided, then no checks are relevant, so just return. + if candidates is None or len(candidates) == 0: + logger.debug("No candidates given to multiple remote repository checks") + return + + # Pre-calculate the canonical name for later comparisons. + canonical_name = canonicalize_name(project_name) + + # Specification: Look to see if the discovered files span multiple repositories; + # if they do then determine if either “Tracks” or “Alternate Locations” metadata + # allows safely merging ALL the repositories where files were discovered together. + scheme_file = "file://" + remote_candidates = [] + all_remote_urls = set() + all_project_track_urls = [] + all_repo_alt_urls = [] + remote_repositories = set() + for candidate in candidates: + candidate_name = candidate.name + candidate_canonical_name = canonicalize_name(candidate_name) + + file_path = None + try: + file_path = candidate.link.file_path + except Exception: + pass + url = candidate.link.url + + project_track_urls = candidate.link.project_track_urls or set() + all_project_track_urls.append(project_track_urls) + remote_repositories.update(project_track_urls) + + repo_alt_urls = candidate.link.repo_alt_urls or set() + all_repo_alt_urls.append(repo_alt_urls) + remote_repositories.update(repo_alt_urls) + + other_repo_urls = {*project_track_urls, *repo_alt_urls} + has_other_repo_urls = len(other_repo_urls) > 0 + + is_local_url = url and url.startswith(scheme_file) + if url and not is_local_url: + all_remote_urls.add(url) + + # Specification: Repositories that exist on the local filesystem SHOULD always + # be implicitly allowed to be merged to any remote repository. + is_local_file = file_path or is_local_url + has_remote_repo = has_other_repo_urls and any( + not i.startswith(scheme_file) for i in other_repo_urls + ) + if is_local_file and not has_remote_repo: + # Ignore any local candidates in later comparisons. + logger.debug( + "Ignore local candidate %s in multiple remote repository checks", + candidate, + ) + continue + + remote_candidates.append( + { + "name": candidate_name, + "canonical_name": candidate_canonical_name, + "file_path": file_path, + "url": url, + "project_track_urls": project_track_urls, + "repo_alt_urls": repo_alt_urls, + "other_repo_urls": other_repo_urls, + "is_local_file": is_local_file, + "has_remote_repo": has_remote_repo, + } + ) + + # If there are no remote candidates, then allow merging repositories. + if len(remote_candidates) == 0: + logger.debug("No remote candidates for multiple remote repository checks") + return + + # TODO + if logger.isEnabledFor(logging.INFO): + logger.info("Remote candidates for multiple remote repository checks:") + for candidate in remote_candidates: + logger.info(candidate) + + # TODO: This checks the list of Alternate Locations for the candidates that were + # retrieved. It does not request the metadata from any additional repositories + # revealed via the lists of Alternate Locations. + # Specification: In order for this metadata to be trusted, there MUST be agreement + # between all locations where that project is found as to what the alternate + # locations are. + if len(all_repo_alt_urls) > 1: + match_alt_locs = set(all_repo_alt_urls[0]) + invalid_locations = set() + + for item in all_repo_alt_urls[1:]: + match_alt_locs.intersection_update(item) + invalid_locations.update(match_alt_locs.symmetric_difference(item)) + + if len(invalid_locations) > 0: + raise InvalidAlternativeLocationsUrl( + package=project_name, + remote_repositories=remote_repositories, + invalid_locations=invalid_locations, + ) + + # TODO + # Specification: Otherwise [if metadata allows] we merge the namespaces, + # and continue on. + + # TODO + # Specification: It [Tracks metadata] MUST point to the actual URLs for that + # project, not the base URL for the extended repositories. + # raise InvalidTracksUrl + + # TODO + # Specification: It [Tracks metadata] MUST point to the repositories that “own” + # the namespaces, not another repository that is also tracking that namespace. + # raise InvalidTracksUrl + + # TODO + # Specification: It [Tracks metadata] MUST point to a project with the exact same + # name (after normalization). + # raise InvalidTracksUrl + + # TODO + # Specification: If the project in question only comes from a single repository, + # then there is no chance of dependency confusion, so there’s no reason to do + # anything but allow. + + # Specification: If nothing tells us merging the namespaces is safe, we refuse to + # implicitly assume it is, and generate an error instead. + # Specification: If that metadata does NOT allow [merging namespaces], then + # generate an error. + error = UnsafeMultipleRemoteRepositories( + package=project_name, remote_repositories=remote_repositories + ) + raise error diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 31c01423e2b..2c0328b7c16 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -595,7 +595,7 @@ def test_parse_links_json() -> None: ), ] - def _convert_links(link_item): + def _convert_links(link_item: Link) -> dict[str, Optional[str]]: return {key: getattr(link_item, key, None) for key in link_item.__slots__} for index, link in enumerate(links): @@ -759,8 +759,8 @@ def test_parse_links_caches_same_page_by_url() -> None: ) def test_parse_links__alternate_locations_and_tracks( index_name: str, - expected_project_track_urls: list[str], - expected_repo_alt_urls: list[str], + expected_project_track_urls: set[str], + expected_repo_alt_urls: set[str], data: TestData, ) -> None: package_name = "simple" diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index e571b441f9d..ab44a922ddd 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -905,3 +905,7 @@ def test_extract_version_from_fragment( ) -> None: version = _extract_version_from_fragment(fragment, canonical_name) assert version == expected + + +class TestCheckMultipleRemoteRepositories: + pass From dd8db2356490d010bcc518ebc16ac43c9decee51 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Thu, 19 Sep 2024 18:56:42 +1000 Subject: [PATCH 10/26] work in progress --- src/pip/_internal/exceptions.py | 50 ++++-- src/pip/_internal/index/package_finder.py | 154 +++++++++-------- src/pip/_internal/models/candidate.py | 18 ++ src/pip/_internal/models/link.py | 28 ++++ tests/unit/test_collector.py | 8 +- tests/unit/test_index.py | 194 +++++++++++++++++++++- 6 files changed, 364 insertions(+), 88 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 0726d949f3d..dd00d2bec40 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -804,6 +804,12 @@ class InvalidMultipleRemoteRepositories(DiagnosticPipError): """Common error for issues with multiple remote repositories.""" reference = "invalid-multiple-remote-repositories" + _note_suffix = ( + "See PEP 708 for the specification. " + "You can override this check, which will disable the security " + "protection it provides from dependency confusion attacks, " + "by passing --insecure-multiple-remote-repositories." + ) class InvalidTracksUrl(InvalidMultipleRemoteRepositories): @@ -816,6 +822,36 @@ class InvalidTracksUrl(InvalidMultipleRemoteRepositories): reference = "invalid-tracks-url" + def __init__( + self, + *, + package: str, + remote_repositories: set[str], + invalid_tracks: set[str], + ) -> None: + super().__init__( + kind="error", + message=Text( + f"One or more Tracks for {escape(package)} " + "were not valid. " + "The remote repositories are " + f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." + "The invalid tracks are " + f"{'; '.join(sorted(escape(r) for r in invalid_tracks))}." + ), + context=Text( + "Tracks urls must point to the actual URLs for a project, " + "point to the repositories that own the namespaces, and " + "point to a project with the exact same normalized name." + ), + hint_stmt=None, + note_stmt=Text( + "The way to resolve this error is to contact the owners of " + "each remote repository, and ask if it makes sense to " + "configure them to merge namespaces. " + self._note_suffix + ), + ) + class InvalidAlternativeLocationsUrl(InvalidMultipleRemoteRepositories): """The list of Alternate Locations for each repository do not match. @@ -851,12 +887,8 @@ def __init__( hint_stmt=None, note_stmt=Text( "The way to resolve this error is to contact the owners of the package " - "at each remote repository, and ask that the list of " - "Alternate Locations at each is set to the same list. " - "See PEP 708 for the specification. " - "You can override this check, which will disable the security " - "protection it provides from dependency confusion attacks, " - "by passing --insecure-multiple-remote-repositories." + "at each remote repository, and ask if it makes sense to " + "configure them to merge namespaces. " + self._note_suffix ), ) @@ -893,10 +925,6 @@ def __init__(self, *, package: str, remote_repositories: set[str]) -> None: note_stmt=Text( "The way to resolve this error is to contact the remote repositories " "and package owners, and ask if it makes sense to configure them to " - "merge namespaces. " - "See PEP 708 for the specification. " - "You can override this check, which will disable the security " - "protection it provides from dependency confusion attacks, " - "by passing --insecure-multiple-remote-repositories." + "merge namespaces. " + self._note_suffix ), ) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 7a832e8eafb..ac20e38204e 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -6,8 +6,10 @@ import functools import itertools import logging +import pathlib import re from collections.abc import Iterable +import urllib.parse from dataclasses import dataclass from typing import ( TYPE_CHECKING, @@ -26,6 +28,7 @@ DistributionNotFound, InvalidAlternativeLocationsUrl, InvalidWheelFilename, + InvalidTracksUrl, UnsafeMultipleRemoteRepositories, UnsupportedWheel, ) @@ -1144,9 +1147,13 @@ def check_multiple_remote_repositories( # Specification: When an installer encounters a project that is using the # alternate locations metadata it SHOULD consider that all repositories named are # extending the same namespace across multiple repositories. + # Implementation: It is not possible to enforce this requirement without access + # to the metadata for all of the remote repositories, as the Tracks metadata + # might point at a repository that does not exist, or at a repository that is + # Tracking another repository. # TODO: Consider whether pip already has, or should add, ways to indicate exactly - # which individual projects to get from which individual repositories. + # which individual projects to get from exactly which repositories. # Specification: If the user has explicitly told the installer that it wants to # fetch a project from a certain set of repositories, then there is no reason # to question that and we assume that they’ve made sure it is safe to merge @@ -1158,77 +1165,62 @@ def check_multiple_remote_repositories( logger.debug("No candidates given to multiple remote repository checks") return - # Pre-calculate the canonical name for later comparisons. + # Calculate the canonical name for later comparisons. canonical_name = canonicalize_name(project_name) # Specification: Look to see if the discovered files span multiple repositories; # if they do then determine if either “Tracks” or “Alternate Locations” metadata - # allows safely merging ALL the repositories where files were discovered together. - scheme_file = "file://" + # allows safely merging together ALL the repositories where files were discovered. remote_candidates = [] - all_remote_urls = set() - all_project_track_urls = [] - all_repo_alt_urls = [] remote_repositories = set() + for candidate in candidates: candidate_name = candidate.name - candidate_canonical_name = canonicalize_name(candidate_name) - - file_path = None - try: - file_path = candidate.link.file_path - except Exception: - pass - url = candidate.link.url - - project_track_urls = candidate.link.project_track_urls or set() - all_project_track_urls.append(project_track_urls) - remote_repositories.update(project_track_urls) - - repo_alt_urls = candidate.link.repo_alt_urls or set() - all_repo_alt_urls.append(repo_alt_urls) - remote_repositories.update(repo_alt_urls) - - other_repo_urls = {*project_track_urls, *repo_alt_urls} - has_other_repo_urls = len(other_repo_urls) > 0 + link = candidate.link + comes_from = link.comes_from - is_local_url = url and url.startswith(scheme_file) - if url and not is_local_url: - all_remote_urls.add(url) + candidate_canonical_name = canonicalize_name(candidate_name) # Specification: Repositories that exist on the local filesystem SHOULD always # be implicitly allowed to be merged to any remote repository. - is_local_file = file_path or is_local_url - has_remote_repo = has_other_repo_urls and any( - not i.startswith(scheme_file) for i in other_repo_urls - ) - if is_local_file and not has_remote_repo: + if link.is_local_only: # Ignore any local candidates in later comparisons. logger.debug( - "Ignore local candidate %s in multiple remote repository checks", + "Ignoring local candidate %s in multiple remote repository checks", candidate, ) continue - remote_candidates.append( - { - "name": candidate_name, - "canonical_name": candidate_canonical_name, - "file_path": file_path, - "url": url, - "project_track_urls": project_track_urls, - "repo_alt_urls": repo_alt_urls, - "other_repo_urls": other_repo_urls, - "is_local_file": is_local_file, - "has_remote_repo": has_remote_repo, - } - ) + try: + page_url = comes_from.url.lstrip() + except AttributeError: + page_url = comes_from.lstrip() + + item = { + "candidate": candidate, + "canonical_name": candidate_canonical_name, + } + remote_candidates.append(item) + + remote_repositories.add(page_url) + remote_repositories.update(link.project_track_urls) + remote_repositories.update(link.repo_alt_urls) # If there are no remote candidates, then allow merging repositories. if len(remote_candidates) == 0: logger.debug("No remote candidates for multiple remote repository checks") return + # Specification: If the project in question only comes from a single repository, + # then there is no chance of dependency confusion, so there’s no reason to do + # anything but allow. + if len(remote_repositories) < 2: + logger.debug( + "No chance for dependency confusion when there is only " + "one remote candidate for multiple remote repository checks" + ) + return + # TODO if logger.isEnabledFor(logging.INFO): logger.info("Remote candidates for multiple remote repository checks:") @@ -1237,10 +1229,11 @@ def check_multiple_remote_repositories( # TODO: This checks the list of Alternate Locations for the candidates that were # retrieved. It does not request the metadata from any additional repositories - # revealed via the lists of Alternate Locations. + # revealed via the lists of Alternate Locations urls or Tracks urls. # Specification: In order for this metadata to be trusted, there MUST be agreement # between all locations where that project is found as to what the alternate # locations are. + all_repo_alt_urls = list(map_alt_urls.keys()) if len(all_repo_alt_urls) > 1: match_alt_locs = set(all_repo_alt_urls[0]) invalid_locations = set() @@ -1249,6 +1242,9 @@ def check_multiple_remote_repositories( match_alt_locs.intersection_update(item) invalid_locations.update(match_alt_locs.symmetric_difference(item)) + logger.debug(match_alt_locs) + logger.debug(invalid_locations) + if len(invalid_locations) > 0: raise InvalidAlternativeLocationsUrl( package=project_name, @@ -1256,29 +1252,49 @@ def check_multiple_remote_repositories( invalid_locations=invalid_locations, ) - # TODO - # Specification: Otherwise [if metadata allows] we merge the namespaces, - # and continue on. - - # TODO - # Specification: It [Tracks metadata] MUST point to the actual URLs for that - # project, not the base URL for the extended repositories. - # raise InvalidTracksUrl - - # TODO - # Specification: It [Tracks metadata] MUST point to the repositories that “own” - # the namespaces, not another repository that is also tracking that namespace. - # raise InvalidTracksUrl + for remote_candidate in remote_candidates: + project_track_urls = remote_candidate.get("candidate").link.project_track_urls + project_track_urls = remote_candidate.get("candidate").link.project_track_urls + page_url = remote_candidate.get("url") + # url_parts = pathlib.Path(urllib.parse.urlsplit(url).path).parts + for project_track_url in project_track_urls: + parts = pathlib.Path(urllib.parse.urlsplit(project_track_url).path).parts + + # Specification: It [Tracks metadata] MUST point to the actual URLs + # for that project, not the base URL for the extended repositories. + # Specification: It [Tracks metadata] MUST point to a project with + # the exact same normalized name. + # Implementation: The normalised project name must be present in one + # of the Tracks url path parts. + # TODO: This assumption about the structure of the url may not hold true + # for all remote repositories. + if not parts or not any( + [canonical_name == canonicalize_name(p) for p in parts] + ): + raise InvalidTracksUrl( + package=project_name, + remote_repositories={page_url}, + invalid_tracks={project_track_url}, + ) - # TODO - # Specification: It [Tracks metadata] MUST point to a project with the exact same - # name (after normalization). - # raise InvalidTracksUrl + # Specification: It [Tracks metadata] MUST point to the repositories + # that “own” the namespaces, not another repository that is also + # tracking that namespace. + # Implementation: An 'owner' repository is one that does not Track the + # same namespace. + # TODO: Without requesting all repositories revealed by metadata, this + # check might pass with incomplete metadata, + # when it would fail with complete metadata. + if project_track_url in map_track_urls: + raise InvalidTracksUrl( + package=project_name, + remote_repositories={page_url}, + invalid_tracks={project_track_url}, + ) # TODO - # Specification: If the project in question only comes from a single repository, - # then there is no chance of dependency confusion, so there’s no reason to do - # anything but allow. + # Specification: Otherwise [if metadata allows] we merge the namespaces, + # and continue on. # Specification: If nothing tells us merging the namespaces is safe, we refuse to # implicitly assume it is, and generate an error instead. diff --git a/src/pip/_internal/models/candidate.py b/src/pip/_internal/models/candidate.py index f27f283154a..5c6dbec6fe5 100644 --- a/src/pip/_internal/models/candidate.py +++ b/src/pip/_internal/models/candidate.py @@ -23,3 +23,21 @@ def __init__(self, name: str, version: str, link: Link) -> None: def __str__(self) -> str: return f"{self.name!r} candidate (version {self.version} at {self.link})" + + +@dataclass(frozen=True) +class RemoteInstallationCandidate: + """Represents a potential "candidate" for installation.""" + + __slots__ = ["canonical_name", "candidate"] + + canonical_name: str + candidate: InstallationCandidate + + def __init__(self, candidate: InstallationCandidate) -> None: + object.__setattr__(self, "name", name) + object.__setattr__(self, "version", parse_version(version)) + object.__setattr__(self, "link", link) + + def __str__(self) -> str: + return f"{self.name!r} candidate (version {self.version} at {self.link})" diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 1771c9af730..a73833b0a23 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -340,6 +340,9 @@ def from_json( elif not yanked_reason: yanked_reason = None + project_track_urls = {i.strip() for i in project_track_urls if i and i.strip()} + repo_alt_urls = {i.strip() for i in repo_alt_urls if i and i.strip()} + return cls( url, comes_from=page_url, @@ -397,6 +400,9 @@ def from_element( ) metadata_file_data = MetadataFile(None) + project_track_urls = {i.strip() for i in project_track_urls if i and i.strip()} + repo_alt_urls = {i.strip() for i in repo_alt_urls if i and i.strip()} + return cls( url, comes_from=page_url, @@ -577,6 +583,28 @@ def is_hash_allowed(self, hashes: Hashes | None) -> bool: return False return any(hashes.is_hash_allowed(k, v) for k, v in self._hashes.items()) + @property + def is_local_only(self): + """ + Is this link entirely local, with no metadata pointing to a remote url? + """ + logger.debug( + { + "is_file": self.is_file, + "file_path": self.file_path if self.is_file else None, + "comes_from": self.comes_from, + "metadata_urls": {*self.project_track_urls, *self.repo_alt_urls}, + } + ) + return ( + (self.is_file and self.file_path) + and not self.comes_from + and not any( + not i.startswith("file:") + for i in {*self.project_track_urls, *self.repo_alt_urls} + ) + ) + class _CleanResult(NamedTuple): """Convert link for equivalency check. diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 2c0328b7c16..731b01b937f 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -533,13 +533,13 @@ def test_parse_links_json() -> None: ], } ).encode("utf8") + # parse_links() is cached by url, so we inject a random uuid to ensure + # the page content isn't cached. url = f"https://example.com/simple-{uuid.uuid4()}/" page = IndexContent( json_bytes, "application/vnd.pypi.simple.v1+json", encoding=None, - # parse_links() is cached by url, so we inject a random uuid to ensure - # the page content isn't cached. url=url, ) links = list(parse_links(page)) @@ -767,13 +767,13 @@ def test_parse_links__alternate_locations_and_tracks( html_path = data.indexes / index_name / package_name / "index.html" html = html_path.read_text() html_bytes = html.encode("utf-8") + # parse_links() is cached by url, so we inject a random uuid to ensure + # the page content isn't cached. url = f"https://example.com/simple-{uuid.uuid4()}/" page = IndexContent( html_bytes, "text/html", encoding=None, - # parse_links() is cached by url, so we inject a random uuid to ensure - # the page content isn't cached. url=url, ) links = list(parse_links(page)) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index ab44a922ddd..44d4bde2f75 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,14 +1,22 @@ from __future__ import annotations import logging +import uuid +from typing import FrozenSet, List, Optional, Set, Tuple import pytest - +from pip._internal.models.candidate import InstallationCandidate from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.tags import Tag from pip._vendor.packaging.utils import canonicalize_name -from pip._internal.index.collector import LinkCollector +from pip._internal.index.collector import LinkCollector, IndexContent +from pip._internal.exceptions import ( + InvalidAlternativeLocationsUrl, + InvalidMultipleRemoteRepositories, + InvalidTracksUrl, + UnsafeMultipleRemoteRepositories, +) from pip._internal.index.package_finder import ( CandidateEvaluator, CandidatePreferences, @@ -19,6 +27,7 @@ _check_link_requires_python, _extract_version_from_fragment, _find_name_version_sep, + check_multiple_remote_repositories, filter_unallowed_hashes, ) from pip._internal.models.link import Link @@ -907,5 +916,182 @@ def test_extract_version_from_fragment( assert version == expected -class TestCheckMultipleRemoteRepositories: - pass +def _make_mock_candidate_check_remote_repo( + candidate_name: Optional[str] = None, + version: Optional[str] = None, + comes_from_url: Optional[str] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, +) -> InstallationCandidate: + if candidate_name is None: + candidate_name = "mypackage" + + if version is None: + version = "1.0" + + comes_from = None + if comes_from_url is not None: + comes_from = IndexContent( + b"", + "text/html", + encoding=None, + url=comes_from_url, + ) + + url = f"https://example.com/pkg-{version}.tar.gz" + + link = Link( + url, + comes_from=comes_from, + project_track_urls=project_track_urls, + repo_alt_urls=repo_alt_urls, + ) + candidate = InstallationCandidate(candidate_name, version, link) + + return candidate + + +@pytest.mark.parametrize( + "candidates, project_name, expected", + [ + # checks pass when no candidates + ([], "my_package", None), + # checks pass when only one candidate + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ) + ], + "mypackage", + None, + ), + # checks fail when two candidates with different remotes + # and no metadata to enable merging namespaces + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + ), + ], + "mypackage", + UnsafeMultipleRemoteRepositories, + ), + # checks pass when only one candidate with tracks url + # TODO: not making requests to repos revealed via metadata + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + project_track_urls={"https://a.example.com/simple/mypackage"}, + ), + ], + "mypackage", + None, + ), + # checks pass when ony one candidate with alt loc url + # TODO: not making requests to repos revealed via metadata + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + repo_alt_urls={"https://a.example.com/simple/mypackage"}, + ), + ], + "mypackage", + None, + ), + # checks fail when alternate location urls do not agree + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + repo_alt_urls={"https://b.example.com/simple/mypackage"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + repo_alt_urls={"https://c.example.com/simple/mypackage"}, + ), + ], + "mypackage", + InvalidAlternativeLocationsUrl, + ), + # checks fails when track url points to the base url instead of + # the project url + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + project_track_urls={"https://b.example.com"}, + ), + ], + "mypackage", + InvalidTracksUrl, + ), + # checks fail when track url points to another tracker instead of + # the 'owner' project url + ( + [ + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://a.example.com/simple/mypackage", + project_track_urls={"https://b.example.com/simple/mypackage"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + project_track_urls={"https://c.example.com/simple/mypackage"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://c.example.com/simple/mypackage", + ), + ], + "mypackage", + InvalidTracksUrl, + ), + # checks fail when track url points to different name instead of + # the project url with same name + ( + [ + _make_mock_candidate_check_remote_repo( + candidate_name="othername", + comes_from_url=f"https://a.example.com/simple/othername", + project_track_urls={"https://b.example.com/simple/othername"}, + ), + _make_mock_candidate_check_remote_repo( + comes_from_url=f"https://b.example.com/simple/mypackage", + ), + ], + "mypackage", + InvalidTracksUrl, + ), + # checks pass when track url points to same normalized name + ( + [ + _make_mock_candidate_check_remote_repo( + candidate_name="a.b-c_d", + comes_from_url=f"https://a.example.com/simple/mypackage", + project_track_urls={"https://b.example.com/simple/a_b.c-d"}, + ), + ], + "a-b_c.d", + None, + ), + ], +) +def test_check_multiple_remote_repositories( + caplog, candidates: List[InstallationCandidate], project_name: str, expected +): + caplog.set_level(logging.DEBUG) + if expected: + with pytest.raises(expected): + check_multiple_remote_repositories(candidates, project_name) + else: + assert check_multiple_remote_repositories(candidates, project_name) is None From 64568a743769acfeb428b9474ab9d614d5e7b601 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Thu, 19 Sep 2024 20:02:58 +1000 Subject: [PATCH 11/26] fix lints --- src/pip/_internal/exceptions.py | 15 ++++++----- src/pip/_internal/index/package_finder.py | 2 +- src/pip/_internal/models/link.py | 4 +-- tests/unit/test_collector.py | 5 ++-- tests/unit/test_index.py | 32 +++++++++++------------ 5 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index dd00d2bec40..39a717d531d 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -16,7 +16,7 @@ import sys from collections.abc import Iterator from itertools import chain, groupby, repeat -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Dict, Iterator, List, Literal, Optional, Set, Union from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.version import InvalidVersion @@ -826,8 +826,8 @@ def __init__( self, *, package: str, - remote_repositories: set[str], - invalid_tracks: set[str], + remote_repositories: Set[str], + invalid_tracks: Set[str], ) -> None: super().__init__( kind="error", @@ -866,8 +866,8 @@ def __init__( self, *, package: str, - remote_repositories: set[str], - invalid_locations: set[str], + remote_repositories: Set[str], + invalid_locations: Set[str], ) -> None: super().__init__( kind="error", @@ -905,13 +905,14 @@ class UnsafeMultipleRemoteRepositories(InvalidMultipleRemoteRepositories): reference = "unsafe-multiple-remote-repositories" - def __init__(self, *, package: str, remote_repositories: set[str]) -> None: + def __init__(self, *, package: str, remote_repositories: Set[str]) -> None: super().__init__( kind="error", message=Text( f"More than one remote repository was found for {escape(package)}, " "with no indication that the remote repositories can be safely merged. " - f"The repositories are {'; '.join(sorted(escape(r) for r in remote_repositories))}." + "The repositories are " + f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." ), context=Text( "Multiple remote repositories are not merged by default " diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index ac20e38204e..a4806379821 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1269,7 +1269,7 @@ def check_multiple_remote_repositories( # TODO: This assumption about the structure of the url may not hold true # for all remote repositories. if not parts or not any( - [canonical_name == canonicalize_name(p) for p in parts] + canonical_name == canonicalize_name(p) for p in parts ): raise InvalidTracksUrl( package=project_name, diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index a73833b0a23..e9f2f50346b 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -301,8 +301,8 @@ def from_json( cls, file_data: dict[str, Any], page_url: str, - project_track_urls: Optional[set[str]] = None, - repo_alt_urls: Optional[set[str]] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, ) -> Optional["Link"]: """ Convert an pypi json document from a simple repository page into a Link. diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 731b01b937f..5d3e36952fe 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -8,6 +8,7 @@ import uuid from pathlib import Path from textwrap import dedent +from typing import Dict, List, Optional, Tuple, Set from unittest import mock import pytest @@ -759,8 +760,8 @@ def test_parse_links_caches_same_page_by_url() -> None: ) def test_parse_links__alternate_locations_and_tracks( index_name: str, - expected_project_track_urls: set[str], - expected_repo_alt_urls: set[str], + expected_project_track_urls: Set[str], + expected_repo_alt_urls: Set[str], data: TestData, ) -> None: package_name = "simple" diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 44d4bde2f75..6af6963dc00 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -960,7 +960,7 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ) ], "mypackage", @@ -971,10 +971,10 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", ), ], "mypackage", @@ -985,10 +985,10 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", project_track_urls={"https://a.example.com/simple/mypackage"}, ), ], @@ -1000,10 +1000,10 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", repo_alt_urls={"https://a.example.com/simple/mypackage"}, ), ], @@ -1014,11 +1014,11 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", repo_alt_urls={"https://b.example.com/simple/mypackage"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", repo_alt_urls={"https://c.example.com/simple/mypackage"}, ), ], @@ -1030,7 +1030,7 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", project_track_urls={"https://b.example.com"}, ), ], @@ -1042,15 +1042,15 @@ def _make_mock_candidate_check_remote_repo( ( [ _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", project_track_urls={"https://b.example.com/simple/mypackage"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", project_track_urls={"https://c.example.com/simple/mypackage"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://c.example.com/simple/mypackage", + comes_from_url="https://c.example.com/simple/mypackage", ), ], "mypackage", @@ -1062,11 +1062,11 @@ def _make_mock_candidate_check_remote_repo( [ _make_mock_candidate_check_remote_repo( candidate_name="othername", - comes_from_url=f"https://a.example.com/simple/othername", + comes_from_url="https://a.example.com/simple/othername", project_track_urls={"https://b.example.com/simple/othername"}, ), _make_mock_candidate_check_remote_repo( - comes_from_url=f"https://b.example.com/simple/mypackage", + comes_from_url="https://b.example.com/simple/mypackage", ), ], "mypackage", @@ -1077,7 +1077,7 @@ def _make_mock_candidate_check_remote_repo( [ _make_mock_candidate_check_remote_repo( candidate_name="a.b-c_d", - comes_from_url=f"https://a.example.com/simple/mypackage", + comes_from_url="https://a.example.com/simple/mypackage", project_track_urls={"https://b.example.com/simple/a_b.c-d"}, ), ], From 91d2f0ad6d3256c107b325450925241ca1263362 Mon Sep 17 00:00:00 2001 From: Mark Cottman-Fields Date: Sat, 28 Sep 2024 19:00:05 +1000 Subject: [PATCH 12/26] work in progress --- src/pip/_internal/exceptions.py | 2 +- src/pip/_internal/index/package_finder.py | 182 +++++++++++++--------- src/pip/_internal/models/candidate.py | 56 ++++++- src/pip/_internal/models/link.py | 1 - 4 files changed, 160 insertions(+), 81 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 39a717d531d..139928d2acc 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -876,7 +876,7 @@ def __init__( "were different among the remote repositories. " "The remote repositories are " f"{'; '.join(sorted(escape(r) for r in remote_repositories))}." - "The alternate locations not not agreed by all remote " + "The alternate locations not agreed by all remote " "repository are " f"{'; '.join(sorted(escape(r) for r in invalid_locations))}." ), diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index a4806379821..9c62837fb10 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -33,8 +33,8 @@ UnsupportedWheel, ) from pip._internal.index.collector import LinkCollector, parse_links -from pip._internal.metadata import select_backend -from pip._internal.models.candidate import InstallationCandidate +from pip._internal.models.candidate import InstallationCandidate, \ + RemoteInstallationCandidate from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope @@ -1089,7 +1089,7 @@ def check_multiple_remote_repositories( logical namespace, while enforcing a more secure default. Returns None if checks pass, otherwise will raise an error with details of the - failed checks. + first failed check. """ # NOTE: The checks in this function must occur after: @@ -1122,6 +1122,7 @@ def check_multiple_remote_repositories( # some names do not are explicitly allowed. # TODO: This requirement doesn't look like something that pip can do anything about. + # How would pip be able to tell whether two urls are the 'same project'? # Specification: All [Tracks metadata] URLs MUST represent the same “project” as # the project in the extending repository. This does not mean that they need to # serve the same files. It is valid for them to include binaries built on @@ -1131,10 +1132,14 @@ def check_multiple_remote_repositories( # the “same” project. # TODO: This requirement doesn't look like something that pip can do anything about. + # Tracks metadata is not required, so anything that uses tracks meta + # must deal with it not being available. # Specification: It [Tracks metadata] is NOT required that every name in a # repository tracks the same repository, or that they all track a repository at all. # TODO: This requirement doesn't look like something that pip can do anything about. + # How would pip be able to tell whether the metadata is under the control of + # the repository operators and not any individual publisher using the repository? # Specification: It [repository Tracks metadata] MUST be under the control of the # repository operators themselves, not any individual publisher using that # repository. “Repository Operator” can also include anyone who managed the overall @@ -1143,7 +1148,9 @@ def check_multiple_remote_repositories( # owns/manages the entire namespace of that repository. # TODO: Consider making requests to repositories revealed via Alternate Locations - # or Tracks metadata, to assess the metadata of those additional repositories. + # or Tracks metadata, to assess the metadata of those additional repositories. + # This would be a fair bit more functionality to include. + # Maybe a subsequent PR, if this functionality is desirable? # Specification: When an installer encounters a project that is using the # alternate locations metadata it SHOULD consider that all repositories named are # extending the same namespace across multiple repositories. @@ -1154,6 +1161,7 @@ def check_multiple_remote_repositories( # TODO: Consider whether pip already has, or should add, ways to indicate exactly # which individual projects to get from exactly which repositories. + # This seems to be separate functionality, that could be added in a separate PR. # Specification: If the user has explicitly told the installer that it wants to # fetch a project from a certain set of repositories, then there is no reason # to question that and we assume that they’ve made sure it is safe to merge @@ -1163,7 +1171,7 @@ def check_multiple_remote_repositories( # When no candidates are provided, then no checks are relevant, so just return. if candidates is None or len(candidates) == 0: logger.debug("No candidates given to multiple remote repository checks") - return + return None # Calculate the canonical name for later comparisons. canonical_name = canonicalize_name(project_name) @@ -1172,14 +1180,19 @@ def check_multiple_remote_repositories( # if they do then determine if either “Tracks” or “Alternate Locations” metadata # allows safely merging together ALL the repositories where files were discovered. remote_candidates = [] - remote_repositories = set() - + # all known remote repositories + known_remote_repo_urls = set() + # all known alternate location urls + known_alternate_urls = set() + # the alternate location urls that are not present in all remote candidates + mismatch_alternate_urls = set() + # all known remote repositories that do not track any other repos + known_owner_repo_urls = set() + # all known remote repositories that do track other repos + known_tracker_repo_urls = set() for candidate in candidates: candidate_name = candidate.name link = candidate.link - comes_from = link.comes_from - - candidate_canonical_name = canonicalize_name(candidate_name) # Specification: Repositories that exist on the local filesystem SHOULD always # be implicitly allowed to be merged to any remote repository. @@ -1191,83 +1204,88 @@ def check_multiple_remote_repositories( ) continue - try: - page_url = comes_from.url.lstrip() - except AttributeError: - page_url = comes_from.lstrip() + remote_candidate = RemoteInstallationCandidate( + canonical_name=canonicalize_name(candidate_name), + candidate=candidate, + ) + remote_candidates.append(remote_candidate) - item = { - "candidate": candidate, - "canonical_name": candidate_canonical_name, - } - remote_candidates.append(item) + # populate the known urls + known_remote_repo_urls.update(remote_candidate.remote_repository_urls) + known_alternate_urls.update(remote_candidate.alternate_location_urls) + if len(remote_candidate.project_track_urls) > 0: + known_tracker_repo_urls.update(remote_candidate.url) + else: + known_owner_repo_urls.update(remote_candidate.url) - remote_repositories.add(page_url) - remote_repositories.update(link.project_track_urls) - remote_repositories.update(link.repo_alt_urls) + # Update the set, keeping only elements found in either set, but not in both. + # This should allow all the items that are not present for all remote candidates + # to be tracked. + mismatch_alternate_urls.symmetric_difference_update( + remote_candidate.alternate_location_urls + ) # If there are no remote candidates, then allow merging repositories. if len(remote_candidates) == 0: logger.debug("No remote candidates for multiple remote repository checks") - return + return None # Specification: If the project in question only comes from a single repository, # then there is no chance of dependency confusion, so there’s no reason to do # anything but allow. - if len(remote_repositories) < 2: + if len(known_remote_repo_urls) == 1: logger.debug( "No chance for dependency confusion when there is only " "one remote candidate for multiple remote repository checks" ) - return - - # TODO - if logger.isEnabledFor(logging.INFO): - logger.info("Remote candidates for multiple remote repository checks:") - for candidate in remote_candidates: - logger.info(candidate) - - # TODO: This checks the list of Alternate Locations for the candidates that were - # retrieved. It does not request the metadata from any additional repositories - # revealed via the lists of Alternate Locations urls or Tracks urls. + return None + if len(known_remote_repo_urls) == 0: + msg = ("Unexpected situation where there are remote candidates, " + "but no remote repositories") + logger.warning(msg) + raise ValueError(msg) + + # This checks the list of Alternate Locations for the candidates that were + # retrieved. Each remote candidate may define a different list of Tracks and + # Alternate Location metadata. + # TODO: The Alternate Locations and Tracks urls revealed by the candidate metadata + # are not requested to check that their metadata matches the candidate metadata. + # This means that the known candidate metadata might agree and pass this check, + # while retrieving the metadata from additional urls would not agree and would + # fail this check. # Specification: In order for this metadata to be trusted, there MUST be agreement # between all locations where that project is found as to what the alternate # locations are. - all_repo_alt_urls = list(map_alt_urls.keys()) - if len(all_repo_alt_urls) > 1: - match_alt_locs = set(all_repo_alt_urls[0]) - invalid_locations = set() - - for item in all_repo_alt_urls[1:]: - match_alt_locs.intersection_update(item) - invalid_locations.update(match_alt_locs.symmetric_difference(item)) - - logger.debug(match_alt_locs) - logger.debug(invalid_locations) - - if len(invalid_locations) > 0: - raise InvalidAlternativeLocationsUrl( - package=project_name, - remote_repositories=remote_repositories, - invalid_locations=invalid_locations, - ) + if len(mismatch_alternate_urls) > 0: + raise InvalidAlternativeLocationsUrl( + package=project_name, + remote_repositories=known_remote_repo_urls, + invalid_locations=mismatch_alternate_urls, + ) + # Check the Tracks metadata. for remote_candidate in remote_candidates: - project_track_urls = remote_candidate.get("candidate").link.project_track_urls - project_track_urls = remote_candidate.get("candidate").link.project_track_urls - page_url = remote_candidate.get("url") - # url_parts = pathlib.Path(urllib.parse.urlsplit(url).path).parts + project_track_urls = remote_candidate.project_track_urls + page_url = remote_candidate.url for project_track_url in project_track_urls: parts = pathlib.Path(urllib.parse.urlsplit(project_track_url).path).parts + parts = list(parts) + parts.reverse() # Specification: It [Tracks metadata] MUST point to the actual URLs # for that project, not the base URL for the extended repositories. + # Implementation Note: Assume that the actual url for the project will + # contain the canonical project name as a path part, and the base URL + # will not contain the canonical project name as a path part. + # Specification: It [Tracks metadata] MUST point to a project with # the exact same normalized name. - # Implementation: The normalised project name must be present in one - # of the Tracks url path parts. - # TODO: This assumption about the structure of the url may not hold true - # for all remote repositories. + # Implementation Note: Assume that projects that are configured to be the + # 'exact same' will have a Tracks url path part with the canonical + # project name, usually the last path part. + + # Note: These assumptions about the structure of the Tracks url may not + # hold true for all remote repositories. if not parts or not any( canonical_name == canonicalize_name(p) for p in parts ): @@ -1280,27 +1298,43 @@ def check_multiple_remote_repositories( # Specification: It [Tracks metadata] MUST point to the repositories # that “own” the namespaces, not another repository that is also # tracking that namespace. - # Implementation: An 'owner' repository is one that does not Track the - # same namespace. + # Implementation Note: An 'owner' repository is one that has no Tracks + # metadata. # TODO: Without requesting all repositories revealed by metadata, this - # check might pass with incomplete metadata, - # when it would fail with complete metadata. - if project_track_url in map_track_urls: + # check might pass with incomplete knowledge of all metadata, + # when it would fail after retrieving all metadata. + if project_track_url not in known_owner_repo_urls: raise InvalidTracksUrl( package=project_name, remote_repositories={page_url}, invalid_tracks={project_track_url}, ) - # TODO - # Specification: Otherwise [if metadata allows] we merge the namespaces, - # and continue on. - # Specification: If nothing tells us merging the namespaces is safe, we refuse to # implicitly assume it is, and generate an error instead. # Specification: If that metadata does NOT allow [merging namespaces], then # generate an error. - error = UnsafeMultipleRemoteRepositories( - package=project_name, remote_repositories=remote_repositories - ) - raise error + # Implementation Note: If there are two or more remote candidates, and any of them + # don't have valid Alternate Locations and/or Tracks metadata, then fail. + for remote_candidate in remote_candidates: + candidate_alt_urls = remote_candidate.alternate_location_urls + + invalid_alt_urls = known_alternate_urls - candidate_alt_urls + has_alts = len(candidate_alt_urls) > 0 + has_tracks = len(remote_candidate.project_track_urls) > 0 + + is_invalid = any([ + not has_alts and not has_tracks, + not has_tracks and invalid_alt_urls, + ]) + + if is_invalid: + error = UnsafeMultipleRemoteRepositories( + package=project_name, + remote_repositories=known_remote_repo_urls, + ) + raise error + + # Specification: Otherwise [if metadata allows] we merge the namespaces, + # and continue on. + return None diff --git a/src/pip/_internal/models/candidate.py b/src/pip/_internal/models/candidate.py index 5c6dbec6fe5..f53b05f20f7 100644 --- a/src/pip/_internal/models/candidate.py +++ b/src/pip/_internal/models/candidate.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import Set, Optional from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version @@ -34,10 +35,55 @@ class RemoteInstallationCandidate: canonical_name: str candidate: InstallationCandidate - def __init__(self, candidate: InstallationCandidate) -> None: - object.__setattr__(self, "name", name) - object.__setattr__(self, "version", parse_version(version)) - object.__setattr__(self, "link", link) + def __init__(self, canonical_name:str, candidate: InstallationCandidate) -> None: + object.__setattr__(self, "canonical_name", canonical_name) + object.__setattr__(self, "candidate", candidate) def __str__(self) -> str: - return f"{self.name!r} candidate (version {self.version} at {self.link})" + return f"{self.canonical_name!r} candidate" + + @property + def _link(self) -> Optional[Link]: + if not self.candidate: + return None + if not self.candidate.link: + return None + return self.candidate.link + + @property + def url(self) -> Optional[str]: + """The remote url that contains the metadata for this installation candidate.""" + link = self._link + if not link: + return None + if not link.comes_from: + return None + if hasattr(link.comes_from, 'url') and link.comes_from.url: + return self.candidate.link.comes_from.url.lstrip() + if link.comes_from: + return self.candidate.link.comes_from.lstrip() + return None + + + @property + def remote_repository_urls(self) -> Set[str]: + """Remote repository urls from Tracks and Alternate Locations metadata.""" + return {*self.project_track_urls, *self.alternate_location_urls} + + @property + def project_track_urls(self) -> Set[str]: + """Remote repository urls from Tracks metadata.""" + result = {} + link = self._link + if link and link.project_track_urls: + result.update(link.project_track_urls) + return {i for i in result if i} + + @property + def alternate_location_urls(self) -> Set[str]: + """Remote repository urls from Alternate Locations metadata.""" + result = {self.url} + link = self._link + if link and link.repo_alt_urls: + result.update(link.repo_alt_urls) + return {i for i in result if i} diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index e9f2f50346b..c8ee4a1550b 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -292,7 +292,6 @@ def __init__( self.cache_link_parsing = cache_link_parsing self.egg_fragment = self._egg_fragment() - # PEP 708 self.project_track_urls = project_track_urls or set() self.repo_alt_urls = repo_alt_urls or set() From 4724536200caa64b5cb6393705610781124532bb Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Mon, 1 Dec 2025 14:38:43 +0330 Subject: [PATCH 13/26] Fix errors --- src/pip/_internal/exceptions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 139928d2acc..3e05f501517 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -811,6 +811,9 @@ class InvalidMultipleRemoteRepositories(DiagnosticPipError): "by passing --insecure-multiple-remote-repositories." ) +class IncompleteDownloadError(DiagnosticPipError): + """Raised when the downloader receives fewer bytes than advertised + in the Content-Length header.""" class InvalidTracksUrl(InvalidMultipleRemoteRepositories): """There was an issue with a Tracks metadata url. From b5d02e98a19737d5df25417ee2a5276c57674d1e Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Mon, 1 Dec 2025 15:49:19 +0330 Subject: [PATCH 14/26] Stabilize --- src/pip/_internal/exceptions.py | 107 ++++++++++++++++++++-- src/pip/_internal/index/collector.py | 9 +- src/pip/_internal/index/package_finder.py | 57 ++++++------ src/pip/_internal/models/candidate.py | 33 ++++--- src/pip/_internal/models/link.py | 22 +++-- tests/unit/test_collector.py | 2 +- tests/unit/test_index.py | 16 ++-- 7 files changed, 175 insertions(+), 71 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 3e05f501517..17ca5569db6 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -16,7 +16,7 @@ import sys from collections.abc import Iterator from itertools import chain, groupby, repeat -from typing import TYPE_CHECKING, Dict, Iterator, List, Literal, Optional, Set, Union +from typing import TYPE_CHECKING, Literal, Set from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.version import InvalidVersion @@ -800,6 +800,104 @@ def __init__(self, *, distribution: BaseDistribution) -> None: ) +class InvalidInstalledPackage(DiagnosticPipError): + reference = "invalid-installed-package" + + def __init__( + self, + *, + dist: BaseDistribution, + invalid_exc: InvalidRequirement | InvalidVersion, + ) -> None: + installed_location = dist.installed_location + + if isinstance(invalid_exc, InvalidRequirement): + invalid_type = "requirement" + else: + invalid_type = "version" + + super().__init__( + message=Text( + f"Cannot process installed package {dist} " + + (f"in {installed_location!r} " if installed_location else "") + + f"because it has an invalid {invalid_type}:\n{invalid_exc.args[0]}" + ), + context=( + "Starting with pip 24.1, packages with invalid " + f"{invalid_type}s can not be processed." + ), + hint_stmt="To proceed this package must be uninstalled.", + ) + + +class IncompleteDownloadError(DiagnosticPipError): + """Raised when the downloader receives fewer bytes than advertised + in the Content-Length header.""" + + reference = "incomplete-download" + + def __init__(self, download: _FileDownload) -> None: + # Dodge circular import. + from pip._internal.utils.misc import format_size + + assert download.size is not None + download_status = ( + f"{format_size(download.bytes_received)}/{format_size(download.size)}" + ) + if download.reattempts: + retry_status = f"after {download.reattempts + 1} attempts " + hint = "Use --resume-retries to configure resume attempt limit." + else: + # Download retrying is not enabled. + retry_status = "" + hint = "Consider using --resume-retries to enable download resumption." + message = Text( + f"Download failed {retry_status}because not enough bytes " + f"were received ({download_status})" + ) + + super().__init__( + message=message, + context=f"URL: {download.link.redacted_url}", + hint_stmt=hint, + note_stmt="This is an issue with network connectivity, not pip.", + ) + + +class ResolutionTooDeepError(DiagnosticPipError): + """Raised when the dependency resolver exceeds the maximum recursion depth.""" + + reference = "resolution-too-deep" + + def __init__(self) -> None: + super().__init__( + message="Dependency resolution exceeded maximum depth", + context=( + "Pip cannot resolve the current dependencies as the dependency graph " + "is too complex for pip to solve efficiently." + ), + hint_stmt=( + "Try adding lower bounds to constrain your dependencies, " + "for example: 'package>=2.0.0' instead of just 'package'. " + ), + link="https://pip.pypa.io/en/stable/topics/dependency-resolution/#handling-resolution-too-deep-errors", + ) + + +class InstallWheelBuildError(DiagnosticPipError): + reference = "failed-wheel-build-for-install" + + def __init__(self, failed: list[InstallRequirement]) -> None: + super().__init__( + message=( + "Failed to build installable wheels for some " + "pyproject.toml based projects" + ), + context=", ".join(r.name for r in failed), # type: ignore + hint_stmt=None, + ) + + class InvalidMultipleRemoteRepositories(DiagnosticPipError): """Common error for issues with multiple remote repositories.""" @@ -811,13 +909,9 @@ class InvalidMultipleRemoteRepositories(DiagnosticPipError): "by passing --insecure-multiple-remote-repositories." ) -class IncompleteDownloadError(DiagnosticPipError): - """Raised when the downloader receives fewer bytes than advertised - in the Content-Length header.""" class InvalidTracksUrl(InvalidMultipleRemoteRepositories): """There was an issue with a Tracks metadata url. - Tracks urls must point to the actual URLs for that project, point to the repositories that own the namespaces, and point to a project with the exact same name (after normalization). @@ -858,7 +952,6 @@ def __init__( class InvalidAlternativeLocationsUrl(InvalidMultipleRemoteRepositories): """The list of Alternate Locations for each repository do not match. - In order for this metadata to be trusted, there MUST be agreement between all locations where that project is found as to what the alternate locations are. """ @@ -899,10 +992,8 @@ def __init__( class UnsafeMultipleRemoteRepositories(InvalidMultipleRemoteRepositories): """More than one remote repository was provided for a package, with no indication that the remote repositories can be safely merged. - The repositories, packages, or user did not indicate that it is safe to merge remote repositories. - Multiple remote repositories are not merged by default to reduce the risk of dependency confusion attacks.""" diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index efa64204f37..56f6ebff0fc 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -18,12 +18,13 @@ from optparse import Values from typing import ( Callable, + Dict, + List, NamedTuple, + Optional, Protocol, - Sequence, Set, Tuple, - Union, ) from pip._vendor import requests @@ -314,8 +315,8 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None self.anchors.append(dict(attrs)) elif tag == "meta": meta_attrs = dict(attrs) - meta_key = meta_attrs.get("name", "").strip() - meta_val = meta_attrs.get("content", "").strip() + meta_key = (meta_attrs.get("name") or "").strip() + meta_val = (meta_attrs.get("content") or "").strip() if meta_key and meta_val: if ( meta_key == self._meta_key_tracks diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index c73199e55e8..af1c5560be8 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -9,12 +9,9 @@ import pathlib import re import urllib.parse +from collections.abc import Iterable from dataclasses import dataclass -from typing import ( - TYPE_CHECKING, - Optional, - Union, -) +from typing import TYPE_CHECKING, List, Optional, Union from pip._vendor.packaging import specifiers from pip._vendor.packaging.tags import Tag @@ -26,14 +23,17 @@ BestVersionAlreadyInstalled, DistributionNotFound, InvalidAlternativeLocationsUrl, - InvalidWheelFilename, InvalidTracksUrl, + InvalidWheelFilename, UnsafeMultipleRemoteRepositories, UnsupportedWheel, ) from pip._internal.index.collector import LinkCollector, parse_links -from pip._internal.models.candidate import InstallationCandidate, \ - RemoteInstallationCandidate +from pip._internal.models.candidate import ( + InstallationCandidate, + RemoteInstallationCandidate, +) +from pip._internal.metadata import select_backend from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope @@ -1184,11 +1184,11 @@ def check_multiple_remote_repositories( # all known alternate location urls known_alternate_urls = set() # the alternate location urls that are not present in all remote candidates - mismatch_alternate_urls = set() + mismatch_alternate_urls: set[str] = set() # all known remote repositories that do not track any other repos - known_owner_repo_urls = set() + known_owner_repo_urls: set[str | None] = set() # all known remote repositories that do track other repos - known_tracker_repo_urls = set() + known_tracker_repo_urls: set[str | None] = set() for candidate in candidates: candidate_name = candidate.name link = candidate.link @@ -1213,9 +1213,9 @@ def check_multiple_remote_repositories( known_remote_repo_urls.update(remote_candidate.remote_repository_urls) known_alternate_urls.update(remote_candidate.alternate_location_urls) if len(remote_candidate.project_track_urls) > 0: - known_tracker_repo_urls.update(remote_candidate.url) + known_tracker_repo_urls.add(remote_candidate.url) else: - known_owner_repo_urls.update(remote_candidate.url) + known_owner_repo_urls.add(remote_candidate.url) # Update the set, keeping only elements found in either set, but not in both. # This should allow all the items that are not present for all remote candidates @@ -1239,8 +1239,10 @@ def check_multiple_remote_repositories( ) return None if len(known_remote_repo_urls) == 0: - msg = ("Unexpected situation where there are remote candidates, " - "but no remote repositories") + msg = ( + "Unexpected situation where there are remote candidates, " + "but no remote repositories" + ) logger.warning(msg) raise ValueError(msg) @@ -1268,8 +1270,9 @@ def check_multiple_remote_repositories( page_url = remote_candidate.url for project_track_url in project_track_urls: parts = pathlib.Path(urllib.parse.urlsplit(project_track_url).path).parts - parts = list(parts) - parts.reverse() + path_parts = list(parts) + path_parts.reverse() + path_parts.reverse() # Specification: It [Tracks metadata] MUST point to the actual URLs # for that project, not the base URL for the extended repositories. @@ -1285,12 +1288,12 @@ def check_multiple_remote_repositories( # Note: These assumptions about the structure of the Tracks url may not # hold true for all remote repositories. - if not parts or not any( - canonical_name == canonicalize_name(p) for p in parts + if not path_parts or not any( + canonical_name == canonicalize_name(p) for p in path_parts ): raise InvalidTracksUrl( package=project_name, - remote_repositories={page_url}, + remote_repositories={page_url} if page_url is not None else set(), invalid_tracks={project_track_url}, ) @@ -1305,7 +1308,7 @@ def check_multiple_remote_repositories( if project_track_url not in known_owner_repo_urls: raise InvalidTracksUrl( package=project_name, - remote_repositories={page_url}, + remote_repositories={page_url} if page_url is not None else set(), invalid_tracks={project_track_url}, ) @@ -1318,14 +1321,16 @@ def check_multiple_remote_repositories( for remote_candidate in remote_candidates: candidate_alt_urls = remote_candidate.alternate_location_urls - invalid_alt_urls = known_alternate_urls - candidate_alt_urls + invalid_alt_urls = known_alternate_urls - candidate_alt_urls has_alts = len(candidate_alt_urls) > 0 has_tracks = len(remote_candidate.project_track_urls) > 0 - is_invalid = any([ - not has_alts and not has_tracks, - not has_tracks and invalid_alt_urls, - ]) + is_invalid = any( + [ + not has_alts and not has_tracks, + not has_tracks and invalid_alt_urls, + ] + ) if is_invalid: error = UnsafeMultipleRemoteRepositories( diff --git a/src/pip/_internal/models/candidate.py b/src/pip/_internal/models/candidate.py index f53b05f20f7..1992c98c8c4 100644 --- a/src/pip/_internal/models/candidate.py +++ b/src/pip/_internal/models/candidate.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Set, Optional +from typing import Optional, Set from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version @@ -35,7 +35,7 @@ class RemoteInstallationCandidate: canonical_name: str candidate: InstallationCandidate - def __init__(self, canonical_name:str, candidate: InstallationCandidate) -> None: + def __init__(self, canonical_name: str, candidate: InstallationCandidate) -> None: object.__setattr__(self, "canonical_name", canonical_name) object.__setattr__(self, "candidate", candidate) @@ -56,14 +56,16 @@ def url(self) -> Optional[str]: link = self._link if not link: return None - if not link.comes_from: + cf = link.comes_from + if cf is None: return None - if hasattr(link.comes_from, 'url') and link.comes_from.url: - return self.candidate.link.comes_from.url.lstrip() - if link.comes_from: - return self.candidate.link.comes_from.lstrip() - return None + url_attr = getattr(cf, "url", None) + if isinstance(url_attr, str): + return url_attr.lstrip() + if isinstance(cf, str): + return cf.lstrip() + return None @property def remote_repository_urls(self) -> Set[str]: @@ -73,17 +75,18 @@ def remote_repository_urls(self) -> Set[str]: @property def project_track_urls(self) -> Set[str]: """Remote repository urls from Tracks metadata.""" - result = {} link = self._link - if link and link.project_track_urls: - result.update(link.project_track_urls) - return {i for i in result if i} + if not link or not link.project_track_urls: + return set() + return {i for i in link.project_track_urls if i} @property def alternate_location_urls(self) -> Set[str]: """Remote repository urls from Alternate Locations metadata.""" - result = {self.url} + urls: Set[str] = set() + if self.url: + urls.add(self.url) link = self._link if link and link.repo_alt_urls: - result.update(link.repo_alt_urls) - return {i for i in result if i} + urls.update({i for i in link.repo_alt_urls if i}) + return urls diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index c8ee4a1550b..d99cbdefae6 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -15,8 +15,6 @@ NamedTuple, Optional, Set, - Tuple, - Union, ) from pip._internal.utils.deprecation import deprecated @@ -302,7 +300,7 @@ def from_json( page_url: str, project_track_urls: Optional[Set[str]] = None, repo_alt_urls: Optional[Set[str]] = None, - ) -> Optional["Link"]: + ) -> Optional[Link]: """ Convert an pypi json document from a simple repository page into a Link. """ @@ -339,8 +337,10 @@ def from_json( elif not yanked_reason: yanked_reason = None - project_track_urls = {i.strip() for i in project_track_urls if i and i.strip()} - repo_alt_urls = {i.strip() for i in repo_alt_urls if i and i.strip()} + project_track_urls = { + i.strip() for i in project_track_urls or [] if i and i.strip() + } + repo_alt_urls = {i.strip() for i in repo_alt_urls or [] if i and i.strip()} return cls( url, @@ -361,7 +361,7 @@ def from_element( base_url: str, project_track_urls: Optional[Set[str]] = None, repo_alt_urls: Optional[Set[str]] = None, - ) -> Optional["Link"]: + ) -> Optional[Link]: """ Convert an anchor element's attributes in a simple repository page to a Link. """ @@ -399,8 +399,10 @@ def from_element( ) metadata_file_data = MetadataFile(None) - project_track_urls = {i.strip() for i in project_track_urls if i and i.strip()} - repo_alt_urls = {i.strip() for i in repo_alt_urls if i and i.strip()} + project_track_urls = { + i.strip() for i in project_track_urls or [] if i and i.strip() + } + repo_alt_urls = {i.strip() for i in repo_alt_urls or [] if i and i.strip()} return cls( url, @@ -583,7 +585,7 @@ def is_hash_allowed(self, hashes: Hashes | None) -> bool: return any(hashes.is_hash_allowed(k, v) for k, v in self._hashes.items()) @property - def is_local_only(self): + def is_local_only(self) -> bool: """ Is this link entirely local, with no metadata pointing to a remote url? """ @@ -595,7 +597,7 @@ def is_local_only(self): "metadata_urls": {*self.project_track_urls, *self.repo_alt_urls}, } ) - return ( + return bool( (self.is_file and self.file_path) and not self.comes_from and not any( diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 5d3e36952fe..3ced86f372a 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -8,7 +8,7 @@ import uuid from pathlib import Path from textwrap import dedent -from typing import Dict, List, Optional, Tuple, Set +from typing import Optional, Set from unittest import mock import pytest diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 6af6963dc00..079d07eb93e 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,22 +1,20 @@ from __future__ import annotations import logging -import uuid -from typing import FrozenSet, List, Optional, Set, Tuple +from typing import List, Optional, Set import pytest -from pip._internal.models.candidate import InstallationCandidate + from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.tags import Tag from pip._vendor.packaging.utils import canonicalize_name -from pip._internal.index.collector import LinkCollector, IndexContent from pip._internal.exceptions import ( InvalidAlternativeLocationsUrl, - InvalidMultipleRemoteRepositories, InvalidTracksUrl, UnsafeMultipleRemoteRepositories, ) +from pip._internal.index.collector import IndexContent, LinkCollector from pip._internal.index.package_finder import ( CandidateEvaluator, CandidatePreferences, @@ -30,6 +28,7 @@ check_multiple_remote_repositories, filter_unallowed_hashes, ) +from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope from pip._internal.models.selection_prefs import SelectionPreferences @@ -995,7 +994,7 @@ def _make_mock_candidate_check_remote_repo( "mypackage", None, ), - # checks pass when ony one candidate with alt loc url + # checks pass when only one candidate with alt loc url # TODO: not making requests to repos revealed via metadata ( [ @@ -1087,7 +1086,10 @@ def _make_mock_candidate_check_remote_repo( ], ) def test_check_multiple_remote_repositories( - caplog, candidates: List[InstallationCandidate], project_name: str, expected + caplog: pytest.LogCaptureFixture, + candidates: list[InstallationCandidate], + project_name: str, + expected ): caplog.set_level(logging.DEBUG) if expected: From b93991e7878503846bce19ef1e03ea9f57887e9e Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Mon, 1 Dec 2025 15:52:03 +0330 Subject: [PATCH 15/26] Fix errors --- src/pip/_internal/exceptions.py | 12 ++++++------ src/pip/_internal/index/collector.py | 15 +++++---------- src/pip/_internal/index/package_finder.py | 6 +++--- src/pip/_internal/models/candidate.py | 15 ++++++++------- src/pip/_internal/models/link.py | 20 +++++++++----------- tests/unit/test_collector.py | 9 ++++----- tests/unit/test_index.py | 13 ++++++------- 7 files changed, 41 insertions(+), 49 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 17ca5569db6..23edabcdf17 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -16,7 +16,7 @@ import sys from collections.abc import Iterator from itertools import chain, groupby, repeat -from typing import TYPE_CHECKING, Literal, Set +from typing import TYPE_CHECKING, Literal from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.version import InvalidVersion @@ -923,8 +923,8 @@ def __init__( self, *, package: str, - remote_repositories: Set[str], - invalid_tracks: Set[str], + remote_repositories: set[str], + invalid_tracks: set[str], ) -> None: super().__init__( kind="error", @@ -962,8 +962,8 @@ def __init__( self, *, package: str, - remote_repositories: Set[str], - invalid_locations: Set[str], + remote_repositories: set[str], + invalid_locations: set[str], ) -> None: super().__init__( kind="error", @@ -999,7 +999,7 @@ class UnsafeMultipleRemoteRepositories(InvalidMultipleRemoteRepositories): reference = "unsafe-multiple-remote-repositories" - def __init__(self, *, package: str, remote_repositories: Set[str]) -> None: + def __init__(self, *, package: str, remote_repositories: set[str]) -> None: super().__init__( kind="error", message=Text( diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index 56f6ebff0fc..cf8bcfaa262 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -18,13 +18,8 @@ from optparse import Values from typing import ( Callable, - Dict, - List, NamedTuple, - Optional, Protocol, - Set, - Tuple, ) from pip._vendor import requests @@ -301,10 +296,10 @@ def __init__(self, url: str) -> None: super().__init__(convert_charrefs=True) self.url: str = url - self.base_url: Optional[str] = None - self.anchors: List[Dict[str, Optional[str]]] = [] - self.project_track_urls: Set[str] = set() - self.repo_alt_urls: Set[str] = set() + self.base_url: str | None = None + self.anchors: list[dict[str, str | None]] = [] + self.project_track_urls: set[str] = set() + self.repo_alt_urls: set[str] = set() def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: if tag == "base" and self.base_url is None: @@ -329,7 +324,7 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None ): self.repo_alt_urls.add(meta_val) - def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: + def get_href(self, attrs: list[tuple[str, str | None]]) -> str | None: for name, value in attrs: if name == "href": return value diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index af1c5560be8..ac46951ce1d 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -11,7 +11,7 @@ import urllib.parse from collections.abc import Iterable from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Optional, Union +from typing import TYPE_CHECKING, Optional, Union from pip._vendor.packaging import specifiers from pip._vendor.packaging.tags import Tag @@ -29,11 +29,11 @@ UnsupportedWheel, ) from pip._internal.index.collector import LinkCollector, parse_links +from pip._internal.metadata import select_backend from pip._internal.models.candidate import ( InstallationCandidate, RemoteInstallationCandidate, ) -from pip._internal.metadata import select_backend from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope @@ -1075,7 +1075,7 @@ def _extract_version_from_fragment(fragment: str, canonical_name: str) -> str | def check_multiple_remote_repositories( - candidates: List[InstallationCandidate], project_name: str + candidates: list[InstallationCandidate], project_name: str ) -> None: """ Check whether two or more different namespaces can be flattened into one. diff --git a/src/pip/_internal/models/candidate.py b/src/pip/_internal/models/candidate.py index 1992c98c8c4..8aa04590e88 100644 --- a/src/pip/_internal/models/candidate.py +++ b/src/pip/_internal/models/candidate.py @@ -1,5 +1,6 @@ +from __future__ import annotations + from dataclasses import dataclass -from typing import Optional, Set from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version @@ -43,7 +44,7 @@ def __str__(self) -> str: return f"{self.canonical_name!r} candidate" @property - def _link(self) -> Optional[Link]: + def _link(self) -> Link | None: if not self.candidate: return None if not self.candidate.link: @@ -51,7 +52,7 @@ def _link(self) -> Optional[Link]: return self.candidate.link @property - def url(self) -> Optional[str]: + def url(self) -> str | None: """The remote url that contains the metadata for this installation candidate.""" link = self._link if not link: @@ -68,12 +69,12 @@ def url(self) -> Optional[str]: return None @property - def remote_repository_urls(self) -> Set[str]: + def remote_repository_urls(self) -> set[str]: """Remote repository urls from Tracks and Alternate Locations metadata.""" return {*self.project_track_urls, *self.alternate_location_urls} @property - def project_track_urls(self) -> Set[str]: + def project_track_urls(self) -> set[str]: """Remote repository urls from Tracks metadata.""" link = self._link if not link or not link.project_track_urls: @@ -81,9 +82,9 @@ def project_track_urls(self) -> Set[str]: return {i for i in link.project_track_urls if i} @property - def alternate_location_urls(self) -> Set[str]: + def alternate_location_urls(self) -> set[str]: """Remote repository urls from Alternate Locations metadata.""" - urls: Set[str] = set() + urls: set[str] = set() if self.url: urls.add(self.url) link = self._link diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index d99cbdefae6..6ec39040b62 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -13,8 +13,6 @@ TYPE_CHECKING, Any, NamedTuple, - Optional, - Set, ) from pip._internal.utils.deprecation import deprecated @@ -227,9 +225,9 @@ def __init__( yanked_reason: str | None = None, metadata_file_data: MetadataFile | None = None, cache_link_parsing: bool = True, - hashes: Optional[Mapping[str, str]] = None, - project_track_urls: Optional[Set[str]] = None, - repo_alt_urls: Optional[Set[str]] = None, + hashes: Mapping[str, str] | None = None, + project_track_urls: set[str] | None = None, + repo_alt_urls: set[str] | None = None, ) -> None: """ :param url: url of the resource pointed to (href of the link) @@ -298,9 +296,9 @@ def from_json( cls, file_data: dict[str, Any], page_url: str, - project_track_urls: Optional[Set[str]] = None, - repo_alt_urls: Optional[Set[str]] = None, - ) -> Optional[Link]: + project_track_urls: set[str] | None = None, + repo_alt_urls: set[str] | None = None, + ) -> Link | None: """ Convert an pypi json document from a simple repository page into a Link. """ @@ -359,9 +357,9 @@ def from_element( anchor_attribs: dict[str, str | None], page_url: str, base_url: str, - project_track_urls: Optional[Set[str]] = None, - repo_alt_urls: Optional[Set[str]] = None, - ) -> Optional[Link]: + project_track_urls: set[str] | None = None, + repo_alt_urls: set[str] | None = None, + ) -> Link | None: """ Convert an anchor element's attributes in a simple repository page to a Link. """ diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 3ced86f372a..3e859bcda70 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -8,7 +8,6 @@ import uuid from pathlib import Path from textwrap import dedent -from typing import Optional, Set from unittest import mock import pytest @@ -596,7 +595,7 @@ def test_parse_links_json() -> None: ), ] - def _convert_links(link_item: Link) -> dict[str, Optional[str]]: + def _convert_links(link_item: Link) -> dict[str, str | None]: return {key: getattr(link_item, key, None) for key in link_item.__slots__} for index, link in enumerate(links): @@ -744,7 +743,7 @@ def test_parse_links_caches_same_page_by_url() -> None: @pytest.mark.parametrize( - ("index_name", "expected_project_track_urls", "expected_repo_alt_urls"), + "index_name, expected_project_track_urls, expected_repo_alt_urls", [ ( "repository-alternate-01", @@ -760,8 +759,8 @@ def test_parse_links_caches_same_page_by_url() -> None: ) def test_parse_links__alternate_locations_and_tracks( index_name: str, - expected_project_track_urls: Set[str], - expected_repo_alt_urls: Set[str], + expected_project_track_urls: set[str], + expected_repo_alt_urls: set[str], data: TestData, ) -> None: package_name = "simple" diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 079d07eb93e..a833318eff8 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -from typing import List, Optional, Set import pytest @@ -916,11 +915,11 @@ def test_extract_version_from_fragment( def _make_mock_candidate_check_remote_repo( - candidate_name: Optional[str] = None, - version: Optional[str] = None, - comes_from_url: Optional[str] = None, - project_track_urls: Optional[Set[str]] = None, - repo_alt_urls: Optional[Set[str]] = None, + candidate_name: str | None = None, + version: str | None = None, + comes_from_url: str | None = None, + project_track_urls: set[str] | None = None, + repo_alt_urls: set[str] | None = None, ) -> InstallationCandidate: if candidate_name is None: candidate_name = "mypackage" @@ -1089,7 +1088,7 @@ def test_check_multiple_remote_repositories( caplog: pytest.LogCaptureFixture, candidates: list[InstallationCandidate], project_name: str, - expected + expected, ): caplog.set_level(logging.DEBUG) if expected: From 026eed73a5d405cbd188448d5e41a5bd731fee14 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Mon, 1 Dec 2025 18:39:11 +0330 Subject: [PATCH 16/26] Fix mypy errors --- tests/unit/test_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index a833318eff8..37f6f89847b 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1088,8 +1088,8 @@ def test_check_multiple_remote_repositories( caplog: pytest.LogCaptureFixture, candidates: list[InstallationCandidate], project_name: str, - expected, -): + expected: type[Exception] | None, +)-> None: caplog.set_level(logging.DEBUG) if expected: with pytest.raises(expected): From f9214e74f7f9bbaaf886de560b7938000068f387 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Mon, 1 Dec 2025 18:50:32 +0330 Subject: [PATCH 17/26] Fix mypy errors --- tests/unit/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 37f6f89847b..fbe7fc4ebc2 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1089,7 +1089,7 @@ def test_check_multiple_remote_repositories( candidates: list[InstallationCandidate], project_name: str, expected: type[Exception] | None, -)-> None: +) -> None: caplog.set_level(logging.DEBUG) if expected: with pytest.raises(expected): From 0dd87cdbf171c2584a78e0508b2b2844719f3042 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Tue, 2 Dec 2025 09:09:49 +0330 Subject: [PATCH 18/26] Fix test_index.py errors --- tests/unit/test_index.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index fbe7fc4ebc2..4fc14221929 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,19 +1,20 @@ from __future__ import annotations import logging - +import uuid import pytest - +from pip._internal.models.candidate import InstallationCandidate from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.tags import Tag from pip._vendor.packaging.utils import canonicalize_name +from pip._internal.index.collector import LinkCollector, IndexContent from pip._internal.exceptions import ( InvalidAlternativeLocationsUrl, + InvalidMultipleRemoteRepositories, InvalidTracksUrl, UnsafeMultipleRemoteRepositories, ) -from pip._internal.index.collector import IndexContent, LinkCollector from pip._internal.index.package_finder import ( CandidateEvaluator, CandidatePreferences, @@ -24,10 +25,8 @@ _check_link_requires_python, _extract_version_from_fragment, _find_name_version_sep, - check_multiple_remote_repositories, filter_unallowed_hashes, ) -from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope from pip._internal.models.selection_prefs import SelectionPreferences @@ -913,13 +912,12 @@ def test_extract_version_from_fragment( version = _extract_version_from_fragment(fragment, canonical_name) assert version == expected - def _make_mock_candidate_check_remote_repo( - candidate_name: str | None = None, - version: str | None = None, - comes_from_url: str | None = None, - project_track_urls: set[str] | None = None, - repo_alt_urls: set[str] | None = None, + candidate_name: Optional[str] = None, + version: Optional[str] = None, + comes_from_url: Optional[str] = None, + project_track_urls: Optional[Set[str]] = None, + repo_alt_urls: Optional[Set[str]] = None, ) -> InstallationCandidate: if candidate_name is None: candidate_name = "mypackage" @@ -993,7 +991,7 @@ def _make_mock_candidate_check_remote_repo( "mypackage", None, ), - # checks pass when only one candidate with alt loc url + # checks pass when ony one candidate with alt loc url # TODO: not making requests to repos revealed via metadata ( [ @@ -1085,14 +1083,11 @@ def _make_mock_candidate_check_remote_repo( ], ) def test_check_multiple_remote_repositories( - caplog: pytest.LogCaptureFixture, - candidates: list[InstallationCandidate], - project_name: str, - expected: type[Exception] | None, -) -> None: + caplog, candidates: List[InstallationCandidate], project_name: str, expected +): caplog.set_level(logging.DEBUG) if expected: with pytest.raises(expected): check_multiple_remote_repositories(candidates, project_name) else: - assert check_multiple_remote_repositories(candidates, project_name) is None + assert check_multiple_remote_repositories(candidates, project_name) is None \ No newline at end of file From d53d07b5ab454c4e3fcab0eca7ed3f9550c03fb9 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Tue, 2 Dec 2025 10:30:38 +0330 Subject: [PATCH 19/26] Fix errors --- tests/unit/test_index.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 4fc14221929..cd7f013ad07 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,20 +1,19 @@ from __future__ import annotations import logging -import uuid + import pytest -from pip._internal.models.candidate import InstallationCandidate + from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.tags import Tag from pip._vendor.packaging.utils import canonicalize_name -from pip._internal.index.collector import LinkCollector, IndexContent from pip._internal.exceptions import ( InvalidAlternativeLocationsUrl, - InvalidMultipleRemoteRepositories, InvalidTracksUrl, UnsafeMultipleRemoteRepositories, ) +from pip._internal.index.collector import IndexContent, LinkCollector from pip._internal.index.package_finder import ( CandidateEvaluator, CandidatePreferences, @@ -25,8 +24,10 @@ _check_link_requires_python, _extract_version_from_fragment, _find_name_version_sep, + check_multiple_remote_repositories, filter_unallowed_hashes, ) +from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope from pip._internal.models.selection_prefs import SelectionPreferences @@ -912,6 +913,7 @@ def test_extract_version_from_fragment( version = _extract_version_from_fragment(fragment, canonical_name) assert version == expected + def _make_mock_candidate_check_remote_repo( candidate_name: Optional[str] = None, version: Optional[str] = None, @@ -1090,4 +1092,4 @@ def test_check_multiple_remote_repositories( with pytest.raises(expected): check_multiple_remote_repositories(candidates, project_name) else: - assert check_multiple_remote_repositories(candidates, project_name) is None \ No newline at end of file + assert check_multiple_remote_repositories(candidates, project_name) is None From 38c6424ca3b334c5bd7375ed8696de040a53f083 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Tue, 2 Dec 2025 13:23:29 +0330 Subject: [PATCH 20/26] Fix value error bug --- tests/unit/test_index.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index cd7f013ad07..404a8e67d73 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -927,26 +927,28 @@ def _make_mock_candidate_check_remote_repo( if version is None: version = "1.0" - comes_from = None - if comes_from_url is not None: - comes_from = IndexContent( - b"", - "text/html", - encoding=None, - url=comes_from_url, - ) + if comes_from_url is None: + comes_from_url = f"https://example.com/simple/{candidate_name}" + + comes_from = IndexContent( + content=b"", + content_type="text/html", + encoding=None, + url=comes_from_url, + ) - url = f"https://example.com/pkg-{version}.tar.gz" + project_track_urls = project_track_urls or set() + repo_alt_urls = repo_alt_urls or set() + repo_alt_urls.add(comes_from_url) link = Link( - url, + url=f"https://example.com/packages/{candidate_name}-{version}.tar.gz", comes_from=comes_from, project_track_urls=project_track_urls, repo_alt_urls=repo_alt_urls, ) - candidate = InstallationCandidate(candidate_name, version, link) - return candidate + return InstallationCandidate(candidate_name, version, link) @pytest.mark.parametrize( From 55543c1984d9194bcb7bb2838033b358d033f6ac Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Thu, 4 Dec 2025 08:54:50 +0330 Subject: [PATCH 21/26] Update check_multiple_remotes_repositories --- src/pip/_internal/index/package_finder.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index ac46951ce1d..2b2c957b439 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1166,7 +1166,7 @@ def check_multiple_remote_repositories( # to question that and we assume that they’ve made sure it is safe to merge # those namespaces. If the end user has explicitly told the installer to fetch # the project from specific repositories, filter out all other repositories. - + # import pytest; pytest.set_trace() # TODO: REMOVE FULLY. # When no candidates are provided, then no checks are relevant, so just return. if candidates is None or len(candidates) == 0: logger.debug("No candidates given to multiple remote repository checks") @@ -1179,6 +1179,11 @@ def check_multiple_remote_repositories( # if they do then determine if either “Tracks” or “Alternate Locations” metadata # allows safely merging together ALL the repositories where files were discovered. remote_candidates = [] + # If every remote candidate lacks repository metadata (common in tests using raw links), + # then treat them as coming from a single implicit repository and skip multi-repo checks. + if all(not rc.remote_repository_urls for rc in remote_candidates): + logger.debug("All remote candidates lack repository metadata.") + return None # all known remote repositories known_remote_repo_urls = set() # all known alternate location urls @@ -1272,7 +1277,7 @@ def check_multiple_remote_repositories( parts = pathlib.Path(urllib.parse.urlsplit(project_track_url).path).parts path_parts = list(parts) path_parts.reverse() - path_parts.reverse() + # path_parts.reverse() # Possible intentional behaviour? # Specification: It [Tracks metadata] MUST point to the actual URLs # for that project, not the base URL for the extended repositories. From 9631c56ddcec302d303069c9a1fd8249a4de2d1a Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Thu, 4 Dec 2025 09:01:01 +0330 Subject: [PATCH 22/26] Moved check in check_multiple_remote_repositories --- src/pip/_internal/index/package_finder.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 2b2c957b439..f72c059a5c6 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1181,9 +1181,6 @@ def check_multiple_remote_repositories( remote_candidates = [] # If every remote candidate lacks repository metadata (common in tests using raw links), # then treat them as coming from a single implicit repository and skip multi-repo checks. - if all(not rc.remote_repository_urls for rc in remote_candidates): - logger.debug("All remote candidates lack repository metadata.") - return None # all known remote repositories known_remote_repo_urls = set() # all known alternate location urls @@ -1234,6 +1231,10 @@ def check_multiple_remote_repositories( logger.debug("No remote candidates for multiple remote repository checks") return None + if all(not rc.remote_repository_urls for rc in remote_candidates): + logger.debug("All remote candidates lack repository metadata.") + return None + # Specification: If the project in question only comes from a single repository, # then there is no chance of dependency confusion, so there’s no reason to do # anything but allow. From 71d6a8c2b416fbc69c2a20da7eb4526de41723ac Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Fri, 5 Dec 2025 13:46:14 +0330 Subject: [PATCH 23/26] Update code to fix errors --- src/pip/_internal/index/package_finder.py | 110 ++++++++++++++++------ tests/unit/test_index.py | 5 +- 2 files changed, 82 insertions(+), 33 deletions(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index f72c059a5c6..76005fa1eb7 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1074,6 +1074,17 @@ def _extract_version_from_fragment(fragment: str, canonical_name: str) -> str | return version +def _repo_base(url: str) -> str: + """ + Convert: + https://a.com/simple/mypackage + → https://a.com/simple/ + """ + s = urllib.parse.urlsplit(url) + path = s.path.rsplit("/", 1)[0] + "/" + return urllib.parse.urlunsplit((s.scheme, s.netloc, path, "", "")) + + def check_multiple_remote_repositories( candidates: list[InstallationCandidate], project_name: str ) -> None: @@ -1166,7 +1177,6 @@ def check_multiple_remote_repositories( # to question that and we assume that they’ve made sure it is safe to merge # those namespaces. If the end user has explicitly told the installer to fetch # the project from specific repositories, filter out all other repositories. - # import pytest; pytest.set_trace() # TODO: REMOVE FULLY. # When no candidates are provided, then no checks are relevant, so just return. if candidates is None or len(candidates) == 0: logger.debug("No candidates given to multiple remote repository checks") @@ -1179,8 +1189,10 @@ def check_multiple_remote_repositories( # if they do then determine if either “Tracks” or “Alternate Locations” metadata # allows safely merging together ALL the repositories where files were discovered. remote_candidates = [] - # If every remote candidate lacks repository metadata (common in tests using raw links), - # then treat them as coming from a single implicit repository and skip multi-repo checks. + # If every remote candidate lacks repository metadata + # (common in tests using raw links), + # then treat them as coming from a single implicit + # repository and skip multi-repo checks. # all known remote repositories known_remote_repo_urls = set() # all known alternate location urls @@ -1230,7 +1242,6 @@ def check_multiple_remote_repositories( if len(remote_candidates) == 0: logger.debug("No remote candidates for multiple remote repository checks") return None - if all(not rc.remote_repository_urls for rc in remote_candidates): logger.debug("All remote candidates lack repository metadata.") return None @@ -1260,14 +1271,30 @@ def check_multiple_remote_repositories( # This means that the known candidate metadata might agree and pass this check, # while retrieving the metadata from additional urls would not agree and would # fail this check. - # Specification: In order for this metadata to be trusted, there MUST be agreement - # between all locations where that project is found as to what the alternate + # Specification: In order for this metadata to be trusted, + # there MUST be agreement + # between all locations where that project is + # found as to what the alternate # locations are. - if len(mismatch_alternate_urls) > 0: + # Only validate alternate locations if any + # candidate actually declares alternate locations. + all_explicit_alt_urls = [ + rc.alternate_location_urls - ({rc.url} if rc.url else set()) + for rc in remote_candidates + ] + mismatch_explicit_alt_urls: set[str] = set() + owner_urls = {rc.url for rc in remote_candidates if not rc.project_track_urls} + for urls in all_explicit_alt_urls: + mismatch_explicit_alt_urls.symmetric_difference_update(urls - owner_urls) + + if ( + any(len(urls) > 0 for urls in all_explicit_alt_urls) + and len(mismatch_explicit_alt_urls) > 0 + ): raise InvalidAlternativeLocationsUrl( package=project_name, remote_repositories=known_remote_repo_urls, - invalid_locations=mismatch_alternate_urls, + invalid_locations=mismatch_explicit_alt_urls, ) # Check the Tracks metadata. @@ -1311,12 +1338,27 @@ def check_multiple_remote_repositories( # TODO: Without requesting all repositories revealed by metadata, this # check might pass with incomplete knowledge of all metadata, # when it would fail after retrieving all metadata. - if project_track_url not in known_owner_repo_urls: - raise InvalidTracksUrl( - package=project_name, - remote_repositories={page_url} if page_url is not None else set(), - invalid_tracks={project_track_url}, - ) + track_repo = _repo_base(project_track_url) + + # Determine whether there is only a single real origin repository. + single_origin = len({rc.url for rc in remote_candidates}) == 1 + + if not single_origin: + if track_repo not in {_repo_base(u) for u in known_owner_repo_urls}: + raise InvalidTracksUrl( + package=project_name, + remote_repositories=( + {page_url} if page_url is not None else set() + ), + invalid_tracks={project_track_url}, + ) + + if len(remote_candidates) == 1: + logger.debug( + "Single remote candidate; Tracks/Alternate Locations validated — " + "skipping multi-repo namespace intersection checks." + ) + return None # Specification: If nothing tells us merging the namespaces is safe, we refuse to # implicitly assume it is, and generate an error instead. @@ -1324,27 +1366,33 @@ def check_multiple_remote_repositories( # generate an error. # Implementation Note: If there are two or more remote candidates, and any of them # don't have valid Alternate Locations and/or Tracks metadata, then fail. - for remote_candidate in remote_candidates: - candidate_alt_urls = remote_candidate.alternate_location_urls - - invalid_alt_urls = known_alternate_urls - candidate_alt_urls - has_alts = len(candidate_alt_urls) > 0 - has_tracks = len(remote_candidate.project_track_urls) > 0 + namespaces = [] - is_invalid = any( - [ - not has_alts and not has_tracks, - not has_tracks and invalid_alt_urls, - ] + for remote_candidate in remote_candidates: + candidate_origin = {remote_candidate.url} + declared_sources = ( + candidate_origin + | remote_candidate.project_track_urls + | remote_candidate.alternate_location_urls ) + namespaces.append(declared_sources) - if is_invalid: - error = UnsafeMultipleRemoteRepositories( + # Check if every namespace intersects at least one other namespace + # (otherwise we found a repository that doesn't belong to the merged set) + for i, ns in enumerate(namespaces): + if not any(ns & other for j, other in enumerate(namespaces) if i != j): + raise UnsafeMultipleRemoteRepositories( package=project_name, - remote_repositories=known_remote_repo_urls, + remote_repositories=ns, ) - raise error - # Specification: Otherwise [if metadata allows] we merge the namespaces, - # and continue on. + all_declared_sources = set().union(*namespaces) + invalid_sources = all_declared_sources - known_remote_repo_urls + + if invalid_sources: + raise UnsafeMultipleRemoteRepositories( + package=project_name, + remote_repositories=invalid_sources, + ) + return None diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 404a8e67d73..34a2f18d143 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +from typing import Optional import pytest @@ -995,7 +996,7 @@ def _make_mock_candidate_check_remote_repo( "mypackage", None, ), - # checks pass when ony one candidate with alt loc url + # checks pass when only one candidate with alt loc url # TODO: not making requests to repos revealed via metadata ( [ @@ -1087,7 +1088,7 @@ def _make_mock_candidate_check_remote_repo( ], ) def test_check_multiple_remote_repositories( - caplog, candidates: List[InstallationCandidate], project_name: str, expected + caplog, candidates: list[InstallationCandidate], project_name: str, expected ): caplog.set_level(logging.DEBUG) if expected: From 88183ff813f5660d0932f2966058c1fe7ad3fdf4 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Fri, 5 Dec 2025 16:04:30 +0330 Subject: [PATCH 24/26] Fix linting issues --- src/pip/_internal/index/collector.py | 6 ++++-- src/pip/_internal/index/package_finder.py | 6 ++++-- src/pip/_internal/models/candidate.py | 4 ++-- tests/unit/test_index.py | 16 ++++++++-------- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index cf8bcfaa262..afec9b0c4bb 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -231,8 +231,9 @@ def parse_links(page: IndexContent) -> Iterable[Link]: if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): data = json.loads(page.content) project_track_urls = set(data.get("meta", {}).get("tracks", [])) + # repo_alt_urls = set(data.get("alternate-locations", [])) + # repo_alt_urls.add(page.url) repo_alt_urls = set(data.get("alternate-locations", [])) - repo_alt_urls.add(page.url) for file in data.get("files", []): link = Link.from_json( file, @@ -251,8 +252,9 @@ def parse_links(page: IndexContent) -> Iterable[Link]: base_url = parser.base_url or url for anchor in parser.anchors: + # repo_alt_urls = parser.repo_alt_urls or set() + # repo_alt_urls.add(page.url) repo_alt_urls = parser.repo_alt_urls or set() - repo_alt_urls.add(page.url) link = Link.from_element( anchor, page_url=url, diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 76005fa1eb7..dfdc2a5f4a1 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1276,7 +1276,7 @@ def check_multiple_remote_repositories( # between all locations where that project is # found as to what the alternate # locations are. - # Only validate alternate locations if any + # Only validate alternate locations if any # candidate actually declares alternate locations. all_explicit_alt_urls = [ rc.alternate_location_urls - ({rc.url} if rc.url else set()) @@ -1344,7 +1344,9 @@ def check_multiple_remote_repositories( single_origin = len({rc.url for rc in remote_candidates}) == 1 if not single_origin: - if track_repo not in {_repo_base(u) for u in known_owner_repo_urls}: + if track_repo not in { + _repo_base(u) for u in known_owner_repo_urls if u is not None + }: raise InvalidTracksUrl( package=project_name, remote_repositories=( diff --git a/src/pip/_internal/models/candidate.py b/src/pip/_internal/models/candidate.py index 8aa04590e88..8b265bea758 100644 --- a/src/pip/_internal/models/candidate.py +++ b/src/pip/_internal/models/candidate.py @@ -85,8 +85,8 @@ def project_track_urls(self) -> set[str]: def alternate_location_urls(self) -> set[str]: """Remote repository urls from Alternate Locations metadata.""" urls: set[str] = set() - if self.url: - urls.add(self.url) + # if self.url: + # urls.add(self.url) link = self._link if link and link.repo_alt_urls: urls.update({i for i in link.repo_alt_urls if i}) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 34a2f18d143..05fabd0e2be 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import Optional +from typing import Set import pytest @@ -916,11 +916,11 @@ def test_extract_version_from_fragment( def _make_mock_candidate_check_remote_repo( - candidate_name: Optional[str] = None, - version: Optional[str] = None, - comes_from_url: Optional[str] = None, - project_track_urls: Optional[Set[str]] = None, - repo_alt_urls: Optional[Set[str]] = None, + candidate_name: str | None = None, + version: str | None = None, + comes_from_url: str | None = None, + project_track_urls: Set[str] | None = None, + repo_alt_urls: Set[str] | None = None, ) -> InstallationCandidate: if candidate_name is None: candidate_name = "mypackage" @@ -1089,10 +1089,10 @@ def _make_mock_candidate_check_remote_repo( ) def test_check_multiple_remote_repositories( caplog, candidates: list[InstallationCandidate], project_name: str, expected -): +) -> None: caplog.set_level(logging.DEBUG) if expected: with pytest.raises(expected): check_multiple_remote_repositories(candidates, project_name) else: - assert check_multiple_remote_repositories(candidates, project_name) is None + check_multiple_remote_repositories(candidates, project_name) From abe06e01478dbb9c72b5310ab1ebce7b9a1882e1 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Fri, 5 Dec 2025 17:45:52 +0330 Subject: [PATCH 25/26] Update code for linting issues --- src/pip/_internal/index/collector.py | 6 ++---- src/pip/_internal/index/package_finder.py | 4 ++-- tests/unit/test_index.py | 11 +++++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index afec9b0c4bb..cf8bcfaa262 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -231,9 +231,8 @@ def parse_links(page: IndexContent) -> Iterable[Link]: if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): data = json.loads(page.content) project_track_urls = set(data.get("meta", {}).get("tracks", [])) - # repo_alt_urls = set(data.get("alternate-locations", [])) - # repo_alt_urls.add(page.url) repo_alt_urls = set(data.get("alternate-locations", [])) + repo_alt_urls.add(page.url) for file in data.get("files", []): link = Link.from_json( file, @@ -252,9 +251,8 @@ def parse_links(page: IndexContent) -> Iterable[Link]: base_url = parser.base_url or url for anchor in parser.anchors: - # repo_alt_urls = parser.repo_alt_urls or set() - # repo_alt_urls.add(page.url) repo_alt_urls = parser.repo_alt_urls or set() + repo_alt_urls.add(page.url) link = Link.from_element( anchor, page_url=url, diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index dfdc2a5f4a1..f9649581414 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1385,7 +1385,7 @@ def check_multiple_remote_repositories( if not any(ns & other for j, other in enumerate(namespaces) if i != j): raise UnsafeMultipleRemoteRepositories( package=project_name, - remote_repositories=ns, + remote_repositories={u for u in ns if u is not None}, ) all_declared_sources = set().union(*namespaces) @@ -1394,7 +1394,7 @@ def check_multiple_remote_repositories( if invalid_sources: raise UnsafeMultipleRemoteRepositories( package=project_name, - remote_repositories=invalid_sources, + remote_repositories={u for u in invalid_sources if u is not None}, ) return None diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 05fabd0e2be..ab5679a4353 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,9 +1,9 @@ from __future__ import annotations import logging -from typing import Set import pytest +from _pytest.logging import LogCaptureFixture from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.tags import Tag @@ -919,8 +919,8 @@ def _make_mock_candidate_check_remote_repo( candidate_name: str | None = None, version: str | None = None, comes_from_url: str | None = None, - project_track_urls: Set[str] | None = None, - repo_alt_urls: Set[str] | None = None, + project_track_urls: set[str] | None = None, + repo_alt_urls: set[str] | None = None, ) -> InstallationCandidate: if candidate_name is None: candidate_name = "mypackage" @@ -1088,7 +1088,10 @@ def _make_mock_candidate_check_remote_repo( ], ) def test_check_multiple_remote_repositories( - caplog, candidates: list[InstallationCandidate], project_name: str, expected + caplog: LogCaptureFixture, + candidates: list[InstallationCandidate], + project_name: str, + expected: Type[Exception] | None, ) -> None: caplog.set_level(logging.DEBUG) if expected: From 078c586bd7f0b8b6ff09b4351d167fe033fe1b51 Mon Sep 17 00:00:00 2001 From: sepehrrasooli Date: Fri, 5 Dec 2025 17:53:08 +0330 Subject: [PATCH 26/26] wp --- src/pip/_internal/index/package_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index f9649581414..31721af7b68 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -11,7 +11,7 @@ import urllib.parse from collections.abc import Iterable from dataclasses import dataclass -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, Optional, Union, Type from pip._vendor.packaging import specifiers from pip._vendor.packaging.tags import Tag