123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- import logging
- import mimetypes
- import os
- import pathlib
- from typing import Callable, Iterable, Optional, Tuple
- from pip._internal.models.candidate import InstallationCandidate
- from pip._internal.models.link import Link
- from pip._internal.utils.urls import path_to_url, url_to_path
- from pip._internal.vcs import is_url
# Module-level logger, named after this module.
- logger = logging.getLogger(__name__)
# Type aliases shared by the link-source signatures in this module.
# An iterable of installation candidates.
- FoundCandidates = Iterable[InstallationCandidate]
# An iterable of links.
- FoundLinks = Iterable[Link]
# Callable that takes a page link and returns the candidates found for it.
- CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Callable that takes a link and returns True/False; it is used to decide
# whether a remote link is parsed as a page.
- PageValidator = Callable[[Link], bool]
class LinkSource:
    """Common interface of every place links or candidates can come from.

    All members defined here raise ``NotImplementedError``; subclasses
    provide the actual behaviour.
    """

    @property
    def link(self) -> Optional[Link]:
        """Return the link backing this source, or ``None`` if there is none."""
        raise NotImplementedError()

    def page_candidates(self) -> FoundCandidates:
        """Return the candidates found by parsing an archive listing HTML file."""
        raise NotImplementedError()

    def file_links(self) -> FoundLinks:
        """Return the links obtained from archives that were specified directly."""
        raise NotImplementedError()
- def _is_html_file(file_url: str) -> bool:
- return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    The entries of the directory are inspected, and the source provides:

    * ``page_candidates``: Candidates listed on each HTML file in the
      directory.
    * ``file_links``: A link for every directory entry that is not an HTML
      file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        # Store the real (symlink-resolved) location of the directory.
        resolved = os.path.realpath(path)
        self._path = pathlib.Path(resolved)

    @property
    def link(self) -> Optional[Link]:
        """A flat directory has no single underlying link."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield the candidates parsed from each HTML entry of the directory."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if _is_html_file(entry_url):
                yield from self._candidates_from_page(Link(entry_url))

    def file_links(self) -> FoundLinks:
        """Yield a link for each directory entry that is not an HTML file."""
        for entry in self._path.iterdir():
            entry_url = path_to_url(str(entry))
            if not _is_html_file(entry_url):
                yield Link(entry_url)
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. The source provides:

    * ``page_candidates``: Candidates listed on the file when it is HTML.
    * ``file_links``: The link itself when the file is not HTML.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates parsed from the file; nothing if it is not HTML."""
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself; nothing if the file is HTML."""
        if not _is_html_file(self._link.url):
            yield self._link
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    The source provides:

    * ``page_candidates``: Candidates listed on the page, provided that the
      page validator accepts the link.
    * ``file_links``: The link itself, unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the page; nothing if validation rejects it."""
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself."""
        yield self._link
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link this source was created with."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield every candidate ``candidates_from_page`` finds for the link."""
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Return an empty tuple: this source never provides direct links."""
        no_links: FoundLinks = ()
        return no_links
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Build the link source matching a user-supplied *location*.

    *location* is classified, in this order, as an existing local path, a
    ``file:`` URL, or any other URL recognised by ``is_url``.

    :param location: Local path or URL to build a source for.
    :param candidates_from_page: Passed through to the created source.
    :param page_validator: Passed through to a remote source.
    :param expand_dir: For a local directory, whether to create a
        ``_FlatDirectorySource`` (true) or an ``_IndexDirectorySource``
        (false).
    :param cache_link_parsing: Forwarded to every ``Link`` created here.
    :return: ``(url, source)``. ``(None, None)`` is returned, with a warning,
        when the location is neither an existing path nor a URL.
        ``(url, None)`` is returned, with a warning, when the local path is
        neither a file nor a directory.
    """
    url: str
    path: Optional[str]
    if os.path.exists(location):
        # Is a local path.
        url, path = path_to_url(location), location
    elif location.startswith("file:"):
        # A file: URL.
        url, path = location, url_to_path(location)
    elif is_url(location):
        # Any other URL has no local path.
        url, path = location, None
    else:
        logger.warning(
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme.",
            location,
        )
        return (None, None)

    if path is None:
        remote_source: LinkSource = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
        return (url, remote_source)

    if os.path.isdir(path):
        directory_source: LinkSource
        if expand_dir:
            directory_source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=path,
            )
        else:
            directory_source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=Link(url, cache_link_parsing=cache_link_parsing),
            )
        return (url, directory_source)

    if os.path.isfile(path):
        file_source: LinkSource = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
        return (url, file_source)

    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (url, None)
|