spec.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. # Copyright (c) 2016-2018, 2020 Claudiu Popa <pcmanticore@gmail.com>
  2. # Copyright (c) 2016 Derek Gustafson <degustaf@gmail.com>
  3. # Copyright (c) 2017 Chris Philip <chrisp533@gmail.com>
  4. # Copyright (c) 2017 Hugo <hugovk@users.noreply.github.com>
  5. # Copyright (c) 2017 ioanatia <ioanatia@users.noreply.github.com>
  6. # Copyright (c) 2017 Calen Pennington <cale@edx.org>
  7. # Copyright (c) 2018 Nick Drozd <nicholasdrozd@gmail.com>
  8. # Copyright (c) 2019 Hugo van Kemenade <hugovk@users.noreply.github.com>
  9. # Copyright (c) 2019 Ashley Whetter <ashley@awhetter.co.uk>
  10. # Copyright (c) 2020-2021 hippo91 <guillaume.peillex@gmail.com>
  11. # Copyright (c) 2020 Peter Kolbus <peter.kolbus@gmail.com>
  12. # Copyright (c) 2020 Raphael Gaschignard <raphael@rtpg.co>
  13. # Copyright (c) 2021 Pierre Sassoulas <pierre.sassoulas@gmail.com>
  14. # Copyright (c) 2021 Daniël van Noord <13665637+DanielNoord@users.noreply.github.com>
  15. # Copyright (c) 2021 DudeNr33 <3929834+DudeNr33@users.noreply.github.com>
  16. # Copyright (c) 2021 Marc Mueller <30130371+cdce8p@users.noreply.github.com>
import abc
import collections
import enum
import importlib.machinery
import importlib.util
import os
import sys
import zipimport
from functools import lru_cache

from . import util
  26. ModuleType = enum.Enum(
  27. "ModuleType",
  28. "C_BUILTIN C_EXTENSION PKG_DIRECTORY "
  29. "PY_CODERESOURCE PY_COMPILED PY_FROZEN PY_RESOURCE "
  30. "PY_SOURCE PY_ZIPMODULE PY_NAMESPACE",
  31. )
  32. _ModuleSpec = collections.namedtuple(
  33. "_ModuleSpec", "name type location " "origin submodule_search_locations"
  34. )
  35. class ModuleSpec(_ModuleSpec):
  36. """Defines a class similar to PEP 420's ModuleSpec
  37. A module spec defines a name of a module, its type, location
  38. and where submodules can be found, if the module is a package.
  39. """
  40. def __new__(
  41. cls,
  42. name,
  43. module_type,
  44. location=None,
  45. origin=None,
  46. submodule_search_locations=None,
  47. ):
  48. return _ModuleSpec.__new__(
  49. cls,
  50. name=name,
  51. type=module_type,
  52. location=location,
  53. origin=origin,
  54. submodule_search_locations=submodule_search_locations,
  55. )
  56. class Finder:
  57. """A finder is a class which knows how to find a particular module."""
  58. def __init__(self, path=None):
  59. self._path = path or sys.path
  60. @abc.abstractmethod
  61. def find_module(self, modname, module_parts, processed, submodule_path):
  62. """Find the given module
  63. Each finder is responsible for each protocol of finding, as long as
  64. they all return a ModuleSpec.
  65. :param str modname: The module which needs to be searched.
  66. :param list module_parts: It should be a list of strings,
  67. where each part contributes to the module's
  68. namespace.
  69. :param list processed: What parts from the module parts were processed
  70. so far.
  71. :param list submodule_path: A list of paths where the module
  72. can be looked into.
  73. :returns: A ModuleSpec, describing how and where the module was found,
  74. None, otherwise.
  75. """
  76. def contribute_to_path(self, spec, processed):
  77. """Get a list of extra paths where this finder can search."""
  78. class ImportlibFinder(Finder):
  79. """A finder based on the importlib module."""
  80. _SUFFIXES = (
  81. [(s, ModuleType.C_EXTENSION) for s in importlib.machinery.EXTENSION_SUFFIXES]
  82. + [(s, ModuleType.PY_SOURCE) for s in importlib.machinery.SOURCE_SUFFIXES]
  83. + [(s, ModuleType.PY_COMPILED) for s in importlib.machinery.BYTECODE_SUFFIXES]
  84. )
  85. def find_module(self, modname, module_parts, processed, submodule_path):
  86. if not isinstance(modname, str):
  87. raise TypeError(f"'modname' must be a str, not {type(modname)}")
  88. if submodule_path is not None:
  89. submodule_path = list(submodule_path)
  90. else:
  91. try:
  92. spec = importlib.util.find_spec(modname)
  93. if spec:
  94. if spec.loader is importlib.machinery.BuiltinImporter:
  95. return ModuleSpec(
  96. name=modname,
  97. location=None,
  98. module_type=ModuleType.C_BUILTIN,
  99. )
  100. if spec.loader is importlib.machinery.FrozenImporter:
  101. return ModuleSpec(
  102. name=modname,
  103. location=None,
  104. module_type=ModuleType.PY_FROZEN,
  105. )
  106. except ValueError:
  107. pass
  108. submodule_path = sys.path
  109. for entry in submodule_path:
  110. package_directory = os.path.join(entry, modname)
  111. for suffix in (".py", importlib.machinery.BYTECODE_SUFFIXES[0]):
  112. package_file_name = "__init__" + suffix
  113. file_path = os.path.join(package_directory, package_file_name)
  114. if os.path.isfile(file_path):
  115. return ModuleSpec(
  116. name=modname,
  117. location=package_directory,
  118. module_type=ModuleType.PKG_DIRECTORY,
  119. )
  120. for suffix, type_ in ImportlibFinder._SUFFIXES:
  121. file_name = modname + suffix
  122. file_path = os.path.join(entry, file_name)
  123. if os.path.isfile(file_path):
  124. return ModuleSpec(
  125. name=modname, location=file_path, module_type=type_
  126. )
  127. return None
  128. def contribute_to_path(self, spec, processed):
  129. if spec.location is None:
  130. # Builtin.
  131. return None
  132. if _is_setuptools_namespace(spec.location):
  133. # extend_path is called, search sys.path for module/packages
  134. # of this name see pkgutil.extend_path documentation
  135. path = [
  136. os.path.join(p, *processed)
  137. for p in sys.path
  138. if os.path.isdir(os.path.join(p, *processed))
  139. ]
  140. else:
  141. path = [spec.location]
  142. return path
  143. class ExplicitNamespacePackageFinder(ImportlibFinder):
  144. """A finder for the explicit namespace packages, generated through pkg_resources."""
  145. def find_module(self, modname, module_parts, processed, submodule_path):
  146. if processed:
  147. modname = ".".join(processed + [modname])
  148. if util.is_namespace(modname) and modname in sys.modules:
  149. submodule_path = sys.modules[modname].__path__
  150. return ModuleSpec(
  151. name=modname,
  152. location="",
  153. origin="namespace",
  154. module_type=ModuleType.PY_NAMESPACE,
  155. submodule_search_locations=submodule_path,
  156. )
  157. return None
  158. def contribute_to_path(self, spec, processed):
  159. return spec.submodule_search_locations
  160. class ZipFinder(Finder):
  161. """Finder that knows how to find a module inside zip files."""
  162. def __init__(self, path):
  163. super().__init__(path)
  164. self._zipimporters = _precache_zipimporters(path)
  165. def find_module(self, modname, module_parts, processed, submodule_path):
  166. try:
  167. file_type, filename, path = _search_zip(module_parts, self._zipimporters)
  168. except ImportError:
  169. return None
  170. return ModuleSpec(
  171. name=modname,
  172. location=filename,
  173. origin="egg",
  174. module_type=file_type,
  175. submodule_search_locations=path,
  176. )
  177. class PathSpecFinder(Finder):
  178. """Finder based on importlib.machinery.PathFinder."""
  179. def find_module(self, modname, module_parts, processed, submodule_path):
  180. spec = importlib.machinery.PathFinder.find_spec(modname, path=submodule_path)
  181. if spec:
  182. # origin can be either a string on older Python versions
  183. # or None in case it is a namespace package:
  184. # https://github.com/python/cpython/pull/5481
  185. is_namespace_pkg = spec.origin in {"namespace", None}
  186. location = spec.origin if not is_namespace_pkg else None
  187. module_type = ModuleType.PY_NAMESPACE if is_namespace_pkg else None
  188. spec = ModuleSpec(
  189. name=spec.name,
  190. location=location,
  191. origin=spec.origin,
  192. module_type=module_type,
  193. submodule_search_locations=list(spec.submodule_search_locations or []),
  194. )
  195. return spec
  196. def contribute_to_path(self, spec, processed):
  197. if spec.type == ModuleType.PY_NAMESPACE:
  198. return spec.submodule_search_locations
  199. return None
  200. _SPEC_FINDERS = (
  201. ImportlibFinder,
  202. ZipFinder,
  203. PathSpecFinder,
  204. ExplicitNamespacePackageFinder,
  205. )
  206. def _is_setuptools_namespace(location):
  207. try:
  208. with open(os.path.join(location, "__init__.py"), "rb") as stream:
  209. data = stream.read(4096)
  210. except OSError:
  211. return None
  212. else:
  213. extend_path = b"pkgutil" in data and b"extend_path" in data
  214. declare_namespace = (
  215. b"pkg_resources" in data and b"declare_namespace(__name__)" in data
  216. )
  217. return extend_path or declare_namespace
  218. @lru_cache()
  219. def _cached_set_diff(left, right):
  220. result = set(left)
  221. result.difference_update(right)
  222. return result
  223. def _precache_zipimporters(path=None):
  224. """
  225. For each path that has not been already cached
  226. in the sys.path_importer_cache, create a new zipimporter
  227. instance and add it into the cache.
  228. Return a dict associating all paths, stored in the cache, to corresponding
  229. zipimporter instances.
  230. :param path: paths that has to be added into the cache
  231. :return: association between paths stored in the cache and zipimporter instances
  232. """
  233. pic = sys.path_importer_cache
  234. # When measured, despite having the same complexity (O(n)),
  235. # converting to tuples and then caching the conversion to sets
  236. # and the set difference is faster than converting to sets
  237. # and then only caching the set difference.
  238. req_paths = tuple(path or sys.path)
  239. cached_paths = tuple(pic)
  240. new_paths = _cached_set_diff(req_paths, cached_paths)
  241. # pylint: disable=no-member
  242. for entry_path in new_paths:
  243. try:
  244. pic[entry_path] = zipimport.zipimporter(entry_path)
  245. except zipimport.ZipImportError:
  246. continue
  247. return {
  248. key: value
  249. for key, value in pic.items()
  250. if isinstance(value, zipimport.zipimporter)
  251. }
  252. def _search_zip(modpath, pic):
  253. for filepath, importer in list(pic.items()):
  254. if importer is not None:
  255. found = importer.find_module(modpath[0])
  256. if found:
  257. if not importer.find_module(os.path.sep.join(modpath)):
  258. raise ImportError(
  259. "No module named %s in %s/%s"
  260. % (".".join(modpath[1:]), filepath, modpath)
  261. )
  262. # import code; code.interact(local=locals())
  263. return (
  264. ModuleType.PY_ZIPMODULE,
  265. os.path.abspath(filepath) + os.path.sep + os.path.sep.join(modpath),
  266. filepath,
  267. )
  268. raise ImportError(f"No module named {'.'.join(modpath)}")
  269. def _find_spec_with_path(search_path, modname, module_parts, processed, submodule_path):
  270. finders = [finder(search_path) for finder in _SPEC_FINDERS]
  271. for finder in finders:
  272. spec = finder.find_module(modname, module_parts, processed, submodule_path)
  273. if spec is None:
  274. continue
  275. return finder, spec
  276. raise ImportError(f"No module named {'.'.join(module_parts)}")
  277. def find_spec(modpath, path=None):
  278. """Find a spec for the given module.
  279. :type modpath: list or tuple
  280. :param modpath:
  281. split module's name (i.e name of a module or package split
  282. on '.'), with leading empty strings for explicit relative import
  283. :type path: list or None
  284. :param path:
  285. optional list of path where the module or package should be
  286. searched (use sys.path if nothing or None is given)
  287. :rtype: ModuleSpec
  288. :return: A module spec, which describes how the module was
  289. found and where.
  290. """
  291. _path = path or sys.path
  292. # Need a copy for not mutating the argument.
  293. modpath = modpath[:]
  294. submodule_path = None
  295. module_parts = modpath[:]
  296. processed = []
  297. while modpath:
  298. modname = modpath.pop(0)
  299. finder, spec = _find_spec_with_path(
  300. _path, modname, module_parts, processed, submodule_path or path
  301. )
  302. processed.append(modname)
  303. if modpath:
  304. submodule_path = finder.contribute_to_path(spec, processed)
  305. if spec.type == ModuleType.PKG_DIRECTORY:
  306. spec = spec._replace(submodule_search_locations=submodule_path)
  307. return spec