resources.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2013-2017 Vinay Sajip.
  4. # Licensed to the Python Software Foundation under a contributor agreement.
  5. # See LICENSE.txt and CONTRIBUTORS.txt.
  6. #
  7. from __future__ import unicode_literals
  8. import bisect
  9. import io
  10. import logging
  11. import os
  12. import pkgutil
  13. import sys
  14. import types
  15. import zipimport
  16. from . import DistlibException
  17. from .util import cached_property, get_cache_base, Cache
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Shared ResourceCache used by Resource.file_path; it stays None until that
# property first needs it.
cache = None    # created when needed
  20. class ResourceCache(Cache):
  21. def __init__(self, base=None):
  22. if base is None:
  23. # Use native string to avoid issues on 2.x: see Python #20140.
  24. base = os.path.join(get_cache_base(), str('resource-cache'))
  25. super(ResourceCache, self).__init__(base)
  26. def is_stale(self, resource, path):
  27. """
  28. Is the cache stale for the given resource?
  29. :param resource: The :class:`Resource` being cached.
  30. :param path: The path of the resource in the cache.
  31. :return: True if the cache is stale.
  32. """
  33. # Cache invalidation is a hard problem :-)
  34. return True
  35. def get(self, resource):
  36. """
  37. Get a resource into the cache,
  38. :param resource: A :class:`Resource` instance.
  39. :return: The pathname of the resource in the cache.
  40. """
  41. prefix, path = resource.finder.get_cache_info(resource)
  42. if prefix is None:
  43. result = path
  44. else:
  45. result = os.path.join(self.base, self.prefix_to_dir(prefix), path)
  46. dirname = os.path.dirname(result)
  47. if not os.path.isdir(dirname):
  48. os.makedirs(dirname)
  49. if not os.path.exists(result):
  50. stale = True
  51. else:
  52. stale = self.is_stale(resource, path)
  53. if stale:
  54. # write the bytes of the resource to the cache location
  55. with open(result, 'wb') as f:
  56. f.write(resource.bytes)
  57. return result
  58. class ResourceBase(object):
  59. def __init__(self, finder, name):
  60. self.finder = finder
  61. self.name = name
  62. class Resource(ResourceBase):
  63. """
  64. A class representing an in-package resource, such as a data file. This is
  65. not normally instantiated by user code, but rather by a
  66. :class:`ResourceFinder` which manages the resource.
  67. """
  68. is_container = False # Backwards compatibility
  69. def as_stream(self):
  70. """
  71. Get the resource as a stream.
  72. This is not a property to make it obvious that it returns a new stream
  73. each time.
  74. """
  75. return self.finder.get_stream(self)
  76. @cached_property
  77. def file_path(self):
  78. global cache
  79. if cache is None:
  80. cache = ResourceCache()
  81. return cache.get(self)
  82. @cached_property
  83. def bytes(self):
  84. return self.finder.get_bytes(self)
  85. @cached_property
  86. def size(self):
  87. return self.finder.get_size(self)
  88. class ResourceContainer(ResourceBase):
  89. is_container = True # Backwards compatibility
  90. @cached_property
  91. def resources(self):
  92. return self.finder.get_resources(self)
  93. class ResourceFinder(object):
  94. """
  95. Resource finder for file system resources.
  96. """
  97. if sys.platform.startswith('java'):
  98. skipped_extensions = ('.pyc', '.pyo', '.class')
  99. else:
  100. skipped_extensions = ('.pyc', '.pyo')
  101. def __init__(self, module):
  102. self.module = module
  103. self.loader = getattr(module, '__loader__', None)
  104. self.base = os.path.dirname(getattr(module, '__file__', ''))
  105. def _adjust_path(self, path):
  106. return os.path.realpath(path)
  107. def _make_path(self, resource_name):
  108. # Issue #50: need to preserve type of path on Python 2.x
  109. # like os.path._get_sep
  110. if isinstance(resource_name, bytes): # should only happen on 2.x
  111. sep = b'/'
  112. else:
  113. sep = '/'
  114. parts = resource_name.split(sep)
  115. parts.insert(0, self.base)
  116. result = os.path.join(*parts)
  117. return self._adjust_path(result)
  118. def _find(self, path):
  119. return os.path.exists(path)
  120. def get_cache_info(self, resource):
  121. return None, resource.path
  122. def find(self, resource_name):
  123. path = self._make_path(resource_name)
  124. if not self._find(path):
  125. result = None
  126. else:
  127. if self._is_directory(path):
  128. result = ResourceContainer(self, resource_name)
  129. else:
  130. result = Resource(self, resource_name)
  131. result.path = path
  132. return result
  133. def get_stream(self, resource):
  134. return open(resource.path, 'rb')
  135. def get_bytes(self, resource):
  136. with open(resource.path, 'rb') as f:
  137. return f.read()
  138. def get_size(self, resource):
  139. return os.path.getsize(resource.path)
  140. def get_resources(self, resource):
  141. def allowed(f):
  142. return (f != '__pycache__' and not
  143. f.endswith(self.skipped_extensions))
  144. return set([f for f in os.listdir(resource.path) if allowed(f)])
  145. def is_container(self, resource):
  146. return self._is_directory(resource.path)
  147. _is_directory = staticmethod(os.path.isdir)
  148. def iterator(self, resource_name):
  149. resource = self.find(resource_name)
  150. if resource is not None:
  151. todo = [resource]
  152. while todo:
  153. resource = todo.pop(0)
  154. yield resource
  155. if resource.is_container:
  156. rname = resource.name
  157. for name in resource.resources:
  158. if not rname:
  159. new_name = name
  160. else:
  161. new_name = '/'.join([rname, name])
  162. child = self.find(new_name)
  163. if child.is_container:
  164. todo.append(child)
  165. else:
  166. yield child
  167. class ZipResourceFinder(ResourceFinder):
  168. """
  169. Resource finder for resources in .zip files.
  170. """
  171. def __init__(self, module):
  172. super(ZipResourceFinder, self).__init__(module)
  173. archive = self.loader.archive
  174. self.prefix_len = 1 + len(archive)
  175. # PyPy doesn't have a _files attr on zipimporter, and you can't set one
  176. if hasattr(self.loader, '_files'):
  177. self._files = self.loader._files
  178. else:
  179. self._files = zipimport._zip_directory_cache[archive]
  180. self.index = sorted(self._files)
  181. def _adjust_path(self, path):
  182. return path
  183. def _find(self, path):
  184. path = path[self.prefix_len:]
  185. if path in self._files:
  186. result = True
  187. else:
  188. if path and path[-1] != os.sep:
  189. path = path + os.sep
  190. i = bisect.bisect(self.index, path)
  191. try:
  192. result = self.index[i].startswith(path)
  193. except IndexError:
  194. result = False
  195. if not result:
  196. logger.debug('_find failed: %r %r', path, self.loader.prefix)
  197. else:
  198. logger.debug('_find worked: %r %r', path, self.loader.prefix)
  199. return result
  200. def get_cache_info(self, resource):
  201. prefix = self.loader.archive
  202. path = resource.path[1 + len(prefix):]
  203. return prefix, path
  204. def get_bytes(self, resource):
  205. return self.loader.get_data(resource.path)
  206. def get_stream(self, resource):
  207. return io.BytesIO(self.get_bytes(resource))
  208. def get_size(self, resource):
  209. path = resource.path[self.prefix_len:]
  210. return self._files[path][3]
  211. def get_resources(self, resource):
  212. path = resource.path[self.prefix_len:]
  213. if path and path[-1] != os.sep:
  214. path += os.sep
  215. plen = len(path)
  216. result = set()
  217. i = bisect.bisect(self.index, path)
  218. while i < len(self.index):
  219. if not self.index[i].startswith(path):
  220. break
  221. s = self.index[i][plen:]
  222. result.add(s.split(os.sep, 1)[0]) # only immediate children
  223. i += 1
  224. return result
  225. def _is_directory(self, path):
  226. path = path[self.prefix_len:]
  227. if path and path[-1] != os.sep:
  228. path += os.sep
  229. i = bisect.bisect(self.index, path)
  230. try:
  231. result = self.index[i].startswith(path)
  232. except IndexError:
  233. result = False
  234. return result
  235. _finder_registry = {
  236. type(None): ResourceFinder,
  237. zipimport.zipimporter: ZipResourceFinder
  238. }
  239. try:
  240. # In Python 3.6, _frozen_importlib -> _frozen_importlib_external
  241. try:
  242. import _frozen_importlib_external as _fi
  243. except ImportError:
  244. import _frozen_importlib as _fi
  245. _finder_registry[_fi.SourceFileLoader] = ResourceFinder
  246. _finder_registry[_fi.FileFinder] = ResourceFinder
  247. # See issue #146
  248. _finder_registry[_fi.SourcelessFileLoader] = ResourceFinder
  249. del _fi
  250. except (ImportError, AttributeError):
  251. pass
  252. def register_finder(loader, finder_maker):
  253. _finder_registry[type(loader)] = finder_maker
  254. _finder_cache = {}
  255. def finder(package):
  256. """
  257. Return a resource finder for a package.
  258. :param package: The name of the package.
  259. :return: A :class:`ResourceFinder` instance for the package.
  260. """
  261. if package in _finder_cache:
  262. result = _finder_cache[package]
  263. else:
  264. if package not in sys.modules:
  265. __import__(package)
  266. module = sys.modules[package]
  267. path = getattr(module, '__path__', None)
  268. if path is None:
  269. raise DistlibException('You cannot get a finder for a module, '
  270. 'only for a package')
  271. loader = getattr(module, '__loader__', None)
  272. finder_maker = _finder_registry.get(type(loader))
  273. if finder_maker is None:
  274. raise DistlibException('Unable to locate finder for %r' % package)
  275. result = finder_maker(module)
  276. _finder_cache[package] = result
  277. return result
  278. _dummy_module = types.ModuleType(str('__dummy__'))
  279. def finder_for_path(path):
  280. """
  281. Return a resource finder for a path, which should represent a container.
  282. :param path: The path.
  283. :return: A :class:`ResourceFinder` instance for the path.
  284. """
  285. result = None
  286. # calls any path hooks, gets importer into cache
  287. pkgutil.get_importer(path)
  288. loader = sys.path_importer_cache.get(path)
  289. finder = _finder_registry.get(type(loader))
  290. if finder:
  291. module = _dummy_module
  292. module.__file__ = os.path.join(path, '')
  293. module.__loader__ = loader
  294. result = finder(module)
  295. return result