index.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2013 Vinay Sajip.
  4. # Licensed to the Python Software Foundation under a contributor agreement.
  5. # See LICENSE.txt and CONTRIBUTORS.txt.
  6. #
  7. import hashlib
  8. import logging
  9. import os
  10. import shutil
  11. import subprocess
  12. import tempfile
  13. try:
  14. from threading import Thread
  15. except ImportError:
  16. from dummy_threading import Thread
  17. from . import DistlibException
  18. from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
  19. urlparse, build_opener, string_types)
  20. from .util import zip_dir, ServerProxy
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Index URL that PackageIndex falls back to when no URL is given.
DEFAULT_INDEX = 'https://pypi.org/pypi'

# Name of the default HTTP authentication realm.
# NOTE(review): presumably meant as the fallback for the ``realm``
# configuration value - confirm against its users.
DEFAULT_REALM = 'pypi'
  24. class PackageIndex(object):
  25. """
  26. This class represents a package index compatible with PyPI, the Python
  27. Package Index.
  28. """
  29. boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
  30. def __init__(self, url=None):
  31. """
  32. Initialise an instance.
  33. :param url: The URL of the index. If not specified, the URL for PyPI is
  34. used.
  35. """
  36. self.url = url or DEFAULT_INDEX
  37. self.read_configuration()
  38. scheme, netloc, path, params, query, frag = urlparse(self.url)
  39. if params or query or frag or scheme not in ('http', 'https'):
  40. raise DistlibException('invalid repository: %s' % self.url)
  41. self.password_handler = None
  42. self.ssl_verifier = None
  43. self.gpg = None
  44. self.gpg_home = None
  45. with open(os.devnull, 'w') as sink:
  46. # Use gpg by default rather than gpg2, as gpg2 insists on
  47. # prompting for passwords
  48. for s in ('gpg', 'gpg2'):
  49. try:
  50. rc = subprocess.check_call([s, '--version'], stdout=sink,
  51. stderr=sink)
  52. if rc == 0:
  53. self.gpg = s
  54. break
  55. except OSError:
  56. pass
  57. def _get_pypirc_command(self):
  58. """
  59. Get the distutils command for interacting with PyPI configurations.
  60. :return: the command.
  61. """
  62. from .util import _get_pypirc_command as cmd
  63. return cmd()
  64. def read_configuration(self):
  65. """
  66. Read the PyPI access configuration as supported by distutils. This populates
  67. ``username``, ``password``, ``realm`` and ``url`` attributes from the
  68. configuration.
  69. """
  70. from .util import _load_pypirc
  71. cfg = _load_pypirc(self)
  72. self.username = cfg.get('username')
  73. self.password = cfg.get('password')
  74. self.realm = cfg.get('realm', 'pypi')
  75. self.url = cfg.get('repository', self.url)
  76. def save_configuration(self):
  77. """
  78. Save the PyPI access configuration. You must have set ``username`` and
  79. ``password`` attributes before calling this method.
  80. """
  81. self.check_credentials()
  82. from .util import _store_pypirc
  83. _store_pypirc(self)
  84. def check_credentials(self):
  85. """
  86. Check that ``username`` and ``password`` have been set, and raise an
  87. exception if not.
  88. """
  89. if self.username is None or self.password is None:
  90. raise DistlibException('username and password must be set')
  91. pm = HTTPPasswordMgr()
  92. _, netloc, _, _, _, _ = urlparse(self.url)
  93. pm.add_password(self.realm, netloc, self.username, self.password)
  94. self.password_handler = HTTPBasicAuthHandler(pm)
  95. def register(self, metadata):
  96. """
  97. Register a distribution on PyPI, using the provided metadata.
  98. :param metadata: A :class:`Metadata` instance defining at least a name
  99. and version number for the distribution to be
  100. registered.
  101. :return: The HTTP response received from PyPI upon submission of the
  102. request.
  103. """
  104. self.check_credentials()
  105. metadata.validate()
  106. d = metadata.todict()
  107. d[':action'] = 'verify'
  108. request = self.encode_request(d.items(), [])
  109. response = self.send_request(request)
  110. d[':action'] = 'submit'
  111. request = self.encode_request(d.items(), [])
  112. return self.send_request(request)
  113. def _reader(self, name, stream, outbuf):
  114. """
  115. Thread runner for reading lines of from a subprocess into a buffer.
  116. :param name: The logical name of the stream (used for logging only).
  117. :param stream: The stream to read from. This will typically a pipe
  118. connected to the output stream of a subprocess.
  119. :param outbuf: The list to append the read lines to.
  120. """
  121. while True:
  122. s = stream.readline()
  123. if not s:
  124. break
  125. s = s.decode('utf-8').rstrip()
  126. outbuf.append(s)
  127. logger.debug('%s: %s' % (name, s))
  128. stream.close()
  129. def get_sign_command(self, filename, signer, sign_password,
  130. keystore=None):
  131. """
  132. Return a suitable command for signing a file.
  133. :param filename: The pathname to the file to be signed.
  134. :param signer: The identifier of the signer of the file.
  135. :param sign_password: The passphrase for the signer's
  136. private key used for signing.
  137. :param keystore: The path to a directory which contains the keys
  138. used in verification. If not specified, the
  139. instance's ``gpg_home`` attribute is used instead.
  140. :return: The signing command as a list suitable to be
  141. passed to :class:`subprocess.Popen`.
  142. """
  143. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  144. if keystore is None:
  145. keystore = self.gpg_home
  146. if keystore:
  147. cmd.extend(['--homedir', keystore])
  148. if sign_password is not None:
  149. cmd.extend(['--batch', '--passphrase-fd', '0'])
  150. td = tempfile.mkdtemp()
  151. sf = os.path.join(td, os.path.basename(filename) + '.asc')
  152. cmd.extend(['--detach-sign', '--armor', '--local-user',
  153. signer, '--output', sf, filename])
  154. logger.debug('invoking: %s', ' '.join(cmd))
  155. return cmd, sf
  156. def run_command(self, cmd, input_data=None):
  157. """
  158. Run a command in a child process , passing it any input data specified.
  159. :param cmd: The command to run.
  160. :param input_data: If specified, this must be a byte string containing
  161. data to be sent to the child process.
  162. :return: A tuple consisting of the subprocess' exit code, a list of
  163. lines read from the subprocess' ``stdout``, and a list of
  164. lines read from the subprocess' ``stderr``.
  165. """
  166. kwargs = {
  167. 'stdout': subprocess.PIPE,
  168. 'stderr': subprocess.PIPE,
  169. }
  170. if input_data is not None:
  171. kwargs['stdin'] = subprocess.PIPE
  172. stdout = []
  173. stderr = []
  174. p = subprocess.Popen(cmd, **kwargs)
  175. # We don't use communicate() here because we may need to
  176. # get clever with interacting with the command
  177. t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
  178. t1.start()
  179. t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
  180. t2.start()
  181. if input_data is not None:
  182. p.stdin.write(input_data)
  183. p.stdin.close()
  184. p.wait()
  185. t1.join()
  186. t2.join()
  187. return p.returncode, stdout, stderr
  188. def sign_file(self, filename, signer, sign_password, keystore=None):
  189. """
  190. Sign a file.
  191. :param filename: The pathname to the file to be signed.
  192. :param signer: The identifier of the signer of the file.
  193. :param sign_password: The passphrase for the signer's
  194. private key used for signing.
  195. :param keystore: The path to a directory which contains the keys
  196. used in signing. If not specified, the instance's
  197. ``gpg_home`` attribute is used instead.
  198. :return: The absolute pathname of the file where the signature is
  199. stored.
  200. """
  201. cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
  202. keystore)
  203. rc, stdout, stderr = self.run_command(cmd,
  204. sign_password.encode('utf-8'))
  205. if rc != 0:
  206. raise DistlibException('sign command failed with error '
  207. 'code %s' % rc)
  208. return sig_file
  209. def upload_file(self, metadata, filename, signer=None, sign_password=None,
  210. filetype='sdist', pyversion='source', keystore=None):
  211. """
  212. Upload a release file to the index.
  213. :param metadata: A :class:`Metadata` instance defining at least a name
  214. and version number for the file to be uploaded.
  215. :param filename: The pathname of the file to be uploaded.
  216. :param signer: The identifier of the signer of the file.
  217. :param sign_password: The passphrase for the signer's
  218. private key used for signing.
  219. :param filetype: The type of the file being uploaded. This is the
  220. distutils command which produced that file, e.g.
  221. ``sdist`` or ``bdist_wheel``.
  222. :param pyversion: The version of Python which the release relates
  223. to. For code compatible with any Python, this would
  224. be ``source``, otherwise it would be e.g. ``3.2``.
  225. :param keystore: The path to a directory which contains the keys
  226. used in signing. If not specified, the instance's
  227. ``gpg_home`` attribute is used instead.
  228. :return: The HTTP response received from PyPI upon submission of the
  229. request.
  230. """
  231. self.check_credentials()
  232. if not os.path.exists(filename):
  233. raise DistlibException('not found: %s' % filename)
  234. metadata.validate()
  235. d = metadata.todict()
  236. sig_file = None
  237. if signer:
  238. if not self.gpg:
  239. logger.warning('no signing program available - not signed')
  240. else:
  241. sig_file = self.sign_file(filename, signer, sign_password,
  242. keystore)
  243. with open(filename, 'rb') as f:
  244. file_data = f.read()
  245. md5_digest = hashlib.md5(file_data).hexdigest()
  246. sha256_digest = hashlib.sha256(file_data).hexdigest()
  247. d.update({
  248. ':action': 'file_upload',
  249. 'protocol_version': '1',
  250. 'filetype': filetype,
  251. 'pyversion': pyversion,
  252. 'md5_digest': md5_digest,
  253. 'sha256_digest': sha256_digest,
  254. })
  255. files = [('content', os.path.basename(filename), file_data)]
  256. if sig_file:
  257. with open(sig_file, 'rb') as f:
  258. sig_data = f.read()
  259. files.append(('gpg_signature', os.path.basename(sig_file),
  260. sig_data))
  261. shutil.rmtree(os.path.dirname(sig_file))
  262. request = self.encode_request(d.items(), files)
  263. return self.send_request(request)
  264. def upload_documentation(self, metadata, doc_dir):
  265. """
  266. Upload documentation to the index.
  267. :param metadata: A :class:`Metadata` instance defining at least a name
  268. and version number for the documentation to be
  269. uploaded.
  270. :param doc_dir: The pathname of the directory which contains the
  271. documentation. This should be the directory that
  272. contains the ``index.html`` for the documentation.
  273. :return: The HTTP response received from PyPI upon submission of the
  274. request.
  275. """
  276. self.check_credentials()
  277. if not os.path.isdir(doc_dir):
  278. raise DistlibException('not a directory: %r' % doc_dir)
  279. fn = os.path.join(doc_dir, 'index.html')
  280. if not os.path.exists(fn):
  281. raise DistlibException('not found: %r' % fn)
  282. metadata.validate()
  283. name, version = metadata.name, metadata.version
  284. zip_data = zip_dir(doc_dir).getvalue()
  285. fields = [(':action', 'doc_upload'),
  286. ('name', name), ('version', version)]
  287. files = [('content', name, zip_data)]
  288. request = self.encode_request(fields, files)
  289. return self.send_request(request)
  290. def get_verify_command(self, signature_filename, data_filename,
  291. keystore=None):
  292. """
  293. Return a suitable command for verifying a file.
  294. :param signature_filename: The pathname to the file containing the
  295. signature.
  296. :param data_filename: The pathname to the file containing the
  297. signed data.
  298. :param keystore: The path to a directory which contains the keys
  299. used in verification. If not specified, the
  300. instance's ``gpg_home`` attribute is used instead.
  301. :return: The verifying command as a list suitable to be
  302. passed to :class:`subprocess.Popen`.
  303. """
  304. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  305. if keystore is None:
  306. keystore = self.gpg_home
  307. if keystore:
  308. cmd.extend(['--homedir', keystore])
  309. cmd.extend(['--verify', signature_filename, data_filename])
  310. logger.debug('invoking: %s', ' '.join(cmd))
  311. return cmd
  312. def verify_signature(self, signature_filename, data_filename,
  313. keystore=None):
  314. """
  315. Verify a signature for a file.
  316. :param signature_filename: The pathname to the file containing the
  317. signature.
  318. :param data_filename: The pathname to the file containing the
  319. signed data.
  320. :param keystore: The path to a directory which contains the keys
  321. used in verification. If not specified, the
  322. instance's ``gpg_home`` attribute is used instead.
  323. :return: True if the signature was verified, else False.
  324. """
  325. if not self.gpg:
  326. raise DistlibException('verification unavailable because gpg '
  327. 'unavailable')
  328. cmd = self.get_verify_command(signature_filename, data_filename,
  329. keystore)
  330. rc, stdout, stderr = self.run_command(cmd)
  331. if rc not in (0, 1):
  332. raise DistlibException('verify command failed with error '
  333. 'code %s' % rc)
  334. return rc == 0
  335. def download_file(self, url, destfile, digest=None, reporthook=None):
  336. """
  337. This is a convenience method for downloading a file from an URL.
  338. Normally, this will be a file from the index, though currently
  339. no check is made for this (i.e. a file can be downloaded from
  340. anywhere).
  341. The method is just like the :func:`urlretrieve` function in the
  342. standard library, except that it allows digest computation to be
  343. done during download and checking that the downloaded data
  344. matched any expected value.
  345. :param url: The URL of the file to be downloaded (assumed to be
  346. available via an HTTP GET request).
  347. :param destfile: The pathname where the downloaded file is to be
  348. saved.
  349. :param digest: If specified, this must be a (hasher, value)
  350. tuple, where hasher is the algorithm used (e.g.
  351. ``'md5'``) and ``value`` is the expected value.
  352. :param reporthook: The same as for :func:`urlretrieve` in the
  353. standard library.
  354. """
  355. if digest is None:
  356. digester = None
  357. logger.debug('No digest specified')
  358. else:
  359. if isinstance(digest, (list, tuple)):
  360. hasher, digest = digest
  361. else:
  362. hasher = 'md5'
  363. digester = getattr(hashlib, hasher)()
  364. logger.debug('Digest specified: %s' % digest)
  365. # The following code is equivalent to urlretrieve.
  366. # We need to do it this way so that we can compute the
  367. # digest of the file as we go.
  368. with open(destfile, 'wb') as dfp:
  369. # addinfourl is not a context manager on 2.x
  370. # so we have to use try/finally
  371. sfp = self.send_request(Request(url))
  372. try:
  373. headers = sfp.info()
  374. blocksize = 8192
  375. size = -1
  376. read = 0
  377. blocknum = 0
  378. if "content-length" in headers:
  379. size = int(headers["Content-Length"])
  380. if reporthook:
  381. reporthook(blocknum, blocksize, size)
  382. while True:
  383. block = sfp.read(blocksize)
  384. if not block:
  385. break
  386. read += len(block)
  387. dfp.write(block)
  388. if digester:
  389. digester.update(block)
  390. blocknum += 1
  391. if reporthook:
  392. reporthook(blocknum, blocksize, size)
  393. finally:
  394. sfp.close()
  395. # check that we got the whole file, if we can
  396. if size >= 0 and read < size:
  397. raise DistlibException(
  398. 'retrieval incomplete: got only %d out of %d bytes'
  399. % (read, size))
  400. # if we have a digest, it must match.
  401. if digester:
  402. actual = digester.hexdigest()
  403. if digest != actual:
  404. raise DistlibException('%s digest mismatch for %s: expected '
  405. '%s, got %s' % (hasher, destfile,
  406. digest, actual))
  407. logger.debug('Digest verified: %s', digest)
  408. def send_request(self, req):
  409. """
  410. Send a standard library :class:`Request` to PyPI and return its
  411. response.
  412. :param req: The request to send.
  413. :return: The HTTP response from PyPI (a standard library HTTPResponse).
  414. """
  415. handlers = []
  416. if self.password_handler:
  417. handlers.append(self.password_handler)
  418. if self.ssl_verifier:
  419. handlers.append(self.ssl_verifier)
  420. opener = build_opener(*handlers)
  421. return opener.open(req)
  422. def encode_request(self, fields, files):
  423. """
  424. Encode fields and files for posting to an HTTP server.
  425. :param fields: The fields to send as a list of (fieldname, value)
  426. tuples.
  427. :param files: The files to send as a list of (fieldname, filename,
  428. file_bytes) tuple.
  429. """
  430. # Adapted from packaging, which in turn was adapted from
  431. # http://code.activestate.com/recipes/146306
  432. parts = []
  433. boundary = self.boundary
  434. for k, values in fields:
  435. if not isinstance(values, (list, tuple)):
  436. values = [values]
  437. for v in values:
  438. parts.extend((
  439. b'--' + boundary,
  440. ('Content-Disposition: form-data; name="%s"' %
  441. k).encode('utf-8'),
  442. b'',
  443. v.encode('utf-8')))
  444. for key, filename, value in files:
  445. parts.extend((
  446. b'--' + boundary,
  447. ('Content-Disposition: form-data; name="%s"; filename="%s"' %
  448. (key, filename)).encode('utf-8'),
  449. b'',
  450. value))
  451. parts.extend((b'--' + boundary + b'--', b''))
  452. body = b'\r\n'.join(parts)
  453. ct = b'multipart/form-data; boundary=' + boundary
  454. headers = {
  455. 'Content-type': ct,
  456. 'Content-length': str(len(body))
  457. }
  458. return Request(self.url, body, headers)
  459. def search(self, terms, operator=None):
  460. if isinstance(terms, string_types):
  461. terms = {'name': terms}
  462. rpc_proxy = ServerProxy(self.url, timeout=3.0)
  463. try:
  464. return rpc_proxy.search(terms, operator or 'and')
  465. finally:
  466. rpc_proxy('close')()