parsers.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
"""
Parsers are used to parse the content of incoming HTTP requests.

They give us a generic way to handle the various media types that may
appear on a request, such as form content or JSON-encoded data.
"""
  6. import codecs
  7. from urllib import parse
  8. from django.conf import settings
  9. from django.core.files.uploadhandler import StopFutureHandlers
  10. from django.http import QueryDict
  11. from django.http.multipartparser import ChunkIter
  12. from django.http.multipartparser import \
  13. MultiPartParser as DjangoMultiPartParser
  14. from django.http.multipartparser import MultiPartParserError, parse_header
  15. from django.utils.encoding import force_str
  16. from rest_framework import renderers
  17. from rest_framework.exceptions import ParseError
  18. from rest_framework.settings import api_settings
  19. from rest_framework.utils import json
  20. class DataAndFiles:
  21. def __init__(self, data, files):
  22. self.data = data
  23. self.files = files
  24. class BaseParser:
  25. """
  26. All parsers should extend `BaseParser`, specifying a `media_type`
  27. attribute, and overriding the `.parse()` method.
  28. """
  29. media_type = None
  30. def parse(self, stream, media_type=None, parser_context=None):
  31. """
  32. Given a stream to read from, return the parsed representation.
  33. Should return parsed data, or a `DataAndFiles` object consisting of the
  34. parsed data and files.
  35. """
  36. raise NotImplementedError(".parse() must be overridden.")
  37. class JSONParser(BaseParser):
  38. """
  39. Parses JSON-serialized data.
  40. """
  41. media_type = 'application/json'
  42. renderer_class = renderers.JSONRenderer
  43. strict = api_settings.STRICT_JSON
  44. def parse(self, stream, media_type=None, parser_context=None):
  45. """
  46. Parses the incoming bytestream as JSON and returns the resulting data.
  47. """
  48. parser_context = parser_context or {}
  49. encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
  50. try:
  51. decoded_stream = codecs.getreader(encoding)(stream)
  52. parse_constant = json.strict_constant if self.strict else None
  53. return json.load(decoded_stream, parse_constant=parse_constant)
  54. except ValueError as exc:
  55. raise ParseError('JSON parse error - %s' % str(exc))
  56. class FormParser(BaseParser):
  57. """
  58. Parser for form data.
  59. """
  60. media_type = 'application/x-www-form-urlencoded'
  61. def parse(self, stream, media_type=None, parser_context=None):
  62. """
  63. Parses the incoming bytestream as a URL encoded form,
  64. and returns the resulting QueryDict.
  65. """
  66. parser_context = parser_context or {}
  67. encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
  68. return QueryDict(stream.read(), encoding=encoding)
  69. class MultiPartParser(BaseParser):
  70. """
  71. Parser for multipart form data, which may include file data.
  72. """
  73. media_type = 'multipart/form-data'
  74. def parse(self, stream, media_type=None, parser_context=None):
  75. """
  76. Parses the incoming bytestream as a multipart encoded form,
  77. and returns a DataAndFiles object.
  78. `.data` will be a `QueryDict` containing all the form parameters.
  79. `.files` will be a `QueryDict` containing all the form files.
  80. """
  81. parser_context = parser_context or {}
  82. request = parser_context['request']
  83. encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
  84. meta = request.META.copy()
  85. meta['CONTENT_TYPE'] = media_type
  86. upload_handlers = request.upload_handlers
  87. try:
  88. parser = DjangoMultiPartParser(meta, stream, upload_handlers, encoding)
  89. data, files = parser.parse()
  90. return DataAndFiles(data, files)
  91. except MultiPartParserError as exc:
  92. raise ParseError('Multipart form parse error - %s' % str(exc))
  93. class FileUploadParser(BaseParser):
  94. """
  95. Parser for file upload data.
  96. """
  97. media_type = '*/*'
  98. errors = {
  99. 'unhandled': 'FileUpload parse error - none of upload handlers can handle the stream',
  100. 'no_filename': 'Missing filename. Request should include a Content-Disposition header with a filename parameter.',
  101. }
  102. def parse(self, stream, media_type=None, parser_context=None):
  103. """
  104. Treats the incoming bytestream as a raw file upload and returns
  105. a `DataAndFiles` object.
  106. `.data` will be None (we expect request body to be a file content).
  107. `.files` will be a `QueryDict` containing one 'file' element.
  108. """
  109. parser_context = parser_context or {}
  110. request = parser_context['request']
  111. encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET)
  112. meta = request.META
  113. upload_handlers = request.upload_handlers
  114. filename = self.get_filename(stream, media_type, parser_context)
  115. if not filename:
  116. raise ParseError(self.errors['no_filename'])
  117. # Note that this code is extracted from Django's handling of
  118. # file uploads in MultiPartParser.
  119. content_type = meta.get('HTTP_CONTENT_TYPE',
  120. meta.get('CONTENT_TYPE', ''))
  121. try:
  122. content_length = int(meta.get('HTTP_CONTENT_LENGTH',
  123. meta.get('CONTENT_LENGTH', 0)))
  124. except (ValueError, TypeError):
  125. content_length = None
  126. # See if the handler will want to take care of the parsing.
  127. for handler in upload_handlers:
  128. result = handler.handle_raw_input(stream,
  129. meta,
  130. content_length,
  131. None,
  132. encoding)
  133. if result is not None:
  134. return DataAndFiles({}, {'file': result[1]})
  135. # This is the standard case.
  136. possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
  137. chunk_size = min([2 ** 31 - 4] + possible_sizes)
  138. chunks = ChunkIter(stream, chunk_size)
  139. counters = [0] * len(upload_handlers)
  140. for index, handler in enumerate(upload_handlers):
  141. try:
  142. handler.new_file(None, filename, content_type,
  143. content_length, encoding)
  144. except StopFutureHandlers:
  145. upload_handlers = upload_handlers[:index + 1]
  146. break
  147. for chunk in chunks:
  148. for index, handler in enumerate(upload_handlers):
  149. chunk_length = len(chunk)
  150. chunk = handler.receive_data_chunk(chunk, counters[index])
  151. counters[index] += chunk_length
  152. if chunk is None:
  153. break
  154. for index, handler in enumerate(upload_handlers):
  155. file_obj = handler.file_complete(counters[index])
  156. if file_obj is not None:
  157. return DataAndFiles({}, {'file': file_obj})
  158. raise ParseError(self.errors['unhandled'])
  159. def get_filename(self, stream, media_type, parser_context):
  160. """
  161. Detects the uploaded file name. First searches a 'filename' url kwarg.
  162. Then tries to parse Content-Disposition header.
  163. """
  164. try:
  165. return parser_context['kwargs']['filename']
  166. except KeyError:
  167. pass
  168. try:
  169. meta = parser_context['request'].META
  170. disposition = parse_header(meta['HTTP_CONTENT_DISPOSITION'].encode())
  171. filename_parm = disposition[1]
  172. if 'filename*' in filename_parm:
  173. return self.get_encoded_filename(filename_parm)
  174. return force_str(filename_parm['filename'])
  175. except (AttributeError, KeyError, ValueError):
  176. pass
  177. def get_encoded_filename(self, filename_parm):
  178. """
  179. Handle encoded filenames per RFC6266. See also:
  180. https://tools.ietf.org/html/rfc2231#section-4
  181. """
  182. encoded_filename = force_str(filename_parm['filename*'])
  183. try:
  184. charset, lang, filename = encoded_filename.split('\'', 2)
  185. filename = parse.unquote(filename)
  186. except (ValueError, LookupError):
  187. filename = force_str(filename_parm['filename'])
  188. return filename