decoder.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057
  1. import datetime
  2. import io
  3. from os import linesep
  4. import re
  5. import sys
  6. from toml.tz import TomlTz
  # Python 2/3 compatibility aliases used throughout this module.
  if sys.version_info >= (3,):
      # Python 3: recreate the Python 2 builtin names on top of their
      # Python 3 counterparts.
      unicode = str
      basestring = str
      unichr = chr
      _range = range
  else:
      # Python 2: only the lazy range needs an alias.
      _range = xrange  # noqa: F821
  def _detect_pathlib_path(p):
      """Return True when *p* is a ``pathlib.PurePath`` instance.

      ``pathlib`` is only imported on Python 3.4 and newer; on older
      interpreters the answer is always False.
      """
      if sys.version_info < (3, 4):
          return False
      import pathlib
      return isinstance(p, pathlib.PurePath)
  def _ispath(p):
      """Return True when *p* can be used as a filesystem path.

      Byte strings, text strings and ``pathlib`` path objects qualify.
      """
      looks_like_string = isinstance(p, (bytes, basestring))
      return True if looks_like_string else _detect_pathlib_path(p)
  def _getpath(p):
      """Convert a path-like argument into something ``io.open`` accepts.

      On Python 3.6+ the conversion is delegated to ``os.fspath``. On older
      interpreters a ``pathlib`` path is converted with ``str`` and any other
      value is returned unchanged.
      """
      if sys.version_info >= (3, 6):
          import os
          return os.fspath(p)
      return str(p) if _detect_pathlib_path(p) else p
  # Exception raised by load() when none of the listed files exists:
  # FileNotFoundError where the interpreter defines it, IOError otherwise.
  try:
      FileNotFoundError
  except NameError:
      FNFError = IOError
  else:
      FNFError = FileNotFoundError

  # Local time "HH:MM:SS" with an optional fraction of 3 to 6 digits.
  # Groups: hour, minute, second, ".fraction", fraction digits.
  TIME_RE = re.compile(r"([0-9]{2}):([0-9]{2}):([0-9]{2})(\.([0-9]{3,6}))?")
  class TomlDecodeError(ValueError):
      """Base toml Exception / Error."""

      def __init__(self, msg, doc, pos):
          """Build the error for offset *pos* inside the document *doc*.

          The line and column are derived from the newlines found before
          *pos* and are appended to *msg* in the exception text.
          """
          newlines_before = doc.count('\n', 0, pos)
          last_newline_at = doc.rfind('\n', 0, pos)
          lineno = newlines_before + 1
          # rfind() yields -1 on the first line, which makes the column
          # simply pos + 1 there.
          colno = pos - last_newline_at
          super(TomlDecodeError, self).__init__(
              '{} (line {} column {} char {})'.format(msg, lineno, colno, pos))
          self.msg, self.doc, self.pos = msg, doc, pos
          self.lineno, self.colno = lineno, colno
  # Matches the start of a TOML number that uses underscores as digit
  # separators for readability: one digit followed by zero or more
  # "_<digit>" groups. The pattern has no end anchor, so .match() only
  # tests a prefix of the string.
  _number_with_underscores = re.compile('([0-9])(_([0-9]))*')
  class CommentValue(object):
      """Pair a decoded value with the comment that accompanied it."""

      def __init__(self, val, comment, beginline, _dict):
          """Store *val* and *comment*.

          *beginline* selects the separator placed before the comment text:
          a newline when true, a single space otherwise. *_dict* is the
          table class used by dump() to recognise table values.
          """
          self.val = val
          self.comment = ("\n" if beginline else " ") + comment
          self._dict = _dict

      def __getitem__(self, key):
          """Index straight into the wrapped value."""
          return self.val[key]

      def __setitem__(self, key, value):
          """Assign straight into the wrapped value."""
          self.val[key] = value

      def dump(self, dump_value_func):
          """Render the value with *dump_value_func* plus its comment.

          For table values the comment comes first, followed by a newline
          and the rendered table; for everything else the comment is
          appended after the rendered value.
          """
          rendered = dump_value_func(self.val)
          if not isinstance(self.val, self._dict):
              return unicode(rendered) + self.comment
          return self.comment + "\n" + unicode(rendered)
  def _strictly_valid_num(n):
      """Check underscore and leading-zero placement in a numeric literal.

      Only the shape of the text is inspected (underscores at the edges,
      next to a dot or doubled; leading zeroes); the digits themselves are
      not validated here.
      """
      n = n.strip()
      if not n or n[0] == '_' or n[-1] == '_':
          return False
      if "_." in n or "._" in n:
          return False
      if len(n) == 1:
          return True
      # A leading zero is only acceptable before a dot or a radix marker.
      if n[0] == '0' and n[1] not in ('.', 'o', 'b', 'x'):
          return False
      if n[0] in ('+', '-'):
          n = n[1:]
          if len(n) > 1 and n[0] == '0' and n[1] != '.':
              return False
      return '__' not in n
  def load(f, _dict=dict, decoder=None):
      """Parse a TOML file, or several files merged together, into a dict.

      Args:
          f: Path of the file to read, a list of paths whose contents are
              merged into a single dictionary, or an object with a
              ``read()`` method.
          _dict: (optional) Class used for the returned dictionary.
          decoder: (optional) Decoder instance to use.

      Returns:
          The parsed TOML content as a dictionary.

      Raises:
          TypeError: When f is none of the supported kinds.
          TomlDecodeError: When the content is not valid TOML.
          IOError / FileNotFoundError: When a list is given and none of
              its entries exists (Python 2 / Python 3).
      """
      if _ispath(f):
          with io.open(_getpath(f), encoding='utf-8') as handle:
              return loads(handle.read(), _dict, decoder)

      if not isinstance(f, list):
          # Anything that is neither a path nor a list is expected to
          # behave like an open file.
          try:
              return loads(f.read(), _dict, decoder)
          except AttributeError:
              raise TypeError("You can only load a file descriptor, filename or "
                              "list")

      from os import path as op
      from warnings import warn

      existing = [candidate for candidate in f if op.exists(candidate)]
      if not existing:
          raise FNFError(
              "Load expects a list to contain filenames only." + linesep +
              "The list needs to contain the path of at least one "
              "existing file.")
      if decoder is None:
          decoder = TomlDecoder(_dict)
      merged = decoder.get_empty_table()
      for filename in f:  # noqa: E741
          if not op.exists(filename):
              warn("Non-existent filename in list with at least one valid "
                   "filename")
              continue
          merged.update(load(filename, _dict, decoder))
      return merged
  # Bare (unquoted) table names: ASCII letters, digits, "_" and "-" only.
  _groupname_re = re.compile(r'^[A-Za-z0-9_-]+$')


  def loads(s, _dict=dict, decoder=None):
      """Parses string as toml

      The work is done in two passes. The first walks the text character by
      character: it validates key names, tracks open strings and arrays,
      blanks out comments (handing them to the decoder) and replaces the
      newlines inside multi-line arrays with spaces. The second pass walks
      the resulting lines and builds the tables.

      Args:
          s: String to be parsed
          _dict: (optional) Specifies the class of the returned toml dictionary
          decoder: (optional) The decoder to use; a TomlDecoder built on
              _dict is created when omitted

      Returns:
          Parsed toml file represented as a dictionary

      Raises:
          TypeError: When a non-string is passed
          TomlDecodeError: Error while decoding toml
      """

      implicitgroups = []
      if decoder is None:
          decoder = TomlDecoder(_dict)
      retval = decoder.get_empty_table()
      currentlevel = retval
      if not isinstance(s, basestring):
          raise TypeError("Expecting something like a string")

      if not isinstance(s, unicode):
          s = s.decode('utf8')

      original = s
      sl = list(s)
      openarr = 0
      openstring = False
      openstrchar = ""
      multilinestr = False
      arrayoftables = False
      beginline = True
      keygroup = False
      dottedkey = False
      # keyname: 0 = not in a key, 1 = inside a key segment,
      # 2 = after a key segment (waiting for '.', '=' or whitespace).
      keyname = 0
      key = ''
      prev_key = ''
      line_no = 1

      # ---- Pass 1: character scan -------------------------------------
      for i, item in enumerate(sl):
          # Treat "\r\n" as "\n". The length guard keeps a trailing lone
          # "\r" from indexing past the end of the buffer.
          if item == '\r' and len(sl) > (i + 1) and sl[i + 1] == '\n':
              sl[i] = ' '
              continue
          if keyname:
              key += item
              if item == '\n':
                  raise TomlDecodeError("Key name found without value."
                                        " Reached end of line.", original, i)
              if openstring:
                  if item == openstrchar:
                      # A quote closes the key segment only when preceded
                      # by an even number of backslashes.
                      oddbackslash = False
                      k = 1
                      while i >= k and sl[i - k] == '\\':
                          oddbackslash = not oddbackslash
                          k += 1
                      if not oddbackslash:
                          keyname = 2
                          openstring = False
                          openstrchar = ""
                  continue
              elif keyname == 1:
                  if item.isspace():
                      keyname = 2
                      continue
                  elif item == '.':
                      dottedkey = True
                      continue
                  elif item.isalnum() or item == '_' or item == '-':
                      continue
                  elif (dottedkey and sl[i - 1] == '.' and
                        (item == '"' or item == "'")):
                      openstring = True
                      openstrchar = item
                      continue
              elif keyname == 2:
                  # Both look-aheads below are bounds-checked: when the
                  # input ends here the loop simply finishes and the
                  # "Reached end of file" error after it is raised.
                  if item.isspace():
                      if dottedkey and len(sl) > (i + 1):
                          nextitem = sl[i + 1]
                          if not nextitem.isspace() and nextitem != '.':
                              keyname = 1
                      continue
                  if item == '.':
                      dottedkey = True
                      if len(sl) > (i + 1):
                          nextitem = sl[i + 1]
                          if not nextitem.isspace() and nextitem != '.':
                              keyname = 1
                      continue
              if item == '=':
                  keyname = 0
                  # key still ends with the '=' just appended.
                  prev_key = key[:-1].rstrip()
                  key = ''
                  dottedkey = False
              else:
                  raise TomlDecodeError("Found invalid character in key name: '" +
                                        item + "'. Try quoting the key name.",
                                        original, i)
          if item == "'" and openstrchar != '"':
              # Count consecutive single quotes to spot a ''' delimiter.
              k = 1
              try:
                  while sl[i - k] == "'":
                      k += 1
                      if k == 3:
                          break
              except IndexError:
                  pass
              if k == 3:
                  multilinestr = not multilinestr
                  openstring = multilinestr
              else:
                  openstring = not openstring
              if openstring:
                  openstrchar = "'"
              else:
                  openstrchar = ""
          if item == '"' and openstrchar != "'":
              oddbackslash = False
              k = 1
              tripquote = False
              try:
                  while sl[i - k] == '"':
                      k += 1
                      if k == 3:
                          tripquote = True
                          break
                  if k == 1 or (k == 3 and tripquote):
                      while sl[i - k] == '\\':
                          oddbackslash = not oddbackslash
                          k += 1
              except IndexError:
                  pass
              if not oddbackslash:
                  if tripquote:
                      multilinestr = not multilinestr
                      openstring = multilinestr
                  else:
                      openstring = not openstring
              if openstring:
                  openstrchar = '"'
              else:
                  openstrchar = ""
          if item == '#' and (not openstring and not keygroup and
                              not arrayoftables):
              # Blank the comment out of the working buffer while collecting
              # its text from the untouched input string.
              j = i
              comment = ""
              try:
                  while sl[j] != '\n':
                      comment += s[j]
                      sl[j] = ' '
                      j += 1
              except IndexError:
                  # The comment runs to the end of the input.
                  break
              if not openarr:
                  decoder.preserve_comment(line_no, prev_key, comment, beginline)
          if item == '[' and (not openstring and not keygroup and
                              not arrayoftables):
              if beginline:
                  if len(sl) > i + 1 and sl[i + 1] == '[':
                      arrayoftables = True
                  else:
                      keygroup = True
              else:
                  openarr += 1
          if item == ']' and not openstring:
              if keygroup:
                  keygroup = False
              elif arrayoftables:
                  if sl[i - 1] == ']':
                      arrayoftables = False
              else:
                  openarr -= 1
          if item == '\n':
              if openstring or multilinestr:
                  if not multilinestr:
                      raise TomlDecodeError("Unbalanced quotes", original, i)
                  if ((sl[i - 1] == "'" or sl[i - 1] == '"') and (
                          sl[i - 2] == sl[i - 1])):
                      sl[i] = sl[i - 1]
                      if sl[i - 3] == sl[i - 1]:
                          sl[i - 3] = ' '
              elif openarr:
                  # Join multi-line arrays onto a single logical line.
                  sl[i] = ' '
              else:
                  beginline = True
              line_no += 1
          elif beginline and sl[i] != ' ' and sl[i] != '\t':
              beginline = False
              if not keygroup and not arrayoftables:
                  if sl[i] == '=':
                      raise TomlDecodeError("Found empty keyname. ", original, i)
                  keyname = 1
                  key += item
      if keyname:
          raise TomlDecodeError("Key name found without value."
                                " Reached end of file.", original, len(s))
      if openstring:  # reached EOF and have an unterminated string
          raise TomlDecodeError("Unterminated string found."
                                " Reached end of file.", original, len(s))

      # ---- Pass 2: line by line ---------------------------------------
      s = ''.join(sl)
      s = s.split('\n')
      multikey = None
      multilinestr = ""
      multibackslash = False
      pos = 0
      for idx, line in enumerate(s):
          if idx > 0:
              # Offset of the current line start, used in error positions.
              pos += len(s[idx - 1]) + 1

          decoder.embed_comments(idx, currentlevel)

          if not multilinestr or multibackslash or '\n' not in multilinestr:
              line = line.strip()
          if line == "" and (not multikey or multibackslash):
              continue
          if multikey:
              # Continuation of a value that spans several lines.
              multilinestr += line
              multibackslash = False
              closed = False
              if multilinestr[0] == '[':
                  closed = line[-1] == ']'
              elif len(line) > 2:
                  closed = (line[-1] == multilinestr[0] and
                            line[-2] == multilinestr[0] and
                            line[-3] == multilinestr[0])
              if closed:
                  try:
                      value, vtype = decoder.load_value(multilinestr)
                  except ValueError as err:
                      raise TomlDecodeError(str(err), original, pos)
                  currentlevel[multikey] = value
                  multikey = None
                  multilinestr = ""
              else:
                  # An odd number of trailing backslashes means the line
                  # break itself is escaped.
                  k = len(multilinestr) - 1
                  while k > -1 and multilinestr[k] == '\\':
                      multibackslash = not multibackslash
                      k -= 1
                  if multibackslash:
                      multilinestr = multilinestr[:-1]
                  else:
                      multilinestr += "\n"
              continue
          if line[0] == '[':
              # Table header: [a.b] or array-of-tables header: [[a.b]]
              arrayoftables = False
              if len(line) == 1:
                  raise TomlDecodeError("Opening key group bracket on line by "
                                        "itself.", original, pos)
              if line[1] == '[':
                  arrayoftables = True
                  line = line[2:]
                  splitstr = ']]'
              else:
                  line = line[1:]
                  splitstr = ']'
              # Find the closing bracket that is not inside a quoted name.
              i = 1
              quotesplits = decoder._get_split_on_quotes(line)
              quoted = False
              for quotesplit in quotesplits:
                  if not quoted and splitstr in quotesplit:
                      break
                  i += quotesplit.count(splitstr)
                  quoted = not quoted
              line = line.split(splitstr, i)
              if len(line) < i + 1 or line[-1].strip() != "":
                  raise TomlDecodeError("Key group not on a line by itself.",
                                        original, pos)
              groups = splitstr.join(line[:-1]).split('.')
              i = 0
              while i < len(groups):
                  groups[i] = groups[i].strip()
                  if len(groups[i]) > 0 and (groups[i][0] == '"' or
                                             groups[i][0] == "'"):
                      # Re-join the pieces of a quoted name that the split
                      # on '.' cut apart.
                      groupstr = groups[i]
                      j = i + 1
                      while ((not groupstr[0] == groupstr[-1]) or
                             len(groupstr) == 1):
                          j += 1
                          if j > len(groups) + 2:
                              raise TomlDecodeError("Invalid group name '" +
                                                    groupstr + "' Something " +
                                                    "went wrong.", original, pos)
                          groupstr = '.'.join(groups[i:j]).strip()
                      groups[i] = groupstr[1:-1]
                      groups[i + 1:j] = []
                  else:
                      if not _groupname_re.match(groups[i]):
                          raise TomlDecodeError("Invalid group name '" +
                                                groups[i] + "'. Try quoting it.",
                                                original, pos)
                  i += 1
              # Walk down from the root, creating tables as needed.
              currentlevel = retval
              for i in _range(len(groups)):
                  group = groups[i]
                  if group == "":
                      raise TomlDecodeError("Can't have a keygroup with an empty "
                                            "name", original, pos)
                  try:
                      currentlevel[group]
                      if i == len(groups) - 1:
                          if group in implicitgroups:
                              implicitgroups.remove(group)
                              if arrayoftables:
                                  raise TomlDecodeError("An implicitly defined "
                                                        "table can't be an array",
                                                        original, pos)
                          elif arrayoftables:
                              currentlevel[group].append(decoder.get_empty_table()
                                                         )
                          else:
                              raise TomlDecodeError("What? " + group +
                                                    " already exists?" +
                                                    str(currentlevel),
                                                    original, pos)
                  except TypeError:
                      # currentlevel is a list (array of tables): descend
                      # into its last element.
                      currentlevel = currentlevel[-1]
                      if group not in currentlevel:
                          currentlevel[group] = decoder.get_empty_table()
                          if i == len(groups) - 1 and arrayoftables:
                              currentlevel[group] = [decoder.get_empty_table()]
                  except KeyError:
                      if i != len(groups) - 1:
                          implicitgroups.append(group)
                      currentlevel[group] = decoder.get_empty_table()
                      if i == len(groups) - 1 and arrayoftables:
                          currentlevel[group] = [decoder.get_empty_table()]
                  currentlevel = currentlevel[group]
                  if arrayoftables:
                      try:
                          currentlevel = currentlevel[-1]
                      except KeyError:
                          pass
          elif line[0] == "{":
              if line[-1] != "}":
                  raise TomlDecodeError("Line breaks are not allowed in inline"
                                        "objects", original, pos)
              try:
                  decoder.load_inline_object(line, currentlevel, multikey,
                                             multibackslash)
              except ValueError as err:
                  raise TomlDecodeError(str(err), original, pos)
          elif "=" in line:
              try:
                  ret = decoder.load_line(line, currentlevel, multikey,
                                          multibackslash)
              except ValueError as err:
                  raise TomlDecodeError(str(err), original, pos)
              if ret is not None:
                  # The value continues on the following lines.
                  multikey, multilinestr, multibackslash = ret
      return retval
  def _load_date(val):
      """Try to read *val* as a date or a date-time.

      Returns a ``datetime.date`` for a 10-character value, a
      ``datetime.datetime`` otherwise, or None when the text cannot be
      interpreted as either. Characters from index 19 on are treated as an
      optional fraction of a second and/or a time-zone suffix.
      """
      fraction_us = 0
      zone = None
      try:
          if len(val) > 19:
              if val[19] != '.':
                  zone = TomlTz(val[19:])
              else:
                  if val[-1].upper() == 'Z':
                      digits, zone_text = val[20:-1], "Z"
                  else:
                      tail = val[20:]
                      # A '+' takes precedence over a '-' as the offset
                      # marker.
                      cut = tail.find('+')
                      if cut == -1:
                          cut = tail.find('-')
                      if cut == -1:
                          digits, zone_text = tail, None
                      else:
                          digits, zone_text = tail[:cut], tail[cut:]
                  if zone_text is not None:
                      zone = TomlTz(zone_text)
                  # Scale the fraction digits to microseconds.
                  fraction_us = int(int(digits) * (10 ** (6 - len(digits))))
      except ValueError:
          zone = None
      if "-" not in val[1:]:
          return None
      try:
          year, month, day = int(val[:4]), int(val[5:7]), int(val[8:10])
          if len(val) == 10:
              return datetime.date(year, month, day)
          return datetime.datetime(
              year, month, day,
              int(val[11:13]), int(val[14:16]), int(val[17:19]),
              fraction_us, zone)
      except ValueError:
          return None
  def _odd_trailing_backslashes(text):
      """Return True when *text* ends with an odd number of backslashes."""
      odd = False
      i = len(text) - 1
      while i > -1 and text[i] == '\\':
          odd = not odd
          i -= 1
      return odd


  def _load_unicode_escapes(v, hexbytes, prefix):
      """Decode the unicode escapes of a string that was split on *prefix*.

      Args:
          v: Text preceding the first occurrence of *prefix*.
          hexbytes: The remaining pieces; each starts where the hex digits
              of an escape are expected.
          prefix: The escape introducer, "\\u" (4 hex digits) or "\\U"
              (8 hex digits).

      Returns:
          The text with every genuine escape replaced by its character.
          A prefix whose backslash is itself escaped (odd number of
          backslashes right before it) is put back unchanged.

      Raises:
          ValueError: When an escape does not have exactly the required
              number of hex digits or names a surrogate code point.
      """
      hxblen = 8 if prefix == "\\U" else 4
      skip = _odd_trailing_backslashes(v)
      for hx in hexbytes:
          if skip:
              # The backslash of this prefix was escaped: not an escape.
              v += prefix
              v += hx
          else:
              hxb = hx[:hxblen].lower()
              # Require the full digit count; this also rejects an empty
              # run, which previously failed with an IndexError.
              if len(hxb) != hxblen or hxb.strip('0123456789abcdef'):
                  raise ValueError("Invalid escape sequence: " + hxb)
              codepoint = int(hxb, 16)
              # Range check so that both the 4- and the 8-digit form
              # reject surrogates.
              if 0xD800 <= codepoint <= 0xDFFF:
                  raise ValueError("Invalid escape sequence: " + hxb +
                                   ". Only scalar unicode points are allowed.")
              v += unichr(codepoint)
              v += unicode(hx[len(hxb):])
          # Backslashes closing this piece decide whether the next prefix
          # is a real escape, whichever branch handled the piece.
          skip = _odd_trailing_backslashes(hx)
      return v
  # Tables for unescaping TOML string values.
  # Characters that may follow a backslash ...
  _escapes = ['0', 'b', 'f', 'n', 'r', 't', '"']
  # ... and the character each one stands for, in the same order.
  _escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"']
  # Lookup used for the substitution.
  _escape_to_escapedchars = dict(zip(_escapes, _escapedchars))


  def _unescape(v):
      """Unescape characters in a TOML string.

      Backslash escapes listed in ``_escapes`` are replaced, a doubled
      backslash collapses to one, and "\\u" / "\\U" are left untouched.
      A lone backslash at the very end is kept. Any other escape raises
      ValueError.
      """
      pieces = []
      pos = 0
      end = len(v)
      while pos < end:
          ch = v[pos]
          if ch != '\\':
              pieces.append(ch)
              pos += 1
              continue
          if pos + 1 == end:
              # Nothing follows the backslash: keep it as it is.
              pieces.append(ch)
              break
          follower = v[pos + 1]
          if follower in _escapes:
              pieces.append(_escape_to_escapedchars[follower])
          elif follower == '\\':
              pieces.append('\\')
          elif follower in ('u', 'U'):
              pieces.append(ch + follower)
          else:
              raise ValueError("Reserved escape sequence used")
          pos += 2
      return ''.join(pieces)
  class InlineTableDict(object):
      """Marker base class identifying inline tables.

      The class derives from ``object`` and adds no behaviour of its own;
      ``TomlDecoder.get_empty_inline_table`` combines it with the configured
      dictionary class so inline tables can be told apart with
      ``isinstance``.
      """
  class TomlDecoder(object):
      """Turns TOML lines and value fragments into Python objects.

      Tables are created with the ``_dict`` class supplied at construction.
      """

      def __init__(self, _dict=dict):
          """Remember *_dict*, the class used for every table created."""
          self._dict = _dict

      def get_empty_table(self):
          """Return a new, empty table."""
          return self._dict()

      def get_empty_inline_table(self):
          """Return a new, empty table that is tagged as an inline table."""
          class DynamicInlineTableDict(self._dict, InlineTableDict):
              """Concrete sentinel subclass for inline tables.
              It is a subclass of _dict which is passed in dynamically at load
              time

              It is also a subclass of InlineTableDict
              """

          return DynamicInlineTableDict()

      def load_inline_object(self, line, currentlevel, multikey=False,
                             multibackslash=False):
          """Load the ``key = value`` pairs of an inline table into a table.

          Args:
              line: The inline table text including its outer braces.
              currentlevel: Table receiving the pairs.
              multikey, multibackslash: Passed through to load_line().

          Raises:
              ValueError: When a pair has no '=' or its value cannot be
                  completed from the remaining comma-separated pieces.
          """
          candidate_groups = line[1:-1].split(",")
          groups = []
          if len(candidate_groups) == 1 and not candidate_groups[0].strip():
              candidate_groups.pop()
          while len(candidate_groups) > 0:
              candidate_group = candidate_groups.pop(0)
              try:
                  _, value = candidate_group.split('=', 1)
              except ValueError:
                  raise ValueError("Invalid inline table encountered")
              value = value.strip()
              # An empty value is never complete; testing it first avoids
              # indexing into an empty string.
              if value and (
                      (value[0] == value[-1] and value[0] in ('"', "'")) or (
                          value[0] in '-0123456789' or
                          value in ('true', 'false') or
                          (value[0] == "[" and value[-1] == "]") or
                          (value[0] == '{' and value[-1] == '}'))):
                  groups.append(candidate_group)
              elif len(candidate_groups) > 0:
                  # The comma was part of the value: glue the next piece on.
                  candidate_groups[0] = (candidate_group + "," +
                                         candidate_groups[0])
              else:
                  raise ValueError("Invalid inline table value encountered")
          for group in groups:
              status = self.load_line(group, currentlevel, multikey,
                                      multibackslash)
              if status is not None:
                  break

      def _get_split_on_quotes(self, line):
          """Split *line* on quote characters.

          Returns the list of pieces; callers walk it toggling a "quoted"
          flag after every piece.
          """
          doublequotesplits = line.split('"')
          quoted = False
          quotesplits = []
          if len(doublequotesplits) > 1 and "'" in doublequotesplits[0]:
              singlequotesplits = doublequotesplits[0].split("'")
              doublequotesplits = doublequotesplits[1:]
              while len(singlequotesplits) % 2 == 0 and len(doublequotesplits):
                  singlequotesplits[-1] += '"' + doublequotesplits[0]
                  doublequotesplits = doublequotesplits[1:]
                  if "'" in singlequotesplits[-1]:
                      singlequotesplits = (singlequotesplits[:-1] +
                                           singlequotesplits[-1].split("'"))
              quotesplits += singlequotesplits
          for doublequotesplit in doublequotesplits:
              if quoted:
                  quotesplits.append(doublequotesplit)
              else:
                  quotesplits += doublequotesplit.split("'")
                  # NOTE(review): nesting of this toggle was reconstructed
                  # from a paste without indentation - confirm upstream.
                  quoted = not quoted
          return quotesplits

      def load_line(self, line, currentlevel, multikey, multibackslash):
          """Load a single ``key = value`` line into *currentlevel*.

          Dotted keys create the intermediate tables they name.

          Returns:
              None when the pair was stored, or the tuple
              ``(multikey, multilinestr, multibackslash)`` when the value
              is a multi-line value that is not closed on this line.

          Raises:
              ValueError: On a duplicate key or an invalid value.
          """
          # Locate the '=' that separates key and value, skipping those
          # that sit inside quoted parts.
          i = 1
          quotesplits = self._get_split_on_quotes(line)
          quoted = False
          for quotesplit in quotesplits:
              if not quoted and '=' in quotesplit:
                  break
              i += quotesplit.count('=')
              quoted = not quoted
          pair = line.split('=', i)
          strictly_valid = _strictly_valid_num(pair[-1])
          if _number_with_underscores.match(pair[-1]):
              pair[-1] = pair[-1].replace('_', '')
          # While the candidate value is not recognisable, move the split
          # one '=' further to the right.
          while len(pair[-1]) and (pair[-1][0] != ' ' and pair[-1][0] != '\t' and
                                   pair[-1][0] != "'" and pair[-1][0] != '"' and
                                   pair[-1][0] != '[' and pair[-1][0] != '{' and
                                   pair[-1].strip() != 'true' and
                                   pair[-1].strip() != 'false'):
              try:
                  float(pair[-1])
                  break
              except ValueError:
                  pass
              if _load_date(pair[-1]) is not None:
                  break
              if TIME_RE.match(pair[-1]):
                  break
              i += 1
              prev_val = pair[-1]
              pair = line.split('=', i)
              if prev_val == pair[-1]:
                  raise ValueError("Invalid date or number")
              if strictly_valid:
                  strictly_valid = _strictly_valid_num(pair[-1])
          pair = ['='.join(pair[:-1]).strip(), pair[-1].strip()]
          if '.' in pair[0]:
              if '"' in pair[0] or "'" in pair[0]:
                  quotesplits = self._get_split_on_quotes(pair[0])
                  quoted = False
                  levels = []
                  for quotesplit in quotesplits:
                      if quoted:
                          levels.append(quotesplit)
                      else:
                          levels += [level.strip() for level in
                                     quotesplit.split('.')]
                      quoted = not quoted
              else:
                  levels = pair[0].split('.')
              while levels[-1] == "":
                  levels = levels[:-1]
              for level in levels[:-1]:
                  if level == "":
                      continue
                  if level not in currentlevel:
                      currentlevel[level] = self.get_empty_table()
                  currentlevel = currentlevel[level]
              pair[0] = levels[-1].strip()
          elif (pair[0][0] == '"' or pair[0][0] == "'") and \
                  (pair[0][-1] == pair[0][0]):
              pair[0] = _unescape(pair[0][1:-1])
          k, koffset = self._load_line_multiline_str(pair[1])
          if k > -1:
              # Start of a multi-line string: an odd number of trailing
              # backslashes escapes the line break.
              while k > -1 and pair[1][k + koffset] == '\\':
                  multibackslash = not multibackslash
                  k -= 1
              if multibackslash:
                  multilinestr = pair[1][:-1]
              else:
                  multilinestr = pair[1] + "\n"
              multikey = pair[0]
          else:
              value, vtype = self.load_value(pair[1], strictly_valid)
          try:
              currentlevel[pair[0]]
              raise ValueError("Duplicate keys!")
          except TypeError:
              raise ValueError("Duplicate keys!")
          except KeyError:
              if multikey:
                  return multikey, multilinestr, multibackslash
              else:
                  currentlevel[pair[0]] = value

      def _load_line_multiline_str(self, p):
          """Detect a triple-quoted string that is opened but not closed.

          Returns:
              ``(index, offset)``: *index* is -1 when *p* is not such a
              string, otherwise the index of the last character of the
              string part; *offset* is the number of characters skipped in
              front of it when the string is the last element of an
              unclosed array.
          """
          poffset = 0
          if len(p) < 3:
              return -1, poffset
          if p[0] == '[' and (p.strip()[-1] != ']' and
                              self._load_array_isstrarray(p)):
              newp = p[1:].strip().split(',')
              while len(newp) > 1 and newp[-1][0] != '"' and newp[-1][0] != "'":
                  newp = newp[:-2] + [newp[-2] + ',' + newp[-1]]
              newp = newp[-1]
              poffset = len(p) - len(newp)
              p = newp
          if p[0] != '"' and p[0] != "'":
              return -1, poffset
          if p[1] != p[0] or p[2] != p[0]:
              return -1, poffset
          if len(p) > 5 and p[-1] == p[0] and p[-2] == p[0] and p[-3] == p[0]:
              return -1, poffset
          return len(p) - 1, poffset

      def load_value(self, v, strictly_valid=True):
          """Convert the text of one value into a Python object.

          Args:
              v: The value text, already stripped.
              strictly_valid: Result of the leading-zero/underscore check;
                  when False, text that is not a date is rejected.

          Returns:
              ``(value, type_name)`` with type_name one of "bool", "str",
              "array", "inline_object", "time", "date", "int", "float".

          Raises:
              ValueError: When the text is not a valid value.
          """
          if not v:
              raise ValueError("Empty value is invalid")
          if v == 'true':
              return (True, "bool")
          elif v.lower() == 'true':
              raise ValueError("Only all lowercase booleans allowed")
          elif v == 'false':
              return (False, "bool")
          elif v.lower() == 'false':
              raise ValueError("Only all lowercase booleans allowed")
          elif v[0] == '"' or v[0] == "'":
              quotechar = v[0]
              testv = v[1:].split(quotechar)
              triplequote = False
              triplequotecount = 0
              if len(testv) > 1 and testv[0] == '' and testv[1] == '':
                  testv = testv[2:]
                  triplequote = True
              closed = False
              # Make sure nothing follows the closing quote(s).
              for tv in testv:
                  if tv == '':
                      if triplequote:
                          triplequotecount += 1
                      else:
                          closed = True
                  else:
                      oddbackslash = False
                      try:
                          i = -1
                          j = tv[i]
                          while j == '\\':
                              oddbackslash = not oddbackslash
                              i -= 1
                              j = tv[i]
                      except IndexError:
                          pass
                      if not oddbackslash:
                          if closed:
                              raise ValueError("Found tokens after a closed " +
                                               "string. Invalid TOML.")
                          else:
                              if not triplequote or triplequotecount > 1:
                                  closed = True
                              else:
                                  triplequotecount = 0
              if quotechar == '"':
                  # Only double-quoted (basic) strings process escapes.
                  escapeseqs = v.split('\\')[1:]
                  backslash = False
                  for i in escapeseqs:
                      if i == '':
                          backslash = not backslash
                      else:
                          if i[0] not in _escapes and (i[0] != 'u' and
                                                       i[0] != 'U' and
                                                       not backslash):
                              raise ValueError("Reserved escape sequence used")
                          if backslash:
                              backslash = False
                  for prefix in ["\\u", "\\U"]:
                      if prefix in v:
                          hexbytes = v.split(prefix)
                          v = _load_unicode_escapes(hexbytes[0], hexbytes[1:],
                                                    prefix)
                  v = _unescape(v)
              if len(v) > 1 and v[1] == quotechar and (len(v) < 3 or
                                                       v[1] == v[2]):
                  v = v[2:-2]
              return (v[1:-1], "str")
          elif v[0] == '[':
              return (self.load_array(v), "array")
          elif v[0] == '{':
              inline_object = self.get_empty_inline_table()
              self.load_inline_object(v, inline_object)
              return (inline_object, "inline_object")
          elif TIME_RE.match(v):
              h, m, s, _, ms = TIME_RE.match(v).groups()
              # The fraction has 3 to 6 digits: scale it to microseconds
              # (".999" is 999000 microseconds), as _load_date does.
              microsecond = int(ms) * 10 ** (6 - len(ms)) if ms else 0
              time = datetime.time(int(h), int(m), int(s), microsecond)
              return (time, "time")
          else:
              parsed_date = _load_date(v)
              if parsed_date is not None:
                  return (parsed_date, "date")
              if not strictly_valid:
                  raise ValueError("Weirdness with leading zeroes or "
                                   "underscores in your number.")
              itype = "int"
              neg = False
              if v[0] == '-':
                  neg = True
                  v = v[1:]
              elif v[0] == '+':
                  v = v[1:]
              v = v.replace('_', '')
              lowerv = v.lower()
              if '.' in v or ('x' not in v and ('e' in v or 'E' in v)):
                  if '.' in v and v.split('.', 1)[1] == '':
                      raise ValueError("This float is missing digits after "
                                       "the point")
                  if v[0] not in '0123456789':
                      raise ValueError("This float doesn't have a leading "
                                       "digit")
                  v = float(v)
                  itype = "float"
              elif len(lowerv) == 3 and (lowerv == 'inf' or lowerv == 'nan'):
                  v = float(v)
                  itype = "float"
              if itype == "int":
                  # Base 0 lets int() honour the 0x / 0o / 0b prefixes.
                  v = int(v, 0)
              if neg:
                  return (0 - v, itype)
              return (v, itype)

      def bounded_string(self, s):
          """Return True when *s* is empty or ends with an unescaped copy of
          its first character."""
          if len(s) == 0:
              return True
          if s[-1] != s[0]:
              return False
          i = -2
          backslash = False
          while len(s) + i > 0:
              if s[i] == "\\":
                  backslash = not backslash
                  i -= 1
              else:
                  break
          return not backslash

      def _load_array_isstrarray(self, a):
          """Return True when the first element of array text *a* starts
          with a quote character."""
          a = a[1:-1].strip()
          if a != '' and (a[0] == '"' or a[0] == "'"):
              return True
          return False

      def load_array(self, a):
          """Convert the text of an array into a list.

          Raises:
              ValueError: When the elements are not all of the same type.
          """
          atype = None
          retval = []
          a = a.strip()
          if '[' not in a[1:-1] or "" != a[1:-1].split('[')[0].strip():
              strarray = self._load_array_isstrarray(a)
              if not a[1:-1].strip().startswith('{'):
                  a = a[1:-1].split(',')
              else:
                  # a is an inline object, we must find the matching parenthesis
                  # to define groups
                  new_a = []
                  start_group_index = 1
                  end_group_index = 2
                  open_bracket_count = 1 if a[start_group_index] == '{' else 0
                  in_str = False
                  while end_group_index < len(a[1:]):
                      if a[end_group_index] == '"' or a[end_group_index] == "'":
                          if in_str:
                              backslash_index = end_group_index - 1
                              while (backslash_index > -1 and
                                     a[backslash_index] == '\\'):
                                  in_str = not in_str
                                  backslash_index -= 1
                          in_str = not in_str
                      if not in_str and a[end_group_index] == '{':
                          open_bracket_count += 1
                      if in_str or a[end_group_index] != '}':
                          end_group_index += 1
                          continue
                      elif a[end_group_index] == '}' and open_bracket_count > 1:
                          open_bracket_count -= 1
                          end_group_index += 1
                          continue

                      # Increase end_group_index by 1 to get the closing bracket
                      end_group_index += 1

                      new_a.append(a[start_group_index:end_group_index])

                      # The next start index is at least after the closing
                      # bracket, a closing bracket can be followed by a comma
                      # since we are in an array.
                      start_group_index = end_group_index + 1
                      while (start_group_index < len(a[1:]) and
                             a[start_group_index] != '{'):
                          start_group_index += 1
                      end_group_index = start_group_index + 1
                  a = new_a
              b = 0
              if strarray:
                  # Re-join string elements that were cut at a comma inside
                  # the string.
                  while b < len(a) - 1:
                      ab = a[b].strip()
                      while (not self.bounded_string(ab) or
                             (len(ab) > 2 and
                              ab[0] == ab[1] == ab[2] and
                              ab[-2] != ab[0] and
                              ab[-3] != ab[0])):
                          a[b] = a[b] + ',' + a[b + 1]
                          ab = a[b].strip()
                          if b < len(a) - 2:
                              a = a[:b + 1] + a[b + 2:]
                          else:
                              a = a[:b + 1]
                      b += 1
          else:
              # Nested arrays: split on the commas at bracket depth zero.
              al = list(a[1:-1])
              a = []
              openarr = 0
              j = 0
              for i in _range(len(al)):
                  if al[i] == '[':
                      openarr += 1
                  elif al[i] == ']':
                      openarr -= 1
                  elif al[i] == ',' and not openarr:
                      a.append(''.join(al[j:i]))
                      j = i + 1
              a.append(''.join(al[j:]))
          for i in _range(len(a)):
              a[i] = a[i].strip()
              if a[i] != '':
                  nval, ntype = self.load_value(a[i])
                  if atype:
                      if ntype != atype:
                          raise ValueError("Not a homogeneous array")
                  else:
                      atype = ntype
                  retval.append(nval)
          return retval

      def preserve_comment(self, line_no, key, comment, beginline):
          """Hook called for each comment found; does nothing here."""
          pass

      def embed_comments(self, idx, currentlevel):
          """Hook called before each line is processed; does nothing here."""
          pass
  class TomlPreserveCommentDecoder(TomlDecoder):
      """Decoder that keeps comments by wrapping values in CommentValue."""

      def __init__(self, _dict=dict):
          # Maps a line number to (key, comment text, beginline flag).
          self.saved_comments = {}
          super(TomlPreserveCommentDecoder, self).__init__(_dict)

      def preserve_comment(self, line_no, key, comment, beginline):
          """Record *comment* under *line_no* together with its key."""
          self.saved_comments[line_no] = (key, comment, beginline)

      def embed_comments(self, idx, currentlevel):
          """Attach the comment saved under *idx*, if any, to its value.

          The value stored at the comment's key in *currentlevel* is
          replaced by a CommentValue wrapping it. A comment whose key is not
          present in *currentlevel* (for example one that appears before
          any key) is skipped instead of raising KeyError.
          """
          if idx not in self.saved_comments:
              return
          key, comment, beginline = self.saved_comments[idx]
          if key not in currentlevel:
              return
          currentlevel[key] = CommentValue(currentlevel[key], comment, beginline,
                                           self._dict)