_parser.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. from __future__ import annotations
  2. from collections.abc import Iterable
  3. import string
  4. from types import MappingProxyType
  5. from typing import Any, BinaryIO, NamedTuple
  6. from tomli._re import (
  7. RE_DATETIME,
  8. RE_LOCALTIME,
  9. RE_NUMBER,
  10. match_to_datetime,
  11. match_to_localtime,
  12. match_to_number,
  13. )
  14. from tomli._types import Key, ParseFloat, Pos
  15. ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
  16. # Neither of these sets include quotation mark or backslash. They are
  17. # currently handled as separate cases in the parser functions.
  18. ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t")
  19. ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n")
  20. ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
  21. ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
  22. ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
  23. TOML_WS = frozenset(" \t")
  24. TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n")
  25. BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
  26. KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
  27. HEXDIGIT_CHARS = frozenset(string.hexdigits)
  28. BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
  29. {
  30. "\\b": "\u0008", # backspace
  31. "\\t": "\u0009", # tab
  32. "\\n": "\u000A", # linefeed
  33. "\\f": "\u000C", # form feed
  34. "\\r": "\u000D", # carriage return
  35. '\\"': "\u0022", # quote
  36. "\\\\": "\u005C", # backslash
  37. }
  38. )
  39. class TOMLDecodeError(ValueError):
  40. """An error raised if a document is not valid TOML."""
  41. def load(__fp: BinaryIO, *, parse_float: ParseFloat = float) -> dict[str, Any]:
  42. """Parse TOML from a binary file object."""
  43. s = __fp.read().decode()
  44. return loads(s, parse_float=parse_float)
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:  # noqa: C901
    """Parse TOML from a string.

    Drives the statement-level grammar: each loop iteration consumes one
    statement (key/value pair, table header, array-of-tables header, comment,
    or blank line) and must end at a newline or end of document.
    """
    # The spec allows converting "\r\n" to "\n", even in string
    # literals. Let's do so to simplify parsing.
    src = __s.replace("\r\n", "\n")
    pos = 0
    out = Output(NestedDict(), Flags())
    header: Key = ()  # key of the table section currently being filled

    # Parse one statement at a time
    # (typically means one line in TOML source)
    while True:
        # 1. Skip line leading whitespace
        pos = skip_chars(src, pos, TOML_WS)

        # 2. Parse rules. Expect one of the following:
        #    - end of file
        #    - end of line
        #    - comment
        #    - key/value pair
        #    - append dict to list (and move to its namespace)
        #    - create dict (and move to its namespace)
        # Skip trailing whitespace when applicable.
        try:
            char = src[pos]
        except IndexError:
            break
        if char == "\n":
            # Blank line: nothing to do.
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            try:
                second_char: str | None = src[pos + 1]
            except IndexError:
                second_char = None
            # A new section header commits any flags queued by dotted keys
            # in the previous section.
            out.flags.finalize_pending()
            if second_char == "[":
                pos, header = create_list_rule(src, pos, out)
            else:
                pos, header = create_dict_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise suffixed_err(src, pos, "Invalid statement")

        # 3. Skip comment
        pos = skip_comment(src, pos)

        # 4. Expect end of line or end of file
        try:
            char = src[pos]
        except IndexError:
            break
        if char != "\n":
            raise suffixed_err(
                src, pos, "Expected newline or end of document after a statement"
            )
        pos += 1

    return out.data.dict
  102. class Flags:
  103. """Flags that map to parsed keys/namespaces."""
  104. # Marks an immutable namespace (inline array or inline table).
  105. FROZEN = 0
  106. # Marks a nest that has been explicitly created and can no longer
  107. # be opened using the "[table]" syntax.
  108. EXPLICIT_NEST = 1
  109. def __init__(self) -> None:
  110. self._flags: dict[str, dict] = {}
  111. self._pending_flags: set[tuple[Key, int]] = set()
  112. def add_pending(self, key: Key, flag: int) -> None:
  113. self._pending_flags.add((key, flag))
  114. def finalize_pending(self) -> None:
  115. for key, flag in self._pending_flags:
  116. self.set(key, flag, recursive=False)
  117. self._pending_flags.clear()
  118. def unset_all(self, key: Key) -> None:
  119. cont = self._flags
  120. for k in key[:-1]:
  121. if k not in cont:
  122. return
  123. cont = cont[k]["nested"]
  124. cont.pop(key[-1], None)
  125. def set(self, key: Key, flag: int, *, recursive: bool) -> None: # noqa: A003
  126. cont = self._flags
  127. key_parent, key_stem = key[:-1], key[-1]
  128. for k in key_parent:
  129. if k not in cont:
  130. cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
  131. cont = cont[k]["nested"]
  132. if key_stem not in cont:
  133. cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
  134. cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)
  135. def is_(self, key: Key, flag: int) -> bool:
  136. if not key:
  137. return False # document root has no flags
  138. cont = self._flags
  139. for k in key[:-1]:
  140. if k not in cont:
  141. return False
  142. inner_cont = cont[k]
  143. if flag in inner_cont["recursive_flags"]:
  144. return True
  145. cont = inner_cont["nested"]
  146. key_stem = key[-1]
  147. if key_stem in cont:
  148. cont = cont[key_stem]
  149. return flag in cont["flags"] or flag in cont["recursive_flags"]
  150. return False
  151. class NestedDict:
  152. def __init__(self) -> None:
  153. # The parsed content of the TOML document
  154. self.dict: dict[str, Any] = {}
  155. def get_or_create_nest(
  156. self,
  157. key: Key,
  158. *,
  159. access_lists: bool = True,
  160. ) -> dict:
  161. cont: Any = self.dict
  162. for k in key:
  163. if k not in cont:
  164. cont[k] = {}
  165. cont = cont[k]
  166. if access_lists and isinstance(cont, list):
  167. cont = cont[-1]
  168. if not isinstance(cont, dict):
  169. raise KeyError("There is no nest behind this key")
  170. return cont
  171. def append_nest_to_list(self, key: Key) -> None:
  172. cont = self.get_or_create_nest(key[:-1])
  173. last_key = key[-1]
  174. if last_key in cont:
  175. list_ = cont[last_key]
  176. try:
  177. list_.append({})
  178. except AttributeError:
  179. raise KeyError("An object other than list found behind this key")
  180. else:
  181. cont[last_key] = [{}]
class Output(NamedTuple):
    """Parser state bundle: the document under construction plus the
    flags that guard its namespaces."""

    data: NestedDict
    flags: Flags
  185. def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
  186. try:
  187. while src[pos] in chars:
  188. pos += 1
  189. except IndexError:
  190. pass
  191. return pos
  192. def skip_until(
  193. src: str,
  194. pos: Pos,
  195. expect: str,
  196. *,
  197. error_on: frozenset[str],
  198. error_on_eof: bool,
  199. ) -> Pos:
  200. try:
  201. new_pos = src.index(expect, pos)
  202. except ValueError:
  203. new_pos = len(src)
  204. if error_on_eof:
  205. raise suffixed_err(src, new_pos, f"Expected {expect!r}") from None
  206. if not error_on.isdisjoint(src[pos:new_pos]):
  207. while src[pos] not in error_on:
  208. pos += 1
  209. raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}")
  210. return new_pos
  211. def skip_comment(src: str, pos: Pos) -> Pos:
  212. try:
  213. char: str | None = src[pos]
  214. except IndexError:
  215. char = None
  216. if char == "#":
  217. return skip_until(
  218. src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
  219. )
  220. return pos
  221. def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
  222. while True:
  223. pos_before_skip = pos
  224. pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
  225. pos = skip_comment(src, pos)
  226. if pos == pos_before_skip:
  227. return pos
  228. def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
  229. pos += 1 # Skip "["
  230. pos = skip_chars(src, pos, TOML_WS)
  231. pos, key = parse_key(src, pos)
  232. if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN):
  233. raise suffixed_err(src, pos, f"Can not declare {key} twice")
  234. out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
  235. try:
  236. out.data.get_or_create_nest(key)
  237. except KeyError:
  238. raise suffixed_err(src, pos, "Can not overwrite a value") from None
  239. if not src.startswith("]", pos):
  240. raise suffixed_err(src, pos, 'Expected "]" at the end of a table declaration')
  241. return pos + 1, key
  242. def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
  243. pos += 2 # Skip "[["
  244. pos = skip_chars(src, pos, TOML_WS)
  245. pos, key = parse_key(src, pos)
  246. if out.flags.is_(key, Flags.FROZEN):
  247. raise suffixed_err(src, pos, f"Can not mutate immutable namespace {key}")
  248. # Free the namespace now that it points to another empty list item...
  249. out.flags.unset_all(key)
  250. # ...but this key precisely is still prohibited from table declaration
  251. out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
  252. try:
  253. out.data.append_nest_to_list(key)
  254. except KeyError:
  255. raise suffixed_err(src, pos, "Can not overwrite a value") from None
  256. if not src.startswith("]]", pos):
  257. raise suffixed_err(src, pos, 'Expected "]]" at the end of an array declaration')
  258. return pos + 2, key
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse one `key = value` statement and store the value under the
    current section `header`. Returns the position after the value."""
    pos, key, value = parse_key_value_pair(src, pos, parse_float)
    key_parent, key_stem = key[:-1], key[-1]
    abs_key_parent = header + key_parent

    # Every proper prefix of a dotted key, relative to the section header.
    relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
    for cont_key in relative_path_cont_keys:
        # Check that dotted key syntax does not redefine an existing table
        if out.flags.is_(cont_key, Flags.EXPLICIT_NEST):
            raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}")
        # Containers in the relative path can't be opened with the table syntax or
        # dotted key/value syntax in following table sections.
        # (Queued, not set: the flag must not fire within this same section.)
        out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST)

    if out.flags.is_(abs_key_parent, Flags.FROZEN):
        raise suffixed_err(
            src, pos, f"Cannot mutate immutable namespace {abs_key_parent}"
        )

    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise suffixed_err(src, pos, "Can not overwrite a value") from None
    if key_stem in nest:
        raise suffixed_err(src, pos, "Can not overwrite a value")
    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        out.flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos
  288. def parse_key_value_pair(
  289. src: str, pos: Pos, parse_float: ParseFloat
  290. ) -> tuple[Pos, Key, Any]:
  291. pos, key = parse_key(src, pos)
  292. try:
  293. char: str | None = src[pos]
  294. except IndexError:
  295. char = None
  296. if char != "=":
  297. raise suffixed_err(src, pos, 'Expected "=" after a key in a key/value pair')
  298. pos += 1
  299. pos = skip_chars(src, pos, TOML_WS)
  300. pos, value = parse_value(src, pos, parse_float)
  301. return pos, key, value
  302. def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
  303. pos, key_part = parse_key_part(src, pos)
  304. key: Key = (key_part,)
  305. pos = skip_chars(src, pos, TOML_WS)
  306. while True:
  307. try:
  308. char: str | None = src[pos]
  309. except IndexError:
  310. char = None
  311. if char != ".":
  312. return pos, key
  313. pos += 1
  314. pos = skip_chars(src, pos, TOML_WS)
  315. pos, key_part = parse_key_part(src, pos)
  316. key += (key_part,)
  317. pos = skip_chars(src, pos, TOML_WS)
  318. def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
  319. try:
  320. char: str | None = src[pos]
  321. except IndexError:
  322. char = None
  323. if char in BARE_KEY_CHARS:
  324. start_pos = pos
  325. pos = skip_chars(src, pos, BARE_KEY_CHARS)
  326. return pos, src[start_pos:pos]
  327. if char == "'":
  328. return parse_literal_str(src, pos)
  329. if char == '"':
  330. return parse_one_line_basic_str(src, pos)
  331. raise suffixed_err(src, pos, "Invalid initial character for a key part")
  332. def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
  333. pos += 1
  334. return parse_basic_str(src, pos, multiline=False)
  335. def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]:
  336. pos += 1
  337. array: list = []
  338. pos = skip_comments_and_array_ws(src, pos)
  339. if src.startswith("]", pos):
  340. return pos + 1, array
  341. while True:
  342. pos, val = parse_value(src, pos, parse_float)
  343. array.append(val)
  344. pos = skip_comments_and_array_ws(src, pos)
  345. c = src[pos : pos + 1]
  346. if c == "]":
  347. return pos + 1, array
  348. if c != ",":
  349. raise suffixed_err(src, pos, "Unclosed array")
  350. pos += 1
  351. pos = skip_comments_and_array_ws(src, pos)
  352. if src.startswith("]", pos):
  353. return pos + 1, array
def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]:
    """Parse an inline table `{ k = v, ... }` starting at its "{".

    Returns the position after "}" and the resulting dict. A local
    NestedDict/Flags pair is used because an inline table is an isolated
    namespace.
    """
    pos += 1
    nested_dict = NestedDict()
    flags = Flags()

    pos = skip_chars(src, pos, TOML_WS)
    if src.startswith("}", pos):
        return pos + 1, nested_dict.dict
    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float)
        key_parent, key_stem = key[:-1], key[-1]
        if flags.is_(key, Flags.FROZEN):
            raise suffixed_err(src, pos, f"Can not mutate immutable namespace {key}")
        try:
            # access_lists=False: dotted keys in an inline table may not
            # reach into arrays-of-tables.
            nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
        except KeyError:
            raise suffixed_err(src, pos, "Can not overwrite a value") from None
        if key_stem in nest:
            raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}")
        nest[key_stem] = value
        pos = skip_chars(src, pos, TOML_WS)
        c = src[pos : pos + 1]
        if c == "}":
            return pos + 1, nested_dict.dict
        if c != ",":
            raise suffixed_err(src, pos, "Unclosed inline table")
        # Freeze container values only once more keys are known to follow;
        # the flag exists solely to guard later keys in this same table.
        if isinstance(value, (dict, list)):
            flags.set(key, Flags.FROZEN, recursive=True)
        pos += 1
        pos = skip_chars(src, pos, TOML_WS)
def parse_basic_str_escape(  # noqa: C901
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at `pos`.

    Returns the position after the escape and its replacement text. With
    `multiline=True` also handles the line-ending backslash, which
    swallows the newline and all following whitespace.
    """
    escape_id = src[pos : pos + 2]
    pos += 2
    if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
        # Skip whitespace until next non-whitespace character or end of
        # the doc. Error if non-whitespace is found before newline.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            try:
                char = src[pos]
            except IndexError:
                return pos, ""
            if char != "\n":
                raise suffixed_err(src, pos, 'Unescaped "\\" in a string')
            pos += 1
        pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
        return pos, ""
    if escape_id == "\\u":
        return parse_hex_char(src, pos, 4)
    if escape_id == "\\U":
        return parse_hex_char(src, pos, 8)
    try:
        return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    except KeyError:
        # escape_id shorter than 2 chars means the backslash was the last
        # character of the document.
        if len(escape_id) != 2:
            raise suffixed_err(src, pos, "Unterminated string") from None
        raise suffixed_err(src, pos, 'Unescaped "\\" in a string') from None
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Multiline variant of `parse_basic_str_escape` (enables the
    line-ending backslash)."""
    return parse_basic_str_escape(src, pos, multiline=True)
  414. def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
  415. hex_str = src[pos : pos + hex_len]
  416. if len(hex_str) != hex_len or not HEXDIGIT_CHARS.issuperset(hex_str):
  417. raise suffixed_err(src, pos, "Invalid hex value")
  418. pos += hex_len
  419. hex_int = int(hex_str, 16)
  420. if not is_unicode_scalar_value(hex_int):
  421. raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value")
  422. return pos, chr(hex_int)
  423. def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
  424. pos += 1 # Skip starting apostrophe
  425. start_pos = pos
  426. pos = skip_until(
  427. src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
  428. )
  429. return pos + 1, src[start_pos:pos] # Skip ending apostrophe
  430. def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
  431. pos += 3
  432. if src.startswith("\n", pos):
  433. pos += 1
  434. if literal:
  435. delim = "'"
  436. end_pos = skip_until(
  437. src,
  438. pos,
  439. "'''",
  440. error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
  441. error_on_eof=True,
  442. )
  443. result = src[pos:end_pos]
  444. pos = end_pos + 3
  445. else:
  446. delim = '"'
  447. pos, result = parse_basic_str(src, pos, multiline=True)
  448. # Add at maximum two extra apostrophes/quotes if the end sequence
  449. # is 4 or 5 chars long instead of just 3.
  450. if not src.startswith(delim, pos):
  451. return pos, result
  452. pos += 1
  453. if not src.startswith(delim, pos):
  454. return pos, result + delim
  455. pos += 1
  456. return pos, result + (delim * 2)
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse a basic string body starting just after the opening quote(s).

    Returns the position after the closing quote(s) and the unescaped
    text. Plain runs between escapes are accumulated via slicing
    (`src[start_pos:pos]`) rather than char-by-char concatenation.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape
    result = ""
    start_pos = pos  # start of the current uncopied plain run
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise suffixed_err(src, pos, "Unterminated string") from None
        if char == '"':
            if not multiline:
                return pos + 1, result + src[start_pos:pos]
            if src.startswith('"""', pos):
                return pos + 3, result + src[start_pos:pos]
            # A lone quote inside a multiline string is literal content.
            pos += 1
            continue
        if char == "\\":
            # Flush the plain run, then splice in the escape's replacement.
            result += src[start_pos:pos]
            pos, parsed_escape = parse_escapes(src, pos)
            result += parsed_escape
            start_pos = pos
            continue
        if char in error_on:
            raise suffixed_err(src, pos, f"Illegal character {char!r}")
        pos += 1
def parse_value(  # noqa: C901
    src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Any]:
    """Parse a TOML value starting at `pos`.

    Returns the position after the value and the parsed Python object.
    """
    try:
        char: str | None = src[pos]
    except IndexError:
        char = None

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans. On a non-match execution falls through to the checks below
    # and ultimately to "Invalid value".
    if char == "t":
        if src.startswith("true", pos):
            return pos + 4, True
    if char == "f":
        if src.startswith("false", pos):
            return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float)

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match:
        try:
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            # The regex can match calendar-invalid dates (e.g. month 13).
            raise suffixed_err(src, pos, "Invalid date or datetime") from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex will greedily match any type starting with a decimal
    # char, so needs to be located after handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Special floats
    first_three = src[pos : pos + 3]
    if first_three in {"inf", "nan"}:
        return pos + 3, parse_float(first_three)
    first_four = src[pos : pos + 4]
    if first_four in {"-inf", "+inf", "-nan", "+nan"}:
        return pos + 4, parse_float(first_four)

    raise suffixed_err(src, pos, "Invalid value")
  543. def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
  544. """Return a `TOMLDecodeError` where error message is suffixed with
  545. coordinates in source."""
  546. def coord_repr(src: str, pos: Pos) -> str:
  547. if pos >= len(src):
  548. return "end of document"
  549. line = src.count("\n", 0, pos) + 1
  550. if line == 1:
  551. column = pos + 1
  552. else:
  553. column = pos - src.rindex("\n", 0, pos)
  554. return f"line {line}, column {column}"
  555. return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})")
  556. def is_unicode_scalar_value(codepoint: int) -> bool:
  557. return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)