__init__.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. import railroad
  2. import pyparsing
  3. from pkg_resources import resource_filename
  4. from typing import (
  5. List,
  6. Optional,
  7. NamedTuple,
  8. Generic,
  9. TypeVar,
  10. Dict,
  11. Callable,
  12. Set,
  13. Iterable,
  14. )
  15. from jinja2 import Template
  16. from io import StringIO
  17. import inspect
  18. with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
  19. template = Template(fp.read())
  20. # Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
  21. NamedDiagram = NamedTuple(
  22. "NamedDiagram",
  23. [("name", str), ("diagram", Optional[railroad.DiagramItem]), ("index", int)],
  24. )
  25. """
  26. A simple structure for associating a name with a railroad diagram
  27. """
  28. T = TypeVar("T")
  29. class EachItem(railroad.Group):
  30. """
  31. Custom railroad item to compose a:
  32. - Group containing a
  33. - OneOrMore containing a
  34. - Choice of the elements in the Each
  35. with the group label indicating that all must be matched
  36. """
  37. all_label = "[ALL]"
  38. def __init__(self, *items):
  39. choice_item = railroad.Choice(len(items) - 1, *items)
  40. one_or_more_item = railroad.OneOrMore(item=choice_item)
  41. super().__init__(one_or_more_item, label=self.all_label)
  42. class AnnotatedItem(railroad.Group):
  43. """
  44. Simple subclass of Group that creates an annotation label
  45. """
  46. def __init__(self, label: str, item):
  47. super().__init__(item=item, label="[{}]".format(label))
  48. class EditablePartial(Generic[T]):
  49. """
  50. Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
  51. constructed.
  52. """
  53. # We need this here because the railroad constructors actually transform the data, so can't be called until the
  54. # entire tree is assembled
  55. def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
  56. self.func = func
  57. self.args = args
  58. self.kwargs = kwargs
  59. @classmethod
  60. def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
  61. """
  62. If you call this function in the same way that you would call the constructor, it will store the arguments
  63. as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
  64. """
  65. return EditablePartial(func=func, args=list(args), kwargs=kwargs)
  66. @property
  67. def name(self):
  68. return self.kwargs["name"]
  69. def __call__(self) -> T:
  70. """
  71. Evaluate the partial and return the result
  72. """
  73. args = self.args.copy()
  74. kwargs = self.kwargs.copy()
  75. # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
  76. # args=['list', 'of', 'things'])
  77. arg_spec = inspect.getfullargspec(self.func)
  78. if arg_spec.varargs in self.kwargs:
  79. args += kwargs.pop(arg_spec.varargs)
  80. return self.func(*args, **kwargs)
  81. def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
  82. """
  83. Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
  84. :params kwargs: kwargs to be passed in to the template
  85. """
  86. data = []
  87. for diagram in diagrams:
  88. io = StringIO()
  89. diagram.diagram.writeSvg(io.write)
  90. title = diagram.name
  91. if diagram.index == 0:
  92. title += " (root)"
  93. data.append({"title": title, "text": "", "svg": io.getvalue()})
  94. return template.render(diagrams=data, **kwargs)
  95. def resolve_partial(partial: "EditablePartial[T]") -> T:
  96. """
  97. Recursively resolves a collection of Partials into whatever type they are
  98. """
  99. if isinstance(partial, EditablePartial):
  100. partial.args = resolve_partial(partial.args)
  101. partial.kwargs = resolve_partial(partial.kwargs)
  102. return partial()
  103. elif isinstance(partial, list):
  104. return [resolve_partial(x) for x in partial]
  105. elif isinstance(partial, dict):
  106. return {key: resolve_partial(x) for key, x in partial.items()}
  107. else:
  108. return partial
  109. def to_railroad(
  110. element: pyparsing.ParserElement,
  111. diagram_kwargs: Optional[dict] = None,
  112. vertical: int = 3,
  113. show_results_names: bool = False,
  114. ) -> List[NamedDiagram]:
  115. """
  116. Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
  117. creation if you want to access the Railroad tree before it is converted to HTML
  118. :param element: base element of the parser being diagrammed
  119. :param diagram_kwargs: kwargs to pass to the Diagram() constructor
  120. :param vertical: (optional) - int - limit at which number of alternatives should be
  121. shown vertically instead of horizontally
  122. :param show_results_names - bool to indicate whether results name annotations should be
  123. included in the diagram
  124. """
  125. # Convert the whole tree underneath the root
  126. lookup = ConverterState(diagram_kwargs=diagram_kwargs or {})
  127. _to_diagram_element(
  128. element,
  129. lookup=lookup,
  130. parent=None,
  131. vertical=vertical,
  132. show_results_names=show_results_names,
  133. )
  134. root_id = id(element)
  135. # Convert the root if it hasn't been already
  136. if root_id in lookup:
  137. if not element.customName:
  138. lookup[root_id].name = ""
  139. lookup[root_id].mark_for_extraction(root_id, lookup, force=True)
  140. # Now that we're finished, we can convert from intermediate structures into Railroad elements
  141. diags = list(lookup.diagrams.values())
  142. if len(diags) > 1:
  143. # collapse out duplicate diags with the same name
  144. seen = set()
  145. deduped_diags = []
  146. for d in diags:
  147. # don't extract SkipTo elements, they are uninformative as subdiagrams
  148. if d.name == "...":
  149. continue
  150. if d.name is not None and d.name not in seen:
  151. seen.add(d.name)
  152. deduped_diags.append(d)
  153. resolved = [resolve_partial(partial) for partial in deduped_diags]
  154. else:
  155. # special case - if just one diagram, always display it, even if
  156. # it has no name
  157. resolved = [resolve_partial(partial) for partial in diags]
  158. return sorted(resolved, key=lambda diag: diag.index)
  159. def _should_vertical(
  160. specification: int, exprs: Iterable[pyparsing.ParserElement]
  161. ) -> bool:
  162. """
  163. Returns true if we should return a vertical list of elements
  164. """
  165. if specification is None:
  166. return False
  167. else:
  168. return len(_visible_exprs(exprs)) >= specification
  169. class ElementState:
  170. """
  171. State recorded for an individual pyparsing Element
  172. """
  173. # Note: this should be a dataclass, but we have to support Python 3.5
  174. def __init__(
  175. self,
  176. element: pyparsing.ParserElement,
  177. converted: EditablePartial,
  178. parent: EditablePartial,
  179. number: int,
  180. name: str = None,
  181. parent_index: Optional[int] = None,
  182. ):
  183. #: The pyparsing element that this represents
  184. self.element: pyparsing.ParserElement = element
  185. #: The name of the element
  186. self.name: str = name
  187. #: The output Railroad element in an unconverted state
  188. self.converted: EditablePartial = converted
  189. #: The parent Railroad element, which we store so that we can extract this if it's duplicated
  190. self.parent: EditablePartial = parent
  191. #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
  192. self.number: int = number
  193. #: The index of this inside its parent
  194. self.parent_index: Optional[int] = parent_index
  195. #: If true, we should extract this out into a subdiagram
  196. self.extract: bool = False
  197. #: If true, all of this element's children have been filled out
  198. self.complete: bool = False
  199. def mark_for_extraction(
  200. self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
  201. ):
  202. """
  203. Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
  204. :param el_id: id of the element
  205. :param state: element/diagram state tracker
  206. :param name: name to use for this element's text
  207. :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the
  208. root element when we know we're finished
  209. """
  210. self.extract = True
  211. # Set the name
  212. if not self.name:
  213. if name:
  214. # Allow forcing a custom name
  215. self.name = name
  216. elif self.element.customName:
  217. self.name = self.element.customName
  218. else:
  219. self.name = ""
  220. # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children
  221. # to be added
  222. # Also, if this is just a string literal etc, don't bother extracting it
  223. if force or (self.complete and _worth_extracting(self.element)):
  224. state.extract_into_diagram(el_id)
  225. class ConverterState:
  226. """
  227. Stores some state that persists between recursions into the element tree
  228. """
  229. def __init__(self, diagram_kwargs: Optional[dict] = None):
  230. #: A dictionary mapping ParserElements to state relating to them
  231. self._element_diagram_states: Dict[int, ElementState] = {}
  232. #: A dictionary mapping ParserElement IDs to subdiagrams generated from them
  233. self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
  234. #: The index of the next unnamed element
  235. self.unnamed_index: int = 1
  236. #: The index of the next element. This is used for sorting
  237. self.index: int = 0
  238. #: Shared kwargs that are used to customize the construction of diagrams
  239. self.diagram_kwargs: dict = diagram_kwargs or {}
  240. self.extracted_diagram_names: Set[str] = set()
  241. def __setitem__(self, key: int, value: ElementState):
  242. self._element_diagram_states[key] = value
  243. def __getitem__(self, key: int) -> ElementState:
  244. return self._element_diagram_states[key]
  245. def __delitem__(self, key: int):
  246. del self._element_diagram_states[key]
  247. def __contains__(self, key: int):
  248. return key in self._element_diagram_states
  249. def generate_unnamed(self) -> int:
  250. """
  251. Generate a number used in the name of an otherwise unnamed diagram
  252. """
  253. self.unnamed_index += 1
  254. return self.unnamed_index
  255. def generate_index(self) -> int:
  256. """
  257. Generate a number used to index a diagram
  258. """
  259. self.index += 1
  260. return self.index
  261. def extract_into_diagram(self, el_id: int):
  262. """
  263. Used when we encounter the same token twice in the same tree. When this
  264. happens, we replace all instances of that token with a terminal, and
  265. create a new subdiagram for the token
  266. """
  267. position = self[el_id]
  268. # Replace the original definition of this element with a regular block
  269. if position.parent:
  270. ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name)
  271. if "item" in position.parent.kwargs:
  272. position.parent.kwargs["item"] = ret
  273. elif "items" in position.parent.kwargs:
  274. position.parent.kwargs["items"][position.parent_index] = ret
  275. # If the element we're extracting is a group, skip to its content but keep the title
  276. if position.converted.func == railroad.Group:
  277. content = position.converted.kwargs["item"]
  278. else:
  279. content = position.converted
  280. self.diagrams[el_id] = EditablePartial.from_call(
  281. NamedDiagram,
  282. name=position.name,
  283. diagram=EditablePartial.from_call(
  284. railroad.Diagram, content, **self.diagram_kwargs
  285. ),
  286. index=position.number,
  287. )
  288. del self[el_id]
  289. def _worth_extracting(element: pyparsing.ParserElement) -> bool:
  290. """
  291. Returns true if this element is worth having its own sub-diagram. Simply, if any of its children
  292. themselves have children, then its complex enough to extract
  293. """
  294. children = element.recurse()
  295. return any(child.recurse() for child in children)
  296. def _apply_diagram_item_enhancements(fn):
  297. """
  298. decorator to ensure enhancements to a diagram item (such as results name annotations)
  299. get applied on return from _to_diagram_element (we do this since there are several
  300. returns in _to_diagram_element)
  301. """
  302. def _inner(
  303. element: pyparsing.ParserElement,
  304. parent: Optional[EditablePartial],
  305. lookup: ConverterState = None,
  306. vertical: int = None,
  307. index: int = 0,
  308. name_hint: str = None,
  309. show_results_names: bool = False,
  310. ) -> Optional[EditablePartial]:
  311. ret = fn(
  312. element,
  313. parent,
  314. lookup,
  315. vertical,
  316. index,
  317. name_hint,
  318. show_results_names,
  319. )
  320. # apply annotation for results name, if present
  321. if show_results_names and ret is not None:
  322. element_results_name = element.resultsName
  323. if element_results_name:
  324. # add "*" to indicate if this is a "list all results" name
  325. element_results_name += "" if element.modalResults else "*"
  326. ret = EditablePartial.from_call(
  327. railroad.Group, item=ret, label=element_results_name
  328. )
  329. return ret
  330. return _inner
  331. def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
  332. non_diagramming_exprs = (
  333. pyparsing.ParseElementEnhance,
  334. pyparsing.PositionToken,
  335. pyparsing.And._ErrorStop,
  336. )
  337. return [
  338. e
  339. for e in exprs
  340. if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs))
  341. ]
  342. @_apply_diagram_item_enhancements
  343. def _to_diagram_element(
  344. element: pyparsing.ParserElement,
  345. parent: Optional[EditablePartial],
  346. lookup: ConverterState = None,
  347. vertical: int = None,
  348. index: int = 0,
  349. name_hint: str = None,
  350. show_results_names: bool = False,
  351. ) -> Optional[EditablePartial]:
  352. """
  353. Recursively converts a PyParsing Element to a railroad Element
  354. :param lookup: The shared converter state that keeps track of useful things
  355. :param index: The index of this element within the parent
  356. :param parent: The parent of this element in the output tree
  357. :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
  358. it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
  359. do so
  360. :param name_hint: If provided, this will override the generated name
  361. :param show_results_names: bool flag indicating whether to add annotations for results names
  362. :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed
  363. """
  364. exprs = element.recurse()
  365. name = name_hint or element.customName or element.__class__.__name__
  366. # Python's id() is used to provide a unique identifier for elements
  367. el_id = id(element)
  368. element_results_name = element.resultsName
  369. # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
  370. if not element.customName:
  371. if isinstance(
  372. element,
  373. (
  374. pyparsing.TokenConverter,
  375. # pyparsing.Forward,
  376. pyparsing.Located,
  377. ),
  378. ):
  379. # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
  380. if exprs:
  381. if not exprs[0].customName:
  382. propagated_name = name
  383. else:
  384. propagated_name = None
  385. return _to_diagram_element(
  386. element.expr,
  387. parent=parent,
  388. lookup=lookup,
  389. vertical=vertical,
  390. index=index,
  391. name_hint=propagated_name,
  392. show_results_names=show_results_names,
  393. )
  394. # If the element isn't worth extracting, we always treat it as the first time we say it
  395. if _worth_extracting(element):
  396. if el_id in lookup:
  397. # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
  398. # so we have to extract it into a new diagram.
  399. looked_up = lookup[el_id]
  400. looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
  401. ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
  402. return ret
  403. elif el_id in lookup.diagrams:
  404. # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
  405. # just put in a marker element that refers to the sub-diagram
  406. ret = EditablePartial.from_call(
  407. railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
  408. )
  409. return ret
  410. # Recursively convert child elements
  411. # Here we find the most relevant Railroad element for matching pyparsing Element
  412. # We use ``items=[]`` here to hold the place for where the child elements will go once created
  413. if isinstance(element, pyparsing.And):
  414. # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
  415. # (all will have the same name, and resultsName)
  416. if not exprs:
  417. return None
  418. if len(set((e.name, e.resultsName) for e in exprs)) == 1:
  419. ret = EditablePartial.from_call(
  420. railroad.OneOrMore, item="", repeat=str(len(exprs))
  421. )
  422. elif _should_vertical(vertical, exprs):
  423. ret = EditablePartial.from_call(railroad.Stack, items=[])
  424. else:
  425. ret = EditablePartial.from_call(railroad.Sequence, items=[])
  426. elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
  427. if not exprs:
  428. return None
  429. if _should_vertical(vertical, exprs):
  430. ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
  431. else:
  432. ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
  433. elif isinstance(element, pyparsing.Each):
  434. if not exprs:
  435. return None
  436. ret = EditablePartial.from_call(EachItem, items=[])
  437. elif isinstance(element, pyparsing.NotAny):
  438. ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
  439. elif isinstance(element, pyparsing.FollowedBy):
  440. ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
  441. elif isinstance(element, pyparsing.PrecededBy):
  442. ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
  443. elif isinstance(element, pyparsing.Opt):
  444. ret = EditablePartial.from_call(railroad.Optional, item="")
  445. elif isinstance(element, pyparsing.OneOrMore):
  446. ret = EditablePartial.from_call(railroad.OneOrMore, item="")
  447. elif isinstance(element, pyparsing.ZeroOrMore):
  448. ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
  449. elif isinstance(element, pyparsing.Group):
  450. ret = EditablePartial.from_call(
  451. railroad.Group, item=None, label=element_results_name
  452. )
  453. elif isinstance(element, pyparsing.Empty) and not element.customName:
  454. # Skip unnamed "Empty" elements
  455. ret = None
  456. elif len(exprs) > 1:
  457. ret = EditablePartial.from_call(railroad.Sequence, items=[])
  458. elif len(exprs) > 0 and not element_results_name:
  459. ret = EditablePartial.from_call(railroad.Group, item="", label=name)
  460. else:
  461. terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
  462. ret = terminal
  463. if ret is None:
  464. return
  465. # Indicate this element's position in the tree so we can extract it if necessary
  466. lookup[el_id] = ElementState(
  467. element=element,
  468. converted=ret,
  469. parent=parent,
  470. parent_index=index,
  471. number=lookup.generate_index(),
  472. )
  473. if element.customName:
  474. lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)
  475. i = 0
  476. for expr in exprs:
  477. # Add a placeholder index in case we have to extract the child before we even add it to the parent
  478. if "items" in ret.kwargs:
  479. ret.kwargs["items"].insert(i, None)
  480. item = _to_diagram_element(
  481. expr,
  482. parent=ret,
  483. lookup=lookup,
  484. vertical=vertical,
  485. index=i,
  486. show_results_names=show_results_names,
  487. )
  488. # Some elements don't need to be shown in the diagram
  489. if item is not None:
  490. if "item" in ret.kwargs:
  491. ret.kwargs["item"] = item
  492. elif "items" in ret.kwargs:
  493. # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
  494. ret.kwargs["items"][i] = item
  495. i += 1
  496. elif "items" in ret.kwargs:
  497. # If we're supposed to skip this element, remove it from the parent
  498. del ret.kwargs["items"][i]
  499. # If all this items children are none, skip this item
  500. if ret and (
  501. ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
  502. or ("item" in ret.kwargs and ret.kwargs["item"] is None)
  503. ):
  504. ret = EditablePartial.from_call(railroad.Terminal, name)
  505. # Mark this element as "complete", ie it has all of its children
  506. if el_id in lookup:
  507. lookup[el_id].complete = True
  508. if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
  509. lookup.extract_into_diagram(el_id)
  510. if ret is not None:
  511. ret = EditablePartial.from_call(
  512. railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
  513. )
  514. return ret