123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593 |
- import railroad
- import pyparsing
- from pkg_resources import resource_filename
- from typing import (
- List,
- Optional,
- NamedTuple,
- Generic,
- TypeVar,
- Dict,
- Callable,
- Set,
- Iterable,
- )
- from jinja2 import Template
- from io import StringIO
- import inspect
# Read the Jinja2 page template packaged with this module once, at import time.
# ``template`` is what railroad_to_html() renders.
with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
    template = Template(fp.read())

# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
# Fields:
#   name    - title of the diagram
#   diagram - the railroad item to draw (declared Optional, so it may be None)
#   index   - ordering key; to_railroad() sorts on it and railroad_to_html()
#             labels index 0 as the root
NamedDiagram = NamedTuple(
    "NamedDiagram",
    [("name", str), ("diagram", Optional[railroad.DiagramItem]), ("index", int)],
)
"""
A simple structure for associating a name with a railroad diagram
"""

# Type variable for the value an EditablePartial produces when it is called
T = TypeVar("T")
class EachItem(railroad.Group):
    """
    Railroad item used to draw a pyparsing ``Each``. It is assembled as:

    - a Group, labelled with ``all_label``, wrapping
    - a OneOrMore, wrapping
    - a Choice between every element of the Each

    The group label signals that all of the alternatives must be matched.
    """

    all_label = "[ALL]"

    def __init__(self, *items):
        # The first positional argument of Choice is an index into ``items``;
        # the last item is the one selected here.
        last_index = len(items) - 1
        repeated_choice = railroad.OneOrMore(
            item=railroad.Choice(last_index, *items)
        )
        super().__init__(repeated_choice, label=self.all_label)
class AnnotatedItem(railroad.Group):
    """
    Group subclass whose label is the given text wrapped in square brackets
    """

    def __init__(self, label: str, item):
        bracketed_label = "[{}]".format(label)
        super().__init__(item=item, label=bracketed_label)
class EditablePartial(Generic[T]):
    """
    Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
    constructed.

    The callable, positional arguments and keyword arguments are stored as the plain, mutable attributes
    ``func``, ``args`` and ``kwargs`` so they can be changed up until the instance is called.
    """

    # We need this here because the railroad constructors actually transform the data, so can't be called until the
    # entire tree is assembled

    def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
        """
        :param func: the callable to invoke when this partial is evaluated
        :param args: positional arguments for ``func`` (stored by reference, not copied)
        :param kwargs: keyword arguments for ``func`` (stored by reference, not copied)
        """
        self.func = func
        self.args = args
        self.kwargs = kwargs

    @classmethod
    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
        """
        If you call this function in the same way that you would call the constructor, it will store the arguments
        as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)

        :returns: a new instance of the class this was called on
        """
        # Build through ``cls`` rather than naming the base class so that subclasses get instances of themselves
        return cls(func=func, args=list(args), kwargs=kwargs)

    @property
    def name(self):
        """
        The ``name`` keyword argument stored on this partial

        :raises KeyError: if no ``name`` keyword argument has been stored
        """
        return self.kwargs["name"]

    def __call__(self) -> T:
        """
        Evaluate the partial and return the result. The stored ``args`` and ``kwargs`` are left untouched, since
        only copies are passed on to ``func``.
        """
        args = self.args.copy()
        kwargs = self.kwargs.copy()

        # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
        # args=['list', 'of', 'things'])
        arg_spec = inspect.getfullargspec(self.func)
        if arg_spec.varargs in self.kwargs:
            # Move the sequence stored under the varargs name onto the end of the positional arguments
            args += kwargs.pop(arg_spec.varargs)

        return self.func(*args, **kwargs)
def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
    """
    Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams

    Entries whose ``diagram`` field is None are skipped, because there is nothing to draw for them.

    :param diagrams: the named diagrams to render, in the order they should appear
    :params kwargs: kwargs to be passed in to the template
    :returns: the rendered HTML
    """
    data = []
    for diagram in diagrams:
        # NamedDiagram.diagram is declared Optional; calling writeSvg on None would raise
        if diagram.diagram is None:
            continue

        # writeSvg takes a write callback, so collect the SVG text in memory
        buffer = StringIO()
        diagram.diagram.writeSvg(buffer.write)

        title = diagram.name
        if diagram.index == 0:
            title += " (root)"
        data.append({"title": title, "text": "", "svg": buffer.getvalue()})

    return template.render(diagrams=data, **kwargs)
def resolve_partial(partial: "EditablePartial[T]") -> T:
    """
    Recursively turn a structure of EditablePartials into the objects they describe.

    An EditablePartial has its ``args`` and ``kwargs`` resolved (and replaced on the
    instance) before it is called; lists and dicts are rebuilt with every member
    resolved; any other value is returned as it is.
    """
    if isinstance(partial, EditablePartial):
        # Arguments must be resolved before the partial itself can be evaluated
        partial.args = resolve_partial(partial.args)
        partial.kwargs = resolve_partial(partial.kwargs)
        return partial()

    if isinstance(partial, list):
        return list(map(resolve_partial, partial))

    if isinstance(partial, dict):
        resolved = {}
        for key, value in partial.items():
            resolved[key] = resolve_partial(value)
        return resolved

    return partial
def to_railroad(
    element: pyparsing.ParserElement,
    diagram_kwargs: Optional[dict] = None,
    vertical: int = 3,
    show_results_names: bool = False,
) -> List[NamedDiagram]:
    """
    Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
    creation if you want to access the Railroad tree before it is converted to HTML

    :param element: base element of the parser being diagrammed
    :param diagram_kwargs: kwargs to pass to the Diagram() constructor
    :param vertical: (optional) - int - limit at which number of alternatives should be
       shown vertically instead of horizontally
    :param show_results_names - bool to indicate whether results name annotations should be
       included in the diagram
    :returns: the resolved diagrams, sorted by their ``index``
    """
    # Walk everything below the root, collecting state as we go
    state = ConverterState(diagram_kwargs=diagram_kwargs or {})
    _to_diagram_element(
        element,
        lookup=state,
        parent=None,
        vertical=vertical,
        show_results_names=show_results_names,
    )

    # The root is still in the state table if it was never extracted; force it out now
    root_id = id(element)
    if root_id in state:
        root_state = state[root_id]
        if not element.customName:
            root_state.name = ""
        root_state.mark_for_extraction(root_id, state, force=True)

    # Everything is assembled, so the intermediate partials can become Railroad elements
    pending = list(state.diagrams.values())
    if len(pending) > 1:
        # Keep only the first diagram seen for each name
        seen_names = set()
        unique = []
        for candidate in pending:
            candidate_name = candidate.name
            # SkipTo elements ("...") are uninformative as subdiagrams, and
            # diagrams without a name are dropped as well
            if candidate_name == "..." or candidate_name is None:
                continue
            if candidate_name in seen_names:
                continue
            seen_names.add(candidate_name)
            unique.append(candidate)
        pending = unique
    # else: a lone diagram is always shown, even when it has no name

    resolved = [resolve_partial(entry) for entry in pending]
    return sorted(resolved, key=lambda diag: diag.index)
- def _should_vertical(
- specification: int, exprs: Iterable[pyparsing.ParserElement]
- ) -> bool:
- """
- Returns true if we should return a vertical list of elements
- """
- if specification is None:
- return False
- else:
- return len(_visible_exprs(exprs)) >= specification
class ElementState:
    """
    Bookkeeping held for one pyparsing element while its tree is being converted
    """

    # Note: this should be a dataclass, but we have to support Python 3.5
    def __init__(
        self,
        element: pyparsing.ParserElement,
        converted: EditablePartial,
        parent: EditablePartial,
        number: int,
        name: str = None,
        parent_index: Optional[int] = None,
    ):
        #: The pyparsing element this state describes
        self.element: pyparsing.ParserElement = element
        #: Name used for the element
        self.name: str = name
        #: The Railroad element for this pyparsing element, still unconstructed
        self.converted: EditablePartial = converted
        #: The parent Railroad element; kept so this element can be swapped out if it gets extracted
        self.parent: EditablePartial = parent
        #: Discovery order of the element; becomes the diagram index if it is extracted
        self.number: int = number
        #: Position of this element within its parent
        self.parent_index: Optional[int] = parent_index
        #: Set once this element should be pulled out into a subdiagram
        self.extract: bool = False
        #: Set once every child of this element has been filled in
        self.complete: bool = False

    def mark_for_extraction(
        self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
    ):
        """
        Flag this element for extraction into a sub-diagram, and extract it right away when that is possible

        :param el_id: id of the element
        :param state: element/diagram state tracker
        :param name: name to use for this element's text
        :param force: If true, extract immediately regardless of this element's state. Only useful for the
            root element once the whole tree is known to be finished
        """
        self.extract = True

        # Choose a name only if there isn't one yet: an explicit name wins, then the
        # element's own custom name, then the empty string
        if not self.name:
            self.name = name or self.element.customName or ""

        # Being marked does not mean extraction can happen yet: children may still be
        # missing, and trivial elements (a string literal etc) aren't worth extracting
        if not force:
            if not (self.complete and _worth_extracting(self.element)):
                return
        state.extract_into_diagram(el_id)
class ConverterState:
    """
    State shared across the recursive walk of the element tree
    """

    def __init__(self, diagram_kwargs: Optional[dict] = None):
        #: Per-element state, keyed by the id() of the ParserElement
        self._element_diagram_states: Dict[int, ElementState] = {}
        #: Subdiagrams that have been extracted, keyed by the id() of their ParserElement
        self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
        #: Counter behind generate_unnamed()
        self.unnamed_index: int = 1
        #: Counter behind generate_index(); the values are used for sorting
        self.index: int = 0
        #: Shared kwargs that customize how each Diagram is constructed
        self.diagram_kwargs: dict = diagram_kwargs or {}
        self.extracted_diagram_names: Set[str] = set()

    # The item protocol below simply exposes the per-element state table

    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value

    def __getitem__(self, key: int) -> ElementState:
        return self._element_diagram_states[key]

    def __delitem__(self, key: int):
        del self._element_diagram_states[key]

    def __contains__(self, key: int):
        return key in self._element_diagram_states

    def generate_unnamed(self) -> int:
        """
        Advance the unnamed-diagram counter and return its new value
        """
        bumped = self.unnamed_index + 1
        self.unnamed_index = bumped
        return bumped

    def generate_index(self) -> int:
        """
        Advance the diagram index counter and return its new value
        """
        bumped = self.index + 1
        self.index = bumped
        return bumped

    def extract_into_diagram(self, el_id: int):
        """
        Used when we encounter the same token twice in the same tree. When this
        happens, we replace all instances of that token with a terminal, and
        create a new subdiagram for the token
        """
        position = self[el_id]

        # Put a NonTerminal carrying the element's name where the element sat in its parent
        parent = position.parent
        if parent:
            placeholder = EditablePartial.from_call(
                railroad.NonTerminal, text=position.name
            )
            parent_kwargs = parent.kwargs
            if "item" in parent_kwargs:
                parent_kwargs["item"] = placeholder
            elif "items" in parent_kwargs:
                parent_kwargs["items"][position.parent_index] = placeholder

        # A Group contributes only its content; the name is kept as the diagram title
        converted = position.converted
        if converted.func == railroad.Group:
            content = converted.kwargs["item"]
        else:
            content = converted

        diagram = EditablePartial.from_call(
            railroad.Diagram, content, **self.diagram_kwargs
        )
        self.diagrams[el_id] = EditablePartial.from_call(
            NamedDiagram,
            name=position.name,
            diagram=diagram,
            index=position.number,
        )

        # The element now lives in ``diagrams``, so drop it from the pending state table
        del self[el_id]
- def _worth_extracting(element: pyparsing.ParserElement) -> bool:
- """
- Returns true if this element is worth having its own sub-diagram. Simply, if any of its children
- themselves have children, then its complex enough to extract
- """
- children = element.recurse()
- return any(child.recurse() for child in children)
- def _apply_diagram_item_enhancements(fn):
- """
- decorator to ensure enhancements to a diagram item (such as results name annotations)
- get applied on return from _to_diagram_element (we do this since there are several
- returns in _to_diagram_element)
- """
- def _inner(
- element: pyparsing.ParserElement,
- parent: Optional[EditablePartial],
- lookup: ConverterState = None,
- vertical: int = None,
- index: int = 0,
- name_hint: str = None,
- show_results_names: bool = False,
- ) -> Optional[EditablePartial]:
- ret = fn(
- element,
- parent,
- lookup,
- vertical,
- index,
- name_hint,
- show_results_names,
- )
- # apply annotation for results name, if present
- if show_results_names and ret is not None:
- element_results_name = element.resultsName
- if element_results_name:
- # add "*" to indicate if this is a "list all results" name
- element_results_name += "" if element.modalResults else "*"
- ret = EditablePartial.from_call(
- railroad.Group, item=ret, label=element_results_name
- )
- return ret
- return _inner
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
    """
    Return, as a list, the members of ``exprs`` that have neither a custom name nor a
    results name and are not instances of ParseElementEnhance, PositionToken or
    And._ErrorStop
    """
    excluded_types = (
        pyparsing.ParseElementEnhance,
        pyparsing.PositionToken,
        pyparsing.And._ErrorStop,
    )

    kept = []
    for candidate in exprs:
        if candidate.customName or candidate.resultsName:
            continue
        if isinstance(candidate, excluded_types):
            continue
        kept.append(candidate)
    return kept
@_apply_diagram_item_enhancements
def _to_diagram_element(
    element: pyparsing.ParserElement,
    parent: Optional[EditablePartial],
    lookup: ConverterState = None,
    vertical: int = None,
    index: int = 0,
    name_hint: str = None,
    show_results_names: bool = False,
) -> Optional[EditablePartial]:
    """
    Recursively converts a PyParsing Element to a railroad Element

    :param element: The pyparsing element to convert
    :param lookup: The shared converter state that keeps track of useful things
    :param index: The index of this element within the parent
    :param parent: The parent of this element in the output tree
    :param vertical: Threshold that is passed unchanged to ``_should_vertical`` to decide whether a list of
       child elements is laid out vertically. ``None`` (the default here) means never go vertical
    :param name_hint: If provided, this will override the generated name
    :param show_results_names: bool flag indicating whether to add annotations for results names

    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed, or
       None if the element contributes nothing to the diagram
    """
    exprs = element.recurse()
    # Name precedence: explicit hint, then the element's custom name, then its class name
    name = name_hint or element.customName or element.__class__.__name__

    # Python's id() is used to provide a unique identifier for elements
    el_id = id(element)

    element_results_name = element.resultsName

    # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
    if not element.customName:
        if isinstance(
            element,
            (
                pyparsing.TokenConverter,
                # pyparsing.Forward,
                pyparsing.Located,
            ),
        ):
            # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
            if exprs:
                if not exprs[0].customName:
                    propagated_name = name
                else:
                    propagated_name = None

                # Convert the wrapped expression in this element's place (same parent and index)
                return _to_diagram_element(
                    element.expr,
                    parent=parent,
                    lookup=lookup,
                    vertical=vertical,
                    index=index,
                    name_hint=propagated_name,
                    show_results_names=show_results_names,
                )

    # If the element isn't worth extracting, we always treat it as the first time we see it
    if _worth_extracting(element):
        if el_id in lookup:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up = lookup[el_id]
            looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
            ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
            return ret

        elif el_id in lookup.diagrams:
            # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
            # just put in a marker element that refers to the sub-diagram
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )
            return ret

    # Recursively convert child elements
    # Here we find the most relevant Railroad element for matching pyparsing Element
    # We use ``items=[]`` here to hold the place for where the child elements will go once created
    # (single-child containers use ``item=""`` / ``item=None`` as the equivalent placeholder)
    if isinstance(element, pyparsing.And):
        # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
        # (all will have the same name, and resultsName)
        if not exprs:
            return None
        if len(set((e.name, e.resultsName) for e in exprs)) == 1:
            ret = EditablePartial.from_call(
                railroad.OneOrMore, item="", repeat=str(len(exprs))
            )
        elif _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Stack, items=[])
        else:
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
        if not exprs:
            return None
        if _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
        else:
            ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
    elif isinstance(element, pyparsing.Each):
        if not exprs:
            return None
        ret = EditablePartial.from_call(EachItem, items=[])
    elif isinstance(element, pyparsing.NotAny):
        ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
    elif isinstance(element, pyparsing.FollowedBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
    elif isinstance(element, pyparsing.PrecededBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
    elif isinstance(element, pyparsing.Opt):
        ret = EditablePartial.from_call(railroad.Optional, item="")
    elif isinstance(element, pyparsing.OneOrMore):
        ret = EditablePartial.from_call(railroad.OneOrMore, item="")
    elif isinstance(element, pyparsing.ZeroOrMore):
        ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
    elif isinstance(element, pyparsing.Group):
        ret = EditablePartial.from_call(
            railroad.Group, item=None, label=element_results_name
        )
    elif isinstance(element, pyparsing.Empty) and not element.customName:
        # Skip unnamed "Empty" elements
        ret = None
    elif len(exprs) > 1:
        # Any other element with several children is drawn as a plain sequence
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        # A single child and no results name: wrap the child in a group labelled with this element's name
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
    else:
        # Everything else is drawn as a terminal showing the element's default name
        terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
        ret = terminal

    if ret is None:
        return

    # Indicate this element's position in the tree so we can extract it if necessary
    lookup[el_id] = ElementState(
        element=element,
        converted=ret,
        parent=parent,
        parent_index=index,
        number=lookup.generate_index(),
    )
    if element.customName:
        # Elements with a custom name are flagged for extraction as soon as they are registered
        lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)

    # ``i`` is the slot in ret.kwargs["items"] for the next child; it only advances when a child is kept
    i = 0
    for expr in exprs:
        # Add a placeholder index in case we have to extract the child before we even add it to the parent
        if "items" in ret.kwargs:
            ret.kwargs["items"].insert(i, None)

        item = _to_diagram_element(
            expr,
            parent=ret,
            lookup=lookup,
            vertical=vertical,
            index=i,
            show_results_names=show_results_names,
        )

        # Some elements don't need to be shown in the diagram
        if item is not None:
            if "item" in ret.kwargs:
                ret.kwargs["item"] = item
            elif "items" in ret.kwargs:
                # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
                ret.kwargs["items"][i] = item
                i += 1
        elif "items" in ret.kwargs:
            # If we're supposed to skip this element, remove it from the parent
            del ret.kwargs["items"][i]

    # If all of this item's children are None, replace it with a terminal showing its name
    if ret and (
        ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
        or ("item" in ret.kwargs and ret.kwargs["item"] is None)
    ):
        ret = EditablePartial.from_call(railroad.Terminal, name)

    # Mark this element as "complete", ie it has all of its children
    if el_id in lookup:
        lookup[el_id].complete = True

    # Now that the children are in place, carry out any extraction that was requested earlier and refer to
    # the resulting sub-diagram by name
    if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
        lookup.extract_into_diagram(el_id)
        if ret is not None:
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )

    return ret
|