pragma_parser.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  2. # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
  3. import re
  4. from collections import namedtuple
  5. from typing import Generator, List, Optional
  6. # Allow stopping after the first semicolon/hash encountered,
  7. # so that an option can be continued with the reasons
  8. # why it is active or disabled.
  9. OPTION_RGX = r"""
  10. \s* # Any number of whithespace
  11. \#? # One or zero hash
  12. .* # Anything (as much as possible)
  13. (\s* # Beginning of first matched group and any number of whitespaces
  14. \# # Beginning of comment
  15. .*? # Anything (as little as possible)
  16. \bpylint: # pylint word and column
  17. \s* # Any number of whitespaces
  18. ([^;#\n]+)) # Anything except semicolon or hash or newline (it is the second matched group)
  19. # and end of the first matched group
  20. [;#]{0,1}""" # From 0 to 1 repetition of semicolon or hash
  21. OPTION_PO = re.compile(OPTION_RGX, re.VERBOSE)
  22. PragmaRepresenter = namedtuple("PragmaRepresenter", "action messages")
  23. ATOMIC_KEYWORDS = frozenset(("disable-all", "skip-file"))
  24. MESSAGE_KEYWORDS = frozenset(
  25. ("disable-next", "disable-msg", "enable-msg", "disable", "enable")
  26. )
  27. # sorted is necessary because sets are unordered collections and ALL_KEYWORDS
  28. # string should not vary between executions
  29. # reverse is necessary in order to have the longest keywords first, so that, for example,
  30. # 'disable' string should not be matched instead of 'disable-all'
  31. ALL_KEYWORDS = "|".join(
  32. sorted(ATOMIC_KEYWORDS | MESSAGE_KEYWORDS, key=len, reverse=True)
  33. )
  34. TOKEN_SPECIFICATION = [
  35. ("KEYWORD", fr"\b({ALL_KEYWORDS:s})\b"),
  36. ("MESSAGE_STRING", r"[0-9A-Za-z\-\_]{2,}"), # Identifiers
  37. ("ASSIGN", r"="), # Assignment operator
  38. ("MESSAGE_NUMBER", r"[CREIWF]{1}\d*"),
  39. ]
  40. TOK_REGEX = "|".join(
  41. f"(?P<{token_name:s}>{token_rgx:s})"
  42. for token_name, token_rgx in TOKEN_SPECIFICATION
  43. )
  44. def emit_pragma_representer(action: str, messages: List[str]) -> PragmaRepresenter:
  45. if not messages and action in MESSAGE_KEYWORDS:
  46. raise InvalidPragmaError(
  47. "The keyword is not followed by message identifier", action
  48. )
  49. return PragmaRepresenter(action, messages)
  50. class PragmaParserError(Exception):
  51. """
  52. A class for exceptions thrown by pragma_parser module
  53. """
  54. def __init__(self, message: str, token: str) -> None:
  55. """
  56. :args message: explain the reason why the exception has been thrown
  57. :args token: token concerned by the exception
  58. """
  59. self.message = message
  60. self.token = token
  61. super().__init__(self.message)
  62. class UnRecognizedOptionError(PragmaParserError):
  63. """
  64. Thrown in case the of a valid but unrecognized option
  65. """
  66. class InvalidPragmaError(PragmaParserError):
  67. """
  68. Thrown in case the pragma is invalid
  69. """
  70. def parse_pragma(pylint_pragma: str) -> Generator[PragmaRepresenter, None, None]:
  71. action: Optional[str] = None
  72. messages: List[str] = []
  73. assignment_required = False
  74. previous_token = ""
  75. for mo in re.finditer(TOK_REGEX, pylint_pragma):
  76. kind = mo.lastgroup
  77. value = mo.group()
  78. if kind == "ASSIGN":
  79. if not assignment_required:
  80. if action:
  81. # A keyword has been found previously but doesn't support assignment
  82. raise UnRecognizedOptionError(
  83. "The keyword doesn't support assignment", action
  84. )
  85. if previous_token:
  86. # Something found previously but not a known keyword
  87. raise UnRecognizedOptionError(
  88. "The keyword is unknown", previous_token
  89. )
  90. # Nothing at all detected before this assignment
  91. raise InvalidPragmaError("Missing keyword before assignment", "")
  92. assignment_required = False
  93. elif assignment_required:
  94. raise InvalidPragmaError(
  95. "The = sign is missing after the keyword", action or ""
  96. )
  97. elif kind == "KEYWORD":
  98. if action:
  99. yield emit_pragma_representer(action, messages)
  100. action = value
  101. messages = []
  102. assignment_required = action in MESSAGE_KEYWORDS
  103. elif kind in {"MESSAGE_STRING", "MESSAGE_NUMBER"}:
  104. messages.append(value)
  105. assignment_required = False
  106. else:
  107. raise RuntimeError("Token not recognized")
  108. previous_token = value
  109. if action:
  110. yield emit_pragma_representer(action, messages)
  111. else:
  112. raise UnRecognizedOptionError("The keyword is unknown", previous_token)