# Copyright (c) 2006-2014 LOGILAB S.A. (Paris, FRANCE) <contact@logilab.fr>
# Copyright (c) 2012-2015 Google, Inc.
# Copyright (c) 2013 moxian <aleftmail@inbox.ru>
# Copyright (c) 2014-2020 Claudiu Popa <pcmanticore@gmail.com>
# Copyright (c) 2014 frost-nzcr4 <frost.nzcr4@jagmort.com>
# Copyright (c) 2014 Brett Cannon <brett@python.org>
# Copyright (c) 2014 Michal Nowikowski <godfryd@gmail.com>
# Copyright (c) 2014 Arun Persaud <arun@nubati.net>
# Copyright (c) 2015 Mike Frysinger <vapier@gentoo.org>
# Copyright (c) 2015 Fabio Natali <me@fabionatali.com>
# Copyright (c) 2015 Harut <yes@harutune.name>
# Copyright (c) 2015 Mihai Balint <balint.mihai@gmail.com>
# Copyright (c) 2015 Pavel Roskin <proski@gnu.org>
# Copyright (c) 2015 Ionel Cristian Maries <contact@ionelmc.ro>
# Copyright (c) 2016 Petr Pulc <petrpulc@gmail.com>
# Copyright (c) 2016 Moises Lopez <moylop260@vauxoo.com>
# Copyright (c) 2016 Ashley Whetter <ashley@awhetter.co.uk>
# Copyright (c) 2017, 2019-2020 hippo91 <guillaume.peillex@gmail.com>
# Copyright (c) 2017-2018 Bryce Guinta <bryce.paul.guinta@gmail.com>
# Copyright (c) 2017 Krzysztof Czapla <k.czapla68@gmail.com>
# Copyright (c) 2017 Łukasz Rogalski <rogalski.91@gmail.com>
# Copyright (c) 2017 James M. Allen <james.m.allen@gmail.com>
# Copyright (c) 2017 vinnyrose <vinnyrose@users.noreply.github.com>
# Copyright (c) 2018-2021 Pierre Sassoulas <pierre.sassoulas@gmail.com>
# Copyright (c) 2018, 2020 Bryce Guinta <bryce.guinta@protonmail.com>
# Copyright (c) 2018, 2020 Anthony Sottile <asottile@umich.edu>
# Copyright (c) 2018 Lucas Cimon <lucas.cimon@gmail.com>
# Copyright (c) 2018 Michael Hudson-Doyle <michael.hudson@canonical.com>
# Copyright (c) 2018 Natalie Serebryakova <natalie.serebryakova@Natalies-MacBook-Pro.local>
# Copyright (c) 2018 ssolanki <sushobhitsolanki@gmail.com>
# Copyright (c) 2018 Marcus Näslund <naslundx@gmail.com>
# Copyright (c) 2018 Mike Frysinger <vapier@gmail.com>
# Copyright (c) 2018 Fureigh <rhys.fureigh@gsa.gov>
# Copyright (c) 2018 Andreas Freimuth <andreas.freimuth@united-bits.de>
# Copyright (c) 2018 Jakub Wilk <jwilk@jwilk.net>
# Copyright (c) 2019 Nick Drozd <nicholasdrozd@gmail.com>
# Copyright (c) 2019 Hugo van Kemenade <hugovk@users.noreply.github.com>
# Copyright (c) 2020 Raphael Gaschignard <raphael@rtpg.co>
# Copyright (c) 2021 Daniël van Noord <13665637+DanielNoord@users.noreply.github.com>
# Copyright (c) 2021 Tushar Sadhwani <tushar.sadhwani000@gmail.com>
# Copyright (c) 2021 bot <bot@noreply.github.com>
# Copyright (c) 2021 Ville Skyttä <ville.skytta@iki.fi>
# Copyright (c) 2021 Marc Mueller <30130371+cdce8p@users.noreply.github.com>
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
  46. """Python code format's checker.
  47. By default try to follow Guido's style guide :
  48. https://www.python.org/doc/essays/styleguide/
  49. Some parts of the process_token method is based from The Tab Nanny std module.
  50. """
  51. import tokenize
  52. from functools import reduce
  53. from typing import List
  54. from astroid import nodes
  55. from pylint.checkers import BaseTokenChecker
  56. from pylint.checkers.utils import (
  57. check_messages,
  58. is_overload_stub,
  59. is_protocol_class,
  60. node_frame_class,
  61. )
  62. from pylint.constants import WarningScope
  63. from pylint.interfaces import IAstroidChecker, IRawChecker, ITokenChecker
  64. from pylint.utils.pragma_parser import OPTION_PO, PragmaParserError, parse_pragma
_ASYNC_TOKEN = "async"
_KEYWORD_TOKENS = [
    "assert",
    "del",
    "elif",
    "except",
    "for",
    "if",
    "in",
    "not",
    "raise",
    "return",
    "while",
    "yield",
    "with",
]

_SPACED_OPERATORS = [
    "==",
    "<",
    ">",
    "!=",
    "<>",
    "<=",
    ">=",
    "+=",
    "-=",
    "*=",
    "**=",
    "/=",
    "//=",
    "&=",
    "|=",
    "^=",
    "%=",
    ">>=",
    "<<=",
]

_OPENING_BRACKETS = ["(", "[", "{"]
_CLOSING_BRACKETS = [")", "]", "}"]
_TAB_LENGTH = 8

_EOL = frozenset([tokenize.NEWLINE, tokenize.NL, tokenize.COMMENT])
_JUNK_TOKENS = (tokenize.COMMENT, tokenize.NL)

# Whitespace checking policy constants
_MUST = 0
_MUST_NOT = 1
_IGNORE = 2
MSGS = {
    "C0301": (
        "Line too long (%s/%s)",
        "line-too-long",
        "Used when a line is longer than a given number of characters.",
    ),
    "C0302": (
        "Too many lines in module (%s/%s)",  # was W0302
        "too-many-lines",
        "Used when a module has too many lines, reducing its readability.",
    ),
    "C0303": (
        "Trailing whitespace",
        "trailing-whitespace",
        "Used when there is whitespace between the end of a line and the newline.",
    ),
    "C0304": (
        "Final newline missing",
        "missing-final-newline",
        "Used when the last line in a file is missing a newline.",
    ),
    "C0305": (
        "Trailing newlines",
        "trailing-newlines",
        "Used when there are trailing blank lines in a file.",
    ),
    "W0311": (
        "Bad indentation. Found %s %s, expected %s",
        "bad-indentation",
        "Used when an unexpected number of indentation tabulations or "
        "spaces has been found.",
    ),
    "W0301": (
        "Unnecessary semicolon",  # was W0106
        "unnecessary-semicolon",
        'Used when a statement is ended by a semi-colon (";"), which '
        "isn't necessary (that's python, not C ;).",
    ),
    "C0321": (
        "More than one statement on a single line",
        "multiple-statements",
        "Used when more than one statement is found on the same line.",
        {"scope": WarningScope.NODE},
    ),
    "C0325": (
        "Unnecessary parens after %r keyword",
        "superfluous-parens",
        "Used when a single item in parentheses follows an if, for, or "
        "other keyword.",
    ),
    "C0327": (
        "Mixed line endings LF and CRLF",
        "mixed-line-endings",
        "Used when there are mixed (LF and CRLF) newline signs in a file.",
    ),
    "C0328": (
        "Unexpected line ending format. There is '%s' while it should be '%s'.",
        "unexpected-line-ending-format",
        "Used when there is a different newline than expected.",
    ),
}
def _last_token_on_line_is(tokens, line_end, token):
    return (
        line_end > 0
        and tokens.token(line_end - 1) == token
        or line_end > 1
        and tokens.token(line_end - 2) == token
        and tokens.type(line_end - 1) == tokenize.COMMENT
    )
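# For illustration: given the tokens of the line ``x = 1;`` the last
# significant token before the NEWLINE is ";", so the check above is true;
# it is also true for ``x = 1;  # comment`` because a trailing COMMENT
# token is skipped over.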
# The contexts for hanging indents.
# A hanging indented dictionary value after :
HANGING_DICT_VALUE = "dict-value"
# Hanging indentation in an expression.
HANGING = "hanging"
# Hanging indentation in a block header.
HANGING_BLOCK = "hanging-block"
# Continued indentation inside an expression.
CONTINUED = "continued"
# Continued indentation in a block header.
CONTINUED_BLOCK = "continued-block"

SINGLE_LINE = "single"
WITH_BODY = "multi"


class TokenWrapper:
    """A wrapper for readable access to token information."""

    def __init__(self, tokens):
        self._tokens = tokens

    def token(self, idx):
        return self._tokens[idx][1]

    def type(self, idx):
        return self._tokens[idx][0]

    def start_line(self, idx):
        return self._tokens[idx][2][0]

    def start_col(self, idx):
        return self._tokens[idx][2][1]

    def line(self, idx):
        return self._tokens[idx][4]
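# For reference, the entries wrapped above are tokenize 5-tuples
# ``(type, string, start, end, line)``.  A rough usage sketch, assuming the
# standard tokenize module as the token source:
#
#     import io
#     toks = list(tokenize.generate_tokens(io.StringIO("x = 1\n").readline))
#     wrapper = TokenWrapper(toks)
#     wrapper.token(0)       # 'x'
#     wrapper.start_line(0)  # 1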
class FormatChecker(BaseTokenChecker):
    """checks for :
    * unauthorized constructions
    * strict indentation
    * line length
    """

    __implements__ = (ITokenChecker, IAstroidChecker, IRawChecker)

    # configuration section name
    name = "format"
    # messages
    msgs = MSGS
    # configuration options
    # for available dict keys/values see the optik parser 'add_option' method
    options = (
        (
            "max-line-length",
            {
                "default": 100,
                "type": "int",
                "metavar": "<int>",
                "help": "Maximum number of characters on a single line.",
            },
        ),
        (
            "ignore-long-lines",
            {
                "type": "regexp",
                "metavar": "<regexp>",
                "default": r"^\s*(# )?<?https?://\S+>?$",
                "help": (
                    "Regexp for a line that is allowed to be longer than the limit."
                ),
            },
        ),
        (
            "single-line-if-stmt",
            {
                "default": False,
                "type": "yn",
                "metavar": "<y or n>",
                "help": (
                    "Allow the body of an if to be on the same "
                    "line as the test if there is no else."
                ),
            },
        ),
        (
            "single-line-class-stmt",
            {
                "default": False,
                "type": "yn",
                "metavar": "<y or n>",
                "help": (
                    "Allow the body of a class to be on the same "
                    "line as the declaration if body contains "
                    "single statement."
                ),
            },
        ),
        (
            "max-module-lines",
            {
                "default": 1000,
                "type": "int",
                "metavar": "<int>",
                "help": "Maximum number of lines in a module.",
            },
        ),
        (
            "indent-string",
            {
                "default": "    ",
                "type": "non_empty_string",
                "metavar": "<string>",
                "help": "String used as indentation unit. This is usually "
                '"    " (4 spaces) or "\\t" (1 tab).',
            },
        ),
        (
            "indent-after-paren",
            {
                "type": "int",
                "metavar": "<int>",
                "default": 4,
                "help": "Number of spaces of indent required inside a hanging "
                "or continued line.",
            },
        ),
        (
            "expected-line-ending-format",
            {
                "type": "choice",
                "metavar": "<empty or LF or CRLF>",
                "default": "",
                "choices": ["", "LF", "CRLF"],
                "help": (
                    "Expected format of line ending, "
                    "e.g. empty (any line ending), LF or CRLF."
                ),
            },
        ),
    )
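    # For illustration, these options are typically set through the checker's
    # configuration section (named after the ``name`` attribute above), e.g.
    # in a pylintrc file:
    #
    #     [FORMAT]
    #     max-line-length=100
    #     indent-string='    '
    #     expected-line-ending-format=LF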
    def __init__(self, linter=None):
        super().__init__(linter)
        self._lines = None
        self._visited_lines = None
        self._bracket_stack = [None]

    def new_line(self, tokens, line_end, line_start):
        """a new line has been encountered, process it if necessary"""
        if _last_token_on_line_is(tokens, line_end, ";"):
            self.add_message("unnecessary-semicolon", line=tokens.start_line(line_end))

        line_num = tokens.start_line(line_start)
        line = tokens.line(line_start)
        if tokens.type(line_start) not in _JUNK_TOKENS:
            self._lines[line_num] = line.split("\n")[0]
        self.check_lines(line, line_num)

    def process_module(self, _node: nodes.Module) -> None:
        pass
    def _check_keyword_parentheses(
        self, tokens: List[tokenize.TokenInfo], start: int
    ) -> None:
        """Check that there are not unnecessary parentheses after a keyword.

        Parens are unnecessary if there is exactly one balanced outer pair on a
        line, and it is followed by a colon, and contains no commas (i.e. is not a
        tuple).

        Args:
            tokens: list of Tokens; the entire list of Tokens.
            start: int; the position of the keyword in the token list.
        """
        # If the next token is not a paren, we're fine.
        if self._bracket_stack[-1] == ":" and tokens[start].string == "for":
            self._bracket_stack.pop()
        if tokens[start + 1].string != "(":
            return
        found_and_or = False
        contains_walrus_operator = False
        walrus_operator_depth = 0
        contains_double_parens = 0
        depth = 0
        keyword_token = str(tokens[start].string)
        line_num = tokens[start].start[0]
        for i in range(start, len(tokens) - 1):
            token = tokens[i]
            # If we hit a newline, then assume any parens were for continuation.
            if token.type == tokenize.NL:
                return
            # Below Python 3.8 the walrus operator does not exist as a single
            # token, so the tokenizer generates two independent tokens for it.
            if (
                token.string == ":="  # <-- python3.8+ path
                or token.string + tokens[i + 1].string == ":="
            ):
                contains_walrus_operator = True
                walrus_operator_depth = depth
            if token.string == "(":
                depth += 1
                if tokens[i + 1].string == "(":
                    contains_double_parens = 1
            elif token.string == ")":
                depth -= 1
                if depth:
                    if contains_double_parens and tokens[i + 1].string == ")":
                        # For walrus operators in `if (not)` conditions and comprehensions
                        if keyword_token in {"in", "if", "not"}:
                            continue
                        return
                    contains_double_parens -= 1
                    continue
                # ')' can't happen after if (foo), since it would be a syntax error.
                if tokens[i + 1].string in {":", ")", "]", "}", "in"} or tokens[
                    i + 1
                ].type in {tokenize.NEWLINE, tokenize.ENDMARKER, tokenize.COMMENT}:
                    if contains_walrus_operator and walrus_operator_depth - 1 == depth:
                        return
                    # The empty tuple () is always accepted.
                    if i == start + 2:
                        return
                    if keyword_token == "not":
                        if not found_and_or:
                            self.add_message(
                                "superfluous-parens", line=line_num, args=keyword_token
                            )
                    elif keyword_token in {"return", "yield"}:
                        self.add_message(
                            "superfluous-parens", line=line_num, args=keyword_token
                        )
                    elif not found_and_or and keyword_token != "in":
                        self.add_message(
                            "superfluous-parens", line=line_num, args=keyword_token
                        )
                return
            elif depth == 1:
                # This is a tuple, which is always acceptable.
                if token[1] == ",":
                    return
                # 'and' and 'or' are the only boolean operators with lower precedence
                # than 'not', so parens are only required when they are found.
                if token[1] in {"and", "or"}:
                    found_and_or = True
                # A yield inside an expression must always be in parentheses,
                # quit early without error.
                elif token[1] == "yield":
                    return
                # A generator expression always has a 'for' token in it, and
                # the 'for' token is only legal inside parens when it is in a
                # generator expression. The parens are necessary here, so bail
                # without an error.
                elif token[1] == "for":
                    return
                # A generator expression can have an 'else' token in it.
                # We check the rest of the tokens to see if any problems occur
                # after the 'else'.
                elif token[1] == "else":
                    if "(" in (i.string for i in tokens[i:]):
                        self._check_keyword_parentheses(tokens[i:], 0)
                    return
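    # A few illustrative inputs for the check above, assuming default options:
    #
    #     if (x == 1):            ->  superfluous-parens for 'if'
    #     return (value)          ->  superfluous-parens for 'return'
    #     if (x, y):              ->  accepted: the parentheses build a tuple
    #     not (a and b)           ->  accepted: found_and_or is set
    #     while (n := next(it)):  ->  accepted: the walrus operator needs parens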
    def _prepare_token_dispatcher(self):
        dispatch = {}
        for tokens, handler in ((_KEYWORD_TOKENS, self._check_keyword_parentheses),):
            for token in tokens:
                dispatch[token] = handler
        return dispatch

    def process_tokens(self, tokens):
        """process tokens and search for :

        _ too long lines (i.e. longer than <max_chars>)

        _ optionally bad construct (if given, bad_construct must be a compiled
          regular expression).
        """
        self._bracket_stack = [None]
        indents = [0]
        check_equal = False
        line_num = 0
        self._lines = {}
        self._visited_lines = {}
        token_handlers = self._prepare_token_dispatcher()
        self._last_line_ending = None
        last_blank_line_num = 0
        for idx, (tok_type, token, start, _, line) in enumerate(tokens):
            if start[0] != line_num:
                line_num = start[0]
                # A tokenizer oddity: if an indented line contains a multi-line
                # docstring, the line member of the INDENT token does not contain
                # the full line; therefore we check the next token on the line.
                if tok_type == tokenize.INDENT:
                    self.new_line(TokenWrapper(tokens), idx - 1, idx + 1)
                else:
                    self.new_line(TokenWrapper(tokens), idx - 1, idx)

            if tok_type == tokenize.NEWLINE:
                # a program statement, or ENDMARKER, will eventually follow,
                # after some (possibly empty) run of tokens of the form
                #     (NL | COMMENT)* (INDENT | DEDENT+)?
                # If an INDENT appears, setting check_equal is wrong, and will
                # be undone when we see the INDENT.
                check_equal = True
                self._check_line_ending(token, line_num)
            elif tok_type == tokenize.INDENT:
                check_equal = False
                self.check_indent_level(token, indents[-1] + 1, line_num)
                indents.append(indents[-1] + 1)
            elif tok_type == tokenize.DEDENT:
                # there's nothing we need to check here! what's important is
                # that when the run of DEDENTs ends, the indentation of the
                # program statement (or ENDMARKER) that triggered the run is
                # equal to what's left at the top of the indents stack
                check_equal = True
                if len(indents) > 1:
                    del indents[-1]
            elif tok_type == tokenize.NL:
                if not line.strip("\r\n"):
                    last_blank_line_num = line_num
            elif tok_type not in (tokenize.COMMENT, tokenize.ENCODING):
                # This is the first concrete token following a NEWLINE, so it
                # must be the first token of the next program statement, or an
                # ENDMARKER; the "line" argument exposes the leading whitespace
                # for this statement; in the case of ENDMARKER, line is an empty
                # string, so will properly match the empty string with which the
                # "indents" stack was seeded
                if check_equal:
                    check_equal = False
                    self.check_indent_level(line, indents[-1], line_num)

            if tok_type == tokenize.NUMBER and token.endswith("l"):
                self.add_message("lowercase-l-suffix", line=line_num)

            try:
                handler = token_handlers[token]
            except KeyError:
                pass
            else:
                handler(tokens, idx)

        line_num -= 1  # to be ok with "wc -l"
        if line_num > self.config.max_module_lines:
            # Get the line where the too-many-lines (or its message id)
            # was disabled or default to 1.
            message_definition = self.linter.msgs_store.get_message_definitions(
                "too-many-lines"
            )[0]
            names = (message_definition.msgid, "too-many-lines")
            line = next(
                filter(None, (self.linter._pragma_lineno.get(name) for name in names)),
                1,
            )
            self.add_message(
                "too-many-lines",
                args=(line_num, self.config.max_module_lines),
                line=line,
            )

        # See if there are any trailing lines. Do not complain about empty
        # files like __init__.py markers.
        if line_num == last_blank_line_num and line_num > 0:
            self.add_message("trailing-newlines", line=line_num)
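    # For illustration: with the default max-module-lines of 1000, a 1200-line
    # module is reported once as "Too many lines in module (1200/1000)", and a
    # module whose last physical lines are blank is reported with
    # "trailing-newlines".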
    def _check_line_ending(self, line_ending, line_num):
        # check if line endings are mixed
        if self._last_line_ending is not None:
            # line_ending == "" indicates a synthetic newline added at
            # the end of a file that does not, in fact, end with a
            # newline.
            if line_ending and line_ending != self._last_line_ending:
                self.add_message("mixed-line-endings", line=line_num)

        self._last_line_ending = line_ending

        # check if line ending is as expected
        expected = self.config.expected_line_ending_format
        if expected:
            # reduce multiple \n\n\n\n to one \n
            line_ending = reduce(lambda x, y: x + y if x != y else x, line_ending, "")
            line_ending = "LF" if line_ending == "\n" else "CRLF"
            if line_ending != expected:
                self.add_message(
                    "unexpected-line-ending-format",
                    args=(line_ending, expected),
                    line=line_num,
                )
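    # For illustration, the reduce() call above collapses a run of identical
    # newline characters so the token maps cleanly onto "LF" or "CRLF":
    #
    #     reduce(lambda x, y: x + y if x != y else x, "\n\n", "")   # -> "\n"
    #     reduce(lambda x, y: x + y if x != y else x, "\r\n", "")   # -> "\r\n"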
    @check_messages("multiple-statements")
    def visit_default(self, node: nodes.NodeNG) -> None:
        """check the node line number and check it if not yet done"""
        if not node.is_statement:
            return
        if not node.root().pure_python:
            return
        prev_sibl = node.previous_sibling()
        if prev_sibl is not None:
            prev_line = prev_sibl.fromlineno
        # The line on which a finally: occurs in a try/finally
        # is not directly represented in the AST. We infer it
        # by taking the last line of the body and adding 1, which
        # should be the line of finally:
        elif (
            isinstance(node.parent, nodes.TryFinally) and node in node.parent.finalbody
        ):
            prev_line = node.parent.body[0].tolineno + 1
        else:
            prev_line = node.parent.statement().fromlineno
        line = node.fromlineno
        assert line, node
        if prev_line == line and self._visited_lines.get(line) != 2:
            self._check_multi_statement_line(node, line)
            return
        if line in self._visited_lines:
            return
        try:
            tolineno = node.blockstart_tolineno
        except AttributeError:
            tolineno = node.tolineno
        assert tolineno, node
        lines = []
        for line in range(line, tolineno + 1):
            self._visited_lines[line] = 1
            try:
                lines.append(self._lines[line].rstrip())
            except KeyError:
                lines.append("")

    def _check_multi_statement_line(self, node, line):
        """Check for lines containing multiple statements."""
        # Do not warn about multiple nested context managers
        # in with statements.
        if isinstance(node, nodes.With):
            return
        # For try... except... finally..., the two nodes
        # appear to be on the same line due to how the AST is built.
        if isinstance(node, nodes.TryExcept) and isinstance(
            node.parent, nodes.TryFinally
        ):
            return
        if (
            isinstance(node.parent, nodes.If)
            and not node.parent.orelse
            and self.config.single_line_if_stmt
        ):
            return
        if (
            isinstance(node.parent, nodes.ClassDef)
            and len(node.parent.body) == 1
            and self.config.single_line_class_stmt
        ):
            return
        # Function overloads that use ``Ellipsis`` are exempted.
        if (
            isinstance(node, nodes.Expr)
            and isinstance(node.value, nodes.Const)
            and node.value.value is Ellipsis
        ):
            frame = node.frame()
            if is_overload_stub(frame) or is_protocol_class(node_frame_class(frame)):
                return
        self.add_message("multiple-statements", node=node)
        self._visited_lines[line] = 2
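    # For illustration: with default options ``if x: do_something()`` on one
    # line is reported as multiple-statements, while nested context managers in
    # a single ``with`` statement and overload stubs whose body is ``...`` are
    # deliberately exempted above.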
    def check_line_ending(self, line: str, i: int) -> None:
        """
        Check that the final newline is not missing and that there is no trailing whitespace.
        """
        if not line.endswith("\n"):
            self.add_message("missing-final-newline", line=i)
            return
        # exclude \f (formfeed) from the rstrip
        stripped_line = line.rstrip("\t\n\r\v ")
        if line[len(stripped_line) :] not in ("\n", "\r\n"):
            self.add_message(
                "trailing-whitespace", line=i, col_offset=len(stripped_line)
            )

    def check_line_length(self, line: str, i: int, checker_off: bool) -> None:
        """
        Check that the line length is less than the authorized value
        """
        max_chars = self.config.max_line_length
        ignore_long_line = self.config.ignore_long_lines
        line = line.rstrip()
        if len(line) > max_chars and not ignore_long_line.search(line):
            if checker_off:
                self.linter.add_ignored_message("line-too-long", i)
            else:
                self.add_message("line-too-long", line=i, args=(len(line), max_chars))

    @staticmethod
    def remove_pylint_option_from_lines(options_pattern_obj) -> str:
        """
        Remove the `# pylint ...` pattern from lines
        """
        lines = options_pattern_obj.string
        purged_lines = (
            lines[: options_pattern_obj.start(1)].rstrip()
            + lines[options_pattern_obj.end(1) :]
        )
        return purged_lines

    @staticmethod
    def is_line_length_check_activated(pylint_pattern_match_object) -> bool:
        """
        Return true if the line length check is activated
        """
        try:
            for pragma in parse_pragma(pylint_pattern_match_object.group(2)):
                if pragma.action == "disable" and "line-too-long" in pragma.messages:
                    return False
        except PragmaParserError:
            # Printing useful information dealing with this error is done in the lint package
            pass
        return True
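    # For illustration: on a line ending in ``# pylint: disable=line-too-long``,
    # OPTION_PO matches the pragma, is_line_length_check_activated() returns
    # False, and an over-long line is then recorded as ignored instead of being
    # reported.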
    @staticmethod
    def specific_splitlines(lines: str) -> List[str]:
        """
        Split lines according to universal newlines except those in a specific set
        """
        unsplit_ends = {
            "\v",
            "\x0b",
            "\f",
            "\x0c",
            "\x1c",
            "\x1d",
            "\x1e",
            "\x85",
            "\u2028",
            "\u2029",
        }
        res = []
        buffer = ""
        for atomic_line in lines.splitlines(True):
            if atomic_line[-1] not in unsplit_ends:
                res.append(buffer + atomic_line)
                buffer = ""
            else:
                buffer += atomic_line
        return res
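    # For illustration: characters such as form feed are line boundaries for
    # str.splitlines() but not for this checker, so they are glued back onto
    # the following chunk:
    #
    #     FormatChecker.specific_splitlines("a\nb\fc\n")  # -> ["a\n", "b\fc\n"]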
    def check_lines(self, lines: str, lineno: int) -> None:
        """
        Check lines have :
        - a final newline
        - no trailing whitespace
        - less than a maximum number of characters
        """
        # we're first going to do a rough check whether any lines in this set
        # go over the line limit. If none of them do, then we don't need to
        # parse out the pylint options later on and can just assume that these
        # lines are clean

        # we'll also handle the line ending check here to avoid double-iteration
        # unless the line lengths are suspect
        max_chars = self.config.max_line_length

        split_lines = self.specific_splitlines(lines)

        for offset, line in enumerate(split_lines):
            self.check_line_ending(line, lineno + offset)

        # hold onto the initial lineno for later
        potential_line_length_warning = False
        for offset, line in enumerate(split_lines):
            # this check is purposefully simple and doesn't rstrip
            # since this is running on every line you're checking it's
            # advantageous to avoid doing a lot of work
            if len(line) > max_chars:
                potential_line_length_warning = True
                break

        # if there were no lines passing the max_chars config, we don't bother
        # running the full line check (as we've met an even more strict condition)
        if not potential_line_length_warning:
            return

        # Line length check may be deactivated through `pylint: disable` comment
        mobj = OPTION_PO.search(lines)
        checker_off = False
        if mobj:
            if not self.is_line_length_check_activated(mobj):
                checker_off = True
            # The 'pylint: disable whatever' should not be taken into account for line length count
            lines = self.remove_pylint_option_from_lines(mobj)

        # here we re-run specific_splitlines since we have filtered out pylint options above
        for offset, line in enumerate(self.specific_splitlines(lines)):
            self.check_line_length(line, lineno + offset, checker_off)
    def check_indent_level(self, string, expected, line_num):
        """Check that the indent level of the string matches the expected one."""
        indent = self.config.indent_string
        if indent == "\\t":  # \t is not interpreted in the configuration file
            indent = "\t"
        level = 0
        unit_size = len(indent)
        while string[:unit_size] == indent:
            string = string[unit_size:]
            level += 1
        suppl = ""
        while string and string[0] in " \t":
            suppl += string[0]
            string = string[1:]
        if level != expected or suppl:
            i_type = "spaces"
            if indent[0] == "\t":
                i_type = "tabs"
            self.add_message(
                "bad-indentation",
                line=line_num,
                args=(level * unit_size + len(suppl), i_type, expected * unit_size),
            )
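    # For illustration: with the default indent-string of four spaces and an
    # expected level of 2, a line indented with six spaces gives level=1 plus
    # two supplementary spaces, so the message reads
    # "Bad indentation. Found 6 spaces, expected 8".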
def register(linter):
    """required method to auto register this checker"""
    linter.register_checker(FormatChecker(linter))