parse.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. """Defines parsing functions used by isort for parsing import definitions"""
  2. from collections import OrderedDict, defaultdict
  3. from functools import partial
  4. from itertools import chain
  5. from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Tuple
  6. from warnings import warn
  7. from . import place
  8. from .comments import parse as parse_comments
  9. from .exceptions import MissingSection
  10. from .settings import DEFAULT_CONFIG, Config
# These aliases exist only for static type checkers: the block is skipped at runtime,
# so mypy_extensions is not imported when the module actually executes.
if TYPE_CHECKING:
    from mypy_extensions import TypedDict

    # Shape of the "above" entry of CommentsDict: one mapping per import style.
    CommentsAboveDict = TypedDict(
        "CommentsAboveDict", {"straight": Dict[str, Any], "from": Dict[str, Any]}
    )

    # Shape of the categorized comments mapping built while parsing a file.
    CommentsDict = TypedDict(
        "CommentsDict",
        {
            "from": Dict[str, Any],
            "straight": Dict[str, Any],
            "nested": Dict[str, Any],
            "above": CommentsAboveDict,
        },
    )
  25. def _infer_line_separator(contents: str) -> str:
  26. if "\r\n" in contents:
  27. return "\r\n"
  28. if "\r" in contents:
  29. return "\r"
  30. return "\n"
  31. def _normalize_line(raw_line: str) -> Tuple[str, str]:
  32. """Normalizes import related statements in the provided line.
  33. Returns (normalized_line: str, raw_line: str)
  34. """
  35. line = raw_line.replace("from.import ", "from . import ")
  36. line = line.replace("from.cimport ", "from . cimport ")
  37. line = line.replace("import*", "import *")
  38. line = line.replace(" .import ", " . import ")
  39. line = line.replace(" .cimport ", " . cimport ")
  40. line = line.replace("\t", " ")
  41. return (line, raw_line)
  42. def import_type(line: str, config: Config = DEFAULT_CONFIG) -> Optional[str]:
  43. """If the current line is an import line it will return its type (from or straight)"""
  44. if config.honor_noqa and line.lower().rstrip().endswith("noqa"):
  45. return None
  46. if "isort:skip" in line or "isort: skip" in line or "isort: split" in line:
  47. return None
  48. if line.startswith(("import ", "cimport ")):
  49. return "straight"
  50. if line.startswith("from "):
  51. return "from"
  52. return None
  53. def _strip_syntax(import_string: str) -> str:
  54. import_string = import_string.replace("_import", "[[i]]")
  55. import_string = import_string.replace("_cimport", "[[ci]]")
  56. for remove_syntax in ["\\", "(", ")", ","]:
  57. import_string = import_string.replace(remove_syntax, " ")
  58. import_list = import_string.split()
  59. for key in ("from", "import", "cimport"):
  60. if key in import_list:
  61. import_list.remove(key)
  62. import_string = " ".join(import_list)
  63. import_string = import_string.replace("[[i]]", "_import")
  64. import_string = import_string.replace("[[ci]]", "_cimport")
  65. return import_string.replace("{ ", "{|").replace(" }", "|}")
  66. def skip_line(
  67. line: str,
  68. in_quote: str,
  69. index: int,
  70. section_comments: Tuple[str, ...],
  71. needs_import: bool = True,
  72. ) -> Tuple[bool, str]:
  73. """Determine if a given line should be skipped.
  74. Returns back a tuple containing:
  75. (skip_line: bool,
  76. in_quote: str,)
  77. """
  78. should_skip = bool(in_quote)
  79. if '"' in line or "'" in line:
  80. char_index = 0
  81. while char_index < len(line):
  82. if line[char_index] == "\\":
  83. char_index += 1
  84. elif in_quote:
  85. if line[char_index : char_index + len(in_quote)] == in_quote:
  86. in_quote = ""
  87. elif line[char_index] in ("'", '"'):
  88. long_quote = line[char_index : char_index + 3]
  89. if long_quote in ('"""', "'''"):
  90. in_quote = long_quote
  91. char_index += 2
  92. else:
  93. in_quote = line[char_index]
  94. elif line[char_index] == "#":
  95. break
  96. char_index += 1
  97. if ";" in line.split("#")[0] and needs_import:
  98. for part in (part.strip() for part in line.split(";")):
  99. if (
  100. part
  101. and not part.startswith("from ")
  102. and not part.startswith(("import ", "cimport "))
  103. ):
  104. should_skip = True
  105. return (bool(should_skip or in_quote), in_quote)
class ParsedContent(NamedTuple):
    """Result of parsing a file's imports, as returned by `file_contents`."""

    # Input split into lines; a trailing "" is appended when the input ends in a newline.
    in_lines: List[str]
    # Lines that were not consumed as import statements, in their original order.
    lines_without_imports: List[str]
    # Line index recorded for the import block; stays -1 when nothing was found.
    import_index: int
    # Section name -> list (created empty) for each "isort:imports-<section>" marker.
    place_imports: Dict[str, List[str]]
    # Marker comment line -> upper-cased section name it refers to.
    import_placements: Dict[str, str]
    # "straight" / "from" -> module name -> list of `as` aliases seen for it.
    as_map: Dict[str, Dict[str, List[str]]]
    # Section name -> {"straight": ..., "from": ...} mappings of the parsed imports.
    imports: Dict[str, Dict[str, Any]]
    # Comments grouped under the "from", "straight", "nested" and "above" keys.
    categorized_comments: "CommentsDict"
    # len(lines_without_imports) - original_line_count.
    change_count: int
    # Number of entries in in_lines.
    original_line_count: int
    # Configured line ending, or the one inferred from the input when none is configured.
    line_separator: str
    # The `sections` value of the config used for parsing.
    sections: Any
    # Placement messages collected instead of printed (verbose with only_modified set).
    verbose_output: List[str]
  120. def file_contents(contents: str, config: Config = DEFAULT_CONFIG) -> ParsedContent:
  121. """Parses a python file taking out and categorizing imports."""
  122. line_separator: str = config.line_ending or _infer_line_separator(contents)
  123. in_lines = contents.splitlines()
  124. if contents and contents[-1] in ("\n", "\r"):
  125. in_lines.append("")
  126. out_lines = []
  127. original_line_count = len(in_lines)
  128. if config.old_finders:
  129. from .deprecated.finders import FindersManager
  130. finder = FindersManager(config=config).find
  131. else:
  132. finder = partial(place.module, config=config)
  133. line_count = len(in_lines)
  134. place_imports: Dict[str, List[str]] = {}
  135. import_placements: Dict[str, str] = {}
  136. as_map: Dict[str, Dict[str, List[str]]] = {
  137. "straight": defaultdict(list),
  138. "from": defaultdict(list),
  139. }
  140. imports: OrderedDict[str, Dict[str, Any]] = OrderedDict()
  141. verbose_output: List[str] = []
  142. for section in chain(config.sections, config.forced_separate):
  143. imports[section] = {"straight": OrderedDict(), "from": OrderedDict()}
  144. categorized_comments: CommentsDict = {
  145. "from": {},
  146. "straight": {},
  147. "nested": {},
  148. "above": {"straight": {}, "from": {}},
  149. }
  150. index = 0
  151. import_index = -1
  152. in_quote = ""
  153. while index < line_count:
  154. line = in_lines[index]
  155. index += 1
  156. statement_index = index
  157. (skipping_line, in_quote) = skip_line(
  158. line, in_quote=in_quote, index=index, section_comments=config.section_comments
  159. )
  160. if (
  161. line in config.section_comments or line in config.section_comments_end
  162. ) and not skipping_line:
  163. if import_index == -1: # pragma: no branch
  164. import_index = index - 1
  165. continue
  166. if "isort:imports-" in line and line.startswith("#"):
  167. section = line.split("isort:imports-")[-1].split()[0].upper()
  168. place_imports[section] = []
  169. import_placements[line] = section
  170. elif "isort: imports-" in line and line.startswith("#"):
  171. section = line.split("isort: imports-")[-1].split()[0].upper()
  172. place_imports[section] = []
  173. import_placements[line] = section
  174. if skipping_line:
  175. out_lines.append(line)
  176. continue
  177. lstripped_line = line.lstrip()
  178. if (
  179. config.float_to_top
  180. and import_index == -1
  181. and line
  182. and not in_quote
  183. and not lstripped_line.startswith("#")
  184. and not lstripped_line.startswith("'''")
  185. and not lstripped_line.startswith('"""')
  186. ):
  187. if not lstripped_line.startswith("import") and not lstripped_line.startswith("from"):
  188. import_index = index - 1
  189. while import_index and not in_lines[import_index - 1]:
  190. import_index -= 1
  191. else:
  192. commentless = line.split("#", 1)[0].strip()
  193. if (
  194. ("isort:skip" in line or "isort: skip" in line)
  195. and "(" in commentless
  196. and ")" not in commentless
  197. ):
  198. import_index = index
  199. starting_line = line
  200. while "isort:skip" in starting_line or "isort: skip" in starting_line:
  201. commentless = starting_line.split("#", 1)[0]
  202. if (
  203. "(" in commentless
  204. and not commentless.rstrip().endswith(")")
  205. and import_index < line_count
  206. ):
  207. while import_index < line_count and not commentless.rstrip().endswith(
  208. ")"
  209. ):
  210. commentless = in_lines[import_index].split("#", 1)[0]
  211. import_index += 1
  212. else:
  213. import_index += 1
  214. if import_index >= line_count:
  215. break
  216. starting_line = in_lines[import_index]
  217. line, *end_of_line_comment = line.split("#", 1)
  218. if ";" in line:
  219. statements = [line.strip() for line in line.split(";")]
  220. else:
  221. statements = [line]
  222. if end_of_line_comment:
  223. statements[-1] = f"{statements[-1]}#{end_of_line_comment[0]}"
  224. for statement in statements:
  225. line, raw_line = _normalize_line(statement)
  226. type_of_import = import_type(line, config) or ""
  227. raw_lines = [raw_line]
  228. if not type_of_import:
  229. out_lines.append(raw_line)
  230. continue
  231. if import_index == -1:
  232. import_index = index - 1
  233. nested_comments = {}
  234. import_string, comment = parse_comments(line)
  235. comments = [comment] if comment else []
  236. line_parts = [part for part in _strip_syntax(import_string).strip().split(" ") if part]
  237. if type_of_import == "from" and len(line_parts) == 2 and comments:
  238. nested_comments[line_parts[-1]] = comments[0]
  239. if "(" in line.split("#", 1)[0] and index < line_count:
  240. while not line.split("#")[0].strip().endswith(")") and index < line_count:
  241. line, new_comment = parse_comments(in_lines[index])
  242. index += 1
  243. if new_comment:
  244. comments.append(new_comment)
  245. stripped_line = _strip_syntax(line).strip()
  246. if (
  247. type_of_import == "from"
  248. and stripped_line
  249. and " " not in stripped_line.replace(" as ", "")
  250. and new_comment
  251. ):
  252. nested_comments[stripped_line] = comments[-1]
  253. import_string += line_separator + line
  254. raw_lines.append(line)
  255. else:
  256. while line.strip().endswith("\\"):
  257. line, new_comment = parse_comments(in_lines[index])
  258. line = line.lstrip()
  259. index += 1
  260. if new_comment:
  261. comments.append(new_comment)
  262. # Still need to check for parentheses after an escaped line
  263. if (
  264. "(" in line.split("#")[0]
  265. and ")" not in line.split("#")[0]
  266. and index < line_count
  267. ):
  268. stripped_line = _strip_syntax(line).strip()
  269. if (
  270. type_of_import == "from"
  271. and stripped_line
  272. and " " not in stripped_line.replace(" as ", "")
  273. and new_comment
  274. ):
  275. nested_comments[stripped_line] = comments[-1]
  276. import_string += line_separator + line
  277. raw_lines.append(line)
  278. while not line.split("#")[0].strip().endswith(")") and index < line_count:
  279. line, new_comment = parse_comments(in_lines[index])
  280. index += 1
  281. if new_comment:
  282. comments.append(new_comment)
  283. stripped_line = _strip_syntax(line).strip()
  284. if (
  285. type_of_import == "from"
  286. and stripped_line
  287. and " " not in stripped_line.replace(" as ", "")
  288. and new_comment
  289. ):
  290. nested_comments[stripped_line] = comments[-1]
  291. import_string += line_separator + line
  292. raw_lines.append(line)
  293. stripped_line = _strip_syntax(line).strip()
  294. if (
  295. type_of_import == "from"
  296. and stripped_line
  297. and " " not in stripped_line.replace(" as ", "")
  298. and new_comment
  299. ):
  300. nested_comments[stripped_line] = comments[-1]
  301. if import_string.strip().endswith(
  302. (" import", " cimport")
  303. ) or line.strip().startswith(("import ", "cimport ")):
  304. import_string += line_separator + line
  305. else:
  306. import_string = import_string.rstrip().rstrip("\\") + " " + line.lstrip()
  307. if type_of_import == "from":
  308. cimports: bool
  309. import_string = (
  310. import_string.replace("import(", "import (")
  311. .replace("\\", " ")
  312. .replace("\n", " ")
  313. )
  314. if "import " not in import_string:
  315. out_lines.extend(raw_lines)
  316. continue
  317. if " cimport " in import_string:
  318. parts = import_string.split(" cimport ")
  319. cimports = True
  320. else:
  321. parts = import_string.split(" import ")
  322. cimports = False
  323. from_import = parts[0].split(" ")
  324. import_string = (" cimport " if cimports else " import ").join(
  325. [from_import[0] + " " + "".join(from_import[1:])] + parts[1:]
  326. )
  327. just_imports = [
  328. item.replace("{|", "{ ").replace("|}", " }")
  329. for item in _strip_syntax(import_string).split()
  330. ]
  331. attach_comments_to: Optional[List[Any]] = None
  332. direct_imports = just_imports[1:]
  333. straight_import = True
  334. top_level_module = ""
  335. if "as" in just_imports and (just_imports.index("as") + 1) < len(just_imports):
  336. straight_import = False
  337. while "as" in just_imports:
  338. nested_module = None
  339. as_index = just_imports.index("as")
  340. if type_of_import == "from":
  341. nested_module = just_imports[as_index - 1]
  342. top_level_module = just_imports[0]
  343. module = top_level_module + "." + nested_module
  344. as_name = just_imports[as_index + 1]
  345. direct_imports.remove(nested_module)
  346. direct_imports.remove(as_name)
  347. direct_imports.remove("as")
  348. if nested_module == as_name and config.remove_redundant_aliases:
  349. pass
  350. elif as_name not in as_map["from"][module]: # pragma: no branch
  351. as_map["from"][module].append(as_name)
  352. full_name = f"{nested_module} as {as_name}"
  353. associated_comment = nested_comments.get(full_name)
  354. if associated_comment:
  355. categorized_comments["nested"].setdefault(top_level_module, {})[
  356. full_name
  357. ] = associated_comment
  358. if associated_comment in comments: # pragma: no branch
  359. comments.pop(comments.index(associated_comment))
  360. else:
  361. module = just_imports[as_index - 1]
  362. as_name = just_imports[as_index + 1]
  363. if module == as_name and config.remove_redundant_aliases:
  364. pass
  365. elif as_name not in as_map["straight"][module]:
  366. as_map["straight"][module].append(as_name)
  367. if comments and attach_comments_to is None:
  368. if nested_module and config.combine_as_imports:
  369. attach_comments_to = categorized_comments["from"].setdefault(
  370. f"{top_level_module}.__combined_as__", []
  371. )
  372. else:
  373. if type_of_import == "from" or (
  374. config.remove_redundant_aliases and as_name == module.split(".")[-1]
  375. ):
  376. attach_comments_to = categorized_comments["straight"].setdefault(
  377. module, []
  378. )
  379. else:
  380. attach_comments_to = categorized_comments["straight"].setdefault(
  381. f"{module} as {as_name}", []
  382. )
  383. del just_imports[as_index : as_index + 2]
  384. if type_of_import == "from":
  385. import_from = just_imports.pop(0)
  386. placed_module = finder(import_from)
  387. if config.verbose and not config.only_modified:
  388. print(f"from-type place_module for {import_from} returned {placed_module}")
  389. elif config.verbose:
  390. verbose_output.append(
  391. f"from-type place_module for {import_from} returned {placed_module}"
  392. )
  393. if placed_module == "":
  394. warn(
  395. f"could not place module {import_from} of line {line} --"
  396. " Do you need to define a default section?"
  397. )
  398. if placed_module and placed_module not in imports:
  399. raise MissingSection(import_module=import_from, section=placed_module)
  400. root = imports[placed_module][type_of_import] # type: ignore
  401. for import_name in just_imports:
  402. associated_comment = nested_comments.get(import_name)
  403. if associated_comment:
  404. categorized_comments["nested"].setdefault(import_from, {})[
  405. import_name
  406. ] = associated_comment
  407. if associated_comment in comments: # pragma: no branch
  408. comments.pop(comments.index(associated_comment))
  409. if (
  410. config.force_single_line
  411. and comments
  412. and attach_comments_to is None
  413. and len(just_imports) == 1
  414. ):
  415. nested_from_comments = categorized_comments["nested"].setdefault(
  416. import_from, {}
  417. )
  418. existing_comment = nested_from_comments.get(just_imports[0], "")
  419. nested_from_comments[
  420. just_imports[0]
  421. ] = f"{existing_comment}{'; ' if existing_comment else ''}{'; '.join(comments)}"
  422. comments = []
  423. if comments and attach_comments_to is None:
  424. attach_comments_to = categorized_comments["from"].setdefault(import_from, [])
  425. if len(out_lines) > max(import_index, 1) - 1:
  426. last = out_lines[-1].rstrip() if out_lines else ""
  427. while (
  428. last.startswith("#")
  429. and not last.endswith('"""')
  430. and not last.endswith("'''")
  431. and "isort:imports-" not in last
  432. and "isort: imports-" not in last
  433. and not config.treat_all_comments_as_code
  434. and not last.strip() in config.treat_comments_as_code
  435. ):
  436. categorized_comments["above"]["from"].setdefault(import_from, []).insert(
  437. 0, out_lines.pop(-1)
  438. )
  439. if out_lines:
  440. last = out_lines[-1].rstrip()
  441. else:
  442. last = ""
  443. if statement_index - 1 == import_index: # pragma: no cover
  444. import_index -= len(
  445. categorized_comments["above"]["from"].get(import_from, [])
  446. )
  447. if import_from not in root:
  448. root[import_from] = OrderedDict(
  449. (module, module in direct_imports) for module in just_imports
  450. )
  451. else:
  452. root[import_from].update(
  453. (module, root[import_from].get(module, False) or module in direct_imports)
  454. for module in just_imports
  455. )
  456. if comments and attach_comments_to is not None:
  457. attach_comments_to.extend(comments)
  458. else:
  459. if comments and attach_comments_to is not None:
  460. attach_comments_to.extend(comments)
  461. comments = []
  462. for module in just_imports:
  463. if comments:
  464. categorized_comments["straight"][module] = comments
  465. comments = []
  466. if len(out_lines) > max(import_index, +1, 1) - 1:
  467. last = out_lines[-1].rstrip() if out_lines else ""
  468. while (
  469. last.startswith("#")
  470. and not last.endswith('"""')
  471. and not last.endswith("'''")
  472. and "isort:imports-" not in last
  473. and "isort: imports-" not in last
  474. and not config.treat_all_comments_as_code
  475. and not last.strip() in config.treat_comments_as_code
  476. ):
  477. categorized_comments["above"]["straight"].setdefault(module, []).insert(
  478. 0, out_lines.pop(-1)
  479. )
  480. if out_lines:
  481. last = out_lines[-1].rstrip()
  482. else:
  483. last = ""
  484. if index - 1 == import_index:
  485. import_index -= len(
  486. categorized_comments["above"]["straight"].get(module, [])
  487. )
  488. placed_module = finder(module)
  489. if config.verbose and not config.only_modified:
  490. print(f"else-type place_module for {module} returned {placed_module}")
  491. elif config.verbose:
  492. verbose_output.append(
  493. f"else-type place_module for {module} returned {placed_module}"
  494. )
  495. if placed_module == "":
  496. warn(
  497. f"could not place module {module} of line {line} --"
  498. " Do you need to define a default section?"
  499. )
  500. imports.setdefault("", {"straight": OrderedDict(), "from": OrderedDict()})
  501. if placed_module and placed_module not in imports:
  502. raise MissingSection(import_module=module, section=placed_module)
  503. straight_import |= imports[placed_module][type_of_import].get( # type: ignore
  504. module, False
  505. )
  506. imports[placed_module][type_of_import][module] = straight_import # type: ignore
  507. change_count = len(out_lines) - original_line_count
  508. return ParsedContent(
  509. in_lines=in_lines,
  510. lines_without_imports=out_lines,
  511. import_index=import_index,
  512. place_imports=place_imports,
  513. import_placements=import_placements,
  514. as_map=as_map,
  515. imports=imports,
  516. categorized_comments=categorized_comments,
  517. change_count=change_count,
  518. original_line_count=original_line_count,
  519. line_separator=line_separator,
  520. sections=config.sections,
  521. verbose_output=verbose_output,
  522. )