reflection.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. # mysql/reflection.py
  2. # Copyright (C) 2005-2022 the SQLAlchemy authors and contributors
  3. # <see AUTHORS file>
  4. #
  5. # This module is part of SQLAlchemy and is released under
  6. # the MIT License: https://www.opensource.org/licenses/mit-license.php
  7. import re
  8. from .enumerated import ENUM
  9. from .enumerated import SET
  10. from .types import DATETIME
  11. from .types import TIME
  12. from .types import TIMESTAMP
  13. from ... import log
  14. from ... import types as sqltypes
  15. from ... import util
  16. class ReflectedState(object):
  17. """Stores raw information about a SHOW CREATE TABLE statement."""
  18. def __init__(self):
  19. self.columns = []
  20. self.table_options = {}
  21. self.table_name = None
  22. self.keys = []
  23. self.fk_constraints = []
  24. self.ck_constraints = []
  25. @log.class_logger
  26. class MySQLTableDefinitionParser(object):
  27. """Parses the results of a SHOW CREATE TABLE statement."""
  28. def __init__(self, dialect, preparer):
  29. self.dialect = dialect
  30. self.preparer = preparer
  31. self._prep_regexes()
  32. def parse(self, show_create, charset):
  33. state = ReflectedState()
  34. state.charset = charset
  35. for line in re.split(r"\r?\n", show_create):
  36. if line.startswith(" " + self.preparer.initial_quote):
  37. self._parse_column(line, state)
  38. # a regular table options line
  39. elif line.startswith(") "):
  40. self._parse_table_options(line, state)
  41. # an ANSI-mode table options line
  42. elif line == ")":
  43. pass
  44. elif line.startswith("CREATE "):
  45. self._parse_table_name(line, state)
  46. # Not present in real reflection, but may be if
  47. # loading from a file.
  48. elif not line:
  49. pass
  50. else:
  51. type_, spec = self._parse_constraints(line)
  52. if type_ is None:
  53. util.warn("Unknown schema content: %r" % line)
  54. elif type_ == "key":
  55. state.keys.append(spec)
  56. elif type_ == "fk_constraint":
  57. state.fk_constraints.append(spec)
  58. elif type_ == "ck_constraint":
  59. state.ck_constraints.append(spec)
  60. else:
  61. pass
  62. return state
  63. def _parse_constraints(self, line):
  64. """Parse a KEY or CONSTRAINT line.
  65. :param line: A line of SHOW CREATE TABLE output
  66. """
  67. # KEY
  68. m = self._re_key.match(line)
  69. if m:
  70. spec = m.groupdict()
  71. # convert columns into name, length pairs
  72. # NOTE: we may want to consider SHOW INDEX as the
  73. # format of indexes in MySQL becomes more complex
  74. spec["columns"] = self._parse_keyexprs(spec["columns"])
  75. if spec["version_sql"]:
  76. m2 = self._re_key_version_sql.match(spec["version_sql"])
  77. if m2 and m2.groupdict()["parser"]:
  78. spec["parser"] = m2.groupdict()["parser"]
  79. if spec["parser"]:
  80. spec["parser"] = self.preparer.unformat_identifiers(
  81. spec["parser"]
  82. )[0]
  83. return "key", spec
  84. # FOREIGN KEY CONSTRAINT
  85. m = self._re_fk_constraint.match(line)
  86. if m:
  87. spec = m.groupdict()
  88. spec["table"] = self.preparer.unformat_identifiers(spec["table"])
  89. spec["local"] = [c[0] for c in self._parse_keyexprs(spec["local"])]
  90. spec["foreign"] = [
  91. c[0] for c in self._parse_keyexprs(spec["foreign"])
  92. ]
  93. return "fk_constraint", spec
  94. # CHECK constraint
  95. m = self._re_ck_constraint.match(line)
  96. if m:
  97. spec = m.groupdict()
  98. return "ck_constraint", spec
  99. # PARTITION and SUBPARTITION
  100. m = self._re_partition.match(line)
  101. if m:
  102. # Punt!
  103. return "partition", line
  104. # No match.
  105. return (None, line)
  106. def _parse_table_name(self, line, state):
  107. """Extract the table name.
  108. :param line: The first line of SHOW CREATE TABLE
  109. """
  110. regex, cleanup = self._pr_name
  111. m = regex.match(line)
  112. if m:
  113. state.table_name = cleanup(m.group("name"))
  114. def _parse_table_options(self, line, state):
  115. """Build a dictionary of all reflected table-level options.
  116. :param line: The final line of SHOW CREATE TABLE output.
  117. """
  118. options = {}
  119. if not line or line == ")":
  120. pass
  121. else:
  122. rest_of_line = line[:]
  123. for regex, cleanup in self._pr_options:
  124. m = regex.search(rest_of_line)
  125. if not m:
  126. continue
  127. directive, value = m.group("directive"), m.group("val")
  128. if cleanup:
  129. value = cleanup(value)
  130. options[directive.lower()] = value
  131. rest_of_line = regex.sub("", rest_of_line)
  132. for nope in ("auto_increment", "data directory", "index directory"):
  133. options.pop(nope, None)
  134. for opt, val in options.items():
  135. state.table_options["%s_%s" % (self.dialect.name, opt)] = val
  136. def _parse_column(self, line, state):
  137. """Extract column details.
  138. Falls back to a 'minimal support' variant if full parse fails.
  139. :param line: Any column-bearing line from SHOW CREATE TABLE
  140. """
  141. spec = None
  142. m = self._re_column.match(line)
  143. if m:
  144. spec = m.groupdict()
  145. spec["full"] = True
  146. else:
  147. m = self._re_column_loose.match(line)
  148. if m:
  149. spec = m.groupdict()
  150. spec["full"] = False
  151. if not spec:
  152. util.warn("Unknown column definition %r" % line)
  153. return
  154. if not spec["full"]:
  155. util.warn("Incomplete reflection of column definition %r" % line)
  156. name, type_, args = spec["name"], spec["coltype"], spec["arg"]
  157. try:
  158. col_type = self.dialect.ischema_names[type_]
  159. except KeyError:
  160. util.warn(
  161. "Did not recognize type '%s' of column '%s'" % (type_, name)
  162. )
  163. col_type = sqltypes.NullType
  164. # Column type positional arguments eg. varchar(32)
  165. if args is None or args == "":
  166. type_args = []
  167. elif args[0] == "'" and args[-1] == "'":
  168. type_args = self._re_csv_str.findall(args)
  169. else:
  170. type_args = [int(v) for v in self._re_csv_int.findall(args)]
  171. # Column type keyword options
  172. type_kw = {}
  173. if issubclass(col_type, (DATETIME, TIME, TIMESTAMP)):
  174. if type_args:
  175. type_kw["fsp"] = type_args.pop(0)
  176. for kw in ("unsigned", "zerofill"):
  177. if spec.get(kw, False):
  178. type_kw[kw] = True
  179. for kw in ("charset", "collate"):
  180. if spec.get(kw, False):
  181. type_kw[kw] = spec[kw]
  182. if issubclass(col_type, (ENUM, SET)):
  183. type_args = _strip_values(type_args)
  184. if issubclass(col_type, SET) and "" in type_args:
  185. type_kw["retrieve_as_bitwise"] = True
  186. type_instance = col_type(*type_args, **type_kw)
  187. col_kw = {}
  188. # NOT NULL
  189. col_kw["nullable"] = True
  190. # this can be "NULL" in the case of TIMESTAMP
  191. if spec.get("notnull", False) == "NOT NULL":
  192. col_kw["nullable"] = False
  193. # AUTO_INCREMENT
  194. if spec.get("autoincr", False):
  195. col_kw["autoincrement"] = True
  196. elif issubclass(col_type, sqltypes.Integer):
  197. col_kw["autoincrement"] = False
  198. # DEFAULT
  199. default = spec.get("default", None)
  200. if default == "NULL":
  201. # eliminates the need to deal with this later.
  202. default = None
  203. comment = spec.get("comment", None)
  204. if comment is not None:
  205. comment = comment.replace("\\\\", "\\").replace("''", "'")
  206. sqltext = spec.get("generated")
  207. if sqltext is not None:
  208. computed = dict(sqltext=sqltext)
  209. persisted = spec.get("persistence")
  210. if persisted is not None:
  211. computed["persisted"] = persisted == "STORED"
  212. col_kw["computed"] = computed
  213. col_d = dict(
  214. name=name, type=type_instance, default=default, comment=comment
  215. )
  216. col_d.update(col_kw)
  217. state.columns.append(col_d)
  218. def _describe_to_create(self, table_name, columns):
  219. """Re-format DESCRIBE output as a SHOW CREATE TABLE string.
  220. DESCRIBE is a much simpler reflection and is sufficient for
  221. reflecting views for runtime use. This method formats DDL
  222. for columns only- keys are omitted.
  223. :param columns: A sequence of DESCRIBE or SHOW COLUMNS 6-tuples.
  224. SHOW FULL COLUMNS FROM rows must be rearranged for use with
  225. this function.
  226. """
  227. buffer = []
  228. for row in columns:
  229. (name, col_type, nullable, default, extra) = [
  230. row[i] for i in (0, 1, 2, 4, 5)
  231. ]
  232. line = [" "]
  233. line.append(self.preparer.quote_identifier(name))
  234. line.append(col_type)
  235. if not nullable:
  236. line.append("NOT NULL")
  237. if default:
  238. if "auto_increment" in default:
  239. pass
  240. elif col_type.startswith("timestamp") and default.startswith(
  241. "C"
  242. ):
  243. line.append("DEFAULT")
  244. line.append(default)
  245. elif default == "NULL":
  246. line.append("DEFAULT")
  247. line.append(default)
  248. else:
  249. line.append("DEFAULT")
  250. line.append("'%s'" % default.replace("'", "''"))
  251. if extra:
  252. line.append(extra)
  253. buffer.append(" ".join(line))
  254. return "".join(
  255. [
  256. (
  257. "CREATE TABLE %s (\n"
  258. % self.preparer.quote_identifier(table_name)
  259. ),
  260. ",\n".join(buffer),
  261. "\n) ",
  262. ]
  263. )
  264. def _parse_keyexprs(self, identifiers):
  265. """Unpack '"col"(2),"col" ASC'-ish strings into components."""
  266. return self._re_keyexprs.findall(identifiers)
  267. def _prep_regexes(self):
  268. """Pre-compile regular expressions."""
  269. self._re_columns = []
  270. self._pr_options = []
  271. _final = self.preparer.final_quote
  272. quotes = dict(
  273. zip(
  274. ("iq", "fq", "esc_fq"),
  275. [
  276. re.escape(s)
  277. for s in (
  278. self.preparer.initial_quote,
  279. _final,
  280. self.preparer._escape_identifier(_final),
  281. )
  282. ],
  283. )
  284. )
  285. self._pr_name = _pr_compile(
  286. r"^CREATE (?:\w+ +)?TABLE +"
  287. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($" % quotes,
  288. self.preparer._unescape_identifier,
  289. )
  290. # `col`,`col2`(32),`col3`(15) DESC
  291. #
  292. self._re_keyexprs = _re_compile(
  293. r"(?:"
  294. r"(?:%(iq)s((?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)"
  295. r"(?:\((\d+)\))?(?: +(ASC|DESC))?(?=\,|$))+" % quotes
  296. )
  297. # 'foo' or 'foo','bar' or 'fo,o','ba''a''r'
  298. self._re_csv_str = _re_compile(r"\x27(?:\x27\x27|[^\x27])*\x27")
  299. # 123 or 123,456
  300. self._re_csv_int = _re_compile(r"\d+")
  301. # `colname` <type> [type opts]
  302. # (NOT NULL | NULL)
  303. # DEFAULT ('value' | CURRENT_TIMESTAMP...)
  304. # COMMENT 'comment'
  305. # COLUMN_FORMAT (FIXED|DYNAMIC|DEFAULT)
  306. # STORAGE (DISK|MEMORY)
  307. self._re_column = _re_compile(
  308. r" "
  309. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  310. r"(?P<coltype>\w+)"
  311. r"(?:\((?P<arg>(?:\d+|\d+,\d+|"
  312. r"(?:'(?:''|[^'])*',?)+))\))?"
  313. r"(?: +(?P<unsigned>UNSIGNED))?"
  314. r"(?: +(?P<zerofill>ZEROFILL))?"
  315. r"(?: +CHARACTER SET +(?P<charset>[\w_]+))?"
  316. r"(?: +COLLATE +(?P<collate>[\w_]+))?"
  317. r"(?: +(?P<notnull>(?:NOT )?NULL))?"
  318. r"(?: +DEFAULT +(?P<default>"
  319. r"(?:NULL|'(?:''|[^'])*'|[\-\w\.\(\)]+"
  320. r"(?: +ON UPDATE [\-\w\.\(\)]+)?)"
  321. r"))?"
  322. r"(?: +(?:GENERATED ALWAYS)? ?AS +(?P<generated>\("
  323. r".*\))? ?(?P<persistence>VIRTUAL|STORED)?)?"
  324. r"(?: +(?P<autoincr>AUTO_INCREMENT))?"
  325. r"(?: +COMMENT +'(?P<comment>(?:''|[^'])*)')?"
  326. r"(?: +COLUMN_FORMAT +(?P<colfmt>\w+))?"
  327. r"(?: +STORAGE +(?P<storage>\w+))?"
  328. r"(?: +(?P<extra>.*))?"
  329. r",?$" % quotes
  330. )
  331. # Fallback, try to parse as little as possible
  332. self._re_column_loose = _re_compile(
  333. r" "
  334. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  335. r"(?P<coltype>\w+)"
  336. r"(?:\((?P<arg>(?:\d+|\d+,\d+|\x27(?:\x27\x27|[^\x27])+\x27))\))?"
  337. r".*?(?P<notnull>(?:NOT )NULL)?" % quotes
  338. )
  339. # (PRIMARY|UNIQUE|FULLTEXT|SPATIAL) INDEX `name` (USING (BTREE|HASH))?
  340. # (`col` (ASC|DESC)?, `col` (ASC|DESC)?)
  341. # KEY_BLOCK_SIZE size | WITH PARSER name /*!50100 WITH PARSER name */
  342. self._re_key = _re_compile(
  343. r" "
  344. r"(?:(?P<type>\S+) )?KEY"
  345. r"(?: +%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)?"
  346. r"(?: +USING +(?P<using_pre>\S+))?"
  347. r" +\((?P<columns>.+?)\)"
  348. r"(?: +USING +(?P<using_post>\S+))?"
  349. r"(?: +KEY_BLOCK_SIZE *[ =]? *(?P<keyblock>\S+))?"
  350. r"(?: +WITH PARSER +(?P<parser>\S+))?"
  351. r"(?: +COMMENT +(?P<comment>(\x27\x27|\x27([^\x27])*?\x27)+))?"
  352. r"(?: +/\*(?P<version_sql>.+)\*/ *)?"
  353. r",?$" % quotes
  354. )
  355. # https://forums.mysql.com/read.php?20,567102,567111#msg-567111
  356. # It means if the MySQL version >= \d+, execute what's in the comment
  357. self._re_key_version_sql = _re_compile(
  358. r"\!\d+ " r"(?: *WITH PARSER +(?P<parser>\S+) *)?"
  359. )
  360. # CONSTRAINT `name` FOREIGN KEY (`local_col`)
  361. # REFERENCES `remote` (`remote_col`)
  362. # MATCH FULL | MATCH PARTIAL | MATCH SIMPLE
  363. # ON DELETE CASCADE ON UPDATE RESTRICT
  364. #
  365. # unique constraints come back as KEYs
  366. kw = quotes.copy()
  367. kw["on"] = "RESTRICT|CASCADE|SET NULL|NO ACTION"
  368. self._re_fk_constraint = _re_compile(
  369. r" "
  370. r"CONSTRAINT +"
  371. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  372. r"FOREIGN KEY +"
  373. r"\((?P<local>[^\)]+?)\) REFERENCES +"
  374. r"(?P<table>%(iq)s[^%(fq)s]+%(fq)s"
  375. r"(?:\.%(iq)s[^%(fq)s]+%(fq)s)?) +"
  376. r"\((?P<foreign>[^\)]+?)\)"
  377. r"(?: +(?P<match>MATCH \w+))?"
  378. r"(?: +ON DELETE (?P<ondelete>%(on)s))?"
  379. r"(?: +ON UPDATE (?P<onupdate>%(on)s))?" % kw
  380. )
  381. # CONSTRAINT `CONSTRAINT_1` CHECK (`x` > 5)'
  382. # testing on MariaDB 10.2 shows that the CHECK constraint
  383. # is returned on a line by itself, so to match without worrying
  384. # about parenthesis in the expression we go to the end of the line
  385. self._re_ck_constraint = _re_compile(
  386. r" "
  387. r"CONSTRAINT +"
  388. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  389. r"CHECK +"
  390. r"\((?P<sqltext>.+)\),?" % kw
  391. )
  392. # PARTITION
  393. #
  394. # punt!
  395. self._re_partition = _re_compile(r"(?:.*)(?:SUB)?PARTITION(?:.*)")
  396. # Table-level options (COLLATE, ENGINE, etc.)
  397. # Do the string options first, since they have quoted
  398. # strings we need to get rid of.
  399. for option in _options_of_type_string:
  400. self._add_option_string(option)
  401. for option in (
  402. "ENGINE",
  403. "TYPE",
  404. "AUTO_INCREMENT",
  405. "AVG_ROW_LENGTH",
  406. "CHARACTER SET",
  407. "DEFAULT CHARSET",
  408. "CHECKSUM",
  409. "COLLATE",
  410. "DELAY_KEY_WRITE",
  411. "INSERT_METHOD",
  412. "MAX_ROWS",
  413. "MIN_ROWS",
  414. "PACK_KEYS",
  415. "ROW_FORMAT",
  416. "KEY_BLOCK_SIZE",
  417. ):
  418. self._add_option_word(option)
  419. self._add_option_regex("UNION", r"\([^\)]+\)")
  420. self._add_option_regex("TABLESPACE", r".*? STORAGE DISK")
  421. self._add_option_regex(
  422. "RAID_TYPE",
  423. r"\w+\s+RAID_CHUNKS\s*\=\s*\w+RAID_CHUNKSIZE\s*=\s*\w+",
  424. )
    # Separator between an option name and its value: either an "=" with
    # optional whitespace on both sides, or at least one whitespace character.
    _optional_equals = r"(?:\s*(?:=\s*)|\s+)"
  426. def _add_option_string(self, directive):
  427. regex = r"(?P<directive>%s)%s" r"'(?P<val>(?:[^']|'')*?)'(?!')" % (
  428. re.escape(directive),
  429. self._optional_equals,
  430. )
  431. self._pr_options.append(
  432. _pr_compile(
  433. regex, lambda v: v.replace("\\\\", "\\").replace("''", "'")
  434. )
  435. )
  436. def _add_option_word(self, directive):
  437. regex = r"(?P<directive>%s)%s" r"(?P<val>\w+)" % (
  438. re.escape(directive),
  439. self._optional_equals,
  440. )
  441. self._pr_options.append(_pr_compile(regex))
  442. def _add_option_regex(self, directive, regex):
  443. regex = r"(?P<directive>%s)%s" r"(?P<val>%s)" % (
  444. re.escape(directive),
  445. self._optional_equals,
  446. regex,
  447. )
  448. self._pr_options.append(_pr_compile(regex))
# Table options whose values are matched as single-quoted strings;
# MySQLTableDefinitionParser._prep_regexes passes each name to
# _add_option_string.
_options_of_type_string = (
    "COMMENT",
    "DATA DIRECTORY",
    "INDEX DIRECTORY",
    "PASSWORD",
    "CONNECTION",
)
  456. def _pr_compile(regex, cleanup=None):
  457. """Prepare a 2-tuple of compiled regex and callable."""
  458. return (_re_compile(regex), cleanup)
  459. def _re_compile(regex):
  460. """Compile a string to regex, I and UNICODE."""
  461. return re.compile(regex, re.I | re.UNICODE)
  462. def _strip_values(values):
  463. "Strip reflected values quotes"
  464. strip_values = []
  465. for a in values:
  466. if a[0:1] == '"' or a[0:1] == "'":
  467. # strip enclosing quotes and unquote interior
  468. a = a[1:-1].replace(a[0] * 2, a[0])
  469. strip_values.append(a)
  470. return strip_values