lexer.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. #
  2. # Copyright (C) 2009-2020 the sqlparse authors and contributors
  3. # <see AUTHORS file>
  4. #
  5. # This module is part of python-sqlparse and is released under
  6. # the BSD License: https://opensource.org/licenses/BSD-3-Clause
  7. """SQL Lexer"""
  8. # This code is based on the SqlLexer in pygments.
  9. # http://pygments.org/
  10. # It's separated from the rest of pygments to increase performance
  11. # and to allow some customizations.
  12. from io import TextIOBase
  13. from sqlparse import tokens
  14. from sqlparse.keywords import SQL_REGEX
  15. from sqlparse.utils import consume
  16. class Lexer:
  17. """Lexer
  18. Empty class. Leaving for backwards-compatibility
  19. """
  20. @staticmethod
  21. def get_tokens(text, encoding=None):
  22. """
  23. Return an iterable of (tokentype, value) pairs generated from
  24. `text`. If `unfiltered` is set to `True`, the filtering mechanism
  25. is bypassed even if filters are defined.
  26. Also preprocess the text, i.e. expand tabs and strip it if
  27. wanted and applies registered filters.
  28. Split ``text`` into (tokentype, text) pairs.
  29. ``stack`` is the initial stack (default: ``['root']``)
  30. """
  31. if isinstance(text, TextIOBase):
  32. text = text.read()
  33. if isinstance(text, str):
  34. pass
  35. elif isinstance(text, bytes):
  36. if encoding:
  37. text = text.decode(encoding)
  38. else:
  39. try:
  40. text = text.decode('utf-8')
  41. except UnicodeDecodeError:
  42. text = text.decode('unicode-escape')
  43. else:
  44. raise TypeError("Expected text or file-like object, got {!r}".
  45. format(type(text)))
  46. iterable = enumerate(text)
  47. for pos, char in iterable:
  48. for rexmatch, action in SQL_REGEX:
  49. m = rexmatch(text, pos)
  50. if not m:
  51. continue
  52. elif isinstance(action, tokens._TokenType):
  53. yield action, m.group()
  54. elif callable(action):
  55. yield action(m.group())
  56. consume(iterable, m.end() - pos - 1)
  57. break
  58. else:
  59. yield tokens.Error, char
  60. def tokenize(sql, encoding=None):
  61. """Tokenize sql.
  62. Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
  63. of ``(token type, value)`` items.
  64. """
  65. return Lexer().get_tokens(sql, encoding)