#
# Copyright (C) 2009-2020 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

"""SQL Lexer"""

# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.

from io import TextIOBase

from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
from sqlparse.utils import consume


class Lexer:
    """Lexer

    Empty class, kept only for backwards compatibility; tokenization is
    done by the static :meth:`get_tokens` method.
    """

    @staticmethod
    def get_tokens(text, encoding=None):
        """Split ``text`` into an iterable of ``(tokentype, value)`` pairs.

        ``text`` may be a ``str``, ``bytes``, or a file-like object.
        ``bytes`` input is decoded with ``encoding`` if given, otherwise
        with UTF-8, falling back to ``unicode-escape`` on decode errors.
        """
        if isinstance(text, TextIOBase):
            text = text.read()

        if isinstance(text, str):
            pass
        elif isinstance(text, bytes):
            if encoding:
                text = text.decode(encoding)
            else:
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError:
                    text = text.decode('unicode-escape')
        else:
            raise TypeError("Expected text or file-like object, got {!r}"
                            .format(type(text)))

        iterable = enumerate(text)
        for pos, char in iterable:
            # Try each (regex, action) pair in order; the first match wins.
            for rexmatch, action in SQL_REGEX:
                m = rexmatch(text, pos)

                if not m:
                    continue
                elif isinstance(action, tokens._TokenType):
                    yield action, m.group()
                elif callable(action):
                    yield action(m.group())

                # Advance past the characters consumed by this match.
                consume(iterable, m.end() - pos - 1)
                break
            else:
                # No pattern matched at this position: emit an error token.
                yield tokens.Error, char


def tokenize(sql, encoding=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
    return Lexer().get_tokens(sql, encoding)
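

# A brief usage sketch, not part of the original module: ``tokenize`` is the
# module-level convenience wrapper around ``Lexer.get_tokens``. The SQL
# statement below is only an assumed example input.
if __name__ == '__main__':
    for ttype, value in tokenize('SELECT id, name FROM users;'):
        print(ttype, repr(value))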