array.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. # postgresql/array.py
  2. # Copyright (C) 2005-2022 the SQLAlchemy authors and contributors
  3. # <see AUTHORS file>
  4. #
  5. # This module is part of SQLAlchemy and is released under
  6. # the MIT License: https://www.opensource.org/licenses/mit-license.php
  7. import re
  8. from ... import types as sqltypes
  9. from ... import util
  10. from ...sql import coercions
  11. from ...sql import expression
  12. from ...sql import operators
  13. from ...sql import roles
  14. def Any(other, arrexpr, operator=operators.eq):
  15. """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.any` method.
  16. See that method for details.
  17. """
  18. return arrexpr.any(other, operator)
  19. def All(other, arrexpr, operator=operators.eq):
  20. """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.all` method.
  21. See that method for details.
  22. """
  23. return arrexpr.all(other, operator)
  24. class array(expression.ClauseList, expression.ColumnElement):
  25. """A PostgreSQL ARRAY literal.
  26. This is used to produce ARRAY literals in SQL expressions, e.g.::
  27. from sqlalchemy.dialects.postgresql import array
  28. from sqlalchemy.dialects import postgresql
  29. from sqlalchemy import select, func
  30. stmt = select(array([1,2]) + array([3,4,5]))
  31. print(stmt.compile(dialect=postgresql.dialect()))
  32. Produces the SQL::
  33. SELECT ARRAY[%(param_1)s, %(param_2)s] ||
  34. ARRAY[%(param_3)s, %(param_4)s, %(param_5)s]) AS anon_1
  35. An instance of :class:`.array` will always have the datatype
  36. :class:`_types.ARRAY`. The "inner" type of the array is inferred from
  37. the values present, unless the ``type_`` keyword argument is passed::
  38. array(['foo', 'bar'], type_=CHAR)
  39. Multidimensional arrays are produced by nesting :class:`.array` constructs.
  40. The dimensionality of the final :class:`_types.ARRAY`
  41. type is calculated by
  42. recursively adding the dimensions of the inner :class:`_types.ARRAY`
  43. type::
  44. stmt = select(
  45. array([
  46. array([1, 2]), array([3, 4]), array([column('q'), column('x')])
  47. ])
  48. )
  49. print(stmt.compile(dialect=postgresql.dialect()))
  50. Produces::
  51. SELECT ARRAY[ARRAY[%(param_1)s, %(param_2)s],
  52. ARRAY[%(param_3)s, %(param_4)s], ARRAY[q, x]] AS anon_1
  53. .. versionadded:: 1.3.6 added support for multidimensional array literals
  54. .. seealso::
  55. :class:`_postgresql.ARRAY`
  56. """
  57. __visit_name__ = "array"
  58. stringify_dialect = "postgresql"
  59. inherit_cache = True
  60. def __init__(self, clauses, **kw):
  61. clauses = [
  62. coercions.expect(roles.ExpressionElementRole, c) for c in clauses
  63. ]
  64. super(array, self).__init__(*clauses, **kw)
  65. self._type_tuple = [arg.type for arg in clauses]
  66. main_type = kw.pop(
  67. "type_",
  68. self._type_tuple[0] if self._type_tuple else sqltypes.NULLTYPE,
  69. )
  70. if isinstance(main_type, ARRAY):
  71. self.type = ARRAY(
  72. main_type.item_type,
  73. dimensions=main_type.dimensions + 1
  74. if main_type.dimensions is not None
  75. else 2,
  76. )
  77. else:
  78. self.type = ARRAY(main_type)
  79. @property
  80. def _select_iterable(self):
  81. return (self,)
  82. def _bind_param(self, operator, obj, _assume_scalar=False, type_=None):
  83. if _assume_scalar or operator is operators.getitem:
  84. return expression.BindParameter(
  85. None,
  86. obj,
  87. _compared_to_operator=operator,
  88. type_=type_,
  89. _compared_to_type=self.type,
  90. unique=True,
  91. )
  92. else:
  93. return array(
  94. [
  95. self._bind_param(
  96. operator, o, _assume_scalar=True, type_=type_
  97. )
  98. for o in obj
  99. ]
  100. )
  101. def self_group(self, against=None):
  102. if against in (operators.any_op, operators.all_op, operators.getitem):
  103. return expression.Grouping(self)
  104. else:
  105. return self
  106. CONTAINS = operators.custom_op("@>", precedence=5, is_comparison=True)
  107. CONTAINED_BY = operators.custom_op("<@", precedence=5, is_comparison=True)
  108. OVERLAP = operators.custom_op("&&", precedence=5, is_comparison=True)
  109. class ARRAY(sqltypes.ARRAY):
  110. """PostgreSQL ARRAY type.
  111. .. versionchanged:: 1.1 The :class:`_postgresql.ARRAY` type is now
  112. a subclass of the core :class:`_types.ARRAY` type.
  113. The :class:`_postgresql.ARRAY` type is constructed in the same way
  114. as the core :class:`_types.ARRAY` type; a member type is required, and a
  115. number of dimensions is recommended if the type is to be used for more
  116. than one dimension::
  117. from sqlalchemy.dialects import postgresql
  118. mytable = Table("mytable", metadata,
  119. Column("data", postgresql.ARRAY(Integer, dimensions=2))
  120. )
  121. The :class:`_postgresql.ARRAY` type provides all operations defined on the
  122. core :class:`_types.ARRAY` type, including support for "dimensions",
  123. indexed access, and simple matching such as
  124. :meth:`.types.ARRAY.Comparator.any` and
  125. :meth:`.types.ARRAY.Comparator.all`. :class:`_postgresql.ARRAY`
  126. class also
  127. provides PostgreSQL-specific methods for containment operations, including
  128. :meth:`.postgresql.ARRAY.Comparator.contains`
  129. :meth:`.postgresql.ARRAY.Comparator.contained_by`, and
  130. :meth:`.postgresql.ARRAY.Comparator.overlap`, e.g.::
  131. mytable.c.data.contains([1, 2])
  132. The :class:`_postgresql.ARRAY` type may not be supported on all
  133. PostgreSQL DBAPIs; it is currently known to work on psycopg2 only.
  134. Additionally, the :class:`_postgresql.ARRAY`
  135. type does not work directly in
  136. conjunction with the :class:`.ENUM` type. For a workaround, see the
  137. special type at :ref:`postgresql_array_of_enum`.
  138. .. seealso::
  139. :class:`_types.ARRAY` - base array type
  140. :class:`_postgresql.array` - produces a literal array value.
  141. """
  142. class Comparator(sqltypes.ARRAY.Comparator):
  143. """Define comparison operations for :class:`_types.ARRAY`.
  144. Note that these operations are in addition to those provided
  145. by the base :class:`.types.ARRAY.Comparator` class, including
  146. :meth:`.types.ARRAY.Comparator.any` and
  147. :meth:`.types.ARRAY.Comparator.all`.
  148. """
  149. def contains(self, other, **kwargs):
  150. """Boolean expression. Test if elements are a superset of the
  151. elements of the argument array expression.
  152. kwargs may be ignored by this operator but are required for API
  153. conformance.
  154. """
  155. return self.operate(CONTAINS, other, result_type=sqltypes.Boolean)
  156. def contained_by(self, other):
  157. """Boolean expression. Test if elements are a proper subset of the
  158. elements of the argument array expression.
  159. """
  160. return self.operate(
  161. CONTAINED_BY, other, result_type=sqltypes.Boolean
  162. )
  163. def overlap(self, other):
  164. """Boolean expression. Test if array has elements in common with
  165. an argument array expression.
  166. """
  167. return self.operate(OVERLAP, other, result_type=sqltypes.Boolean)
  168. comparator_factory = Comparator
  169. def __init__(
  170. self, item_type, as_tuple=False, dimensions=None, zero_indexes=False
  171. ):
  172. """Construct an ARRAY.
  173. E.g.::
  174. Column('myarray', ARRAY(Integer))
  175. Arguments are:
  176. :param item_type: The data type of items of this array. Note that
  177. dimensionality is irrelevant here, so multi-dimensional arrays like
  178. ``INTEGER[][]``, are constructed as ``ARRAY(Integer)``, not as
  179. ``ARRAY(ARRAY(Integer))`` or such.
  180. :param as_tuple=False: Specify whether return results
  181. should be converted to tuples from lists. DBAPIs such
  182. as psycopg2 return lists by default. When tuples are
  183. returned, the results are hashable.
  184. :param dimensions: if non-None, the ARRAY will assume a fixed
  185. number of dimensions. This will cause the DDL emitted for this
  186. ARRAY to include the exact number of bracket clauses ``[]``,
  187. and will also optimize the performance of the type overall.
  188. Note that PG arrays are always implicitly "non-dimensioned",
  189. meaning they can store any number of dimensions no matter how
  190. they were declared.
  191. :param zero_indexes=False: when True, index values will be converted
  192. between Python zero-based and PostgreSQL one-based indexes, e.g.
  193. a value of one will be added to all index values before passing
  194. to the database.
  195. .. versionadded:: 0.9.5
  196. """
  197. if isinstance(item_type, ARRAY):
  198. raise ValueError(
  199. "Do not nest ARRAY types; ARRAY(basetype) "
  200. "handles multi-dimensional arrays of basetype"
  201. )
  202. if isinstance(item_type, type):
  203. item_type = item_type()
  204. self.item_type = item_type
  205. self.as_tuple = as_tuple
  206. self.dimensions = dimensions
  207. self.zero_indexes = zero_indexes
  208. @property
  209. def hashable(self):
  210. return self.as_tuple
  211. @property
  212. def python_type(self):
  213. return list
  214. def compare_values(self, x, y):
  215. return x == y
  216. def _proc_array(self, arr, itemproc, dim, collection):
  217. if dim is None:
  218. arr = list(arr)
  219. if (
  220. dim == 1
  221. or dim is None
  222. and (
  223. # this has to be (list, tuple), or at least
  224. # not hasattr('__iter__'), since Py3K strings
  225. # etc. have __iter__
  226. not arr
  227. or not isinstance(arr[0], (list, tuple))
  228. )
  229. ):
  230. if itemproc:
  231. return collection(itemproc(x) for x in arr)
  232. else:
  233. return collection(arr)
  234. else:
  235. return collection(
  236. self._proc_array(
  237. x,
  238. itemproc,
  239. dim - 1 if dim is not None else None,
  240. collection,
  241. )
  242. for x in arr
  243. )
  244. @util.memoized_property
  245. def _against_native_enum(self):
  246. return (
  247. isinstance(self.item_type, sqltypes.Enum)
  248. and self.item_type.native_enum
  249. )
  250. def bind_expression(self, bindvalue):
  251. return bindvalue
  252. def bind_processor(self, dialect):
  253. item_proc = self.item_type.dialect_impl(dialect).bind_processor(
  254. dialect
  255. )
  256. def process(value):
  257. if value is None:
  258. return value
  259. else:
  260. return self._proc_array(
  261. value, item_proc, self.dimensions, list
  262. )
  263. return process
  264. def result_processor(self, dialect, coltype):
  265. item_proc = self.item_type.dialect_impl(dialect).result_processor(
  266. dialect, coltype
  267. )
  268. def process(value):
  269. if value is None:
  270. return value
  271. else:
  272. return self._proc_array(
  273. value,
  274. item_proc,
  275. self.dimensions,
  276. tuple if self.as_tuple else list,
  277. )
  278. if self._against_native_enum:
  279. super_rp = process
  280. pattern = re.compile(r"^{(.*)}$")
  281. def handle_raw_string(value):
  282. inner = pattern.match(value).group(1)
  283. return _split_enum_values(inner)
  284. def process(value):
  285. if value is None:
  286. return value
  287. # isinstance(value, util.string_types) is required to handle
  288. # the case where a TypeDecorator for and Array of Enum is
  289. # used like was required in sa < 1.3.17
  290. return super_rp(
  291. handle_raw_string(value)
  292. if isinstance(value, util.string_types)
  293. else value
  294. )
  295. return process
  296. def _split_enum_values(array_string):
  297. if '"' not in array_string:
  298. # no escape char is present so it can just split on the comma
  299. return array_string.split(",") if array_string else []
  300. # handles quoted strings from:
  301. # r'abc,"quoted","also\\\\quoted", "quoted, comma", "esc \" quot", qpr'
  302. # returns
  303. # ['abc', 'quoted', 'also\\quoted', 'quoted, comma', 'esc " quot', 'qpr']
  304. text = array_string.replace(r"\"", "_$ESC_QUOTE$_")
  305. text = text.replace(r"\\", "\\")
  306. result = []
  307. on_quotes = re.split(r'(")', text)
  308. in_quotes = False
  309. for tok in on_quotes:
  310. if tok == '"':
  311. in_quotes = not in_quotes
  312. elif in_quotes:
  313. result.append(tok.replace("_$ESC_QUOTE$_", '"'))
  314. else:
  315. result.extend(re.findall(r"([^\s,]+),?", tok))
  316. return result