cells.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. from functools import lru_cache
  2. import re
  3. from typing import Dict, List
  4. from ._cell_widths import CELL_WIDTHS
  5. from ._lru_cache import LRUCache
  6. # Regex to match sequence of the most common character ranges
  7. _is_single_cell_widths = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$").match
  8. def cell_len(text: str, _cache: Dict[str, int] = LRUCache(1024 * 4)) -> int:
  9. """Get the number of cells required to display text.
  10. Args:
  11. text (str): Text to display.
  12. Returns:
  13. int: Get the number of cells required to display text.
  14. """
  15. if _is_single_cell_widths(text):
  16. return len(text)
  17. else:
  18. cached_result = _cache.get(text, None)
  19. if cached_result is not None:
  20. return cached_result
  21. _get_size = get_character_cell_size
  22. total_size = sum(_get_size(character) for character in text)
  23. if len(text) <= 64:
  24. _cache[text] = total_size
  25. return total_size
  26. @lru_cache(maxsize=4096)
  27. def get_character_cell_size(character: str) -> int:
  28. """Get the cell size of a character.
  29. Args:
  30. character (str): A single character.
  31. Returns:
  32. int: Number of cells (0, 1 or 2) occupied by that character.
  33. """
  34. if _is_single_cell_widths(character):
  35. return 1
  36. return _get_codepoint_cell_size(ord(character))
  37. @lru_cache(maxsize=4096)
  38. def _get_codepoint_cell_size(codepoint: int) -> int:
  39. """Get the cell size of a character.
  40. Args:
  41. character (str): A single character.
  42. Returns:
  43. int: Number of cells (0, 1 or 2) occupied by that character.
  44. """
  45. _table = CELL_WIDTHS
  46. lower_bound = 0
  47. upper_bound = len(_table) - 1
  48. index = (lower_bound + upper_bound) // 2
  49. while True:
  50. start, end, width = _table[index]
  51. if codepoint < start:
  52. upper_bound = index - 1
  53. elif codepoint > end:
  54. lower_bound = index + 1
  55. else:
  56. return 0 if width == -1 else width
  57. if upper_bound < lower_bound:
  58. break
  59. index = (lower_bound + upper_bound) // 2
  60. return 1
  61. def set_cell_size(text: str, total: int) -> str:
  62. """Set the length of a string to fit within given number of cells."""
  63. if _is_single_cell_widths(text):
  64. size = len(text)
  65. if size < total:
  66. return text + " " * (total - size)
  67. return text[:total]
  68. if not total:
  69. return ""
  70. cell_size = cell_len(text)
  71. if cell_size == total:
  72. return text
  73. if cell_size < total:
  74. return text + " " * (total - cell_size)
  75. start = 0
  76. end = len(text)
  77. # Binary search until we find the right size
  78. while True:
  79. pos = (start + end) // 2
  80. before = text[: pos + 1]
  81. before_len = cell_len(before)
  82. if before_len == total + 1 and cell_len(before[-1]) == 2:
  83. return before[:-1] + " "
  84. if before_len == total:
  85. return before
  86. if before_len > total:
  87. end = pos
  88. else:
  89. start = pos
  90. # TODO: This is inefficient
  91. # TODO: This might not work with CWJ type characters
  92. def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
  93. """Break text in to equal (cell) length strings."""
  94. _get_character_cell_size = get_character_cell_size
  95. characters = [
  96. (character, _get_character_cell_size(character)) for character in text
  97. ][::-1]
  98. total_size = position
  99. lines: List[List[str]] = [[]]
  100. append = lines[-1].append
  101. pop = characters.pop
  102. while characters:
  103. character, size = pop()
  104. if total_size + size > max_size:
  105. lines.append([character])
  106. append = lines[-1].append
  107. total_size = size
  108. else:
  109. total_size += size
  110. append(character)
  111. return ["".join(line) for line in lines]
  112. if __name__ == "__main__": # pragma: no cover
  113. print(get_character_cell_size("😽"))
  114. for line in chop_cells("""这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。""", 8):
  115. print(line)
  116. for n in range(80, 1, -1):
  117. print(set_cell_size("""这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。""", n) + "|")
  118. print("x" * n)