intranges.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. """
  2. Given a list of integers, made up of (hopefully) a small number of long runs
  3. of consecutive integers, compute a representation of the form
  4. ((start1, end1), (start2, end2) ...). Then answer the question "was x present
  5. in the original list?" in time O(log(# runs)).
  6. """
  7. import bisect
  8. from typing import List, Tuple
  9. def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
  10. """Represent a list of integers as a sequence of ranges:
  11. ((start_0, end_0), (start_1, end_1), ...), such that the original
  12. integers are exactly those x such that start_i <= x < end_i for some i.
  13. Ranges are encoded as single integers (start << 32 | end), not as tuples.
  14. """
  15. sorted_list = sorted(list_)
  16. ranges = []
  17. last_write = -1
  18. for i in range(len(sorted_list)):
  19. if i+1 < len(sorted_list):
  20. if sorted_list[i] == sorted_list[i+1]-1:
  21. continue
  22. current_range = sorted_list[last_write+1:i+1]
  23. ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
  24. last_write = i
  25. return tuple(ranges)
  26. def _encode_range(start: int, end: int) -> int:
  27. return (start << 32) | end
  28. def _decode_range(r: int) -> Tuple[int, int]:
  29. return (r >> 32), (r & ((1 << 32) - 1))
  30. def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool:
  31. """Determine if `int_` falls into one of the ranges in `ranges`."""
  32. tuple_ = _encode_range(int_, 0)
  33. pos = bisect.bisect_left(ranges, tuple_)
  34. # we could be immediately ahead of a tuple (start, end)
  35. # with start < int_ <= end
  36. if pos > 0:
  37. left, right = _decode_range(ranges[pos-1])
  38. if left <= int_ < right:
  39. return True
  40. # or we could be immediately behind a tuple (int_, end)
  41. if pos < len(ranges):
  42. left, _ = _decode_range(ranges[pos])
  43. if left == int_:
  44. return True
  45. return False