LAL  7.2.4.1-d5ae9e8
iterutils.py
Go to the documentation of this file.
1 # Copyright (C) 2007,2008,2010--2016,2021 Kipp Cannon, Nickolas Fotopoulos
2 #
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation; either version 2 of the License, or (at your
6 # option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful, but
9 # WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11 # Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License along
14 # with this program; if not, write to the Free Software Foundation, Inc.,
15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 
17 
18 #
19 # =============================================================================
20 #
21 # Preamble
22 #
23 # =============================================================================
24 #
25 
26 
27 """
28 A collection of iteration utilities.
29 """
30 
31 
32 import functools
33 import math
34 import numpy
35 import random
36 import six
37 
38 
39 from . import git_version
40 
41 
42 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
43 __version__ = "git id %s" % git_version.id
44 __date__ = git_version.date
45 
46 
47 #
48 # =============================================================================
49 #
50 # Iteration Tools
51 #
52 # =============================================================================
53 #
54 
55 
56 def MultiIter(*sequences):
57  """
58  A generator for iterating over the elements of multiple sequences
59  simultaneously. With N sequences given as input, the generator
60  yields all possible distinct N-tuples that contain one element from
61  each of the input sequences.
62 
63  Example:
64 
65  >>> x = MultiIter([0, 1, 2], [10, 11])
66  >>> list(x)
67  [(0, 10), (1, 10), (2, 10), (0, 11), (1, 11), (2, 11)]
68 
69  The elements in each output tuple are in the order of the input
70  sequences, and the left-most input sequence is iterated over first.
71 
72  Internally, the input sequences themselves are each iterated over
73  only once, so it is safe to pass generators as arguments. Also,
74  this generator is significantly faster if the longest input
75  sequence is given as the first argument. For example, this code
76 
77  >>> lengths = range(1, 12)
78  >>> for x in MultiIter(*map(range, lengths)):
79  ... pass
80  ...
81 
82  runs approximately 5 times faster if the lengths list is reversed.
83  """
84  if len(sequences) > 1:
85  # FIXME: this loop is about 5% faster if done the other
86  # way around, if the last list is iterated over in the
87  # inner loop. but there is code, like snglcoinc.py,
88  # that has been optimized for the current order and
89  # would need to be reoptimized if this function were to be
90  # reversed.
91  head = tuple((x,) for x in sequences[0])
92  for t in MultiIter(*sequences[1:]):
93  for h in head:
94  yield h + t
95  elif sequences:
96  for t in sequences[0]:
97  yield (t,)
98 
99 
100 def choices(vals, n):
101  """
102  A generator for iterating over all choices of n elements from the
103  input sequence vals. In each result returned, the original order
104  of the values is preserved.
105 
106  Example:
107 
108  >>> x = choices(["a", "b", "c"], 2)
109  >>> list(x)
110  [('a', 'b'), ('a', 'c'), ('b', 'c')]
111 
112  The order of combinations in the output sequence is always the
113  same, so if choices() is called twice with two different sequences
114  of the same length the first combination in each of the two output
115  sequences will contain elements from the same positions in the two
116  different input sequences, and so on for each subsequent pair of
117  output combinations.
118 
119  Example:
120 
121  >>> x = choices(["a", "b", "c"], 2)
122  >>> y = choices(["1", "2", "3"], 2)
123  >>> list(zip(x, y))
124  [(('a', 'b'), ('1', '2')), (('a', 'c'), ('1', '3')), (('b', 'c'), ('2', '3'))]
125 
126  Furthermore, the order of combinations in the output sequence is
127  such that if the input list has n elements, and one constructs the
128  combinations choices(input, m), then each combination in
129  choices(input, n-m).reverse() contains the elements discarded in
130  forming the corresponding combination in the former.
131 
132  Example:
133 
134  >>> x = ["a", "b", "c", "d", "e"]
135  >>> X = list(choices(x, 2))
136  >>> Y = list(choices(x, len(x) - 2))
137  >>> Y.reverse()
138  >>> list(zip(X, Y))
139  [(('a', 'b'), ('c', 'd', 'e')), (('a', 'c'), ('b', 'd', 'e')), (('a', 'd'), ('b', 'c', 'e')), (('a', 'e'), ('b', 'c', 'd')), (('b', 'c'), ('a', 'd', 'e')), (('b', 'd'), ('a', 'c', 'e')), (('b', 'e'), ('a', 'c', 'd')), (('c', 'd'), ('a', 'b', 'e')), (('c', 'e'), ('a', 'b', 'd')), (('d', 'e'), ('a', 'b', 'c'))]
140 
141  NOTE: this generator is identical to the itertools.combinations()
142  generator in Python's standard library. This routine was written
143  before Python provided that functionality, and is now only
144  preserved for two reasons. The first is to maintain this API for
145  existing codes that were written before the standard library
146  provided the capability. But the second reason is because the
147  Python standard library doesn't make the guarantees that we do,
148  here, about the order of the results. Specifically, there is no
149  guarantee that itertools.combinations() is repeatable, nor is there
150  a statement on how to obtain the inverse of the sequence as
151  described above. At the time of writing the order in which results
152  are produced is identical to this generator and in all use cases
153  they are exact substitutes for each other, and new code should use
154  itertools.combinations(). Be careful making assumptions about the
155  order of the results, add safety checks where needed, and if a
156  problem arises this generator can be used as a fall-back.
157  """
158  if n == len(vals):
159  yield tuple(vals)
160  elif n > 1:
161  n -= 1
162  for i, v in enumerate(vals[:-n]):
163  v = (v,)
164  for c in choices(vals[i+1:], n):
165  yield v + c
166  elif n == 1:
167  for v in vals:
168  yield (v,)
169  elif n == 0:
170  yield ()
171  else:
172  # n < 0
173  raise ValueError(n)
174 
175 
176 def uniq(iterable):
177  """
178  Yield the unique items of an iterable, preserving order.
179  http://mail.python.org/pipermail/tutor/2002-March/012930.html
180 
181  Example:
182 
183  >>> x = uniq([0, 0, 2, 6, 2, 0, 5])
184  >>> list(x)
185  [0, 2, 6, 5]
186  """
187  temp_dict = {}
188  for e in iterable:
189  if e not in temp_dict:
190  yield temp_dict.setdefault(e, e)
191 
192 
193 def nonuniq(iterable):
194  """
195  Yield the non-unique items of an iterable, preserving order. If an
196  item occurs N > 0 times in the input sequence, it will occur N-1
197  times in the output sequence.
198 
199  Example:
200 
201  >>> x = nonuniq([0, 0, 2, 6, 2, 0, 5])
202  >>> list(x)
203  [0, 2, 0]
204  """
205  temp_dict = {}
206  for e in iterable:
207  if e in temp_dict:
208  yield e
209  temp_dict.setdefault(e, e)
210 
211 
212 def flatten(sequence, levels = 1):
213  """
214  Example:
215  >>> nested = [[1,2], [[3]]]
216  >>> list(flatten(nested))
217  [1, 2, [3]]
218  """
219  if levels == 0:
220  for x in sequence:
221  yield x
222  else:
223  for x in sequence:
224  for y in flatten(x, levels - 1):
225  yield y
226 
227 
228 #
229 # =============================================================================
230 #
231 # In-Place filter()
232 #
233 # =============================================================================
234 #
235 
236 
237 def inplace_filter(func, sequence):
238  """
239  Like Python's filter() builtin, but modifies the sequence in place.
240 
241  Example:
242 
243  >>> l = list(range(10))
244  >>> inplace_filter(lambda x: x > 5, l)
245  >>> l
246  [6, 7, 8, 9]
247 
248  Performance considerations: the function iterates over the
249  sequence, shuffling surviving members down and deleting whatever
250  top part of the sequence is left empty at the end, so sequences
251  whose surviving members are predominantly at the bottom will be
252  processed faster.
253  """
254  target = 0
255  for source in range(len(sequence)):
256  if func(sequence[source]):
257  sequence[target] = sequence[source]
258  target += 1
259  del sequence[target:]
260 
261 
262 #
263 # =============================================================================
264 #
265 # Return the Values from Several Ordered Iterables in Order
266 #
267 # =============================================================================
268 #
269 
270 
271 def inorder(*iterables, **kwargs):
272  """
273  A generator that yields the values from several ordered iterables
274  in order.
275 
276  Example:
277 
278  >>> x = [0, 1, 2, 3]
279  >>> y = [1.5, 2.5, 3.5, 4.5]
280  >>> z = [1.75, 2.25, 3.75, 4.25]
281  >>> list(inorder(x, y, z))
282  [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5]
283  >>> list(inorder(x, y, z, key=lambda x: x * x))
284  [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5]
285 
286  >>> x.sort(key=lambda x: abs(x-3))
287  >>> y.sort(key=lambda x: abs(x-3))
288  >>> z.sort(key=lambda x: abs(x-3))
289  >>> list(inorder(x, y, z, key=lambda x: abs(x - 3)))
290  [3, 2.5, 3.5, 2.25, 3.75, 2, 1.75, 4.25, 1.5, 4.5, 1, 0]
291 
292  >>> x = [3, 2, 1, 0]
293  >>> y = [4.5, 3.5, 2.5, 1.5]
294  >>> z = [4.25, 3.75, 2.25, 1.75]
295  >>> list(inorder(x, y, z, reverse = True))
296  [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0]
297  >>> list(inorder(x, y, z, key = lambda x: -x))
298  [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0]
299 
300  NOTE: this function will never reverse the order of elements in
301  the input iterables. If the reverse keyword argument is False (the
302  default) then the input sequences must yield elements in increasing
303  order, likewise if the keyword argument is True then the input
304  sequences must yield elements in decreasing order. Failure to
305  adhere to this yields undefined results, and for performance
306  reasons no check is performed to validate the element order in the
307  input sequences.
308  """
309  reverse = kwargs.pop("reverse", False)
310  keyfunc = kwargs.pop("key", lambda x: x) # default = identity
311  if kwargs:
312  raise TypeError("invalid keyword argument '%s'" % list(kwargs.keys())[0])
313  nextvals = {}
314  for iterable in iterables:
315  next_ = functools.partial(next, iter(iterable))
316  try:
317  nextval = next_()
318  nextvals[next_] = keyfunc(nextval), nextval, next_
319  except StopIteration:
320  pass
321  if not nextvals:
322  # all sequences are empty
323  return
324  if reverse:
325  select = lambda seq: max(seq, key = lambda elem: elem[0])
326  else:
327  select = lambda seq: min(seq, key = lambda elem: elem[0])
328  values = functools.partial(six.itervalues, nextvals)
329  if len(nextvals) > 1:
330  while 1:
331  _, val, next_ = select(values())
332  yield val
333  try:
334  nextval = next_()
335  nextvals[next_] = keyfunc(nextval), nextval, next_
336  except StopIteration:
337  del nextvals[next_]
338  if len(nextvals) < 2:
339  break
340  # exactly one sequence remains, short circuit and drain it. since
341  # PEP 479 we must trap the StopIteration and terminate the loop
342  # manually
343  (_, val, next_), = values()
344  yield val
345  try:
346  while 1:
347  yield next_()
348  except StopIteration:
349  pass
350 
351 
352 #
353 # =============================================================================
354 #
355 # Random Sequences
356 #
357 # =============================================================================
358 #
359 
360 
361 def randindex(lo, hi, n = 1.):
362  """
363  Yields integers in the range [lo, hi) where 0 <= lo < hi. Each
364  return value is a two-element tuple. The first element is the
365  random integer, the second is the natural logarithm of the
366  probability with which that integer will be chosen.
367 
368  The CDF for the distribution from which the integers are drawn goes
369  as [integer]^{n}, where n > 0. Specifically, it's
370 
371  CDF(x) = (x^{n} - lo^{n}) / (hi^{n} - lo^{n})
372 
373  n = 1 yields a uniform distribution; n > 1 favours larger
374  integers, n < 1 favours smaller integers.
375  """
376  if not 0 <= lo < hi:
377  raise ValueError("require 0 <= lo < hi: lo = %d, hi = %d" % (lo, hi))
378  if n <= 0.:
379  raise ValueError("n <= 0: %g" % n)
380  elif n == 1.:
381  # special case for uniform distribution
382  try:
383  lnP = math.log(1. / (hi - lo))
384  except ValueError:
385  raise ValueError("[lo, hi) domain error")
386  hi -= 1
387  rnd = random.randint
388  while 1:
389  yield rnd(lo, hi), lnP
390 
391  # CDF evaluated at index boundaries
392  lnP = numpy.arange(lo, hi + 1, dtype = "double")**n
393  lnP -= lnP[0]
394  lnP /= lnP[-1]
395  # differences give probabilities
396  lnP = tuple(numpy.log(lnP[1:] - lnP[:-1]))
397  if numpy.isinf(lnP).any():
398  raise ValueError("[lo, hi) domain error")
399 
400  beta = lo**n / (hi**n - lo**n)
401  n = 1. / n
402  alpha = hi / (1. + beta)**n
403  flr = math.floor
404  rnd = random.random
405  while 1:
406  index = int(flr(alpha * (rnd() + beta)**n))
407  # the tuple look-up provides the second part of the
408  # range safety check on index
409  assert index >= lo
410  yield index, lnP[index - lo]
def inorder(*iterables, **kwargs)
A generator that yields the values from several ordered iterables in order.
Definition: iterutils.py:308
def inplace_filter(func, sequence)
Like Python's filter() builtin, but modifies the sequence in place.
Definition: iterutils.py:253
def flatten(sequence, levels=1)
Example: nested = [[1,2], [[3]]] list(flatten(nested)) [1, 2, [3]].
Definition: iterutils.py:218
def randindex(lo, hi, n=1.)
Yields integers in the range [lo, hi) where 0 <= lo < hi.
Definition: iterutils.py:375
def nonuniq(iterable)
Yield the non-unique items of an iterable, preserving order.
Definition: iterutils.py:204
def choices(vals, n)
A generator for iterating over all choices of n elements from the input sequence vals.
Definition: iterutils.py:157
def uniq(iterable)
Yield the unique items of an iterable, preserving order.
Definition: iterutils.py:186
def MultiIter(*sequences)
A generator for iterating over the elements of multiple sequences simultaneously.
Definition: iterutils.py:83