LAL  7.4.1.1-f1ed79c
iterutils.py
Go to the documentation of this file.
1 # Copyright (C) 2007,2008,2010--2016,2021 Kipp Cannon, Nickolas Fotopoulos
2 #
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation; either version 2 of the License, or (at your
6 # option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful, but
9 # WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11 # Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License along
14 # with this program; if not, write to the Free Software Foundation, Inc.,
15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 
17 
18 #
19 # =============================================================================
20 #
21 # Preamble
22 #
23 # =============================================================================
24 #
25 
26 
27 """
28 A collection of iteration utilities.
29 """
30 
31 
32 import functools
33 import math
34 import numpy
35 import random
36 
37 
38 from . import git_version
39 
40 
41 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
42 __version__ = "git id %s" % git_version.id
43 __date__ = git_version.date
44 
45 
46 #
47 # =============================================================================
48 #
49 # Iteration Tools
50 #
51 # =============================================================================
52 #
53 
54 
55 def MultiIter(*sequences):
56  """
57  A generator for iterating over the elements of multiple sequences
58  simultaneously. With N sequences given as input, the generator
59  yields all possible distinct N-tuples that contain one element from
60  each of the input sequences.
61 
62  Example:
63 
64  >>> x = MultiIter([0, 1, 2], [10, 11])
65  >>> list(x)
66  [(0, 10), (1, 10), (2, 10), (0, 11), (1, 11), (2, 11)]
67 
68  The elements in each output tuple are in the order of the input
69  sequences, and the left-most input sequence is iterated over first.
70 
71  Internally, the input sequences themselves are each iterated over
72  only once, so it is safe to pass generators as arguments. Also,
73  this generator is significantly faster if the longest input
74  sequence is given as the first argument. For example, this code
75 
76  >>> lengths = range(1, 12)
77  >>> for x in MultiIter(*map(range, lengths)):
78  ... pass
79  ...
80 
81  runs approximately 5 times faster if the lengths list is reversed.
82  """
83  if len(sequences) > 1:
84  # FIXME: this loop is about 5% faster if done the other
85  # way around, if the last list is iterated over in the
86  # inner loop. but there is code, like snglcoinc.py,
87  # that has been optimized for the current order and
88  # would need to be reoptimized if this function were to be
89  # reversed.
90  head = tuple((x,) for x in sequences[0])
91  for t in MultiIter(*sequences[1:]):
92  for h in head:
93  yield h + t
94  elif sequences:
95  for t in sequences[0]:
96  yield (t,)
97 
98 
99 def choices(vals, n):
100  """
101  A generator for iterating over all choices of n elements from the
102  input sequence vals. In each result returned, the original order
103  of the values is preserved.
104 
105  Example:
106 
107  >>> x = choices(["a", "b", "c"], 2)
108  >>> list(x)
109  [('a', 'b'), ('a', 'c'), ('b', 'c')]
110 
111  The order of combinations in the output sequence is always the
112  same, so if choices() is called twice with two different sequences
113  of the same length the first combination in each of the two output
114  sequences will contain elements from the same positions in the two
115  different input sequences, and so on for each subsequent pair of
116  output combinations.
117 
118  Example:
119 
120  >>> x = choices(["a", "b", "c"], 2)
121  >>> y = choices(["1", "2", "3"], 2)
122  >>> list(zip(x, y))
123  [(('a', 'b'), ('1', '2')), (('a', 'c'), ('1', '3')), (('b', 'c'), ('2', '3'))]
124 
125  Furthermore, the order of combinations in the output sequence is
126  such that if the input list has n elements, and one constructs the
127  combinations choices(input, m), then each combination in
128  choices(input, n-m).reverse() contains the elements discarded in
129  forming the corresponding combination in the former.
130 
131  Example:
132 
133  >>> x = ["a", "b", "c", "d", "e"]
134  >>> X = list(choices(x, 2))
135  >>> Y = list(choices(x, len(x) - 2))
136  >>> Y.reverse()
137  >>> list(zip(X, Y))
138  [(('a', 'b'), ('c', 'd', 'e')), (('a', 'c'), ('b', 'd', 'e')), (('a', 'd'), ('b', 'c', 'e')), (('a', 'e'), ('b', 'c', 'd')), (('b', 'c'), ('a', 'd', 'e')), (('b', 'd'), ('a', 'c', 'e')), (('b', 'e'), ('a', 'c', 'd')), (('c', 'd'), ('a', 'b', 'e')), (('c', 'e'), ('a', 'b', 'd')), (('d', 'e'), ('a', 'b', 'c'))]
139 
140  NOTE: this generator is identical to the itertools.combinations()
141  generator in Python's standard library. This routine was written
142  before Python provided that functionality, and is now only
143  preserved for two reasons. The first is to maintain this API for
144  existing codes that were written before the standard library
145  provided the capability. But the second reason is because the
146  Python standard library doesn't make the guarantees that we do,
147  here, about the order of the results. Specifically, there is no
148  guarantee that itertools.combinations() is repeatable, nor is there
149  a statement on how to obtain the inverse of the sequence as
150  described above. At the time of writing the order in which results
151  are produced is identical to this generator and in all use cases
152  they are exact substitutes for each other, and new code should use
153  itertools.combinations(). Be careful making assumptions about the
154  order of the results, add safety checks where needed, and if a
155  problem arises this generator can be used as a fall-back.
156  """
157  if n == len(vals):
158  yield tuple(vals)
159  elif n > 1:
160  n -= 1
161  for i, v in enumerate(vals[:-n]):
162  v = (v,)
163  for c in choices(vals[i+1:], n):
164  yield v + c
165  elif n == 1:
166  for v in vals:
167  yield (v,)
168  elif n == 0:
169  yield ()
170  else:
171  # n < 0
172  raise ValueError(n)
173 
174 
175 def uniq(iterable):
176  """
177  Yield the unique items of an iterable, preserving order.
178  http://mail.python.org/pipermail/tutor/2002-March/012930.html
179 
180  Example:
181 
182  >>> x = uniq([0, 0, 2, 6, 2, 0, 5])
183  >>> list(x)
184  [0, 2, 6, 5]
185  """
186  temp_dict = {}
187  for e in iterable:
188  if e not in temp_dict:
189  yield temp_dict.setdefault(e, e)
190 
191 
192 def nonuniq(iterable):
193  """
194  Yield the non-unique items of an iterable, preserving order. If an
195  item occurs N > 0 times in the input sequence, it will occur N-1
196  times in the output sequence.
197 
198  Example:
199 
200  >>> x = nonuniq([0, 0, 2, 6, 2, 0, 5])
201  >>> list(x)
202  [0, 2, 0]
203  """
204  temp_dict = {}
205  for e in iterable:
206  if e in temp_dict:
207  yield e
208  temp_dict.setdefault(e, e)
209 
210 
211 def flatten(sequence, levels = 1):
212  """
213  Example:
214  >>> nested = [[1,2], [[3]]]
215  >>> list(flatten(nested))
216  [1, 2, [3]]
217  """
218  if levels == 0:
219  for x in sequence:
220  yield x
221  else:
222  for x in sequence:
223  for y in flatten(x, levels - 1):
224  yield y
225 
226 
227 #
228 # =============================================================================
229 #
230 # In-Place filter()
231 #
232 # =============================================================================
233 #
234 
235 
236 def inplace_filter(func, sequence):
237  """
238  Like Python's filter() builtin, but modifies the sequence in place.
239 
240  Example:
241 
242  >>> l = list(range(10))
243  >>> inplace_filter(lambda x: x > 5, l)
244  >>> l
245  [6, 7, 8, 9]
246 
247  Performance considerations: the function iterates over the
248  sequence, shuffling surviving members down and deleting whatever
249  top part of the sequence is left empty at the end, so sequences
250  whose surviving members are predominantly at the bottom will be
251  processed faster.
252  """
253  target = 0
254  for source in range(len(sequence)):
255  if func(sequence[source]):
256  sequence[target] = sequence[source]
257  target += 1
258  del sequence[target:]
259 
260 
261 #
262 # =============================================================================
263 #
264 # Return the Values from Several Ordered Iterables in Order
265 #
266 # =============================================================================
267 #
268 
269 
270 def inorder(*iterables, **kwargs):
271  """
272  A generator that yields the values from several ordered iterables
273  in order.
274 
275  Example:
276 
277  >>> x = [0, 1, 2, 3]
278  >>> y = [1.5, 2.5, 3.5, 4.5]
279  >>> z = [1.75, 2.25, 3.75, 4.25]
280  >>> list(inorder(x, y, z))
281  [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5]
282  >>> list(inorder(x, y, z, key=lambda x: x * x))
283  [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5]
284 
285  >>> x.sort(key=lambda x: abs(x-3))
286  >>> y.sort(key=lambda x: abs(x-3))
287  >>> z.sort(key=lambda x: abs(x-3))
288  >>> list(inorder(x, y, z, key=lambda x: abs(x - 3)))
289  [3, 2.5, 3.5, 2.25, 3.75, 2, 1.75, 4.25, 1.5, 4.5, 1, 0]
290 
291  >>> x = [3, 2, 1, 0]
292  >>> y = [4.5, 3.5, 2.5, 1.5]
293  >>> z = [4.25, 3.75, 2.25, 1.75]
294  >>> list(inorder(x, y, z, reverse = True))
295  [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0]
296  >>> list(inorder(x, y, z, key = lambda x: -x))
297  [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0]
298 
299  NOTE: this function will never reverse the order of elements in
300  the input iterables. If the reverse keyword argument is False (the
301  default) then the input sequences must yield elements in increasing
302  order, likewise if the keyword argument is True then the input
303  sequences must yield elements in decreasing order. Failure to
304  adhere to this yields undefined results, and for performance
305  reasons no check is performed to validate the element order in the
306  input sequences.
307  """
308  reverse = kwargs.pop("reverse", False)
309  keyfunc = kwargs.pop("key", lambda x: x) # default = identity
310  if kwargs:
311  raise TypeError("invalid keyword argument '%s'" % list(kwargs.keys())[0])
312  nextvals = {}
313  for iterable in iterables:
314  next_ = functools.partial(next, iter(iterable))
315  try:
316  nextval = next_()
317  nextvals[next_] = keyfunc(nextval), nextval, next_
318  except StopIteration:
319  pass
320  if not nextvals:
321  # all sequences are empty
322  return
323  if reverse:
324  select = lambda seq: max(seq, key = lambda elem: elem[0])
325  else:
326  select = lambda seq: min(seq, key = lambda elem: elem[0])
327  if len(nextvals) > 1:
328  while 1:
329  _, val, next_ = select(nextvals.values())
330  yield val
331  try:
332  nextval = next_()
333  nextvals[next_] = keyfunc(nextval), nextval, next_
334  except StopIteration:
335  del nextvals[next_]
336  if len(nextvals) < 2:
337  break
338  # exactly one sequence remains, short circuit and drain it. since
339  # PEP 479 we must trap the StopIteration and terminate the loop
340  # manually
341  (_, val, next_), = nextvals.values()
342  yield val
343  try:
344  while 1:
345  yield next_()
346  except StopIteration:
347  pass
348 
349 
350 #
351 # =============================================================================
352 #
353 # Random Sequences
354 #
355 # =============================================================================
356 #
357 
358 
359 def randindex(lo, hi, n = 1.):
360  """
361  Yields integers in the range [lo, hi) where 0 <= lo < hi. Each
362  return value is a two-element tuple. The first element is the
363  random integer, the second is the natural logarithm of the
364  probability with which that integer will be chosen.
365 
366  The CDF for the distribution from which the integers are drawn goes
367  as [integer]^{n}, where n > 0. Specifically, it's
368 
369  CDF(x) = (x^{n} - lo^{n}) / (hi^{n} - lo^{n})
370 
371  n = 1 yields a uniform distribution; n > 1 favours larger
372  integers, n < 1 favours smaller integers.
373  """
374  if not 0 <= lo < hi:
375  raise ValueError("require 0 <= lo < hi: lo = %d, hi = %d" % (lo, hi))
376  if n <= 0.:
377  raise ValueError("n <= 0: %g" % n)
378  elif n == 1.:
379  # special case for uniform distribution
380  try:
381  lnP = math.log(1. / (hi - lo))
382  except ValueError:
383  raise ValueError("[lo, hi) domain error")
384  hi -= 1
385  rnd = random.randint
386  while 1:
387  yield rnd(lo, hi), lnP
388 
389  # CDF evaluated at index boundaries
390  lnP = numpy.arange(lo, hi + 1, dtype = "double")**n
391  lnP -= lnP[0]
392  lnP /= lnP[-1]
393  # differences give probabilities
394  lnP = tuple(numpy.log(lnP[1:] - lnP[:-1]))
395  if numpy.isinf(lnP).any():
396  raise ValueError("[lo, hi) domain error")
397 
398  beta = lo**n / (hi**n - lo**n)
399  n = 1. / n
400  alpha = hi / (1. + beta)**n
401  flr = math.floor
402  rnd = random.random
403  while 1:
404  index = int(flr(alpha * (rnd() + beta)**n))
405  # the tuple look-up provides the second part of the
406  # range safety check on index
407  assert index >= lo
408  yield index, lnP[index - lo]
def inorder(*iterables, **kwargs)
A generator that yields the values from several ordered iterables in order.
Definition: iterutils.py:307
def inplace_filter(func, sequence)
Like Python's filter() builtin, but modifies the sequence in place.
Definition: iterutils.py:252
def flatten(sequence, levels=1)
Example: nested = [[1,2], [[3]]] list(flatten(nested)) [1, 2, [3]].
Definition: iterutils.py:217
def randindex(lo, hi, n=1.)
Yields integers in the range [lo, hi) where 0 <= lo < hi.
Definition: iterutils.py:373
def nonuniq(iterable)
Yield the non-unique items of an iterable, preserving order.
Definition: iterutils.py:203
def choices(vals, n)
A generator for iterating over all choices of n elements from the input sequence vals.
Definition: iterutils.py:156
def uniq(iterable)
Yield the unique items of an iterable, preserving order.
Definition: iterutils.py:185
def MultiIter(*sequences)
A generator for iterating over the elements of multiple sequences simultaneously.
Definition: iterutils.py:82