Loading [MathJax]/extensions/TeX/AMSsymbols.js
LAL 7.7.0.1-00ddc7f
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
iterutils.py
Go to the documentation of this file.
1# Copyright (C) 2007,2008,2010--2016,2021 Kipp Cannon, Nickolas Fotopoulos
2#
3# This program is free software; you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation; either version 2 of the License, or (at your
6# option) any later version.
7#
8# This program is distributed in the hope that it will be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11# Public License for more details.
12#
13# You should have received a copy of the GNU General Public License along
14# with this program; if not, write to the Free Software Foundation, Inc.,
15# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17
18#
19# =============================================================================
20#
21# Preamble
22#
23# =============================================================================
24#
25
26
27"""
28A collection of iteration utilities.
29"""
30
31
32import functools
33import math
34import numpy
35import random
36
37
38from . import git_version
39
40
41__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
42__version__ = "git id %s" % git_version.id
43__date__ = git_version.date
44
45
46#
47# =============================================================================
48#
49# Iteration Tools
50#
51# =============================================================================
52#
53
54
55def MultiIter(*sequences):
56 """
57 A generator for iterating over the elements of multiple sequences
58 simultaneously. With N sequences given as input, the generator
59 yields all possible distinct N-tuples that contain one element from
60 each of the input sequences.
61
62 Example:
63
64 >>> x = MultiIter([0, 1, 2], [10, 11])
65 >>> list(x)
66 [(0, 10), (1, 10), (2, 10), (0, 11), (1, 11), (2, 11)]
67
68 The elements in each output tuple are in the order of the input
69 sequences, and the left-most input sequence is iterated over first.
70
71 Internally, the input sequences themselves are each iterated over
72 only once, so it is safe to pass generators as arguments. Also,
73 this generator is significantly faster if the longest input
74 sequence is given as the first argument. For example, this code
75
76 >>> lengths = range(1, 12)
77 >>> for x in MultiIter(*map(range, lengths)):
78 ... pass
79 ...
80
81 runs approximately 5 times faster if the lengths list is reversed.
82 """
83 if len(sequences) > 1:
84 # FIXME: this loop is about 5% faster if done the other
85 # way around, if the last list is iterated over in the
86 # inner loop. but there is code, like snglcoinc.py,
87 # that has been optimized for the current order and
88 # would need to be reoptimized if this function were to be
89 # reversed.
90 head = tuple((x,) for x in sequences[0])
91 for t in MultiIter(*sequences[1:]):
92 for h in head:
93 yield h + t
94 elif sequences:
95 for t in sequences[0]:
96 yield (t,)
97
98
99def choices(vals, n):
100 """
101 A generator for iterating over all choices of n elements from the
102 input sequence vals. In each result returned, the original order
103 of the values is preserved.
104
105 Example:
106
107 >>> x = choices(["a", "b", "c"], 2)
108 >>> list(x)
109 [('a', 'b'), ('a', 'c'), ('b', 'c')]
110
111 The order of combinations in the output sequence is always the
112 same, so if choices() is called twice with two different sequences
113 of the same length the first combination in each of the two output
114 sequences will contain elements from the same positions in the two
115 different input sequences, and so on for each subsequent pair of
116 output combinations.
117
118 Example:
119
120 >>> x = choices(["a", "b", "c"], 2)
121 >>> y = choices(["1", "2", "3"], 2)
122 >>> list(zip(x, y))
123 [(('a', 'b'), ('1', '2')), (('a', 'c'), ('1', '3')), (('b', 'c'), ('2', '3'))]
124
125 Furthermore, the order of combinations in the output sequence is
126 such that if the input list has n elements, and one constructs the
127 combinations choices(input, m), then each combination in
128 choices(input, n-m).reverse() contains the elements discarded in
129 forming the corresponding combination in the former.
130
131 Example:
132
133 >>> x = ["a", "b", "c", "d", "e"]
134 >>> X = list(choices(x, 2))
135 >>> Y = list(choices(x, len(x) - 2))
136 >>> Y.reverse()
137 >>> list(zip(X, Y))
138 [(('a', 'b'), ('c', 'd', 'e')), (('a', 'c'), ('b', 'd', 'e')), (('a', 'd'), ('b', 'c', 'e')), (('a', 'e'), ('b', 'c', 'd')), (('b', 'c'), ('a', 'd', 'e')), (('b', 'd'), ('a', 'c', 'e')), (('b', 'e'), ('a', 'c', 'd')), (('c', 'd'), ('a', 'b', 'e')), (('c', 'e'), ('a', 'b', 'd')), (('d', 'e'), ('a', 'b', 'c'))]
139
140 NOTE: this generator is identical to the itertools.combinations()
141 generator in Python's standard library. This routine was written
142 before Python provided that functionality, and is now only
143 preserved for two reasons. The first is to maintain this API for
144 existing codes that were written before the standard library
145 provided the capability. But the second reason is because the
146 Python standard library doesn't make the guarantees that we do,
147 here, about the order of the results. Specifically, there is no
148 guarantee that itertools.combinations() is repeatable, nor is there
149 a statement on how to obtain the inverse of the sequence as
150 described above. At the time of writing the order in which results
151 are produced is identical to this generator and in all use cases
152 they are exact substitutes for each other, and new code should use
153 itertools.combinations(). Be careful making assumptions about the
154 order of the results, add safety checks where needed, and if a
155 problem arises this generator can be used as a fall-back.
156 """
157 if n == len(vals):
158 yield tuple(vals)
159 elif n > 1:
160 n -= 1
161 for i, v in enumerate(vals[:-n]):
162 v = (v,)
163 for c in choices(vals[i+1:], n):
164 yield v + c
165 elif n == 1:
166 for v in vals:
167 yield (v,)
168 elif n == 0:
169 yield ()
170 else:
171 # n < 0
172 raise ValueError(n)
173
174
175def uniq(iterable):
176 """
177 Yield the unique items of an iterable, preserving order.
178 http://mail.python.org/pipermail/tutor/2002-March/012930.html
179
180 Example:
181
182 >>> x = uniq([0, 0, 2, 6, 2, 0, 5])
183 >>> list(x)
184 [0, 2, 6, 5]
185 """
186 temp_dict = {}
187 for e in iterable:
188 if e not in temp_dict:
189 yield temp_dict.setdefault(e, e)
190
191
192def nonuniq(iterable):
193 """
194 Yield the non-unique items of an iterable, preserving order. If an
195 item occurs N > 0 times in the input sequence, it will occur N-1
196 times in the output sequence.
197
198 Example:
199
200 >>> x = nonuniq([0, 0, 2, 6, 2, 0, 5])
201 >>> list(x)
202 [0, 2, 0]
203 """
204 temp_dict = {}
205 for e in iterable:
206 if e in temp_dict:
207 yield e
208 temp_dict.setdefault(e, e)
209
210
211def flatten(sequence, levels = 1):
212 """
213 Example:
214 >>> nested = [[1,2], [[3]]]
215 >>> list(flatten(nested))
216 [1, 2, [3]]
217 """
218 if levels == 0:
219 for x in sequence:
220 yield x
221 else:
222 for x in sequence:
223 for y in flatten(x, levels - 1):
224 yield y
225
226
227#
228# =============================================================================
229#
230# In-Place filter()
231#
232# =============================================================================
233#
234
235
236def inplace_filter(func, sequence):
237 """
238 Like Python's filter() builtin, but modifies the sequence in place.
239
240 Example:
241
242 >>> l = list(range(10))
243 >>> inplace_filter(lambda x: x > 5, l)
244 >>> l
245 [6, 7, 8, 9]
246
247 Performance considerations: the function iterates over the
248 sequence, shuffling surviving members down and deleting whatever
249 top part of the sequence is left empty at the end, so sequences
250 whose surviving members are predominantly at the bottom will be
251 processed faster.
252 """
253 target = 0
254 for source in range(len(sequence)):
255 if func(sequence[source]):
256 sequence[target] = sequence[source]
257 target += 1
258 del sequence[target:]
259
260
261#
262# =============================================================================
263#
264# Return the Values from Several Ordered Iterables in Order
265#
266# =============================================================================
267#
268
269
270def inorder(*iterables, **kwargs):
271 """
272 A generator that yields the values from several ordered iterables
273 in order.
274
275 Example:
276
277 >>> x = [0, 1, 2, 3]
278 >>> y = [1.5, 2.5, 3.5, 4.5]
279 >>> z = [1.75, 2.25, 3.75, 4.25]
280 >>> list(inorder(x, y, z))
281 [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5]
282 >>> list(inorder(x, y, z, key=lambda x: x * x))
283 [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5]
284
285 >>> x.sort(key=lambda x: abs(x-3))
286 >>> y.sort(key=lambda x: abs(x-3))
287 >>> z.sort(key=lambda x: abs(x-3))
288 >>> list(inorder(x, y, z, key=lambda x: abs(x - 3)))
289 [3, 2.5, 3.5, 2.25, 3.75, 2, 1.75, 4.25, 1.5, 4.5, 1, 0]
290
291 >>> x = [3, 2, 1, 0]
292 >>> y = [4.5, 3.5, 2.5, 1.5]
293 >>> z = [4.25, 3.75, 2.25, 1.75]
294 >>> list(inorder(x, y, z, reverse = True))
295 [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0]
296 >>> list(inorder(x, y, z, key = lambda x: -x))
297 [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0]
298
299 NOTE: this function will never reverse the order of elements in
300 the input iterables. If the reverse keyword argument is False (the
301 default) then the input sequences must yield elements in increasing
302 order, likewise if the keyword argument is True then the input
303 sequences must yield elements in decreasing order. Failure to
304 adhere to this yields undefined results, and for performance
305 reasons no check is performed to validate the element order in the
306 input sequences.
307 """
308 reverse = kwargs.pop("reverse", False)
309 keyfunc = kwargs.pop("key", lambda x: x) # default = identity
310 if kwargs:
311 raise TypeError("invalid keyword argument '%s'" % list(kwargs.keys())[0])
312 nextvals = {}
313 for iterable in iterables:
314 next_ = functools.partial(next, iter(iterable))
315 try:
316 nextval = next_()
317 nextvals[next_] = keyfunc(nextval), nextval, next_
318 except StopIteration:
319 pass
320 if not nextvals:
321 # all sequences are empty
322 return
323 if reverse:
324 select = lambda seq: max(seq, key = lambda elem: elem[0])
325 else:
326 select = lambda seq: min(seq, key = lambda elem: elem[0])
327 if len(nextvals) > 1:
328 while 1:
329 _, val, next_ = select(nextvals.values())
330 yield val
331 try:
332 nextval = next_()
333 nextvals[next_] = keyfunc(nextval), nextval, next_
334 except StopIteration:
335 del nextvals[next_]
336 if len(nextvals) < 2:
337 break
338 # exactly one sequence remains, short circuit and drain it. since
339 # PEP 479 we must trap the StopIteration and terminate the loop
340 # manually
341 (_, val, next_), = nextvals.values()
342 yield val
343 try:
344 while 1:
345 yield next_()
346 except StopIteration:
347 pass
348
349
350#
351# =============================================================================
352#
353# Random Sequences
354#
355# =============================================================================
356#
357
358
359def randindex(lo, hi, n = 1.):
360 """
361 Yields integers in the range [lo, hi) where 0 <= lo < hi. Each
362 return value is a two-element tuple. The first element is the
363 random integer, the second is the natural logarithm of the
364 probability with which that integer will be chosen.
365
366 The CDF for the distribution from which the integers are drawn goes
367 as [integer]^{n}, where n > 0. Specifically, it's
368
369 CDF(x) = (x^{n} - lo^{n}) / (hi^{n} - lo^{n})
370
371 n = 1 yields a uniform distribution; n > 1 favours larger
372 integers, n < 1 favours smaller integers.
373 """
374 if not 0 <= lo < hi:
375 raise ValueError("require 0 <= lo < hi: lo = %d, hi = %d" % (lo, hi))
376 if n <= 0.:
377 raise ValueError("n <= 0: %g" % n)
378 elif n == 1.:
379 # special case for uniform distribution
380 try:
381 lnP = math.log(1. / (hi - lo))
382 except ValueError:
383 raise ValueError("[lo, hi) domain error")
384 hi -= 1
385 rnd = random.randint
386 while 1:
387 yield rnd(lo, hi), lnP
388
389 # CDF evaluated at index boundaries
390 lnP = numpy.arange(lo, hi + 1, dtype = "double")**n
391 lnP -= lnP[0]
392 lnP /= lnP[-1]
393 # differences give probabilities
394 lnP = tuple(numpy.log(lnP[1:] - lnP[:-1]))
395 if numpy.isinf(lnP).any():
396 raise ValueError("[lo, hi) domain error")
397
398 beta = lo**n / (hi**n - lo**n)
399 n = 1. / n
400 alpha = hi / (1. + beta)**n
401 flr = math.floor
402 rnd = random.random
403 while 1:
404 index = int(flr(alpha * (rnd() + beta)**n))
405 # the tuple look-up provides the second part of the
406 # range safety check on index
407 assert index >= lo
408 yield index, lnP[index - lo]
static void reverse(LALDict **a, size_t start, size_t end)
def inorder(*iterables, **kwargs)
A generator that yields the values from several ordered iterables in order.
Definition: iterutils.py:307
def inplace_filter(func, sequence)
Like Python's filter() builtin, but modifies the sequence in place.
Definition: iterutils.py:252
def flatten(sequence, levels=1)
Example: ‍nested = [[1,2], [[3]]] list(flatten(nested)) [1, 2, [3]].
Definition: iterutils.py:217
def randindex(lo, hi, n=1.)
Yields integers in the range [lo, hi) where 0 <= lo < hi.
Definition: iterutils.py:373
def nonuniq(iterable)
Yield the non-unique items of an iterable, preserving order.
Definition: iterutils.py:203
def choices(vals, n)
A generator for iterating over all choices of n elements from the input sequence vals.
Definition: iterutils.py:156
def uniq(iterable)
Yield the unique items of an iterable, preserving order.
Definition: iterutils.py:185
def MultiIter(*sequences)
A generator for iterating over the elements of multiple sequences simultaneously.
Definition: iterutils.py:82