Loading [MathJax]/extensions/TeX/AMSsymbols.js
LALBurst 2.0.7.1-b246709
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Macros Modules Pages
cafe.py
Go to the documentation of this file.
1# Copyright (C) 2006-2010,2012--2021 Kipp Cannon
2#
3# This program is free software; you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation; either version 2 of the License, or (at your
6# option) any later version.
7#
8# This program is distributed in the hope that it will be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11# Public License for more details.
12#
13# You should have received a copy of the GNU General Public License along
14# with this program; if not, write to the Free Software Foundation, Inc.,
15# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17
18#
19# =============================================================================
20#
21# Preamble
22#
23# =============================================================================
24#
25
26
27"""
28LIGO Light-Weight XML coincidence analysis front end.
29"""
30
31
32import math
33import sys
34
35
36from lal import LIGOTimeGPS
37from lal.utils import CacheEntry
38import igwn_segments as segments
39
40
41from . import offsetvector
42from . import packing
43
44
45__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
46from .git_version import date as __date__
47from .git_version import version as __version__
48
49
50#
51# =============================================================================
52#
53# Input
54#
55# =============================================================================
56#
57
58
def load_cache(filename, verbose = False):
	"""
	Parse a LAL cache file named filename into a list of
	lal.utils.CacheEntry objects.  If filename is None then input is
	taken from stdin.

	Raises OSError if filename is given but cannot be opened, and
	whatever lal.utils.CacheEntry raises on a malformed line.
	"""
	if verbose:
		print("reading %s ..." % (filename or "stdin"), file=sys.stderr)
	if filename is not None:
		# use a context manager so the file is closed when done;
		# the previous implementation leaked the handle
		with open(filename) as f:
			return [CacheEntry(line) for line in f]
	# stdin is not ours to close
	return [CacheEntry(line) for line in sys.stdin]
72
73
def cache_to_seglistdict(cache):
	"""
	Construct a coalesced segmentlistdict object from a list of
	lal.utils.CacheEntry objects.
	"""
	seglists = segments.segmentlistdict()
	for cache_entry in cache:
		# the |= operator coalesces as it accumulates
		seglists |= cache_entry.segmentlistdict
	return seglists
83
84
85#
86# =============================================================================
87#
88# Performance Helpers
89#
90# =============================================================================
91#
92
93
def segmentlistdict_normalize(seglistdict, origin):
	"""
	Convert the times in a segmentlist dictionary to floats relative to
	origin.  The purpose is to allow segment lists stored as
	LIGOTimeGPS times to be manipulated more quickly without loss of
	precision.  The modification is done in place.
	"""
	# .itervalues() was removed in Python 3 (this module already uses
	# Python 3 print(..., file=...));  use .values()
	for seglist in seglistdict.values():
		seglist[:] = (segments.segment(float(seg[0] - origin), float(seg[1] - origin)) for seg in seglist)
103
104
def get_coincident_segmentlistdict(seglistdict, offset_vectors):
	"""
	Compute the segments for which data is required in order to perform
	a complete coincidence analysis given the segments for which data
	is available and the list of offset vectors to be applied to the
	data during the coincidence analysis.

	seglistdict is a segmentlistdict object defining the instruments
	and times for which data is available.  offset_vectors is a list
	of offset vectors to be applied to the data --- dictionaries of
	instrument/offset pairs.

	Each offset vector in offset_vectors is applied to the input
	segments in turn and the intersection of the shifted segments is
	computed.  The segments surviving the intersection are unshifted
	to their original positions and accumulated.  The return value is
	the union of the results of this operation over all offset
	vectors.

	In all cases all pair-wise intersections are computed, that is if
	an offset vector lists three instruments then this function
	returns the times when any two of those instruments are on,
	including times when all three are on.

	For example, let us say that "input" is a segmentlistdict object
	containing segment lists for three instruments, "H1", "H2" and
	"L1".  And let us say that "slides" is a list of dictionaries, and
	is equal to [{"H1":0, "H2":0, "L1":0}, {"H1":0, "H2":10}].  Then
	if

	output = get_coincident_segmentlistdict(input, slides)

	output will contain, for each of the three instruments, the
	segments (or parts thereof) from the original lists that are
	required in order to perform a triple- and double-coincident
	analyses at zero lag with the three instruments, *and* a
	double-coincident analysis between H1 and H2 with H2 offset by 10
	seconds.

	The segmentlistdict object returned by this function has its
	offsets set to those of the input segmentlistdict.
	"""
	# work on a copy so the caller's object is not modified
	working = seglistdict.copy()
	instruments = set(working)

	# remember the input's offsets so they can be restored on the
	# result
	saved_offsets = dict(working.offsets)

	# for each two-instrument component of each offset vector, shift
	# the segment lists, extract the coincident times, undo the shift
	# and accumulate the union
	result = segments.segmentlistdict()
	for offset_vector in offsetvector.component_offsetvectors(offset_vectors, 2):
		if not set(offset_vector).issubset(instruments):
			continue
		working.offsets.update(offset_vector)
		common = working.extract_common(offset_vector.keys())
		common.offsets.clear()
		result |= common

	# restore the input's offsets on the result
	result.offsets.update(saved_offsets)

	return result
166
167
def segmentlistdict_unnormalize(seglistdict, origin):
	"""
	The opposite of segmentlistdict_normalize(), restores the times in
	a segmentlist dictionary to absolute times.  The modification is
	done in place.
	"""
	# .itervalues() was removed in Python 3 (this module already uses
	# Python 3 print(..., file=...));  use .values()
	for seglist in seglistdict.values():
		seglist[:] = (segments.segment(origin + seg[0], origin + seg[1]) for seg in seglist)
177
178#
179# =============================================================================
180#
181# Output Cache Packing
182#
183# =============================================================================
184#
185
186
class LALCacheBin(packing.Bin):
	"""
	Subclass of the packing.Bin class representing a LAL file cache.
	The files contained in the bin are available in the .objects
	attribute, which is a list of lal.utils.CacheEntry objects.  The
	.size attribute holds a igwn_segments.segmentlistdict object giving
	the times spanned by the files in the bin.  The .extent attribute
	holds the result of running .extent_all() on the .size attribute.
	"""
	def __init__(self):
		packing.Bin.__init__(self)
		# times spanned by the files in this bin, keyed by instrument
		self.size = segments.segmentlistdict()
		# .extent_all() of .size;  None until the first file is added
		self.extent = None

	def add(self, cache_entry):
		"""
		Add cache_entry (a lal.utils.CacheEntry) to the bin,
		growing .size by its segmentlistdict and refreshing
		.extent.  Returns self.
		"""
		packing.Bin.add(self, cache_entry, cache_entry.segmentlistdict)
		self.extent = self.size.extent_all()
		return self

	def __iadd__(self, *args):
		"""
		Merge the contents of another LALCacheBin into this one
		and refresh .extent.  Returns self.
		"""
		packing.Bin.__iadd__(self, *args)
		self.extent = self.size.extent_all()
		return self

	# rich comparisons order bins by their extents

	def __lt__(self, other):
		return self.extent < other.extent

	def __le__(self, other):
		return self.extent <= other.extent

	def __eq__(self, other):
		return self.extent == other.extent

	def __ne__(self, other):
		return self.extent != other.extent

	def __ge__(self, other):
		return self.extent >= other.extent

	def __gt__(self, other):
		return self.extent > other.extent

	def __str__(self):
		# one LAL cache line per file
		return "\n".join(map(str, self.objects))
231
232
class CafePacker(packing.Packer):
	"""
	Packing algorithm implementing the ligolw_cafe file list packing
	algorithm.
	"""
	def set_offset_vectors(self, offset_vectors):
		"""
		Set the list of offset vectors to be considered when
		deciding the bins in which each file belongs.  Must be
		called before packing any files.  The input is a list of
		dictionaries, each mapping instruments to offsets.
		"""
		#
		# sort the offset vectors to reduce the number of
		# arithmetic operations performed while applying them
		#

		self.offset_vectors = list(offset_vectors)
		self.offset_vectors.sort(key = lambda offset_vector: sorted(offset_vector.items()))

		#
		# determine the largest gap that can conceivably be closed
		# by the time slides
		#

		min_offset = min(min(offset_vector.values()) for offset_vector in offset_vectors)
		max_offset = max(max(offset_vector.values()) for offset_vector in offset_vectors)
		self.max_gap = max_offset - min_offset
		assert self.max_gap >= 0

	def pack(self, cache_entry):
		"""
		Find all bins in which this lal.utils.CacheEntry instance
		belongs, merge them, and add this cache entry to the
		result.  Create a new bin for this cache entry if it does
		not belong in any of the existing bins.

		The cache entry "belongs" in a bin if after each of the
		preset offset vectors (see the .set_offset_vectors()
		method) is applied to both the contents of a bin and the
		cache entry, any of the segment lists of the bin and cache
		entry are found to intersect.  When checking for
		intersection, only the segment lists whose instrument names
		are listed in the offset vector are compared.
		"""
		#
		# add the cache entry to a new bin by itself
		#

		new = LALCacheBin()
		new.add(cache_entry)

		#
		# assemble a list of bins in which the cache entry belongs.
		# iterate over existing bins backwards so that we record
		# the indices of matching bins in descending order.  bail
		# out when we find a bin that precedes the new one
		#

		matching_bins = []
		for n in range(len(self.bins) - 1, -1, -1):
			bin = self.bins[n]
			if bin.extent[1] < new.extent[0] - self.max_gap:
				# no earlier bin can be within reach of any
				# offset vector (bins are time-ordered)
				break
			for offset_vector in self.offset_vectors:
				new.size.offsets.update(offset_vector)
				bin.size.offsets.update(offset_vector)
				if bin.size.is_coincident(new.size, keys = offset_vector.keys()):
					matching_bins.append(n)
					break
			# undo the last offset vector applied above
			bin.size.offsets.clear()
			new.size.offsets.clear()

		#
		# add new cache entry to bins
		#

		if not matching_bins:
			#
			# no existing bins match, add a new one
			#

			self.bins.append(new)
		else:
			#
			# put cache entry into earliest bin that was found
			# to match.  if cache entry belongs in more than
			# one bin, merge them.  note that the matching bin
			# indexes are given in descending order so the last
			# is the earliest bin, and after that popping them
			# in order does not affect the indexes of the
			# remaining, matching, bins.
			#

			dest = self.bins[matching_bins.pop(-1)]
			dest += new
			for n in matching_bins:
				dest += self.bins.pop(n)

		#
		# time-order the bins so the bail-out above works next time
		# this method is called
		#

		self.bins.sort()
339
def split_bins(cafepacker, extentlimit, verbose = False):
	"""
	Split bins in CafePacker so that each bin has an extent no longer
	than extentlimit.

	cafepacker is a CafePacker instance whose .bins list is modified
	in place;  extentlimit is the maximum allowed extent in seconds.
	"""

	#
	# loop over all bins in cafepacker.bins.  loop is backwards because
	# list grows in size as bins are split
	#

	for idx in range(len(cafepacker.bins) - 1, -1, -1):
		#
		# retrieve bin
		#

		origbin = cafepacker.bins[idx]

		#
		# how many pieces?  if bin doesn't need splitting move to
		# next
		#

		n = int(math.ceil(float(abs(origbin.extent)) / extentlimit))
		if n <= 1:
			continue

		#
		# calculate the times of the splits, and then build
		# segmentlistdicts for clipping.  the interior boundaries
		# are evenly spaced;  the outer boundaries are the
		# original extent's own ends
		#

		extents = [origbin.extent[0]] + [LIGOTimeGPS(origbin.extent[0] + i * float(abs(origbin.extent)) / n) for i in range(1, n)] + [origbin.extent[1]]
		if verbose:
			print("\tsplitting cache spanning %s at %s" % (str(origbin.extent), ", ".join(str(extent) for extent in extents[1:-1])), file=sys.stderr)
		extents = [segments.segment(*bounds) for bounds in zip(extents[:-1], extents[1:])]

		#
		# build new bins, pack objects from origbin into new bins.
		# a cache entry may appear in more than one of the new bins
		#

		newbins = []
		for extent in extents:
			#
			# append new bin
			#

			newbins.append(LALCacheBin())

			#
			# test each cache entry in original bin
			#

			extent_plus_max_gap = extent.protract(cafepacker.max_gap)
			for cache_entry in origbin.objects:
				#
				# quick check of gap:  skip files that no
				# offset vector could bring into reach
				#

				if cache_entry.segment.disjoint(extent_plus_max_gap):
					continue

				#
				# apply each offset vector
				#

				cache_entry_segs = cache_entry.segmentlistdict
				for offset_vector in cafepacker.offset_vectors:
					cache_entry_segs.offsets.update(offset_vector)

					#
					# test against bin
					#

					if cache_entry_segs.intersects_segment(extent):
						#
						# object is coincident with
						# bin
						#

						newbins[-1].add(cache_entry)
						break

			#
			# override the bin's extent:  use the nominal
			# clipping boundary, not the extent of the files
			# that landed in the bin
			#

			newbins[-1].extent = extent

		#
		# replace original bin with split bins.
		#

		cafepacker.bins[idx:idx+1] = newbins

	#
	# done
	#
439
440#
441# =============================================================================
442#
443# Output
444#
445# =============================================================================
446#
447
448
def write_caches(base, bins, instruments = None, verbose = False):
	"""
	Write one LAL cache file per bin.  The file names are base
	followed by a zero-padded integer and a ".cache" suffix;  the list
	of file names written is returned.  If instruments is not None,
	only cache entries whose segmentlistdict contains at least one of
	the named instruments are written.  Set verbose to True for
	progress reports on stderr.
	"""
	filenames = []
	if bins:
		# pad the file number to the number of digits required by
		# the largest index
		pattern = "%%s%%0%dd.cache" % int(math.log10(len(bins)) + 1)
		for n, bin in enumerate(bins):
			filename = pattern % (base, n)
			filenames.append(filename)
			if verbose:
				print("writing %s ..." % filename, file=sys.stderr)
			# use a context manager so the file is flushed and
			# closed;  the previous implementation leaked the
			# handle
			with open(filename, "w") as f:
				for cacheentry in bin.objects:
					if instruments is None or (instruments & set(cacheentry.segmentlistdict)):
						print(str(cacheentry), file=f)
	return filenames
463
464
def write_single_instrument_caches(base, bins, instruments, verbose = False):
	"""
	For each instrument in instruments, write the bins to a separate
	set of cache files restricted to that instrument's entries, using
	file names prefixed with base followed by the instrument name.

	Returns a dictionary mapping each instrument name to the list of
	file names written for it (previously the filenames computed by
	write_caches() were discarded).
	"""
	return dict((instrument, write_caches("%s%s_" % (base, instrument), bins, set([instrument]), verbose)) for instrument in instruments)
468
469
470#
471# =============================================================================
472#
473# Library API
474#
475# =============================================================================
476#
477
478
def ligolw_cafe(cache, offset_vectors, verbose = False, extentlimit = None):
	"""
	Transform a LAL cache into a list of caches each of whose contents
	can be subjected to a coincidence analysis independently of the
	contents of the other caches, assuming the coincidence analyses
	will involve the application of the given offset vectors.

	cache is a sequence (e.g., list, tuple, etc.) of
	lal.utils.CacheEntry objects.  offset_vectors is a sequence of
	instrument/offset dictionaries describing the offset vectors to
	consider.  Set verbose to True for verbosity.  If extentlimit is
	not None, bins are split so that no bin spans more than that many
	seconds.

	The output is a two-element tuple.  The first element is a
	igwn_segments.segmentlistdict object describing the times for which
	coincident data is available (derived from the segment metadata of
	the input cache).  The second element is a list of LALCacheBin
	objects, providing the file groups.
	"""
	#
	# Construct a segment list dictionary from the cache
	#

	if verbose:
		print("computing segment list ...", file=sys.stderr)
	seglists = cache_to_seglistdict(cache)

	#
	# For each instrument compute the times for which it will (could)
	# contribute to a coincidence analysis.  work in floats relative
	# to the earliest time in the cache for speed, then convert back
	# to absolute GPS times.  epoch is None only if every segment
	# list is empty, in which case nothing is iterated over below
	#

	epoch = min([min(seg[0] for seg in seglist) for seglist in seglists.values() if seglist] or [None])
	segmentlistdict_normalize(seglists, epoch)
	seglists = get_coincident_segmentlistdict(seglists, offset_vectors)
	segmentlistdict_unnormalize(seglists, epoch)

	#
	# Remove files that will not participate in a coincidence.  Take
	# care not to modify the calling code's data.  Note that because we
	# have established that this segment list describes exactly the
	# times spanned by the input files that are coincident under at
	# least one time slide, a file participates in a multi-instrument
	# coincidence if and only if it intersects these times.
	#

	if verbose:
		print("filtering input cache ...", file=sys.stderr)
	cache = [c for c in cache if seglists.intersects_all(c.segmentlistdict)]

	#
	# Optimization:  adding files to bins in time order keeps the number
	# of bins from growing larger than needed.
	#

	if verbose:
		print("sorting input cache ...", file=sys.stderr)
	cache.sort(key = lambda x: x.segment)

	#
	# Pack cache entries into output caches.  Having reduced the file
	# list to just those that participate in coincidences, it only
	# remains to determine which other files each must be grouped with.
	#

	outputcaches = []
	packer = CafePacker(outputcaches)
	packer.set_offset_vectors(offset_vectors)
	if verbose:
		print("packing files (considering %s offset vectors) ..." % len(offset_vectors), file=sys.stderr)
	for n, cacheentry in enumerate(cache):
		# report progress roughly every 13 files
		if verbose and not n % 13:
			print("\t%.1f%%\t(%d files, %d caches)\r" % (100.0 * n / len(cache), n + 1, len(outputcaches)), end=' ', file=sys.stderr)
		packer.pack(cacheentry)
	if verbose:
		print("\t100.0%%\t(%d files, %d caches)" % (len(cache), len(outputcaches)), file=sys.stderr)

	#
	# Split caches with extent more than extentlimit
	#

	if extentlimit is not None:
		if verbose:
			print("splitting caches with extent greater than %g s ..." % extentlimit, file=sys.stderr)
		split_bins(packer, extentlimit, verbose = verbose)
		if verbose:
			print("\t\t(%d files, %d caches)" % (len(cache), len(outputcaches)), file=sys.stderr)

	#
	# Sort output caches
	#

	if verbose:
		print("sorting output caches ...", file=sys.stderr)
	for cache in outputcaches:
		cache.objects.sort()

	#
	# Done.
	#

	return seglists, outputcaches
static double max(double a, double b)
Definition: EPFilters.c:43
static double min(double a, double b)
Definition: EPFilters.c:42
Packing algorithm implementing the ligolw_cafe file list packing algorithm.
Definition: cafe.py:237
def pack(self, cache_entry)
Find all bins in which this lal.utils.CacheEntry instance belongs, merge them, and add this cache ent...
Definition: cafe.py:277
def set_offset_vectors(self, offset_vectors)
Set the list of offset vectors to be considered when deciding the bins in which each file belongs.
Definition: cafe.py:244
Subclass of the packing.Bin class representing a LAL file cache.
Definition: cafe.py:195
def add(self, cache_entry)
Add the object, whose size is as given, to the bin.
Definition: cafe.py:201
def __eq__(self, other)
Definition: cafe.py:217
def __iadd__(self, *args)
Add the contents of another Bin object to this one.
Definition: cafe.py:206
def __ne__(self, other)
Definition: cafe.py:220
def __gt__(self, other)
Definition: cafe.py:226
def __ge__(self, other)
Definition: cafe.py:223
def __le__(self, other)
Definition: cafe.py:214
def __lt__(self, other)
Definition: cafe.py:211
Bin object for use in packing algorithm implementations.
Definition: packing.py:66
Parent class for packing algorithms.
Definition: packing.py:137
def split_bins(cafepacker, extentlimit, verbose=False)
Split bins in CafePacker so that each bin has an extent no longer than extentlimit.
Definition: cafe.py:344
def segmentlistdict_normalize(seglistdict, origin)
Convert the times in a segmentlist dictionary to floats relative to origin.
Definition: cafe.py:100
def load_cache(filename, verbose=False)
Parse a LAL cache file named filename into a list of lal.utils.CacheEntry objects.
Definition: cafe.py:64
def write_caches(base, bins, instruments=None, verbose=False)
Definition: cafe.py:449
def get_coincident_segmentlistdict(seglistdict, offset_vectors)
Compute the segments for which data is required in order to perform a complete coincidence analysis g...
Definition: cafe.py:144
def segmentlistdict_unnormalize(seglistdict, origin)
The opposite of segmentlistdict_normalize(), restores the times in a segmentlist dictionary to absolu...
Definition: cafe.py:173
def cache_to_seglistdict(cache)
Construct a coalesced segmentlistdict object from a list of lal.utils.CacheEntry objects.
Definition: cafe.py:78
def write_single_instrument_caches(base, bins, instruments, verbose=False)
Definition: cafe.py:465
def ligolw_cafe(cache, offset_vectors, verbose=False, extentlimit=None)
Transform a LAL cache into a list of caches each of whose contents can be subjected to a coincidence ...
Definition: cafe.py:496