LALBurst 2.0.7.1-b246709
snglcoinc.py
1# Copyright (C) 2006--2021 Kipp Cannon, Drew G. Keppel, Jolien Creighton
2#
3# This program is free software; you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation; either version 2 of the License, or (at your
6# option) any later version.
7#
8# This program is distributed in the hope that it will be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11# Public License for more details.
12#
13# You should have received a copy of the GNU General Public License along
14# with this program; if not, write to the Free Software Foundation, Inc.,
15# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17
18#
19# =============================================================================
20#
21# Preamble
22#
23# =============================================================================
24#
25
26
27"""
28Generic time interval coincidence engine.
29"""
30
31
32from bisect import bisect_left
33try:
34 from fpconst import NegInf
35except ImportError:
36 # fpconst is not part of the standard library and might not be
37 # available
38 NegInf = float("-inf")
39import itertools
40import math
41import numpy
42import random
43import scipy.optimize
44from scipy import spatial
45import sys
46from collections import ChainMap, Counter
47import warnings
48
49
50from igwn_ligolw import ligolw
51from igwn_ligolw import lsctables
52from igwn_ligolw.utils import coincs as ligolw_coincs
53import igwn_segments as segments
54import lal
55from . import offsetvector
56
57
58__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
59from .git_version import date as __date__
60from .git_version import version as __version__
61
62
63#
64# =============================================================================
65#
66# Utilities
67#
68# =============================================================================
69#
70
71
72def light_travel_time(instrument1, instrument2):
73 """
74 Compute and return the time required for light to travel through
75 free space the distance separating the two instruments. The inputs
76 are two instrument prefixes (e.g., "H1"), and the result is
77 returned in seconds. Note how this differs from LAL's
78 XLALLightTravelTime() function, which takes two detector objects as
79 input, and returns the time truncated to integer nanoseconds.
80 """
81 dx = lal.cached_detector_by_prefix[instrument1].location - lal.cached_detector_by_prefix[instrument2].location
82 return math.sqrt((dx * dx).sum()) / lal.C_SI
83
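# For example (an illustrative doctest sketch; the value is approximate
# and depends only on the fixed detector geometry):
#
#	>>> round(light_travel_time("H1", "L1"), 3)
#	0.01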
84
85#
86# =============================================================================
87#
88# Streaming Double Coincidence Generator
89#
90# =============================================================================
91#
92
93
94#
95# Algorithm notes:
96#
97# The ingredients are shown in the diagram below:
98#
99# #1 -----X----------X--X------------------X---X------)----->
100#
101# #2 ------X--------------X------X-X------------X)---X-X---->
102#
103# #3 ----X------X---------x--------------x)----------------->
104# ----------------------------------------------------------------------> t
105#
106#
107# "X" = an event. Each event has a unique discrete time associated with
108# it. Physically the event might represent a signal lasting many tens
109# of minutes, but regardless of that we require a "time" to be assigned to
110# that signal, and that time will form the basis of the coincidence test.
111#
112# "#1", etc. = a detector that provides a sequence of events. Each
113# detector has a time offset applied to its events.
114#
115# ")" = t_complete for that detector. Calling code guarantees that the
116# event sequence for that detector is complete up to (not necessarily
117# including) this time. Events might already have been collected at or
118# following this time, but no further events will be collected preceding
119# that time. In the diagram above the event lists are shown displaced
120# according to their respective offsets, and the t_completes shifted
121# accordingly.
122#
123# For each pair of detectors there is a coincidence window which is assumed
124# to equal some fixed time interval common to all detector pairs + the
125# light travel time between that pair. Two events are coincident if their
126# times differ by no more than the coincidence window for their respective
127# detector pair. Three or more events are coincident if all pairs of
128# events within the n-tuple are coincident.
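#
# As a sketch (illustrative only; the real test is supplied by the
# get_coincs implementations further below), the pairwise test is
#
#	def are_coincident(t_a, t_b, pair_window):
#		# t_a, t_b are time-shifted event times; pair_window is
#		# the fixed interval + light travel time for this pair
#		return abs(t_a - t_b) <= pair_window
#
# and an n-tuple of events is coincident only if every pair within it
# passes this test.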
129#
130# Looking at the events again:
131#
132# #1 -----X----------X--X------------------X---X------)----->
133#
134# #2 ------X--------------X------X-X------------X)---X-X---->
135#
136# #3 ----X------X---------x--------------x)----------------->
137# ----------------------------------------------------------------------> t
138# ^ ^
139# B A
140#
141# The event list for detector #2 is complete up to
142#
143# t = t_complete(#2) + offset(#2) = A,
144#
145# and if we were only concerned about single-detector candidates from
146# detector #2 then we could scan up to that time and report everything that
147# we find and know that we had found all such candidates without omissions
148# or double-counting any. However, that last event in detector #2, for
149# example, just before that detector's t_complete, might later be found to
150# be coincident with something from detector #3, whose event list at this
151# time is still only complete up to
152#
153# t = t_complete(#3) + offset(#3) = B
154#
155# Likewise, #1+#2 doubles can only be constructed up to approximately t =
156# B because they might actually be found to form triples once the event
157# list for detector #3 is filled in.
158#
159# We need to determine which candidates or potential candidates we have
160# sufficiently complete information about to determine if they form a
161# coincident n-tuple; and, after that, which events can be disposed of
162# because all possible n-tuple candidates that can be constructed from them
163# have been constructed (or, equivalently, which events to retain because
164# we have not yet been able to decide all the candidates they might
165# participate in); and, additionally, we need to do this in a way that
166# upon repeated applications of this process no candidate or part thereof
167# is reported more than once and no candidate that can be constructed from
168# the (complete) event sequences is missed.
169#
170# The algorithm we use for achieving this is the following. We need the
171# following ingredients:
172#
173# - an exactly reproducible method by which to assign a "time" to an
174# n-tuple candidate so that two different codes (possibly using different
175# arithmetic optimizations, or the same code doing the calculation a
176# second time) will agree whether a given candidate is before or after (or
177# at) some given time boundary;
178#
179# - given that definition, a method by which to determine the latest such
180# time up to which all possible candidates can be constructed;
181#
182# - a method by which to determine which events can no longer participate
183# in candidates on subsequent invocations of the algorithm.
184#
185# In particular, we need these to act together so that the decision as to
186# whether or not a candidate is before or after the time bound we choose is
187# stable against the addition of new events to the queues (the bound must
188# precede the times up to which event lists are complete by a sufficient
189# margin), and we will need to retain events long enough to make this
190# definition stable against removal of events from the queues at least
191# until the risk of mistaking subsets of the n-tuple as new candidates has
192# passed. It is desirable for the algorithm to achieve the lowest possible
193# latency, meaning always reporting as many candidates as can be decided
194# given the information available, but performance is the primary
195# consideration so we also require an algorithm that can make decisions
196# about candidates very quickly without using much memory. For example,
197# comparing every potential candidate against a list of everything that has
198# previously been reported is impractical.
199#
200# We will define the "time" of an n-tuple candidate to be the earliest time
201# of the events in the candidate with their respective offsets applied.
202# For n-tuple candidates whose "times" precede the time up to which all
203# detectors' event lists are complete this is stable against addition of
204# new events to the detector queues, even for those candidates that are
205# modified by the addition of extra coincident events as the event
206# sequences are advanced.
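#
# For example (illustrative numbers), a candidate built from shifted event
# times 100.007 (H1), 100.003 (L1), and 100.011 (V1) has
#
#	t_candidate = min(100.007, 100.003, 100.011) = 100.003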
207#
208# Looking at our event sequences from above again, we now identify 6 times
209# of interest:
210#
211# #1 -----O---|----|-X--X--|----|----|-|---X---X------)----->
212# | | | | | |
213# #2 ----|-X--|-------|---X|----|X|X------------X)---X-X---->
214# | | | | | |
215# #3 ----O------O--|----|-x-----|----|---x)-|--------------->
216# -----------------|----[=======|====)----|-|---------------------------> t
217# A B C D E F
218#
219# E is the earliest of the time-shifted t_completes, in this case it
220# corresponds to #3's time-shifted t_complete. D is the closest
221# time-shifted time an event in another detector can have to E before we
222# cannot say if it forms a coincidence with events in #3 that we have not
223# yet collected. Since #3's event list is complete up to but not
224# including E, we can collect all n-tuples whose time (the time of their
225# earliest event) is up to but not including D. B was the boundary up to
226# which we collected candidates in the previous iteration. Since at that
227# time we did not allow candidates whose time was exactly B to be
228# collected, we now include them. Therefore the range of candidate times
229# to be collected is [B, D) (closed from below, open from above). In order
230# to know that an n-tuple's earliest coincident member is not prior to B
231# (and therefore it would have already been reported as part of a larger
232# coincidence involving that event) we must retain and include for
233# consideration events as early as A. A precedes B by the largest
234# coincidence window between any pair of detectors. In this iteration, any
235# events that had preceded A have been disposed of (and are indicated now
236# as an "O"). After this iteration is complete, we will have reported
237# exactly all candidates whose canonical times fall in [B, D) and they will
238# not be reported again, but for the same reason we retained events between
239# A and B we must not discard all of them yet. We must retain all the
240# events whose time-shifted times are C or later where C precedes D by the
241# largest coincidence window between any pair of detectors. Since we are
242# only forming coincidences involving at least one event preceding D, we do
243# not need to include in the coincidence analysis at this iteration any of
244# the events whose time-shifted times are F or later, where F follows
245# D by the largest coincidence window between any pair of detectors.
246#
247# The procedure is to (i) compute these boundary times, (ii) collect all
248# the events from all detectors up to but not including F, (iii) perform a
249# full coincidence analysis, (iv) report as newly-identified any candidates
250# whose canonical times fall in the interval [B, D), ignoring the rest, (v)
251# then discard all events up to C, (vi) record D to be used as B next time,
252# (vii) and return.
253#
254# There is an additional complication. One of the problems we need to
255# solve is to provide the set of single-detector events that participate in
256# the n-tuple candidates, across all time-shift offset vectors. For
257# example, we need a list of the detector #1 events that participate in any
258# n-tuple candidate, but we need each one to be listed only once,
259# regardless of how many different offset vectors were found to result in
260# it participating in an n-tuple candidate.
261#
262# We solve this problem the following way. We maintain a set of the events
263# that have participated in n-tuple coincidences. The first time an event
264# is found to participate in an n-tuple candidate it is added to the set
265# and reported as a newly used single-detector event to be inserted into
266# the calling code's output document. Events that participate in n-tuple
267# coincidences but are found to already be in that set are not reported to
268# the calling code (the n-tuple coincidence is, but not the single-detector
269# event). To prevent the set from growing without bound, causing both memory
270# and performance problems, events are removed from the set when they are
271# removed from the last queue that contains them.
272#
273# How to compute the times. For each time slide,
274#
275# - D is the minimum across all instruments of
276#
277# D = min(t_complete + offset - max(coincidence window with other detectors))
278#
279# Note that this is slightly different than the algorithm described above,
280# where we explained that D was computed relative to E, the minimum of the
281# time-shifted completes. In fact we compute the boundary uniquely for
282# each detector and take the minimum of those. This is simpler, but also
283# is necessary in the event that two detectors' t_completes are very close
284# and the later one has a larger maximum coincidence window than the
285# earlier one, larger by an amount greater than the difference between the
286# t_completes.
287#
288# - B is this value computed in the previous iteration.
289#
290# - F is computed as a unique value for each detector as
291#
292# F = D + max(coincidence window with other detectors)
293#
294# For simplicity, the description above implied F was common to all
295# detectors, but we can reduce the computational cost by trimming it
296# as tightly as we can for each detector.
297#
298# - likewise, A is computed as a unique value for each detector as
299#
300# A = B - max(coincidence window with other detectors)
301#
302# - and C, also, is computed as a unique value for each detector as
303#
304# C = D - max(coincidence window with other detectors)
305#
306# For each detector, all events in the interval [A, F) for that detector
307# are collected and fed into the coincidence constructor. When finished,
308# all events up to but not including C are discarded and reported to the
309# top level of the coincidence engine as being no longer in storage, which
310# is information it uses to manage the set of events that have participated
311# in n-tuple coincidences.
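#
# Schematically (a sketch only; the names "offsets", "t_completes" and
# "max_windows" are hypothetical, with max_windows[ifo] holding the
# largest coincidence window involving detector ifo):
#
#	D = min(t_completes[ifo] + offsets[ifo] - max_windows[ifo] for ifo in ifos)
#	F = dict((ifo, D + max_windows[ifo]) for ifo in ifos)
#	A = dict((ifo, B - max_windows[ifo]) for ifo in ifos)	# B = previous D
#	C = dict((ifo, D - max_windows[ifo]) for ifo in ifos)
#
# Events with shifted times in [A[ifo], F[ifo]) are fed to the coincidence
# test, candidates with canonical times in [B, D) are reported, events
# preceding C[ifo] are discarded, and D is recorded as the next B.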
312#
313#
314# Some pipelines require a list of the events known to not form
315# coincidences. This information might be used, for example, to assist in
316# constructing noise models for false-alarm probability estimation.
317# Therefore, in addition to constructing n-tuple coincs we also construct a
318# stream of non-coincident single-detector events (which might also be
319# reported as n-tuple candidates if n is 1). This is achieved by
320# maintaining a set of all events that participate in (n>=2)-tuple coincs,
321# and then at the point where events are flushed from the queues any that
322# are not found in that set are reported in the "non-coincident event"
323# stream. Any that are found in the set as they are being flushed are at
324# that time removed from the set to keep it from growing.
325#
326# Implementation Notes:
327#
328# The singlesqueue, coincgen_doubles, and TimeSlideGraphNode objects
329# export APIs that are partially compatible with each other. This is done
330# so that they can be used interchangeably within the time slide graph to
331# implement coincidence generators of different orders without the need to
332# wrap them in conditionals to select the correct code path. This is why,
333# for example, the concept of the coincidence window and the distinction
334# between t_complete and t_coinc_complete have been pushed down into the
335# singlesqueue object even though they seem to be nonsensical there.
336#
337
338
339class singlesqueue(object):
340 """
341 Queue a stream of partially time ordered events: as new events are
342 added, sort them into the queue in time order; maintain a record
343 of the time up to which the list of events is complete; and
344 provide events sorted in time order to coincidence tests.
345
346 Searches must define a class subclassed from this that defines an
347 override of the .event_time() method. See coincgen_doubles for
348 information on what to do with the subclass.
349
350 Implementation notes:
351
352 What is actually inserted into the queue is the time of the event
353 as defined by the .event_time() method provided by the subclass.
354 This allows sorts and bisection searches to be performed quickly,
355 without the need for much additional code. When we need to
356 retrieve an event, we need some way of turning the time of
357 the event (i.e., the thing that is actually in the queue) back into
358 that event, and this is done by adding an attribute to the time
359 object that points back to the original event. This means that
360 builtin Python numeric types cannot be used for the times of the
361 events because new attributes cannot be attached to them.
362 Subclasses of the builtin numeric types can be used as time
363 objects, as can LAL's LIGOTimeGPS.
364 """
365 @staticmethod
366 def event_time(event):
367 """
368 Override with a method to return the "time" of the given
369 event. The "time" object that is returned is typically a
370 lal.LIGOTimeGPS object. If some other type is used, it
371 must support arithmetic and comparison with python float
372 objects and lal.LIGOTimeGPS objects, and support comparison
373 with igwn_segments.PosInfinity and
374 igwn_segments.NegInfinity, and it must have a .__dict__ or
375 other mechanism allowing an additional attribute named
376 .event to be set on the object. The builtin float type is
377 not suitable, but a subclass of float would be.
378 """
379 raise NotImplementedError
380
381 def queueentry_from_event(self, event):
382 """
383 For internal use. Construct the object that is to be
384 inserted into the queue from an event.
385 """
386 # retrieve the time and attach a reference back to the
387 # original event object
388 entry = self.event_time(event)
389 entry.event = event
390 return entry
391
392 def __init__(self, offset, max_coinc_window):
393 """
394 Initialize a new, empty, singles queue. offset is the time
395 that will be added to events in this queue when forming
396 coincidences with events from other queues, and
397 max_coinc_window is an upper bound on the maximum time that
398 can separate events in this queue from events in other
399 queues and they still be able to form coincidences --- it
400 is not that maximum time, necessarily, but the true maximum
401 time cannot be larger than max_coinc_window.
402
403 Note that max_coinc_window is not defining the coincidence
404 test, that is defined by the get_coincs machinery within
405 the coincgen_doubles class. max_coinc_window is used by
406 the streaming logic to determine where in the event
407 sequences complete n-tuple candidates can be constructed.
408 Although max_coinc_window need only bound the time interval
409 described above, the latency of the coincidence engine is
410 reduced and the number of events stored in the queue (and
411 the computational overhead they cause) is reduced if the
412 number is as small as possible.
413 """
414 self.offset = offset
415 if max_coinc_window < 0.:
416 raise ValueError("max_coinc_window < 0 (%g)" % max_coinc_window)
417 self.max_coinc_window = max_coinc_window
418 # using .event_time() to define the times of events, the
419 # list of events is complete up to but not including
420 # .t_complete. we can't use fpconst.NegInf because
421 # LIGOTimeGPS objects refuse to be compared to it. the
422 # NegInfinity object from the segments library, however, is
423 # compatible with both LIGOTimeGPS and with native Python
424 # numeric types.
425 self.t_complete = segments.NegInfinity
426 # queue of events. the queue's contents are time-ordered.
427 self.queue = []
428 # id() --> event mapping for the contents of queue. sets
429 # will be used to track the status of events, e.g. which
430 # have and haven't been used to form candidates. we don't
431 # require the event objects to be hashable and suitable for
432 # inclusion in sets, instead we put their Python IDs into
433 # the sets, but then we need a way to turn an ID back into
434 # an event, and that's what this provides
435 self.index = {}
436
437 @property
438 def age(self):
439 """
440 Using .event_time() to define the times of events, the time
441 of the oldest event in the queue or self.t_complete if the
442 queue is empty. The time scale for .age is that of the
443 events themselves, not their shifted times (the offset
444 parameter is not applied).
445
446 This is not used by the coincidence engine. This
447 information is provided as a convenience for calling code.
448 For example, some performance improvements might be
449 realized if segment lists or other information are clipped
450 to the range of times spanned by the contents of the
451 coincidence engine queues, and this allows calling codes to
452 see what interval that is, in physical event times not
453 time-shifted event times.
454 """
455 return self.queue[0] if self.queue else self.t_complete
456
457 @property
458 def t_coinc_complete(self):
459 """
460 The time up to which the time-shifted events in other
461 detectors' queues can be considered complete for the
462 purpose of forming n-tuple coincidences with the
463 time-shifted events in this queue. This precedes the
464 (time-shifted) time up to which this event list is complete
465 by the maximum coincidence window that can separate an
466 event in this queue from events in others and they still be
467 considered coincident in time.
468
469 The earliest such time across all detectors is the time up
470 to which the n-tuple candidate list can be completely
471 constructed, assuming the "time" of an n-tuple candidate is
472 defined to be the earliest time-shifted time of the events
473 in the n-tuple.
474 """
475 return self.t_complete + self.offset - self.max_coinc_window
476
477 def push(self, events, t_complete):
478 """
479 Add new events to the queue. Mark the queue complete up to
480 t_complete, meaning you promise no further events will be
481 added to the queue earlier than t_complete. The time scale
482 for t_complete is that of the events themselves, not their
483 shifted times (the offset parameter is not applied). The
484 value of t_complete is stored for later retrieval.
485
486 NOTE: t_complete is not permitted to decrease.
487
488 NOTE: the events sequence will be iterated over multiple
489 times. It may not be a generator.
490 """
491 if t_complete < self.t_complete:
492 raise ValueError("t_complete has gone backwards: last was %s, new is %s" % (self.t_complete, t_complete))
493
494 if events:
495 # add the new events to the ID index
496 self.index.update((id(event), event) for event in events)
497
498 # construct queue entries from the new events and
499 # insort with current "incomplete" queue
500 self.queue.extend(map(self.queueentry_from_event, events))
501 self.queue.sort()
502
503 # update the marker labelling time up to which the event
504 # list is complete
505 self.t_complete = t_complete
506
507 def pull(self, t):
508 """
509 t is the time up to which the calling code is in the
510 process of constructing all available n-tuple coincident
511 candidates. The return value is a tuple of all events from
512 the queue whose time-shifted times are up to (not
513 including) t + max_coinc_window, which are all the events
514 from this queue that could form a coincidence with an event
515 up to (not including) t.
516
517 t is defined with respect to the time-shifted event times.
518 The contents of the returned tuple is time-ordered as
519 defined by .event_time().
520
521 Assuming that t cannot go backwards, any of the reported
522 events that cannot be used again are removed from the
523 internal queue.
524
525 If t is None, then the queue is completely flushed. All
526 remaining events are pulled from the queue and reported in
527 the tuple, .t_complete is reset to -inf and all other
528 internal state is reset. After calling .pull() with t =
529 None, the object's state is equivalent to its initial state
530 and it is ready to process a new stream of events.
531 """
532 # the input parameter t is the time D in the algorithm
533 # description above
534
535 # are we being flushed? if so, return everything we've got
536 # and reset our state to .__init__() equivalent
537 if t is None:
538 events = tuple(entry.event for entry in self.queue)
539 del self.queue[:]
540 self.index.clear()
541 self.t_complete = segments.NegInfinity
542 return events
543
544 # unshift back to our events' time scale
545 t -= self.offset
546
547 # safety check
548 if t + self.max_coinc_window > self.t_complete:
549 raise ValueError("pull to %g fails to precede time to which queue is complete, (%g + %g), by the required margin of %g s" % (t + self.offset, self.t_complete, self.offset, self.max_coinc_window))
550
551 # these events will never be used again. remove them from
552 # the queue, and remove their IDs from the index. this is
553 # calculating time C for this event list in the description
554 # above.
555 i = bisect_left(self.queue, t - self.max_coinc_window)
556 flushed_events = tuple(entry.event for entry in self.queue[:i])
557 del self.queue[:i]
558 for event in flushed_events:
559 self.index.pop(id(event))
560 # collect other events that might be used again but that
561 # can form coincidences with things in other detectors up
562 # to t. this is computing time F for this event list in
563 # the description above.
564 return flushed_events + tuple(entry.event for entry in self.queue[:bisect_left(self.queue, t + self.max_coinc_window)])
565
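# A minimal sketch of a search-specific subclass and its use (illustrative
# only; the event objects and the names "gpstime" and "myqueue" are
# hypothetical):
#
#	class gpstime(float):
#		# float subclass so that a .event attribute can be attached
#		pass
#
#	class myqueue(singlesqueue):
#		@staticmethod
#		def event_time(event):
#			return gpstime(event.time)
#
#	q = myqueue(offset = 0., max_coinc_window = 0.015)
#	q.push(events, t_complete = 100.)
#	ready = q.pull(99.9)	# events usable up to (not including) t + window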
566
567class coincgen_doubles(object):
568 """
569 Using a pair of singlesqueue objects, constructs pairs of
570 coincident events from streams of partially time-ordered events.
571
572 Searches must subclass this. The .singlesqueue class attribute
573 must be set to the search-specific singlesqueue implementation to
574 be used, i.e., a subclass of singlesqueue with an appropriate
575 .event_time() override. The internally-defined .get_coincs class
576 must be overridden with an implementation that provides the
577 required .__init__() and .__call__() methods.
578 """
579 # subclass must override this attribute with a reference to an
580 # implementation of singlesqueue that implements the .event_time()
581 # method.
582 singlesqueue = singlesqueue
583
584 class get_coincs(object):
585 """
586 This class defines the coincidence test. An instance is
587 initialized with a sequence of events, and is a callable
588 object. When the instance is called with a single event
589 from some other instrument, a time offset to apply to that
590 event (relative to the events in this list) and a time
591 coincidence window, the return value must be a (possibly
592 empty) sequence of the initial events that are coincident
593 with that given event.
594
595 The sequence of events with which the instance is
596 initialized is passed in time order.
597
598 It is not required that the implementation be subclassed
599 from this. This placeholder implementation is merely
600 provided to document the required interface.
601
602 A minimal example:
603
604 class get_coincs(object):
605 def __init__(self, events):
606 self.events = events
607 def __call__(self, event_a, offset_a, coinc_window):
608 return [event_b for event_b in self.events if abs(event_a.time + offset_a - event_b.time) < coinc_window]
609
610 This is performance-critical code and a naive
611 implementation such as the one above will likely be found
612 to be inadequate. Expect to implement this code in C for
613 best results.
614 """
615 # FIXME: move offset_a and coinc_window parameters to
616 # __init__() method
617 def __init__(self, events):
618 """
619 Prepare to search a collection of events for
620 coincidences with other single events. events is a
621 time-ordered iterable of events. It is recommended
622 that any additional indexing required to improve
623 search performance be performed in this method.
624 """
625 raise NotImplementedError
626
627 def __call__(self, event_a, offset_a, coinc_window):
628 """
629 Return a sequence of the events from those passed
630 to .__init__() that are coincident with event_a.
631 The sequence need not be in time order. The object
632 returned by this method must be iterable and
633 support being passed to bool() to test if it is
634 empty.
635
636 offset_a is the time shift to be added to the time
637 of event_a before comparing to the times of events
638 passed to .__init__(). This behaviour is to
639 support the construction of time shifted
640 coincidences.
641
642 coinc_window is the maximum time, in seconds,
643 separating coincident events from the shifted time of
644 event_a. Here, this is the interval that defines
645 the coincidence test between the two detectors, not
646 the bound on all such intervals used by the
647 singlesqueue object to determine the interval for
648 which n-tuple candidates can be constructed.
649 """
650 raise NotImplementedError
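	# A possibly faster implementation than the naive example in the
	# docstring above (a sketch only; assumes events carry a .time
	# attribute, and requires bisect.bisect_right in addition to the
	# bisect_left imported by this module):
	#
	#	class get_coincs(object):
	#		def __init__(self, events):
	#			self.events = events
	#			self.times = [event.time for event in events]
	#		def __call__(self, event_a, offset_a, coinc_window):
	#			t = event_a.time + offset_a
	#			lo = bisect_left(self.times, t - coinc_window)
	#			hi = bisect_right(self.times, t + coinc_window)
	#			return self.events[lo:hi]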
651
652 def __init__(self, offset_vector, coinc_windows):
653 """
654 offset_vector must be a two-instrument offset vector. This
655 sets which two instruments' events are to be processed by
656 this object, and the offsets that should be applied to
657 their events when searching for coincidences.
658 coinc_windows is a dictionary mapping pairs of instruments
659 (as sets) to the largest time that can separate events from
660 those pairs of instruments (after applying the required
661 time offsets) and they still be considered coincident.
662 """
663 if len(offset_vector) != 2:
664 raise ValueError("offset_vector must name exactly 2 instruments (got %d)" % len(offset_vector))
665 self.offset_vector = offset_vector
666 # the coincidence window for our pair of detectors
667 self.coinc_window = coinc_windows[frozenset(offset_vector)]
668 if self.coinc_window < 0.:
669 raise ValueError("coinc_window < 0 (%g)" % self.coinc_window)
670 # the largest coincidence window between any pair of
671 # detectors in the network
672 max_coinc_window = max(coinc_windows.values())
673 if max_coinc_window < 0.:
674 raise ValueError("max(coinc_window) < 0 (%g)" % max_coinc_window)
675 # initialize the singlesqueue objects, one for each of our
676 # detectors
677 # FIXME: the max_coinc_window is the largest of all
678 # coincidence windows, but as described above in the notes
679 # about this algorithm we only need it to be the largest of
680 # the windows that include the specific instrument. fixing
681 # this will reduce latency slightly (tens of milliseconds)
682 # and keep the in-ram event lists smaller and reduce
683 # coincidence overhead
684 self.queues = dict((instrument, self.singlesqueue(offset, max_coinc_window)) for instrument, offset in offset_vector.items())
685 # view into the id() --> event indexes of the queues
686 self.index = ChainMap(*(queue.index for queue in self.queues.values()))
687
688 # pre-sort the instruments into alphabetical order
689 self.instrumenta, self.instrumentb = sorted(self.queues)
690 # pre-compute the offset of the 1st relative to the 2nd
691 self.offset_a = self.queues[self.instrumenta].offset - self.queues[self.instrumentb].offset
692
693 @property
694 def age(self):
695 """
696 The earliest of the internal queues' .age.
697 """
698 return min(queue.age for queue in self.queues.values())
699
700 @property
701 def t_coinc_complete(self):
702 """
703 The earliest of the internal queues' .t_coinc_complete.
704 """
705 return min(queue.t_coinc_complete for queue in self.queues.values())
706
707 def push(self, instrument, events, t_complete):
708 """
709 Push new events from some instrument into the internal
710 queues. The iterable of events need not be time-ordered.
711 With self.singlesqueue.event_time() defining the time of
712 events, t_complete sets the time up-to which the collection
713 of events is known to be complete. That is, in the future
714 no new events will be pushed whose times are earlier than
715 t_complete. t_complete is not permitted to decrease.
716 """
717 self.queues[instrument].push(events, t_complete)
718
719 def doublesgen(self, eventsa, offset_a, eventsb, singles_ids):
720 """
721 For internal use only.
722 """
723 # choose the longer of the two lists for the outer loop.
724 # yes, it's counter-intuitive; it's because of the high
725 # cost of indexing events for the inner loop. in any case
726 # we must still return coincident pairs with the events
727 # ordered as supplied so if the event lists are swapped we
728 # need to unswap the pairs that we return.
729 # FIXME: hopefully later improvements to the scaling will
730 # change the logic here, and we'll need to switch to using
731 # the shorter list for the outer loop after all.
732
733 if len(eventsb) > len(eventsa):
734 eventsa, eventsb = eventsb, eventsa
735 offset_a = -offset_a
736 unswap = lambda a, b: (b, a)
737 else:
738 unswap = lambda a, b: (a, b)
739
740 # initialize the singles_ids set
741
742 singles_ids.clear()
743 singles_ids.update(id(event) for event in eventsb)
744 singles_ids_add = singles_ids.add
745 singles_ids_discard = singles_ids.discard
746
747 # for each event in list A, iterate over events from the
748 # other list that are coincident with the event, and return
749 # the pairs. while doing this, collect the IDs of events
750 # that are used in coincidences in a set for later logic.
751 coinc_window = self.coinc_window
752 queueb_get_coincs = self.get_coincs(eventsb)
753 for eventa in eventsa:
754 matches = queueb_get_coincs(eventa, offset_a, coinc_window)
755 eventa = id(eventa)
756 if matches:
757 for eventb in matches:
758 eventb = id(eventb)
759 singles_ids_discard(eventb)
760 yield unswap(eventa, eventb)
761 else:
762 singles_ids_add(eventa)
763
764 def pull(self, t, singles_ids):
765 """
766 Generate a sequence of 2-element tuples of Python IDs of
767 coincident event pairs. Calling code is assumed to be in
768 the process of constructing all possible n-tuple
769 coincidences such that at least one time-shifted event in
770 the n-tuple is before t, and so we construct and return all
771 pairs required to decide that set, meaning we will
772 construct and return pairs that might contain only events
773 after t so that the calling code can test them for
774 coincidence with events from other instruments that precede
775 t. The internal queues are flushed of events that cannot
776 be used again assuming that t never goes backwards. If t
777 is the special value None then all coincident pairs that
778 can be constructed from the internal queues are constructed
779 and returned, the queues are emptied and returned to their
780 initial state.
781
782 The order of the IDs in each tuple is alphabetical by
783 instrument.
784
785 The Python IDs of events that are not reported in a
786 coincident pair are placed in the singles_ids set.
787
788 NOTE: the singles_ids parameter passed to this function
789 must be a set or set-like object. It must support Python set
790 manipulation methods, like .clear(), .update(), and so on.
791 """
792 # retrieve the event lists to be considered for coincidence
793 # using .pull() on the singlesqueues. these return
794 # sequences of event objects, not their python IDs. the
795 # .pull() methods will safety check t against their
796 # time-shifted t_completes, we don't have to do it here.
797 # from the event sequences, we construct coincident pairs.
798 # the pairs are the python IDs of the coincident events,
799 # not the events themselves. while doing this, the
800 # singles_ids set is populated with python IDs of events
801 # that do not form pairs.
802
803 return sorted(self.doublesgen(self.queues[self.instrumenta].pull(t), self.offset_a, self.queues[self.instrumentb].pull(t), singles_ids))
804
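# Example use of a configured subclass (a sketch; "mydoubles" is a
# hypothetical coincgen_doubles subclass with .singlesqueue and
# .get_coincs overridden as described above):
#
#	windows = {frozenset(("H1", "L1")): 0.015}
#	dbl = mydoubles(offsetvector.offsetvector({"H1": 0., "L1": 5.}), windows)
#	dbl.push("H1", h1_events, t_complete = 100.)
#	dbl.push("L1", l1_events, t_complete = 100.)
#	singles_ids = set()
#	pairs = dbl.pull(dbl.t_coinc_complete, singles_ids)
#	# pairs is a sorted list of (id_H1, id_L1) tuples; dbl.index maps
#	# the IDs back to the original events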
805
806#
807# =============================================================================
808#
809# Time Slide Graph
810#
811# =============================================================================
812#
813
814
815class TimeSlideGraphNode(object):
816 def __init__(self, coincgen_doubles_type, offset_vector, coinc_windows, min_instruments, time_slide_id = None):
817 #
818 # initialize
819 #
820
821 # time_slide_id non-None only in head nodes
822 self.time_slide_id = time_slide_id
823 self.offset_vector = offset_vector
824 # not used here, but helpful to calling codes to avoid
825 # repeating this test inside loops
826 self.is_zero_lag = not any(offset_vector.values())
827 # keep_partial is part of the logic that ensures we only
828 # return coincs that meet the min_instruments criterion
829 self.keep_partial = len(offset_vector) > min_instruments
830 # construct this node's children
831 if len(offset_vector) > 2:
832 self.components = tuple(TimeSlideGraphNode(coincgen_doubles_type, component_offset_vector, coinc_windows, min_instruments) for component_offset_vector in self.component_offsetvectors(offset_vector, len(offset_vector) - 1))
833 # view into the id() --> event indexes of the
834 # nodes. we assume they are all ChainMap
835 # instances, and, for performance reasons, flatten
836 # the hierarchy one level. doing that at every
837 # level ensures that, globally, the
838 # ChainMap hierarchy doesn't grow in depth as the
839 # graph is constructed. it's only ever a ChainMap
840 # of dicts, not a ChainMap of ChainMaps of ...
841 self.index = ChainMap(*(index for node in self.components for index in node.index.maps))
842 elif len(offset_vector) == 2:
843 self.components = (coincgen_doubles_type(offset_vector, coinc_windows),)
844 self.index = self.components[0].index
845 elif len(offset_vector) == 1:
846 # use a singles queue directly, no coincidence.
847 # note that this configuration is not how singles
848 # are generated in a normal analysis. this is a
849 # special case to support analyses processing only
850 # a single detector (possibly being run for
851 # diagnostic or performance testing purposes).
852 # normal multi-detector analyses get singles from
853 # the unused events returned by the doubles
854 # generator in the len==2 code path.
855 assert not coinc_windows
856 offset, = offset_vector.values()
857 self.components = (coincgen_doubles_type.singlesqueue(offset, 0.),)
858 self.index = self.components[0].index
859 else:
860 raise ValueError("offset_vector cannot be empty")
861 # the time up to which we have been pulled already
862 self.previous_t = segments.NegInfinity
863 # local reference to event_time() function. required for
864 # cutting candidate sets to time boundaries.
865 self.event_time = coincgen_doubles_type.singlesqueue.event_time
866
867 @staticmethod
868 def component_offsetvectors(offset_vector, n):
869 """
870 Equivalent to offsetvector.component_offsetvectors() except
871 only one input offsetvector is considered, not a sequence
872 of them, and the output offsetvector objects preserve the
873 absolute offsets of each instrument, not only the relative
874 offsets between instruments.
875 """
876 assert 1 <= n <= len(offset_vector)
877 for selected_instruments in itertools.combinations(offset_vector, n):
878 yield offsetvector.offsetvector((instrument, offset_vector[instrument]) for instrument in selected_instruments)
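	# For example, a 3-instrument offset vector decomposes into three
	# 2-instrument vectors that preserve the absolute offsets
	# (illustrative values, formatting approximate):
	#
	#	{"H1": 0, "L1": 5, "V1": 10}  -->
	#		{"H1": 0, "L1": 5}, {"H1": 0, "V1": 10}, {"L1": 5, "V1": 10}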
879
880 @property
881 def age(self):
882 """
883 The earliest of the component nodes' .age.
884 """
885 return min(node.age for node in self.components)
886
887 @property
888 def t_coinc_complete(self):
889 """
890 The earliest of the component nodes' .t_coinc_complete.
891 """
892 return min(node.t_coinc_complete for node in self.components)
893
894 def push(self, instrument, events, t_complete):
895 """
896 Push new events from some instrument into the internal
897 queues. The iterable of events need not be time-ordered.
898 With self.singlesqueue.event_time() defining the time of
899 events, t_complete sets the time up to which the collection
900 of events is known to be complete. That is, in the future
901 no new events will be pushed whose times are earlier than
902 t_complete.
903 """
904 if len(self.offset_vector) == 1:
905 # push directly into the singles queue
906 assert (instrument,) == tuple(self.offset_vector)
907 self.components[0].push(events, t_complete)
908 else:
909 for node in self.components:
910 if instrument in node.offset_vector:
911 node.push(instrument, events, t_complete)
912
913 def pull(self, t):
914 """
915 Using the events contained in the internal queues construct
916 and return all coincidences they participate in. It is
917 assumed calling code is in the process of constructing
918 n-tuple candidates such that each candidate contains at
919 least one event preceding t, and so we construct and return
920 all candidates required to decide that set, which might
921 include candidates all of whose events come after t.
922
923 Two objects are returned: a tuple of the (n=N)-way
924 coincidences, where N is the number of instruments in this
925 node's offset vector, and a set of the
926 (min_instruments<=n<N)-way coincidences, which will be
927 empty unless N > min_instruments.
928
929 Since the (min_instruments<=n<N)-tuple coincidences cannot
930 go on to form (n>N)-tuple coincidences, any that fail to
931 contain at least one event preceding t could, at this stage,
932 be discarded; however, testing for that would require paying
933 the price of the ID-->event look-up on all events in all
934 candidates, which we will have to perform at each level of
935 the graph, including the final stage before reporting
936 candidates, so it is more efficient to leave the invalid
937 candidates in the stream at this stage and cull them in a
938 single pass at the end.
939
940 The coincidences are reported as tuples of the Python IDs
941 of the events that form coincidences. The .index look-up
942 table can be used to map these IDs back to the original
943 events. To do that, however, a copy of the contents of the
944 .index mapping must be made before calling this method
945 because this method will result in some of the events in
946 question being removed from the queues, and thus also from
947 the .index mapping.
948 """
949 #
950 # no-op unless time has advanced. .previous_t is never
951 # None, so flushing cannot be mistaken for a no-op
952 #
953
954 if t == self.previous_t:
955 return tuple(), set()
956
957 #
958 # record the t to which we are being pulled for use in
959 # defining the candidate interval in the next iteration.
960 # if we are being flushed, then reset to -inf. this is
961 # time B in the algorithm description above
962 #
963
964 self.previous_t = t if t is not None else segments.NegInfinity
965
966 #
967 # is this a "no-op" node? return 1-detector "coincs" and a
968 # null set of "partial coincs". NOTE: the "no-op" node
969 # concept exists to enable single-detector searches, i.e.,
970 # searches where there is not, in fact, any coincidence
971 # analysis to perform. it is not here to enable
972 # single-detector candidate reporting in multi-detector
973 # searches. typically this streaming coincidence engine is
974 # wrapped in additional code that updates the contents of
975 # an output document based on the results, so by adding
976 # this feature to the "coincidence engine", calling codes
977 # don't need to support special configurations for
978 # one-detector analyses, they can set up their normal event
979 # handling code with a one-detector offset vector, push
980 # events in and collect candidates out.
981 #
982
983 if len(self.offset_vector) == 1:
984 return tuple((id(event),) for event in self.components[0].pull(t)), set()
985
986 #
987 # is this a leaf node? construct the coincs explicitly
988 #
989
990 if len(self.offset_vector) == 2:
991 #
992 # search for and record coincidences. coincs is a
993 # sorted tuple of event ID pairs, where each pair
994 # of IDs is, itself, ordered alphabetically by
995 # instrument name. here, for the two-instrument
996 # case, the "partial coincs" are any singles that
997 # failed to form coincidences, but we convert the
998 # set into a set of 1-element tuples so they take
999 # the form of 1-detector coincs
1000 #
1001
1002 singles_ids = set()
1003 coincs = tuple(self.components[0].pull(t, singles_ids))
1004 return coincs, (set((eventid,) for eventid in singles_ids) if self.keep_partial else set())
1005
1006 #
1007 # this is a regular node in the graph. use coincidence
1008 # synthesis algorithm to populate its coincs
1009 #
1010
1011 #
1012 # double-check the consistency of our structure
1013 #
1014
1015 assert len(self.components) > 2
1016
1017 # first collect all coincs and partial coincs from the
1018 # component nodes in the graph
1019 component_coincs_and_partial_coincs = tuple(component.pull(t) for component in self.components)
1020 component_coincs = tuple(elem[0] for elem in component_coincs_and_partial_coincs)
1021
1022 if self.keep_partial:
1023 # any coinc with n-1 instruments from the component
1024 # time slides might fail to form an n instrument
1025 # coincidence. we put them all into our
1026 # partial_coincs set now, and we'll remove them
1027 # from this set as we discover which participate in
1028 # n instrument coincidences
1029 partial_coincs = set(itertools.chain(*component_coincs))
1030
1031 # the (< n-1)-instrument partial coincs are more
1032 # complicated. for example, H+L doubles are
1033 # constructed as an intermediate step when forming
1034 # H+L+V triples and also when forming H+K+L
1035 # triples. The H+L doubles that failed to form a
1036 # coincidence with V are reported as unused in the
1037 # former case, while the H+L doubles that failed to
1038 # form a coincidence with K are reported as unused
1039 # in the latter case, and there is no reason why
1040 # these two sets should be the same: some H+L
1041 # doubles that were not used in forming H+L+V
1042 # triples might have been used to form H+L+K
1043 # triples. if we are now using those two sets of
1044 # triples (among others) to form H+K+L+V
1045 # quadruples, we need to figure out which unused
1046 # H+L doubles have, in fact, really not been used
1047 # at this stage. it can be shown that a (<
1048 # n-1)-instrument coinc will go unused in forming
1049 # any two of our (n-1)-instrument components if and
1050 # only if it is not used to form any of our
1051 # (n-1)-instrument components. therefore, we
1052 # obtain the unused (< n-1)-instrument partial
1053 # coincs from the union of all pair-wise
1054 # intersections of the (< n-1)-instrument partial
1055 # coincs from our components. since each partial
1056 # coinc can only be listed at most once in the
1057 # result from each component, and because we must
1058 # compute all possible pair-wise intersections of
1059 # the sets, this is equivalent to finding the
1060 # partial coincs that appear two or more times in
1061 # the concatenated sequence.
1062 partial_coincs.update(coinc for coinc, count in Counter(itertools.chain(*(elem[1] for elem in component_coincs_and_partial_coincs))).items() if count >= 2)
1063 else:
1064 partial_coincs = set()
1065 del component_coincs_and_partial_coincs
1066
1067 # magic: we can form all n-instrument coincs by knowing
1068 # just three sets of the (n-1)-instrument coincs no matter
1069 # what n is (n > 2).
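		# worked example (hypothetical event IDs): for the
		# instruments H, L, V the component lists hold H+L, H+V
		# and L+V doubles. given coinc0 = (h, l) from the H+L
		# list, each coinc1 = (h, v) from the H+V list proposes
		# the triple (h, l, v), which is confirmed if and only
		# if the pair (l, v) is found in the L+V list.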
1070 coincs = []
1071 component_coincs0 = component_coincs[0]
1072 component_coincs1 = component_coincs[1]
1073 component_coincs2 = component_coincs[-1]
1074 del component_coincs
1075 # for each coinc in list 0
1076 for coinc0 in component_coincs0:
1077 # find all the coincs in list 1 whose first (n-2)
1078 # event IDs are the same as the first (n-2) event
1079 # IDs in coinc0. note that they are guaranteed to
1080 # be arranged together in the list of coincs and
1081 # can be identified with two bisection searches
1082 # note: cannot use bisect_right() because we're
1083 # only comparing against the first (n-2) of (n-1)
1084 # things in each tuple, we need to use bisect_left
1085 # after incrementing the last of the (n-2) things
1086 # by one to obtain the correct range of indexes
1087 coincs1 = component_coincs1[bisect_left(component_coincs1, coinc0[:-1]):bisect_left(component_coincs1, coinc0[:-2] + (coinc0[-2] + 1,))]
1088 # find all the coincs in list 2 whose first (n-2)
1089 # event IDs are the same as the last (n-2) event
1090 # IDs in coinc0. note that they are guaranteed to
1091 # be arranged together in the list and can be
1092 # identified with two bisection searches
1093 coincs2 = component_coincs2[bisect_left(component_coincs2, coinc0[1:]):bisect_left(component_coincs2, coinc0[1:-1] + (coinc0[-1] + 1,))]
1094 # for each coinc extracted from list 1 above search
1095 # for a coinc extracted from list 2 above whose
1096 # first (n-2) event IDs are the last (n-2) event
1097 # IDs in coinc 0 and whose last event ID is the
1098 # last event ID in coinc 1. when found, the first
1099 # ID from coinc 0 prepended to the (n-1) coinc IDs
1100 # from coinc 2 forms an n-instrument coinc. how
1101 # this works is as follows: coinc 0 and coinc 1,
1102 # both (n-1)-instrument coincs, together identify a
1103 # unique potential n-instrument coinc. coinc 2's
1104 # role is to confirm the coincidence by showing
1105 # that the event from the instrument in coinc 1
1106 # that isn't found in coinc 0 is coincident with
1107 # all the other events that are in coinc 1. if the
1108 # coincidence holds then that combination of event
1109 # IDs must be found in the coincs2 list, because we
1110 # assume the coincs2 list is complete. the
1111 # bisection search above to extract the coincs2
1112 # list could be skipped, but by starting with a
1113 # shorter list the bisection searches inside the
1114 # following loop are faster.
1115 for coinc1 in coincs1:
1116 confirmation = coinc0[1:] + coinc1[-1:]
1117 i = bisect_left(coincs2, confirmation)
1118 if i < len(coincs2) and coincs2[i] == confirmation:
1119 new_coinc = coinc0[:1] + confirmation
1120 # break the new coinc into
1121 # (n-1)-instrument components and
1122 # remove them from the unused list
1123 # because we just used them, then
1124 # record the coinc and move on
1125 partial_coincs.difference_update(itertools.combinations(new_coinc, len(new_coinc) - 1))
1126 coincs.append(new_coinc)
1127 # sort the coincs we just constructed by the component
1128 # event IDs and convert to a tuple for speed
1129 coincs.sort()
1130 coincs = tuple(coincs)
1131
1132 #
1133 # done.
1134 #
1135
1136 return coincs, partial_coincs
1137
1138
1139class TimeSlideGraph(object):
1140 def __init__(self, coincgen_doubles_type, offset_vector_dict, coinc_window, min_instruments = 2, verbose = False):
1141 #
1142 # some initial input safety checks
1143 #
1144
1145 if min_instruments < 1:
1146 raise ValueError("require min_instruments >= 1 (%d)" % min_instruments)
1147 if min(len(offset_vector) for offset_vector in offset_vector_dict.values()) < min_instruments:
1148 # this test is part of the logic that ensures we
1149 # will only extract coincs that meet the
1150 # min_instruments criterion, i.e., the condition
1151 # being checked here must be true or we will get
1152 # incorrect results but this is the only place it
1153 # is checked so be careful. don't delete this
1154 # check.
1155 raise ValueError("encountered offset vector (%s) smaller than min_instruments (%d)" % (str(min(offset_vector_dict.values(), key = lambda offset_vector: len(offset_vector))), min_instruments))
1156
1157 #
1158 # for each pair of instruments compute the coincidence
1159 # window by adding the common coinc_window parameter to
1160 # the light travel time unique to each pair
1161 #
1162
1163 coinc_windows = dict((frozenset(pair), coinc_window + light_travel_time(*pair)) for pair in itertools.combinations(set(instrument for offset_vector in offset_vector_dict.values() for instrument in offset_vector), 2))
1164 if verbose:
1165 print("coincidence windows:\n\t%s" % ",\n\t".join("%s = %g s" % ("+".join(sorted(pair)), dt) for pair, dt in coinc_windows.items()))
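		# e.g., with coinc_window = 0.005 s the H1+L1 window is
		# 0.005 + light_travel_time("H1", "L1"), roughly 0.015 s
		# (illustrative numbers)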
1166
1167 #
1168 # populate the graph head nodes. these represent the
1169 # target offset vectors requested by the calling code.
1170 #
1171
1172 if verbose:
1173 print("constructing coincidence assembly graph for %d offset vectors ..." % len(offset_vector_dict), file=sys.stderr)
1174 self.head = tuple(TimeSlideGraphNode(
1176 coincgen_doubles_type,
1177 offset_vector,
1178 coinc_windows,
1179 min_instruments,
1180 time_slide_id = time_slide_id
1181 ) for time_slide_id, offset_vector in sorted(offset_vector_dict.items())
1182 )
1183
1184 # view into the id() --> event indexes of the graph nodes
1185 self.index = ChainMap(*(node.index for node in self.head))
1186
1187 # the set of the Python id()'s of the events contained in
1188 # the internal queues that have formed coincident
1189 # candidates (including single-detector coincidences if
1190 # min_instruments = 1). this is used to allow .pull() to
1191 # determine which of the events being flushed from the
1192 # internal queues has never been reported in a candidate.
1193 # calling codes can use this information to identify noise
1194 # samples for use in defining background models.
1195 self.used_ids = set()
1196
1197 # the set of the Python id()'s of the events contained in
1198 # the internal queues that have been reported in coincident
1199 # candidates (including single-detector coincidences if
1200 # min_instruments = 1). this is used to allow .pull() to
1201 # report which of the events it has found in coincidences
1202 # is being reported in a coincidence for the first time.
1203 # this can be used by calling code to be informed of which
1204 # events should be moved to an output document for storage.
1205 self.reported_ids = set()
1206
1207 #
1208 # done
1209 #
1210
1211 if verbose:
1212 def walk(node):
1213 # return the number of leaf and non-leaf
1214 # nodes in the graph rooted on this node
1215 return numpy.array((1, 0)) if len(node.components) == 1 else numpy.array((0, 1)) + sum(walk(node) for node in node.components)
1216 print("graph contains %d fundamental nodes, %d higher-order nodes" % tuple(sum(walk(node) for node in self.head)), file=sys.stderr)
1217
1218
1219 @property
1220 def age(self):
1221 """
1222 The earliest of the graph's head nodes' .age.
1223 """
1224 return min(node.age for node in self.head)
1225
1226
1227 def push(self, instrument, events, t_complete):
1228 """
1229 Push new events from some instrument into the internal
1230 queues. The iterable of events need not be time-ordered.
1231 With self.singlesqueue.event_time() defining the time of
1232 events, t_complete sets the time up to which the event
1233 stream from this instrument is known to be complete. That
1234 is, in the future no new events will be pushed from this
1235 instrument whose times are earlier than t_complete.
1236 t_complete is measured with respect to the unshifted times
1237 of the events.
1238
1239 Returns True if the graph's .t_coinc_complete has changed,
1240 indicating that it might be possible to form new
1241 candidates; False if the addition of these events does not
1242 yet allow new candidates to be formed. If the return value
1243 is False, .pull() is guaranteed to return an empty
1244 candidate list unless the graph is flushed of all
1245 candidates.
1246 """
1247 t_changed = False
1248 for node in self.head:
1249 if instrument in node.offset_vector:
1250 t_before = node.t_coinc_complete
1251 node.push(instrument, events, t_complete)
1252 t_changed |= t_before != node.t_coinc_complete
1253 return t_changed
1254
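	# A typical streaming driver loop (a sketch; the data source, the
	# process() handler and the "mydoubles" subclass are hypothetical):
	#
	#	graph = TimeSlideGraph(mydoubles, offset_vector_dict, 0.005)
	#	for instrument, events, t_complete in data_source:
	#		if graph.push(instrument, events, t_complete):
	#			for node, events in graph.pull():
	#				process(node.offset_vector, events)
	#	for node, events in graph.pull(flush = True):
	#		process(node.offset_vector, events)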
1255
1256 def pull(self, newly_reported = None, flushed = None, flushed_unused = None, flush = False, coinc_sieve = None, event_collector = None):
1257 """
1258 Extract newly available coincident candidates from the
1259 internal queues. If flush is True, then all remaining
1260 candidates are extracted and the internal queues are reset,
1261 otherwise only those candidates that can be constructed
1262 given the times up to which the individual event queues are
1263 known to be complete and given the offset vectors being
1264 considered will be reported.
1265
1266 This method returns a generator whose elements consist of
1267 (node, events) pairs, wherein events is a tuple of
1268 single-detector event objects comprising a single
1269 coincident n-tuple, and node is the TimeSlideGraphNode
1270 object from which the coincidence was retrieved. The node
1271 object can be consulted to retrieve the offset vector
1272 applied to the event times to form the coincidence, and
1273 other information about the state of the analysis. If the
1274 optional coinc_sieve() function is supplied (see below)
1275 then only those n-tuples that pass that function's test are
1276 included in the sequence.
1277
1278 Two output streams are available. One stream is the
1279 generator returned to the calling code, described above,
1280 which consists only of n-tuples that pass the optional
1281 coinc_sieve() test. The other stream exits this code via
1282 calls to event_collector() (see below), and consists of all
1283 coincident n-tuples formed by the coincidence engine,
1284 regardless of whether they pass the optional coinc_sieve()
1285 test or not. The n-tuples passed to event_collector()
1286 contain the Python IDs of the events, not the event objects
1287 themselves. If coinc_sieve() is not supplied, both streams
1288 contain the same n-tuples, one of tuples of event objects,
1289 the other of tuples of the Python IDs of those events.
1290
1291 event_collector(), if supplied, must be an object with a
1292 .push() method with the signature
1293
1294 event_collector.push(event_ids, offset_vector)
1295
1296 The return value is ignored. It will be called once for
1297 every coincident n-tuple that is formed by the coincidence
1298 engine, regardless of whether or not that n-tuple is
1299 ultimately reported to the calling code. event_ids will be
1300 a tuple of Python IDs of the single-detector events in the
1301 coincidence.
1302
1303 coinc_sieve(), if supplied, must be a callable object with
1304 the signature
1305
1306 coinc_sieve(events, offset_vector)
1307
1308 If the return value is False the event tuple is reported to
1309 the calling code, otherwise it is discarded. The default
1310 is to report all n-tuples. This feature can be used to
1311 remove obviously uninteresting candidates from the reported
1312 candidate stream, to reduce the memory requirements of the
1313 calling code and the disk requirements of output documents.
1314
1315 The optional parameters newly_reported, flushed, and
1316 flushed_unused may be used to pass lists to this code,
1317 which will be populated with additional information to
1318 assist the calling code. If lists are supplied, their
1319 contents will be erased and populated with event objects as
1320 follows:
1321
1322 newly_reported: list of single-detector events that have
1323 participated in n-tuple candidates reported to the calling
1324 code for the first time this invocation of .pull(). Note
1325 that only those events that participate in n-tuples that
1326 survive the optional coinc_sieve() test are included. This
1327 is meant to allow the calling code to populate an event
1328 table with the list of just the events needed to describe
1329 the surviving n-tuple candidates.
1330
1331 flushed: list of single-detector events that have been
1332 removed from the internal queues because they are now
1333 sufficiently old that all n-tuples that they can
1334 participate in have been formed and reported. This can be
1335 used to assist the calling code with housekeeping tasks,
1336 for example emptying its event_collector implementation of
1337 unnecessary internal state.
1338
1339 flushed_unused: list of the single-detector events in the
1340 flushed list that never participated in an n-tuple
1341 candidate. Note that events are considered to have
1342 participated in an n-tuple candidate if the coincidence
1343 engine reported them in one, regardless of whether or not
1344 the optional coinc_sieve() test ultimately rejected the
1345 candidate before it was reported to the calling code.
1346
1347 NOTE: the intent of the coinc_sieve() feature is not to
1348 alter the definition of the coincidence test, but merely to
1349 reduce the data volume going into the output document.
1350 Therefore, the coinc_sieve() feature affects the events
1351 appearing in the newly_reported list because that list is
1352 expected to be used to construct the output document, but
1353 does not affect the events appearing in the flushed_unused
1354 list because that list is meant to convey information about
1355 which events failed to form coincidences, for example for
1356 the purpose of informing false-alarm probability noise
1357 models.
1358 """
1359 # flatten the ID index for faster look-ups in the loop. NOTE:
1360 # this is also done to freeze the contents of the index.
1361 # calling .pull() on the graph nodes flushes events from
1362 # the internal queues which removes them from the index,
1363 # but we need the index to turn IDs back into events.
1364 index = dict(self.index.items())
1365
1366 # default coinc sieve
1367 if coinc_sieve is None:
1368 coinc_sieve = lambda events, offset_vector: False
1369
1370 # default event_collector:
1371 if event_collector is None:
1372 event_collector_push = lambda event_ids, offset_vector: None
1373 else:
1374 event_collector_push = event_collector.push
1375
1376 newly_reported_ids = set()
1377 # initialize the set of flushed events to the complete
1378 # contents of the internal queues
1379 flushed_ids = set(index)
1380 # avoid attribute look-ups in loops
1381 newly_reported_update = newly_reported_ids.update
1382 used_update = self.used_ids.update
1383 id_to_event = index.__getitem__
1384 for node in self.head:
1385 # avoid attribute look-ups in loops
1386 event_time = node.event_time
1387 offset_vector = node.offset_vector
1388
1389 if flush:
1390 t = None
1391 candidate_seg = segments.segment(node.previous_t, segments.PosInfinity)
1392 else:
1393 t = node.t_coinc_complete
1394 candidate_seg = segments.segment(node.previous_t, t)
1395
1396 # we don't need to check that the coincs or partial
1397 # coincs contain at least min_instruments events
1398 # because those that don't meet the criteria are
1399 # excluded during coinc construction.
1400 for event_ids in itertools.chain(*node.pull(t)):
1401 # use the index to convert Python IDs back
1402 # to event objects
1403 events = tuple(map(id_to_event, event_ids))
1404 # check the candidate's time, and skip
1405 # those that don't meet the
1406 # t_coinc_complete constraint
1407 if min(event_time(event) + offset_vector[event.ifo] for event in events) not in candidate_seg:
1408 continue
1409 # update the used IDs state and inform the
1410 # event collector of a candidate
1411 used_update(event_ids)
1412 event_collector_push(event_ids, offset_vector)
1413 # apply the coinc sieve test. if the
1414 # calling code has some mechanism to decide
1415 # which n-tuples are worth saving for later
1416 # analysis, it can be applied here (instead
1417 # of by the calling code itself) so that
1418 # the machinery that is tracking which
1419 # events need to be recorded in the output
1420 # document can see that some will not be
1421 # needed.
1422 if not coinc_sieve(events, offset_vector):
1423 newly_reported_update(event_ids)
1424 yield node, events
1425 # finish computing the set of newly reported event ids
1426 if newly_reported_ids:
1427 newly_reported_ids -= self.reported_ids
1428 self.reported_ids |= newly_reported_ids
1429 # finish computing the set of flushed events by removing
1430 # any from the set that still remain in the queues
1431 flushed_ids -= set(self.index)
1432 # compute the set of flushed events that were never used
1433 # for a candidate, and use flushed ids to clean up other
1434 # sets to avoid them growing without bound
1435 if flushed_ids:
1436 flushed_unused_ids = flushed_ids - self.used_ids
1437 self.used_ids -= flushed_ids
1438 self.reported_ids -= flushed_ids
1439 else:
1440 flushed_unused_ids = set()
1441
1442 # if we've been flushed then there can't be any events left
1443 # in the queues (note that the "or" in parentheses is a
1444 # boolean operation, not a set operation)
1445 assert not flush or not (self.used_ids or self.reported_ids)
1446
1447 # use the index to populate newly_reported, flushed, and
1448 # flushed_unused with event objects
1449 if newly_reported is not None:
1450 newly_reported[:] = map(id_to_event, newly_reported_ids)
1451 if flushed is not None:
1452 flushed[:] = map(id_to_event, flushed_ids)
1453 if flushed_unused is not None:
1454 flushed_unused[:] = map(id_to_event, flushed_unused_ids)
1455
1456
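#
# usage sketch (illustrative only): "get_events()",
# "process_candidate()" and "write_to_output_document()" are
# hypothetical stand-ins for whatever event source and candidate
# handler the calling code provides; "graph" is the coincidence graph
# object whose .push() and .pull() methods are documented above. the
# pattern is to push events as they arrive, pull candidates whenever
# .push() reports that new ones might be available, and flush the
# queues when the data ends.
#
#	newly_reported = []
#	for instrument, events, t_complete in get_events():
#		if graph.push(instrument, events, t_complete):
#			for node, events in graph.pull(newly_reported = newly_reported):
#				process_candidate(node.offset_vector, events)
#			write_to_output_document(newly_reported)
#	# end of data: flush the remaining candidates
#	for node, events in graph.pull(flush = True):
#		process_candidate(node.offset_vector, events)
#

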
1457#
1458# =============================================================================
1459#
1460# Document Interface
1461#
1462# =============================================================================
1463#
1464
1465
1466class CoincTables(object):
1467 """
1468 A convenience interface to the XML document's coincidence tables,
1469 allowing for easy addition of coincidence events.
1470 """
1471 def __init__(self, xmldoc, coinc_definer_row):
1472 # find the coinc table or create one if not found
1473 try:
1474 self.coinctable = lsctables.CoincTable.get_table(xmldoc)
1475 except ValueError:
1476 self.coinctable = lsctables.New(lsctables.CoincTable)
1477 xmldoc.childNodes[0].appendChild(self.coinctable)
1478 self.coinctable.sync_next_id()
1479
1480 # find the coinc_map table or create one if not found
1481 try:
1482 self.coincmaptable = lsctables.CoincMapTable.get_table(xmldoc)
1483 except ValueError:
1484 self.coincmaptable = lsctables.New(lsctables.CoincMapTable)
1485 xmldoc.childNodes[0].appendChild(self.coincmaptable)
1486
1487 # look-up the coinc_def_id, creating a new one if required
1488 self.coinc_def_id = ligolw_coincs.get_coinc_def_id(xmldoc, coinc_definer_row.search, coinc_definer_row.search_coinc_type, create_new = True, description = coinc_definer_row.description)
1489
1490 # find the time_slide table
1491 self.time_slide_table = lsctables.TimeSlideTable.get_table(xmldoc)
1492 self.time_slide_index = self.time_slide_table.as_dict()
1493
1494 def coinc_rows(self, process_id, time_slide_id, events, table_name):
1495 """
1496 From a process ID, a time slide ID, and a sequence of
1497 events (generator expressions are OK), constructs and
1498 initializes a coinc_event table row object and a sequence
1499 of coinc_event_map table row objects describing the
1500 coincident event. The return value is the coinc_event row
1501 and a sequence of the coinc_event_map rows.
1502
1503 The coinc_event is *not* assigned a coinc_event_id by this
1504 method. It is expected that this will be done in
1505 .append_coinc(). This allows sub-classes to defer the
1506 question of whether or not to include the coincidence in
1507 the search results without consuming additional IDs.
1508
1509 The coinc_event row's .instruments and .likelihood
1510 attributes are initialized to null values. The calling
1511 code should populate as needed.
1512
1513 When subclassing this method, if the time shifts that were
1514 applied to the events in constructing the coincidence are
1515 required to compute additional metadata, they can be
1516 retrieved from self.time_slide_index using the
1517 time_slide_id.
1518 """
1519 coincmaps = [self.coincmaptable.RowType(
1520 coinc_event_id = None,
1521 table_name = table_name,
1522 event_id = event.event_id
1523 ) for event in events]
1524 assert coincmaps, "coincs must contain >= 1 event"
1525
1526 coinc = self.coinctable.RowType(
1527 process_id = process_id,
1528 coinc_def_id = self.coinc_def_id,
1529 coinc_event_id = None,
1530 time_slide_id = time_slide_id,
1531 insts = None,
1532 nevents = len(coincmaps),
1533 likelihood = None
1534 )
1535
1536 return coinc, coincmaps
1537
1538 def append_coinc(self, coinc_event_row, coinc_event_map_rows):
1539 """
1540 Appends the coinc_event row object and coinc_event_map row
1541 objects to the coinc_event and coinc_event_map tables
1542 respectively after assigning a coinc_event_id to the
1543 coincidence. Returns the coinc_event row object.
1544 """
1545 coinc_event_row.coinc_event_id = self.coinctable.get_next_id()
1546 self.coinctable.append(coinc_event_row)
1547 for row in coinc_event_map_rows:
1548 row.coinc_event_id = coinc_event_row.coinc_event_id
1549 self.coincmaptable.append(row)
1550 return coinc_event_row
1551
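#
# usage sketch (illustrative only): the process_id, time_slide_id and
# sngl_burst-style event objects (with .event_id and .ifo attributes)
# are hypothetical inputs obtained elsewhere, and "sngl_burst" is just
# an example table name; coinc_definer_row is a CoincDef row
# appropriate for the search.
#
#	coinctables = CoincTables(xmldoc, coinc_definer_row)
#	coinc, coincmaps = coinctables.coinc_rows(process_id, time_slide_id, events, "sngl_burst")
#	coinc.instruments = set(event.ifo for event in events)
#	coinctables.append_coinc(coinc, coincmaps)
#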
1552
1553#
1554# =============================================================================
1555#
1556# Poisson Model for Coincidences
1557#
1558# =============================================================================
1559#
1560
1561
1562class CoincRates(object):
1563 def __init__(self, instruments, delta_t, min_instruments = 2, abundance_rel_accuracy = 1e-4):
1564 """
1565 Model for coincidences among noise events collected from
1566 several antennas. Independent Poisson processes are
1567 assumed. The coincidence window for each pair of
1568 instruments is (delta_t + light travel time between that
1569 pair). An N-instrument candidate must be mutually
1570 coincident in every instrument pair, and at least
1571 min_instruments instruments must participate.
1572
1573 Initial configuration requires some relatively expensive
1574 pre-calculation of internal quantities, but once
1575 initialized coincidence rates can be computed quickly from
1576 observed event rates. Several other related quantities can
1577 be computed.
1578
1579 Example:
1580
1581 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
1582 """
1583 self.instruments = frozenset(instruments)
1584 self.delta_t = delta_t
1585 self.min_instruments = min_instruments
1586 if self.min_instruments > len(self.instruments):
1587 raise ValueError("require len(instruments) >= min_instruments")
1588 if self.delta_t < 0.:
1589 raise ValueError("require delta_t >= 0.")
1590 if abundance_rel_accuracy <= 0.:
1591 raise ValueError("require abundance_rel_accuracy > 0.")
1592
1593 # dictionary mapping pair of instrument names (as a
1594 # frozenset) to coincidence window in seconds = delta_t +
1595 # light travel time
1596 self.tau = dict((frozenset(ab), self.delta_t + light_travel_time(*ab)) for ab in itertools.combinations(tuple(self.instruments), 2))
1597
1598 # for instruments {1, ..., N}, with rates \\mu_{1}, ...,
1599 # \\mu_{N}, the rate of coincidences is
1600 #
1601 # \\propto \\prod_{i} \\mu_{i}.
1602 #
1603 # the proportionality constant depends only on the
1604 # coincidence windows. the following computes a dictionary
1605 # of these proportionality constants keyed by instrument
1606 # set.
1607
1608 self.rate_factors = {}
1609 for instruments in self.all_instrument_combos:
1610 # choose the instrument whose event time forms the "epoch"
1611 # of the coinc. which we choose is irrelevant
1612 # frozenset of names
1613 key = instruments
1614 # name, list of names
1615 anchor, *instruments = instruments
1616 # to understand the calculation consider the following
1617 # diagram. this depicts the inter-instrument time delays
1618 # for candidates in a three-detector network.
1619 #
1620 #                            3
1621 #              t3-t2 = const ^
1622 #                      /     |   t2-t1 = const
1623 #                  |  /      |        |
1624 #                --+-/------+--------+--  t3-t1 = const
1625 #                  |/XXXXXXX|XXXXXXXX|
1626 #                  /XXXXXXXX|XXXXXXXX|
1627 #                 /|XXXXXXXX|XXXXXXXX|
1628 #               ---|--------1--------|--->  2
1629 #                  |XXXXXXXX|XXXXXXXX|/
1630 #                  |XXXXXXXX|XXXXXXXX/
1631 #                  |XXXXXXXX|XXXXXXX/|
1632 #                --+--------+------/-+--  t3-t1 = const
1633 #                  |        |     /  |
1634 #     t2-t1 = const|             /
1635 #                            t3-t2 = const
1636 #
1637 # in this example the time of the event seen in detector 1
1638 # (arbitrary) is represented by the origin. the x axis
1639 # indicates the time difference between the event seen in
1640 # detector 2 and the event seen in detector 1, while the y
1641 # axis depicts the time difference between the event seen in
1642 # detector 3 and the event seen in detector 1. in this
1643 # parameter space, the coincidence windows take the form of
1644 # half-spaces: for example, the requirement that |t2-t1|
1645 # <= const becomes two t2-t1=const half-space boundaries,
1646 # in this case vertical lines parallel to the y-axis.
1647 # similarly, the requirement that |t3-t1| <= const becomes
1648 # a pair of t3-t1=const half-space boundaries parallel to
1649 # the x-axis. the requirement that |t3-t2| <= const can
1650 # also be shown in this space, that requirement becomes a
1651 # third pair of half-space boundaries, this time at 45
1652 # degrees to the axes.
1653 #
1654 # in this 2-dimensional parameter space, all valid 3-way
1655 # mutual coincidences must fall within the shaded region
1656 # marked with "X"s, meaning their (t2-t1) and (t3-t1)
1657 # values must fall in that area. this is the intersection
1658 # of the 6 half-spaces.
1659 #
1660 # all of this generalizes to more than 3 detectors,
1661 # becoming an (N-1)-dimensional space of the \Delta t's,
1662 # with the allowed volume being the intersection of the
1663 # half-spaces defined by the coincidence windows. it's
1664 # apparent that the final quantity we seek here is the
1665 # volume of the convex polyhedron defined by all of the
1666 # constraints. this can be computed using the qhull
1667 # library's half-space intersection implementation.
1668 #
1669 # the half-space intersection code requires constraints be
1670 # provided to it in the form
1671 #
1672 # A x + b <= 0,
1673 #
1674 # where A has size (n constraints x m dimensions), where
1675 # for N instruments m = N-1. in this form, x is the vector
1676 # of time differences between the events seen in each of
1677 # the detectors and the event seen in the "anchor"
1678 # instrument at the origin. each coincidence window
1679 # between an instrument, i, and the anchor imposes two
1680 # constraints of the form
1681 #
1682 # +/-t_{i} - \tau_{1i} <= 0
1683 #
1684 # giving 2*(N-1) constraints. additionally, each
1685 # coincidence window between a pair of non-anchor
1686 # instruments imposes two constraints of the form
1687 #
1688 # +/-(t_{i} - t_{j}) - \tau_{ij} <= 0
1689 #
1690 # for an additional (N-1)*(N-2) constraints. altogether
1691 # there are
1692 #
1693 # n = (N-1)^2 + (N-1) = m * (m + 1)
1694 #
1695 # constraints
1696 #
1697 # the "rate factor" we compute below is the volume in units
1698 # of time^(N-1) of the convex polyhedron described above.
1699 # this is the proportionality constant giving us the rate
1700 # of N-way mutual coincidences
1701 #
1702 # coinc rate = (polyhedron volume) \\prod_{i} \\mu_{i}.
1703 #
1704 # the volume has units of time^(N-1), the \\mu have units
1705 # of 1/time, there are N of them, so as hoped the rate of
1706 # coincidences has units of 1/time.
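#
# worked example (consistent with the doctests in .coinc_rates()
# below): for the H1, L1 pair with delta_t = 0.005 s the window is
# tau = 0.005 s + (H1-L1 light travel time, about 0.010 s) ~ 0.015 s,
# so the rate factor is 2 * tau ~ 0.030 s. with event rates
# mu_H1 = 0.001 Hz and mu_L1 = 0.002 Hz the rate of H1+L1
# coincidences is
#
# 2 * tau * mu_H1 * mu_L1 ~ 0.030 s * 0.001 Hz * 0.002 Hz = 6.0e-08 Hz
#
# in agreement with the frozenset(['H1', 'L1']) entries in the
# .coinc_rates() examples below.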
1707
1708 if not instruments:
1709 # one-instrument case, no-op
1710 self.rate_factors[key] = 1.
1711 elif len(instruments) == 1:
1712 # two-instrument (1-D) case, don't use qhull
1713 self.rate_factors[key] = 2. * self.tau[key]
1714 else:
1715 # three and more instrument (2-D and
1716 # higher) case
1717 dimensions = len(instruments) # anchor not included
1718 halfspaces = numpy.zeros((dimensions * (dimensions + 1), dimensions + 1), dtype = "double")
1719 # anchor constraints
1720 for i, instrument in enumerate(instruments):
1721 j = i
1722 i *= 2
1723 halfspaces[i, j] = +1.
1724 halfspaces[i + 1, j] = -1.
1725 halfspaces[i, -1] = halfspaces[i + 1, -1] = -self.tau[frozenset((anchor, instrument))]
1726 # non-anchor constraints
1727 for i, ((j1, a), (j2, b)) in enumerate(itertools.combinations(enumerate(instruments), 2), dimensions):
1728 i *= 2
1729 halfspaces[i, j1] = +1.
1730 halfspaces[i, j2] = -1.
1731 halfspaces[i + 1, j1] = -1.
1732 halfspaces[i + 1, j2] = +1.
1733 halfspaces[i, -1] = halfspaces[i + 1, -1] = -self.tau[frozenset((a, b))]
1734 # the origin is in the interior
1735 interior = numpy.zeros((len(instruments),), dtype = "double")
1736 # compute volume
1737 self.rate_factors[key] = spatial.ConvexHull(spatial.HalfspaceIntersection(halfspaces, interior).intersections).volume
1738 # done computing rate_factors
1739
1740
1741 @property
1742 def all_instrument_combos(self):
1743 """
1744 A tuple of all possible instrument combinations (as
1745 frozensets).
1746
1747 Example:
1748
1749 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 1)
1750 >>> coincrates.all_instrument_combos
1751 (frozenset(['V1']), frozenset(['H1']), frozenset(['L1']), frozenset(['V1', 'H1']), frozenset(['V1', 'L1']), frozenset(['H1', 'L1']), frozenset(['V1', 'H1', 'L1']))
1752 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
1753 >>> coincrates.all_instrument_combos
1754 (frozenset(['V1', 'H1']), frozenset(['V1', 'L1']), frozenset(['H1', 'L1']), frozenset(['V1', 'H1', 'L1']))
1755 """
1756 all_instruments = tuple(self.instruments)
1757 return tuple(frozenset(instruments) for n in range(self.min_instruments, len(all_instruments) + 1) for instruments in itertools.combinations(all_instruments, n))
1758
1759
1760 def coinc_rates(self, **rates):
1761 """
1762 Given the event rates for a collection of instruments,
1763 compute the rates at which N-way coincidences occur among
1764 them where N >= min_instruments. The return value is a
1765 dictionary whose keys are frozensets of instruments and
1766 whose values are the rate of coincidences for that set.
1767
1768 NOTE: the computed rates are the rates at which
1769 coincidences among at least those instruments occur, not
1770 the rate at which coincidences among exactly those
1771 instruments occur. e.g., considering the H1, L1, V1
1772 network, for the pair H1, L1 the return value is the sum of
1773 the rate at which those two instruments form double
1774 coincidences and also the rate at which they participate in
1775 H1, L1, V1 triple coincidences.
1776
1777 See also .strict_coinc_rates().
1778
1779 Example:
1780
1781 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
1782 >>> coincrates.coinc_rates(H1 = 0.001, L1 = 0.002, V1 = 0.003)
1783 {frozenset(['V1', 'H1']): 1.9372787960306537e-07, frozenset(['V1', 'H1', 'L1']): 1.0125819710267318e-11, frozenset(['H1', 'L1']): 6.00513846088957e-08, frozenset(['V1', 'L1']): 3.77380092200718e-07}
1784 >>> coincrates.coinc_rates(H1 = 0.001, L1 = 0.002, V1 = 0.002)
1785 {frozenset(['V1', 'H1']): 1.291519197353769e-07, frozenset(['V1', 'H1', 'L1']): 6.750546473511545e-12, frozenset(['H1', 'L1']): 6.00513846088957e-08, frozenset(['V1', 'L1']): 2.5158672813381197e-07}
1786 >>> coincrates.coinc_rates(H1 = 0.001, L1 = 0.002, V1 = 0.001)
1787 {frozenset(['V1', 'H1']): 6.457595986768845e-08, frozenset(['V1', 'H1', 'L1']): 3.3752732367557724e-12, frozenset(['H1', 'L1']): 6.00513846088957e-08, frozenset(['V1', 'L1']): 1.2579336406690598e-07}
1788 """
1789 if set(rates) != self.instruments:
1790 raise ValueError("require event rates for %s, got rates for %s" % (", ".join(sorted(self.instruments)), ", ".join(sorted(rates))))
1791 if any(rate < 0. for rate in rates.values()):
1792 # they don't have to be non-negative for this
1793 # method to succeed, but negative values are
1794 # nonsensical and other things will certainly fail.
1795 # it's best to catch and report the problem as
1796 # early in the code path as it can be identified,
1797 # so that when the problem is encountered it's
1798 # easier to identify the cause
1799 raise ValueError("rates must be >= 0")
1800 if self.tau and max(rates.values()) * max(self.tau.values()) >= 1.:
1801 raise ValueError("events per coincidence window must be << 1: rates = %s, max window = %g" % (rates, max(self.tau.values())))
1802
1803 # compute \mu_{1} * \mu_{2} ... \mu_{N} * FACTOR where
1804 # FACTOR is the previously-computed proportionality
1805 # constant from self.rate_factors
1806
1807 coinc_rates = dict(self.rate_factors)
1808 for instruments in coinc_rates:
1809 for instrument in instruments:
1810 coinc_rates[instruments] *= rates[instrument]
1811 return coinc_rates
1812
1813
1814 def strict_coinc_rates(self, **rates):
1815 """
1816 Given the event rates for a collection of instruments,
1817 compute the rates at which strict N-way coincidences occur
1818 among them where N >= min_instruments. The return value is
1819 a dictionary whose keys are frozensets of instruments and
1820 whose values are the rate of coincidences for that set.
1821
1822 NOTE: the computed rates are the rates at which
1823 coincidences occur among exactly those instrument
1824 combinations, excluding the rate at which each combination
1825 participates in higher-order coincs. e.g., considering the
1826 H1, L1, V1 network, for the pair H1, L1 the return value is
1827 the rate at which H1, L1 doubles occur, not including the
1828 rate at which the H1, L1 pair participates in H1, L1, V1
1829 triples.
1830
1831 See also .coinc_rates().
1832
1833 Example:
1834
1835 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
1836 >>> coincrates.strict_coinc_rates(H1 = 0.001, L1 = 0.002, V1 = 0.003)
1837 {frozenset(['V1', 'H1']): 1.937177537833551e-07, frozenset(['V1', 'H1', 'L1']): 1.0125819710267318e-11, frozenset(['H1', 'L1']): 6.004125878918543e-08, frozenset(['V1', 'L1']): 3.7736996638100773e-07}
1838 >>> coincrates.strict_coinc_rates(H1 = 0.001, L1 = 0.002, V1 = 0.002)
1839 {frozenset(['V1', 'H1']): 1.2914516918890337e-07, frozenset(['V1', 'H1', 'L1']): 6.750546473511545e-12, frozenset(['H1', 'L1']): 6.004463406242219e-08, frozenset(['V1', 'L1']): 2.5157997758733847e-07}
1840 >>> coincrates.strict_coinc_rates(H1 = 0.001, L1 = 0.002, V1 = 0.001)
1841 {frozenset(['V1', 'H1']): 6.457258459445168e-08, frozenset(['V1', 'H1', 'L1']): 3.3752732367557724e-12, frozenset(['H1', 'L1']): 6.004800933565894e-08, frozenset(['V1', 'L1']): 1.2578998879366924e-07}
1842 """
1843 # initialize from the plain coinc rates
1844 strict_coinc_rates = self.coinc_rates(**rates)
1845 # iterating over the instrument combos from the combo with
1846 # the most instruments to the combo with the least
1847 # instruments ...
1848 for instruments in sorted(strict_coinc_rates, reverse = True, key = lambda instruments: len(instruments)):
1849 # ... subtract from its rate the rate at which
1850 # combos containing it occur (which, because we're
1851 # moving from biggest combo to smallest combo, have
1852 # already had the rates of higher order combos
1853 # containing themselves subtracted)
1854 for key, rate in strict_coinc_rates.items():
1855 if instruments < key:
1856 strict_coinc_rates[instruments] -= rate
1857 # make sure this didn't produce any negative rates
1858 assert all(rate >= 0. for rate in strict_coinc_rates.values()), "encountered negative rate: %s" % strict_coinc_rates
1859 return strict_coinc_rates
1860
1861
1862 def marginalized_strict_coinc_counts(self, seglists, **rates):
1863 """
1864 A dictionary mapping instrument combination (as a
1865 frozenset) to the total number of coincidences involving
1866 precisely that combination of instruments expected from the
1867 background.
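
Example (illustrative; the segment lists and the rates are
hypothetical, and the output is not reproduced here):

>>> import igwn_segments as segments
>>> seglists = segments.segmentlistdict((ifo, segments.segmentlist([segments.segment(0, 86400)])) for ifo in ("H1", "L1", "V1"))
>>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
>>> coincrates.marginalized_strict_coinc_counts(seglists, H1 = 0.001, L1 = 0.002, V1 = 0.003) # doctest: +SKIP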
1868 """
1869 if set(seglists) != self.instruments:
1870 raise ValueError("require segmentlists for %s, got %s" % (", ".join(sorted(self.instruments)), ", ".join(sorted(seglists))))
1871
1872 # time when exactly a given set of instruments are on
1873 livetime = dict((on_instruments, float(abs(seglists.intersection(on_instruments) - seglists.union(self.instruments - on_instruments)))) for on_instruments in self.all_instrument_combos)
1874
1875 coinc_count = dict.fromkeys(livetime, 0.0)
1876 for on_instruments, T in livetime.items():
1877 for coinc_instruments, rate in self.strict_coinc_rates(**dict((instrument, (rate if instrument in on_instruments else 0.)) for instrument, rate in rates.items())).items():
1878 coinc_count[coinc_instruments] += T * rate
1879
1880 return coinc_count
1881
1882
1883 def lnP_instruments(self, **rates):
1884 """
1885 Given the event rates for a collection of instruments,
1886 compute the natural logarithm of the probability that a
1887 coincidence is found to involve exactly a given set of
1888 instruments. This is equivalent to the ratios of the
1889 values in the dictionary returned by .strict_coinc_rates()
1890 to their sum.
1891
1892 Raises ZeroDivisionError if all coincidence rates are 0.
1893
1894 See also .strict_coinc_rates().
1895
1896 Example:
1897
1898 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
1899 >>> coincrates.lnP_instruments(H1 = 0.001, L1 = 0.002, V1 = 0.003)
1900 {frozenset(['V1', 'H1']): -1.181124067253893, frozenset(['V1', 'H1', 'L1']): -11.040192999777876, frozenset(['H1', 'L1']): -2.352494317162074, frozenset(['V1', 'L1']): -0.5143002401188091}
1901 """
1902 strict_coinc_rates = self.strict_coinc_rates(**rates)
1903 total_rate = sum(strict_coinc_rates.values())
1904 if total_rate == 0.:
1905 raise ZeroDivisionError("all rates are 0")
1906 P_instruments = dict((instruments, rate / total_rate) for instruments, rate in strict_coinc_rates.items())
1907 norm = sum(sorted(P_instruments.values()))
1908 # safety check: result should be nearly exactly normalized
1909 assert abs(1.0 - norm) < 1e-14
1910 return dict((instruments, (math.log(P / norm) if P else NegInf)) for instruments, P in P_instruments.items())
1911
1912
1913 def random_instruments(self, **rates):
1914 """
1915 Generator that, given the event rates for a collection of
1916 instruments, yields a sequence of two-element tuples each
1917 containing a randomly-selected frozen set of instruments
1918 and the natural logarithm of the ratio of the rate at which
1919 that combination of instruments forms coincidences to the
1920 rate at which it is being yielded by this generator.
1921
1922 See also .lnP_instruments().
1923
1924 Example:
1925
1926 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
1927 >>> x = iter(coincrates.random_instruments(H1 = 0.001, L1 = 0.002, V1 = 0.003))
1928 >>> next(x) # doctest: +SKIP
1929 (frozenset(['H1', 'L1']), -3.738788683913535)
1930
1931 NOTE: the generator yields instrument combinations drawn
1932 uniformly from the set of allowed instrument combinations,
1933 but this is not considered part of the definition of the
1934 behaviour of this generator and this implementation detail
1935 should not be relied upon by calling code. It is stated
1936 here simply for clarity in case it helps improve
1937 optimization choices elsewhere.
1938 """
1939 # guaranteed non-empty
1940 lnP_instruments = self.lnP_instruments(**rates)
1941 lnN = math.log(len(lnP_instruments))
1942 results = tuple((instruments, lnP - lnN) for instruments, lnP in lnP_instruments.items())
1943 choice = random.choice
1944 while 1:
1945 yield choice(results)
1946
1947
1948 def plausible_toas(self, instruments):
1949 """
1950 Generator that yields dictionaries of random event
1951 time-of-arrival offsets for the given instruments such that
1952 the time-of-arrivals are mutually coincident given the
1953 maximum allowed inter-instrument \\Delta t's. The values
1954 returned are offsets, and would need to be added to some
1955 common time to yield absolute arrival times.
1956
1957 Example:
1958
1959 >>> coincrates = CoincRates(("H1", "L1", "V1"), 0.005, 2)
1960 >>> x = iter(coincrates.plausible_toas(("H1", "L1")))
1961 >>> next(x) # doctest: +SKIP
1962 {'H1': 0.0, 'L1': -0.010229226372297711}
1963 """
1964 instruments = tuple(instruments)
1965 if set(instruments) > self.instruments:
1966 raise ValueError("not configured for %s" % ", ".join(sorted(set(instruments) - self.instruments)))
1967 if len(instruments) < self.min_instruments:
1968 raise ValueError("require at least %d instruments, got %d" % (self.min_instruments, len(instruments)))
1969 anchor, instruments = instruments[0], instruments[1:]
1970 anchor_offset = ((anchor, 0.0),) # don't build inside loop
1971 uniform = random.uniform
1972 windows = tuple((instrument, -self.tau[frozenset((anchor, instrument))], +self.tau[frozenset((anchor, instrument))]) for instrument in instruments)
1973 ijseq = tuple((i, j, self.tau[frozenset((instruments[i], instruments[j]))]) for (i, j) in itertools.combinations(range(len(instruments)), 2))
1974 while 1:
1975 dt = tuple((instrument, uniform(lo, hi)) for instrument, lo, hi in windows)
1976 if all(abs(dt[i][1] - dt[j][1]) <= maxdt for i, j, maxdt in ijseq):
1977 yield dict(anchor_offset + dt)
1978
1979
1980#
1981# =============================================================================
1982#
1983# Triangulation
1984#
1985# =============================================================================
1986#
1987
1988
1989class TOATriangulator(object):
1990 """
1991 Time-of-arrival triangulator. See section 6.6.4 of
1992 "Gravitational-Wave Physics and Astronomy" by Creighton and
1993 Anderson.
1994
1995 An instance of this class is a function-like object that accepts a
1996 tuple of event arrival times and returns a tuple providing
1997 information derived by solving for the maximum-likelihood source
1998 location assuming Gaussian-distributed timing errors.
1999 """
2000 def __init__(self, rs, sigmas, v = lal.C_SI):
2001 """
2002 Create and initialize a triangulator object.
2003
2004 rs is a sequence of location 3-vectors, sigmas is a
2005 sequence of the timing uncertainties for those locations.
2006 Both sequences must be in the same order --- the first
2007 sigma in the sequence is interpreted as belonging to the
2008 first location 3-vector --- and, of course, they must be
2009 the same length.
2010
2011 v is the speed at which the wave carrying the signals
2012 travels. The rs 3-vectors carry units of distance, the
2013 sigmas carry units of time, v carries units of
2014 distance/time. What units are used for the three is
2015 arbitrary, but they must be mutually consistent. The
2016 default value for v is c, the speed of light, in
2017 metres/second; therefore the location 3-vectors should be
2018 given in metres and the sigmas should be given in seconds
2019 unless a value for v is provided with different units.
2020
2021 Example:
2022
2023 >>> from numpy import array
2024 >>> triangulator = TOATriangulator([
2025 ... array([-2161414.92636, -3834695.17889, 4600350.22664]),
2026 ... array([ -74276.0447238, -5496283.71971 , 3224257.01744 ]),
2027 ... array([ 4546374.099 , 842989.697626, 4378576.96241 ])
2028 ... ], [
2029 ... 0.005,
2030 ... 0.005,
2031 ... 0.005
2032 ... ])
2033 ...
2034 >>>
2035
2036 This creates a TOATriangulator instance configured for the
2037 LIGO Hanford, LIGO Livingston and Virgo antennas with 5 ms
2038 time-of-arrival uncertainties at each location.
2039
2040 Note: rs and sigmas may be iterated over multiple times.
2041 """
2042 assert len(rs) == len(sigmas)
2043 assert len(rs) >= 2
2044
2045 self.rs = numpy.vstack(rs)
2046 self.sigmas = numpy.array(sigmas)
2047 self.v = v
2048
2049 # sigma^-2 -weighted mean of locations
2050 rbar = sum(self.rs / self.sigmas[:,numpy.newaxis]**2) / sum(1 / self.sigmas**2)
2051
2052 # the ith row is r - \bar{r} for the ith location
2053 self.R = self.rs - rbar
2054
2055 # the ith row is \sigma_i^-1 (r_i - \bar{r}) / v
2056 M = self.R / (self.v * self.sigmas[:,numpy.newaxis])
2057
2058 self.U, self.S, self.VT = numpy.linalg.svd(M)
2059
2060 if len(rs) >= 3:
2061 # if the smallest singular value is less than 10^-8 * the
2062 # largest singular value, assume the network is degenerate
2063 self.singular = abs(self.S.min() / self.S.max()) < 1e-8
2064 else:
2065 # len(rs) == 2
2066 self.max_dt = numpy.dot(self.rs[1] - self.rs[0], self.rs[1] - self.rs[0])**.5 / self.v
2067
2068 def __call__(self, ts):
2069 """
2070 Triangulate the direction to the source of a signal based
2071 on a tuple of times when the signal was observed. ts is a
2072 sequence of signal arrival times. One arrival time must be
2073 provided for each of the observation locations provided
2074 when the instance was created, and the units of the arrival
2075 times must be the same as the units used for the sequence
2076 of sigmas.
2077
2078 The return value is a tuple of information derived by
2079 solving for the maximum-likelihood source location assuming
2080 Gaussian-distributed timing errors. The return value is
2081
2082 (n, toa, chi2 / DOF, dt)
2083
2084 where n is a unit 3-vector pointing from the co-ordinate
2085 origin towards the source of the signal, toa is the
2086 time-of-arrival of the signal at the co-ordinate origin,
2087 chi2 / DOF is the \\chi^{2} per degree-of-freedom from
2088 the arrival time residuals, and dt is the root-sum-square
2089 of the arrival time residuals.
2090
2091 Example:
2092
2093 >>> from numpy import array
2094 >>> from numpy import testing
2095 >>> triangulator = TOATriangulator([
2096 ... array([-2161414.92636, -3834695.17889, 4600350.22664]),
2097 ... array([ -74276.0447238, -5496283.71971 , 3224257.01744 ]),
2098 ... array([ 4546374.099 , 842989.697626, 4378576.96241 ])
2099 ... ], [
2100 ... 0.005,
2101 ... 0.005,
2102 ... 0.005
2103 ... ])
2104 ...
2105 >>> n, toa, chi2_per_dof, dt = triangulator([
2106 ... 794546669.429688,
2107 ... 794546669.41333,
2108 ... 794546669.431885
2109 ... ])
2110 ...
2111 >>> n
2112 array([[-0.45605637,  0.75800934,  0.46629865],
2113        [-0.45605637,  0.75800934,  0.46629865]])
2114 >>> testing.assert_approx_equal(toa, 794546669.4269662)
2115 >>> testing.assert_approx_equal(chi2_per_dof, 0.47941941158371465)
2116 >>> testing.assert_approx_equal(dt, 0.005996370224459011)
2117
2118 NOTE: if len(rs) >= 4, n is a 1x3 array.
2119 if len(rs) == 3, n is a 2x3 array.
2120 if len(rs) == 2, n is None.
2121 NOTE: n is not the source direction but the propagation
2122 direction of the GW.
2123 Therefore, to obtain the source direction you must
2124 multiply n by -1.
2125 NOTE: n is expressed in Earth-fixed co-ordinates, not
2126 celestial co-ordinates.
2127 Depending on your purpose, you may need to transform
2128 \\phi -> RA; dir2coord can be used to do this.
2129 """
2130 assert len(ts) == len(self.sigmas)
2131
2132 # change of t co-ordinate to avoid LIGOTimeGPS overflow
2133 t0 = min(ts)
2134 ts = numpy.array([float(t - t0) for t in ts])
2135
2136 # sigma^-2 -weighted mean of arrival times
2137 tbar = sum(ts / self.sigmas**2) / sum(1 / self.sigmas**2)
2138 # the i-th element is ts - tbar for the i-th location
2139 tau = (ts - tbar) / self.sigmas
2140
2141 tau_prime = numpy.dot(self.U.T, tau)[:3]
2142
2143 if len(self.rs) >= 3:
2144 if self.singular:
2145 # len(rs) == 3
2146 np = numpy.array([tau_prime / self.S, tau_prime / self.S])
2147 try:
2148 np[0][2] = math.sqrt(1.0 - np[0][0]**2 - np[0][1]**2)
2149 np[1][2] = -math.sqrt(1.0 - np[1][0]**2 - np[1][1]**2)
2150 except ValueError:
2151 # the two points are merged, n_0 = n_1
2152 np.T[2] = 0.0
2153 np /= math.sqrt(numpy.dot(np[0], np[0]))
2154
2155 # compute n from n'
2156 n = numpy.array([numpy.zeros(3), numpy.zeros(3)])
2157 n = numpy.dot(self.VT.T, np.T).T
2158
2159 # safety check the normalization of the result
2160 assert abs(numpy.dot(n[0], n[0]) - 1.0) < 1e-8
2161 assert abs(numpy.dot(n[1], n[1]) - 1.0) < 1e-8
2162
2163 # arrival time at origin
2164 toa = sum((ts - numpy.dot(self.rs, n[0]) / self.v) / self.sigmas**2) / sum(1 / self.sigmas**2)
2165
2166 # chi^{2}
2167 chi2 = sum((numpy.dot(self.R, n[0]) / (self.v * self.sigmas) - tau)**2)
2168
2169 # root-sum-square timing residual
2170 dt = ts - toa - numpy.dot(self.rs, n[0]) / self.v
2171 dt = math.sqrt(numpy.dot(dt, dt))
2172 else:
2173 # len(rs) >= 4
2174 def n_prime(l, Stauprime = self.S * tau_prime, S2 = self.S * self.S):
2175 return Stauprime / (S2 + l)
2176 def secular_equation(l):
2177 np = n_prime(l)
2178 return numpy.dot(np, np) - 1
2179
2180 # values of l that make the denominator of
2181 # n'(l) 0
2182 lsing = -self.S * self.S
2183 # least negative of them is used as lower
2184 # bound for bisection search root finder
2185 # (elements of S are ordered from greatest
2186 # to least, so the last element of lsing is
2187 # the least negative)
2188 l_lo = lsing[-1]
2189
2190 # find a suitable upper bound for the root
2191 # finder FIXME: in Jolien's original code
2192 # l_hi was hard-coded to 1 but we can't
2193 # figure out why the root must be <= 1, so
2194 # I put this loop to be safe but at some
2195 # point it would be good to figure out if
2196 # 1.0 can be used because it would allow
2197 # this loop to be skipped
2198 l_hi = 1.0
2199 while secular_equation(l_lo) / secular_equation(l_hi) > 0:
2200 l_lo, l_hi = l_hi, l_hi * 2
2201
2202 # solve for l
2203 l = scipy.optimize.brentq(secular_equation, l_lo, l_hi)
2204
2205 # compute n'
2206 np = n_prime(l)
2207
2208 # compute n from n'
2209 n = numpy.dot(self.VT.T, np)
2210
2211 # safety check the normalization of the result
2212 assert abs(numpy.dot(n, n) - 1.0) < 1e-8
2213
2214 # arrival time at origin
2215 toa = sum((ts - numpy.dot(self.rs, n) / self.v) / self.sigmas**2) / sum(1 / self.sigmas**2)
2216
2217 # chi^{2}
2218 chi2 = sum((numpy.dot(self.R, n) / (self.v * self.sigmas) - tau)**2)
2219
2220 # root-sum-square timing residual
2221 dt = ts - toa - numpy.dot(self.rs, n) / self.v
2222 dt = math.sqrt(numpy.dot(dt, dt))
2223 else:
2224 # len(rs) == 2
2225 # s_1 == s_2 == 0
2226
2227 # compute an n'
2228 xp = tau_prime[0] / self.S[0]
2229 try:
2230 np = numpy.array([xp, 0, math.sqrt(1-xp**2)])
2231 except ValueError:
2232 # the two points are merged, n_0 = n_1
2233 np = numpy.array([xp, 0, 0])
2234
2235 # compute n from n'
2236 n = numpy.dot(self.VT.T, np)
2237
2238 # safety check the normalization of the result
2239 assert abs(numpy.dot(n, n) - 1.0) < 1e-8
2240
2241 # arrival time at origin
2242 toa = sum((ts - numpy.dot(self.rs, n) / self.v) / self.sigmas**2) / sum(1 / self.sigmas**2)
2243
2244 # chi^{2}
2245 chi2 = sum((numpy.dot(self.R, n) / (self.v * self.sigmas) - tau)**2)
2246
2247 # root-sum-square timing residual
2248 dt = ts - toa - numpy.dot(self.rs, n) / self.v
2249 dt = math.sqrt(numpy.dot(dt, dt))
2250
2251 # set n to None
2252 n = None
2253
2254 # done
2255 return n, t0 + toa, chi2 / len(self.sigmas), dt
2256
2257 @staticmethod
2258 def dir2coord(n, gps):
2259 """
2260 This transforms from propagation direction vector to right
2261 ascension and declination source co-ordinates.
2262
2263 The input is the propagation vector, n, in Earth fixed
2264 co-ordinates (x axis through Greenwich meridian and equator,
2265 z axis through North Pole), and the time. n does not need
2266 to be a unit vector. The return value is the (ra, dec)
2267 pair, in radians. NOTE: right ascension is not
2268 necessarily in [0, 2\\pi).
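
Example (the declination here is exactly pi/2 because the
propagation direction points straight down; the right
ascension equals the Greenwich Mean Sidereal Time of the
given GPS time, so the output is not reproduced here):

>>> from numpy import array
>>> ra, dec = TOATriangulator.dir2coord(array([0., 0., -1.]), 1000000000) # doctest: +SKIP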
2269 """
2270 # safety checks
2271 if len(n) != 3:
2272 raise ValueError("n must be a 3-vector")
2273
2274 # normalize n
2275 n /= math.sqrt(numpy.dot(n, n))
2276
2277 # transform from propagation to source direction
2278 n = -n
2279
2280 # compute ra, dec
2281 RA = numpy.arctan2(n[1], n[0]) + lal.GreenwichMeanSiderealTime(gps)
2282 DEC = numpy.arcsin(n[2])
2283
2284 # done
2285 return RA, DEC
2286
2287
2288#
2289# =============================================================================
2290#
2291# Ranking Statistic Components
2292#
2293# =============================================================================
2294#
2295
2296
2297#
2298# Base class for parameter distribution densities for use in log likelihood
2299# ratio ranking statistics
2300#
2301
2302
2303class LnLRDensity(object):
2304 """
2305 Base class for parameter distribution densities for use in log
2306 likelihood ratio ranking statistics. Generally several instances
2307 of (subclasses of) this will be grouped together to construct a log
2308 likelihood ratio class for use as a ranking statistic in a
2309 trigger-based search. For example, as a minimum one would expect
2310 one instance for the numerator and another for the denominator, but
2311 additional ones might be included in a practical ranking statistic
2312 implementation, for example a third might be used for storing a
2313 histogram of the candidates observed in a search.
2314
2315 Typically, the ranking statistic implementation will provide a
2316 function to transform a candidate to the arguments to use with the
2317 .__call__() implementation, and so in this way a LnLRDensity object
2318 is generally only meaningful in the context of the ranking
2319 statistic class for which it has been constructed.
2320 """
2321 def __call__(self, *args, **kwargs):
2322 """
2323 Evaluate. Return the natural logarithm of the density
2324 evaluated at the given parameters.
2325 """
2326 raise NotImplementedError
2327
2328 def __iadd__(self, other):
2329 """
2330 Marginalize the two densities.
2331 """
2332 raise NotImplementedError
2333
2334 def increment(self, *args, **kwargs):
2335 """
2336 Increment the counts defining this density at the given
2337 parameters.
2338 """
2339 raise NotImplementedError
2340
2341 def copy(self):
2342 """
2343 Return a duplicate copy of this object.
2344 """
2345 raise NotImplementedError
2346
2347 def finish(self):
2348 """
2349 Ensure all internal densities are normalized, and
2350 initialize interpolator objects as needed for smooth
2351 evaluation. Must be invoked before .__call__() will yield
2352 sensible results.
2353
2354 NOTE: for some implementations this operation will
2355 irreversibly alter the contents of the counts array, for
2356 example often this operation will involve the convolution
2357 of the counts with a density estimation kernel. If it is
2358 necessary to preserve a pristine copy of the counts data,
2359 use the .copy() method to obtain a copy of the data, first,
2360 and then .finish() the copy.
2361 """
2362 raise NotImplementedError
2363
2364 def samples(self):
2365 """
2366 Generator returning a sequence of parameter values drawn
2367 from the distribution density. Some subclasses might
2368 choose not to implement this, and those that do might
2369 choose to use an MCMC-style sample generator and so the
2370 samples should not be assumed to be statistically
2371 independent.
2372 """
2373 raise NotImplementedError
2374
2375 def to_xml(self, name):
2376 """
2377 Serialize to an XML fragment and return the root element of
2378 the resulting XML tree.
2379
2380 Subclasses must chain to this method, then customize the
2381 return value as needed.
2382 """
2383 return ligolw.LIGO_LW({"Name": "%s:lnlrdensity" % name})
2384
2385 @classmethod
2386 def get_xml_root(cls, xml, name):
2387 """
2388 Sub-classes can use this in their overrides of the
2389 .from_xml() method to find the root element of the XML
2390 serialization.
2391 """
2392 name = "%s:lnlrdensity" % name
2393 xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute("Name") and elem.Name == name]
2394 if len(xml) != 1:
2395 raise ValueError("XML tree must contain exactly one %s element named %s" % (ligolw.LIGO_LW.tagName, name))
2396 return xml[0]
2397
2398 @classmethod
2399 def from_xml(cls, xml, name):
2400 """
2401 In the XML document tree rooted at xml, search for the
2402 serialized LnLRDensity object named name, and deserialize
2403 it. The return value is the deserialized LnLRDensity
2404 object.
2405 """
2406 # Generally implementations should start with something
2407 # like this:
2408 #xml = cls.get_xml_root(xml, name)
2409 #self = cls()
2410 #return self
2411 raise NotImplementedError
2412
2413
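#
# a minimal sketch of a concrete density, to illustrate the interface.
# this class and its unit-width-bin histogram model are hypothetical;
# real implementations (binned, multi-dimensional, kernel-smoothed)
# are provided by the searches that use this module.
#
#	class ExampleLnLRDensity(LnLRDensity):
#		def __init__(self):
#			self.counts = Counter()
#		def increment(self, x):
#			self.counts[math.floor(x)] += 1.
#		def finish(self):
#			total = sum(self.counts.values())
#			self.lnP = dict((bin, math.log(count / total)) for bin, count in self.counts.items())
#		def __call__(self, x):
#			return self.lnP.get(math.floor(x), NegInf)
#
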
2414#
2415# Likelihood Ratio
2416#
2417
2418
2419class LnLikelihoodRatioMixin(object):
2420 """
2421 Mixin to assist in implementing a log likelihood ratio ranking
2422 statistic class. The ranking statistic class that inherits from
2423 this must: (i) define a callable .numerator attribute that returns
2424 ln P(*args, **kwargs | signal); (ii) define a callable
2425 .denominator attribute that returns ln P(*args, **kwargs | noise).
2426
2427 Inheriting from this will:
2428
2429 1. Add a .__call__() method that returns the natural logarithm of
2430 the likelihood ratio
2431
2432 ln P(*args, **kwargs | signal) - ln P(*args, **kwargs | noise)
2433
2434 The implementation handles various special cases sensibly, such as
2435 when either or both of the logarithms of the numerator and
2436 denominator diverge.
2437
2438 2. Add a .ln_lr_samples() method that makes use of the .numerator
2439 and .denominator attributes, together with the .__call__() method
2440 to transform a sequence of (*args, **kwargs) into a sequence of log
2441 likelihood ratios and their respective relative frequencies. This
2442 can be used to construct histograms of P(ln L | signal) and P(ln L
2443 | noise). These distributions are required for, for example,
2444 signal rate estimation and false-alarm probability estimation.
2445
2446 Why is the likelihood ratio useful? Starting from Bayes' theorem,
2447 and using the fact that "signal" and "noise" are the only two
2448 choices:
2449
2450                    P(data | signal) * P(signal)
2451 P(signal | data) = ----------------------------
2452                              P(data)
2453
2454                 P(data | signal) * P(signal)
2455 = ---------------------------------------------------------
2456   P(data | noise) * P(noise) + P(data | signal) * P(signal)
2457
2458         [P(data | signal) / P(data | noise)] * P(signal)
2459 = ----------------------------------------------------------------
2460   1 - P(signal) + [P(data | signal) / P(data | noise)] * P(signal)
2461
2462                         Lambda * P(signal)
2463 P(signal | data) = ----------------------------
2464                    1 + (Lambda - 1) * P(signal)
2465
2466                P(data | signal)
2467 where Lambda = ----------------
2468                P(data | noise)
2469
2470 Differentiating P(signal | data) w.r.t. Lambda shows the derivative
2471 is always positive, so the probability that a candidate is the
2472 result of a gravitational wave is a monotonically increasing
2473 function of the likelihood ratio. Ranking events from "most likely
2474 to be a genuine signal" to "least likely to be a genuine signal"
2475 can be performed by sorting candidates by likelihood ratio. Or, if
2476 one wanted to set a threshold on P(signal | data) to select a
2477 subset of candidates such that the candidates selected have a given
2478 purity (the fraction of them that are real is fixed), that is
2479 equivalent to selecting candidates using a likelihood ratio
2480 threshold.
2481
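For example, with a prior P(signal) = 0.01 a candidate with
Lambda = 100 has

P(signal | data) = (100 * 0.01) / (1 + 99 * 0.01) ~ 0.50,

while Lambda = 10000 gives ~ 0.99: the larger the likelihood
ratio, the larger the probability that the candidate is a
signal.
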
2482 These are expressions of the Neyman-Pearson lemma which tells us
2483 that thresholding on Lambda creates a detector that extremizes the
2484 detection efficiency at fixed false-alarm rate.
2485 """
2486 def __call__(self, *args, **kwargs):
2487 """
2488 Return the natural logarithm of the likelihood ratio for
2489 the given parameters,
2490
2491 ln P(*args, **kwargs | signal) - ln P(*args, **kwargs | noise)
2492
2493 The arguments are passed verbatim to the .__call__()
2494 methods of the .numerator and .denominator attributes of
2495 self and the return value is computed from the results.
2496
2497 NOTE: sub-classes may override this method, possibly
2498 chaining to it if they wish. The .ln_lr_samples()
2499 mechanism does not require this method to return exactly
2500 the natural logarithm of the .numerator/.denominator ratio.
2501 The .ln_lr_samples() mechanism does not assume the
2502 numerator, denominator and ranking statistic are related to
2503 each other as the latter being the ratio of the former two,
2504 it evaluates all three separately. For this reason, the
2505 .__call__() method that implements the ranking statistic is
2506 free to include other logic, such as hierarchical cuts or
2507 bail-outs that are not strictly equivalent to the ratio of
2508 the numerator and denominator.
2509
2510 Special cases:
2511
2512 .numerator/.denominator=0/0 is mapped to ln Lambda = -inf,
2513 meaning P(signal | data) = 0. Although this condition is
2514 nonsensical because the data is claimed to be inconsistent
2515 with both noise and signal --- the only two things it can
2516 be --- our task here is to compute something that is
2517 monotonic in the probability the data is the result of a
2518 signal, and since this data cannot be from a signal the
2519 correct answer is -inf.
2520
2521 .numerator/.denominator = +inf/+inf is mapped to ln Lambda
2522 = NaN. This is sufficiently nonsensical that there is no
2523 correct interpretation. A warning will be displayed when
2524 this is encountered.
2525 """
2526 lnP_signal = self.numerator(*args, **kwargs)
2527 lnP_noise = self.denominator(*args, **kwargs)
2528 if math.isinf(lnP_noise) and math.isinf(lnP_signal):
2529 # need to handle a special case
2530 if lnP_noise < 0. and lnP_signal < 0.:
2531 # both probabilities are 0. "correct"
2532 # answer is -inf, because if a candidate is
2533 # in a region of parameter space where the
2534 # probability of a signal occurring is 0
2535 # then it is not a signal. it is also,
2536 # apparently, not noise, which is curious
2537 # but irrelevant because we are seeking a
2538 # result that is a monotonically increasing
2539 # function of the probability that a
2540 # candidate is a signal, which is
2541 # impossible in this part of the parameter
2542 # space.
2543 return NegInf
2544 # all remaining cases are handled correctly by the
2545 # expression that follows, but one still deserves a
2546 # warning
2547 if lnP_noise > 0. and lnP_signal > 0.:
2548 # both probabilities are +inf. no correct
2549 # answer. NaN will be returned in this
2550 # case, and it helps to have a record in
2551 # the log of why that happened.
2552 warnings.warn("inf/inf encountered")
2553 return lnP_signal - lnP_noise
2554
2555 def ln_lr_samples(self, random_params_seq, signal_noise_pdfs = None):
2556 """
2557 Generator that transforms a sequence of candidate parameter
2558 samples into a sequence of log likelihood ratio samples.
2559
2560 random_params_seq is a sequence (generator is OK) yielding
2561 3-element tuples whose first two elements provide the *args
2562 and **kwargs values to be passed to the .numerator and
2563 .denominator functions, and whose third element is the
2564 natural logarithm of the probability density from which the
2565 (*args, **kwargs) parameters have been drawn evaluated at
2566 those parameters.
2567
2568 The output of this generator is a sequence of 3-element
2569 tuples, each of whose elements are:
2570
2571 1. a value of the natural logarithm of the likelihood
2572 ratio,
2573
2574 2. the natural logarithm of the relative frequency of
2575 occurrence of that likelihood ratio in the signal population
2576 corrected for the relative frequency at which the
2577 random_params_seq sampler is causing that value to be
2578 returned, and
2579
2580 3. the natural logarithm of the relative frequency of
2581 occurrence of that likelihood ratio in the noise population
2582 similarly corrected for the relative frequency at which the
2583 random_params_seq sampler is causing that value to be
2584 returned.
2585
2586 The intention is for the first element of each tuple to be
2587 added to histograms using the two relative frequencies as
2588 weights, i.e., the two relative frequencies give the number
2589 of times one should consider this one draw of log
2590 likelihood ratio to have occurred in the two populations.
2591
2592 On each iteration, the *args and **kwargs values yielded by
2593 random_params_seq are passed to our own .__call__() method
2594 to evaluate the log likelihood ratio at that choice of
2595 parameter values. The parameters are also passed to the
2596 .__call__() methods of our own .numerator and .denominator
2597 attributes to obtain the signal and noise population
2598 densities at those parameters.
2599
2600 If signal_noise_pdfs is not None then, instead of using our
2601 own .numerator and .denominator attributes, the parameters
2602 are passed to the .__call__() methods of its .numerator and
2603 .denominator attributes to obtain those densities. This
2604 allows the distribution of ranking statistic values
2605 obtained from alternative signal and noise populations to
2606 be modelled. This is sometimes useful for diagnostic
2607 purposes.
2608
2609 Normalizations:
2610
2611 Within the context of the intended application, it is
2612 sufficient for all of the probabilities involved (the
2613 .numerator and .denominator probability densities, and the
2614 probability density supplied by the random_params_seq
2615 generator) to be correct up to unknown normalization
2616 constants, i.e., the natural logarithms of the probabilities
2617 to be correct up to unknown additive constants. That is
2618 why the two probability densities yielded by each iteration
2619 of this generator are described as relative frequencies:
2620 the ratios among their values are meaningful, but not their
2621 absolute values.
2622
2623 If all of the supplied probabilities are, in fact, properly
2624 normalized, then the relative frequencies returned by this
2625 generator are, also, correctly normalized probability
2626 densities.
2627 """
2628 if signal_noise_pdfs is None:
2629 lnP_signal_func = self.numerator
2630 lnP_noise_func = self.denominator
2631 else:
2632 lnP_signal_func = signal_noise_pdfs.numerator
2633 lnP_noise_func = signal_noise_pdfs.denominator
2634 for args, kwargs, lnP_params in random_params_seq:
2635 lnP_signal = lnP_signal_func(*args, **kwargs)
2636 lnP_noise = lnP_noise_func(*args, **kwargs)
2637 yield self(*args, **kwargs), lnP_signal - lnP_params, lnP_noise - lnP_params
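

#
# usage sketch (illustrative only): "MyRankingStat" and its component
# densities are hypothetical. a ranking statistic built from this
# mixin needs only callable .numerator and .denominator attributes;
# the .__call__() and .ln_lr_samples() methods are inherited.
#
#	class MyRankingStat(LnLikelihoodRatioMixin):
#		def __init__(self, numerator, denominator):
#			# LnLRDensity-like callables returning
#			# ln P(params | signal) and ln P(params | noise)
#			self.numerator = numerator
#			self.denominator = denominator
#
#	rankingstat = MyRankingStat(signal_density, noise_density)
#	ln_lambda = rankingstat(snr = 8.5, chisq = 1.2)
#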