# snglcoinc_coincs_verify.py
from bisect import bisect_left, bisect_right
import functools
import itertools
import math
import numpy
from optparse import OptionParser
import os
import random
import sys
from tqdm import tqdm

from lal import LIGOTimeGPS
from lalburst import burca
from lalburst import offsetvector
from lalburst import snglcoinc

import igwn_segments as segments
from igwn_ligolw import ligolw
from igwn_ligolw import lsctables
from igwn_ligolw import utils as ligolw_utils
from igwn_ligolw.utils import process as ligolw_process
from igwn_ligolw.utils import time_slide as ligolw_time_slide

#
# construct a synthetic burst trigger document
#

def make_input_test_document(
    segs = {
        "H1": segments.segment(LIGOTimeGPS(1e6), LIGOTimeGPS(1e6 + 300)),
        "L1": segments.segment(LIGOTimeGPS(1e6), LIGOTimeGPS(1e6 + 300)),
        "V1": segments.segment(LIGOTimeGPS(1e6), LIGOTimeGPS(1e6 + 300))
    },
    mean_rates = {"H1": 1.0, "L1": math.pi, "V1": math.e},
    n_offset_vectors = 3
):
    assert set(mean_rates) == set(segs), "need rates and segments for the same instruments"

    # start an XML document with some process metadata
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    process = ligolw_process.register_to_xmldoc(xmldoc, "snglcoinc_coincs_verify", {}, instruments = segs.keys(), comment = "artificial data for coincidence engine test suite")

    # add time slide information
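    # each offset vector maps instrument name --> time shift, with the
    # shifts drawn uniformly from +/- 28 s * n_offset_vectors, for
    # example (illustrative values only):
    #     {"H1": -12.7, "L1": +3.1, "V1": +40.6}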
    for i in range(n_offset_vectors):
        ligolw_time_slide.get_time_slide_id(xmldoc, offsetvector.offsetvector((instrument, random.uniform(-28. * n_offset_vectors, +28. * n_offset_vectors)) for instrument in segs), create_new = process)

    # add a sngl_burst table
    snglbursttable = xmldoc.childNodes[-1].appendChild(lsctables.New(lsctables.SnglBurstTable, ["event_id", "ifo", "peak_time", "peak_time_ns", "process:process_id"]))
    snglbursttable.sync_next_id()

    # fill with random events
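    # the trigger count for each instrument is Poisson-distributed with
    # mean (rate * live time), and each trigger's peak time is drawn
    # uniformly from within that instrument's segment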
    for instrument, segment in segs.items():
        for i in range(numpy.random.poisson(mean_rates[instrument] * float(abs(segment)))):
            snglbursttable.append(snglbursttable.RowType(
                event_id = snglbursttable.get_next_id(),
                ifo = instrument,
                peak = segment[0] + random.uniform(0., float(abs(segment))),
                process_id = process.process_id
            ))

    # done
    return xmldoc

#
# brute force all-possible-coincs algorithm to generate correct answer
#

def do_brute_force_coinc(xmldoc, delta_t = 0.015, min_instruments = 1):
    # register ourselves in the process metadata
    process = ligolw_process.register_to_xmldoc(xmldoc, "brute_force_coinc", {})

    # construct coinc tables interface for convenience
    coinctables = snglcoinc.CoincTables(xmldoc, burca.StringCuspBBCoincDef)

    # this is normally not the correct way to get the instrument list
    # from a document, but here we only care about instruments that
    # made triggers.  if others were included in the analysis we don't
    # care about them.
    instruments = list(set(row.ifo for row in lsctables.SnglBurstTable.get_table(xmldoc)))
    dt = dict((frozenset(pair), delta_t + snglcoinc.light_travel_time(*pair)) for pair in itertools.combinations(instruments, 2))
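    # dt maps each unordered instrument pair to the maximum allowed
    # difference between their (time-shifted) peak times:  the
    # --delta-t window widened by the light travel time between the
    # two sites (roughly 10-30 ms for these detector pairs)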

    # create look-up table of triggers indexed by instrument, sorted by
    # time
    sngls_by_instrument = dict((instrument, []) for instrument in instruments)
    for row in lsctables.SnglBurstTable.get_table(xmldoc):
        sngls_by_instrument[row.ifo].append(row)
    for instrument in instruments:
        sngls_by_instrument[instrument].sort(key = lambda row: row.peak)

    # iterate over time slides
    for time_slide_id, offsetvector in lsctables.TimeSlideTable.get_table(xmldoc).as_dict().items():
        print("offsets = %s" % str(offsetvector))

        # set of event_id sets to exclude from the coinc set
        # because they've already been found in a higher-order
        # coinc
        exclude = set()

        # iterate over coinc orders from highest to lowest
        # (quadruples, triples, doubles, ...)
        for n in range(len(instruments), min_instruments - 1, -1):
            # iterate over unique choices of that many
            # instruments
            for select_instruments in itertools.combinations(instruments, n):
                offsets = tuple(offsetvector[instrument] for instrument in select_instruments)
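                # the number of n-tuples that will be tested below;
                # used only to give tqdm a progress-bar total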
                total = functools.reduce(lambda a, b: a * b, map(lambda instrument: len(sngls_by_instrument[instrument]), select_instruments), 1)
                # iterate over every n-way combination of
                # triggers from the selected instruments
                for sngls in tqdm(itertools.product(*map(sngls_by_instrument.__getitem__, select_instruments)), total = total, desc = ", ".join(sorted(select_instruments))):
                    # if any Delta t fails coincidence,
                    # discard this combination of
                    # triggers
                    if any(abs(t_a - t_b) > dt[frozenset((instrument_a, instrument_b))] for (instrument_a, t_a), (instrument_b, t_b) in itertools.combinations(((instrument, sngl.peak + offset) for instrument, sngl, offset in zip(select_instruments, sngls, offsets)), 2)):
                        continue
                    # if this combination of event IDs
                    # participated in a higher-order
                    # coinc, discard this combination
                    # of triggers
                    event_ids = frozenset(sngl.event_id for sngl in sngls)
                    if event_ids in exclude:
                        continue
                    # record this coinc
                    coinctables.append_coinc(*coinctables.coinc_rows(process.process_id, time_slide_id, sngls, "sngl_burst"))
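                    # every lower-order subset of this n-tuple's
                    # event IDs (down to min_instruments) is now
                    # excluded so the lower-order passes below do
                    # not report them as separate candidates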
                    exclude |= {frozenset(x) for i in range(n - 1, min_instruments - 1, -1) for x in itertools.combinations(event_ids, i)}

    # done
    process.set_end_time_now()
    return xmldoc

#
# use snglcoinc to construct coincs
#
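#
# the engine is customized for this trigger type by subclassing
# snglcoinc.coincgen_doubles (below):  event_time() reports where an
# event lies in time, and get_coincs() stores one instrument's triggers
# and, when called with an event from another instrument, an offset to
# apply to it, and a coincidence window, returns the stored triggers
# whose peak times fall within that window of the shifted event
#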


class coincgen_doubles(snglcoinc.coincgen_doubles):
    class singlesqueue(snglcoinc.coincgen_doubles.singlesqueue):
        @staticmethod
        def event_time(event):
            return event.peak

    class get_coincs(object):
        def __init__(self, events):
            self.events = events
            self.times = tuple(map(coincgen_doubles.singlesqueue.event_time, events))

        def __call__(self, event_a, offset_a, coinc_window):
            peak = event_a.peak + offset_a
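            # bisect assumes self.times is sorted;  the events handed
            # to get_coincs by the queue machinery are expected to
            # already be in time order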
            return self.events[bisect_left(self.times, peak - coinc_window) : bisect_right(self.times, peak + coinc_window)]

def do_snglcoinc(xmldoc, delta_t = 0.015, min_instruments = 1):
    # register ourselves in the process metadata
    process = ligolw_process.register_to_xmldoc(xmldoc, "snglcoinc_coinc", {})

    # construct coinc tables interface for convenience
    coinctables = snglcoinc.CoincTables(xmldoc, burca.StringCuspBBCoincDef)

    # construct offset vector assembly graph
    time_slide_graph = snglcoinc.TimeSlideGraph(coincgen_doubles, coinctables.time_slide_index, delta_t, min_instruments = min_instruments, verbose = True)

    # collect coincs using incremental approach to mimic online
    # searches
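    # triggers are fed to the graph one instrument at a time with
    # .push();  when .push() reports that complete candidates might be
    # available, .pull() retrieves them, and the final
    # .pull(flush = True) drains anything still pending once the input
    # is exhausted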
    with tqdm(desc = "snglcoinc", total = len(lsctables.SnglBurstTable.get_table(xmldoc))) as progress:
        for instrument, events in itertools.groupby(sorted(lsctables.SnglBurstTable.get_table(xmldoc), key = lambda row: (row.peak, row.ifo)), lambda event: event.ifo):
            events = tuple(events)
            progress.update(len(events))
            if time_slide_graph.push(instrument, events, max(event.peak for event in events)):
                for node, events in time_slide_graph.pull():
                    coinctables.append_coinc(*coinctables.coinc_rows(process.process_id, node.time_slide_id, events, "sngl_burst"))
    for node, events in time_slide_graph.pull(flush = True):
        coinctables.append_coinc(*coinctables.coinc_rows(process.process_id, node.time_slide_id, events, "sngl_burst"))

    # done
    process.set_end_time_now()
    return xmldoc

#
# compare the two coincidence algorithm results
#

class summary(object):
    def __init__(self, xmldoc, algorithm_name):
        process_id, = lsctables.ProcessTable.get_table(xmldoc).get_ids_by_program(algorithm_name)

        coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(search = burca.StringCuspBBCoincDef.search, search_coinc_type = burca.StringCuspBBCoincDef.search_coinc_type, create_new = False)
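        # only candidates of this coinc type that were built by the
        # named algorithm (identified through its process ID) are
        # collected into this summary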

        self.sngls = dict((row.event_id, row) for row in lsctables.SnglBurstTable.get_table(xmldoc))
        if len(lsctables.SnglBurstTable.get_table(xmldoc)) - len(self.sngls):
            raise ValueError("document contains %d duplicate sngl_burst rows" % (len(lsctables.SnglBurstTable.get_table(xmldoc)) - len(self.sngls)))
        self.coincs = dict((row.coinc_event_id, row) for row in lsctables.CoincTable.get_table(xmldoc) if row.coinc_def_id == coinc_def_id and row.process_id == process_id)
        self.offsetvectors = lsctables.TimeSlideTable.get_table(xmldoc).as_dict()

        coinc_event_ids = frozenset(self.coincs)
        coinc_map = [row for row in lsctables.CoincMapTable.get_table(xmldoc) if row.table_name == "sngl_burst" and row.coinc_event_id in coinc_event_ids]
        coinc_map.sort(key = lambda row: row.coinc_event_id)
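        # self.coinc_map[time_slide_id] maps a frozenset of sngl_burst
        # event IDs to the coinc_event_id of the candidate they form,
        # so the two algorithms' candidate lists can be compared as
        # sets of event-ID sets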
        self.coinc_map = {}
        for time_slide_id in self.offsetvectors:
            self.coinc_map[time_slide_id] = dict((frozenset(row.event_id for row in rows), coinc_event_id) for coinc_event_id, rows in itertools.groupby(coinc_map, key = lambda row: row.coinc_event_id) if self.coincs[coinc_event_id].time_slide_id == time_slide_id)

    def get_sngls(self, event_ids):
        return tuple(self.sngls[event_id] for event_id in event_ids)

    def min_time(self, time_slide_id, event_ids):
        offsetvector = self.offsetvectors[time_slide_id]
        return min(sngl.peak + offsetvector[sngl.ifo] for sngl in self.get_sngls(event_ids))

    def print_summary(self, time_slide_id, event_ids):
        offsetvector = self.offsetvectors[time_slide_id]
        sngls = self.get_sngls(event_ids)
        peak_times = []
        for sngl in sorted(sngls, key = lambda row: row.ifo):
            peak_times.append(sngl.peak + offsetvector[sngl.ifo])
            print("\tevent ID %d: %s: %s + %g s = %s" % (sngl.event_id, sngl.ifo, sngl.peak, offsetvector[sngl.ifo], sngl.peak + offsetvector[sngl.ifo]))
        print("\tmax Delta t = %g s" % float(max(peak_times) - min(peak_times)))


#
# Main
#

def parse_command_line():
    parser = OptionParser()
    parser.add_option("--mode", metavar = "mode", default = "test", help = "Select operating mode.  Allowed values are \"initialize\" (generate random test document), \"brute-force\" (apply brute-force algorithm to test document to construct correct-answer document), \"test\" (default; apply snglcoinc algorithm to test document and compare to brute-force result contained in the document).")
    parser.add_option("--delta-t", metavar = "seconds", type = float, default = 0.015, help = "Coincidence window (default = 0.015 s).")
    parser.add_option("--min-instruments", metavar = "count", type = int, default = 1, help = "Require candidates to have at least this many instruments participating (default = 1).")
    parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")

    options, filenames = parser.parse_args()

    if options.mode not in ("initialize", "brute-force", "test"):
        raise ValueError("invalid --mode %s" % options.mode)
    if options.delta_t < 0.:
        raise ValueError("--delta-t must be >= 0")
    if options.min_instruments < 1:
        raise ValueError("--min-instruments must be >= 1")
    if not filenames:
        filenames = [os.path.join(os.environ.get("LAL_TEST_SRCDIR", "."), "snglcoinc_coincs_verify_input.xml.gz")]
    elif len(filenames) != 1:
        raise ValueError("exactly one filename may be provided.  if no filenames are given, a default will be used.")

    return options, filenames[0]

options, filename = parse_command_line()


if options.mode == "initialize":
    #
    # generate a document with random triggers.  save to disk
    #

    xmldoc = make_input_test_document()
    ligolw_utils.write_filename(xmldoc, filename, verbose = options.verbose)

elif options.mode == "brute-force":
    #
    # load document containing triggers.  use all-possible-coincs
    # brute-force algorithm to compute coincs, save to disk
    #

    xmldoc = ligolw_utils.load_filename(filename, verbose = options.verbose)
    do_brute_force_coinc(xmldoc, delta_t = options.delta_t, min_instruments = options.min_instruments)
    ligolw_utils.write_filename(xmldoc, filename, verbose = options.verbose)

elif options.mode == "test":
    #
    # load document containing coincs generated by brute-force
    # algorithm.  recompute coincs using snglcoinc incremental
    # coincidence engine.  compare the two sets of coincs
    #

    xmldoc = ligolw_utils.load_filename(filename, verbose = options.verbose)
    do_snglcoinc(xmldoc, delta_t = options.delta_t, min_instruments = options.min_instruments)

    # initialize the summary objects.  this checks for duplicate singles
    algorithm_names = ("brute_force_coinc", "snglcoinc_coinc")
    summaries = [summary(xmldoc, algorithm_name) for algorithm_name in algorithm_names]

    # check that the time slide vectors are the same or we're wasting
    # our time
    if summaries[0].offsetvectors != summaries[1].offsetvectors:
        raise ValueError("documents do not contain identical offset vectors, or their IDs are not equivalent")

    # check that the candidate lists are identical
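    # for each offset vector the two candidate sets, keyed by frozensets
    # of event IDs, are compared;  any candidate reported by one
    # algorithm but not the other is printed, and the test fails if any
    # such discrepancy is found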
    m = 0
    print("\ncoincs in %s that are not in %s:" % algorithm_names)
    for time_slide_id in summaries[0].offsetvectors:
        for n, event_ids in enumerate(sorted(set(summaries[0].coinc_map[time_slide_id]) - set(summaries[1].coinc_map[time_slide_id]), key = lambda event_ids: summaries[0].min_time(time_slide_id, event_ids)), start = m):
            print("%d:" % n)
            summaries[0].print_summary(time_slide_id, event_ids)
            m += 1
    print("\ncoincs in %s that are not in %s:" % tuple(reversed(algorithm_names)))
    for time_slide_id in summaries[0].offsetvectors:
        for n, event_ids in enumerate(sorted(set(summaries[1].coinc_map[time_slide_id]) - set(summaries[0].coinc_map[time_slide_id]), key = lambda event_ids: summaries[1].min_time(time_slide_id, event_ids)), start = m):
            print("%d:" % n)
            summaries[1].print_summary(time_slide_id, event_ids)
            m += 1
    if m:
        raise ValueError("documents do not contain identical candidate sets")