# snglcoinc_coincs_verify.py — test suite for the lalburst.snglcoinc
# coincidence engine (LALBurst 2.0.4.1)
1 #!/usr/bin/env python3
2 
3 from bisect import bisect_left, bisect_right
4 import functools
5 import itertools
6 import math
7 import numpy
8 from optparse import OptionParser
9 import os
10 import random
11 import sys
12 from tqdm import tqdm
13 
14 from lal import LIGOTimeGPS
15 from lalburst import burca
16 from lalburst import offsetvector
17 from lalburst import snglcoinc
18 
19 from ligo import segments
20 from ligo.lw import ligolw
21 from ligo.lw import lsctables
22 from ligo.lw import utils as ligolw_utils
23 from ligo.lw.utils import process as ligolw_process
24 from ligo.lw.utils import time_slide as ligolw_time_slide
25 
@lsctables.use_in
class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
	# content handler for parsing LIGO Light-Weight XML documents;
	# lsctables.use_in teaches it to map table elements to the row and
	# table classes defined in lsctables
	pass
29 
30 
31 #
32 # construct a synthetic burst trigger document
33 #
34 
35 
def make_input_test_document(
	# NOTE: mutable default arguments are safe here because the
	# function only reads them, never mutates them
	segs = {
		"H1": segments.segment(LIGOTimeGPS(1e6), LIGOTimeGPS(1e6 + 300)),
		"L1": segments.segment(LIGOTimeGPS(1e6), LIGOTimeGPS(1e6 + 300)),
		"V1": segments.segment(LIGOTimeGPS(1e6), LIGOTimeGPS(1e6 + 300))
	},
	mean_rates = {"H1": 1.0, "L1": math.pi, "V1": math.e},
	n_offset_vectors = 3
):
	"""
	Construct a synthetic burst trigger document for the coincidence
	engine test suite.  For each instrument in segs, triggers are
	drawn as a Poisson process with the given mean rate (Hz) across
	the given segment, and n_offset_vectors random time slide vectors
	are recorded.  Returns the ligolw.Document object.
	"""
	assert set(mean_rates) == set(segs), "need rates and segments for the same instruments"

	# start an XML document with some process metadata
	xmldoc = ligolw.Document()
	xmldoc.appendChild(ligolw.LIGO_LW())
	process = ligolw_process.register_to_xmldoc(xmldoc, "snglcoinc_coincs_verify", {}, instruments = segs.keys(), comment = "artificial data for coincidence engine test suite")

	# add time slide information:  one offset vector per iteration,
	# with offsets drawn uniformly from +/- 28 s per offset vector
	for i in range(n_offset_vectors):
		ligolw_time_slide.get_time_slide_id(xmldoc, offsetvector.offsetvector((instrument, random.uniform(-28. * n_offset_vectors, +28. * n_offset_vectors)) for instrument in segs), create_new = process)

	# add a sngl_burst table
	snglbursttable = xmldoc.childNodes[-1].appendChild(lsctables.New(lsctables.SnglBurstTable, ["event_id", "ifo", "peak_time", "peak_time_ns", "process:process_id"]))
	snglbursttable.sync_next_id()

	# fill with random events.  the trigger count in each instrument
	# is Poisson distributed with mean rate * livetime, and peak
	# times are uniform across the segment
	for instrument, segment in segs.items():
		for i in range(numpy.random.poisson(mean_rates[instrument] * float(abs(segment)))):
			snglbursttable.append(snglbursttable.RowType(
				event_id = snglbursttable.get_next_id(),
				ifo = instrument,
				peak = segment[0] + random.uniform(0., float(abs(segment))),
				process_id = process.process_id
			))

	# done
	return xmldoc
72 
73 
74 #
75 # brute force all-possible-coincs algorithm to generate correct answer
76 #
77 
78 
def do_brute_force_coinc(xmldoc, delta_t = 0.015, min_instruments = 1):
	"""
	Construct coincs by brute force:  test every possible n-way
	combination of triggers against the coincidence criteria, for all
	n from the number of instruments down to min_instruments.
	Exponential in the trigger count, so only usable on small test
	documents, but correct by construction;  the result is the
	reference against which snglcoinc's output is compared.  The
	coincs are appended to xmldoc, which is returned.
	"""
	# register ourselves in the process metadata
	process = ligolw_process.register_to_xmldoc(xmldoc, "brute_force_coinc", {})

	# construct coinc tables interface for convenience
	coinctables = snglcoinc.CoincTables(xmldoc, burca.StringCuspBBCoincDef)

	# this is normally not the correct way to get the instrument list
	# from a document, but here we only care about instruments that
	# made triggers. if others were included in the analysis we don't
	# care about them.
	instruments = list(set(row.ifo for row in lsctables.SnglBurstTable.get_table(xmldoc)))
	# coincidence window for each instrument pair = delta_t plus the
	# inter-site light travel time
	dt = dict((frozenset(pair), delta_t + snglcoinc.light_travel_time(*pair)) for pair in itertools.combinations(instruments, 2))

	# create look-up table of triggers indexed by instrument, sorted by
	# time
	sngls_by_instrument = dict((instrument, []) for instrument in instruments)
	for row in lsctables.SnglBurstTable.get_table(xmldoc):
		sngls_by_instrument[row.ifo].append(row)
	for instrument in instruments:
		sngls_by_instrument[instrument].sort(key = lambda row: row.peak)

	# iterate over time slides.  NOTE: the loop variable is named
	# "slide" so as not to shadow the offsetvector module imported at
	# the top of this file
	for time_slide_id, slide in lsctables.TimeSlideTable.get_table(xmldoc).as_dict().items():
		print("offsets = %s" % str(slide))

		# set of event_id sets to exclude from the coinc set
		# because they've already been found in a higher-order
		# coinc
		exclude = set()

		# iterate over coinc orders from highest to lowest
		# (quadruples, triples, doubles, ...)
		for n in range(len(instruments), min_instruments - 1, -1):
			# iterate over unique choices of that many
			# instruments
			for select_instruments in itertools.combinations(instruments, n):
				offsets = tuple(slide[instrument] for instrument in select_instruments)
				total = functools.reduce(lambda a, b: a * b, map(lambda instrument: len(sngls_by_instrument[instrument]), select_instruments), 1)
				# iterate over every n-way combination of
				# triggers from the selected instruments
				for sngls in tqdm(itertools.product(*map(sngls_by_instrument.__getitem__, select_instruments)), total = total, desc = ", ".join(sorted(select_instruments))):
					# if any Delta t fails coincidence,
					# discard this combination of
					# triggers
					if any(abs(t_a - t_b) > dt[frozenset((instrument_a, instrument_b))] for (instrument_a, t_a), (instrument_b, t_b) in itertools.combinations(((instrument, sngl.peak + offset) for instrument, sngl, offset in zip(select_instruments, sngls, offsets)), 2)):
						continue
					# if this combination of event IDs
					# participated in a higher-order
					# coinc, discard this combination
					# of triggers
					event_ids = frozenset(sngl.event_id for sngl in sngls)
					if event_ids in exclude:
						continue
					# record this coinc, and mark all
					# lower-order subsets of it as found
					coinctables.append_coinc(*coinctables.coinc_rows(process.process_id, time_slide_id, sngls, "sngl_burst"))
					exclude |= {frozenset(x) for i in range(n - 1, min_instruments - 1, -1) for x in itertools.combinations(event_ids, i)}

	# done
	process.set_end_time_now()
	return xmldoc
140 
141 
142 #
143 # use snglcoinc to construct coincs
144 #
145 
146 
class coincgen_doubles(snglcoinc.coincgen_doubles):
	"""
	Subclass of snglcoinc's pairwise coincidence generator customized
	for sngl_burst triggers:  defines how to extract an event's time
	and how to retrieve the events coincident with a given event.
	"""
	class singlesqueue(snglcoinc.coincgen_doubles.singlesqueue):
		@staticmethod
		def event_time(event):
			# a trigger's time, for coincidence purposes, is
			# its peak time
			return event.peak

	class get_coincs(object):
		def __init__(self, events):
			# events must be time-ordered so that the peak
			# time tuple below is sorted and bisectable
			self.events = events
			self.times = tuple(map(coincgen_doubles.singlesqueue.event_time, events))

		def __call__(self, event_a, offset_a, coinc_window):
			# bisect the sorted peak time list to find all
			# events within coinc_window of event_a's
			# offset-corrected peak time
			peak = event_a.peak + offset_a
			return self.events[bisect_left(self.times, peak - coinc_window) : bisect_right(self.times, peak + coinc_window)]
161 
162 
def do_snglcoinc(xmldoc, delta_t = 0.015, min_instruments = 1):
	"""
	Construct coincs using the snglcoinc incremental coincidence
	engine, feeding it triggers in time order to mimic an online
	analysis.  The coincs are appended to xmldoc, which is returned.
	"""
	# record ourselves in the process metadata
	process = ligolw_process.register_to_xmldoc(xmldoc, "snglcoinc_coinc", {})

	# convenience interface to the coinc tables
	coinctables = snglcoinc.CoincTables(xmldoc, burca.StringCuspBBCoincDef)

	# build the offset vector assembly graph
	time_slide_graph = snglcoinc.TimeSlideGraph(coincgen_doubles, coinctables.time_slide_index, delta_t, min_instruments = min_instruments, verbose = True)

	def record(candidates):
		# write a sequence of (graph node, events) candidates to
		# the output document
		for node, events in candidates:
			coinctables.append_coinc(*coinctables.coinc_rows(process.process_id, node.time_slide_id, events, "sngl_burst"))

	# push the triggers into the graph in time order, grouped by
	# instrument, pulling completed candidates as they become
	# available;  flush the graph when all triggers have been pushed
	triggers = lsctables.SnglBurstTable.get_table(xmldoc)
	with tqdm(desc = "snglcoinc", total = len(triggers)) as progress:
		for instrument, events in itertools.groupby(sorted(triggers, key = lambda row: (row.peak, row.ifo)), lambda event: event.ifo):
			events = tuple(events)
			progress.update(len(events))
			if time_slide_graph.push(instrument, events, max(event.peak for event in events)):
				record(time_slide_graph.pull())
		record(time_slide_graph.pull(flush = True))

	# done
	process.set_end_time_now()
	return xmldoc
188 
189 
190 #
191 # compare the two coincidence algorithm results
192 #
193 
194 
class summary(object):
	"""
	Index the coincs constructed by one of the two algorithms
	(identified by its program name in the process table) so that the
	two candidate sets can be compared.
	"""
	def __init__(self, xmldoc, algorithm_name):
		# process ID of the algorithm whose coincs we summarize;
		# exactly one is required
		process_id, = lsctables.ProcessTable.get_table(xmldoc).get_ids_by_program(algorithm_name)

		# coinc type for string cusp burst--burst coincs;  must
		# already exist in the document
		coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(search = burca.StringCuspBBCoincDef.search, search_coinc_type = burca.StringCuspBBCoincDef.search_coinc_type, create_new = False)

		# triggers indexed by event ID;  a count mismatch means
		# duplicate IDs, which would corrupt the comparison
		self.sngls = dict((row.event_id, row) for row in lsctables.SnglBurstTable.get_table(xmldoc))
		if len(lsctables.SnglBurstTable.get_table(xmldoc)) - len(self.sngls):
			raise ValueError("document contains %d duplicate sngl_burst rows" % (len(lsctables.SnglBurstTable.get_table(xmldoc)) - len(self.sngls)))
		# this algorithm's coincs indexed by coinc event ID
		self.coincs = dict((row.coinc_event_id, row) for row in lsctables.CoincTable.get_table(xmldoc) if row.coinc_def_id == coinc_def_id and row.process_id == process_id)
		# offset vectors indexed by time slide ID
		self.offsetvectors = lsctables.TimeSlideTable.get_table(xmldoc).as_dict()

		# for each time slide ID, map frozenset of event IDs -->
		# coinc event ID.  the coinc_event_map rows are sorted by
		# coinc event ID so groupby() collects each coinc's rows
		coinc_event_ids = frozenset(self.coincs)
		coinc_map = [row for row in lsctables.CoincMapTable.get_table(xmldoc) if row.table_name == "sngl_burst" and row.coinc_event_id in coinc_event_ids]
		coinc_map.sort(key = lambda row: row.coinc_event_id)
		self.coinc_map = {}
		for time_slide_id in self.offsetvectors:
			self.coinc_map[time_slide_id] = dict((frozenset(row.event_id for row in rows), coinc_event_id) for coinc_event_id, rows in itertools.groupby(coinc_map, key = lambda row: row.coinc_event_id) if self.coincs[coinc_event_id].time_slide_id == time_slide_id)

	def get_sngls(self, event_ids):
		# map a collection of event IDs to their trigger rows
		return tuple(self.sngls[event_id] for event_id in event_ids)

	def min_time(self, time_slide_id, event_ids):
		# earliest offset-corrected peak time among the given
		# events;  used as a sort key for reporting
		offsetvector = self.offsetvectors[time_slide_id]
		return min(sngl.peak + offsetvector[sngl.ifo] for sngl in self.get_sngls(event_ids))

	def print_summary(self, time_slide_id, event_ids):
		# print a human-readable description of one candidate
		offsetvector = self.offsetvectors[time_slide_id]
		sngls = self.get_sngls(event_ids)
		peak_times = []
		for sngl in sorted(sngls, key = lambda row: row.ifo):
			peak_times.append(sngl.peak + offsetvector[sngl.ifo])
			print("\tevent ID %d: %s: %s + %g s = %s" % (sngl.event_id, sngl.ifo, sngl.peak, offsetvector[sngl.ifo], sngl.peak + offsetvector[sngl.ifo]))
		print("\tmax Delta t = %g s" % float(max(peak_times) - min(peak_times)))
229 
230 
231 #
232 # Main
233 #
234 
235 
def parse_command_line():
	"""
	Parse and validate the command line.  Returns the (options,
	filename) pair;  exactly one filename may be given, and if none
	is given a default test document path is used.  Raises ValueError
	on invalid option values.
	"""
	parser = OptionParser(
	)
	parser.add_option("--mode", metavar = "mode", default = "test", help = "Select operating mode. Allowed values are \"initialize\" (generate random test document), \"brute-force\" (apply brute-force algorithm to test document to construct correct-answer document), \"test\" (default; apply snglcoinc algorithm to test document and compare to brute-force result contained in the document).")
	parser.add_option("--delta-t", metavar = "seconds", type = float, default = 0.015, help = "Coincidence window (default = 0.015 s).")
	# NOTE: help text corrected to match the actual default of 1
	parser.add_option("--min-instruments", metavar = "count", type = int, default = 1, help = "Require candidates to have at least this many instruments participating (default = 1).")
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")

	options, filenames = parser.parse_args()

	if options.mode not in ("initialize", "brute-force", "test"):
		raise ValueError("invalid --mode %s" % options.mode)
	if options.delta_t < 0.:
		raise ValueError("--delta-t must be >= 0")
	if options.min_instruments < 1:
		raise ValueError("--min-instruments must be >= 1")
	if not filenames:
		# fall back to the default test document shipped with the
		# test suite
		filenames = [os.path.join(os.environ.get("LAL_TEST_SRCDIR", "."), "snglcoinc_coincs_verify_input.xml.gz")]
	elif len(filenames) != 1:
		raise ValueError("only exactly one filename may be provided. if no filenames are given, a default will be used.")

	return options, filenames[0]
258 
259 
options, filename = parse_command_line()


if options.mode == "initialize":
	#
	# generate a document with random triggers. save to disk
	#

	xmldoc = make_input_test_document()
	ligolw_utils.write_filename(xmldoc, filename, verbose = options.verbose)

elif options.mode == "brute-force":
	#
	# load document containing triggers. use all-possible-coincs
	# brute-force algorithm to compute coincs, save to disk
	#

	xmldoc = ligolw_utils.load_filename(filename, contenthandler = LIGOLWContentHandler, verbose = options.verbose)
	do_brute_force_coinc(xmldoc, delta_t = options.delta_t, min_instruments = options.min_instruments)
	ligolw_utils.write_filename(xmldoc, filename, verbose = options.verbose)

elif options.mode == "test":
	#
	# load document containing coincs generated by brute-force
	# algorithm. recompute coincs using snglcoinc incremental
	# coincidence engine. compare the two sets of coincs
	#

	xmldoc = ligolw_utils.load_filename(filename, contenthandler = LIGOLWContentHandler, verbose = options.verbose)
	do_snglcoinc(xmldoc, delta_t = options.delta_t, min_instruments = options.min_instruments)

	# initialize the summary objects. this checks for duplicate singles
	algorithm_names = ("brute_force_coinc", "snglcoinc_coinc")
	summaries = [summary(xmldoc, algorithm_name) for algorithm_name in algorithm_names]

	# check that the time slide vectors are the same or we're wasting
	# our time
	if summaries[0].offsetvectors != summaries[1].offsetvectors:
		raise ValueError("documents do not contain identical offset vectors, or their IDs are not equivalent")

	# check that the candidate lists are identical:  report, with a
	# running count m, every candidate present in one algorithm's
	# output but not the other's
	m = 0
	print("\ncoincs in %s that are not in %s:" % algorithm_names)
	for time_slide_id in summaries[0].offsetvectors:
		for n, event_ids in enumerate(sorted(set(summaries[0].coinc_map[time_slide_id]) - set(summaries[1].coinc_map[time_slide_id]), key = lambda event_ids: summaries[0].min_time(time_slide_id, event_ids)), start = m):
			print("%d:" % n)
			summaries[0].print_summary(time_slide_id, event_ids)
			m += 1
	print("\ncoincs in %s that are not in %s:" % tuple(reversed(algorithm_names)))
	for time_slide_id in summaries[0].offsetvectors:
		for n, event_ids in enumerate(sorted(set(summaries[1].coinc_map[time_slide_id]) - set(summaries[0].coinc_map[time_slide_id]), key = lambda event_ids: summaries[1].min_time(time_slide_id, event_ids)), start = m):
			print("%d:" % n)
			summaries[1].print_summary(time_slide_id, event_ids)
			m += 1
	if m:
		raise ValueError("documents do not contain identical candidate sets")
# (end of file — the remainder of the rendered page was Doxygen-generated
# cross-reference residue, not source code)