Loading [MathJax]/extensions/TeX/AMSsymbols.js
LALBurst 2.0.7.1-b246709
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Macros Modules Pages
bucluster.py
Go to the documentation of this file.
1# Copyright (C) 2006--2021 Kipp Cannon
2#
3# This program is free software; you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation; either version 2 of the License, or (at your
6# option) any later version.
7#
8# This program is distributed in the hope that it will be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11# Public License for more details.
12#
13# You should have received a copy of the GNU General Public License along
14# with this program; if not, write to the Free Software Foundation, Inc.,
15# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17
18#
19# =============================================================================
20#
21# Preamble
22#
23# =============================================================================
24#
25
26
27import math
28import sys
29
30
31from igwn_ligolw import lsctables
32from igwn_ligolw.utils import process as ligolw_process
33from igwn_ligolw.utils import search_summary as ligolw_search_summary
34import igwn_segments as segments
35from . import snglcluster
36
37
38__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
39from .git_version import date as __date__
40from .git_version import version as __version__
41
42
43#
44# =============================================================================
45#
46# Add Process Information
47#
48# =============================================================================
49#
50
51
52process_program_name = "lalburst_cluster"
53
54
55def append_process(xmldoc, cluster_algorithm, comment):
56 return ligolw_process.register_to_xmldoc(
57 xmldoc,
58 program = process_program_name,
59 paramdict = {
60 "cluster_algorithm": cluster_algorithm
61 },
62 version = __version__,
63 cvs_repository = "lscsoft",
64 cvs_entry_time = __date__,
65 comment = comment
66 )
67
68
69#
70# =============================================================================
71#
72# Add "Most Significant" Columns
73#
74# =============================================================================
75#
76
77
78#
79# FIXME: these columns should be generated by the C code, but that would
80# change the sngl_burst table definition and interfere with the string
81# search. Something to sort out later.
82#
83
84
85def add_ms_columns(xmldoc):
86 # add columns if required
87 add_ms_columns_to_table(lsctables.SnglBurstTable.get_table(xmldoc))
88
89def add_ms_columns_to_table(sngl_burst_table):
90 added = False
91 for colname in ("peak_frequency", "ms_start_time", "ms_start_time_ns", "ms_duration", "ms_flow", "ms_bandwidth", "ms_hrss", "ms_snr", "ms_confidence"):
92 try:
93 sngl_burst_table.getColumnByName(colname)
94 except KeyError:
95 sngl_burst_table.appendColumn(colname)
96 added = True
97 if not added:
98 # didn't add any columns, so don't muck their contents
99 return
100
101 # at least one column was added, intialize them all
102 for row in sngl_burst_table:
103 row.peak_frequency = row.central_freq
104 row.ms_period = row.period
105 row.ms_band = row.band
106 row.ms_hrss = row.amplitude
107 row.ms_snr = row.snr
108 row.ms_confidence = row.confidence
109
110
111#
112# =============================================================================
113#
114# Clustering Algorithms
115#
116# =============================================================================
117#
118
119
120#
121# "excess power" clustering algorithm
122#
123
124
125def ExcessPowerPreFunc(sngl_burst_table):
126 """
127 For speed, convert peak times to floats relative to epoch.
128 """
129 if not len(sngl_burst_table):
130 return
131 offset = sngl_burst_table[0].peak
132 for row in sngl_burst_table:
133 row.peak_time = float(row.peak - offset)
134 return offset
135
136
137
138def ExcessPowerPostFunc(sngl_burst_table, offset):
139 """
140 Restore peak times to absolute LIGOTimeGPS values.
141 """
142 for row in sngl_burst_table:
143 row.peak = offset + row.peak_time
144
145
147 """
148 Sort key for grouping excess power triggers near triggers with
149 which they might cluster.
150 """
151 return (a.ifo, a.channel, a.search, a.start)
152
153
154def ExcessPowerBailoutFunc(a, b):
155 """
156 Returns True if a's and b's (ifo, channel, seach) are different or
157 if the periods they span are disjoint. Knowing excess power
158 triggers have been ordered according to ExcessPowerSortKeyFunc(),
159 then if for a pair of events this function returns False, we know
160 the result will also be False for all other events farther apart in
161 the list. This is used to terminate the scan for events to
162 cluster.
163 """
164 return (a.ifo, a.channel, a.search) != (b.ifo, b.channel, b.search) or a.period.disjoint(b.period)
165
166
167def ExcessPowerTestFunc(a, b):
168 """
169 Return False if a and b cluster. To cluster, two events must be
170 from the same channel of the same instrument, and their
171 time-frequency tiles must be non-disjoint.
172 """
173 return (a.ifo, a.channel, a.search) != (b.ifo, b.channel, b.search) or a.period.disjoint(b.period) or a.band.disjoint(b.band)
174
175
176def ExcessPowerClusterFunc(a, b):
177 """
178 Modify a in place to be a cluster constructed from a and b. The
179 cluster's time-frequency tile is the smallest tile that contains
180 the original two tiles, and the "most signficiant" contributor for
181 the cluster is the tile whose boundaries are the SNR^{2} weighted
182 average boundaries of the two contributing tiles. The "most
183 signficiant" contributor's h_{rss}, SNR, and confidence, are copied
184 verbatim from whichever of the two contributing tiles has the
185 highest confidence. The modified event a is returned.
186 """
187 #
188 # In the special case of the two events being the exact same
189 # time-frequency tile, simply preserve the one with the highest
190 # confidence and discard the other.
191 #
192
193 if a.period == b.period and a.band == b.band:
194 if b.ms_confidence > a.ms_confidence:
195 return b
196 return a
197
198 #
199 # Compute the properties of the "most significant contributor"
200 #
201
202 if b.ms_confidence > a.ms_confidence:
203 a.ms_hrss = b.ms_hrss
204 a.ms_snr = b.ms_snr
205 a.ms_confidence = b.ms_confidence
206 a.ms_period = snglcluster.weighted_average_seg(a.ms_period, a.snr**2.0, b.ms_period, b.snr**2.0)
207 a.ms_band = snglcluster.weighted_average_seg(a.ms_band, a.snr**2.0, b.ms_band, b.snr**2.0)
208
209 #
210 # Compute the SNR squared weighted peak time and frequency (recall
211 # that the peak times have been converted to floats relative to
212 # epoch, and stored in the peak_time column).
213 #
214
215 a.peak_time = (a.snr**2.0 * a.peak_time + b.snr**2.0 * b.peak_time) / (a.snr**2.0 + b.snr**2.0)
216 a.peak_frequency = (a.snr**2.0 * a.peak_frequency + b.snr**2.0 * b.peak_frequency) / (a.snr**2.0 + b.snr**2.0)
217
218 #
219 # Compute the combined h_rss and SNR by summing the original ones.
220 # Note that no accounting of the overlap of the events is made, so
221 # these parameters are being horribly overcounted, but the SNR in
222 # particular must be summed like this in order to carry the
223 # information needed to continue computing the SNR squared weighted
224 # peak time and frequencies.
225 #
226
227 a.amplitude += b.amplitude
228 a.snr = math.sqrt(a.snr**2.0 + b.snr**2.0)
229
230 #
231 # The confidence is the confidence of the most significant tile.
232 #
233
234 a.confidence = a.ms_confidence
235
236 #
237 # The cluster's frequency band is the smallest band containing the
238 # bands of the two original events
239 #
240
241 a.band = snglcluster.smallest_enclosing_seg(a.band, b.band)
242
243 #
244 # The cluster's time interval is the smallest interval containing
245 # the intervals of the two original events
246 #
247
248 a.period = snglcluster.smallest_enclosing_seg(a.period, b.period)
249
250 #
251 # Success
252 #
253
254 return a
255
256
257def OmegaClusterFunc(a, b):
258 """
259 Modify a in place to be a cluster constructed from a and b. The
260 cluster's time-frequency tile is the smallest tile that contains
261 the original two tiles, and the "most signficiant" contributor for
262 the cluster is the tile whose boundaries are the SNR^{2} weighted
263 average boundaries of the two contributing tiles. The "most
264 signficiant" contributor's h_{rss}, SNR, and confidence, are copied
265 verbatim from whichever of the two contributing tiles has the
266 highest confidence. The modified event a is returned.
267 """
268 #
269 # In the special case of the two events being the exact same
270 # time-frequency tile, simply preserve the one with the highest
271 # confidence and discard the other.
272 #
273
274 if a.period == b.period and a.band == b.band:
275 if b.snr > a.snr:
276 return b
277 return a
278
279 #
280 # Compute the properties of the "most significant contributor"
281 #
282
283 if b.ms_snr > a.ms_snr:
284 a.ms_snr = b.ms_snr
285 a.ms_period = snglcluster.weighted_average_seg(a.ms_period, a.snr**2.0, b.ms_period, b.snr**2.0)
286 a.ms_band = snglcluster.weighted_average_seg(a.ms_band, a.snr**2.0, b.ms_band, b.snr**2.0)
287
288 #
289 # Compute the SNR squared weighted peak time and frequency (recall
290 # that the peak times have been converted to floats relative to
291 # epoch, and stored in the peak_time column).
292 #
293
294 a.peak_time = (a.snr**2.0 * a.peak_time + b.snr**2.0 * b.peak_time) / (a.snr**2.0 + b.snr**2.0)
295 a.peak_frequency = (a.snr**2.0 * a.peak_frequency + b.snr**2.0 * b.peak_frequency) / (a.snr**2.0 + b.snr**2.0)
296
297 #
298 # Compute the combined h_rss and SNR by summing the original ones.
299 # Note that no accounting of the overlap of the events is made, so
300 # these parameters are being horribly overcounted, but the SNR in
301 # particular must be summed like this in order to carry the
302 # information needed to continue computing the SNR squared weighted
303 # peak time and frequencies.
304 #
305
306 a.amplitude += b.amplitude
307 a.snr = math.sqrt(a.snr**2.0 + b.snr**2.0)
308
309 #
310 # The cluster's frequency band is the smallest band containing the
311 # bands of the two original events
312 #
313
314 a.band = snglcluster.smallest_enclosing_seg(a.band, b.band)
315
316 #
317 # The cluster's time interval is the smallest interval containing
318 # the intervals of the two original events
319 #
320
321 a.period = snglcluster.smallest_enclosing_seg(a.period, b.period)
322
323 #
324 # Success
325 #
326
327 return a
328
329#
330# =============================================================================
331#
332# Library API
333#
334# =============================================================================
335#
336
337
338def bucluster(
339 xmldoc,
340 program,
341 process,
342 prefunc,
343 postfunc,
344 testfunc,
345 clusterfunc,
346 sortkeyfunc = None,
347 bailoutfunc = None,
348 verbose = False
350 """
351 Run the clustering algorithm on the list of burst candidates. The
352 return value is the tuple (xmldoc, changed), where xmldoc is the
353 input document, and changed is a boolean that is True if the
354 contents of the sngl_burst table were altered, and False if the
355 triggers were not modified by the clustering process.
356
357 If the document does not contain a sngl_burst table, then the
358 document is not modified (including no modifications to the process
359 metadata tables).
360 """
361
362 #
363 # Extract live time segment and sngl_burst table
364 #
365
366 try:
367 sngl_burst_table = lsctables.SnglBurstTable.get_table(xmldoc)
368 except ValueError:
369 # no-op: document does not contain a sngl_burst table
370 if verbose:
371 print("document does not contain a sngl_burst table, skipping ...", file=sys.stderr)
372 return xmldoc, False
373 seglists = ligolw_search_summary.segmentlistdict_fromsearchsummary_out(xmldoc, program = program).coalesce()
374
375 #
376 # Preprocess candidates
377 #
378
379 if verbose:
380 print("pre-processing ...", file=sys.stderr)
381 preprocess_output = prefunc(sngl_burst_table)
382
383 #
384 # Cluster
385 #
386
387 table_changed = snglcluster.cluster_events(sngl_burst_table, testfunc, clusterfunc, sortkeyfunc = sortkeyfunc, bailoutfunc = bailoutfunc, verbose = verbose)
388
389 #
390 # Postprocess candidates
391 #
392
393 if verbose:
394 print("post-processing ...", file=sys.stderr)
395 postfunc(sngl_burst_table, preprocess_output)
396
397 #
398 # Update instrument list in process table and add search summary
399 # information
400 #
401
402 process.instruments = seglists.keys()
403 ligolw_search_summary.append_search_summary(xmldoc, process, inseg = seglists.extent_all(), outseg = seglists.extent_all(), nevents = len(sngl_burst_table))
404
405 #
406 # Done
407 #
408
409 return xmldoc, table_changed
def ExcessPowerSortKeyFunc(a)
Sort key for grouping excess power triggers near triggers with which they might cluster.
Definition: bucluster.py:150
def ExcessPowerBailoutFunc(a, b)
Returns True if a's and b's (ifo, channel, seach) are different or if the periods they span are disjo...
Definition: bucluster.py:163
def ExcessPowerPostFunc(sngl_burst_table, offset)
Restore peak times to absolute LIGOTimeGPS values.
Definition: bucluster.py:141
def OmegaClusterFunc(a, b)
Modify a in place to be a cluster constructed from a and b.
Definition: bucluster.py:267
def append_process(xmldoc, cluster_algorithm, comment)
Definition: bucluster.py:55
def add_ms_columns(xmldoc)
Definition: bucluster.py:85
def add_ms_columns_to_table(sngl_burst_table)
Definition: bucluster.py:89
def bucluster(xmldoc, program, process, prefunc, postfunc, testfunc, clusterfunc, sortkeyfunc=None, bailoutfunc=None, verbose=False)
Run the clustering algorithm on the list of burst candidates.
Definition: bucluster.py:360
def ExcessPowerPreFunc(sngl_burst_table)
For speed, convert peak times to floats relative to epoch.
Definition: bucluster.py:128