Source code for stats.trigger_rate

# Copyright (C) 2017,2018  Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from bisect import bisect_right
from ligo import segments
import math
import numpy
import random


from ligo.lw import ligolw
from ligo.lw import array as ligolw_array


__all__ = ["ratebin", "ratebinlist", "triggerrates"]


#
# =============================================================================
#
#               segment and segmentlist with event rate tracking
#
# =============================================================================
#


[docs]class ratebin(segments.segment): """ A version of the segment class (see ligo.segments) that carries a count of things. Arithmetic operations update the count by interpreting the count as a uniform density of things throughout the interval spanned by the segment. For example, the intersection of two ratebins is the interval they have in common, and the intersection's count is given by the sum of the densities of the two multiplied by the size of the intersection. The implementation does not enforce the use of numeric values for boundaries and counts, but is unlikely to behave sensibly unless numeric values are used. One will also encounter problems with (semi-)open intervals due to the mean density being 0 for all finite counts in such intervals, and undefined otherwise. Negative counts are allowed, and in particular the subtraction operation can yield a ratebin with a negative count if the density in the ratebin being subtracted is sufficiently high. If this is a concern, it is left as an excercise for the calling code to check for this condition. Some behaviour has been selected to accomodate the specific needs of the intended application, and might not be sensible for general use outside of that application. For example, if the count is None then the density reported is 0, not None. Like the segment class from which it is derived, ratebin objects are immutable: neither the boundaries, nor the count, can be modified after creation. Example: >>> # initialize from two boundary values without count >>> x = ratebin(0, 10) >>> x None@[0,10) >>> print(x.count) None >>> x.density # count of None --> density = 0, not None 0.0 >>> # initialize from two boundary values with count >>> x = ratebin(0, 10, count = 5) >>> x 5@[0,10) >>> x.density 0.5 >>> # intialize from a sequence without count >>> y = ratebin((5, 15)) >>> y None@[5,15) >>> # intialize from a sequence with count >>> y = ratebin((5, 15), count = 2.5) >>> y.density 0.25 >>> # initialize from a ratebin without count >>> ratebin(y) 2.5@[5,15) >>> # initialize from a ratebin with count >>> ratebin(y, count = 5) 5@[5,15) >>> # arithmetic examples >>> x | y 7.5@[0,15) >>> x + y 7.5@[0,15) >>> (x + y).density 0.5 >>> x - y 3.75@[0,5) >>> (x - y).density 0.75 >>> x & y 3.75@[5,10) >>> (x & y).density 0.75 >>> x & x 10@[0,10) >>> x 5@[0,10) BUGS: comparisons and hash behaviour are inherited from the segment class, specifically this means the count attribute is ignored for the purpose of comparisons. For example, a set will believe a ratebin object is already in the set if its boundaries match those of a ratebin in the set, regardless of what their counts are. A dictionary will do key look-up using the ratebin boundaries, only. Example: >>> x = ratebin(0, 10, count = 5) >>> s = set([x]) >>> y = ratebin(0, 10, count = 10) >>> y in s True >>> y == x True >>> x 5@[0,10) >>> y 10@[0,10) >>> # x and y compare as equal, so d gets only one entry >>> d = {x: "a", y: "b"} >>> d {5@[0,10): 'b'} """ # basic methods def __init__(self, arg0 = None, arg1 = None, count = None): # parent class' .__init__() is a no-op, so don't bother # chaining to it. need to accomodate 0, 1, or 2 optional # arguments before the optional count keyword if isinstance(arg0, self.__class__) and count is None: self._count = arg0._count else: self._count = count @property def count(self): return self._count @property def density(self): return (self._count or 0) / float(abs(self)) def __str__(self): return "%s@[%s,%s)" % (("%.16g" % self._count if self._count is not None else "None"), str(self[0]), str(self[1])) __repr__ = __str__ # arithmetic def __and__(self, other): new = super(ratebin, self).__and__(other) if self._count is not None or other._count is not None: return type(self)(new, count = (self.density + other.density) * float(abs(new))) return new def __or__(self, other): new = super(ratebin, self).__or__(other) if self._count is not None or other._count is not None: return type(self)(new, count = (self._count or 0) + (other._count or 0)) return new __add__ = __or__ def __sub__(self, other): new = super(ratebin, self).__sub__(other) if self._count is not None: return type(self)(new, count = self._count - other.density * (float(abs(self)) - float(abs(new)))) return new # # overrides to preserve count #
[docs] def protract(self, x): return self.__class__(self[0] - x, self[1] + x, count = self._count)
[docs] def contract(self, x): return self.__class__(self[0] + x, self[1] - x, count = self._count)
[docs] def shift(self, x): return self.__class__(self[0] + x, self[1] + x, count = self._count)
[docs]class ratebinlist(segments.segmentlist): """ Modified version of the segmentlist type (see ligo.segments) whose arithmetic operations implement the segment-and-count arithmetic operations defined by the ratebin type. This implemention has only been optimized for performance for the specific use cases for which it has been designed. It will likely be found to have unacceptable performance if used as a general-purpose segmentlist implementation. Example: >>> x0 = ratebinlist([ratebin(0, 10, count = 5)]) >>> x1 = ratebinlist([ratebin(5, 15, count = 2.5)]) >>> x2 = ratebinlist([ratebin(15, 25, count = 2.5)]) >>> x3 = ratebinlist([ratebin(20, 30, count = 5)]) >>> x0 | x1 [7.5@[0,15)] >>> x0 | x1 | x2 | x3 [15@[0,30)] >>> x0 - x1 [3.75@[0,5)] >>> x0 | x2 [5@[0,10), 2.5@[15,25)] >>> (x0 | x2).density 0.375 >>> x0 - x1 [3.75@[0,5)] >>> x0.density 0.5 >>> (x0 - x1).density 0.75 >>> x0 - x2 [5@[0,10)] >>> y = ratebinlist(x0) >>> y.extend(x1) >>> y.extend(x2) >>> y.extend(x3) >>> y [5@[0,10), 2.5@[5,15), 2.5@[15,25), 5@[20,30)] >>> y.coalesce() [15@[0,30)] >>> # slices are ratebinlist objects >>> (x0 | x3)[1:].density 0.5 >>> # density is 0 at times with no segments >>> x0.density_at(15) 0.0 >>> # and empty lists have a mean density of 0 (not NaN) >>> (x0 & x2).density 0.0 NOTE: the XML I/O feature of this class will only work correctly for float-valued boundaries and integer counts. """ def __getitem__(self, index): # make sure to return slices as ratebinlist objects, not # native list objects val = super(ratebinlist, self).__getitem__(index) return self.__class__(val) if isinstance(index, slice) else val # FIXME: delete after porting to 3 def __getslice__(self, *args, **kwargs): # make sure to return slices as ratebinlist objects, not # native list objects return self.__class__(super(ratebinlist, self).__getslice__(*args, **kwargs)) @property def count(self): return sum(seg.count for seg in self) @property def density(self): # NOTE: event density at times when there are no segments # is 0., not NaN! return self.count / float(abs(self)) if self else 0.
[docs] def segmentlist(self): """ Construct and return a segments.segmentlist of segments.segment objects by type-casting the contents. """ return segments.segmentlist(segments.segment(seg) for seg in self)
[docs] def add_ratebin(self, seg, count): """ Convenience method. Equivalent to self |= type(self)([ratebin(seg, count = count)]) Example: >>> x = ratebinlist() >>> x.add_ratebin((0, 10), 5) >>> x.add_ratebin((10, 20), 5) >>> x [10@[0,20)] """ seg = ratebin(seg, count = count) # tail optimization cases if not self: self.append(seg) elif not seg.disjoint(self[-1]): self[-1] |= seg elif seg.disjoint(self[-1]) > 0: self.append(seg) else: # general case. implementation of .__ior__() # allows us to use a tuple here self |= (seg,)
[docs] def find(self, item): """ Return the smallest i such that i is the index of an element that wholly contains item. Raises ValueError if no such element exists. NOTE: unlike the segmentlist class from which ratebinlist is derived, this implementation requires the ratebinlist to be coalesced. Example: >>> x = ratebinlist([ratebin(0, 10, count = 5), ratebin(15, 25, count = 2.5)]) >>> x.find(-1) Traceback (most recent call last): ... IndexError: -1 >>> x.find(0) 0 >>> x.find(5) 0 >>> # upper bounds of segments not included in span >>> x.find(10) Traceback (most recent call last): ... IndexError: 10 >>> x.find(12) Traceback (most recent call last): ... IndexError: 12 >>> x.find(15) 1 >>> x.find(20) 1 >>> x.find(26) Traceback (most recent call last): ... IndexError: 26 """ i = bisect_right(self, item) if i and item in self[i - 1]: return i - 1 raise IndexError(item)
[docs] def density_at(self, x): try: i = self.find(x) except IndexError: # density is 0 at times not covered by segments return 0. return self[i].density
def __iand__(self, other): if not other or not self: del self[:] return self if other is self: self[:] = (ratebin(seg, seg.count * 2) for seg in self) return self def andgen(self, other): self_next = iter(self).__next__ other_next = iter(other).__next__ self_seg = self_next() other_seg = other_next() try: while 1: while self_seg[1] <= other_seg[0]: self_seg = self_next() while other_seg[1] <= self_seg[0]: other_seg = other_next() if self_seg.intersects(other_seg): yield self_seg & other_seg if self_seg[1] > other_seg[1]: other_seg = other_next() else: self_seg = self_next() except StopIteration: pass i = bisect_right(self, other[0][0]) self[:] = andgen( self[(i - 1 if i else 0):bisect_right(self, other[-1][1])], other ) return self def __ior__(self, other): if other is self: self[:] = (ratebin(seg, count = 2 * seg.count) for seg in self) return self i = 0 for seg in other: i = j = bisect_right(self, seg, i) lo, hi = seg if i and self[i - 1][1] >= lo: i -= 1 lo = self[i][0] n = len(self) while j < n and self[j][0] <= hi: j += 1 if j > i: self[i] = ratebin(lo, max(hi, self[j - 1][1]), count = sum((s.count for s in self[i:j]), seg.count)) del self[i + 1 : j] else: self.insert(i, seg) i += 1 return self def __isub__(self, other): if not other: return self if other is self: del self[:] return self i = j = 0 other_lo, other_hi = other[j] while i < len(self): self_lo, self_hi = self[i] while other_hi <= self_lo: j += 1 if j >= len(other): return self other_lo, other_hi = other[j] if self_hi <= other_lo: i += 1 elif other_lo <= self_lo: if other_hi >= self_hi: del self[i] else: self[i] -= other[j] elif other_hi < self_hi: density = (self[i].count - other[j].count) / (float(abs(self[i])) - float(abs(other[j]))) self[i:i+1] = (ratebin(self_lo, other_lo, count = (other_lo - self_lo) * density), ratebin(other_hi, self_hi, count = (self_hi - other_hi) * density)) i += 1 else: self[i] -= other[j] i += 1 return self def __invert__(self): raise NotImplementedError
[docs] def coalesce(self): """ Sort the elements of the list into ascending order, and merge continuous segments into single segments. Segmentlist is modified in place. This operation is O(n log n). """ self.sort() i = j = 0 n = len(self) while j < n: lo, hi = self[j] count = self[j].count j += 1 while j < n and hi >= self[j][0]: hi = max(hi, self[j][1]) count += self[j].count j += 1 if lo != hi: self[i] = ratebin(lo, hi, count = count) i += 1 del self[i : ] return self
[docs] @classmethod def from_xml(cls, xml, name): # need to cast start and stop to floats, otherwise they are # numpy.float64 objects and confuse things return cls(ratebin((float(start), float(stop)), count = int(count)) for (start, stop, count) in ligolw_array.get_array(xml, u"%s:ratebinlist" % name).array[:])
[docs] def to_xml(self, name): # I/O is only safe for integer counts assert all(isinstance(seg.count, int) for seg in self), "counts must be type int for XML I/O" return ligolw_array.Array.build(u"%s:ratebinlist" % name, numpy.array([(seg[0], seg[1], seg.count) for seg in self], dtype = "double"))
# # ============================================================================= # # trigger rate tracking for a detector network # # ============================================================================= #
[docs]class triggerrates(segments.segmentlistdict): """ Example: >>> x = triggerrates({ ... "H1": ratebinlist([ ... ratebin((0, 10), count = 5), ... ratebin((20, 30), count = 5) ... ]), ... "V1": ratebinlist([ ... ratebin((0, 15), count = 7), ... ratebin((25, 35), count = 7) ... ]) ... }) ... >>> x {'H1': [5@[0,10), 5@[20,30)], 'V1': [7@[0,15), 7@[25,35)]} >>> y = x.copy() >>> y {'H1': [5@[0,10), 5@[20,30)], 'V1': [7@[0,15), 7@[25,35)]} >>> y == x True >>> y is x False >>> y = triggerrates.from_xml(x.to_xml("test"), "test") >>> y {'H1': [5@[0.0,10.0), 5@[20.0,30.0)], 'V1': [7@[0.0,15.0), 7@[25.0,35.0)]} >>> y == x True >>> y is x False """ @property def counts(self): return dict((key, value.count) for key, value in self.items()) @property def densities(self): return dict((key, value.density) for key, value in self.items())
[docs] def segmentlistdict(self): return segments.segmentlistdict((key, value.segmentlist()) for key, value in self.items())
def __iand__(self, other): raise NotImplementedError
[docs] def intersection(self, keys): raise NotImplementedError
[docs] def union(self, keys): raise NotImplementedError
[docs] def density_at(self, x): return dict((key, value.density_at(x)) for key, value in self.items())
[docs] def random_uniform(self): lo, hi = self.extent_all() uniform = random.uniform lnP = -math.log(hi - lo) while 1: x = uniform(lo, hi) yield x, self.density_at(x), lnP
[docs] @classmethod def from_xml(cls, xml, name): xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == u"%s:triggerrates" % name] try: xml, = xml except ValueError: raise ValueError("document must contain exactly 1 triggerrates named '%s'" % name) keys = [elem.Name.replace(u":ratebinlist", u"") for elem in xml.getElementsByTagName(ligolw.Array.tagName) if elem.hasAttribute(u"Name") and elem.Name.endswith(u":ratebinlist")] self = cls() for key in keys: self[key] = ratebinlist.from_xml(xml, key) return self
[docs] def to_xml(self, name): xml = ligolw.LIGO_LW({u"Name": u"%s:triggerrates" % name}) for key, value in self.items(): xml.appendChild(value.to_xml(key)) return xml