Source code for stats.horizonhistory

# Copyright (C) 2011--2014  Kipp Cannon, Chad Hanna, Drew Keppel
# Copyright (C) 2013  Jacob Peoples
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

## @file
# The python module to implement false alarm probability and false alarm rate
#
# ### Review Status
#
# STATUS: reviewed with actions
#
# | Names                                                                                 | Hash                                        | Date       | Diff to Head of Master      |
# | -------------------------------------------                                           | ------------------------------------------- | ---------- | --------------------------- |
# | Hanna, Cannon, Meacher, Creighton J, Robinet, Sathyaprakash, Messick, Dent, Blackburn | 7fb5f008afa337a33a72e182d455fdd74aa7aa7a | 2014-11-05 |<a href="@gstlal_inspiral_cgit_diff/python/far.py?id=HEAD&id2=7fb5f008afa337a33a72e182d455fdd74aa7aa7a">far.py</a> |
# | Hanna, Cannon, Meacher, Creighton J, Sathyaprakash,                                   | 72875f5cb241e8d297cd9b3f9fe309a6cfe3f716 | 2015-11-06 |<a href="@gstlal_inspiral_cgit_diff/python/far.py?id=HEAD&id2=72875f5cb241e8d297cd9b3f9fe309a6cfe3f716">far.py</a> |
#
# #### Action items
#

# - Address the fixed SNR PDF using median PSD which could be pre-computed and stored on disk. (Store database of SNR pdfs for a variety of horizon)
# - The binning parameters are hard-coded too; Could it be a problem?
# - Chisquare binning hasn't been tuned to be a good representation of the PDFs; could be improved in future

## @package far


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


import bisect
import copy
try:
	from fpconst import NaN, NegInf, PosInf
except ImportError:
	# fpconst is not part of the standard library and might not be
	# available
	NaN = float("nan")
	NegInf = float("-inf")
	PosInf = float("+inf")
import math
import numpy
import random


from ligo.lw import ligolw
from ligo.lw import array as ligolw_array


__all__ = ["NearestLeafTree", "HorizonHistories"]


#
# =============================================================================
#
#                       Horizon Distance Record Keeping
#
# =============================================================================
#


[docs]class NearestLeafTree(object): """ A simple binary tree in which look-ups return the value of the closest leaf. Only float objects are supported for the keys and values. Look-ups raise KeyError if the tree is empty. Example: >>> x = NearestLeafTree() >>> bool(x) False >>> x[100.0] = 120. >>> bool(x) True >>> x[104.0] = 100. >>> x[102.0] = 110. >>> x[90.] 120.0 >>> x[100.999] 120.0 >>> x[101.001] 110.0 >>> x[200.] 100.0 >>> del x[104] >>> x[200.] 110.0 >>> x.keys() [100.0, 102.0] >>> 102 in x True >>> 103 in x False >>> import sys >>> x.to_xml(u"H1").write(sys.stdout) # doctest: +NORMALIZE_WHITESPACE <Array Type="real_8" Name="H1:nearestleaftree:array"> <Dim>2</Dim> <Dim>2</Dim> <Stream Type="Local" Delimiter=" "> 100 102 120 110 </Stream> </Array> """ def __init__(self, items = ()): """ Initialize a NearestLeafTree. Example: >>> x = NearestLeafTree() >>> x = NearestLeafTree([(100., 120.), (104., 100.), (102., 110.)]) >>> y = {100.: 120., 104.: 100., 102.: 100.} >>> x = NearestLeafTree(y.items()) """ # make a copy to ensure we have a stable object self.tree = list(map(tuple, items)) if any(len(item) != 2 for item in self.tree): raise ValueError("items must be sequence of two-element sequences") self.tree.sort() def __setitem__(self, x, val): """ Example: >>> x = NearestLeafTree() >>> x[100.:200.] = 0. >>> x[150.] = 1. >>> x NearestLeafTree([(100, 0), (150, 1), (200, 0)]) >>> x[210:] = 5. >>> x[:90] = 5. >>> x NearestLeafTree([(90, 5), (100, 0), (150, 1), (200, 0), (210, 5)]) """ if isinstance(x, slice): # replace all entries in the requested range of # co-ordiantes with two entries, each with the # given value, one at the start of the range and # one at the end of the range. thus, after this # all queries within that range will return this # value. if x.step is not None: raise ValueError("%s: step not supported" % repr(x)) if x.start is None: if not self.tree: raise IndexError("open-ended slice not supported with empty tree") x = slice(min(self.minkey(), x.stop), x.stop) if x.stop is None: if not self.tree: raise IndexError("open-ended slice not supported with empty tree") x = slice(x.start, max(self.maxkey(), x.start)) if x.start > x.stop: raise ValueError("%s: bounds out of order" % repr(x)) lo = bisect.bisect_left(self.tree, (x.start, NegInf)) hi = bisect.bisect_right(self.tree, (x.stop, PosInf)) self.tree[lo:hi] = ((x.start, val), (x.stop, val)) if x.start != x.stop else ((x.start, val),) else: # replace all entries having the same co-ordinate # with this one lo = bisect.bisect_left(self.tree, (x, NegInf)) hi = bisect.bisect_right(self.tree, (x, PosInf)) self.tree[lo:hi] = ((x, val),) def __getitem__(self, x): if not self.tree: raise KeyError(x) if isinstance(x, slice): raise ValueError("slices not supported") hi = bisect.bisect_right(self.tree, (x, PosInf)) try: x_hi, val_hi = self.tree[hi] except IndexError: x_hi, val_hi = self.tree[-1] if hi: x_lo, val_lo = self.tree[hi - 1] else: x_lo, val_lo = x_hi, val_hi return val_lo if abs(x - x_lo) < abs(x_hi - x) else val_hi def __delitem__(self, x): """ Example: >>> x = NearestLeafTree([(100., 0.), (150., 1.), (200., 0.)]) >>> del x[150.] >>> x NearestLeafTree([(100, 0), (200, 0)]) >>> del x[190.] Traceback (most recent call last): ... IndexError: 190.0 >>> del x[:] >>> x NearestLeafTree([]) """ if isinstance(x, slice): if x.step is not None: raise ValueError("%s: step not supported" % repr(x)) if x.start is None: if not self.tree: # no-op return x = slice(self.minkey(), x.stop) if x.stop is None: if not self.tree: # no-op return x = slice(x.start, self.maxkey()) if x.stop < x.start: # no-op return lo = bisect.bisect_left(self.tree, (x.start, NegInf)) hi = bisect.bisect_right(self.tree, (x.stop, PosInf)) del self.tree[lo:hi] elif not self.tree: raise IndexError(x) else: lo = bisect.bisect_left(self.tree, (x, NegInf)) if self.tree[lo][0] != x: raise IndexError(x) del self.tree[lo] def __bool__(self): """ True if the tree is not empty, False otherwise. """ return bool(self.tree) def __iadd__(self, other): """ For every (key, value) pair in other, assign self[key]=value. """ for x, val in other.tree: self[x] = val return self
[docs] def keys(self): return [x for x, val in self.tree]
[docs] def values(self): return [val for x, val in self.tree]
[docs] def items(self): return list(self.tree)
[docs] def min(self): """ Return the minimum value stored in the tree. This is O(n). """ if not self.tree: raise ValueError("empty tree") return min(val for x, val in self.tree)
[docs] def minkey(self): """ Return the minimum key stored in the tree. This is O(1). """ if not self.tree: raise ValueError("empty tree") return self.tree[0][0]
[docs] def max(self): """ Return the maximum value stored in the tree. This is O(n). """ if not self.tree: raise ValueError("empty tree") return max(val for x, val in self.tree)
[docs] def maxkey(self): """ Return the maximum key stored in the tree. This is O(1). """ if not self.tree: raise ValueError("empty tree") return self.tree[-1][0]
def __contains__(self, x): """ True if a key in self equals x, False otherwise. """ try: return bool(self.tree) and self.tree[bisect.bisect_left(self.tree, (x, NegInf))][0] == x except IndexError: return False def __len__(self): """ The number of (key, value) pairs in self. """ return len(self.tree) def __repr__(self): return "NearestLeafTree([%s])" % ", ".join("(%g, %g)" % item for item in self.tree)
[docs] def functional_integral(self, lohi, w = lambda f: f): """ Given the function f(x) = self[x], compute \int_{lo}^{hi} w(f(x)) dx The arguments are the lo and hi bounds and the functional w(f). The default functional is w(f) = f. Example: >>> x = NearestLeafTree([(100., 0.), (150., 2.), (200., 0.)]) >>> x.functional_integral((130., 170.)) 80.0 >>> x.functional_integral((100., 150.)) 50.0 >>> x.functional_integral((300., 500.)) 0.0 >>> x.functional_integral((100., 150.), lambda f: f**3) 200.0 """ lo, hi = lohi if not self.tree: raise ValueError("empty tree") if lo < hi: swapped = False elif lo == hi: return NaN if math.isinf(w(self[lo])) else 0. else: # lo > hi. remove a factor of -1 from the integral lo, hi = hi, lo swapped = True # now we are certain that lo < hi and that there is at # least one entry in the tree # construct an array of (x,y) pairs such that f(x) = y and # continues to equal y until the next x. ensure the 0th # and last entries in the array are the left and right # edges of the integration domain. i = bisect.bisect_right(self.tree, (lo, NegInf)) j = bisect.bisect_right(self.tree, (hi, PosInf)) if i > 0: i -= 1 samples = self.tree[i:j+1] samples = [((a_key + b_key) / 2., b_val) for (a_key, a_val), (b_key, b_val) in zip(samples[:-1], samples[1:])] if not samples: samples = [(lo, self[lo])] elif samples[0][0] > lo: samples.insert(0, (lo, self[lo])) else: samples[0] = lo, self[lo] if samples[-1][0] < hi: samples.append((hi, self[hi])) else: samples[-1] = hi, self[hi] # return the integral result = sum((b_key - a_key) * w(a_val) for (a_key, a_val), (b_key, b_val) in zip(samples[:-1], samples[1:])) return -result if swapped else result
[docs] def weighted_mean(self, lohi, weight = lambda y: 1.): """ Given the function f(x) = self[x], compute \int_{lo}^{hi} w(f(x)) f(x) dx ------------------------------ \int_{lo}^{hi} w(f(x)) dx where w(y) is a weighting function. The default weight function is w(y) = 1. If the numerator is identically 0 and the denominator is also 0 then a value of 0 is returned rather than raising a divide-by-0 error. Example: >>> x = NearestLeafTree([(100., 0.), (150., 2.), (200., 0.)]) >>> x.weighted_mean((130., 170.)) 2.0 >>> x.weighted_mean((100., 150.)) 1.0 >>> x.weighted_mean((300., 500.)) 0.0 >>> x.weighted_mean((100., 150.), lambda x: x**3) 2.0 """ lo, hi = lohi if not self.tree: raise ValueError("empty tree") if lo > hi: # remove a common factor of -1 from the numerator # and denominator lo, hi = hi, lo elif lo == hi: return self[lo] # now we are certain that lo < hi and that there is at # least one entry in the tree # return the ratio of the two integrals num = self.functional_integral((lo, hi), lambda f: weight(f) * f) den = self.functional_integral((lo, hi), weight) return num / den if num or den else 0.0
[docs] @classmethod def from_xml(cls, xml, name): return cls(map(tuple, ligolw_array.get_array(xml, u"%s:nearestleaftree" % name).array[:]))
[docs] def to_xml(self, name): return ligolw_array.Array.build(u"%s:nearestleaftree" % name, numpy.array(self.tree, dtype = "double"))
[docs]class HorizonHistories(dict):
[docs] def copy(self): """ Override of dict's .copy() that (a) returns the correct type and (b) makes copies of the HorizonHistories' contents. """ return type(self)((key, copy.deepcopy(value)) for key, value in self.items())
def __iadd__(self, other): for key, history in other.items(): try: self[key] += history except KeyError: self[key] = copy.deepcopy(history) return self
[docs] def minkey(self): """ Return the minimum key stored in the trees. """ minkeys = tuple(history.minkey() for history in self.values() if history) if not minkeys: raise ValueError("empty trees") return min(minkeys)
[docs] def maxkey(self): """ Return the maximum key stored in the trees. """ maxkeys = tuple(history.maxkey() for history in self.values() if history) if not maxkeys: raise ValueError("empty trees") return max(maxkeys)
[docs] def getdict(self, x): return dict((key, value[x]) for key, value in self.items())
[docs] def randhorizons(self): """ Generator yielding a sequence of random horizon distance dictionaries chosen by drawing random times uniformly distributed between the lowest and highest times recorded in the history and returning the dictionary of horizon distances for each of those times. """ x_min = self.minkey() x_max = self.maxkey() getdict = self.getdict rnd = random.uniform while 1: yield getdict(rnd(x_min, x_max))
[docs] def all(self): """ Returns a list of the unique sets of horizon distances recorded in the histories. """ # unique times for which a horizon distance measurement is # available all_x = set(x for value in self.values() for x in value.keys()) # the unique horizon distances from those times, expressed # as frozensets of instrument/distance pairs result = set(frozenset(self.getdict(x).items()) for x in all_x) # return a list of the results converted back to # dictionaries return map(dict, result)
[docs] def functional_integral_dict(self, *args, **kwargs): """ Return a dictionary of the result of invoking .functional_integral() on each of the histories. args and kwargs are passed to the .functional_integral() method of the histories objects, see their documentation for more information. """ return dict((key, value.functional_integral(*args, **kwargs)) for key, value in self.items())
[docs] def functional_integral(self, lohi, w = lambda f: max(f.values())): """ Given the function f(x) = self.getdict(x), compute \int_{lo}^{hi} w(f(x)) dx The arguments are the lo and hi bounds and the functional w(f). The default functional is w(f) = max(f.values()). Example: >>> x = HorizonHistories({ ... "H1": NearestLeafTree([(100., 0.), (150., 2.), (200., 0.)]), ... "L1": NearestLeafTree([(100., 0.), (150., 20.), (200., 0.)]), ... }) >>> x.functional_integral((130., 170.)) 800.0 >>> x.functional_integral((100., 150.)) 500.0 >>> x.functional_integral((300., 500.)) 0.0 >>> x["H1"].functional_integral((100., 150.), w = lambda f: f**3) 200.0 """ lo, hi = lohi if not self or not all(self.values()): raise ValueError("empty tree or no trees") if lo < hi: swapped = False elif lo == hi: return NaN if math.isinf(w(self.getdict(lo))) else 0. else: # lo > hi. remove a factor of -1 from the integral lo, hi = hi, lo swapped = True # now we are certain that lo < hi and that there is at # least one entry in the tree # construct an array of (x,y) pairs such that f(x) = y and # continues to equal y until the next x. ensure the 0th # and last entries in the array are the left and right # edges of the integration domain. samples = sorted(set(x for value in self.values() for x in value.keys())) i = bisect.bisect_right(samples, lo) j = bisect.bisect_right(samples, hi) if i > 0: i -= 1 samples = samples[i:j+1] samples = [((a + b) / 2., self.getdict(b)) for a, b in zip(samples[:-1], samples[1:])] if not samples: samples = [(lo, self.getdict(lo))] elif samples[0][0] > lo: samples.insert(0, (lo, self.getdict(lo))) else: samples[0] = lo, self.getdict(lo) if samples[-1][0] < hi: samples.append((hi, self.getdict(hi))) else: samples[-1] = hi, self.getdict(hi) # return the integral result = sum((b_key - a_key) * w(a_val) for (a_key, a_val), (b_key, b_val) in zip(samples[:-1], samples[1:])) return -result if swapped else result
[docs] def weighted_mean_dict(self, *args, **kwargs): """ Return a dictionary of the result of invoking .weighted_mean() on each of the histories. args and kwargs are passed to the .weighted_mean() method of the histories objects, see their documentation for more information. """ return dict((key, value.weighted_mean(*args, **kwargs)) for key, value in self.items())
[docs] def compress(self, threshold = 0.03, remove_deviations = False, deviation_percent = 0.50, verbose = False): """ Remove distances that are non-zero and differ fractionally from both neighbours by less than the selected threshold. Also allows removal of uncharacteristic horizon distance values. """ abs_ln_thresh = math.log1p(threshold) for instrument, horizon_history in list(self.items()): # GPS time / distance pairs items = horizon_history.items() if remove_deviations: values = numpy.array(items)[:,1] mean_horizon = values[values!=0].mean() items = [item for item in items if item[1] < (mean_horizon * (1. + deviation_percent))] # compress array j = 1 for i in range(1, len(items) - 1): values = items[j - 1][1], items[i][1], items[i + 1][1] # remove distances that are non-zero and differ # fractionally from both neighbours by less than # the selected threshold. always keep the first # and last values if (values[0] > 0. and values[1] > 0. and values[2] > 0. and abs(math.log(values[1] / values[0])) < abs_ln_thresh and abs(math.log(values[1] / values[2])) < abs_ln_thresh): continue # remove distances that are 0 and surrounded by 0 # on both sides (basically the same as the last # test, but we can't take log(0)). if values == (0., 0., 0.): continue items[j] = items[i] j += 1 del items[j:] if verbose: print("\"%s\": %s horizon history reduced to %.3g%% of original size" % (filename, instrument, 100. * j / (i + 1.)), file=sys.stderr) # replace self[instrument] = type(horizon_history)(items)
[docs] @classmethod def from_xml(cls, xml, name): xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == u"%s:horizonhistories" % name] try: xml, = xml except ValueError: raise ValueError("document must contain exactly 1 HorizonHistories named '%s'" % name) keys = [elem.Name.replace(u":nearestleaftree", u"") for elem in xml.getElementsByTagName(ligolw.Array.tagName) if elem.hasAttribute(u"Name") and elem.Name.endswith(u":nearestleaftree")] self = cls() for key in keys: self[key] = NearestLeafTree.from_xml(xml, key) return self
[docs] def to_xml(self, name): xml = ligolw.LIGO_LW({u"Name": u"%s:horizonhistories" % name}) for key, value in self.items(): xml.appendChild(value.to_xml(key)) return xml