Loading [MathJax]/extensions/TeX/AMSsymbols.js
LALInference 4.1.9.1-00ddc7f
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
cbcBayesBurstPPAnalysis.py
Go to the documentation of this file.
1##python
2# -*- coding: utf-8 -*-
3#
4# cbcBayesPostProc.py
5#
6# Copyright 2010
7# Benjamin Farr <bfarr@u.northwestern.edu>,
8# Will M. Farr <will.farr@ligo.org>,
9# 2014
10# Salvatore Vitale <salvatore.vitale@ligo.mit.edu>
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24# MA 02110-1301, USA.
25
26from igwn_ligolw import ligolw
27from igwn_ligolw import lsctables
28from igwn_ligolw import table
29from igwn_ligolw import utils
30import matplotlib
31matplotlib.use("Agg")
32import matplotlib.pyplot as pp
33import numpy as np
34import optparse
35import os
36import os.path
37import scipy.stats as ss
38import string
39
40class LIGOLWContentHandlerExtractSimBurstTable(ligolw.LIGOLWContentHandler):
41 def __init__(self,document):
42 ligolw.LIGOLWContentHandler.__init__(self,document)
43 self.tabname=lsctables.SimBurstTable.tableName
44 self.intable=False
46 def startElement(self,name,attrs):
47 if attrs.has_key('Name') and table.Table.TableName(attrs['Name'])==self.tabname:
48 self.tableElementName=name
49 # Got the right table, let's see if it's the right event
50 ligolw.LIGOLWContentHandler.startElement(self,name,attrs)
51 self.intable=True
52 elif self.intable: # We are in the correct table
53 ligolw.LIGOLWContentHandler.startElement(self,name,attrs)
54 def endElement(self,name):
55 if self.intable: ligolw.LIGOLWContentHandler.endElement(self,name)
56 if self.intable and name==self.tableElementName: self.intable=False
57
58posterior_name_to_sim_burst_extractor = {
59 'frequency' : lambda sb: sb.frequency,
60 'duration' : lambda sb: sb.duration,
61 'quality' : lambda sb: sb.q,
62 'hrss' : lambda sb: sb.hrss,
63 'psi' : lambda sb: sb.psi,
64 'time' : lambda sb: sb.time_geocent_gps + 1e-9*sb.time_geocent_gps_ns,
65 'ra' : lambda sb: sb.ra,
66 'dec' : lambda sb: sb.dec,
67 'polar_angle':lambda sb:sb.pol_ellipse_angle,
68 'polar_eccentricity':lambda sb:sb.pol_ellipse_e,
69 'alpha':lambda sb:sb.pol_ellipse_angle
70}
71
72
73posterior_name_to_latex_name = {
74 'frequency' : r'$f$',
75 'quality' : r'$q$',
76 'duration' : r'$\tau$',
77 'hrss' : r'$hrss$',
78 'time' : r'$t$',
79 'ra' : r'$\alpha$',
80 'dec' : r'$\delta$',
81 'psi' : r'$\psi$',
82 'polar_angle' : r'$\xi_a$',
83 'polar_eccentricity' : r'$\xi_e$',
84 'alpha' : r'$\zeta_{orb}$',
85
86
87}
88
89def fractional_rank(x, xs):
90 """Returns the fraction of samples, ``xs``, that fall below the value
91 ``x``.
92
93 """
94 nbelow = np.sum(xs < x)
95
96 return float(nbelow)/float(xs.shape[0])
97
98def pp_plot(ps, title=None, outfile=None):
99 """Generates a p-p plot for the given ps.
100
101 :param ps: The p-values whose cumulative distribution is to be
102 plotted.
103
104 :param title: An (optional) title for the plot.
105
106 :param outfile: An (optional) basename for the plot file; both
107 basename.png and basename.pdf will be created.
108
109 """
110
111 ps = np.atleast_1d(ps)
112 ps = np.sort(ps)
113 ys = np.zeros(ps.shape[0]+2)
114 ys[:-1] = np.linspace(0, 1, ps.shape[0]+1)
115 ys[-1] = 1.0
116 xs = np.zeros(ps.shape[0]+2)
117 xs[-1] = 1.0
118 xs[1:-1] = ps
119
120 pp.figure(figsize=(6,6), dpi=100)
121
122 for i in range(10):
123 syn_ps = np.random.uniform(size=ps.shape[0])
124 syn_xs = np.zeros(ps.shape[0]+2)
125 syn_xs[-1] = 1.0
126 syn_xs[1:-1] = np.sort(syn_ps)
127 pp.plot(syn_xs, ys, '-', color='0.9')
128
129 pp.plot(xs, ys, '-k')
130 pp.plot(ys, ys, '--k')
131
132 pp.xlabel(r'$p$')
133 pp.ylabel(r'$P(p)$')
134 if title is not None:
135 pp.title(title)
136
137 if outfile is not None:
138 pp.savefig(outfile + '.png')
139 pp.savefig(outfile + '.pdf')
140
141def pp_kstest_pvalue(ps):
142 """Returns the K-S p-value for the test of the given ``ps`` against a
143 uniform distribution on [0,1].
144
145 """
146 stat, p = ss.kstest(ps, lambda x: x)
147
148 return p
149
151 """Returns a named numpy array of the posterior samples in the file
152 ``f``.
153
154 """
155 with open(f, 'r') as inp:
156 header = inp.readline().split()
157 dtype = np.dtype([(n, np.float) for n in header])
158 data = np.loadtxt(inp, dtype=dtype)
159
160 return data
161
162def output_html(outdir, ks_pvalues, injnum,skypp=False):
163 """Outputs the HTML page summarizing the results.
164
165 """
166 table_row_template = string.Template("""<tr> <td> ${name} </td>
167 <td> ${pvalue} </td>
168 <td> <img src="${name}.png" alt="${name} p-p plot" width="300" height="300" /> </td> <td> <a href="${name}.png">PNG</a> <a href="${name}.pdf">PDF</a> <a href="${name}-ps.dat">p-values</a> </td> </tr>
169
170 """)
171
172 html_template = string.Template("""<!DOCTYPE html>
173 <html>
174 <head>
175 <title> LALInference P-P Plots </title>
176 </head>
177
178 <body>
179
180 <p>This page was generated with the output of ${injnum} simulations.</p>
181 ${linkstr}
182 <br>
183 <table border="1">
184 <tr>
185 <th> Parameter </th> <th> K-S p-value </th> <th> p-p Plot </th> <th> Links </th>
186 </tr>
187
188 ${tablerows}
189 ${skypp}
190 </table>
191
192 </body>
193 </html>
194
195 """)
196
197 # If this script is run with lalinference_pp_pipe then the following directory structure should exist
198 links="<ul>"
199 if os.path.isdir(os.path.join(outdir,'prior')):
200 links+='<li><a href="prior/">Prior Samples used in this test</a>'
201 if os.path.isdir(os.path.join(outdir,'injections')):
202 links+='<li><a href="injections/">Posteriors for each injection</a>'
203 links+='</ul>'
204
205 tablerows = []
206 for par, pv in ks_pvalues.items():
207 tablerows.append(table_row_template.substitute(name=par, pvalue=str(pv)))
208 tablerows = '\n'.join(tablerows)
209
210 if skypp is False:
211 skysub=''
212 else:
213 skysub='<tr> <td> sky </td> <td> See Plot </td> <td> <img src="sky_p-p.png" alt="sky p-p plot" width="300" height="300" /> </td> <td> <a href="sky_p-p.png">PNG</a> <a href="sky_p-p.pdf">PDF</a></td> </tr>'
214
215 html = html_template.substitute(tablerows=tablerows, injnum=str(injnum), linkstr=links,skypp=skysub)
216
217 with open(os.path.join(outdir, 'index.html'), 'w') as out:
218 out.write(html)
219
220if __name__ == '__main__':
221 USAGE='''%prog [options] posfile1.dat posfile2.dat ...
222 Generate PP analysis for a set of injections. posfiles must be in same order as injections.'''
223 parser = optparse.OptionParser(USAGE)
224 parser.add_option('--injXML', action='store', type='string', dest='injxml',
225 help='sim_burst XML file for injections')
226 parser.add_option('--outdir', action='store', type='string',
227 help='output directory')
228
229 parser.add_option('--postsamples', action='store', type='string',
230 default='posterior_samples.dat',
231 help='filename for posterior samples files')
232
233 parser.add_option('--par', action='append', default=[], type='string',
234 help='parameter names for the p-p plot')
235 parser.add_option('--skyPPfolder', action='store',dest='skypp',type='string',default=None,help='Path to folder containing png/pdf with 2D skyarea PP plots')
236
237 (options, args) = parser.parse_args()
238
239 injs = lsctables.SimBurstTable.get_table(utils.load_filename(options.injxml,contenthandler=LIGOLWContentHandlerExtractSimBurstTable))
240
241 if options.par == []:
242 parameters = ['frequency', 'quality', 'hrss', 'ra', 'dec', 'psi', 'time', 'alpha','polar_eccentricity']
243 else:
244 parameters = options.par
245
246 try:
247 os.mkdir(options.outdir)
248 except:
249 pass
250
251 pvalues = { }
252 posfiles=args
253 Ninj=0
254 for index,posfile in enumerate(posfiles):
255 try:
256 psamples = read_posterior_samples(posfile)
257 #index = int(element)
258 true_params = injs[index]
259 Ninj+=1
260 except:
261 # Couldn't read the posterior samples or the XML.
262 print("not found %s \n"%posfile)
263 continue
264
265 for par in parameters:
266 try:
267 samples = psamples[par]
268 true_value = posterior_name_to_sim_burst_extractor[par](true_params)
269 p = fractional_rank(true_value, samples)
270
271 try:
272 pvalues[par].append(p)
273 except:
274 pvalues[par] = [p]
275 except:
276 # Couldn't read samples for parameter or injection
277 continue
278
279 # Generate plots, K-S tests
280 ks_pvalues = {}
281 for par, ps in pvalues.items():
282 pp_plot(ps, title=posterior_name_to_latex_name[par], outfile=os.path.join(options.outdir, par))
283 pp.clf()
284 ks_pvalues[par] = pp_kstest_pvalue(ps)
285 np.savetxt(os.path.join(options.outdir, par + '-ps.dat'), np.reshape(ps, (-1, 1)))
286
287 skypp=False
288 if options.skypp is not None:
289 found=0
290 if os.path.isdir(options.skypp):
291 for i in ['png','pdf']:
292 if os.path.isfile(os.path.join(options.skypp,'p-p.%s'%i)):
293 inf=os.path.join(os.path.realpath(options.skypp),'p-p.%s'%i)
294 outf=os.path.join(options.outdir,'sky_p-p.%s'%i)
295 os.system('cp %s %s'%(inf,outf))
296 found+=1
297 else:
298 print("could not find %s\n"%os.path.join(options.skypp,'p-p.%s'%i))
299 else:
300 print("skyPPfolder %s doesn't seem to be a valid folder or cannot be read. Skipping skyPP plot\n"%os.path.realpath(options.skypp))
301
302 if found>0:
303 skypp=True
304
305 output_html(options.outdir, ks_pvalues, Ninj,skypp= skypp )
def fractional_rank(x, xs)
Returns the fraction of samples, xs, that fall below the value x.
def output_html(outdir, ks_pvalues, injnum, skypp=False)
Outputs the HTML page summarizing the results.
def pp_kstest_pvalue(ps)
Returns the K-S p-value for the test of the given ps against a uniform distribution on [0,...
def pp_plot(ps, title=None, outfile=None)
Generates a p-p plot for the given ps.
def read_posterior_samples(f)
Returns a named numpy array of the posterior samples in the file f.