from functools import reduce
import sys

import numpy as np
import astropy.table as apt

# newer SciPy provides trapezoid; fall back to trapz on older releases
try:
    from scipy.integrate import trapezoid
except ImportError:
    from scipy.integrate import trapz as trapezoid

from optparse import OptionParser

from lalinference import git_version
from lalinference import bayespputils as bppu
from lalinference.io import read_samples, write_samples, extract_metadata

from lalinference import LALINFERENCE_PARAM_FIXED as FIXED
from lalinference import LALINFERENCE_PARAM_OUTPUT as OUTPUT
__author__ = "Carl-Johan Haster <carl-johan.haster@ligo.org>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date

def multipleFileCB(opt, opt_str, value, parser):
    """optparse callback that collects every following argument into a list."""
    def floatable(s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    args = []
    for arg in parser.rargs:
        # stop on --foo like options
        if arg[:2] == "--" and len(arg) > 2:
            break
        # stop on -a, but not on negative numbers
        if arg[:1] == "-" and len(arg) > 1 and not floatable(arg):
            break
        args.append(arg)
    del parser.rargs[:len(args)]

    # append to any files already collected by an earlier use of the flag
    if getattr(parser.values, opt.dest):
        oldargs = getattr(parser.values, opt.dest)
        oldargs.extend(args)
        args = oldargs
    setattr(parser.values, opt.dest, args)
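
# optparse calls this as callback(option, opt_str, value, parser); consuming
# parser.rargs by hand lets one flag take several paths in a row, e.g.
# `--data run1.hdf5 run2.hdf5` (hypothetical file names).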

mcmc_group_id = '/lalinference/lalinference_mcmc'
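
# LALInference 'vary' codes used below: FIXED marks a parameter that was held
# constant during sampling, OUTPUT marks a derived or diagnostic column that
# was not sampled directly.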

def reassign_metadata(new_posterior, original_hdf5):
    """Copy per-column metadata from the original HDF5 file onto new_posterior."""
    # assumes the samples live under mcmc_group_id + '/posterior_samples'
    base_file = read_samples(original_hdf5, path=mcmc_group_id+'/posterior_samples')

    mcmc_diagnostics_params = ['nLocalTemps', 'randomSeed']

    meta_dict = {}
    for colname, column in base_file.columns.items():
        meta_dict[colname] = column.meta

    for colname, column in new_posterior.columns.items():
        if colname in mcmc_diagnostics_params:
            column.meta = {'vary': OUTPUT}
        elif colname in meta_dict:
            column.meta = meta_dict[colname]
        elif 'cos'+colname in meta_dict:
            column.meta = meta_dict['cos'+colname]
        elif 'sin'+colname in meta_dict:
            column.meta = meta_dict['sin'+colname]
        elif 'log'+colname in meta_dict:
            column.meta = meta_dict['log'+colname]
        elif colname.startswith('chain_'):
            column.meta = {'vary': OUTPUT}
        else:
            column.meta = {'vary': FIXED}

    return new_posterior
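
# A parameter sampled in a transformed form (e.g. 'cosiota' when the posterior
# holds 'iota') inherits the metadata of its sampled counterpart via the
# cos/sin/log lookups above.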

def downsample_and_evidence(data_hdf5, deltaLogP=None, fixedBurnin=None, nDownsample=None, verbose=False):
    """Downsample a PTMCMC run and compute its evidence by thermodynamic integration."""
    if not data_hdf5.lower().endswith(('.hdf5', '.hdf', '.h5')):
        print('cbcBayesMCMC2pos only supports hdf5 input, for older file formats please revert to cbcBayesThermoInt and cbcBayesPostProc')
        sys.exit(1)

    peparser = bppu.PEOutputParser('hdf5')

    # the default table (tablename=None) is the coldest chain, beta ~ 1
    ps, samps = peparser.parse(data_hdf5, deltaLogP=deltaLogP, fixedBurnins=fixedBurnin,
                               nDownsample=nDownsample, tablename=None)
    posterior_samples = apt.Table(samps, names=ps)

    # each hotter temperature rung is stored as its own table, 'chain_01', ...
    highTchains = []
    for i in range(1, int(posterior_samples['nTemps'][0])):
        ps, samps = peparser.parse(data_hdf5, deltaLogP=deltaLogP, fixedBurnins=fixedBurnin,
                                   nDownsample=nDownsample, tablename='chain_'+str('%02.f' % i))
        highTchains.append(apt.Table(samps, names=ps))
        if verbose: print('chain_'+str('%02.f' % i)+' at a temperature '+str(highTchains[i-1]['temperature'].mean()))
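
    # Build the inverse-temperature ladder: one beta and one mean log-likelihood
    # per chain, cold chain first.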
    betas = np.zeros(len(highTchains)+1)
    logls = np.zeros_like(betas)

    betas[0] = 1./np.median(posterior_samples['temperature'])
    logls[0] = np.mean(posterior_samples['logl'])

    for i in range(len(highTchains)):
        betas[i+1] = 1./np.median(highTchains[i]['temperature'])
        logls[i+1] = np.mean(highTchains[i]['logl'])

    # order by decreasing beta (increasing temperature)
    inds = np.argsort(betas)[::-1]
    betas = betas[inds]
    logls = logls[inds]

    # extend the integrand to beta = 0, holding the hottest <logl> constant
    ebetas = np.concatenate((betas, [0.0]))
    elogls = np.concatenate((logls, [logls[-1]]))

    # half-resolution version of the ladder for a discretisation-error estimate
    ebetas2 = np.concatenate((betas[::2], [0.0]))
    elogls2 = np.concatenate((logls[::2], [logls[::2][-1]]))
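
    # Thermodynamic integration: log Z = int_0^1 <log L>_beta dbeta.  The betas
    # run from ~1 down to 0, so the trapezoid sums come out negative, hence the
    # minus signs; |evidence - evidence2| serves as the error estimate below.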
    evidence = -trapezoid(elogls, ebetas)
    evidence2 = -trapezoid(elogls2, ebetas2)

    posterior_samples['chain_log_evidence'] = evidence
    posterior_samples['chain_delta_log_evidence'] = np.absolute(evidence - evidence2)
    posterior_samples['chain_log_noise_evidence'] = posterior_samples['nullLogL']
    posterior_samples['chain_log_bayes_factor'] = posterior_samples['chain_log_evidence'] - posterior_samples['chain_log_noise_evidence']

    print('logZ = '+str(posterior_samples['chain_log_evidence'][0])+'+-'+str(posterior_samples['chain_delta_log_evidence'][0]))
    print('logB_SN = '+str(posterior_samples['chain_log_bayes_factor'][0]))

    posterior_samples = reassign_metadata(posterior_samples, data_hdf5)

    return posterior_samples
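
# Example (hypothetical file name):
#   samples = downsample_and_evidence('lalinference_mcmc.hdf5', nDownsample=1000)
#   print(samples['chain_log_evidence'][0])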

def weight_and_combine(pos_chains, verbose=False, evidence_weighting=True, combine_only=False):
    """Combine several chains' posteriors into one sample set, weighted by evidence."""
    log_evs = np.zeros(len(pos_chains))
    log_noise_evs = np.zeros_like(log_evs)

    for i in range(len(pos_chains)):
        log_evs[i] = pos_chains[i]['chain_log_evidence'][0]
        log_noise_evs[i] = pos_chains[i]['chain_log_noise_evidence'][0]
    if verbose: print('Computed log_evidences: %s'%(str(log_evs)))

    max_log_ev = log_evs.max()

    if evidence_weighting:
        fracs = [np.exp(log_ev-max_log_ev) for log_ev in log_evs]
    else:
        fracs = [1.0 for _ in log_evs]
    if verbose: print('Relative weights of input files: %s'%(str(fracs)))
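
    # exp(log_ev - max_log_ev) weights each chain by its evidence relative to
    # the strongest one; the next block folds in chain lengths so the weights
    # become per-sample acceptance probabilities.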
    Ns = [fracs[i]/len(pos_chains[i]) for i in range(len(fracs))]
    # assumption: normalise by the largest per-sample weight so fracs stay <= 1
    Ntot = max(Ns)
    fracs = [n/Ntot for n in Ns]
    if combine_only:
        fracs = [1.0 for _ in fracs]
    if verbose: print('Relative weights of input files taking into account their length: %s'%(str(fracs)))
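
    # Realise the weights by rejection sampling: each sample survives with
    # probability fracs[i] for its chain.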
    final_posterior = pos_chains[0][np.random.uniform(size=len(pos_chains[0]))<fracs[0]]

    for i in range(1,len(pos_chains)):
        final_posterior = apt.vstack([final_posterior,
                                      pos_chains[i][np.random.uniform(size=len(pos_chains[i]))<fracs[i]]])

    final_log_evidence = reduce(np.logaddexp, log_evs) - np.log(len(log_evs))
    final_log_noise_evidence = reduce(np.logaddexp, log_noise_evs) - np.log(len(log_noise_evs))
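
    # reduce(np.logaddexp, xs) - log(N) is log(mean(exp(xs))) evaluated without
    # underflow: the combined evidence is the arithmetic mean of the chain evidences.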

    run_level = 'lalinference/lalinference_mcmc'
    metadata = {}
    metadata[run_level] = {}
    metadata[run_level]['log_bayes_factor'] = final_log_evidence - final_log_noise_evidence
    metadata[run_level]['log_evidence'] = final_log_evidence
    metadata[run_level]['log_noise_evidence'] = final_log_noise_evidence
    metadata[run_level]['log_max_likelihood'] = final_posterior['logl'].max()

    # the per-chain 'cycle' counter is meaningless after combining chains
    final_posterior.remove_column('cycle')

    return final_posterior, metadata
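
# Example (assuming p1, p2 were returned by downsample_and_evidence):
#   combined, meta = weight_and_combine([p1, p2], verbose=True)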

USAGE='''%prog [options] PTMCMC_datafile.hdf5 [PTMCMC_datafile2.hdf5 ...]
Compute the evidence for a set of parallel tempered MCMC chains
through thermodynamic integration. If using several input PTMCMC files,
combine them into one set of posterior samples, weighted by their relative
evidence.
'''

if __name__ == '__main__':
    parser = OptionParser(USAGE)
    parser.add_option('-p', '--pos', action='store', type='string', default=None,
                      help='Output file for posterior samples', metavar='posterior.hdf5')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Print some additional information')
    parser.add_option('-d', '--data', dest='data', action='callback',
                      callback=multipleFileCB, help='datafile')
    parser.add_option('-s', '--downsample', action='store', default=None,
                      help='Approximate number of samples to record in the posterior', type='int')
    parser.add_option('-l', '--deltaLogP', action='store', default=None,
                      help='Difference in logPosterior to use for convergence test.', type='float')
    parser.add_option('-b', '--fixedBurnin', dest='fixedBurnin', action='callback',
                      callback=multipleFileCB, help='Fixed number of iterations for burnin.')
    parser.add_option('--equal-weighting', action='store_true', default=False,
                      help='Disable evidence weighting and just combine chains so they have equal contribution to the final result')
    parser.add_option('--combine-only', action='store_true', default=False,
                      help="Don't weight the chains at all, just concatenate them (different length inputs will have different representation in the output)")
    opts, args = parser.parse_args()

    datafiles = []
    if len(args) > 0:
        datafiles = datafiles + args
    if opts.data:
        datafiles = datafiles + opts.data

    if opts.fixedBurnin:
        # a single burnin value is applied to every datafile
        if len(opts.fixedBurnin) == 1:
            fixedBurnins = [int(opts.fixedBurnin[0]) for df in datafiles]
        else:
            fixedBurnins = [int(fixedBurnin) for fixedBurnin in opts.fixedBurnin]
    else:
        fixedBurnins = [None]*len(datafiles)

    chain_posteriors = []

    for i in range(len(datafiles)):
        chain_posteriors.append(downsample_and_evidence(datafiles[i],
            deltaLogP=opts.deltaLogP, fixedBurnin=fixedBurnins[i], nDownsample=opts.downsample, verbose=opts.verbose))

    final_posterior, metadata = weight_and_combine(chain_posteriors, verbose=opts.verbose,
                                                   evidence_weighting=not opts.equal_weighting,
                                                   combine_only=opts.combine_only)

    for path in datafiles:
        run_identifier = extract_metadata(path, metadata)

    # drop column-level metadata already carried by the table itself
    path_to_samples = '/'.join(['', 'lalinference', run_identifier, 'posterior_samples'])
    if path_to_samples in metadata:
        for colname in final_posterior.columns:
            metadata[path_to_samples].pop(colname, None)

    for level in metadata:
        for key in metadata[level]:
            # combining several files leaves lists of values; keep the first string
            if isinstance(metadata[level][key], list) and all(isinstance(x, str) for x in metadata[level][key]):
                print("Warning: only printing the first of the %d entries found for metadata %s/%s. You can find the whole list in the headers of individual hdf5 output files\n"%(len(metadata[level][key]),level,key))
                metadata[level][key] = metadata[level][key][0]

    write_samples(final_posterior, opts.pos,
                  path=path_to_samples, metadata=metadata)
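
# Example invocation (hypothetical file names):
#   cbcBayesMCMC2pos -p posterior.hdf5 -v -d run1.hdf5 run2.hdf5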