from numpy import loadtxt, logaddexp, log, mean
from optparse import OptionParser
from functools import reduce
import sys
import os
import gzip

from lalinference import LALInferenceHDF5PosteriorSamplesDatasetName as posterior_dset_name
from lalinference import LALInferenceHDF5NestedSamplesDatasetName as nested_dset_name
# Helper functions used below; in lalinference these live in the nest2pos
# and io submodules (module paths assumed in this reconstruction).
from lalinference.nest2pos import (draw_posterior_many, draw_N_posterior_many,
                                   compute_weights)
from lalinference.io import read_samples, write_samples, extract_metadata
usage = '''%prog [-N Nlive] [-p posterior.hdf5] [-H header.txt] [--npos Npos] [--non-strict-versions] datafile1.hdf5 [datafile2.hdf5 ...]

%prog takes at least one nested sampling output file and outputs posterior
samples. If more than one input file is specified, each file is converted,
then posterior samples are drawn according to the evidence of each. If the
--npos option is used, the algorithm will draw approximately that number of
samples from the posterior; this may give repeated samples in the output
file. By default, the non-repeating algorithm is used, but that may not
produce enough samples. The input and output files may be in either HDF5 or
ASCII format, with ASCII tables being deprecated. The type will be chosen
based on the file extensions.
'''
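# Example invocations (illustrative only: the installed script name and the
# run*.hdf5 / run*.dat file names are placeholders):
#
#   nest2pos -p posterior.hdf5 run1.hdf5 run2.hdf5
#   nest2pos --npos 5000 -p posterior.hdf5 run1.hdf5
#   nest2pos -N 1024 -p posterior.dat run1.dat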
def read_nested_from_hdf5(nested_path_list, strict_versions=True):
    input_arrays = []
    metadata = {}
    nlive = []
    log_noise_evidences = []
    log_max_likelihoods = []

    for path in nested_path_list:
        if not os.path.isfile(path):
            print('Unable to open %s, skipping file' % path)
            continue
        try:
            # The original read call is elided in this fragment; this is a
            # reconstruction based on the read_samples signature listed below.
            tab = read_samples(path, tablename=nested_dset_name)
            input_arrays.append(tab)
        except KeyError:
            print('Unable to read table from %s, skipping' % path)
            continue
        # extract_metadata appends to the metadata dict and to the evidence,
        # likelihood and nlive lists in place.
        run_identifier = extract_metadata(path, metadata,
                                          log_noise_evidences,
                                          log_max_likelihoods, nlive,
                                          nested_dset_name, True,
                                          strict_versions)

    if len(input_arrays) == 0:
        print('No nested samples could be read from %s'
              % str(nested_path_list))
        sys.exit(1)  # nothing to combine; abort

    # Metadata gathered from several files arrives as lists: average numeric
    # lists, and keep only the first entry of string lists (with a warning).
    for level in metadata:
        for key in metadata[level]:
            if isinstance(metadata[level][key], list) and all(
                    isinstance(x, (int, float)) for x in metadata[level][key]):
                metadata[level][key] = mean(metadata[level][key])
            elif isinstance(metadata[level][key], list) and all(
                    isinstance(x, str) for x in metadata[level][key]):
                print('Warning: only printing the first of the %d entries '
                      'found for metadata %s/%s. You can find the whole list '
                      'in the headers of individual hdf5 output files\n'
                      % (len(metadata[level][key]), level, key))
                metadata[level][key] = metadata[level][key][0]

    log_noise_evidence = (reduce(logaddexp, log_noise_evidences)
                          - log(len(log_noise_evidences)))
    log_max_likelihood = max(log_max_likelihoods)

    return (input_arrays, log_noise_evidence, log_max_likelihood, metadata,
            nlive, run_identifier)
def read_nested_from_ascii(nested_path_list):
    print('Warning: ASCII files are deprecated in favor of HDF5.')
    default_header_path = nested_path_list[0] + '_params.txt'
    if opts.headers is not None:
        header_path = opts.headers
        if not os.access(header_path, os.R_OK):
            raise OSError('Unable to open header file: %r' % header_path)
    elif os.access(default_header_path, os.R_OK):
        header_path = default_header_path
    else:
        header_path = None

    if header_path is None:
        print('No header file found, assuming inspnest default')
        header = 'mchirp eta time phi0 dist ra dec psi iota logL'
    else:
        print('Reading headers from %r' % header_path)
        with open(header_path, 'r') as f:
            # The first line of the header file names the columns.
            header = f.readline()
    headers = header.split()
    # list() so the arrays can be indexed and re-used (map is lazy here).
    input_arrays = list(map(loadtxt, nested_path_list))

    log_noise_evidences = []
    log_max_likelihoods = []
    for path in nested_path_list:
        path = path.replace('.gz', '') + '_B.txt'
        if os.access(path, os.R_OK):
            content = loadtxt(path)
            log_noise_evidences.append(content[2])
            log_max_likelihoods.append(content[3])
    if log_noise_evidences:
        log_noise_evidence = reduce(logaddexp, log_noise_evidences)
        log_max_likelihood = max(log_max_likelihoods)
    else:
        log_noise_evidence = 0
        log_max_likelihood = 0

    return headers, input_arrays, log_noise_evidence, log_max_likelihood
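# For reference, the '<datafile>_B.txt' evidence files read above (and
# written by write_posterior_to_ascii below) hold four whitespace-separated
# numbers on one line:
#
#   log_bayes_factor  log_evidence  log_noise_evidence  log_max_likelihood
#
# which is why content[2] and content[3] above pick out the noise evidence
# and the maximum log likelihood.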
def write_posterior_to_hdf(posterior_path, headers, posterior, metadata,
                           run_identifier):
    write_samples(posterior, posterior_path,
                  path='/'.join(['', 'lalinference', run_identifier,
                                 posterior_dset_name]),
                  metadata=metadata, overwrite=True)
def write_posterior_to_ascii(posterior_path, headers, posterior,
                             log_bayes_factor, log_evidence,
                             log_noise_evidence, log_max_likelihood):
    print('Warning: HDF5 output is inactive since %r was passed as '
          'output file. ASCII output is deprecated.' % opts.pos)

    # Clamp the requested precision to a sane range.
    if 1 <= opts.prec <= 20:
        prec = opts.prec
    else:
        prec = 14
        print('Using default precision instead of %r.' % opts.prec)

    if posterior_path is None:
        posterior_file = sys.stdout
    elif opts.gz:
        # Text mode so the str writes below work under Python 3.
        posterior_file = gzip.open(posterior_path, 'wt')
    else:
        posterior_file = open(posterior_path, 'w')

    for field in headers:
        posterior_file.write('%s\t' % field)
    posterior_file.write('\n')

    for row in posterior:
        for value in row:
            posterior_file.write('{1:.{0}e}\t'.format(prec, value))
        posterior_file.write('\n')
    if opts.pos is not None:
        posterior_file.close()

    # Write the evidence summary next to the posterior file.
    if opts.pos is not None:
        evidence_file_path = opts.pos + '_B.txt'
        with open(evidence_file_path, 'w') as out:
            out.write('%f %f %f %f\n' % (log_bayes_factor, log_evidence,
                                         log_noise_evidence,
                                         log_max_likelihood))

    # Concatenate any per-input header files into one combined header file.
    if opts.pos is not None:
        combined = ''  # accumulator; the original variable name is elided
        for dfile in datafiles:
            fin = '%s_header.txt' % dfile
            if os.path.isfile(fin):
                with open(fin, 'r') as f:
                    for line in f.readlines():
                        combined += line
        if combined:
            with open(opts.pos + '_header.txt', 'w') as fout:
                fout.write(combined)
def is_hdf5(path):
    """Return True for HDF5 input, False for '.dat', based on extension."""
    extension = os.path.splitext(path)[1]
    if '.h5' in extension or '.hdf' in extension:
        return True
    elif extension.strip().lower() == '.dat':
        return False
    else:
        raise ValueError("Extension %r not recognized as HDF5 or '.dat': %r"
                         % (extension, path))
if __name__ == '__main__':
    parser = OptionParser(usage)
    parser.add_option(
        '-N', '--Nlive', action='store', type='int', dest='Nlive',
        default=None,
        help='Number of live points in each chain loaded', metavar='NUM')
    parser.add_option(
        '-p', '--pos', action='store', type='string', default=None,
        help='Output file for posterior samples', metavar='posterior.dat')
    parser.add_option(
        '--npos', action='store', type='int', default=None,
        help='Draw a specific number of posterior samples. May give '
             'repetitions. Disabled by default.', metavar='NUM')
    parser.add_option(
        '-H', '--headers', action='store', type='string', default=None,
        help='Header file explaining columns in data file',
        metavar='file.dat_params.txt')
    parser.add_option(
        '-d', '--prec', action='store', type='int', dest='prec', default=14,
        help='Number of decimal places required for output posterior '
             'samples. Default is 14.', metavar='NUM')
    parser.add_option(
        '-z', '--gzip', action='store_true', dest='gz', default=False,
        help='Gzip compress the output posterior samples (this will append '
             '.gz onto the posterior file). Default is no compression.')
    parser.add_option(
        '-v', '--verbose', action='store_true', default=False,
        help='Print some additional information')
    parser.add_option(
        '--non-strict-versions', action='store_true', dest='nonstrict',
        default=False,
        help='Set this flag to allow combining of nested sample HDF5 files '
             'that do not necessarily have the same version information')
    opts, args = parser.parse_args()
    datafiles = args
    if len(datafiles) < 1:
        print('No input file specified, exiting')
        sys.exit(1)

    # Choose the input format from the file extensions; mixing is an error.
    if all(map(is_hdf5, datafiles)):
        hdf_input = True
    elif not any(map(is_hdf5, datafiles)):
        hdf_input = False
    else:
        raise ValueError('Input files appear to be mixed between HDF5 and ASCII.')

    # Choose the output format the same way.
    if opts.pos is None:
        print('No output file given, writing to stdout.')
        hdf_output = False
    else:
        hdf_output = is_hdf5(opts.pos)

    # Sanity-check the live-point count.
    if opts.Nlive is None and not hdf_input:
        print('Must specify number of live points using the --Nlive option if '
              'ASCII tables are used.')
        sys.exit(1)
    elif hdf_input and opts.Nlive is not None:
        print('Nlive is ignored in favor of the value in the HDF metadata.')

    if hdf_input:
        return_values = read_nested_from_hdf5(
            datafiles, strict_versions=(not opts.nonstrict))
        (input_arrays, log_noise_evidence, log_max_likelihood,
         metadata, nlive, run_identifier) = return_values
        if len(input_arrays) == 0:
            print('Error: No input files were read')
            sys.exit(1)
        headers = input_arrays[0].dtype.names
        nlive = list(map(int, nlive))
    else:
        # Deprecated ASCII path; reconstruction assumes every chain shares
        # the --Nlive value.
        (headers, input_arrays, log_noise_evidence,
         log_max_likelihood) = read_nested_from_ascii(datafiles)
        nlive = [opts.Nlive] * len(input_arrays)

    if opts.npos is not None:
        sampler = draw_N_posterior_many  # fixed-size draw, may repeat samples
        posterior = sampler(input_arrays, nlive, opts.npos,
                            verbose=opts.verbose)
    else:
        sampler = draw_posterior_many  # default non-repeating draw
        posterior = sampler(input_arrays, nlive, verbose=opts.verbose)

    # Per-run log evidences and log sample weights (assumes structured
    # arrays with a 'logL' column, as in the HDF5 path).
    log_evs, log_wts = zip(*[compute_weights(data['logL'], n)
                             for data, n in list(zip(input_arrays, nlive))])
    print('Log evidences from input files: %s' % str(log_evs))

    # Combine the runs in log space.
    log_evidence = reduce(logaddexp, log_evs) - log(len(log_evs))
    log_bayes_factor = log_evidence - log_noise_evidence
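    # A worked check of the log-space mean above (values illustrative): for
    # two runs with log evidences 10.0 and 11.0,
    #   logaddexp(10.0, 11.0) - log(2) = 11.3133 - 0.6931 ≈ 10.62,
    # i.e. log((e**10 + e**11) / 2), the log of the average evidence,
    # computed without ever exponentiating the (possibly huge) evidences.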
    if hdf_output:
        # Record the combined statistics in the output metadata.
        run_level = '/lalinference/' + run_identifier
        if run_level not in metadata:
            metadata[run_level] = {}
        metadata[run_level]['log_bayes_factor'] = log_bayes_factor
        metadata[run_level]['log_evidence'] = log_evidence
        metadata[run_level]['log_noise_evidence'] = log_noise_evidence
        metadata[run_level]['log_max_likelihood'] = log_max_likelihood
        write_posterior_to_hdf(opts.pos, headers, posterior, metadata,
                               run_identifier)
    else:
        write_posterior_to_ascii(opts.pos, headers, posterior,
                                 log_bayes_factor, log_evidence,
                                 log_noise_evidence, log_max_likelihood)
Functions referenced above (signatures as given in the accompanying index);
a short usage sketch follows the list:

def extract_metadata(filename, metadata, log_noise_evidences=[], log_max_likelihoods=[], nlive=[], dset_name=None, nest=False, strict_versions=True)
    Extract metadata from an HDF5 sample chain file.
def write_samples(table, filename, metadata=None, **kwargs)
    Write an HDF5 sample chain file.
def read_samples(filename, path=None, tablename=POSTERIOR_SAMPLES)
    Read an HDF5 sample chain file.
def compute_weights(data, Nlive)
    Returns log_ev, log_wts for the log-likelihood samples in data, assumed
    to be a result of nested sampling with Nlive live points.
def draw_N_posterior_many(datas, Nlives, Npost, verbose=False)
    Draw Npost samples from the posteriors represented by the
    (Nruns, Nsamples, Nparams)-shaped array datas.
def read_nested_from_ascii(nested_path_list)
    Read nested samples and evidence files from deprecated ASCII tables.
def write_posterior_to_hdf(posterior_path, headers, posterior, metadata, run_identifier)
    Write the combined posterior samples and metadata to an HDF5 file.
def read_nested_from_hdf5(nested_path_list, strict_versions=True)
    Read nested samples and metadata from one or more HDF5 files.
def write_posterior_to_ascii(posterior_path, headers, posterior, log_bayes_factor, log_evidence, log_noise_evidence, log_max_likelihood)
    Write posterior samples, evidence, and headers to deprecated ASCII files.
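A minimal end-to-end sketch of the same pipeline outside the script, assuming
lalinference is installed and that its nest2pos and io submodules provide the
functions listed above; the file names, the live-point counts, and the 'logL'
column are illustrative assumptions, not fixed by this script:

from functools import reduce
from numpy import log, logaddexp
from lalinference import LALInferenceHDF5NestedSamplesDatasetName as nested_dset_name
from lalinference.io import read_samples
from lalinference.nest2pos import compute_weights, draw_posterior_many

# Two independent nested-sampling runs over the same model.
paths = ['run1.hdf5', 'run2.hdf5']
chains = [read_samples(p, tablename=nested_dset_name) for p in paths]
nlive = [1024, 1024]

# Per-run log evidence and per-sample log posterior weights.
log_evs, log_wts = zip(*[compute_weights(data['logL'], n)
                         for data, n in zip(chains, nlive)])

# Evidence of the combined set: the mean of the per-run evidences,
# accumulated in log space for numerical stability.
log_evidence = reduce(logaddexp, log_evs) - log(len(log_evs))

# Non-repeating posterior draw pooled across both runs.
posterior = draw_posterior_many(chains, nlive, verbose=True)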