LALInference 4.1.9.1-5e288d3
lalinference_nest2pos.py

##python
from numpy import loadtxt, logaddexp, log, mean
from optparse import OptionParser
import gzip
import os
import os.path
import sys
from functools import reduce

from lalinference import LALInferenceHDF5PosteriorSamplesDatasetName as posterior_dset_name
from lalinference import LALInferenceHDF5NestedSamplesDatasetName as nested_dset_name
from lalinference.nest2pos import draw_posterior_many, draw_N_posterior_many, compute_weights

usage = '''%prog [-N Nlive] [-p posterior.hdf5] [-H header.txt] [--npos Npos] [--non-strict-versions] datafile1.hdf5 [datafile2.hdf5 ...]

%prog takes at least one nested sampling output file and outputs posterior
\tsamples. If more than one input file is specified, each file is converted,
\tthen posterior samples are drawn according to the evidence of each.
\tIf the --npos option is used, the algorithm
\twill draw approximately that number of samples from the posterior. This may
\tgive repeated samples in the output file. By default, the non-repeating
\talgorithm is used, but that may not produce enough samples.
\tThe input and output files may be in either HDF5 or ASCII format, with
\tASCII tables being deprecated. The format is chosen based on the file extension.
'''
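
# Example invocations (file names are illustrative only):
#   lalinference_nest2pos.py -p posterior.hdf5 nest_run1.hdf5 nest_run2.hdf5
#   lalinference_nest2pos.py --npos 5000 -p posterior.hdf5 nest_run1.hdf5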


def read_nested_from_hdf5(nested_path_list, strict_versions=True):
    headers = None
    input_arrays = []
    metadata = {}
    log_noise_evidences = []
    log_max_likelihoods = []
    nlive = []
    from lalinference.io import read_samples, extract_metadata

    for path in nested_path_list:
        if not os.path.isfile(path):
            print('Unable to open %s, skipping file' % path)
            continue
        try:
            tab = read_samples(path, tablename=nested_dset_name)
            input_arrays.append(tab)
        except KeyError:
            print('Unable to read table from %s, skipping' % path)
            continue

        # N.B.: extract_metadata appends to metadata, log_noise_evidences,
        # log_max_likelihoods and nlive, in addition to returning run_identifier.
        run_identifier = extract_metadata(path, metadata, log_noise_evidences,
                                          log_max_likelihoods, nlive,
                                          nested_dset_name, True, strict_versions)

    if len(input_arrays) == 0:
        print('No nested samples could be read from %s' % str(nested_path_list))
        raise IOError

    # For metadata stored as a list, take the average of numeric entries.
    for level in metadata:
        for key in metadata[level]:
            if isinstance(metadata[level][key], list) and all(isinstance(x, (int, float)) for x in metadata[level][key]):
                metadata[level][key] = mean(metadata[level][key])
            elif isinstance(metadata[level][key], list) and all(isinstance(x, str) for x in metadata[level][key]):
                print('Warning: only keeping the first of the %d entries found for metadata %s/%s. '
                      'You can find the whole list in the headers of the individual HDF5 output files.\n'
                      % (len(metadata[level][key]), level, key))
                metadata[level][key] = metadata[level][key][0]
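    # reduce(logaddexp, xs) - log(n) computes log(mean(exp(xs))), so the
    # combined noise evidence below is the arithmetic mean of the per-run
    # noise evidences.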
    log_noise_evidence = reduce(logaddexp, log_noise_evidences) - log(len(log_noise_evidences))
    log_max_likelihood = max(log_max_likelihoods)

    return input_arrays, log_noise_evidence, log_max_likelihood, metadata, nlive, run_identifier


def read_nested_from_ascii(nested_path_list):
    print('Warning: ASCII files are deprecated in favor of HDF5.')
    default_header_path = nested_path_list[0] + '_params.txt'
    if opts.headers is not None:
        header_path = opts.headers
        if not os.access(header_path, os.R_OK):
            raise OSError('Unable to open header file: %r' % header_path)
    elif os.access(default_header_path, os.R_OK):
        header_path = default_header_path
    else:
        header_path = None
    if header_path is None:
        if opts.verbose:
            print('No header file found, assuming inspnest default')
        header = 'mchirp eta time phi0 dist ra dec psi iota logL'
    else:
        if opts.verbose:
            print('Reading headers from %r' % header_path)
        with open(header_path, 'r') as f:
            header = f.readline()
    headers = header.split()
    # list() so the arrays can be indexed and re-iterated under Python 3
    input_arrays = list(map(loadtxt, nested_path_list))

    log_noise_evidences = []
    log_max_likelihoods = []
    for path in nested_path_list:
        path = path.replace('.gz', '') + '_B.txt'
        if os.access(path, os.R_OK):
            content = loadtxt(path)
            log_noise_evidences.append(content[2])
            log_max_likelihoods.append(content[3])
    if log_noise_evidences:
        log_noise_evidence = reduce(logaddexp, log_noise_evidences)
        log_max_likelihood = max(log_max_likelihoods)
    else:
        log_noise_evidence = 0
        log_max_likelihood = 0

    return headers, input_arrays, log_noise_evidence, log_max_likelihood


def write_posterior_to_hdf(posterior_path, headers, posterior, metadata,
                           run_identifier):
    from lalinference.io import write_samples
    write_samples(posterior, posterior_path,
                  path='/'.join(['', 'lalinference', run_identifier, posterior_dset_name]),
                  metadata=metadata, overwrite=True)

def write_posterior_to_ascii(posterior_path, headers, posterior,
                             log_bayes_factor, log_evidence,
                             log_noise_evidence, log_max_likelihood):
    print('Warning: HDF5 output is inactive since %r was passed as '
          'output file. ASCII output is deprecated.' % opts.pos)

    # Set the output number format
    if 1 <= opts.prec <= 20:
        prec = opts.prec
    else:
        prec = 14
        print('Using default precision instead of %r.' % opts.prec)

    if posterior_path is None:
        posterior_file = sys.stdout
    else:
        if opts.gz:
            # Text mode ('wt'): the writes below pass str, not bytes
            posterior_file = gzip.open(posterior_path, 'wt')
        else:
            posterior_file = open(posterior_path, 'w')
    for field in headers:
        posterior_file.write('%s\t' % field)
    posterior_file.write('\n')

    for row in posterior:
        for value in row:
            posterior_file.write('{1:.{0}e}\t'.format(prec, value))
        posterior_file.write('\n')
    if opts.pos is not None:
        posterior_file.close()

    # Write out an evidence file
    if opts.pos is not None:
        evidence_file_path = opts.pos + '_B.txt'
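        # Columns: log Bayes factor, log evidence, log noise evidence,
        # max log likelihood (read_nested_from_ascii reads fields 2 and 3 back in)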
        with open(evidence_file_path, 'w') as out:
            out.write('%f %f %f %f\n' % (log_bayes_factor, log_evidence,
                                         log_noise_evidence, log_max_likelihood))

    # Write out a header (git info and command line) file
    if opts.pos is not None:
        strout = ''
        for dfile in datafiles:
            fin = '%s_header.txt' % dfile
            if os.path.isfile(fin):
                with open(fin, 'r') as f:
                    for l in f.readlines():
                        strout += l
                strout += '\n\n'
        if strout != '':
            with open(opts.pos + '_header.txt', 'w') as fout:
                fout.write(strout)


def is_hdf5(path):
    extension = os.path.splitext(path)[1]
    if '.h5' in extension or '.hdf' in extension:
        return True
    elif extension.strip().lower() == '.dat':
        return False
    else:
        raise ValueError("Extension %r not recognized as HDF5 or '.dat': %r"
                         % (extension, path))


if __name__ == '__main__':
    parser = OptionParser(usage)
    parser.add_option(
        '-N', '--Nlive', action='store', type='int', dest='Nlive', default=None,
        help='Number of live points in each chain loaded', metavar='NUM')
    parser.add_option(
        '-p', '--pos', action='store', type='string', default=None,
        help='Output file for posterior samples', metavar='posterior.dat')
    parser.add_option(
        '--npos', action='store', type='int', default=None,
        help='Draw a specific number of posterior samples. May give '
             'repetitions. Disabled by default.', metavar='NUM')
    parser.add_option(
        '-H', '--headers', action='store', type='string', default=None,
        help='Header file explaining columns in data file',
        metavar='file.dat_params.txt')
    parser.add_option(
        '-d', '--prec', action='store', type='int', dest='prec', default=14,
        help='Number of decimal places required for output posterior samples. '
             'Default is 14.', metavar='NUM')
    parser.add_option(
        '-z', '--gzip', action='store_true', dest='gz', default=False,
        help='Gzip compress the output posterior samples (this will append '
             '.gz onto the posterior file). Default is no compression.')
    parser.add_option(
        '-v', '--verbose', action='store_true', default=False,
        help='Print some additional information')
    parser.add_option(
        '--non-strict-versions', action='store_true', dest='nonstrict',
        default=False, help='Set this flag to allow combining of nested '
                            'sample HDF5 files that do not necessarily have '
                            'the same version information')
    opts, args = parser.parse_args()

    # Argument checking
    datafiles = args
    if len(datafiles) < 1:
        print('No input file specified, exiting')
        sys.exit(1)

    if all(map(is_hdf5, datafiles)):
        hdf_input = True
    elif not any(map(is_hdf5, datafiles)):
        hdf_input = False
    else:
        raise ValueError('Input files appear to be mixed between HDF5 and ASCII.')

    if opts.pos is None:
        hdf_output = False
        print('No output file given, writing to stdout.')
    else:
        hdf_output = is_hdf5(opts.pos)

    if opts.Nlive is None and not hdf_input:
        print('Must specify number of live points using the --Nlive option if '
              'ASCII tables are used.')
        sys.exit(1)
    elif hdf_input and opts.Nlive is not None:
        print('Nlive is ignored in favor of the value in the HDF metadata.')

    # Read the input files
    return_values = read_nested_from_hdf5(datafiles,
                                          strict_versions=(not opts.nonstrict))
    (input_arrays, log_noise_evidence, log_max_likelihood,
     metadata, nlive, run_identifier) = return_values
    if len(input_arrays) == 0:
        print('Error: No input files were read')
        sys.exit(1)
    headers = input_arrays[0].dtype.names
    nlive = list(map(int, nlive))

    if opts.npos is not None:
        def sampler(datas, Nlives, **kwargs):
            return draw_N_posterior_many(datas, Nlives, opts.npos, **kwargs)
    else:
        sampler = draw_posterior_many

    # Create the posterior from the nested samples;
    # input_arrays has shape (nfiles, nsamples, nfields)
    posterior = sampler(input_arrays,
                        nlive,
                        verbose=opts.verbose)
    # posterior is a list of lists/arrays with overall shape (nsamples, nfields)
    # posterior = array(posterior)

    log_evs, log_wts = zip(*[compute_weights(data['logL'], n)
                             for data, n in zip(input_arrays, nlive)])
    if opts.verbose:
        print('Log evidences from input files: %s' % str(log_evs))

    # Compute the combined evidence: the log of the mean of the per-run evidences
    log_evidence = reduce(logaddexp, log_evs) - log(len(log_evs))
    log_bayes_factor = log_evidence - log_noise_evidence

    # Update the metadata with the new evidences
    run_level = '/lalinference/' + run_identifier
    if run_level not in metadata:
        metadata[run_level] = {}
    metadata[run_level]['log_bayes_factor'] = log_bayes_factor
    metadata[run_level]['log_evidence'] = log_evidence
    metadata[run_level]['log_noise_evidence'] = log_noise_evidence
    metadata[run_level]['log_max_likelihood'] = log_max_likelihood

    if hdf_output:
        write_posterior_to_hdf(opts.pos, headers, posterior, metadata,
                               run_identifier)
    else:
        write_posterior_to_ascii(opts.pos, headers, posterior,
                                 log_bayes_factor, log_evidence,
                                 log_noise_evidence, log_max_likelihood)
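
The resampling performed by compute_weights and draw_posterior_many is the
standard nested-sampling reweighting: each dead point is assigned weight
w_i = L_i * (X_{i-1} - X_i), where X_i ~ exp(-i/Nlive) is the estimated
remaining prior volume, and posterior samples are drawn in proportion to
those weights. The following is a minimal illustrative sketch of that idea,
not the LALInference implementation (the sketch_* names are hypothetical;
the real functions live in lalinference/nest2pos.py and handle multiple
runs and further numerical details):

import numpy as np
from scipy.special import logsumexp

def sketch_compute_weights(logL, Nlive):
    # logL: dead points' log-likelihoods, sorted in increasing order.
    # Prior volume remaining after i dead points: X_i ~ exp(-i/Nlive).
    X = np.exp(-np.arange(1, len(logL) + 1) / Nlive)
    dX = np.concatenate(([1.0], X[:-1])) - X   # shell volumes X_{i-1} - X_i
    log_wts = logL + np.log(dX)                # unnormalised log weights L_i dX_i
    log_ev = logsumexp(log_wts)                # log evidence, log sum_i L_i dX_i
    return log_ev, log_wts - log_ev            # normalised posterior log weights

def sketch_draw_posterior(samples, log_wts, rng=np.random.default_rng()):
    # Non-repeating rejection step: keep point i with probability w_i / max(w)
    keep = np.log(rng.uniform(size=len(log_wts))) < log_wts - log_wts.max()
    return samples[keep]

Combining several runs then mirrors the script above: the per-run log
evidences are merged with reduce(logaddexp, log_evs) - log(len(log_evs)),
i.e. the evidence of the combination is the mean of the per-run evidences.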