Source code for pesummary.core.file.formats.hdf5

# Licensed under an MIT style license -- see LICENSE.md

import h5py
import numpy as np
from pesummary.core.file.formats.base_read import Read
from pesummary.utils.dict import load_recursively, paths_to_key

__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]


[docs] def read_hdf5(path, **kwargs): """Grab the parameters and samples in a .hdf5 file Parameters ---------- path: str path to the result file you wish to read in kwargs: dict all kwargs passed to _read_hdf5_with_deepdish or _read_hdf5_with_h5py functions """ try: return _read_hdf5_with_deepdish(path, **kwargs) except Exception: return _read_hdf5_with_h5py(path, **kwargs)
def _read_hdf5_with_deepdish(path, remove_params=None, path_to_samples=None): """Grab the parameters and samples in a .hdf5 file with deepdish Parameters ---------- path: str path to the result file you wish to read in remove_params: list, optional parameters you wish to remove from the posterior table """ import deepdish f = deepdish.io.load(path) if path_to_samples is None: try: path_to_samples, = paths_to_key("posterior", f) path_to_samples = path_to_samples[0] except ValueError: try: path_to_samples, = paths_to_key("posterior_samples", f) path_to_samples = path_to_samples[0] except ValueError: raise ValueError( "Unable to find a 'posterior' or 'posterior_samples' group " "in the file '{}'".format(path) ) reduced_f, = load_recursively(path_to_samples, f) parameters = [i for i in reduced_f.keys()] if remove_params is not None: for param in remove_params: if param in parameters: parameters.remove(param) data = np.zeros([len(reduced_f[parameters[0]]), len(parameters)]) for num, par in enumerate(parameters): for key, i in enumerate(reduced_f[par]): data[key][num] = float(np.real(i)) data = data.tolist() for num, par in enumerate(parameters): if par == "logL": parameters[num] = "log_likelihood" return parameters, data def _read_hdf5_with_h5py( path, remove_params=None, path_to_samples=None, return_posterior_dataset=False ): """Grab the parameters and samples in a .hdf5 file with h5py Parameters ---------- path: str path to the result file you wish to read in remove_params: list, optional parameters you wish to remove from the posterior table """ import h5py import copy if path_to_samples is None: path_to_samples = Read.guess_path_to_samples(path) f = h5py.File(path, 'r') c1 = isinstance(f[path_to_samples], h5py._hl.group.Group) if c1 and "parameter_names" not in f[path_to_samples].keys(): original_parameters = [i for i in f[path_to_samples].keys()] if remove_params is not None: parameters = [ i for i in original_parameters if i not in remove_params ] else: parameters = copy.deepcopy(original_parameters) n_samples = len(f[path_to_samples][parameters[0]]) try: samples = np.array([ f[path_to_samples][original_parameters.index(i)] for i in parameters ]).T except (AttributeError, KeyError, TypeError): samples = np.array([f[path_to_samples][i] for i in parameters]).T cond1 = "loglr" not in parameters or "log_likelihood" not in \ parameters cond2 = "likelihood_stats" in f.keys() and "loglr" in \ f["likelihood_stats"] if cond1 and cond2: parameters.append("log_likelihood") for num, i in enumerate(samples): samples[num].append(float(f["likelihood_stats/loglr"][num])) elif c1: original_parameters = [ i.decode("utf-8") if isinstance(i, bytes) else i for i in f[path_to_samples]["parameter_names"] ] if remove_params is not None: parameters = [ i for i in original_parameters if i not in remove_params ] else: parameters = copy.deepcopy(original_parameters) samples = np.array(f[path_to_samples]["samples"]) elif isinstance(f[path_to_samples], h5py._hl.dataset.Dataset): parameters = list(f[path_to_samples].dtype.names) samples = np.array(f[path_to_samples]).view((float, len(parameters))).tolist() if return_posterior_dataset: return parameters, samples, f[path_to_samples] f.close() return parameters, samples def _write_hdf5( parameters, samples, outdir="./", label=None, filename=None, overwrite=False, dataset_name="posterior_samples", **kwargs ): """Write a set of samples to a hdf5 file Parameters ---------- parameters: list list of parameters samples: 2d list list of samples. Columns correspond to a given parameter outdir: str, optional directory to write the dat file label: str, optional The label of the analysis. This is used in the filename if a filename if not specified filename: str, optional The name of the file that you wish to write overwrite: Bool, optional If True, an existing file of the same name will be overwritten dataset_name: str, optional name of the dataset to store a set of samples. Default posterior_samples """ from pesummary.utils.samples_dict import SamplesDict from pesummary.utils.utils import check_filename default_filename = "pesummary_{}.h5" filename = check_filename( default_filename=default_filename, outdir=outdir, label=label, filename=filename, overwrite=overwrite ) samples = SamplesDict(parameters, np.array(samples).T) _samples = samples.to_structured_array() with h5py.File(filename, "w") as f: f.create_dataset(dataset_name, data=_samples) def write_hdf5( parameters, samples, outdir="./", label=None, filename=None, overwrite=False, **kwargs ): """Write a set of samples to a hdf5 file Parameters ---------- parameters: list list of parameters samples: 2d list list of samples. Columns correspond to a given parameter outdir: str, optional directory to write the dat file label: str, optional The label of the analysis. This is used in the filename if a filename if not specified filename: str, optional The name of the file that you wish to write overwrite: Bool, optional If True, an existing file of the same name will be overwritten """ from pesummary.io.write import _multi_analysis_write _multi_analysis_write( _write_hdf5, parameters, samples, outdir=outdir, label=label, filename=filename, overwrite=overwrite, file_format="hdf5", **kwargs )