Source code for pesummary.core.file.formats.hdf5

# Licensed under an MIT style license -- see LICENSE.md

import h5py
import numpy as np
from pesummary.core.file.formats.base_read import Read
from pesummary.utils.dict import load_recursively, paths_to_key

__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]



[docs]
def read_hdf5(path, **kwargs):
    """Grab the parameters and samples in a .hdf5 file

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    kwargs: dict
        all kwargs passed to _read_hdf5_with_deepdish or _read_hdf5_with_h5py functions
    """
    try:
        return _read_hdf5_with_deepdish(path, **kwargs)
    except Exception:
        return _read_hdf5_with_h5py(path, **kwargs)



def _read_hdf5_with_deepdish(path, remove_params=None, path_to_samples=None):
    """Grab the parameters and samples in a .hdf5 file with deepdish

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table
    """
    import deepdish

    f = deepdish.io.load(path)
    if path_to_samples is None:
        try:
            path_to_samples, = paths_to_key("posterior", f)
            path_to_samples = path_to_samples[0]
        except ValueError:
            try:
                path_to_samples, = paths_to_key("posterior_samples", f)
                path_to_samples = path_to_samples[0]
            except ValueError:
                raise ValueError(
                    "Unable to find a 'posterior' or 'posterior_samples' group "
                    "in the file '{}'".format(path)
                )
    reduced_f, = load_recursively(path_to_samples, f)
    parameters = [i for i in reduced_f.keys()]
    if remove_params is not None:
        for param in remove_params:
            if param in parameters:
                parameters.remove(param)
    data = np.zeros([len(reduced_f[parameters[0]]), len(parameters)])
    for num, par in enumerate(parameters):
        for key, i in enumerate(reduced_f[par]):
            data[key][num] = float(np.real(i))
    data = data.tolist()
    for num, par in enumerate(parameters):
        if par == "logL":
            parameters[num] = "log_likelihood"
    return parameters, data


def _read_hdf5_with_h5py(
    path, remove_params=None, path_to_samples=None,
    return_posterior_dataset=False
):
    """Grab the parameters and samples in a .hdf5 file with h5py

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table
    """
    import h5py
    import copy

    if path_to_samples is None:
        path_to_samples = Read.guess_path_to_samples(path)

    f = h5py.File(path, 'r')
    c1 = isinstance(f[path_to_samples], h5py._hl.group.Group)
    if c1 and "parameter_names" not in f[path_to_samples].keys():
        original_parameters = [i for i in f[path_to_samples].keys()]
        if remove_params is not None:
            parameters = [
                i for i in original_parameters if i not in remove_params
            ]
        else:
            parameters = copy.deepcopy(original_parameters)
        n_samples = len(f[path_to_samples][parameters[0]])
        try:
            samples = np.array([
                f[path_to_samples][original_parameters.index(i)] for i in
                parameters
            ]).T
        except (AttributeError, KeyError, TypeError):
            samples = np.array([f[path_to_samples][i] for i in parameters]).T
        cond1 = "loglr" not in parameters or "log_likelihood" not in \
            parameters
        cond2 = "likelihood_stats" in f.keys() and "loglr" in \
            f["likelihood_stats"]
        if cond1 and cond2:
            parameters.append("log_likelihood")
            for num, i in enumerate(samples):
                samples[num].append(float(f["likelihood_stats/loglr"][num]))
    elif c1:
        original_parameters = [
            i.decode("utf-8") if isinstance(i, bytes) else i for i in
            f[path_to_samples]["parameter_names"]
        ]
        if remove_params is not None:
            parameters = [
                i for i in original_parameters if i not in remove_params
            ]
        else:
            parameters = copy.deepcopy(original_parameters)
        samples = np.array(f[path_to_samples]["samples"])
    elif isinstance(f[path_to_samples], h5py._hl.dataset.Dataset):
        parameters = list(f[path_to_samples].dtype.names)
        samples = np.array(f[path_to_samples]).view((float, len(parameters))).tolist()
    if return_posterior_dataset:
        return parameters, samples, f[path_to_samples]
    f.close()
    return parameters, samples


def _write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    dataset_name="posterior_samples", **kwargs
):
    """Write a set of samples to a hdf5 file

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the dat file
    label: str, optional
        The label of the analysis. This is used in the filename if a filename
        if not specified
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        If True, an existing file of the same name will be overwritten
    dataset_name: str, optional
        name of the dataset to store a set of samples. Default posterior_samples
    """
    from pesummary.utils.samples_dict import SamplesDict
    from pesummary.utils.utils import check_filename

    default_filename = "pesummary_{}.h5"
    filename = check_filename(
        default_filename=default_filename, outdir=outdir, label=label, filename=filename,
        overwrite=overwrite
    )
    samples = SamplesDict(parameters, np.array(samples).T)
    _samples = samples.to_structured_array()
    with h5py.File(filename, "w") as f:
        f.create_dataset(dataset_name, data=_samples)


def write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    **kwargs
):
    """Write a set of samples to a hdf5 file

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the dat file
    label: str, optional
        The label of the analysis. This is used in the filename if a filename
        if not specified
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        If True, an existing file of the same name will be overwritten
    """
    from pesummary.io.write import _multi_analysis_write

    _multi_analysis_write(
        _write_hdf5, parameters, samples, outdir=outdir, label=label,
        filename=filename, overwrite=overwrite, file_format="hdf5", **kwargs
    )