# bilby/core/likelihood.py (coverage: 90% of 309 statements)
import copy

import numpy as np
from scipy.special import gammaln, xlogy
from scipy.stats import multivariate_normal

from .utils import infer_parameters_from_function, infer_args_from_function_except_n_args
class Likelihood(object):

    def __init__(self, parameters=None):
        """Empty likelihood class to be subclassed by other likelihoods

        Parameters
        ==========
        parameters: dict
            A dictionary of the parameter names and associated values
        """
        self.parameters = parameters
        self._meta_data = None
        self._marginalized_parameters = []

    def __repr__(self):
        return self.__class__.__name__ + '(parameters={})'.format(self.parameters)

    def log_likelihood(self):
        """

        Returns
        =======
        float
        """
        return np.nan

    def noise_log_likelihood(self):
        """

        Returns
        =======
        float
        """
        return np.nan

    def log_likelihood_ratio(self):
        """Difference between log likelihood and noise log likelihood

        Returns
        =======
        float
        """
        return self.log_likelihood() - self.noise_log_likelihood()

    @property
    def meta_data(self):
        return getattr(self, '_meta_data', None)

    @meta_data.setter
    def meta_data(self, meta_data):
        if isinstance(meta_data, dict):
            self._meta_data = meta_data
        else:
            raise ValueError("The meta_data must be an instance of dict")

    @property
    def marginalized_parameters(self):
        return self._marginalized_parameters
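# A minimal sketch of the intended subclassing pattern: store the data in
# ``__init__`` and read the current parameter values from ``self.parameters``
# inside ``log_likelihood``. The class name and the ``mu`` parameter below are
# illustrative, not part of this module.
#
#     class UnitGaussianLikelihood(Likelihood):
#         def __init__(self, data):
#             super().__init__(parameters={"mu": None})
#             self.data = np.asarray(data)
#
#         def log_likelihood(self):
#             res = self.data - self.parameters["mu"]
#             return -0.5 * np.sum(res**2 + np.log(2 * np.pi))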
class ZeroLikelihood(Likelihood):
    """ A special test-only class which always returns a log-likelihood of zero

    Parameters
    ==========
    likelihood: bilby.core.likelihood.Likelihood
        A likelihood object to mimic

    """

    def __init__(self, likelihood):
        super(ZeroLikelihood, self).__init__(dict.fromkeys(likelihood.parameters))
        self.parameters = likelihood.parameters
        self._parent = likelihood

    def log_likelihood(self):
        return 0

    def noise_log_likelihood(self):
        return 0

    def __getattr__(self, name):
        return getattr(self._parent, name)
class Analytical1DLikelihood(Likelihood):
    """
    A general class for 1D analytical functions. The model
    parameters are inferred from the arguments of the function

    Parameters
    ==========
    x, y: array_like
        The data to analyse
    func:
        The python function to fit to the data. Note, this must take the
        dependent variable as its first argument. The other arguments
        will require a prior and will be sampled over (unless a fixed
        value is given).
    """

    def __init__(self, x, y, func, **kwargs):
        parameters = infer_parameters_from_function(func)
        super(Analytical1DLikelihood, self).__init__(dict())
        self.x = x
        self.y = y
        self._func = func
        self._function_keys = [key for key in parameters if key not in kwargs]
        self.kwargs = kwargs

    def __repr__(self):
        return self.__class__.__name__ + '(x={}, y={}, func={})'.format(self.x, self.y, self.func.__name__)

    @property
    def func(self):
        """ Make func read-only """
        return self._func

    @property
    def model_parameters(self):
        """ This sets up the function-only parameters (i.e. not sigma for the GaussianLikelihood) """
        return {key: self.parameters[key] for key in self.function_keys}

    @property
    def function_keys(self):
        """ Make function_keys read-only """
        return self._function_keys

    @property
    def n(self):
        """ The number of data points """
        return len(self.x)

    @property
    def x(self):
        """ The independent variable. The setter ensures that single numbers are converted to arrays internally """
        return self._x

    @x.setter
    def x(self, x):
        if isinstance(x, int) or isinstance(x, float):
            x = np.array([x])
        self._x = x

    @property
    def y(self):
        """ The dependent variable. The setter ensures that single numbers are converted to arrays internally """
        return self._y

    @y.setter
    def y(self, y):
        if isinstance(y, int) or isinstance(y, float):
            y = np.array([y])
        self._y = y

    @property
    def residual(self):
        """ Residual of the function against the data. """
        return self.y - self.func(self.x, **self.model_parameters, **self.kwargs)
class GaussianLikelihood(Analytical1DLikelihood):
    def __init__(self, x, y, func, sigma=None, **kwargs):
        """
        A general Gaussian likelihood for known or unknown noise - the model
        parameters are inferred from the arguments of the function

        Parameters
        ==========
        x, y: array_like
            The data to analyse
        func:
            The python function to fit to the data. Note, this must take the
            dependent variable as its first argument. The other arguments
            will require a prior and will be sampled over (unless a fixed
            value is given).
        sigma: None, float, array_like
            If None, the standard deviation of the noise is unknown and will be
            estimated (note: this requires a prior to be given for sigma). If
            not None, this defines the standard deviation of the data points.
            This can either be a single float, or an array with length equal
            to that of `x` and `y`.
        """

        super(GaussianLikelihood, self).__init__(x=x, y=y, func=func, **kwargs)
        self.sigma = sigma

        # Check if sigma was provided, if not it is a parameter
        if self.sigma is None:
            self.parameters['sigma'] = None

    def log_likelihood(self):
        log_l = np.sum(- (self.residual / self.sigma)**2 / 2 -
                       np.log(2 * np.pi * self.sigma**2) / 2)
        return log_l

    def __repr__(self):
        return self.__class__.__name__ + '(x={}, y={}, func={}, sigma={})' \
            .format(self.x, self.y, self.func.__name__, self.sigma)

    @property
    def sigma(self):
        """
        This checks if sigma has been set in parameters. If so, that value
        will be used. Otherwise, the attribute sigma is used. The logic is
        that if sigma is not in parameters the attribute is used, which was
        given at init (i.e. the known sigma as either a float or array).
        """
        return self.parameters.get('sigma', self._sigma)

    @sigma.setter
    def sigma(self, sigma):
        if sigma is None:
            self._sigma = sigma
        elif isinstance(sigma, float) or isinstance(sigma, int):
            self._sigma = sigma
        elif len(sigma) == self.n:
            self._sigma = sigma
        else:
            raise ValueError('Sigma must be either a float or an array-like with the same length as x.')
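# A minimal usage sketch; the model ``line`` and its parameters ``m`` and
# ``c`` are illustrative, not part of this module.
#
#     def line(x, m, c):
#         return m * x + c
#
#     x = np.linspace(0, 1, 101)
#     y = line(x, m=2.0, c=1.0) + np.random.normal(0, 0.1, len(x))
#     likelihood = GaussianLikelihood(x, y, line, sigma=0.1)
#     likelihood.parameters = {"m": 2.0, "c": 1.0}
#     print(likelihood.log_likelihood())
#
# If ``sigma=None`` were passed instead, 'sigma' would become a sampled
# parameter and would need a prior (or a value in ``likelihood.parameters``).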
class PoissonLikelihood(Analytical1DLikelihood):
    def __init__(self, x, y, func, **kwargs):
        """
        A general Poisson likelihood for a rate - the model parameters are
        inferred from the arguments of function, which provides a rate.

        Parameters
        ==========
        x: array_like
            A dependent variable at which the Poisson rates will be calculated
        y: array_like
            The data to analyse - this must be a set of non-negative integers,
            each being the number of events within some interval.
        func:
            The python function providing the rate of events per interval to
            fit to the data. The function must be defined with the first
            argument being a dependent parameter (although this does not have
            to be used by the function if not required). The subsequent
            arguments will require priors and will be sampled over (unless a
            fixed value is given).
        """

        super(PoissonLikelihood, self).__init__(x=x, y=y, func=func, **kwargs)

    def log_likelihood(self):
        rate = self.func(self.x, **self.model_parameters, **self.kwargs)
        if not isinstance(rate, np.ndarray):
            raise ValueError(
                "Poisson rate function returns wrong value type! "
                "Is {} when it should be numpy.ndarray".format(type(rate)))
        elif np.any(rate < 0.):
            raise ValueError("Poisson rate function returns a negative value!")
        elif np.any(rate == 0.):
            return -np.inf
        else:
            return np.sum(-rate + self.y * np.log(rate) - gammaln(self.y + 1))

    def __repr__(self):
        return Analytical1DLikelihood.__repr__(self)

    @property
    def y(self):
        """ Property ensures that the y-values are non-negative integers. """
        return self.__y

    @y.setter
    def y(self, y):
        if not isinstance(y, np.ndarray):
            y = np.array([y])
        # check array is a non-negative integer array
        if y.dtype.kind not in 'ui' or np.any(y < 0):
            raise ValueError("Data must be non-negative integers")
        self.__y = y
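# A minimal usage sketch; the constant-rate model ``rate`` and its parameter
# ``mu`` are illustrative, not part of this module.
#
#     def rate(x, mu):
#         return mu * np.ones_like(x)
#
#     x = np.arange(10, dtype=float)
#     y = np.random.poisson(lam=5, size=len(x))
#     likelihood = PoissonLikelihood(x, y, rate)
#     likelihood.parameters = {"mu": 5.0}
#     print(likelihood.log_likelihood())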
class ExponentialLikelihood(Analytical1DLikelihood):
    def __init__(self, x, y, func, **kwargs):
        """
        An exponential likelihood function.

        Parameters
        ==========
        x, y: array_like
            The data to analyse
        func:
            The python function to fit to the data. Note, this must take the
            dependent variable as its first argument. The other arguments
            will require a prior and will be sampled over (unless a fixed
            value is given). The model should return the expected mean of
            the exponential distribution for each data point.
        """
        super(ExponentialLikelihood, self).__init__(x=x, y=y, func=func, **kwargs)

    def log_likelihood(self):
        mu = self.func(self.x, **self.model_parameters, **self.kwargs)
        if np.any(mu < 0.):
            return -np.inf
        return -np.sum(np.log(mu) + (self.y / mu))

    def __repr__(self):
        return Analytical1DLikelihood.__repr__(self)

    @property
    def y(self):
        """ Property ensures that the y-values are non-negative. """
        return self._y

    @y.setter
    def y(self, y):
        if not isinstance(y, np.ndarray):
            y = np.array([y])
        if np.any(y < 0):
            raise ValueError("Data must be non-negative")
        self._y = y
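# A minimal usage sketch; the constant-mean model ``mean`` and its parameter
# ``mu`` are illustrative, not part of this module.
#
#     def mean(x, mu):
#         return mu * np.ones_like(x)
#
#     x = np.linspace(0, 1, 50)
#     y = np.random.exponential(scale=3.0, size=len(x))
#     likelihood = ExponentialLikelihood(x, y, mean)
#     likelihood.parameters = {"mu": 3.0}
#     print(likelihood.log_likelihood())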
class StudentTLikelihood(Analytical1DLikelihood):
    def __init__(self, x, y, func, nu=None, sigma=1, **kwargs):
        """
        A general Student's t-likelihood for known or unknown number of degrees
        of freedom, and known or unknown scale (which tends toward the standard
        deviation for large numbers of degrees of freedom) - the model
        parameters are inferred from the arguments of the function

        https://en.wikipedia.org/wiki/Student%27s_t-distribution#Generalized_Student's_t-distribution

        Parameters
        ==========
        x, y: array_like
            The data to analyse
        func:
            The python function to fit to the data. Note, this must take the
            dependent variable as its first argument. The other arguments
            will require a prior and will be sampled over (unless a fixed
            value is given).
        nu: None, float
            If None, the number of degrees of freedom of the noise is unknown
            and will be estimated (note: this requires a prior to be given for
            nu). If not None, this defines the number of degrees of freedom of
            the data points. As an example, a `nu` of `len(x) - 2` is equivalent
            to having marginalised a Gaussian distribution over an unknown
            standard deviation parameter using a uniform prior.
        sigma: 1.0, float
            Set the scale of the distribution. If not given then this defaults
            to 1, which specifies a standard (central) Student's t-distribution
        """
        super(StudentTLikelihood, self).__init__(x=x, y=y, func=func, **kwargs)

        self.nu = nu
        self.sigma = sigma

        # Check if nu was provided, if not it is a parameter
        if self.nu is None:
            self.parameters['nu'] = None

    def log_likelihood(self):
        if self.nu <= 0.:
            raise ValueError("Number of degrees of freedom for Student's "
                             "t-likelihood must be positive")

        nu = self.nu
        log_l =\
            np.sum(- (nu + 1) * np.log1p(self.lam * self.residual**2 / nu) / 2 +
                   np.log(self.lam / (nu * np.pi)) / 2 +
                   gammaln((nu + 1) / 2) - gammaln(nu / 2))
        return log_l

    def __repr__(self):
        base_string = '(x={}, y={}, func={}, nu={}, sigma={})'
        return self.__class__.__name__ + base_string.format(
            self.x, self.y, self.func.__name__, self.nu, self.sigma)

    @property
    def lam(self):
        """ Converts 'scale' to 'precision' """
        return 1. / self.sigma ** 2

    @property
    def nu(self):
        """ This checks if nu has been set in parameters. If so, that value
        will be used. Otherwise, the attribute nu is used. The logic is
        that if nu is not in parameters the attribute is used, which was
        given at init (i.e. the known nu as a float)."""
        return self.parameters.get('nu', self._nu)

    @nu.setter
    def nu(self, nu):
        self._nu = nu
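# A minimal usage sketch with known ``nu`` and ``sigma``; the model ``line``
# and its parameters are illustrative, not part of this module.
#
#     def line(x, m, c):
#         return m * x + c
#
#     x = np.linspace(0, 1, 101)
#     y = line(x, 2.0, 1.0) + 0.1 * np.random.standard_t(df=5, size=len(x))
#     likelihood = StudentTLikelihood(x, y, line, nu=5, sigma=0.1)
#     likelihood.parameters = {"m": 2.0, "c": 1.0}
#     print(likelihood.log_likelihood())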
class Multinomial(Likelihood):
    """
    Likelihood for a system with N discrete possibilities.
    """

    def __init__(self, data, n_dimensions, base="parameter_"):
        """

        Parameters
        ==========
        data: array-like
            The number of objects in each class
        n_dimensions: int
            The number of classes
        base: str
            The base of the parameter labels
        """
        self.data = np.array(data)
        self._total = np.sum(self.data)
        super(Multinomial, self).__init__(dict())
        self.n = n_dimensions
        self.base = base
        self._nll = None

    def log_likelihood(self):
        """
        Since n - 1 parameters are sampled, the last parameter is 1 - the rest
        """
        probs = [self.parameters[self.base + str(ii)]
                 for ii in range(self.n - 1)]
        probs.append(1 - sum(probs))
        return self._multinomial_ln_pdf(probs=probs)

    def noise_log_likelihood(self):
        """
        Our null hypothesis is that all bins have probability 1 / nbins, i.e.,
        no bin is preferred over any other.
        """
        if self._nll is None:
            self._nll = self._multinomial_ln_pdf(probs=1 / self.n)
        return self._nll

    def _multinomial_ln_pdf(self, probs):
        """Lifted from scipy.stats.multinomial._logpdf"""
        ln_prob = gammaln(self._total + 1) + np.sum(
            xlogy(self.data, probs) - gammaln(self.data + 1), axis=-1)
        return ln_prob
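# A minimal usage sketch for three classes; only the first n - 1 probabilities
# are set, the last is implied. The counts and values are illustrative.
#
#     likelihood = Multinomial(data=[40, 35, 25], n_dimensions=3)
#     likelihood.parameters = {"parameter_0": 0.4, "parameter_1": 0.35}
#     print(likelihood.log_likelihood_ratio())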
class AnalyticalMultidimensionalCovariantGaussian(Likelihood):
    """
    A multivariate Gaussian likelihood
    with known analytic solution.

    Parameters
    ==========
    mean: array_like
        Array with the mean values of the distribution
    cov: array_like
        The ndim*ndim covariance matrix
    """

    def __init__(self, mean, cov):
        self.cov = np.atleast_2d(cov)
        self.mean = np.atleast_1d(mean)
        self.sigma = np.sqrt(np.diag(self.cov))
        self.pdf = multivariate_normal(mean=self.mean, cov=self.cov)
        parameters = {"x{0}".format(i): 0 for i in range(self.dim)}
        super(AnalyticalMultidimensionalCovariantGaussian, self).__init__(parameters=parameters)

    @property
    def dim(self):
        return len(self.cov[0])

    def log_likelihood(self):
        x = np.array([self.parameters["x{0}".format(i)] for i in range(self.dim)])
        return self.pdf.logpdf(x)
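# A minimal usage sketch for a 2-d problem; the numbers are illustrative.
#
#     likelihood = AnalyticalMultidimensionalCovariantGaussian(
#         mean=[0.0, 0.0], cov=[[1.0, 0.5], [0.5, 2.0]])
#     likelihood.parameters = {"x0": 0.1, "x1": -0.2}
#     print(likelihood.log_likelihood())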
class AnalyticalMultidimensionalBimodalCovariantGaussian(Likelihood):
    """
    A bimodal multivariate Gaussian likelihood
    with known analytic solution.

    Parameters
    ==========
    mean_1: array_like
        Array with the mean value of the first mode
    mean_2: array_like
        Array with the mean value of the second mode
    cov: array_like
        The ndim*ndim covariance matrix (shared by both modes)
    """

    def __init__(self, mean_1, mean_2, cov):
        self.cov = np.atleast_2d(cov)
        self.sigma = np.sqrt(np.diag(self.cov))
        self.mean_1 = np.atleast_1d(mean_1)
        self.mean_2 = np.atleast_1d(mean_2)
        self.pdf_1 = multivariate_normal(mean=self.mean_1, cov=self.cov)
        self.pdf_2 = multivariate_normal(mean=self.mean_2, cov=self.cov)
        parameters = {"x{0}".format(i): 0 for i in range(self.dim)}
        super(AnalyticalMultidimensionalBimodalCovariantGaussian, self).__init__(parameters=parameters)

    @property
    def dim(self):
        return len(self.cov[0])

    def log_likelihood(self):
        x = np.array([self.parameters["x{0}".format(i)] for i in range(self.dim)])
        return -np.log(2) + np.logaddexp(self.pdf_1.logpdf(x), self.pdf_2.logpdf(x))
class JointLikelihood(Likelihood):
    def __init__(self, *likelihoods):
        """
        A likelihood for combining pre-defined likelihoods.
        The parameters dict is automatically combined from the parameters dicts
        of the given likelihoods. If parameters have initially different values
        across the different likelihoods, the value from the last given
        likelihood is chosen. This does not matter when using the
        JointLikelihood for sampling, because the parameters will be set
        consistently.

        Parameters
        ==========
        *likelihoods: bilby.core.likelihood.Likelihood
            likelihoods to be combined parsed as arguments
        """
        self.likelihoods = likelihoods
        super(JointLikelihood, self).__init__(parameters={})
        self.__sync_parameters()

    def __sync_parameters(self):
        """ Synchronizes parameters between the likelihoods
        so that all likelihoods share a single parameter dict."""
        for likelihood in self.likelihoods:
            self.parameters.update(likelihood.parameters)
        for likelihood in self.likelihoods:
            likelihood.parameters = self.parameters

    @property
    def likelihoods(self):
        """ The list of likelihoods """
        return self._likelihoods

    @likelihoods.setter
    def likelihoods(self, likelihoods):
        likelihoods = copy.deepcopy(likelihoods)
        if isinstance(likelihoods, tuple) or isinstance(likelihoods, list):
            if all(isinstance(likelihood, Likelihood) for likelihood in likelihoods):
                self._likelihoods = list(likelihoods)
            else:
                raise ValueError('Try setting the JointLikelihood like this\n'
                                 'JointLikelihood(first_likelihood, second_likelihood, ...)')
        elif isinstance(likelihoods, Likelihood):
            self._likelihoods = [likelihoods]
        else:
            raise ValueError('Input likelihood is not a list or tuple. You need to set multiple likelihoods.')

    def log_likelihood(self):
        """ This is just the sum of the log-likelihoods of all parts of the joint likelihood """
        return sum([likelihood.log_likelihood() for likelihood in self.likelihoods])

    def noise_log_likelihood(self):
        """ This is just the sum of the noise log-likelihoods of all parts of the joint likelihood """
        return sum([likelihood.noise_log_likelihood() for likelihood in self.likelihoods])
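# A minimal usage sketch combining two Gaussian likelihoods that share the
# model ``line``; names and numbers are illustrative. Note that the shared
# parameters dict should be updated in place, not replaced, so that all
# constituent likelihoods keep seeing the same dict.
#
#     def line(x, m, c):
#         return m * x + c
#
#     x = np.linspace(0, 1, 101)
#     y1 = line(x, 2.0, 1.0) + np.random.normal(0, 0.1, len(x))
#     y2 = line(x, 2.0, 1.0) + np.random.normal(0, 0.2, len(x))
#     joint = JointLikelihood(
#         GaussianLikelihood(x, y1, line, sigma=0.1),
#         GaussianLikelihood(x, y2, line, sigma=0.2))
#     joint.parameters.update({"m": 2.0, "c": 1.0})
#     print(joint.log_likelihood())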
def function_to_celerite_mean_model(func):
    from celerite.modeling import Model as CeleriteModel
    return _function_to_gp_model(func, CeleriteModel)


def function_to_george_mean_model(func):
    from george.modeling import Model as GeorgeModel
    return _function_to_gp_model(func, GeorgeModel)
def _function_to_gp_model(func, cls):
    class MeanModel(cls):
        parameter_names = tuple(infer_args_from_function_except_n_args(func=func, n=1))

        def get_value(self, t):
            params = {name: getattr(self, name) for name in self.parameter_names}
            return func(t, **params)

        def compute_gradient(self, *args, **kwargs):
            pass

    return MeanModel
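# A minimal sketch of the conversion, assuming ``celerite`` is installed; the
# function ``line`` is illustrative, not part of this module.
#
#     def line(t, m, c):
#         return m * t + c
#
#     MeanModel = function_to_celerite_mean_model(line)
#     mean_model = MeanModel(m=1.0, c=0.5)
#     mean_model.get_value(np.array([0.0, 1.0]))  # -> array([0.5, 1.5])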
class _GPLikelihood(Likelihood):

    def __init__(self, kernel, mean_model, t, y, yerr=1e-6, gp_class=None):
        """
        Basic Gaussian Process likelihood interface for `celerite` and `george`.
        For `celerite` documentation see: https://celerite.readthedocs.io/en/stable/
        For `george` documentation see: https://george.readthedocs.io/en/latest/

        Parameters
        ==========
        kernel: Union[celerite.term.Term, george.kernels.Kernel]
            `celerite` or `george` kernel. See the respective package documentation about the usage.
        mean_model: Union[celerite.modeling.Model, george.modeling.Model]
            Mean model
        t: array_like
            The `times` or `x` values of the data set.
        y: array_like
            The `y` values of the data set.
        yerr: float, int, array_like, optional
            The error values on the y-values. If a single value is given, it is assumed that the value
            applies for all y-values. Default is 1e-6, effectively assuming that no y-errors are present.
        gp_class: type, None, optional
            GPClass to use. This is determined by the child class used to instantiate the GP. Should usually
            not be given by the user and is mostly used for testing.
        """
        self.kernel = kernel
        self.mean_model = mean_model
        self.t = np.array(t)
        self.y = np.array(y)
        self.yerr = np.array(yerr)
        self.GPClass = gp_class
        self.gp = self.GPClass(kernel=self.kernel, mean=self.mean_model, fit_mean=True, fit_white_noise=True)
        self.gp.compute(self.t, yerr=self.yerr)
        super().__init__(parameters=self.gp.get_parameter_dict())

    def set_parameters(self, parameters):
        """
        Safely set a set of parameters to the internal instances of the `gp` and `mean_model`, as well as the
        `parameters` dict.

        Parameters
        ==========
        parameters: dict, pandas.DataFrame
            The set of parameters we would like to set.
        """
        for name, value in parameters.items():
            try:
                self.gp.set_parameter(name=name, value=value)
            except ValueError:
                pass
            self.parameters[name] = value
class CeleriteLikelihood(_GPLikelihood):

    def __init__(self, kernel, mean_model, t, y, yerr=1e-6):
        """
        Basic Gaussian Process likelihood interface for `celerite`.
        For `celerite` documentation see: https://celerite.readthedocs.io/en/stable/

        Parameters
        ==========
        kernel: celerite.term.Term
            `celerite` kernel. See the package documentation about the usage.
        mean_model: celerite.modeling.Model
            Mean model
        t: array_like
            The `times` or `x` values of the data set.
        y: array_like
            The `y` values of the data set.
        yerr: float, int, array_like, optional
            The error values on the y-values. If a single value is given, it is assumed that the value
            applies for all y-values. Default is 1e-6, effectively assuming that no y-errors are present.
        """
        import celerite
        super().__init__(kernel=kernel, mean_model=mean_model, t=t, y=y, yerr=yerr, gp_class=celerite.GP)

    def log_likelihood(self):
        """
        Calculate the log-likelihood for the Gaussian process given the current parameters.

        Returns
        =======
        float: The log-likelihood value.
        """
        self.gp.set_parameter_vector(vector=np.array(list(self.parameters.values())))
        try:
            return self.gp.log_likelihood(self.y)
        except Exception:
            return -np.inf
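# A minimal usage sketch, assuming ``celerite`` is installed; the kernel
# choice, the model ``line``, and the numbers are all illustrative.
#
#     import celerite
#
#     def line(t, m, c):
#         return m * t + c
#
#     t = np.linspace(0, 10, 100)
#     y = line(t, 0.5, 1.0) + np.random.normal(0, 0.1, len(t))
#     kernel = celerite.terms.RealTerm(log_a=0.0, log_c=0.0)
#     mean_model = function_to_celerite_mean_model(line)(m=0.5, c=1.0)
#     likelihood = CeleriteLikelihood(kernel, mean_model, t, y, yerr=0.1)
#     print(likelihood.log_likelihood())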
class GeorgeLikelihood(_GPLikelihood):

    def __init__(self, kernel, mean_model, t, y, yerr=1e-6):
        """
        Basic Gaussian Process likelihood interface for `george`.
        For `george` documentation see: https://george.readthedocs.io/en/latest/

        Parameters
        ==========
        kernel: george.kernels.Kernel
            `george` kernel. See the package documentation about the usage.
        mean_model: george.modeling.Model
            Mean model
        t: array_like
            The `times` or `x` values of the data set.
        y: array_like
            The `y` values of the data set.
        yerr: float, int, array_like, optional
            The error values on the y-values. If a single value is given, it is assumed that the value
            applies for all y-values. Default is 1e-6, effectively assuming that no y-errors are present.
        """
        import george
        super().__init__(kernel=kernel, mean_model=mean_model, t=t, y=y, yerr=yerr, gp_class=george.GP)

    def log_likelihood(self):
        """
        Calculate the log-likelihood for the Gaussian process given the current parameters.

        Returns
        =======
        float: The log-likelihood value.
        """
        for name, value in self.parameters.items():
            try:
                self.gp.set_parameter(name=name, value=value)
            except ValueError:
                raise ValueError(f"Parameter {name} not a valid parameter for the GP.")
        try:
            return self.gp.log_likelihood(self.y)
        except Exception:
            return -np.inf
class MarginalizedLikelihoodReconstructionError(Exception):
    pass