Source code for chi._population_models

#
# This file is part of the chi repository
# (https://github.com/DavAug/chi/) which is released under the
# BSD 3-clause license. See accompanying LICENSE.md for copyright notice and
# full license details.
#

import copy
import math

import numpy as np
from scipy.stats import norm, truncnorm
from scipy.special import erf

import chi


[docs] class PopulationModel(object): """ A base class for population models. Population models can be multi-dimensional, but unless explicitly specfied in the model description, the dimensions of the model are modelled independently. :param n_dim: The dimensionality of the population model. :type n_dim: int, optional :param dim_names: Optional names of the population dimensions. :type dim_names: List[str], optional """ def __init__(self, n_dim=1, dim_names=None): super(PopulationModel, self).__init__() if n_dim < 1: raise ValueError( 'The dimension of the population model has to be greater or ' 'equal to 1.') self._n_dim = int(n_dim) self._n_hierarchical_dim = self._n_dim self._special_dims = [] self._n_pooled_dims = 0 self._n_hetero_dims = 0 self._n_covariates = 0 self._n_ids = 1 if dim_names: if len(dim_names) != self._n_dim: raise ValueError( 'The number of dimension names has to match the number of ' 'dimensions of the population model.') dim_names = [str(name) for name in dim_names] else: dim_names = [ 'Dim. %d' % (id_dim + 1) for id_dim in range(self._n_dim)] self._dim_names = dim_names def _shape(self, score, dpsi, dtheta, reduce, flattened): """ Returns the score, dpsi and dtheta. If reduce is True, dpsi and dtheta are flattened according to the hierarchical ordering. If reduce is False, dpsi and dtheta are returned separately. If flattened is True, dtheta is flattened. If flattened is False, dtheta is returned in shape (n_ids, n_param_per_dim, n_dim) """ if reduce or flattened: # Sum contributions across individuals and flatten dtheta = np.sum(dtheta, axis=0).flatten() if reduce: return score, np.hstack((dpsi.flatten(), dtheta)) return score, dpsi, dtheta
[docs] def compute_individual_parameters( self, parameters, eta, return_eta=False, *args, **kwargs): """ Returns the individual parameters. If the model does not transform the bottom-level parameters, ``eta`` is returned. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids * n_dim)`` or ``(n_ids, n_dim)`` :rtype: np.ndarray of shape ``(n_ids, n_dim)`` :param return_eta: A boolean flag indicating whether eta is returned regardless of whether the parametrisation is centered or not. :type return_eta: boolean, optional """ raise NotImplementedError
[docs] def compute_log_likelihood( self, parameters, observations, *args, **kwargs): """ Returns the log-likelihood of the population model parameters. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: float """ raise NotImplementedError
[docs] def compute_pointwise_ll(self, parameters, observations, *args, **kwargs): """ Returns the pointwise log-likelihood of the model parameters for each observation. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape (p, n_dim) :param observations: "Observations" of the individuals. Typically refers to the values of a mechanistic model parameter for each individual. :type observations: np.ndarray of shape (n, n_dim) :returns: Log-likelihoods for each individual parameter for population parameters. :rtype: np.ndarray of length (n, n_dim) """ raise NotImplementedError
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, *args, **kwargs): """ Returns the log-likelihood of the population model parameters and its sensitivity to the population parameters as well as the observations. The sensitivities of the bottom-level log-likelihoods with respect to the ``observations`` (bottom-level parameters) may be provided using ``dlogp_dpsi``, in order to compute the sensitivities of the full hierarchical log-likelihood. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters with respect to the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. :type reduce: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ raise NotImplementedError
[docs] def get_covariate_names(self): """ Returns the names of the covariates. If name is not set, defaults are returned. """ return []
[docs] def get_dim_names(self): """ Returns the names of the dimensions. """ return copy.copy(self._dim_names)
[docs] def get_special_dims(self): r""" Returns information on pooled and heterogeneously modelled dimensions. Returns a tuple with 3 entries: 1. A list of lists, each entry containing 1. the start and end dimension of the special dimensions; 2. the associated start and end index of the model parameters; 3. and a boolean indicating whether the dimension is pooled (``True``) or heterogeneous (``False``). 2. The number of pooled dimensions. 3. The number of heterogeneous dimensions. """ return self._special_dims, self._n_pooled_dims, self._n_hetero_dims
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the names of the population model parameters. If name is not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ raise NotImplementedError
[docs] def n_covariates(self): """ Returns the number of covariates. """ return self._n_covariates
[docs] def n_dim(self): """ Returns the dimensionality of the population model. """ return self._n_dim
[docs] def n_hierarchical_dim(self): """ Returns the number of parameter dimensions whose samples are not deterministically defined by the population parameters. I.e. the number of dimensions minus the number of pooled and heterogeneously modelled dimensions. """ return self._n_hierarchical_dim
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. :param n_ids: Number of individuals. :type n_ids: int """ raise NotImplementedError
[docs] def n_ids(self): """ Returns the number of modelled individuals. If the behaviour of the population model does not change with the number of modelled individuals 0 is returned. """ return self._n_ids
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ raise NotImplementedError
[docs] def sample(self, parameters, n_samples=None, seed=None, *args, **kwargs): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional """ raise NotImplementedError
[docs] def set_covariate_names(self, names=None): """ Sets the names of the covariates. If the model has no covariates, input is ignored. :param names: A list of parameter names. If ``None``, covariate names are reset to defaults. :type names: List[str] """ # Default is that models do not have covariates. return None
[docs] def set_dim_names(self, names=None): """ Sets the names of the population model dimensions. :param names: A list of dimension names. If ``None``, dimension names are reset to defaults. :type names: List[str], optional """ if names is None: # Reset names to defaults self._dim_names = [ 'Dim. %d' % (id_dim + 1) for id_dim in range(self._n_dim)] return None if len(names) != self._n_dim: raise ValueError( 'Length of names does not match the number of dimensions.') self._dim_names = [str(label) for label in names]
[docs] def set_n_ids(self, n_ids): """ Sets the number of modelled individuals. The behaviour of most population models is the same for any number of individuals, in which case ``n_ids`` is ignored. However, for some models, e.g. :class:`HeterogeneousModel` the behaviour changes with ``n_ids``. :param n_ids: Number of individuals. :type n_ids: int """ n_ids = int(n_ids) if n_ids < 1: raise ValueError( 'n_ids is invalid. The number of individuals has to be ' 'positive.') self._n_ids = n_ids
[docs] def set_parameter_names(self, names=None): """ Sets the names of the population model parameters. :param names: A list of parameter names. If ``None``, parameter names are reset to defaults. :type names: List[str] """ raise NotImplementedError
[docs] class ComposedPopulationModel(PopulationModel): r""" A multi-dimensional population model composed of mutliple population models. A :class:`ComposedPopulationModel` assumes that its constituent population models are independent. The probability density function of the composed population model is therefore given by the product of the probability density functions. For constituent population models :math:`p(\psi _1 | \theta _1), \ldots , p(\psi _n | \theta _n)`, the probability density function of the composed population model is given by .. math:: p(\psi _1, \ldots , \psi _n | \theta _1, \ldots , \theta _n) = \prod _{k=1}^n p(\psi _k | \theta _k) . Extends :class:`chi.PopulationModel`. :param population_models: A list of population models. :type population_models: List[chi.PopulationModel] """ def __init__(self, population_models): super(PopulationModel, self).__init__() # Check inputs for pop_model in population_models: if not isinstance(pop_model, chi.PopulationModel): raise TypeError( 'The population models have to be instances of ' 'chi.PopulationModel.') # Check that number of modelled individuals is compatible n_ids = 1 for pop_model in population_models: if (n_ids > 1) and (pop_model.n_ids() > 1) and ( n_ids != pop_model.n_ids()): raise ValueError( 'All population models must model the same number of ' 'individuals.') n_ids = n_ids if n_ids > 1 else pop_model.n_ids() self._population_models = population_models self._n_ids = n_ids self._n_bottom, self._n_top = self.n_hierarchical_parameters( self._n_ids) # Get properties of population models self._set_population_model_properties() # Make sure that models have unique parameter names # (if not enumerate dimensions to make them unique in most cases) names = self.get_parameter_names() if len(np.unique(names)) != len(names): dim_names = [ 'Dim. %d' % (dim_id + 1) for dim_id in range(self._n_dim)] self.set_dim_names(dim_names) def _compute_reduced_sensitivities( self, parameters, observations, dlogp_dpsi, covariates, *args, **kwargs): """ Returns the likelihood of the population model and its sensitivities with respect to the individual-level parameters and the population-level parameters. Counterpart to _compute_sensitivities, but instead of returning the sensitivities with respect to the individual-level parameters and the population-level parameters separately, only the sensitivies of the parameters exposed in a hierarchical inference settings are returned. This affects predominantly pooled and heterogenous dimensions. """ score = 0 dscore = np.empty(shape=self._n_bottom + self._n_top) dpsi = np.empty(shape=(self._n_ids, self._n_hierarchical_dim)) cov = None dlp_dpsi = None current_cov = 0 current_dim = 0 current_top = 0 current_hdim = 0 for pop_model in self._population_models: # Get covariates if self._n_covariates > 0: end_cov = current_cov + pop_model.n_covariates() cov = covariates[:, current_cov:end_cov] current_cov = end_cov # Get dlogp/dpsi n_dim = pop_model.n_dim() end_dim = current_dim + n_dim if dlogp_dpsi is not None: dlp_dpsi = dlogp_dpsi[:, current_dim:end_dim] n_b, n_t = pop_model.n_hierarchical_parameters(self._n_ids) end_top = current_top + n_t s, ds = pop_model.compute_sensitivities( parameters=parameters[current_top:end_top], observations=observations[:, current_dim:end_dim], covariates=cov, dlogp_dpsi=dlp_dpsi, reduce=True) # Add score and top sensitivities score += s dscore[self._n_bottom+current_top:self._n_bottom+end_top] = \ ds[n_b:] # Collect bottom-level sensitivities if n_b > 0: end_hdim = current_hdim + n_dim dpsi[:, current_hdim:end_hdim] = ds[:n_b].reshape( self._n_ids, n_dim) current_hdim = end_hdim current_dim = end_dim current_top = end_top # Add bottom-level sensitivities dscore[:self._n_bottom] = np.array(dpsi).flatten() return score, dscore def _compute_sensitivities( self, parameters, observations, dlogp_dpsi, covariates, *args, **kwargs): """ Returns the likelihood of the population model and its sensitivities with respect to the individual-level parameters and the population-level parameters. """ score = 0 n_ids = len(observations) dpsi = np.zeros(shape=(n_ids, self._n_dim)) dtheta = np.empty(shape=self._n_parameters) cov = None dlp_dpsi = None current_cov = 0 current_dim = 0 current_param = 0 for pop_model in self._population_models: # Get covariates if self._n_covariates > 0: end_cov = current_cov + pop_model.n_covariates() cov = covariates[:, current_cov:end_cov] current_cov = end_cov # Get dlogp/dpsi end_dim = current_dim + pop_model.n_dim() end_param = current_param + pop_model.n_parameters() if dlogp_dpsi is not None: dlp_dpsi = dlogp_dpsi[:, current_dim:end_dim] s, dp, dt = pop_model.compute_sensitivities( parameters=parameters[current_param:end_param], observations=observations[:, current_dim:end_dim], covariates=cov, dlogp_dpsi=dlp_dpsi) # Add score and sensitivities score += s dpsi[:, current_dim:end_dim] = dp dtheta[current_param:end_param] = dt current_dim = end_dim current_param = end_param return score, dpsi, dtheta def _set_population_model_properties(self): """ Sets the properties of the population model based on the submodels. """ n_dim = 0 n_parameters = 0 n_covariates = 0 n_hierarchical_dim = 0 special_dim = [] n_pooled = 0 n_hetero = 0 for pop_model in self._population_models: s, p, h = pop_model.get_special_dims() n_pooled += p n_hetero += h for entry in s: # Need to shift dimensions and parameter index special_dim += [[ entry[0] + n_dim, entry[1] + n_dim, entry[2] + n_parameters, entry[3] + n_parameters, entry[4] ]] n_covariates += pop_model.n_covariates() n_dim += pop_model.n_dim() n_hierarchical_dim += pop_model.n_hierarchical_dim() n_parameters += pop_model.n_parameters() self._n_dim = n_dim self._n_parameters = n_parameters self._n_covariates = n_covariates self._n_hierarchical_dim = n_hierarchical_dim self._special_dims = special_dim self._n_pooled_dims = n_pooled self._n_hetero_dims = n_hetero def _shape_eta(self, eta): """ Reshapes eta to numpy.ndarry of shape (n_ids, n_dim). """ n_dim = len(eta) // self._n_ids eta = eta.reshape(self._n_ids, n_dim) if n_dim == self._n_dim: # There are no special dimensions and we can just reshape and # return return eta # Some dimensions have do be filled with dummies, because pooled # and heterogeneous dimensions are special and replace dummy values # later on. if (n_dim + self._n_pooled_dims + self._n_hetero_dims) != self._n_dim: raise ValueError( 'eta is invalid. Eta cannot be reshaped into (n_ids, n_dim), ' 'even after taking pooled and heterogenuous dimensions into ' 'account.') start = 0 shift = 0 eta_prime = np.empty(shape=(self._n_ids, self._n_dim)) for s in self._special_dims: end = s[0] eta_prime[:, start:end] = eta[:, start-shift:end-shift] start = s[1] shift += start - end # Fill trailing dimensions eta_prime[:, start:] = eta[:, start-shift:] return eta_prime
[docs] def compute_individual_parameters( self, parameters, eta, covariates=None, return_eta=False): """ Returns the individual parameters. If the model does not transform the bottom-level parameters, ``eta`` is returned. If the population model does not use covariates, the covariate input is ignored. If the population model uses covariates, the covariates of the constituent population models are expected to be concatinated in the order of the consitutent models. The order of the covariates can be checked with :meth:`get_covariate_names`. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids * n_dim)`` or ``(n_ids, n_dim)`` :param covariates: Covariates of the individuals. :type covariates: np.ndarray of shape ``(n_ids, n_cov)`` :param return_eta: A boolean flag indicating whether eta is returned regardless of whether the parametrisation is centered or not. :type return_eta: boolean, optional :rtype: np.ndarray of shape ``(n_ids, n_dim)`` """ eta = np.asarray(eta) parameters = np.asarray(parameters) if eta.ndim == 1: eta = self._shape_eta(eta) # Compute individual parameters cov = None current_p = 0 current_dim = 0 current_cov = 0 psis = np.empty(shape=eta.shape) for pop_model in self._population_models: # Get covariates if self._n_covariates > 0: end_cov = current_cov + pop_model.n_covariates() cov = covariates[:, current_cov:end_cov] current_cov = end_cov end_p = current_p + pop_model.n_parameters() end_dim = current_dim + pop_model.n_dim() psis[:, current_dim:end_dim] = \ pop_model.compute_individual_parameters( parameters=parameters[current_p:end_p], eta=eta[:, current_dim:end_dim], covariates=cov, return_eta=return_eta) current_p = end_p current_dim = end_dim return psis
[docs] def compute_log_likelihood( self, parameters, observations, covariates=None): """ Returns the log-likelihood of the population model parameters. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param covariates: Covariates of the individuals. :type covariates: np.ndarray of shape ``(n_ids, n_cov)`` :rtype: float """ observations = np.asarray(observations) parameters = np.asarray(parameters) score = 0 cov = None current_dim = 0 current_param = 0 current_cov = 0 for pop_model in self._population_models: # Get covariates if self._n_covariates > 0: end_cov = current_cov + pop_model.n_covariates() cov = covariates[:, current_cov:end_cov] current_cov = end_cov end_dim = current_dim + pop_model.n_dim() end_param = current_param + pop_model.n_parameters() score += pop_model.compute_log_likelihood( parameters=parameters[current_param:end_param], observations=observations[:, current_dim:end_dim], covariates=cov ) current_dim = end_dim current_param = end_param return score
[docs] def compute_pointwise_ll(self, parameters, observations): r""" Returns the pointwise log-likelihood of the model parameters for each observation. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p, n_dim)`` :param observations: "Observations" of the individuals. Typically refers to the values of a mechanistic model parameter for each individual. :type observations: np.ndarray of shape ``(n, n_dim)`` :returns: Log-likelihoods for each individual parameter for population parameters. :rtype: np.ndarray of length ``(n, n_dim)`` """ raise NotImplementedError
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, covariates=None, *args, **kwargs): """ Returns the log-likelihood of the population model parameters and its sensitivity to the population parameters as well as the observations. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters with respect to the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :param covariates: Covariates of individuals. :type covariates: np.ndarray of shape ``(n_ids, n_cov)`` :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ observations = np.asarray(observations) parameters = np.asarray(parameters) if reduce: return self._compute_reduced_sensitivities( parameters, observations, dlogp_dpsi, covariates, *args, **kwargs ) return self._compute_sensitivities( parameters, observations, dlogp_dpsi, covariates, *args, **kwargs)
[docs] def get_covariate_names(self): """ Returns the names of the covariates. If name is not set, defaults are returned. """ names = [] for pop_model in self._population_models: names += pop_model.get_covariate_names() return names
[docs] def get_dim_names(self): """ Returns the names of the dimensions. """ names = [] for pop_model in self._population_models: names += pop_model.get_dim_names() return names
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the names of the population model parameters. If name is not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ names = [] for pop_model in self._population_models: names += pop_model.get_parameter_names(exclude_dim_names) return names
[docs] def get_population_models(self): """ Returns the constituent population models. """ return self._population_models
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. Parameters ---------- n_ids Number of individuals. """ n_bottom, n_top = 0, 0 for pop_model in self._population_models: n_b, n_t = pop_model.n_hierarchical_parameters(n_ids) n_bottom += n_b n_top += n_t return n_bottom, n_top
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ return self._n_parameters
[docs] def sample( self, parameters, n_samples=None, seed=None, covariates=None, *args, **kwargs): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. If the model does not depend on covariates the ``covariate`` input is ignored. If the population model uses covariates, the covariates of the constituent population models are expected to be concatinated in the order of the consitutent models. The order of the covariates can be checked with :meth:`get_covariate_names`. :param parameters: Values of the model parameters. :type parameters: List, np.ndarray of shape (n_parameters,) :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, np.random.Generator, optional :param covariates: Covariate values, specifying the sampled subpopulation. :type covariates: List, np.ndarray of shape ``(n_cov,)`` or ``(n_samples, n_cov)``, optional :returns: Samples from population model conditional on covariates. :rtype: np.ndarray of shape (n_samples, n_dim) """ parameters = np.asarray(parameters) if len(parameters) != self._n_parameters: raise ValueError( 'The number of provided parameters does not match the expected' ' number of top-level parameters.') if (self._n_covariates > 0): covariates = np.asarray(covariates) if covariates.ndim == 1: covariates = covariates[np.newaxis, :] if covariates.shape[1] != self._n_covariates: raise ValueError( 'Provided covariates do not match the number of ' 'covariates.') # Define shape of samples if n_samples is None: n_samples = 1 sample_shape = (int(n_samples), self._n_dim) samples = np.empty(shape=sample_shape) # Transform seed to random number generator, so all models use the same # seed rng = np.random.default_rng(seed=seed) # Sample from constituent population models cov = None current_dim = 0 current_param = 0 current_cov = 0 for pop_model in self._population_models: end_dim = current_dim + pop_model.n_dim() end_param = current_param + pop_model.n_parameters() # Get covariates if self._n_covariates > 0: end_cov = current_cov + pop_model.n_covariates() cov = covariates[:, current_cov:end_cov] current_cov = end_cov # Sample bottom-level parameters samples[:, current_dim:end_dim] = pop_model.sample( parameters=parameters[current_param:end_param], n_samples=n_samples, seed=rng, covariates=cov) current_dim = end_dim current_param = end_param return samples
[docs] def set_dim_names(self, names=None): r""" Sets the names of the population model dimensions. Parameters ---------- names An array-like object with string-convertable entries of length :meth:`n_dim`. If ``None``, dimension names are reset to defaults. """ if names is None: # Reset dimension names for pop_model in self._population_models: pop_model.set_dim_names() return None if len(names) != self._n_dim: raise ValueError( 'Length of names does not match the number of dimensions.') # Set dimension names names = [str(label) for label in names] current_dim = 0 for pop_model in self._population_models: end_dim = current_dim + pop_model.n_dim() pop_model.set_dim_names(names[current_dim:end_dim]) current_dim = end_dim
[docs] def set_n_ids(self, n_ids): """ Sets the number of modelled individuals. The behaviour of most population models is the same for any number of individuals, in which case ``n_ids`` is ignored. However, for some models, e.g. :class:`HeterogeneousModel` the behaviour changes with ``n_ids``. :param n_ids: Number of individuals. :type n_ids: int """ # Check cheap option first: Behaviour is not changed by input n_ids = int(n_ids) if n_ids == self._n_ids: return None for pop_model in self._population_models: pop_model.set_n_ids(n_ids) # Update n_ids and model properties self._n_ids = n_ids self._set_population_model_properties() self._n_bottom, self._n_top = self.n_hierarchical_parameters( self._n_ids)
[docs] def set_parameter_names(self, names=None): """ Sets the names of the population model parameters. :param names: A list with string-convertable entries of length :meth:`n_parameters`. If ``None``, parameter names are reset to defaults. :type names: List[str] """ if names is None: # Reset parameter names for pop_model in self._population_models: pop_model.set_parameter_names() return None if len(names) != self._n_parameters: raise ValueError( 'Length of names does not match the number of parameters.') # Set parameter names names = [str(label) for label in names] current_param = 0 for pop_model in self._population_models: end_param = current_param + pop_model.n_parameters() pop_model.set_parameter_names(names[current_param:end_param]) current_param = end_param
[docs] class CovariatePopulationModel(PopulationModel): r""" A population model that models the parameters across individuals conditional on covariates of the inter-individual variability. A covariate population model partitions a population into subpopulations which are characterised by covariates :math:`\chi`. The inter-individual variability within a subpopulation is modelled by a non-covariate population model .. math:: p(\psi | \theta, \chi) = p(\psi | \vartheta (\theta, \chi)), where :math:`\vartheta` are the population parameters of the subpopulation which depend on global population parameters :math:`\theta` and the covariates :math:`\chi`. The ``population_model`` input defines the non-covariate population model for the subpopulations :math:`p(\psi | \vartheta )` and the ``covariate_model`` defines the relationship between the subpopulations and the covariates :math:`\vartheta (\theta, \chi)`. Extends :class:`PopulationModel`. :param population_model: Defines the distribution of the subpopulations. :type population_model: PopulationModel :param covariate_model: Defines the covariate model. :type covariate_model: CovariateModel :param dim_names: Name of dimensions. :type dim_names: List[str], optional """ def __init__(self, population_model, covariate_model, dim_names=None): super(CovariatePopulationModel, self).__init__() # Check inputs if not isinstance(population_model, PopulationModel): raise TypeError( 'The population model has to be an instance of a ' 'chi.PopulationModel.') if not isinstance(covariate_model, chi.CovariateModel): raise TypeError( 'The covariate model has to be an instance of a ' 'chi.CovariateModel.') if isinstance(population_model, ComposedPopulationModel): raise TypeError( 'The population model cannot be an instance of a ' 'chi.ComposedPopulationModel. Please compose multiple ' 'covariate models instead.') if isinstance(population_model, ReducedPopulationModel): raise TypeError( 'The population model cannot be an instance of a ' 'chi.ReducedPopulationModel. Please define a covariate ' 'population model before fixing parameters.') # Remember models self._population_model = copy.deepcopy(population_model) self._covariate_model = copy.deepcopy(covariate_model) # Get properties self._n_dim = self._population_model.n_dim() self._n_pop = self._population_model.n_parameters() self._n_hierarchical_dim = self._population_model.n_hierarchical_dim() self._n_covariates = self._covariate_model.n_covariates() self._special_dims, self._n_pooled_dims, self._n_hetero_dims = \ self._population_model.get_special_dims() # Set names and all parameters to be modelled by the covariate model n_cov = self._covariate_model.n_covariates() self._population_model.set_dim_names(dim_names) indices = [] for dim_id in range(self._n_dim): for param_id in range(self._n_pop // self._n_dim): indices.append([param_id, dim_id]) self._covariate_model.set_population_parameters(indices) names = [] for name in self._population_model.get_parameter_names(): names += [name] * n_cov self._covariate_model.set_parameter_names(names)
[docs] def compute_individual_parameters( self, parameters, eta, covariates, return_eta=False): """ Returns the individual parameters. If the model does not transform the bottom-level parameters, ``eta`` is returned. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids * n_dim)`` or ``(n_ids, n_dim)`` :param covariates: Covariates of individuals. :type covariates: np.ndarray of shape ``(n_ids, n_cov)`` :param return_eta: A boolean flag indicating whether eta is returned regardless of whether the parametrisation is centered or not. :type return_eta: boolean, optional :rtype: np.ndarray of shape ``(n_ids, n_dim)`` """ # Split into covariate model parameters and population parameters parameters = np.asarray(parameters) pop_params = parameters[:self._n_pop] cov_params = parameters[self._n_pop:] # Reshape population parameters to (n_params_per_dim, n_dim) # NOTE: Assumes that all dimensions have the same number of parameters # TODO: Need to introduce a population model owned method that # transforms n_p to (n_p, n_d) for varying parameters across # dimensions. n_params_per_dim = self._n_pop // self._n_dim pop_params = pop_params.reshape(n_params_per_dim, self._n_dim) # Compute vartheta(theta, chi) parameters = self._covariate_model.compute_population_parameters( cov_params, pop_params, covariates) # Compute psi(eta, vartheta) psi = self._population_model.compute_individual_parameters( parameters, eta, return_eta=return_eta) return psi
[docs] def compute_log_likelihood(self, parameters, observations, covariates): """ Returns the log-likelihood of the population model parameters. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param covariates: Covariates of individuals. :type covariates: np.ndarray of shape ``(n_ids, n_cov)`` :rtype: float """ # Split into covariate model parameters and population parameters parameters = np.asarray(parameters) pop_params = parameters[:self._n_pop] cov_params = parameters[self._n_pop:] # Reshape population parameters to (n_params_per_dim, n_dim) n_params_per_dim = self._n_pop // self._n_dim pop_params = pop_params.reshape(n_params_per_dim, self._n_dim) # Compute vartheta(theta, chi) parameters = self._covariate_model.compute_population_parameters( cov_params, pop_params, covariates) # Compute log-likelihood score = self._population_model.compute_log_likelihood( parameters, observations) return score
# def compute_pointwise_ll(self, parameters, observations): # r""" # Returns the pointwise log-likelihood of the model parameters for # each observation. # :param parameters: Values of the model parameters :math:`\vartheta`. # :type parameters: List, np.ndarray of length (p,) # :param observations: "Observations" of the individuals :math:`\eta`. # Typically refers to the inter-individual fluctuations of the # mechanistic model parameter. # :type observations: List, np.ndarray of length (n,) # :returns: Log-likelihoods of individual parameters for population # parameters. # :rtype: np.ndarray of length (n,) # """ # raise NotImplementedError # # # Compute population parameters # # parameters = self._covariate_model.compute_population_parameters( # # parameters) # # # Compute log-likelihood # # score = self._population_model.compute_pointwise_ll( # # parameters, observations) # # return score
[docs] def compute_sensitivities( self, parameters, observations, covariates, dlogp_dpsi=None, reduce=False): """ Returns the log-likelihood of the population model parameters and its sensitivity to the population parameters as well as the observations. The sensitivities of the bottom-level log-likelihoods with respect to the ``observations`` (bottom-level parameters) may be provided using ``dlogp_dpsi``, in order to compute the sensitivities of the full hierarchical log-likelihood. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param covariates: Covariates of individuals. :type covariates: np.ndarray of shape ``(n_ids, n_cov)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters with respect to the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ # Split into covariate model parameters and population parameters parameters = np.asarray(parameters) pop_params = parameters[:self._n_pop] cov_params = parameters[self._n_pop:] # Reshape population parameters to (n_params_per_dim, n_dim) n_params_per_dim = self._n_pop // self._n_dim pop_params = pop_params.reshape(n_params_per_dim, self._n_dim) # Compute vartheta(theta, chi) and dvartheta/dtheta parameters = self._covariate_model.compute_population_parameters( cov_params, pop_params, covariates) # Compute log-likelihood and sensitivities dscore/deta, # dscore/dvartheta score, dpsi, dvartheta = self._population_model.compute_sensitivities( parameters, observations, dlogp_dpsi=dlogp_dpsi, flattened=False) # Propagate sensitivities of score to population model parameters dpop, dcov = self._covariate_model.compute_sensitivities( cov_params, pop_params, covariates, dvartheta) dtheta = np.hstack([dpop, dcov]) if reduce: return score, np.hstack((dpsi.flatten(), dtheta)) return score, dpsi, dtheta
[docs] def get_covariate_names(self): """ Returns the names of the covariates. If name is not set, defaults are returned. """ return self._covariate_model.get_covariate_names()
[docs] def get_dim_names(self): """ Returns the names of the dimensions. """ return self._population_model.get_dim_names()
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the names of the model parameters. If name is not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ names = self._population_model.get_parameter_names(exclude_dim_names) names += self._covariate_model.get_parameter_names() return names
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. Parameters ---------- n_ids Number of individuals. """ # Get number of individual parameters n_ids, _ = self._population_model.n_hierarchical_parameters(n_ids) return (n_ids, self.n_parameters())
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ n_parameters = self._population_model.n_parameters() n_parameters += self._covariate_model.n_parameters() return n_parameters
[docs] def sample(self, parameters, covariates, n_samples=None, seed=None): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional :param covariates: Covariate values, specifying the sampled subpopulation. :type covariates: List, np.ndarray of shape ``(n_cov,)`` or ``(n_samples, n_cov)`` """ covariates = np.array(covariates) if covariates.ndim == 1: covariates = covariates[np.newaxis, :] if covariates.shape[1] != self._n_covariates: raise ValueError( 'Provided covariates do not match the number of covariates.') if n_samples is None: n_samples = 1 n_samples = int(n_samples) covariates = np.broadcast_to( covariates, (n_samples, self._n_covariates)) # Split parameters into covariate model parameters and population model # parameters parameters = np.asarray(parameters) pop_params = parameters[:self._n_pop] cov_params = parameters[self._n_pop:] # Reshape population parameters to (n_params_per_dim, n_dim) n_params_per_dim = self._n_pop // self._n_dim pop_params = pop_params.reshape(n_params_per_dim, self._n_dim) # Compute population parameters pop_params = self._covariate_model.compute_population_parameters( cov_params, pop_params, covariates) # Sample parameters from population model seed = np.random.default_rng(seed) psi = np.empty(shape=(n_samples, self._n_dim)) for ids, params in enumerate(pop_params): psi[ids] = self._population_model.sample( params, n_samples=1, seed=seed)[0] return psi
[docs] def set_covariate_names(self, names=None): """ Sets the names of the covariates. :param names: A list of parameter names. If ``None``, covariate names are reset to defaults. :type names: List """ self._covariate_model.set_covariate_names(names)
[docs] def set_dim_names(self, names=None): """ Sets the names of the population model dimensions. Setting the dimension names overwrites the parameter names of the covariate model. :param names: A list of dimension names. If ``None``, dimension names are reset to defaults. :type names: List[str], optional """ self._population_model.set_dim_names(names) # Get names of parameters affected by the covariate model names = self._population_model.get_parameter_names() names = np.array(names).reshape( self._n_pop // self._n_dim, self._n_dim) pidx, didx = self._covariate_model.get_set_population_parameters() names = names[pidx, didx] n = [] for name in names: n += [name] * self._n_covariates self._covariate_model.set_parameter_names(n)
[docs] def set_parameter_names(self, names=None): """ Sets the names of the population model parameters. :param names: A list with string-convertable entries of length :meth:`n_parameters`. If ``None``, parameter names are reset to defaults. :type names: List[str] """ if names is None: # Reset parameter names self._population_model.set_parameter_names() self._covariate_model.set_parameter_names() return None self._population_model.set_parameter_names(names[:self._n_pop]) self._covariate_model.set_parameter_names(names[self._n_pop:])
[docs] def set_population_parameters(self, indices): """ Sets the parameters of the population model that are transformed by the covariate model. Note that this influences the number of model parameters. :param indices: A list of parameter indices [param index, dim index]. :type indices: List[Tuple[int, int]] """ # Check that indices are in bounds indices = np.array(indices) upper = np.max(indices, axis=0) n_pop = self._n_pop // self._n_dim out_of_bounds = \ (upper[0] >= n_pop) or (upper[1] >= self._n_dim) or \ (np.min(indices) < 0) if out_of_bounds: raise IndexError('The provided indices are out of bounds.') self._covariate_model.set_population_parameters(indices) # Update parameter names names = np.array(self._population_model.get_parameter_names()) names = names.reshape(n_pop, self._n_dim)[indices[:, 0], indices[:, 1]] n = [] for name in names: n += [name] * self._n_covariates self._covariate_model.set_parameter_names(n)
[docs] class GaussianModel(PopulationModel): r""" A population model which models parameters across individuals with a Gaussian distribution. A Gaussian population model assumes that a model parameter :math:`\psi` varies across individuals such that :math:`\psi` is normally distributed in the population .. math:: p(\psi |\mu, \sigma) = \frac{1}{\sqrt{2\pi} \sigma} \exp\left(-\frac{(\psi - \mu )^2} {2 \sigma ^2}\right). Here, :math:`\mu` and :math:`\sigma ^2` are the mean and variance of the Gaussian distribution. Any observed individual with parameter :math:`\psi _i` is assumed to be a realisation of the random variable :math:`\psi`. If ``centered = False`` the parametrisation is non-centered, i.e. .. math:: \psi = \mu + \sigma \eta , where :math:`\eta` models the inter-individual variability and is standard normally distributed. Extends :class:`PopulationModel`. :param n_dim: The dimensionality of the population model. :type n_dim: int, optional :param dim_names: Optional names of the population dimensions. :type dim_names: List[str], optional :param centered: Boolean flag indicating whether parametrisation is centered or non-centered. :type centered: bool, optional """ def __init__(self, n_dim=1, dim_names=None, centered=True): super(GaussianModel, self).__init__(n_dim, dim_names) # Set number of parameters self._n_parameters = 2 * self._n_dim # Set default parameter names self._parameter_names = ['Mean'] * self._n_dim + ['Std.'] * self._n_dim self._centered = bool(centered) def _compute_dpsi(self, sigma, observations): """ Computes the partial derivatives of psi = mu + sigma eta w.r.t. eta, mu and sigma. sigma: (n_ids, n_dim) observations: (n_ids, n_dim) rtype: np.ndarray of shape (n_ids, n_dim), np.ndarray of shape (n_ids, 2, n_dim) """ n_ids, n_dim = observations.shape dpsi_deta = sigma dpsi_dtheta = np.empty(shape=(n_ids, 2, n_dim)) dpsi_dtheta[:, 0] = np.ones(shape=(n_ids, n_dim)) dpsi_dtheta[:, 1] = observations return dpsi_deta, dpsi_dtheta @staticmethod def _compute_log_likelihood(mus, vars, observations): # pragma: no cover r""" Calculates the log-likelihood. mus shape: (n_ids, n_dim) vars shape: (n_ids, n_dim) observations: (n_ids, n_dim) """ # Compute log-likelihood score with np.errstate(divide='ignore'): log_likelihood = - np.sum( np.log(2 * np.pi * vars) / 2 + (observations - mus) ** 2 / (2 * vars)) # If score evaluates to NaN, return -infinity if np.isnan(log_likelihood): return -np.inf return log_likelihood def _compute_non_centered_sensitivities( self, sigmas, observations, dlogp_dpsi): """ Returns the log-likelihood and the sensitivities with respect to eta and theta. """ # Copmute score zeros = np.zeros(shape=(1, self._n_dim)) ones = np.ones(shape=(1, self._n_dim)) score = self._compute_log_likelihood(zeros, ones, observations) # Compute sensitivities if dlogp_dpsi is None: dlogp_dpsi = np.zeros((1, self._n_dim)) deta = self._compute_sensitivities(zeros, ones, observations) dpsi_deta, dpsi_dtheta = self._compute_dpsi(sigmas, observations) dlogp_deta = dlogp_dpsi * dpsi_deta + deta dlogp_dtheta = dlogp_dpsi[:, np.newaxis, :] * dpsi_dtheta return score, dlogp_deta, dlogp_dtheta @staticmethod def _compute_pointwise_ll(mean, var, observations): # pragma: no cover r""" Calculates the pointwise log-likelihoods using numba speed up. """ # Compute log-likelihood score log_likelihood = \ - np.log(2 * np.pi * var) / 2 \ - (observations - mean) ** 2 / (2 * var) # If score evaluates to NaN, return -infinity mask = np.isnan(log_likelihood) if np.any(mask): log_likelihood[mask] = -np.inf return log_likelihood return log_likelihood def _compute_sensitivities(self, mus, vars, psi): # pragma: no cover r""" Calculates the log-likelihood and its sensitivities using numba speed up. Expects: mus shape: (n_ids, n_dim) vars shape: (n_ids, n_dim) observations: (n_ids, n_dim) Returns: deta for non-centered of shape (n_ids, n_dim) and deta and dtheta for centered dtheta: np.ndarray of shape (n_ids, n_parameters, n_dim) """ # Compute sensitivities w.r.t. observations (psi) with np.errstate(divide='ignore'): dpsi = (mus - psi) / vars if not self._centered: # Despite the naming, this is really deta return dpsi # Compute sensitivities w.r.t. parameters n_ids = len(psi) with np.errstate(divide='ignore'): dmus = (psi - mus) / vars dstd = (-1 + (psi - mus)**2 / vars) / np.sqrt(vars) # Collect sensitivities n_ids, n_dim = psi.shape dtheta = np.empty(shape=(n_ids, 2, n_dim)) dtheta[:, 0] = dmus dtheta[:, 1] = dstd return dpsi, dtheta
[docs] def compute_individual_parameters( self, parameters, eta, return_eta=False, *args, **kwargs): r""" Returns the individual parameters. If ``centered = True``, the model does not transform the parameters and ``eta`` is returned. If ``centered = False``, the individual parameters are defined as .. math:: \psi = \mu + \sigma \eta, where :math:`\mu` and :math:`\sigma` are the model parameters and :math:`\eta` are the inter-individual fluctuations. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids * n_dim)`` or ``(n_ids, n_dim)`` :param return_eta: A boolean flag indicating whether eta is returned regardless of whether the parametrisation is centered or not. :type return_eta: boolean, optional :rtype: np.ndarray of shape ``(n_ids, n_dim)`` """ eta = np.asarray(eta) if eta.ndim == 1: eta = eta.reshape(self._n_ids, self._n_dim) if self._centered or return_eta: return eta parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: n_parameters = parameters[np.newaxis, ...] mu = parameters[:, 0] sigma = parameters[:, 1] if np.any(sigma < 0): return np.full(shape=eta.shape, fill_value=np.nan) psi = mu + sigma * eta return psi
[docs] def compute_log_likelihood( self, parameters, observations, *args, **kwargs): """ Returns the log-likelihood of the population model parameters. If ``centered = False``, the log-likelihood of the standard normal is returned. The contribution of the population parameters to the log-likelihood can be computed with the log-likelihood of the individual parameters, see :class:`chi.ErrorModel`. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: float """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] if not self._centered: mus = np.zeros(shape=(1, self._n_dim)) vars = np.ones(shape=(1, self._n_dim)) score = self._compute_log_likelihood(mus, vars, observations) return score parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: parameters = parameters[np.newaxis, ...] # Parse parameters mus = parameters[:, 0] sigmas = parameters[:, 1] vars = sigmas**2 if np.any(sigmas < 0): # The std. of the Gaussian distribution is strictly positive return -np.inf return self._compute_log_likelihood(mus, vars, observations)
# def compute_pointwise_ll( # self, parameters, observations): # pragma: no cover # r""" # Returns the pointwise log-likelihood of the model parameters for # each observation. # The pointwise log-likelihood of a Gaussian distribution is # the log-pdf evaluated at the observations # .. math:: # L(\mu , \sigma | \psi _i) = # \log p(\psi _i | # \mu , \sigma ) , # where # :math:`\psi _i` are the "observed" parameters :math:`\psi` from # individual :math:`i`. # Parameters # ---------- # parameters # An array-like object with the model parameter values, i.e. # [:math:`\mu`, :math:`\sigma`]. # observations # An array like object with the parameter values for the # individuals. # """ # # TODO: Needs proper research to establish which pointwise # # log-likelihood makes sense for hierarchical models. # # Also needs to be adapted to match multi-dimensional API. # raise NotImplementedError # observations = np.asarray(observations) # mean, std = parameters # var = std**2 # eps = 1E-6 # if (std <= 0) or (var <= eps): # # The std. of the Gaussian distribution is strictly positive # return np.full(shape=len(observations), fill_value=-np.inf) # return self._compute_pointwise_ll(mean, var, observations)
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, flattened=True, *args, **kwargs): """ Returns the log-likelihood of the population model parameters and its sensitivities to the population parameters as well as the observations. The sensitivities of the bottom-level log-likelihoods with respect to the ``observations`` (bottom-level parameters) may be provided using ``dlogp_dpsi``, in order to compute the sensitivities of the full hierarchical log-likelihood. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :param flattened: Boolean flag that indicates whether the sensitivities w.r.t. the population parameters are returned as 1-dim. array. If ``False`` sensitivities are returned in shape ``(n_ids, n_param_per_dim, n_dim)``. :type flattened: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: parameters = parameters[np.newaxis, ...] # Parse parameters mus = parameters[:, 0] sigmas = parameters[:, 1] vars = sigmas**2 if np.any(sigmas < 0): # The std. of the Gaussian distribution is strictly positive score = -np.inf dtheta = np.empty((len(observations), 2, self._n_dim)) dpsi = np.empty(observations.shape) return self._shape(score, dpsi, dtheta, reduce, flattened) if not self._centered: score, dpsi, dtheta = self._compute_non_centered_sensitivities( sigmas, observations, dlogp_dpsi) return self._shape(score, dpsi, dtheta, reduce, flattened) # Compute for centered parametrisation score = self._compute_log_likelihood(mus, vars, observations) dpsi, dtheta = self._compute_sensitivities(mus, vars, observations) if dlogp_dpsi is not None: dpsi += dlogp_dpsi return self._shape(score, dpsi, dtheta, reduce, flattened)
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the name of the the population model parameters. If name were not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ if exclude_dim_names: return copy.copy(self._parameter_names) # Append dimension names names = [] for name_id, name in enumerate(self._parameter_names): current_dim = name_id % self._n_dim names += [name + ' ' + self._dim_names[current_dim]] return names
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. :param n_ids: Number of individuals. :type n_ids: int """ n_ids = int(n_ids) return (n_ids * self._n_dim, self._n_parameters)
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ return self._n_parameters
[docs] def sample(self, parameters, n_samples=None, seed=None, *args, **kwargs): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. If ``centered = False`` random samples from the standard normal distribution are returned. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional """ parameters = np.asarray(parameters) if len(parameters.flatten()) != self._n_parameters: raise ValueError( 'The number of provided parameters does not match the expected' ' number of top-level parameters.') if parameters.ndim != 2: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) # Define shape of samples if n_samples is None: n_samples = 1 sample_shape = (int(n_samples), self._n_dim) # Get parameters mus = parameters[0] sigmas = parameters[1] if not self._centered: mus = np.zeros(mus.shape) sigmas = np.ones(sigmas.shape) if np.any(sigmas < 0): # The std. of the Gaussian distribution are # strictly positive raise ValueError( 'A Gaussian distribution only accepts strictly positive ' 'standard deviations.') # Sample from population distribution rng = np.random.default_rng(seed=seed) samples = rng.normal( loc=mus, scale=sigmas, size=sample_shape) return samples
[docs] def set_parameter_names(self, names=None): r""" Sets the names of the population model parameters. The population parameter of a GaussianModel are the population mean and standard deviation of the parameter :math:`\psi`. :param names: A list with string-convertable entries of length :meth:`n_parameters`. If ``None``, parameter names are reset to defaults. :type names: List[str] """ if names is None: # Reset names to defaults self._parameter_names = [ 'Mean'] * self._n_dim + ['Std.'] * self._n_dim return None if len(names) != self._n_parameters: raise ValueError( 'Length of names does not match the number of parameters.') self._parameter_names = [str(label) for label in names]
[docs] class HeterogeneousModel(PopulationModel): """ A population model which imposes no relationship on the model parameters across individuals. A heterogeneous model assumes that the parameters across individuals are independent. Extends :class:`PopulationModel`. .. note:: Heterogeneous population models are special: the number of parameters depends on the number of modelled individuals. :param n_dim: The dimensionality of the population model. :type n_dim: int, optional :param dim_names: Optional names of the population dimensions. :type dim_names: List[str], optional :param n_ids: Number of modelled individuals. :type n_ids: int, optional """ def __init__(self, n_dim=1, dim_names=None, n_ids=1): super(HeterogeneousModel, self).__init__(n_dim, dim_names) self._n_hierarchical_dim = 0 self.set_n_ids(n_ids, no_shortcut=True) # Set special dimensions self._n_pooled_dims = 0 self._n_hetero_dims = self._n_dim def _shape(self, score, dpsi, dtheta, reduce, flattened): """ Returns the score, dpsi and dtheta. If reduce is True, dpsi and dtheta are flattened according to the hierarchical ordering. If reduce is False, dpsi and dtheta are returned separately. If flattened is True, dtheta is flattened. If flattened is False, dtheta is returned in shape (n_ids, n_param_per_dim, n_dim) """ if reduce or flattened: # Since all dtheta are zero, this is a more efficient # implementation than summing dtheta = np.zeros(self._n_parameters) if reduce: # non-zero entries can come from dpsi dtheta = dpsi.flatten() return score, dtheta return score, dpsi, dtheta
[docs] def compute_individual_parameters( self, parameters, eta, return_eta=False, *args, **kwargs): """ Returns the individual parameters. If the model does not transform the bottom-level parameters, ``eta`` is returned. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: np.ndarray of shape ``(n_ids, n_dim)`` :param return_eta: A boolean flag indicating whether eta is returned regardless of whether the parametrisation is centered or not. :type return_eta: boolean, optional """ if parameters.ndim == 1: parameters = parameters.reshape(self._n_ids, self._n_dim) elif parameters.ndim == 3: parameters = np.diagonal(parameters, axis1=0, axis2=1).T return parameters
[docs] def compute_log_likelihood( self, parameters, observations, *args, **kwargs): r""" Returns the log-likelihood of the population model parameters. A heterogenous population model is equivalent to a multi-dimensional delta-distribution, where each bottom-level parameter is determined by a separate delta-distribution. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: float """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) elif parameters.ndim == 3: # Heterogenous model is special, because n_param_per_dim = n_ids. # But after covariate transformation, the covariate information is # in the n_ids dimension. parameters = parameters[:, 0, :] # Return -inf if any of the observations do not equal the heterogenous # parameters mask = np.not_equal(observations, parameters) if np.any(mask): return -np.inf # Otherwise return 0 return 0
# def compute_pointwise_ll(self, parameters, observations): # r""" # Returns the pointwise log-likelihood of the model parameters for # each observation. # A heterogenous population model imposes no restrictions on the # individuals, as a result the log-likelihood score is zero # irrespective # of the model parameters. # Parameters # ---------- # parameters # An array-like object with the parameters of the population model. # observations # An array-like object with the observations of the individuals. # Each entry is assumed to belong to one individual. # """ # raise NotImplementedError
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, flattened=True, *args, **kwargs): r""" Returns the log-likelihood of the population parameters and its sensitivities w.r.t. the parameters and the observations. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :param flattened: Boolean flag that indicates whether the sensitivities w.r.t. the population parameters are returned as 1-dim. array. If ``False`` sensitivities are returned in shape ``(n_ids, n_param_per_dim, n_dim)``. :type flattened: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) elif parameters.ndim == 3: # Heterogenous model is special, because n_param_per_dim = n_ids. # But after covariate transformation, the covariate information is # in the n_ids dimension. parameters = parameters[:, 0, :] # Return -inf if any of the observations does not equal the # heterogenous parameters n_ids = len(observations) mask = observations != parameters if np.any(mask): score = -np.inf dpsi = np.empty(observations.shape) dtheta = np.empty((n_ids, self._n_ids, self._n_dim)) return self._shape(score, dpsi, dtheta, reduce, flattened) # Otherwise return score = 0 dpsi = np.zeros(observations.shape) if dlogp_dpsi is not None: dpsi += dlogp_dpsi dtheta = np.zeros((n_ids, self._n_ids, self._n_dim)) return self._shape(score, dpsi, dtheta, reduce, flattened)
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the name of the the population model parameters. If name were not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ if exclude_dim_names: return copy.copy(self._parameter_names) # Append dimension names names = [] for name_id, name in enumerate(self._parameter_names): current_dim = name_id % self._n_dim names += [name + ' ' + self._dim_names[current_dim]] return names
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. :param n_ids: Number of individuals. :type n_ids: int """ n_ids = int(n_ids) return (0, n_ids * self._n_dim)
[docs] def n_ids(self): """ Returns the number of modelled individuals. If the behaviour of the population model does not change with the number of modelled individuals 0 is returned. """ return self._n_ids
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ return self._n_parameters
[docs] def sample(self, parameters, n_samples=None, seed=None, *args, **kwargs): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. For ``n_samples > 1`` the samples are randomly drawn from the ``n_ids`` individuals. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional """ parameters = np.asarray(parameters) if parameters.ndim != 2: parameters = parameters.reshape(self._n_ids, self._n_dim) # Randomly sample from n_ids ids = np.arange(self._n_ids) rng = np.random.default_rng(seed=seed) n_samples = n_samples if n_samples else 1 parameters = parameters[ rng.choice(ids, size=n_samples, replace=True)] return parameters
[docs] def set_n_ids(self, n_ids, no_shortcut=False): """ Sets the number of modelled individuals. The behaviour of most population models is the same for any number of individuals, in which case ``n_ids`` is ignored. However, for some models, e.g. :class:`HeterogeneousModel` the behaviour changes with ``n_ids``. :param n_ids: Number of individuals. :type n_ids: int :param no_shortcut: Boolean flag that prevents exiting the method early when n_ids are unchanged. :type no_shortcut: bool, optional """ n_ids = int(n_ids) if n_ids < 1: raise ValueError( 'The number of modelled individuals needs to be greater or ' 'equal to 1.') if (n_ids == self._n_ids) and not no_shortcut: return None self._n_ids = n_ids self._n_parameters = self._n_ids * self._n_dim self._parameter_names = [] for _id in range(self._n_ids): self._parameter_names += ['ID %d' % (_id + 1)] * self._n_dim # Update special dims self._special_dims = [[0, self._n_dim, 0, self._n_parameters, False]]
[docs] def set_parameter_names(self, names=None): r""" Sets the names of the population model parameters. :param names: A list with string-convertable entries of length :meth:`n_parameters`. If ``None``, parameter names are reset to defaults. :type names: List[str] """ if names is None: # Reset names to defaults self._parameter_names = [] for _id in range(self._n_ids): self._parameter_names += ['ID %d' % (_id + 1)] * self._n_dim return None if len(names) != self._n_parameters: raise ValueError( 'Length of names does not match the number of parameters.') self._parameter_names = [str(label) for label in names]
[docs] class LogNormalModel(PopulationModel): r""" A population model which models parameters across individuals with a lognormal distribution. A lognormal population model assumes that a model parameter :math:`\psi` varies across individuals such that :math:`\psi` is log-normally distributed in the population .. math:: p(\psi |\mu _{\text{log}}, \sigma _{\text{log}}) = \frac{1}{\psi} \frac{1}{\sqrt{2\pi} \sigma _{\text{log}}} \exp\left(-\frac{(\log \psi - \mu _{\text{log}})^2} {2 \sigma ^2_{\text{log}}}\right). Here, :math:`\mu _{\text{log}}` and :math:`\sigma ^2_{\text{log}}` are the mean and variance of :math:`\log \psi` in the population, respectively. Any observed individual with parameter :math:`\psi _i` is assumed to be a realisation of the random variable :math:`\psi`. If ``centered = False`` the parametrisation is non-centered, i.e. .. math:: \log \psi = \mu _{\text{log}} + \sigma _{\text{log}} \eta , where :math:`\eta` models the inter-individual variability and is standard normally distributed. Extends :class:`PopulationModel`. :param n_dim: The dimensionality of the population model. :type n_dim: int, optional :param dim_names: Optional names of the population dimensions. :type dim_names: List[str], optional :param centered: Boolean flag indicating whether parametrisation is centered or non-centered. :type centered: bool, optional """ def __init__(self, n_dim=1, dim_names=None, centered=True): super(LogNormalModel, self).__init__(n_dim, dim_names) # Set number of parameters self._n_parameters = 2 * self._n_dim # Set default parameter names self._parameter_names = [ 'Log mean'] * self._n_dim + ['Log std.'] * self._n_dim self._centered = bool(centered) def _compute_dpsi(self, mu, sigma, etas): """ Computes the partial derivatives of psi = exp(mu + sigma eta) w.r.t. eta, mu and sigma. mu: (n_ids, n_dim) sigma: (n_ids, n_dim) etas: (n_ids, n_dim) rtype: np.ndarray of shape (n_ids, n_dim), np.ndarray of shape (n_ids, 2, n_dim) """ n_ids, n_dim = etas.shape psi = np.exp(mu + sigma * etas) dpsi_deta = sigma * psi dpsi_dtheta = np.empty(shape=(n_ids, 2, n_dim)) dpsi_dtheta[:, 0] = psi dpsi_dtheta[:, 1] = etas * psi return dpsi_deta, dpsi_dtheta @staticmethod def _compute_log_likelihood(mus, vars, observations): r""" Calculates the log-likelihood using. mus shape: (n_ids, n_dim) vars shape: (n_ids, n_dim) observations: (n_ids, n_dim) """ # Compute log-likelihood score with np.errstate(divide='ignore'): log_likelihood = - np.sum( np.log(2 * np.pi * vars) / 2 + np.log(observations) + (np.log(observations) - mus)**2 / 2 / vars) # If score evaluates to NaN, return -infinity if np.isnan(log_likelihood): return -np.inf return log_likelihood @staticmethod def _compute_non_centered_log_likelihood(observations): # pragma: no cover r""" Calculates the log-likelihood. observations: (n_ids, n_dim) """ # Compute log-likelihood score log_likelihood = - np.sum( np.log(2 * np.pi) / 2 + observations ** 2 / 2) # If score evaluates to NaN, return -infinity if np.isnan(log_likelihood): return -np.inf return log_likelihood def _compute_non_centered_sensitivities( self, mus, sigmas, observations, dlogp_dpsi): """ Returns the log-likelihood and the sensitivities with respect to eta and theta. """ # Copmute score score = self._compute_non_centered_log_likelihood(observations) # Compute sensitivities if dlogp_dpsi is None: dlogp_dpsi = np.zeros((1, self._n_dim)) deta = -observations dpsi_deta, dpsi_dtheta = self._compute_dpsi(mus, sigmas, observations) dlogp_deta = dlogp_dpsi * dpsi_deta + deta dlogp_dtheta = dlogp_dpsi[:, np.newaxis, :] * dpsi_dtheta return score, dlogp_deta, dlogp_dtheta @staticmethod def _compute_pointwise_ll(mean, var, observations): # pragma: no cover r""" Calculates the pointwise log-likelihoods using numba speed up. """ # Compute log-likelihood score with np.errstate(divide='ignore'): log_psi = np.log(observations) log_likelihood = \ - np.log(2 * np.pi * var) / 2 \ - log_psi \ - (log_psi - mean) ** 2 / (2 * var) # If score evaluates to NaN, return -infinity mask = np.isnan(log_likelihood) if np.any(mask): log_likelihood[mask] = -np.inf return log_likelihood return log_likelihood def _compute_sensitivities(self, mus, vars, psi): # pragma: no cover r""" Calculates the log-likelihood and its sensitivities using numba speed up. Expects: mus shape: (n_ids, n_dim) vars shape: (n_ids, n_dim) observations: (n_ids, n_dim) Returns: deta for non-centered of shape (n_ids, n_dim) and deta and dtheta for centered dtheta: np.ndarray of shape (n_ids, n_parameters, n_dim) """ # Compute sensitivities n_ids = len(psi) with np.errstate(divide='ignore'): dpsi = - ((np.log(psi) - mus) / vars + 1) / psi dmus = (np.log(psi) - mus) / vars dstd = (-1 + (np.log(psi) - mus)**2 / vars) / np.sqrt(vars) # Collect sensitivities n_ids, n_dim = psi.shape dtheta = np.empty(shape=(n_ids, 2, n_dim)) dtheta[:, 0] = dmus dtheta[:, 1] = dstd return dpsi, dtheta
[docs] def compute_individual_parameters( self, parameters, eta, return_eta=False, *args, **kwargs): r""" Returns the individual parameters. If ``centered = True``, the model does not transform the parameters and ``eta`` is returned. If ``centered = False``, the individual parameters are computed using .. math:: \psi = \mathrm{e}^{ \mu _{\mathrm{log}} + \sigma _{\mathrm{log}} \eta}, where :math:`\mu _{\mathrm{log}}` and :math:`\sigma _{\mathrm{log}}` are the model parameters and :math:`\eta` are the inter-individual fluctuations. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids * n_dim)`` or ``(n_ids, n_dim)`` :param return_eta: A boolean flag indicating whether eta is returned regardless of whether the parametrisation is centered or not. :type return_eta: boolean, optional :rtype: np.ndarray of shape ``(n_ids, n_dim)`` """ eta = np.asarray(eta) if eta.ndim == 1: eta = eta.reshape(self._n_ids, self._n_dim) if self._centered or return_eta: return eta parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: n_parameters = parameters[np.newaxis, ...] mu = parameters[:, 0] sigma = parameters[:, 1] if np.any(sigma < 0): return np.full(shape=eta.shape, fill_value=np.nan) psi = np.exp(mu + sigma * eta) return psi
[docs] def compute_log_likelihood( self, parameters, observations, *args, **kwargs): """ Returns the log-likelihood of the population model parameters. If ``centered = False``, the log-likelihood of the standard normal is returned. The contribution of the population parameters to the log-likelihood can be computed with the log-likelihood of the individual parameters, see :class:`chi.ErrorModel`. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: float """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] if not self._centered: return self._compute_non_centered_log_likelihood(observations) parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: parameters = parameters[np.newaxis, ...] # Parse parameters mus = parameters[:, 0] sigmas = parameters[:, 1] vars = sigmas**2 if np.any(sigmas < 0): # The scale. of the lognormal distribution is strictly positive return -np.inf return self._compute_log_likelihood(mus, vars, observations)
# def compute_pointwise_ll( # self, parameters, observations): # pragma: no cover # r""" # Returns the pointwise log-likelihood of the model parameters for # each observation. # The pointwise log-likelihood of a LogNormalModel is the log-pdf # evaluated at the observations # .. math:: # L(\mu _{\text{log}}, \sigma _{\text{log}}| \psi _i) = # \log p(\psi _i | # \mu _{\text{log}}, \sigma _{\text{log}}) , # where # :math:`\psi _i` are the "observed" parameters :math:`\psi` from # individual :math:`i`. # Parameters # ---------- # parameters # An array-like object with the model parameter values, i.e. # [:math:`\mu _{\text{log}}`, :math:`\sigma _{\text{log}}`]. # observations # An array like object with the parameter values for the # individuals, # i.e. [:math:`\psi _1, \ldots , \psi _N`]. # """ # # TODO: Needs proper research to establish which pointwise # # log-likelihood makes sense for hierarchical models. # # Also needs to be adapted to match multi-dimensional API. # raise NotImplementedError # observations = np.asarray(observations) # mean, std = parameters # var = std**2 # eps = 1E-12 # if (std <= 0) or (var <= eps) or np.any(observations == 0): # # The standard deviation of log psi is strictly positive # return np.full(shape=len(observations), fill_value=-np.inf) # return self._compute_pointwise_ll(mean, var, observations)
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, flattened=True, *args, **kwargs): """ Returns the log-likelihood of the population model parameters and its sensitivity to the population parameters as well as the observations. The sensitivities of the bottom-level log-likelihoods with respect to the ``observations`` (bottom-level parameters) may be provided using ``dlogp_dpsi``, in order to compute the sensitivities of the full hierarchical log-likelihood. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :param flattened: Boolean flag that indicates whether the sensitivities w.r.t. the population parameters are returned as 1-dim. array. If ``False`` sensitivities are returned in shape ``(n_ids, n_param_per_dim, n_dim)``. :type flattened: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: n_parameters = parameters[np.newaxis, ...] # Parse parameters mus = parameters[:, 0] sigmas = parameters[:, 1] vars = sigmas**2 if np.any(sigmas < 0): # The scale of the lognormal distribution is strictly positive score = -np.inf dpsi = np.empty(observations.shape) dtheta = np.empty((len(observations), 2, self._n_dim)) return self._shape(score, dpsi, dtheta, reduce, flattened) if not self._centered: score, dpsi, dtheta = self._compute_non_centered_sensitivities( mus, sigmas, observations, dlogp_dpsi) return self._shape(score, dpsi, dtheta, reduce, flattened) # Compute for centered parametrisation score = self._compute_log_likelihood(mus, vars, observations) dpsi, dtheta = self._compute_sensitivities(mus, vars, observations) if dlogp_dpsi is not None: dpsi += dlogp_dpsi return self._shape(score, dpsi, dtheta, reduce, flattened)
[docs] def get_mean_and_std(self, parameters): r""" Returns the mean and the standard deviation of the population for given :math:`\mu _{\text{log}}` and :math:`\sigma _{\text{log}}`. The mean and variance of the parameter :math:`\psi`, :math:`\mu = \mathbb{E}\left[ \psi \right]` and :math:`\sigma ^2 = \text{Var}\left[ \psi \right]`, are given by .. math:: \mu = \mathrm{e}^{\mu _{\text{log}} + \sigma ^2_{\text{log}} / 2} \quad \text{and} \quad \sigma ^2 = \mu ^2 \left( \mathrm{e}^{\sigma ^2_{\text{log}}} - 1\right) . :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape (p,) or (p_per_dim, n_dim) """ # Check input parameters = np.asarray(parameters) if parameters.ndim != 2: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) mus = parameters[0] sigmas = parameters[1] if np.any(sigmas < 0): raise ValueError('The standard deviation cannot be negative.') # Compute mean and standard deviation mean = np.exp(mus + sigmas**2 / 2) std = np.sqrt( np.exp(2 * mus + sigmas**2) * (np.exp(sigmas**2) - 1)) return np.vstack([mean, std])
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the name of the the population model parameters. If name were not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ if exclude_dim_names: return copy.copy(self._parameter_names) # Append dimension names names = [] for name_id, name in enumerate(self._parameter_names): current_dim = name_id % self._n_dim names += [name + ' ' + self._dim_names[current_dim]] return names
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. Parameters ---------- n_ids Number of individuals. """ n_ids = int(n_ids) return (n_ids * self._n_dim, self._n_parameters)
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ return self._n_parameters
[docs] def sample(self, parameters, n_samples=None, seed=None, *args, **kwargs): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. If ``centered = False`` random samples from the standard normal distribution are returned. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional """ parameters = np.asarray(parameters) if len(parameters.flatten()) != self._n_parameters: raise ValueError( 'The number of provided parameters does not match the expected' ' number of top-level parameters.') if parameters.ndim != 2: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) # Define shape of samples if n_samples is None: n_samples = 1 sample_shape = (int(n_samples), self._n_dim) # Instantiate random number generator rng = np.random.default_rng(seed=seed) # Get parameters mus = parameters[0] sigmas = parameters[1] if not self._centered: mus = np.zeros(mus.shape) sigmas = np.ones(sigmas.shape) return rng.normal(loc=mus, scale=sigmas, size=sample_shape) if np.any(sigmas <= 0): raise ValueError( 'A log-normal distribution only accepts strictly positive ' 'standard deviations.') # Sample from population distribution # (Mean and sigma are the mean and standard deviation of # the log samples) samples = rng.lognormal( mean=mus, sigma=sigmas, size=sample_shape) return samples
[docs] def set_parameter_names(self, names=None): r""" Sets the names of the population model parameters. The population parameter of a LogNormalModel are the population mean and standard deviation of the parameter :math:`\psi`. :param names: A list with string-convertable entries of length :meth:`n_parameters`. If ``None``, parameter names are reset to defaults. :type names: List[str] """ if names is None: # Reset names to defaults self._parameter_names = [ 'Log mean'] * self._n_dim + ['Log std.'] * self._n_dim return None if len(names) != self._n_parameters: raise ValueError( 'Length of names does not match the number of parameters.') self._parameter_names = [str(label) for label in names]
[docs] class PooledModel(PopulationModel): """ A population model which pools the model parameters across individuals. A pooled model assumes that the parameters across individuals do not vary. As a result, all individual parameters are set to the same value. Extends :class:`PopulationModel`. :param n_dim: The dimensionality of the population model. :type n_dim: int, optional :param dim_names: Optional names of the population dimensions. :type dim_names: List[str], optional """ def __init__(self, n_dim=1, dim_names=None): super(PooledModel, self).__init__(n_dim, dim_names) # Set number of parameters self._n_parameters = self._n_dim # Set number of hierarchical dimensions self._n_hierarchical_dim = 0 # Set special dimensions self._special_dims = [[0, self._n_dim, 0, self._n_parameters, True]] self._n_pooled_dims = self._n_dim self._n_hetero_dims = 0 # Set default parameter names self._parameter_names = ['Pooled'] * self._n_dim def _shape(self, score, dpsi, dtheta, reduce, flattened): """ Returns the score, dpsi and dtheta. If reduce is True, dpsi and dtheta are flattened according to the hierarchical ordering. If reduce is False, dpsi and dtheta are returned separately. If flattened is True, dtheta is flattened. If flattened is False, dtheta is returned in shape (n_ids, n_param_per_dim, n_dim) """ if reduce or flattened: # Since all dtheta are zero, this is a more efficient # implementation than summing dtheta = np.zeros(self._n_parameters) if reduce: # dpsi can be non-zero dtheta = np.sum(dpsi, axis=0) return score, dtheta return score, dpsi, dtheta
[docs] def compute_individual_parameters( self, parameters, eta, return_eta=False, *args, **kwargs): """ Returns the individual parameters. If the model does not transform the bottom-level parameters, ``eta`` is returned. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: np.ndarray of shape ``(n_ids, n_dim)`` :param return_eta: A boolean flag indicating whether eta is returned regardless of whether the parametrisation is centered or not. :type return_eta: boolean, optional """ if parameters.ndim < 3: parameters = np.broadcast_to( parameters.reshape(1, self._n_dim), shape=(self._n_ids, self._n_dim)) elif parameters.ndim == 3: parameters = parameters[:, 0, :] return parameters
[docs] def compute_log_likelihood( self, parameters, observations, *args, **kwargs): """ Returns the log-likelihood of the population model parameters. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: float """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) elif parameters.ndim == 3: parameters = parameters[:, 0] # Return -inf if any of the observations do not equal the pooled # parameter mask = observations != parameters if np.any(mask): return -np.inf # Otherwise return 0 return 0
[docs] def compute_pointwise_ll(self, parameters, observations): r""" Returns the pointwise log-likelihood of the model parameters for each observation. A pooled population model is a delta-distribution centred at the population model parameter. As a result the log-likelihood score is 0, if all individual parameters are equal to the population parameter, and :math:`-\infty` otherwise. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape (p,) or (p_per_dim, n_dim) :param observations: "Observations" of the individuals. Typically refers to the values of a mechanistic model parameter for each individual, i.e. [:math:`\psi _1, \ldots , \psi _N`]. :type observations: np.ndarray of shape (n, n_dim) :returns: Log-likelihoods for each individual parameter for population parameters. :rtype: np.ndarray of length (n, n_dim) """ observations = np.asarray(observations) parameters = np.asarray(parameters) if parameters.ndim != 2: parameters = parameters.reshape(1, self._n_dim) # Return -inf if any of the observations does not equal the pooled # parameter log_likelihood = np.zeros_like(observations, dtype=float) mask = observations != parameters log_likelihood[mask] = -np.inf return log_likelihood
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, flattened=True, *args, **kwargs): """ Returns the log-likelihood of the population model parameters and its sensitivity to the population parameters as well as the observations. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :param flattened: Boolean flag that indicates whether the sensitivities w.r.t. the population parameters are returned as 1-dim. array. If ``False`` sensitivities are returned in shape ``(n_ids, n_param_per_dim, n_dim)``. :type flattened: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 3: parameters = parameters[:, 0] # Return -inf if any of the observations does not equal the pooled # parameter mask = observations != parameters if np.any(mask): score = -np.inf dpsi = np.empty(observations.shape) dtheta = np.empty((len(observations), 1, self._n_dim)) return self._shape(score, dpsi, dtheta, reduce, flattened) # Otherwise return score = 0 dpsi = np.zeros(observations.shape) if dlogp_dpsi is not None: dpsi += dlogp_dpsi dtheta = np.zeros((len(observations), 1, self._n_dim)) return self._shape(score, dpsi, dtheta, reduce, flattened)
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the name of the the population model parameters. If name were not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ if exclude_dim_names: return copy.copy(self._parameter_names) # Append dimension names names = [] for name_id, name in enumerate(self._parameter_names): current_dim = name_id % self._n_dim names += [name + ' ' + self._dim_names[current_dim]] return names
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. :param n_ids: Number of individuals. :type n_ids: int """ return (0, self._n_parameters)
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ return self._n_parameters
[docs] def sample(self, parameters, n_samples=None, *args, **kwargs): r""" Returns random samples from the underlying population distribution. For a PooledModel the input top-level parameters are copied ``n_samples`` times and are returned. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional """ parameters = np.asarray(parameters) if len(parameters.flatten()) != self._n_parameters: raise ValueError( 'The number of provided parameters does not match the expected' ' number of top-level parameters.') if parameters.ndim != 2: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) # If only one sample is requested, return input parameters if n_samples is None: return parameters # If more samples are wanted, broadcast input parameter to shape # (n_samples, n_dim) sample_shape = (int(n_samples), self._n_dim) samples = np.broadcast_to(parameters, shape=sample_shape) return samples
[docs] def set_parameter_names(self, names=None): """ Sets the names of the population model parameters. :param names: A list with string-convertable entries of length :meth:`n_parameters`. If ``None``, parameter names are reset to defaults. :type names: List[str] """ if names is None: # Reset names to defaults self._parameter_names = ['Pooled'] * self._n_dim return None if len(names) != self._n_parameters: raise ValueError( 'Length of names does not match n_parameters.') self._parameter_names = [str(label) for label in names]
[docs] class ReducedPopulationModel(PopulationModel): """ A class that can be used to permanently fix model parameters of a :class:`PopulationModel` instance. This may be useful to explore simplified versions of a model. Extends :class:`chi.PopulationModel`. :param population_model: A population model. :type population_model: chi.PopulationModel """ def __init__(self, population_model): super(ReducedPopulationModel, self).__init__() # Check inputs if not isinstance(population_model, PopulationModel): raise TypeError( 'The population model has to be an instance of a ' 'chi.PopulationModel.') self._population_model = population_model # Set defaults self._fixed_params_mask = None self._fixed_params_values = None self._n_parameters = population_model.n_parameters() self._n_dim = population_model.n_dim() self._n_covariates = population_model.n_covariates() self._n_hierarchical_dim = population_model.n_hierarchical_dim()
[docs] def compute_individual_parameters(self, parameters, eta, *args, **kwargs): """ Returns the individual parameters. If the model does not transform the bottom-level parameters, ``eta`` is returned. :param parameters: Model parameters. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param eta: Inter-individual fluctuations. :type eta: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: np.ndarray of shape ``(n_ids, n_dim)`` """ # Get fixed parameter values if self._fixed_params_mask is not None: self._fixed_params_values[~self._fixed_params_mask] = parameters parameters = self._fixed_params_values return self._population_model.compute_individual_parameters( parameters, eta, *args, **kwargs)
[docs] def compute_log_likelihood( self, parameters, observations, *args, **kwargs): """ Returns the log-likelihood of the population model parameters. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)`` or ``(n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: float """ parameters = np.asarray(parameters).flatten() # Get fixed parameter values if self._fixed_params_mask is not None: self._fixed_params_values[~self._fixed_params_mask] = parameters parameters = self._fixed_params_values # Compute log-likelihood score = self._population_model.compute_log_likelihood( parameters, observations, *args, **kwargs) return score
# def compute_pointwise_ll(self, parameters, observations): # """ # Returns the pointwise log-likelihood of the population model # parameters # for each observation. # Parameters # ---------- # parameters # An array-like object with the parameters of the population model. # observations # An array-like object with the observations of the individuals. # Each # entry is assumed to belong to one individual. # """ # # TODO: Needs proper research to establish which pointwise # # log-likelihood makes sense for hierarchical models. # # Also needs to be adapted to match multi-dimensional API. # raise NotImplementedError # # # Get fixed parameter values # # if self._fixed_params_mask is not None: # # self._fixed_params_values[~self._fixed_params_mask] = \ # parameters # # parameters = self._fixed_params_values # # # Compute log-likelihood # # scores = self._population_model.compute_pointwise_ll( # # parameters, observations) # # return scores
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, **kwargs): """ Returns the log-likelihood of the population model parameters and its sensitivity to the population parameters as well as the observations. The sensitivities of the bottom-level log-likelihoods with respect to the ``observations`` (bottom-level parameters) may be provided using ``dlogp_dpsi``, in order to compute the sensitivities of the full hierarchical log-likelihood. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)`` or ``(n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters with respect to the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ parameters = np.asarray(parameters).flatten() # Get fixed parameter values if self._fixed_params_mask is not None: self._fixed_params_values[~self._fixed_params_mask] = parameters parameters = self._fixed_params_values # Compute log-likelihood and sensitivities kwargs['flattened'] = True output = self._population_model.compute_sensitivities( parameters=parameters, observations=observations, dlogp_dpsi=dlogp_dpsi, reduce=reduce, **kwargs) if self._fixed_params_mask is None: return output # Need to filter sensitivities of fixed top-level parameters if not reduce: score, dpsi, dtheta = output return score, dpsi, dtheta[~self._fixed_params_mask] score, dscore = output n_bottom, _ = self._population_model.n_hierarchical_parameters( len(observations)) dpsi = dscore[:n_bottom] dtheta = dscore[n_bottom:][~self._fixed_params_mask] return score, np.hstack((dpsi, dtheta))
[docs] def fix_parameters(self, name_value_dict): """ Fixes the value of model parameters, and effectively removes them as a parameter from the model. Fixing the value of a parameter at ``None``, sets the parameter free again. :param name_value_dict: A dictionary with model parameter names as keys, and parameter values as values. :type name_value_dict: Dict[str:float] """ # Check type try: name_value_dict = dict(name_value_dict) except (TypeError, ValueError): raise ValueError( 'The name-value dictionary has to be convertable to a python ' 'dictionary.') # If no model parameters have been fixed before, instantiate a mask # and values if self._fixed_params_mask is None: self._fixed_params_mask = np.zeros( shape=self._n_parameters, dtype=bool) if self._fixed_params_values is None: self._fixed_params_values = np.empty(shape=self._n_parameters) # Update the mask and values for index, name in enumerate( self._population_model.get_parameter_names()): try: value = name_value_dict[name] except KeyError: # KeyError indicates that parameter name is not being fixed continue # Fix parameter if value is not None, else unfix it self._fixed_params_mask[index] = value is not None self._fixed_params_values[index] = value # If all parameters are free, set mask and values to None again if np.all(~self._fixed_params_mask): self._fixed_params_mask = None self._fixed_params_values = None
[docs] def get_covariate_names(self): """ Returns the names of the covariates. If name is not set, defaults are returned. """ return self._population_model.get_covariate_names()
[docs] def get_dim_names(self): """ Returns the names of the dimensions. """ return self._population_model.get_dim_names()
[docs] def get_special_dims(self): r""" Returns information on pooled and heterogeneously modelled dimensions. Returns a tuple with 3 entries: 1. A list of lists, each entry containing 1. the start and end dimension of the special dimensions; 2. the associated start and end index of the model parameters; 3. and a boolean indicating whether the dimension is pooled (``True``) or heterogeneous (``False``). 2. The number of pooled dimensions. 3. The number of heterogeneous dimensions. """ special_dims, n_pooled_dims, n_hetero_dims = \ self._population_model.get_special_dims() if self._fixed_params_mask is None: return special_dims, n_pooled_dims, n_hetero_dims # If parameters are fixed, we need to reindex top level parameters s_dims = [] for s in special_dims: start = s[2] end = s[3] # Shift by number of leading fixed parameters start -= int(np.sum(self._fixed_params_mask[:start])) end -= int(np.sum(self._fixed_params_mask[:end])) s_dims += [[ s[0], s[1], start, end, s[4] ]] return s_dims, n_pooled_dims, n_hetero_dims
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the name of the the population model parameters. If name were not set, defaults are returned. """ names = self._population_model.get_parameter_names(exclude_dim_names) # Remove fixed model parameters if self._fixed_params_mask is not None: names = np.array(names) names = names[~self._fixed_params_mask] names = list(names) return copy.copy(names)
[docs] def get_population_model(self): """ Returns the original population model. """ return self._population_model
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. Parameters ---------- n_ids Number of individuals. """ # Get individual parameters n_indiv, n_pop = self._population_model.n_hierarchical_parameters( n_ids) # If parameters have been fixed, updated number of population # parameters if self._fixed_params_mask is not None: n_fixed = int(np.sum(self._fixed_params_mask)) n_pop = self._n_parameters - n_fixed return (n_indiv, n_pop)
[docs] def n_fixed_parameters(self): """ Returns the number of fixed model parameters. """ if self._fixed_params_mask is None: return 0 n_fixed = int(np.sum(self._fixed_params_mask)) return n_fixed
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ # Get number of fixed parameters n_fixed = 0 if self._fixed_params_mask is not None: n_fixed = int(np.sum(self._fixed_params_mask)) # Subtract fixed parameters from total number n_parameters = self._n_parameters - n_fixed return n_parameters
[docs] def sample(self, parameters, n_samples=None, seed=None, *args, **kwargs): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional """ # Get fixed parameter values if self._fixed_params_mask is not None: self._fixed_params_values[~self._fixed_params_mask] = parameters parameters = self._fixed_params_values # Sample from population model sample = self._population_model.sample( parameters=parameters, n_samples=n_samples, seed=seed, *args, **kwargs) return sample
[docs] def set_covariate_names(self, names=None): """ Sets the names of the covariates. If the model has no covariates, input is ignored. :param names: A list of parameter names. If ``None``, covariate names are reset to defaults. :type names: List """ self._population_model.set_covariate_names(names)
[docs] def set_dim_names(self, names=None): """ Sets the names of the population model dimensions. :param names: A list of dimension names. If ``None``, dimension names are reset to defaults. :type names: List[str], optional """ self._population_model.set_dim_names(names)
[docs] def set_n_ids(self, n_ids): """ Sets the number of modelled individuals. The behaviour of most population models is the same for any number of individuals, in which case ``n_ids`` is ignored. However, for some models, e.g. :class:`HeterogeneousModel` the behaviour changes with ``n_ids``. :param n_ids: Number of individuals. :type n_ids: int """ self._population_model.set_n_ids(n_ids)
[docs] def set_parameter_names(self, names=None): """ Sets the names of the population model parameters. Parameters ---------- names A dictionary that maps the current parameter names to new names. If ``None``, parameter names are reset to defaults. """ if names is None: # Reset names to defaults self._population_model.set_parameter_names() return None # Check input if len(names) != self.n_parameters(): raise ValueError( 'Length of names does not match n_parameters.') # Limit the length of parameter names for name in names: if len(name) > 50: raise ValueError( 'Parameter names cannot exceed 50 characters.') parameter_names = [str(label) for label in names] # Reconstruct full list of error model parameters if self._fixed_params_mask is not None: names = np.array( self._population_model.get_parameter_names(), dtype='U50') names[~self._fixed_params_mask] = parameter_names parameter_names = names # Set parameter names self._population_model.set_parameter_names(parameter_names)
[docs] class TruncatedGaussianModel(PopulationModel): r""" A population model which models model parameters across individuals as Gaussian random variables which are truncated at zero. A truncated Gaussian population model assumes that a model parameter :math:`\psi` varies across individuals such that :math:`\psi` is Gaussian distributed in the population for :math:`\psi` greater 0 .. math:: p(\psi |\mu, \sigma) = \frac{1}{1 - \Phi (-\mu / \sigma )} \frac{1}{\sqrt{2\pi} \sigma} \exp\left(-\frac{(\psi - \mu )^2} {2 \sigma ^2}\right)\quad \text{for} \quad \psi > 0 and :math:`p(\psi |\mu, \sigma) = 0` for :math:`\psi \leq 0`. :math:`\Phi (\psi )` denotes the cumulative distribution function of the Gaussian distribution. Here, :math:`\mu` and :math:`\sigma ^2` are the mean and variance of the untruncated Gaussian distribution. Any observed individual with parameter :math:`\psi _i` is assumed to be a realisation of the random variable :math:`\psi`. Extends :class:`PopulationModel`. :param n_dim: The dimensionality of the population model. :type n_dim: int, optional :param dim_names: Optional names of the population dimensions. :type dim_names: List[str], optional """ def __init__(self, n_dim=1, dim_names=None): super(TruncatedGaussianModel, self).__init__(n_dim, dim_names) # Set number of parameters self._n_parameters = 2 * self._n_dim # Set default parameter names self._parameter_names = ['Mu'] * self._n_dim + ['Sigma'] * self._n_dim @staticmethod def _compute_log_likelihood(mus, sigmas, observations): # pragma: no cover r""" Calculates the log-likelihood using numba speed up. We are using the relationship between the Gaussian CDF and the error function ..math:: Phi(x) = (1 + erf(x/sqrt(2))) / 2 mus shape: (n_ids, n_dim) sigmas shape: (n_ids, n_dim) observations: (n_ids, n_dim) """ # Return infinity if any psis are negative if np.any(observations < 0): return -np.inf # Compute log-likelihood score with np.errstate(divide='ignore'): log_likelihood = - np.sum( np.log(2 * np.pi * sigmas**2) / 2 + (observations - mus) ** 2 / (2 * sigmas**2) + np.log(1 - _norm_cdf(-mus/sigmas))) # If score evaluates to NaN, return -infinity if np.isnan(log_likelihood): return -np.inf return log_likelihood @staticmethod def _compute_pointwise_ll(mean, std, observations): # pragma: no cover """ Calculates the pointwise log-likelihoods using numba speed up. """ # Compute log-likelihood score log_likelihood = \ - np.log(2 * np.pi * std**2) / 2 \ - (observations - mean) ** 2 / (2 * std**2) \ - np.log(1 - math.erf(-mean/std/math.sqrt(2))) + np.log(2) # If score evaluates to NaN, return -infinity mask = np.isnan(log_likelihood) if np.any(mask): log_likelihood[mask] = -np.inf return log_likelihood return log_likelihood def _compute_sensitivities(self, mus, sigmas, psi): # pragma: no cover """ Calculates the log-likelihood and its sensitivities. Expects: mus shape: (n_ids, n_dim) sigmas shape: (n_ids, n_dim) psi: (n_ids, n_dim) Returns: log_likelihood: float dpsi: (n_ids, n_dim) dtheta: (n_ids, n_parameters, n_dim) """ # Compute log-likelihood score log_likelihood = self._compute_log_likelihood(mus, sigmas, psi) n_ids = len(psi) dtheta = np.empty(shape=(n_ids, self._n_parameters, self._n_dim)) if np.isinf(log_likelihood): return (-np.inf, np.empty(shape=psi.shape), dtheta) # Compute sensitivities with np.errstate(divide='ignore'): dpsi = (mus - psi) / sigmas**2 dtheta[:, 0] = ( (psi - mus) / sigmas - _norm_pdf(mus/sigmas) / (1 - _norm_cdf(-mus/sigmas)) ) / sigmas dtheta[:, 1] = ( -1 + (psi - mus)**2 / sigmas**2 + _norm_pdf(mus/sigmas) * mus / sigmas / (1 - _norm_cdf(-mus/sigmas)) ) / sigmas return log_likelihood, dpsi, dtheta
[docs] def compute_log_likelihood( self, parameters, observations, *args, **kwargs): """ Returns the log-likelihood of the population model parameters. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :rtype: float """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: parameters = parameters[np.newaxis, ...] # Parse parameters mus = parameters[:, 0] sigmas = parameters[:, 1] if np.any(sigmas <= 0): # Gaussians are only defined for positive sigmas. return -np.inf return self._compute_log_likelihood(mus, sigmas, observations)
# def compute_pointwise_ll(self, parameters, observations): # r""" # Returns the pointwise log-likelihood of the model parameters for # each observation. # The pointwise log-likelihood of a truncated Gaussian distribution is # the log-pdf evaluated at the observations # .. math:: # L(\mu , \sigma | \psi _i) = # \log p(\psi _i | # \mu , \sigma ) , # where # :math:`\psi _i` are the "observed" parameters :math:`\psi` from # individual :math:`i`. # Parameters # ---------- # parameters # An array-like object with the model parameter values, i.e. # [:math:`\mu`, :math:`\sigma`]. # observations # An array like object with the parameter values for the # individuals, # i.e. [:math:`\psi _1, \ldots , \psi _N`]. # """ # # TODO: Needs proper research to establish which pointwise # # log-likelihood makes sense for hierarchical models. # # Also needs to be adapted to match multi-dimensional API. # raise NotImplementedError # # observations = np.asarray(observations) # # mean, std = parameters # # if (mean <= 0) or (std <= 0): # # # The mean and std. of the Gaussian distribution are # # # strictly positive if truncated at zero # # return np.full(shape=len(observations), fill_value=-np.inf) # # return self._compute_pointwise_ll(mean, std, observations)
[docs] def compute_sensitivities( self, parameters, observations, dlogp_dpsi=None, reduce=False, flattened=True, *args, **kwargs): """ Returns the log-likelihood of the population model parameters and its sensitivity to the population parameters as well as the observations. The sensitivities of the bottom-level log-likelihoods with respect to the ``observations`` (bottom-level parameters) may be provided using ``dlogp_dpsi``, in order to compute the sensitivities of the full hierarchical log-likelihood. The log-likelihood and sensitivities are returned as a tuple ``(score, deta, dtheta)``. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(n_parameters,)``, ``(n_param_per_dim, n_dim)`` or ``(n_ids, n_param_per_dim, n_dim)`` :param observations: Individual model parameters. :type observations: np.ndarray of shape ``(n_ids, n_dim)`` :param dlogp_dpsi: The sensitivities of the log-likelihood of the individual parameters with respect to the individual parameters. :type dlogp_dpsi: np.ndarray of shape ``(n_ids, n_dim)``, optional :param reduce: A boolean flag indicating whether the sensitivites are returned as an array of shape ``(n_hierarchical_parameters,)``. ``reduce`` is prioritised over ``flattened``. :type reduce: bool, optional :param flattened: Boolean flag that indicates whether the sensitivities w.r.t. the population parameters are returned as 1-dim. array. If ``False`` sensitivities are returned in shape ``(n_ids, n_param_per_dim, n_dim)``. :type flattened: bool, optional :rtype: Tuple[float, np.ndarray of shape ``(n_ids, n_dim)``, np.ndarray of shape ``(n_parameters,)``] """ observations = np.asarray(observations) if observations.ndim == 1: observations = observations[:, np.newaxis] parameters = np.asarray(parameters) if parameters.ndim == 1: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(1, n_parameters, self._n_dim) elif parameters.ndim == 2: parameters = parameters[np.newaxis, ...] # Parse parameters mus = parameters[:, 0] sigmas = parameters[:, 1] if np.any(sigmas <= 0): # Gaussians are only defined for positive sigmas. n_ids, n_dim = observations.shape score = -np.inf dpsi = np.empty(observations.shape) dtheta = np.empty((n_ids, self._n_parameters, n_dim)) return self._shape(score, dpsi, dtheta, reduce, flattened) score, dpsi, dtheta = self._compute_sensitivities( mus, sigmas, observations) if dlogp_dpsi is not None: dpsi += dlogp_dpsi return self._shape(score, dpsi, dtheta, reduce, flattened)
[docs] def get_mean_and_std(self, parameters): r""" Returns the mean and the standard deviation of the population for given :math:`\mu` and :math:`\sigma`. The mean and variance of the parameter :math:`\psi` are given by .. math:: \mathbb{E}\left[ \psi \right] = \mu + \sigma F(\mu/\sigma) \quad \text{and} \quad \text{Var}\left[ \psi \right] = \sigma ^2 \left[ 1 - \frac{\mu}{\sigma}F(\mu/\sigma) - F(\mu/\sigma) ^2 \right], where :math:`F(\mu/\sigma) = \phi(\mu/\sigma )/(1-\Phi(-\mu/\sigma))` is a function given by the Gaussian probability density function :math:`\phi(\psi)` and the Gaussian cumulative distribution function :math:`\Phi(\psi)`. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :rtype: np.ndarray of shape ``(p_per_dim, n_dim)`` """ # Check input parameters = np.asarray(parameters) if parameters.ndim != 2: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) mus = parameters[0] sigmas = parameters[1] if np.any(sigmas < 0): raise ValueError('The standard deviation cannot be negative.') # Compute mean and standard deviation output = np.empty((self._n_parameters, self._n_dim)) output[0] = \ mus + sigmas * norm.pdf(mus/sigmas) / (1 - norm.cdf(-mus/sigmas)) output[1] = np.sqrt( sigmas**2 * ( 1 - mus / sigmas * norm.pdf(mus/sigmas) / (1 - norm.cdf(-mus/sigmas)) - (norm.pdf(mus/sigmas) / (1 - norm.cdf(-mus/sigmas)))**2) ) return output
[docs] def get_parameter_names(self, exclude_dim_names=False): """ Returns the name of the the population model parameters. If name were not set, defaults are returned. :param exclude_dim_names: A boolean flag that indicates whether the dimension name is appended to the parameter name. :type exclude_dim_names: bool, optional """ if exclude_dim_names: return copy.copy(self._parameter_names) # Append dimension names names = [] for name_id, name in enumerate(self._parameter_names): current_dim = name_id % self._n_dim names += [name + ' ' + self._dim_names[current_dim]] return names
[docs] def n_hierarchical_parameters(self, n_ids): """ Returns a tuple of the number of individual parameters and the number of population parameters that this model expects in context of a :class:`HierarchicalLogLikelihood`, when ``n_ids`` individuals are modelled. :param n_ids: Number of individuals. :type n_ids: int """ n_ids = int(n_ids) return (n_ids * self._n_dim, self._n_parameters)
[docs] def n_parameters(self): """ Returns the number of parameters of the population model. """ return self._n_parameters
[docs] def sample(self, parameters, n_samples=None, seed=None, *args, **kwargs): """ Returns random samples from the population distribution. The returned value is a NumPy array with shape ``(n_samples, n_dim)``. If ``centered = False`` random samples from the standard normal distribution are returned. :param parameters: Parameters of the population model. :type parameters: np.ndarray of shape ``(p,)`` or ``(p_per_dim, n_dim)`` :param n_samples: Number of samples. If ``None``, one sample is returned. :type n_samples: int, optional :param seed: A seed for the pseudo-random number generator. :type seed: int, optional """ parameters = np.asarray(parameters) if len(parameters.flatten()) != self._n_parameters: raise ValueError( 'The number of provided parameters does not match the expected' ' number of top-level parameters.') if parameters.ndim != 2: n_parameters = len(parameters) // self._n_dim parameters = parameters.reshape(n_parameters, self._n_dim) # Define shape of samples if n_samples is None: n_samples = 1 sample_shape = (int(n_samples), self._n_dim) # Get parameters mus = parameters[0] sigmas = parameters[1] if np.any(sigmas < 0): # The std. of the Gaussian distribution are # strictly positive raise ValueError( 'A truncated Gaussian distribution only accepts strictly ' 'positive standard deviations.') # Convert seed to int if seed is a rng # (Unfortunately truncated normal is not yet available with numpys # random number generator API) if isinstance(seed, np.random.Generator): # Draw new seed such that rng is propagated, but truncated normal # samples can also be seeded. seed = seed.integers(low=0, high=1E6) np.random.seed(seed) # Sample from population distribution samples = truncnorm.rvs( a=0, b=np.inf, loc=mus, scale=sigmas, size=sample_shape) return samples
[docs] def set_parameter_names(self, names=None): """ Sets the names of the population model parameters. :param names: A list of parameter names. If ``None``, parameter names are reset to defaults. :type names: List[str] """ if names is None: # Reset names to defaults self._parameter_names = [ 'Mu'] * self._n_dim + ['Sigma'] * self._n_dim return None if len(names) != self._n_parameters: raise ValueError( 'Length of names does not match the number of parameters.') self._parameter_names = [str(label) for label in names]
def _norm_cdf(x): # pragma: no cover """ Returns the cumulative distribution function value of a standard normal Gaussian distribtion. """ return 0.5 * (1 + erf(x/np.sqrt(2))) def _norm_pdf(x): # pragma: no cover """ Returns the probability density function value of a standard normal Gaussian distribtion. """ return np.exp(-x**2/2) / np.sqrt(2 * np.pi)