#
# This file is part of the chi repository
# (https://github.com/DavAug/chi/) which is released under the
# BSD 3-clause license. See accompanying LICENSE.md for copyright notice and
# full license details.
#
import copy
import numpy as np
import pandas as pd
import pints
import xarray as xr
import chi
[docs]
class AveragedPredictiveModel(object):
"""
A base class for predictive models whose parameters are drawn from
a distribution.
"""
def __init__(self, predictive_model):
super(AveragedPredictiveModel, self).__init__()
# Check inputs
if not isinstance(predictive_model, chi.PredictiveModel):
raise ValueError(
'The provided predictive model has to be an instance of a '
'chi.PredictiveModel.')
self._predictive_model = predictive_model
[docs]
def get_dosing_regimen(self, final_time=None):
"""
Returns the dosing regimen of the compound in form of a
:class:`pandas.DataFrame`.
The dataframe has a time, a duration, and a dose column, which indicate
the time point and duration of the dose administration in the time
units of the mechanistic model, :meth:`MechanisticModel.time_unit`. The
dose column specifies the amount of the compound that is being
administered in units of the drug amount variable of the mechanistic
model.
If an indefinitely administered dosing regimen is set, i.e. a
finite duration and undefined number of doses, see
:meth:`set_dosing_regimen`, only the first administration of the
dose will appear in the dataframe. Alternatively, a final dose time
``final_time`` can be provided, up to which the dose events are
registered.
If no dosing regimen has been set, ``None`` is returned.
Parameters
----------
final_time
Time up to which dose events are registered in the dataframe. If
``None``, all dose events are registered, except for indefinite
dosing regimens. Here, only the first dose event is registered.
"""
return self._predictive_model.get_dosing_regimen(final_time)
[docs]
def get_n_outputs(self):
"""
Returns the number of outputs.
"""
return self._predictive_model.get_n_outputs()
[docs]
def get_output_names(self):
"""
Returns the output names.
"""
return self._predictive_model.get_output_names()
[docs]
def get_predictive_model(self):
"""
Returns the predictive model.
"""
return self._predictive_model
[docs]
def sample(
self, times, n_samples=None, seed=None, include_regimen=False):
"""
Samples virtual measurements from the model of the data-generating
process and returns them in form of a :class:`pandas.DataFrame`.
"""
raise NotImplementedError
[docs]
def set_dosing_regimen(
self, dose, start=0, duration=0.01, period=None, num=None):
"""
Sets the dosing regimen of the administered compound.
By default the dose is administered as a bolus injection (duration on
a time scale that is 100 fold smaller than the time unit). To
model an infusion of the dose over a longer time period, the
``duration`` can be adjusted to the appropriate time scale.
By default the dose is administered once. To apply multiple doses
provide a dose administration period.
.. note::
This method requires a :class:`MechanisticModel` that supports
dose administration.
Parameters
----------
dose
The amount of the compound that is injected at each administration.
start
Start time of the treatment.
duration
Duration of dose administration. For a bolus injection, a dose
duration of 1% of the time unit should suffice. By default the
duration is set to 0.01 (bolus).
period
Periodicity at which doses are administered. If ``None`` the dose
is administered only once.
num
Number of administered doses. If ``None`` and the periodicity of
the administration is not ``None``, doses are administered
indefinitely.
"""
self._predictive_model.set_dosing_regimen(
dose, start, duration, period, num)
[docs]
class PosteriorPredictiveModel(AveragedPredictiveModel):
r"""
Implements the posterior predictive model of the modelled
data-generating process and the associated parameter posterior
distribution.
A :class:`PosteriorPredictiveModel` is instantiated with a
:class:`PredictiveModel` and samples from the associated posterior
distribution in form of a :class:`xarray.Dataset`. The samples approximate
the posterior distribution of the model parameters.
Formally, the posterior predictive model is defined as
.. math::
p(y | \mathcal{D}, t) = \int \text{d}\psi \,
p(y | \psi , t)\, p(\psi | \mathcal{D}),
where :math:`p(y | \psi, t)` is the model of the data-generating process
and :math:`p(\psi | \mathcal{D})` is the posterior distribution of
the model parmeters.
:param predictive_model: A predictive model which defines the distribution
of observable biomarkers over time conditioned on parameter values.
:type predictive_model: PredictiveModel
:param posterior_samples: Samples from the posterior distribution of the
model parameters.
:type posterior_samples: xarray.Dataset
:param param_map: A dictionary which can be used to map predictive model
parameter names to the parameter names in the :class:`xarray.Dataset`.
If ``None``, it is assumed that the names are identical.
:type param_map: dict, optional
"""
def __init__(
self, predictive_model, posterior_samples, param_map=None):
super(PosteriorPredictiveModel, self).__init__(predictive_model)
# Check input
if not isinstance(posterior_samples, xr.Dataset):
raise TypeError(
'The posterior samples have to be a xarray.Dataset.')
dims = sorted(list(posterior_samples.dims))
expected_dims = ['chain', 'draw', 'individual']
if (len(dims) == 2):
expected_dims = ['chain', 'draw']
for dim in expected_dims:
if dim not in dims:
raise ValueError(
'The posterior samples must have the dimensions '
'(chain, draw, individual). The current dimensions are <'
+ str(dims) + '>.')
# Set default parameter map (no mapping)
if param_map is None:
param_map = {}
try:
param_map = dict(param_map)
except (TypeError, ValueError):
raise ValueError(
'The parameter map has to be convertable to a python '
'dictionary.')
# Check that the parameters of the posterior can be identified in the
# dataset
self._parameter_names = self._check_parameters(
posterior_samples, param_map)
# Store posterior
self._posterior = posterior_samples
def _check_parameters(self, posterior_samples, param_map):
"""
Checks whether the parameters of the posterior exist in the dataset
and returns them.
"""
# Create map from posterior parameter names to model parameter names
model_names = self._predictive_model.get_parameter_names()
for param_id, name in enumerate(model_names):
try:
model_names[param_id] = param_map[name]
except KeyError:
# The name is not mapped
pass
# Check that model names are in the dataset
for parameter in model_names:
if parameter not in posterior_samples.data_vars:
raise ValueError(
'The parameter <' + str(parameter) + '> cannot be found '
'in the posterior.')
return model_names
[docs]
def sample(
self, times, n_samples=None, individual=None, seed=None,
include_regimen=False, covariates=None):
"""
Samples virtual measurements from the posterior predictive
model and returns them in form of a :class:`pandas.DataFrame`.
For each of the ``n_samples`` a parameter set is drawn from the
approximate posterior distribution. These paramaters are then used to
sample from the predictive model.
:param times: Times for the virtual "measurements".
:type times: list, numpy.ndarray of shape (n,)
:param n_samples: The number of virtual "measurements" that are
performed at each time point. If ``None`` the biomarkers are
measured only once at each time point.
:type n_samples: int, optional
:param individual: The ID of the modelled individual. If
``None``, either the first ID or the population is simulated.
:type individual: str, optional
:param seed: Seed for the pseudo-random number generator.
:type seed: int or :class:`numpy.random.Generator`, optional
:param include_regimen: A boolean flag which determines whether the
dosing regimen information is included in the output. Only possible
when ``return_df=True``.
:type include_regimen: bool, optional
:param covariates: Covariate values, specifying the sampled
subpopulation.
:type covariates: List, np.ndarray of shape ``(n_cov,)`` or
``(n_samples, n_cov)``, optional
:rtype: :class:`pandas.DataFrame`
"""
# Make sure n_samples is an integer
if n_samples is None:
n_samples = 1
n_samples = int(n_samples)
# Check individual for population model
if isinstance(self._predictive_model, chi.PopulationPredictiveModel):
individual = None
# Check individual for individual predictive model
else:
try:
ids = self._posterior.individual
except AttributeError: # pragma: no cover
# Posterior only contains information about one individual.
# Hack to make this work
individual = 'some value'
ids = ['some value']
# Get default individual
if individual is None:
individual = str(ids.data[0])
# Make sure individual exists
if individual not in ids:
raise ValueError(
'The individual <' + str(individual) + '> could not be '
'found in the ID column.')
# Sort times
times = np.sort(times)
# Instantiate random number generator for sampling from the posterior
rng = np.random.default_rng(seed=seed)
# Sort parameters into numpy array for simplified sampling
n_chains = len(self._posterior.chain)
n_parameters = self._predictive_model.n_parameters()
try:
n_draws = len(self._posterior.sel(
individual=individual).dropna(dim='draw').draw)
# Note: ValueError -> KeyError for xarray>=0.19.0
except (ValueError, KeyError):
n_draws = len(self._posterior.dropna(dim='draw').draw)
posterior = np.empty(shape=(n_chains * n_draws, n_parameters))
for param_id, parameter in enumerate(self._parameter_names):
try:
posterior[:, param_id] = self._posterior[parameter].sel(
individual=individual).dropna(dim='draw').values.flatten()
# Note: ValueError -> KeyError for xarray>=0.19.0
except (ValueError, KeyError):
# If individual dimension does not exist, the parameter must
# be a population parameter.
posterior[:, param_id] = self._posterior[
parameter].dropna(dim='draw').values.flatten()
# Create container for samples
container = pd.DataFrame(
columns=['ID', 'Time', 'Observable', 'Value'])
# Get model outputs
outputs = self._predictive_model.get_output_names()
# Draw samples
sample_ids = np.arange(start=1, stop=n_samples+1)
for sample_id in sample_ids:
# Sample parameter from posterior
parameters = rng.choice(posterior)
# Sample from predictive model
sample = self._predictive_model.sample(
parameters, times, n_samples, rng, return_df=False,
covariates=covariates)
# Append samples to dataframe
for output_id, name in enumerate(outputs):
container = pd.concat([container, pd.DataFrame({
'ID': sample_id,
'Time': times,
'Observable': name,
'Value': sample[output_id, :, 0]})])
# Add dosing regimen, if set
final_time = np.max(times)
regimen = self.get_dosing_regimen(final_time)
if (regimen is not None) and (include_regimen is True):
# Append dosing regimen only once for all samples
container = pd.concat([container, regimen])
return container
[docs]
class PredictiveModel(object):
r"""
Implements a model of a data-generating process.
The model is defined by an instance of a :class:`MechanisticModel` and an
instance of an :class:`ErrorModel` for each mechanistic model output
.. math::
p(y | \psi , t),
where :math:`y` are measurements of quantities of interest, :math:`\psi`
are the model parameters and :math:`t` is the time.
Parameters
----------
mechanistic_model
An instance of a :class:`MechanisticModel`.
error_models
A list of :class:`ErrorModel` instances, one for each model output of
the mechanistic model.
outputs
A list of the model outputs, which maps the error models to the model
outputs. If ``None`` the error models are assumed to be listed in the
same order as the model outputs.
"""
def __init__(self, mechanistic_model, error_models, outputs=None):
super(PredictiveModel, self).__init__()
# Check inputs
if not isinstance(
mechanistic_model, chi.MechanisticModel):
raise TypeError(
'The mechanistic model has to be an instance of a '
'chi.MechanisticModel.')
error_models = \
error_models if isinstance(error_models, list) else [error_models]
for error_model in error_models:
if not isinstance(
error_model, (chi.ErrorModel, chi.ReducedErrorModel)):
raise TypeError(
'All error models have to be instances of a '
'chi.ErrorModel.')
# Copy mechanistic model
mechanistic_model = mechanistic_model.copy()
# Set outputs
if outputs is not None:
mechanistic_model.set_outputs(outputs)
# Get number of outputs
n_outputs = mechanistic_model.n_outputs()
if len(error_models) != n_outputs:
raise ValueError(
'Wrong number of error models. One error model has to be '
'provided for each mechanistic error model.')
# Copy error models
error_models = [
copy.deepcopy(error_model) for error_model in error_models]
# Remember models
self._mechanistic_model = mechanistic_model
self._error_models = error_models
# Set parameter names and number of parameters
self._set_error_model_parameter_names()
self._set_number_and_parameter_names()
def _set_error_model_parameter_names(self):
"""
Resets the error model parameter names and prepends the output name
if more than one output exists.
"""
# Reset error model parameter names to defaults
for error_model in self._error_models:
error_model.set_parameter_names(None)
# Rename error model parameters, if more than one output
n_outputs = self._mechanistic_model.n_outputs()
if n_outputs > 1:
# Get output names
outputs = self._mechanistic_model.outputs()
for output_id, error_model in enumerate(self._error_models):
# Get original parameter names
names = error_model.get_parameter_names()
# Prepend output name
output = outputs[output_id]
names = [output + ' ' + name for name in names]
# Set new parameter names
error_model.set_parameter_names(names)
def _set_number_and_parameter_names(self):
"""
Sets the number and names of the free model parameters.
"""
# Get mechanistic model parameters
parameter_names = self._mechanistic_model.parameters()
# Get error model parameters
for error_model in self._error_models:
parameter_names += error_model.get_parameter_names()
# Update number and names
self._parameter_names = parameter_names
self._n_parameters = len(self._parameter_names)
[docs]
def fix_parameters(self, name_value_dict):
"""
Fixes the value of model parameters, and effectively removes them as a
parameter from the model. Fixing the value of a parameter at ``None``,
sets the parameter free again.
Parameters
----------
name_value_dict
A dictionary with model parameter names as keys, and parameter
value as values.
"""
# Check type of dictionanry
try:
name_value_dict = dict(name_value_dict)
except (TypeError, ValueError):
raise ValueError(
'The name-value dictionary has to be convertable to a python '
'dictionary.')
# Get submodels
mechanistic_model = self._mechanistic_model
error_models = self._error_models
# Convert models to reduced models
if not isinstance(mechanistic_model, chi.ReducedMechanisticModel):
mechanistic_model = chi.ReducedMechanisticModel(mechanistic_model)
for model_id, error_model in enumerate(error_models):
if not isinstance(error_model, chi.ReducedErrorModel):
error_models[model_id] = chi.ReducedErrorModel(error_model)
# Fix model parameters
mechanistic_model.fix_parameters(name_value_dict)
for error_model in error_models:
error_model.fix_parameters(name_value_dict)
# If no parameters are fixed, get original model back
if mechanistic_model.n_fixed_parameters() == 0:
mechanistic_model = mechanistic_model.mechanistic_model()
for model_id, error_model in enumerate(error_models):
if error_model.n_fixed_parameters() == 0:
error_model = error_model.get_error_model()
error_models[model_id] = error_model
# Safe reduced models
self._mechanistic_model = mechanistic_model
self._error_models = error_models
# Update names and number of parameters
self._set_number_and_parameter_names()
[docs]
def get_dosing_regimen(self, final_time=None):
"""
Returns the dosing regimen of the compound in form of a
:class:`pandas.DataFrame`.
The dataframe has a time, a duration, and a dose column, which indicate
the time point and duration of the dose administration in the time
units of the mechanistic model, :meth:`MechanisticModel.time_unit`. The
dose column specifies the amount of the compound that is being
administered in units of the drug amount variable of the mechanistic
model.
If an indefinitely administered dosing regimen is set, i.e. a
finite duration and undefined number of doses, see
:meth:`set_dosing_regimen`, only the first administration of the
dose will appear in the dataframe. Alternatively, a final dose time
``final_time`` can be provided, up to which the dose events are
registered.
If no dosing regimen has been set, ``None`` is returned.
Parameters
----------
final_time
Time up to which dose events are registered in the dataframe. If
``None``, all dose events are registered, except for indefinite
dosing regimens. Here, only the first dose event is registered.
"""
# Get regimen
try:
regimen = self._mechanistic_model.dosing_regimen()
except AttributeError:
# The model does not support dosing regimens
return None
# Return None if regimen is not set
if regimen is None:
return regimen
# Make sure that final_time is positive
if final_time is None:
final_time = np.inf
# Sort regimen into dataframe
regimen_df = pd.DataFrame(columns=['Time', 'Duration', 'Dose'])
for dose_event in regimen.events():
# Get dose amount
dose_rate = dose_event.level()
dose_duration = dose_event.duration()
dose_amount = dose_rate * dose_duration
# Get dosing time points
start_time = dose_event.start()
period = dose_event.period()
n_doses = dose_event.multiplier()
if start_time > final_time:
# Dose event exceeds final dose time and is therefore
# not registered
continue
if period == 0:
# Dose is administered only once
regimen_df = pd.concat([regimen_df, pd.DataFrame({
'Time': [start_time],
'Duration': [dose_duration],
'Dose': [dose_amount]})])
# Continue to next dose event
continue
if n_doses == 0:
# The dose event would be administered indefinitely, so we
# stop with final_time or 1.
n_doses = 1
if np.isfinite(final_time):
n_doses = int(abs(final_time) // period)
# Construct dose times
dose_times = [start_time + n * period for n in range(n_doses)]
# Make sure that even for finite periodic dose events the final
# time is not exceeded
dose_times = np.array(dose_times)
mask = dose_times <= final_time
dose_times = dose_times[mask]
# Add dose administrations to dataframe
regimen_df = pd.concat([regimen_df, pd.DataFrame({
'Time': dose_times,
'Duration': dose_duration,
'Dose': dose_amount})])
# If no dose event before final_time exist, return None
if regimen_df.empty:
return None
return regimen_df
[docs]
def get_n_outputs(self):
"""
Returns the number of outputs.
"""
return self._mechanistic_model.n_outputs()
[docs]
def get_output_names(self):
"""
Returns the output names.
"""
return self._mechanistic_model.outputs()
[docs]
def get_parameter_names(self):
"""
Returns the parameter names of the predictive model.
"""
return copy.copy(self._parameter_names)
[docs]
def get_submodels(self):
"""
Returns the submodels of the predictive model in form of a dictionary.
"""
# Get original submodels
mechanistic_model = self._mechanistic_model
if isinstance(mechanistic_model, chi.ReducedMechanisticModel):
mechanistic_model = mechanistic_model.mechanistic_model()
error_models = []
for error_model in self._error_models:
# Get original error model
if isinstance(error_model, chi.ReducedErrorModel):
error_model = error_model.get_error_model()
error_models.append(error_model)
submodels = dict({
'Mechanistic model': mechanistic_model,
'Error models': error_models})
return submodels
[docs]
def n_parameters(self):
"""
Returns the number of parameters of the predictive model.
"""
return self._n_parameters
[docs]
def sample(
self, parameters, times, n_samples=None, seed=None,
return_df=True, include_regimen=False, *args, **kwargs):
"""
Samples "measurements" of the biomarkers from the predictive model and
returns them in form of a :class:`pandas.DataFrame` or a
:class:`numpy.ndarray`.
The mechanistic model is solved for the provided parameters and times,
and samples around this solution are drawn from the error models for
each time point.
The number of samples for each time point can be specified with
``n_samples``.
Parameters
----------
parameters
An array-like object with the parameter values of the predictive
model.
times
An array-like object with times at which the virtual "measurements"
are performed.
n_samples
The number of virtual "measurements" that are performed at each
time point. If ``None`` the biomarkers are measured only once
at each time point.
seed
A seed for the pseudo-random number generator or a
:class:`numpy.random.Generator`.
return_df
A boolean flag which determines whether the output is returned as a
:class:`pandas.DataFrame` or a :class:`numpy.ndarray`. If ``False``
the samples are returned as a numpy array of shape
``(n_outputs, n_times, n_samples)``.
include_regimen
A boolean flag which determines whether the dosing regimen
information is included in the output. If the samples are returned
as a :class:`numpy.ndarray`, the dosing information is not
included.
"""
parameters = np.asarray(parameters)
if len(parameters) != self._n_parameters:
raise ValueError(
'The length of parameters does not match n_parameters.')
# Sort parameters into mechanistic model params and error params
n_parameters = self._mechanistic_model.n_parameters()
mechanistic_params = parameters[:n_parameters]
error_params = parameters[n_parameters:]
# Solve mechanistic model
times = np.sort(times)
outputs = self._mechanistic_model.simulate(mechanistic_params, times)
# Create numpy container for samples
n_outputs = len(outputs)
n_times = len(times)
n_samples = n_samples if n_samples is not None else 1
container = np.empty(shape=(n_outputs, n_times, n_samples))
# Sample error around mechanistic model outputs
start_index = 0
for output_id, error_model in enumerate(self._error_models):
end_index = start_index + error_model.n_parameters()
# Sample
container[output_id, ...] = error_model.sample(
parameters=error_params[start_index:end_index],
model_output=outputs[output_id],
n_samples=n_samples,
seed=seed)
# Update start index
start_index = end_index
if return_df is False:
# Return samples in numpy array format
return container
# Structure samples in a pandas.DataFrame
output_names = self._mechanistic_model.outputs()
sample_ids = np.arange(start=1, stop=n_samples+1)
samples = pd.DataFrame(
columns=['ID', 'Time', 'Observable', 'Value'])
# Fill in all samples at a specific time point at once
for output_id, name in enumerate(output_names):
for time_id, time in enumerate(times):
samples = pd.concat([samples, pd.DataFrame({
'ID': sample_ids,
'Time': time,
'Observable': name,
'Value': container[output_id, time_id, :]})])
# Add dosing regimen information, if set
final_time = np.max(times)
regimen = self.get_dosing_regimen(final_time)
if (regimen is not None) and (include_regimen is True):
# Add dosing regimen for each sample
for _id in sample_ids:
regimen['ID'] = _id
samples = pd.concat([samples, regimen])
return samples
[docs]
def set_dosing_regimen(
self, dose, start=0, duration=0.01, period=None, num=None):
"""
Sets the dosing regimen with which the compound is administered.
By default the dose is administered as a bolus injection (duration on
a time scale that is 100 fold smaller than the basic time unit). To
model an infusion of the dose over a longer time period, the
``duration`` can be adjusted to the appropriate time scale.
By default the dose is administered once. To apply multiple doses
provide a dose administration period.
.. note::
This method requires a :class:`MechanisticModel` that supports
compound administration.
Parameters
----------
dose
The amount of the compound that is injected at each administration.
start
Start time of the treatment.
duration
Duration of dose administration. For a bolus injection, a dose
duration of 1% of the time unit should suffice. By default the
duration is set to 0.01 (bolus).
period
Periodicity at which doses are administered. If ``None`` the dose
is administered only once.
num
Number of administered doses. If ``None`` and the periodicity of
the administration is not ``None``, doses are administered
indefinitely.
"""
try:
self._mechanistic_model.set_dosing_regimen(
dose, start, duration, period, num)
except AttributeError:
# This error means that the mechanistic model is a
# PharmacodynamicModel and therefore no dosing regimen can be set.
raise AttributeError(
'The mechanistic model does not support to set dosing '
'regimens. This may be because the underlying '
'chi.MechanisticModel is a '
'chi.PharmacodynamicModel.')
[docs]
class PopulationPredictiveModel(PredictiveModel):
r"""
Implements a model of a data-generating process.
The model is defined by an instance of a :class:`PredictiveModel` and an
instance of a :class:`PopulationModel`. The predictive model
:math:`p(y | \psi, t)` defines the data-generating process for an
individual in the population with parameters :math:`\psi`. The population
model :math:`p(\psi | \theta)` defines how the parameters vary across
individuals in the population.
As a result, the data-generating process is defined as
.. math::
p(y | \theta, t) =
\int \mathrm{d}\psi \, p(y | \psi, t)\, p(\psi |\theta).
Extends :class:`PredictiveModel`.
Parameters
----------
predictive_model
An instance of a :class:`PredictiveModel`.
population_model
An instance of a :class:`PopulationModel`.
"""
def __init__(self, predictive_model, population_model):
# Check inputs
if not isinstance(predictive_model, chi.PredictiveModel):
raise TypeError(
'The predictive model has to be an instance of '
'chi.PredictiveModel.')
if not isinstance(population_model, chi.PopulationModel):
raise TypeError(
'The population model has to be an instance of '
'chi.PopulationModel.')
# Get number and names of non-population predictive model
n_parameters = predictive_model.n_parameters()
# Check that there is one population model for each model parameter
if population_model.n_dim() != n_parameters:
raise ValueError(
'The dimension of the population model has to be the same as '
'the number of predictive model parameters.')
# Remember predictive model and population model
self._predictive_model = predictive_model
self._population_model = population_model
[docs]
def fix_parameters(self, name_value_dict):
"""
Fixes the value of model parameters, and effectively removes them as a
parameter from the model. Fixing the value of a parameter at ``None``,
sets the parameter free again.
Parameters
----------
name_value_dict
A dictionary with model parameter names as keys, and parameter
values as values.
"""
# Check type of dictionanry
try:
name_value_dict = dict(name_value_dict)
except (TypeError, ValueError):
raise ValueError(
'The name-value dictionary has to be convertable to a python '
'dictionary.')
# Convert models to reduced models
pop_model = self._population_model
if not isinstance(pop_model, chi.ReducedPopulationModel):
pop_model = chi.ReducedPopulationModel(pop_model)
# Fix model parameters
pop_model.fix_parameters(name_value_dict)
if pop_model.n_fixed_parameters() == 0:
pop_model = pop_model.get_population_model()
# Safe reduced models
self._population_model = pop_model
[docs]
def get_dosing_regimen(self, final_time=None):
"""
Returns the dosing regimen of the compound in form of a
:class:`pandas.DataFrame`.
The dataframe has a time, a duration, and a dose column, which indicate
the time point and duration of the dose administration in the time
units of the mechanistic model, :meth:`MechanisticModel.time_unit`. The
dose column specifies the amount of the compound that is being
administered in units of the drug amount variable of the mechanistic
model.
If an indefinitely administered dosing regimen is set, i.e. a
finite duration and undefined number of doses, see
:meth:`set_dosing_regimen`, only the first administration of the
dose will appear in the dataframe. Alternatively, a final dose time
``final_time`` can be provided, up to which the dose events are
registered.
If no dosing regimen has been set, ``None`` is returned.
Parameters
----------
final_time
Time up to which dose events are registered in the dataframe. If
``None``, all dose events are registered, except for indefinite
dosing regimens. Here, only the first dose event is registered.
"""
return self._predictive_model.get_dosing_regimen(final_time)
[docs]
def get_n_outputs(self):
"""
Returns the number of outputs.
"""
return self._predictive_model.get_n_outputs()
[docs]
def get_output_names(self):
"""
Returns the output names.
"""
return self._predictive_model.get_output_names()
[docs]
def get_parameter_names(self):
"""
Returns the parameter names of the predictive model.
"""
return self._population_model.get_parameter_names()
[docs]
def n_parameters(self):
"""
Returns the number of parameters of the predictive model.
"""
return self._population_model.n_parameters()
[docs]
def sample(
self, parameters, times, n_samples=None, seed=None, return_df=True,
include_regimen=False, covariates=None):
"""
Samples measurements of the observables from virtual patients.
Virtual patients are sampled from the population model and measured by
sampling from the individual-level predictive model. Each virtual
patient is measured at each of the provided time points.
The number of virtual patients that is being measured can be specified
with ``n_samples``.
If the data-generating process does not depend on covariates, the
``covariates`` input is ignored.
:param parameters: Population model parameters.
:type parameters: np.ndarry of shape ``(n_parameters,)``
:param times: Measurement time points.
:type times: np.ndarray of shape ``(n_times,)``
:param n_samples: Number of virtual patients.
:type n_samples: int, optional
:param seed: Seed for the pseudo-random number generator.
:type seed: int or :class:`numpy.random.Generator`, optional
:param return_df: A boolean flag which determines whether the output is
returned as a :class:`pandas.DataFrame` or a
:class:`numpy.ndarray`.
:type return_df: bool, optional
:param include_regimen: A boolean flag which determines whether the
dosing regimen information is included in the output. Only possible
when ``return_df=True``.
:type include_regimen: bool, optional
:param covariates: Covariate values, specifying the sampled
subpopulation.
:type covariates: List, np.ndarray of shape ``(n_cov,)`` or
``(n_samples, n_cov)``, optional
:rtype: :class:`pandas.DataFrame` or np.ndarray of shape
``(n_outputs, n_times, n_samples)``
"""
# Check inputs
if not n_samples:
n_samples = 1
n_samples = int(n_samples)
parameters = np.asarray(parameters)
if len(parameters) != self.n_parameters():
raise ValueError(
'The length of parameters does not match n_parameters.')
if (self._population_model.n_covariates() > 0):
covariates = np.asarray(covariates)
if covariates.ndim == 1:
covariates = covariates[np.newaxis, :]
n_s, n_c = covariates.shape
if n_c != self._population_model.n_covariates():
raise ValueError(
'Provided covariates do not match the number of '
'covariates.')
if (n_s > 1) and (n_s != n_samples):
raise ValueError(
'Provided covariates cannot be broadcasted to number of '
'samples.')
if seed is not None:
seed = np.random.default_rng(seed)
# Sample individuals from population model
patients = self._population_model.sample(
parameters=parameters, n_samples=n_samples, seed=seed,
covariates=covariates)
patients = self._population_model.compute_individual_parameters(
parameters=parameters, eta=patients, covariates=covariates)
# Create numpy container for samples (measurements of virtual patients)
n_outputs = self._predictive_model.get_n_outputs()
n_times = len(times)
measurements = np.empty(shape=(n_outputs, n_times, n_samples))
# Sample measurements for each patient
times = np.sort(times)
for patient_id, patient in enumerate(patients):
measurements[..., patient_id] = self._predictive_model.sample(
parameters=patient, times=times, seed=seed, return_df=False
)[..., 0]
if return_df is False:
# Return samples in numpy array format
return measurements
# Structure samples in a pandas.DataFrame
# (Exploit how .flatten() arranges measurements)
names = self._predictive_model.get_output_names()
output_names = []
for name in names:
output_names += [name] * (n_times * n_samples)
times = np.broadcast_to(
times[np.newaxis, :, np.newaxis],
shape=(n_outputs, n_times, n_samples)).flatten()
sample_ids = np.arange(start=1, stop=n_samples+1)
sample_ids = np.broadcast_to(
sample_ids[np.newaxis, np.newaxis, :],
shape=(n_outputs, n_times, n_samples)).flatten()
measurements = pd.DataFrame({
'ID': sample_ids,
'Time': times,
'Observable': output_names,
'Value': measurements.flatten()})
# Append covariates, if used
if covariates is not None:
names = self._population_model.get_covariate_names()
sample_ids = np.arange(start=1, stop=n_samples+1)
for idc, covariate in enumerate(names):
measurements = pd.concat([measurements, pd.DataFrame({
'ID': sample_ids,
'Time': np.nan,
'Observable': covariate,
'Value': covariates[..., idc]})])
# Add dosing regimen information, if set
if include_regimen:
final_time = np.max(times)
regimen = self.get_dosing_regimen(final_time)
if regimen is not None:
# Add dosing regimen for each sample
sample_ids = np.arange(start=1, stop=n_samples+1)
for _id in sample_ids:
regimen['ID'] = _id
measurements = pd.concat([measurements, regimen])
return measurements
[docs]
def set_dosing_regimen(
self, dose, start=0, duration=0.01, period=None, num=None):
"""
Sets the dosing regimen with which the compound is administered.
By default the dose is administered as a bolus injection (duration on
a time scale that is 100 fold smaller than the basic time unit). To
model an infusion of the dose over a longer time period, the
``duration`` can be adjusted to the appropriate time scale.
By default the dose is administered once. To apply multiple doses
provide a dose administration period.
.. note::
This method requires a :class:`MechanisticModel` that supports
compound administration.
Parameters
----------
dose
The amount of the compound that is injected at each administration.
start
Start time of the treatment.
duration
Duration of dose administration. For a bolus injection, a dose
duration of 1% of the time unit should suffice. By default the
duration is set to 0.01 (bolus).
period
Periodicity at which doses are administered. If ``None`` the dose
is administered only once.
num
Number of administered doses. If ``None`` and the periodicity of
the administration is not ``None``, doses are administered
indefinitely.
"""
self._predictive_model.set_dosing_regimen(
dose, start, duration, period, num)
[docs]
class PriorPredictiveModel(AveragedPredictiveModel):
"""
Implements a model that predicts the change of observable biomarkers over
time based on the provided distribution of model parameters prior to the
inference.
A prior predictive model may be used to check whether the assumptions about
the parameter distribution ``log_prior`` lead to a predictive distirbution
that encapsulates the expected measurement values of preclinical and
clinical biomarkers.
A PriorPredictiveModel is instantiated with an instance of a
:class:`PredictiveModel` and a :class:`pints.LogPrior` of the same
parametric dimension as the predictive model. Future biomarker
"measurements" can then be predicted by first sampling parameter values
from the log-prior distribution, and then generating "virtual" measurements
from the predictive model with those parameters.
Parameters
----------
predictive_model
An instance of a :class:`PredictiveModel`.
log_prior
An instance of a :class:`pints.LogPrior` of the same dimensionality as
the number of predictive model parameters.
"""
def __init__(self, predictive_model, log_prior):
super(PriorPredictiveModel, self).__init__(predictive_model)
if not isinstance(log_prior, pints.LogPrior):
raise ValueError(
'The provided log-prior has to be an instance of a '
'pints.LogPrior.')
if predictive_model.n_parameters() != log_prior.n_parameters():
raise ValueError(
'The dimension of the log-prior has to be the same as the '
'number of parameters of the predictive model.')
self._log_prior = log_prior
[docs]
def sample(
self, times, n_samples=None, seed=None, include_regimen=False,
covariates=None):
"""
Samples "measurements" of the biomarkers from the prior predictive
model and returns them in form of a :class:`pandas.DataFrame`.
For each of the ``n_samples`` a parameter set is drawn from the
log-prior. These paramaters are then used to sample from the predictive
model.
:param times: Times for the virtual "measurements".
:type times: list, numpy.ndarray of shape (n,)
:param n_samples: The number of virtual "measurements" that are
performed at each time point. If ``None`` the biomarkers are
measured only once at each time point.
:type n_samples: int, optional
:param seed: Seed for the pseudo-random number generator.
:type seed: int or :class:`numpy.random.Generator`, optional
:param include_regimen: A boolean flag which determines whether the
dosing regimen information is included in the output. Only possible
when ``return_df=True``.
:type include_regimen: bool, optional
:param covariates: Covariate values, specifying the sampled
subpopulation.
:type covariates: List, np.ndarray of shape ``(n_cov,)`` or
``(n_samples, n_cov)``, optional
:rtype: :class:`pandas.DataFrame`
"""
# Make sure n_samples is an integer
if n_samples is None:
n_samples = 1
n_samples = int(n_samples)
# Set seed for prior samples
# (does not affect predictive model)
if seed is not None:
# TODO: pints.Priors are not meant to be seeded, so fails when
# anything else but np.random is used.
np.random.seed(seed)
# Set predictive model base seed
base_seed = seed
# Sort times
times = np.sort(times)
# Create container for samples
container = pd.DataFrame(
columns=['ID', 'Time', 'Observable', 'Value'])
# Get model outputs
outputs = self._predictive_model.get_output_names()
# Draw samples
sample_ids = np.arange(start=1, stop=n_samples+1)
for sample_id in sample_ids:
# Sample parameter
parameters = self._log_prior.sample().flatten()
if seed is not None:
# Set seed for predictive model to base_seed + sample_id
# (Needs to change every iteration)
seed = base_seed + sample_id
# Sample from predictive model
sample = self._predictive_model.sample(
parameters, times, n_samples, seed, return_df=False,
covariates=covariates)
# Append samples to dataframe
for output_id, name in enumerate(outputs):
container = pd.concat([container, pd.DataFrame({
'ID': sample_id,
'Time': times,
'Observable': name,
'Value': sample[output_id, :, 0]})])
# Add dosing regimen, if set
final_time = np.max(times)
regimen = self.get_dosing_regimen(final_time)
if (regimen is not None) and (include_regimen is True):
# Append dosing regimen only once for all samples
container = pd.concat([container, regimen])
return container
[docs]
class PAMPredictiveModel(AveragedPredictiveModel):
r"""
A model that is defined by the probabilistic average of
posterior predictive models.
Probabilistic averging of models is the weighted average of the predictive
distributions of individual models
.. math::
p(x | x^{\mathrm{obs}}) = \sum _m w_m\, p_m(x | x^{\mathrm{obs}}),
where the sum runs over the individual models and :math:`w_m` is the weight
of model :math:`m`.
.. warning::
Does currently not support CovariatePopulationModels.
:param predictive_models: A list of predictive models.
:type predictive_models: List[PosteriorPredictiveModel] of length
`n_models`.
:param weights: The weights of candidate models. Weights are normalised
automatically.
:type weights: List np.ndarray of length `n_models`.
"""
def __init__(self, predictive_models, weights):
# Check inputs
for predictive_model in predictive_models:
if not isinstance(predictive_model, PosteriorPredictiveModel):
raise TypeError(
'The predictive models must be instances of '
'chi.PosteriorPredictiveModel.')
predictive_model = predictive_models[0].get_predictive_model()
super(PAMPredictiveModel, self).__init__(predictive_model)
n_outputs = self._predictive_model.get_n_outputs()
for predictive_model in predictive_models:
if n_outputs != predictive_model.get_n_outputs():
raise ValueError(
'All predictive models must have the same number of '
'outputs.')
output_names = self._predictive_model.get_output_names()
for predictive_model in predictive_models:
if output_names != predictive_model.get_output_names():
raise Warning(
'The predictive models appear to have different output '
'names. Stacking of the predictive distributions might '
'therefore not meaningful.')
if len(predictive_models) != len(weights):
raise ValueError(
'The model weights must be of the same length as the number '
'of predictive models.')
weights = np.array(weights, dtype=float)
# Remember models and normalised weights
self._predictive_models = predictive_models
self._weights = weights / np.sum(weights)
[docs]
def get_predictive_model(self):
"""
Returns a list of the
:class:`chi.PosteriorPredictiveModel` instances.
"""
return self._predictive_models
[docs]
def get_weights(self):
"""
Returns the weights of the individual predictive models.
"""
return copy.copy(self._weights)
[docs]
def sample(
self, times, n_samples=None, individual=None, seed=None,
include_regimen=False):
"""
Samples "measurements" of the biomarkers from the posterior predictive
model and returns them in form of a :class:`pandas.DataFrame`.
For each of the ``n_samples`` a parameter set is drawn from the
approximate posterior distribution. These paramaters are then used to
sample from the predictive model.
:param times: Times for the virtual "measurements".
:type times: list, numpy.ndarray of shape (n,)
:param n_samples: The number of virtual "measurements" that are
performed at each time point. If ``None`` the biomarkers are
measured only once at each time point.
:type n_samples: int, optional
:param individual: The ID of the modelled individual. If
``None``, either the first ID or the population is simulated.
:type individual: str, optional
:param seed: A seed for the pseudo-random number generator.
:type seed: int
:param include_regimen: A boolean flag which determines whether the
information about the dosing regimen is included.
:type include_regimen: bool, optional
"""
# Make sure n_samples is an integer
if n_samples is None:
n_samples = 1
n_samples = int(n_samples)
# Instantiate random number generator
seed = np.random.default_rng(seed=seed)
# Sample number of samples from each predictive model
n_models = len(self._predictive_models)
model_indices = np.arange(n_models)
model_draws = np.random.choice(
model_indices, p=self._weights, size=n_samples)
samples_per_model = np.zeros(n_models, dtype=int)
for model_id, model in enumerate(model_indices):
samples_per_model[model_id] = np.sum(
model_draws == model)
# Sample from predictive models
samples = []
for model_id, n_samples in enumerate(samples_per_model):
# Skip model if no samples are drawn from it
if n_samples == 0:
continue
# Sample
model = self._predictive_models[model_id]
s = model.sample(times, n_samples, individual, seed=seed)
# Shift IDs by number of previous draws
s['ID'] += int(np.sum(samples_per_model[:model_id]))
# Append samples to list
samples.append(s)
# Concatenate all samples to one dataframe
samples = pd.concat(samples)
# Add dosing regimen, if set
final_time = np.max(times)
regimen = self.get_dosing_regimen(final_time)
if (regimen is not None) and (include_regimen is True):
# Append dosing regimen only once for all samples
samples = pd.concat([samples, regimen])
return samples
[docs]
def set_dosing_regimen(
self, dose, start=0, duration=0.01, period=None, num=None):
"""
Sets the dosing regimen with which the compound is administered.
By default the dose is administered as a bolus injection (duration on
a time scale that is 100 fold smaller than the basic time unit). To
model an infusion of the dose over a longer time period, the
``duration`` can be adjusted to the appropriate time scale.
By default the dose is administered once. To apply multiple doses
provide a dose administration period.
.. note::
This method requires a :class:`MechanisticModel` that supports
dose administration.
Parameters
----------
dose
The amount of the compound that is injected at each administration.
start
Start time of the treatment.
duration
Duration of dose administration. For a bolus injection, a dose
duration of 1% of the time unit should suffice. By default the
duration is set to 0.01 (bolus).
period
Periodicity at which doses are administered. If ``None`` the dose
is administered only once.
num
Number of administered doses. If ``None`` and the periodicity of
the administration is not ``None``, doses are administered
indefinitely.
"""
for predictive_model in self._predictive_models:
predictive_model.set_dosing_regimen(
dose, start, duration, period, num)