Skip to content

Commit

Permalink
Merge pull request #1 from ben18785/add-simulation-functions
Browse files Browse the repository at this point in the history
Create simulation.R
  • Loading branch information
ben18785 authored Jul 29, 2024
2 parents ea78218 + 953afaa commit 332cd69
Show file tree
Hide file tree
Showing 4 changed files with 247 additions and 5 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: This package uses Stan to estimate epidemiological quantities of in
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
RoxygenNote: 7.3.2
Biarch: true
Depends:
R (>= 3.4.0)
Expand Down
14 changes: 11 additions & 3 deletions R/fit_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,22 @@
#' @param N number of data points
#' @param C case counts
#' @param w discretised serial interval
#' @param is_sampling Boolean indicating if sampling is to be used (if TRUE) or
#' optimisation (if FALSE). Defaults to TRUE
#' @param ...
#'
#' @return a stanfit object
#' @export
fit_epifilter <- function(N, C, w, ...) {
fit_epifilter <- function(N, C, w, is_sampling=TRUE, ...) {
wmax <- length(w)
standata <- list(N=N,
C=C,
wmax=wmax,
w=w)
out <- rstan::sampling(stanmodels$epifilter, data = standata, ...)
if(is_sampling)
out <- rstan::sampling(stanmodels$epifilter, data = standata, ...)
else
out <- rstan::optimizing(stanmodels$epifilter, data = standata, ...)
return(out)
}

Expand All @@ -39,6 +44,9 @@ fit_epifilter_covariates <- function(N, C, w, X, ...) {
wmax=wmax,
N_covariates=N_covariates,
X=X)
out <- rstan::sampling(stanmodels$epifilter, data = standata, ...)
if(is_sampling)
out <- rstan::sampling(stanmodels$epifilter_covariates, data = standata, ...)
else
out <- rstan::optimizing(stanmodels$epifilter_covariates, data = standata, ...)
return(out)
}
98 changes: 98 additions & 0 deletions R/simulation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@


#' Generate a Discretized Serial Interval Distribution
#'
#' This function generates a discretized serial interval distribution based on
#' the gamma distribution characterized by the specified mean and standard deviation.
#'
#' @param nt An integer specifying the number of time points (length of the distribution).
#' @param mean_si A numeric value representing the mean of the serial interval distribution.
#' @param sd_si A numeric value representing the standard deviation of the serial interval distribution.
#'
#' @return A numeric vector of length `nt` representing the discretized serial interval distribution.
#'
#' @details
#' The serial interval distribution is discretized using a gamma distribution
#' with shape and scale parameters derived from the specified mean and standard deviation.
#' The discretization is performed by computing the difference in the cumulative distribution
#' function (CDF) of the gamma distribution at successive integer time points.
#'
#' @examples
#' \dontrun{
#' generate_vector_serial(10, 5, 2)
#' }
generate_vector_serial <- function(nt, mean_si, sd_si) {

t = 1:nt

# Shape-scale parameters of gamma serial interval
pms = c(0,0); pms[1] = mean_si^2/sd_si^2; pms[2] = sd_si^2/mean_si

# Discretise serial interval distribution
tdist = c(0, t); w_dist = rep(0, nt)
for (i in 1:nt){
w_dist[i] = pgamma(tdist[i+1], shape = pms[1], scale = pms[2]) -
pgamma(tdist[i], shape = pms[1], scale = pms[2])
}

w_dist
}

#' Simulate a Renewal Epidemic Model
#'
#' This function simulates an epidemic using a renewal model based on the
#' specified time-varying reproduction number (Rt), the serial interval distribution
#' characterized by a gamma distribution, and an initial number of cases.
#'
#' @param rt_fun A function that takes a vector of time points and returns a vector
#' of reproduction numbers (Rt) corresponding to those time points.
#' @param nt An integer specifying the number of time points (duration of the epidemic simulation).
#' @param mean_si A numeric value representing the mean of the serial interval distribution.
#' @param sd_si A numeric value representing the standard deviation of the serial interval distribution.
#' @param i_0 An integer specifying the initial number of cases at time point 1.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{i_t}{A numeric vector of the incidence of new cases at each time point.}
#' \item{lambda_t}{A numeric vector of the total infectiousness at each time point.}
#' \item{w_dist}{A numeric vector of the discretized serial interval distribution.}
#' \item{r_t}{A numeric vector of the reproduction number (Rt) at each time point.}
#' \item{t}{A numeric vector of the time points from 1 to nt.}
#' }
#'
#' @details
#' The renewal model is a common approach for simulating the spread of infectious diseases.
#' The total infectiousness at each time point is computed as the convolution of past incidence
#' with the serial interval distribution. The incidence at each time point is then simulated
#' as a Poisson random variable with a mean equal to the product of the total infectiousness and
#' the time-varying reproduction number.
#'
#' @examples
#' \dontrun{
#' rt_fun <- function(t) { 1.5 * exp(-0.05 * t) }
#' simulate_renewal_epidemic(rt_fun, 100, 5, 2, 10)
#' }
simulate_renewal_epidemic <- function(Rt_fun, nt, mean_si, sd_si, i_0){

# Time series and Rt
t = 1:nt
Rt <- vector(length = nt)
for(i in seq_along(Rt)) {
Rt[i] = rt_fun(t[i])
}

# Total infectiousness and incidence with initial imports
Lt = rep(0, nt); It = Lt; It[1] = i_0; Lt[1] = It[1]

w_dist <- generate_vector_serial(nt, mean_si, sd_si)

# Simulate from standard renewal model
for(i in 2:nt){
# Total infectiousness is a convolution
Lt[i] = sum(It[seq(i-1, 1, -1)]*w_dist[1:(i-1)])
# Poisson renewal model
It[i] = rpois(1, Lt[i]*Rt[i])
}

data.frame(t=t, i_t=It, R_t=Rt, lambda_t=Lt, w_dist=w_dist)
}
138 changes: 137 additions & 1 deletion vignettes/fitting_synthetic_data_using_epidp.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ knitr::opts_chunk$set(

```{r setup}
library(epidp)
library(ggplot2)
library(dplyr)
library(magrittr)
library(purrr)
library(tidyr)
options(mc.cores=4)
```


Expand All @@ -28,5 +34,135 @@ I_t \sim \text{Poisson}(R_t \Lambda_t),

where $\Lambda:=\sum_{s=1}^t \omega_s I_{t-s}$.


## Step function in $R_t$
We first generate case data assuming a step function in $R_t$.
We first generate case data assuming a step function for $R_t$.
```{r}
rt_fun = function(t){
if(t <= 60)
R = 2
else if (t <= 90)
R = 0.5
else
R = 1
R
}
# simulation parameters
nt <- 200
mean_si <- 6.5
sd_si <- 4.03
i_0 <- 10
# data frame of outputs
epidemic_df <- simulate_renewal_epidemic(rt_fun, nt, mean_si, sd_si, i_0)
# plot
epidemic_df %>%
select(-c(w_dist, lambda_t)) %>%
pivot_longer(c(i_t, R_t)) %>%
ggplot(aes(x = t, y = value, colour = name)) +
geom_line() +
scale_y_continuous(
name = "Incidence (i_t)",
sec.axis = sec_axis(~ . / 1000, name = "Reproduction Number (R_t)")
) +
labs(x = "Time", colour = "Variable") +
theme_minimal()
```

We now use a Stan version of EpiFilter to estimate the maximum a posteriori estimates
of $R_t$ and overlay these on top of the actual values. Note, these estimates do
not have uncertainty associated with them but the benefit of this is that estimation
is instantaneous. The estimates are close to the actual $R_t$ values after an initial
period when case counts are low.
```{r}
# fit model
fit <- fit_epifilter(
N=length(epidemic_df$i_t),
C=epidemic_df$i_t,
w=epidemic_df$w_dist,
is_sampling=FALSE,
as_vector=FALSE
)
# plot
R <- fit$par$R
epidemic_df %>%
mutate(estimated=R) %>%
rename(true=R_t) %>%
select(t, estimated, true) %>%
pivot_longer(c(estimated, true)) %>%
ggplot(aes(x=t, y=value)) +
geom_line(aes(colour=name)) +
scale_color_brewer("R_t", palette = "Dark2") +
ylab("R_t")
```


## Sinusoidal function in $R_t$
We now generate case data assuming a sinusoidal $R_t$.
```{r}
# define sinusoidal Rt
rt_fun <- function(t) {
1.3 + 1.2 * sin(4 * (pi / 180) * t)
}
nt <- 200
mean_si <- 6.5
sd_si <- 4.03
i_0 <- 10
# data frame of outputs
epidemic_df <- simulate_renewal_epidemic(rt_fun, nt, mean_si, sd_si, i_0)
# plot
epidemic_df %>%
select(-c(w_dist, lambda_t)) %>%
pivot_longer(c(i_t, R_t)) %>%
ggplot(aes(x = t, y = value, colour = name)) +
geom_line() +
scale_y_continuous(
name = "Incidence (i_t)",
sec.axis = sec_axis(~ . / 1000, name = "Reproduction Number (R_t)")
) +
labs(x = "Time", colour = "Variable") +
theme_minimal()
```

We now use a Stan version of EpiFilter to estimate the $R_t$ profile.
```{r}
# fit model
fit <- fit_epifilter(
N=length(epidemic_df$i_t),
C=epidemic_df$i_t,
w=epidemic_df$w_dist
)
# look at MCMC summaries
print(fit, c("sigma", "R"))
```

We now overlay the estimated $R_t$ versus the actual values. The estimated $R_t$ values
coincide reasonably with the true values.
```{r}
# extract posterior quantiles
R_draws <- rstan::extract(fit, "R")[[1]]
lower <- apply(R_draws, 2, function(x) quantile(x, 0.025))
middle <- apply(R_draws, 2, function(x) quantile(x, 0.5))
upper <- apply(R_draws, 2, function(x) quantile(x, 0.975))
# plot
epidemic_df %>%
mutate(
lower=lower,
middle=middle,
upper=upper
) %>%
select(t, R_t, lower, middle, upper) %>%
ggplot(aes(x=t, y=R_t)) +
geom_line(colour="red") +
geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
geom_line(aes(y=middle))
```


0 comments on commit 332cd69

Please sign in to comment.