Merge pull request #1 from ben18785/add-simulation-functions

Create simulation.R
ben18785 · Jul 29, 2024 · 332cd69 · 332cd69
2 parents ea78218 + 953afaa
commit 332cd69
Show file tree

Hide file tree

Showing 4 changed files with 247 additions and 5 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,7 @@ Description: This package uses Stan to estimate epidemiological quantities of in
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.2
+RoxygenNote: 7.3.2
 Biarch: true
 Depends: 
     R (>= 3.4.0)

diff --git a/R/fit_models.R b/R/fit_models.R
@@ -4,17 +4,22 @@
 #' @param N number of data points
 #' @param C case counts
 #' @param w discretised serial interval
+#' @param is_sampling Boolean indicating if sampling is to be used (if TRUE) or
+#' optimisation (if FALSE). Defaults to TRUE
 #' @param ...
 #'
 #' @return a stanfit object
 #' @export
-fit_epifilter <- function(N, C, w, ...) {
+fit_epifilter <- function(N, C, w, is_sampling=TRUE, ...) {
   wmax <- length(w)
   standata <- list(N=N,
                    C=C,
                    wmax=wmax,
                    w=w)
-  out <- rstan::sampling(stanmodels$epifilter, data = standata, ...)
+  if(is_sampling)
+    out <- rstan::sampling(stanmodels$epifilter, data = standata, ...)
+  else
+    out <- rstan::optimizing(stanmodels$epifilter, data = standata, ...)
   return(out)
 }
 
@@ -39,6 +44,9 @@ fit_epifilter_covariates <- function(N, C, w, X, ...) {
                    wmax=wmax,
                    N_covariates=N_covariates,
                    X=X)
-  out <- rstan::sampling(stanmodels$epifilter, data = standata, ...)
+  if(is_sampling)
+    out <- rstan::sampling(stanmodels$epifilter_covariates, data = standata, ...)
+  else
+    out <- rstan::optimizing(stanmodels$epifilter_covariates, data = standata, ...)
   return(out)
 }
diff --git a/R/simulation.R b/R/simulation.R
@@ -0,0 +1,98 @@
+
+
+#' Generate a Discretized Serial Interval Distribution
+#'
+#' This function generates a discretized serial interval distribution based on
+#' the gamma distribution characterized by the specified mean and standard deviation.
+#'
+#' @param nt An integer specifying the number of time points (length of the distribution).
+#' @param mean_si A numeric value representing the mean of the serial interval distribution.
+#' @param sd_si A numeric value representing the standard deviation of the serial interval distribution.
+#'
+#' @return A numeric vector of length `nt` representing the discretized serial interval distribution.
+#'
+#' @details
+#' The serial interval distribution is discretized using a gamma distribution
+#' with shape and scale parameters derived from the specified mean and standard deviation.
+#' The discretization is performed by computing the difference in the cumulative distribution
+#' function (CDF) of the gamma distribution at successive integer time points.
+#'
+#' @examples
+#' \dontrun{
+#' generate_vector_serial(10, 5, 2)
+#' }
+generate_vector_serial <- function(nt, mean_si, sd_si) {
+
+  t = 1:nt
+
+  # Shape-scale parameters of gamma serial interval
+  pms = c(0,0); pms[1] = mean_si^2/sd_si^2; pms[2] = sd_si^2/mean_si
+
+  # Discretise serial interval distribution
+  tdist = c(0, t); w_dist = rep(0, nt)
+  for (i in 1:nt){
+    w_dist[i] = pgamma(tdist[i+1], shape = pms[1], scale = pms[2]) -
+      pgamma(tdist[i], shape = pms[1], scale = pms[2])
+  }
+
+  w_dist
+}
+
+#' Simulate a Renewal Epidemic Model
+#'
+#' This function simulates an epidemic using a renewal model based on the
+#' specified time-varying reproduction number (Rt), the serial interval distribution
+#' characterized by a gamma distribution, and an initial number of cases.
+#'
+#' @param rt_fun A function that takes a vector of time points and returns a vector
+#'   of reproduction numbers (Rt) corresponding to those time points.
+#' @param nt An integer specifying the number of time points (duration of the epidemic simulation).
+#' @param mean_si A numeric value representing the mean of the serial interval distribution.
+#' @param sd_si A numeric value representing the standard deviation of the serial interval distribution.
+#' @param i_0 An integer specifying the initial number of cases at time point 1.
+#'
+#' @return A list containing the following components:
+#' \describe{
+#'   \item{i_t}{A numeric vector of the incidence of new cases at each time point.}
+#'   \item{lambda_t}{A numeric vector of the total infectiousness at each time point.}
+#'   \item{w_dist}{A numeric vector of the discretized serial interval distribution.}
+#'   \item{r_t}{A numeric vector of the reproduction number (Rt) at each time point.}
+#'   \item{t}{A numeric vector of the time points from 1 to nt.}
+#' }
+#'
+#' @details
+#' The renewal model is a common approach for simulating the spread of infectious diseases.
+#' The total infectiousness at each time point is computed as the convolution of past incidence
+#' with the serial interval distribution. The incidence at each time point is then simulated
+#' as a Poisson random variable with a mean equal to the product of the total infectiousness and
+#' the time-varying reproduction number.
+#'
+#' @examples
+#' \dontrun{
+#' rt_fun <- function(t) { 1.5 * exp(-0.05 * t) }
+#' simulate_renewal_epidemic(rt_fun, 100, 5, 2, 10)
+#' }
+simulate_renewal_epidemic <- function(Rt_fun, nt, mean_si, sd_si, i_0){
+
+  # Time series and Rt
+  t = 1:nt
+  Rt <- vector(length = nt)
+  for(i in seq_along(Rt)) {
+    Rt[i] = rt_fun(t[i])
+  }
+
+  # Total infectiousness and incidence with initial imports
+  Lt = rep(0, nt); It = Lt; It[1] = i_0; Lt[1] = It[1]
+
+  w_dist <- generate_vector_serial(nt, mean_si, sd_si)
+
+  # Simulate from standard renewal model
+  for(i in 2:nt){
+    # Total infectiousness is a convolution
+    Lt[i] = sum(It[seq(i-1, 1, -1)]*w_dist[1:(i-1)])
+    # Poisson renewal model
+    It[i] = rpois(1, Lt[i]*Rt[i])
+  }
+
+  data.frame(t=t, i_t=It, R_t=Rt, lambda_t=Lt, w_dist=w_dist)
+}
diff --git a/vignettes/fitting_synthetic_data_using_epidp.Rmd b/vignettes/fitting_synthetic_data_using_epidp.Rmd
@@ -16,6 +16,12 @@ knitr::opts_chunk$set(
 
 ```{r setup}
 library(epidp)
+library(ggplot2)
+library(dplyr)
+library(magrittr)
+library(purrr)
+library(tidyr)
+options(mc.cores=4)
 ```
 
 
@@ -28,5 +34,135 @@ I_t \sim \text{Poisson}(R_t \Lambda_t),
 
 where $\Lambda:=\sum_{s=1}^t \omega_s I_{t-s}$.
 
+
 ## Step function in $R_t$
-We first generate case data assuming a step function in $R_t$.
+We first generate case data assuming a step function for $R_t$.
+```{r}
+rt_fun = function(t){
+  if(t <= 60)
+    R = 2
+  else if (t <= 90)
+    R = 0.5
+  else
+    R = 1
+  R
+}
+
+# simulation parameters
+nt <- 200
+mean_si <- 6.5
+sd_si <- 4.03
+i_0 <- 10
+
+# data frame of outputs
+epidemic_df <- simulate_renewal_epidemic(rt_fun, nt, mean_si, sd_si, i_0)
+
+# plot
+epidemic_df %>%
+  select(-c(w_dist, lambda_t)) %>%
+  pivot_longer(c(i_t, R_t)) %>%
+  ggplot(aes(x = t, y = value, colour = name)) +
+  geom_line() +
+  scale_y_continuous(
+    name = "Incidence (i_t)",
+    sec.axis = sec_axis(~ . / 1000, name = "Reproduction Number (R_t)")
+  ) +
+  labs(x = "Time", colour = "Variable") +
+  theme_minimal()
+```
+
+We now use a Stan version of EpiFilter to estimate the maximum a posteriori estimates
+of $R_t$ and overlay these on top of the actual values. Note, these estimates do
+not have uncertainty associated with them but the benefit of this is that estimation
+is instantaneous. The estimates are close to the actual $R_t$ values after an initial
+period when case counts are low.
+```{r}
+# fit model
+fit <- fit_epifilter(
+  N=length(epidemic_df$i_t),
+  C=epidemic_df$i_t,
+  w=epidemic_df$w_dist,
+  is_sampling=FALSE,
+  as_vector=FALSE
+)
+
+# plot
+R <- fit$par$R
+epidemic_df %>% 
+  mutate(estimated=R) %>% 
+  rename(true=R_t) %>% 
+  select(t, estimated, true) %>% 
+  pivot_longer(c(estimated, true)) %>% 
+  ggplot(aes(x=t, y=value)) +
+  geom_line(aes(colour=name)) +
+  scale_color_brewer("R_t", palette = "Dark2") +
+  ylab("R_t")
+```
+
+
+## Sinusoidal function in $R_t$
+We now generate case data assuming a sinusoidal $R_t$.
+```{r}
+# define sinusoidal Rt
+rt_fun <- function(t) {
+  1.3 + 1.2 * sin(4 * (pi / 180) * t)
+}
+nt <- 200
+mean_si <- 6.5
+sd_si <- 4.03
+i_0 <- 10
+
+# data frame of outputs
+epidemic_df <- simulate_renewal_epidemic(rt_fun, nt, mean_si, sd_si, i_0)
+
+# plot
+epidemic_df %>%
+  select(-c(w_dist, lambda_t)) %>%
+  pivot_longer(c(i_t, R_t)) %>%
+  ggplot(aes(x = t, y = value, colour = name)) +
+  geom_line() +
+  scale_y_continuous(
+    name = "Incidence (i_t)",
+    sec.axis = sec_axis(~ . / 1000, name = "Reproduction Number (R_t)")
+  ) +
+  labs(x = "Time", colour = "Variable") +
+  theme_minimal()
+```
+
+We now use a Stan version of EpiFilter to estimate the $R_t$ profile.
+```{r}
+# fit model
+fit <- fit_epifilter(
+  N=length(epidemic_df$i_t),
+  C=epidemic_df$i_t,
+  w=epidemic_df$w_dist
+)
+
+# look at MCMC summaries
+print(fit, c("sigma", "R"))
+```
+
+We now overlay the estimated $R_t$ versus the actual values. The estimated $R_t$ values
+coincide reasonably with the true values.
+```{r}
+# extract posterior quantiles
+R_draws <- rstan::extract(fit, "R")[[1]]
+lower <- apply(R_draws, 2, function(x) quantile(x, 0.025))
+middle <- apply(R_draws, 2, function(x) quantile(x, 0.5))
+upper <- apply(R_draws, 2, function(x) quantile(x, 0.975))
+
+# plot
+epidemic_df %>% 
+  mutate(
+    lower=lower,
+    middle=middle,
+    upper=upper
+  ) %>% 
+  select(t, R_t, lower, middle, upper) %>% 
+  ggplot(aes(x=t, y=R_t)) +
+  geom_line(colour="red") +
+  geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
+  geom_line(aes(y=middle))
+```
+
+