diff --git a/DESCRIPTION b/DESCRIPTION index 439208c..b7fafc8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,3 +39,4 @@ Suggests: VignetteBuilder: knitr URL: https://ben18785.github.io/epidp/ Config/testthat/edition: 3 +LazyData: true diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..104b511 --- /dev/null +++ b/R/data.R @@ -0,0 +1,14 @@ +#' Daily COVID-19 case data for Bogota and Medellin and corresponding Google mobility data +#' +#' @format ## `covid_colombia_cases_deaths_mobility` +#' A data frame with 1348 rows and 10 columns: +#' \describe{ +#' \item{date}{Daily} +#' \item{city}{Bogota or Medellin} +#' \item{cases}{Daily counts of COVID-19 cases} +#' \item{deaths}{Daily counts of COVID-19 deaths} +#' ... +#' } +#' @source +#' @source +"covid_colombia_cases_deaths_mobility" diff --git a/data/covid_colombia_cases_deaths_mobility.rda b/data/covid_colombia_cases_deaths_mobility.rda new file mode 100644 index 0000000..863b3d2 Binary files /dev/null and b/data/covid_colombia_cases_deaths_mobility.rda differ diff --git a/vignettes/articles/fitting_real_covid19_data.Rmd b/vignettes/articles/fitting_real_covid19_data.Rmd new file mode 100644 index 0000000..9190af9 --- /dev/null +++ b/vignettes/articles/fitting_real_covid19_data.Rmd @@ -0,0 +1,136 @@ +--- +title: "Fitting real COVID-19 case data" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(epidp) +library(ggplot2) +library(dplyr) +library(magrittr) +library(purrr) +library(tidyr) +``` + +# Estimating $R_t$ for Bogota using only the case data +We first load the COVID-19 data for Colombia and graph it. +```{r} +# load data +data("covid_colombia_cases_deaths_mobility") + +# plot +covid_colombia_cases_deaths_mobility %>% + pivot_longer(c(cases, deaths)) %>% + ggplot(aes(x=date, y=value)) + + geom_line() + + facet_grid(vars(name), vars(city), + scales="free") + + xlab("Date") + + ylab("Count") +``` +We first estimate $R_t$ for Bogota using only the case data using optimisation to give us a quick set of estimates. +```{r} +df_bogota <- covid_colombia_cases_deaths_mobility %>% + filter(city=="Bogota") + +# generate serial interval for COVID-19 based on reasonable mean, sd +mean_si <- 6.5 +sd_si <- 4.03 +w <- generate_vector_serial(nrow(df_bogota), mean_si, sd_si) + +# fit using optimisation +fit <- fit_epifilter( + N=nrow(df_bogota), + C=df_bogota$cases, + w=w, + is_sampling=FALSE, + as_vector=FALSE +) + +# plot +R <- fit$par$R +df_bogota %>% + mutate(Rt=R) %>% + select(date, Rt, cases) %>% + filter(date >= as.Date("2020-04-01")) %>% + pivot_longer(-date) %>% + ggplot(aes(x=date, y=value)) + + geom_line() + + scale_color_brewer("R_t", palette = "Dark2") + + ylab("R_t") + + facet_grid(vars(name), scales = "free") +``` +We now fit using a fully Bayesian framework which outputs uncertainty. +```{r} +fit <- fit_epifilter( + N=nrow(df_bogota), + C=df_bogota$cases, + w=w, + is_sampling=TRUE, + iter=50, + chains=1 # as CRAN does not allow multiple cores +) + +# extract posterior quantiles +R_draws <- rstan::extract(fit, "R")[[1]] +lower <- apply(R_draws, 2, function(x) quantile(x, 0.025)) +middle <- apply(R_draws, 2, function(x) quantile(x, 0.5)) +upper <- apply(R_draws, 2, function(x) quantile(x, 0.975)) + +# plot +df_bogota %>% + mutate( + lower=lower, + middle=middle, + upper=upper + ) %>% + select(date, lower, middle, upper) %>% + filter(date >= as.Date("2020-04-01")) %>% + ggplot(aes(x=date)) + + geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.6) + + geom_line(aes(y=middle), colour="blue") + + geom_hline(yintercept = 1, linetype=2, colour="orange") + + xlab("Date") + + ylab("R_t") +``` + +# Probing the drivers of $R_t$ using mobility data +We assume a relationship between workplace mobility $m_t$ and $R_t$ of the form: + +$$ +\log(R_t) = \alpha + \beta m_t + \epsilon_t, +$$ + +where $\epsilon_t$ represents the components of $R_t$ unrelated to workplace mobility. + +We now fit this model using `epidp`. + +```{r} +X <- tibble( + cons=rep(1, nrow(df_bogota)), + m=df_bogota$workplaces + ) %>% + mutate( + m=scale(m)[, 1] + ) %>% + as.matrix() + +fit <- fit_epifilter_covariates( + N=nrow(df_bogota), + C=df_bogota$cases, + w=w, + X=X, + is_sampling=TRUE, + iter=50, # probably too few iterations + chains=1 # as CRAN does not allow multiple cores; should run with more cores +) + +print(fit, "beta[2]") +``` +This negative association probably is a result of individuals responding to the COVID-19 pandemic conditions or governmental policy. diff --git a/vignettes/articles/fitting_synthetic_data_including_covariates.Rmd b/vignettes/articles/fitting_synthetic_data_including_covariates.Rmd index 1ca9deb..80276cc 100644 --- a/vignettes/articles/fitting_synthetic_data_including_covariates.Rmd +++ b/vignettes/articles/fitting_synthetic_data_including_covariates.Rmd @@ -19,8 +19,7 @@ library(tidyr) options(mc.cores=4) ``` -In this document, we explore how the incorporation of covariate information affects -estimation of $R_t$. +In this document, we explore how the incorporation of covariate information affects estimation of $R_t$. ## Added Gaussian noise around a sinusoidal covariate-driven mean $R_t$ ```{r} diff --git a/vignettes/fitting_synthetic_data_using_epidp.Rmd b/vignettes/fitting_synthetic_data_using_epidp.Rmd index 24c3d64..4cf24bd 100644 --- a/vignettes/fitting_synthetic_data_using_epidp.Rmd +++ b/vignettes/fitting_synthetic_data_using_epidp.Rmd @@ -21,12 +21,10 @@ library(dplyr) library(magrittr) library(purrr) library(tidyr) -options(mc.cores=4) ``` -In this vignette, we show how `epidp` can be used to generate then fit to synthetically -generated infection data. Throughout, we assume a generating process of the form: +In this vignette, we show how `epidp` can be used to generate then fit to synthetically generated infection data. Throughout, we assume a generating process of the form: \begin{equation} I_t \sim \text{Poisson}(R_t \Lambda_t), @@ -149,8 +147,7 @@ fit <- fit_epifilter( print(fit, c("sigma", "R")) ``` -We now overlay the estimated $R_t$ versus the actual values. The estimated $R_t$ values -coincide reasonably with the true values. +We now overlay the estimated $R_t$ versus the actual values. The estimated $R_t$ values coincide reasonably with the true values. ```{r} # extract posterior quantiles R_draws <- rstan::extract(fit, "R")[[1]]