diff --git a/DESCRIPTION b/DESCRIPTION
index 76174c9..167acce 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: CohortMethod
 Type: Package
 Title: New-User Cohort Method with Large Scale Propensity and Outcome Models
-Version: 5.3.0
-Date: 2024-05-31
+Version: 5.4.0
+Date: 2024-09-27
 Authors@R: c(
   person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut", "cre")),
   person("Marc", "Suchard", role = c("aut")),
diff --git a/NEWS.md b/NEWS.md
index 6979e5c..96baca9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-CohortMethod 5.3.1
+CohortMethod 5.4.0
 ==================
 
 Changes:
@@ -9,6 +9,8 @@ Changes:
 
 3. The `cohorts` argument of `insertExportedResultsInSqlite()` has column `cohortId` renamed to `cohortDefinitionId`.
 
+4. Added computation of MDRR for logistic models.
+
 
 CohortMethod 5.3.0
diff --git a/R/Power.R b/R/Power.R
index 2c8eca8..90c7371 100644
--- a/R/Power.R
+++ b/R/Power.R
@@ -21,7 +21,8 @@
 #' @details
 #' Compute the minimum detectable relative risk (MDRR) and expected standard error (SE) for a given
 #' study population, using the actual observed sample size and number of outcomes. Currently, only
-#' computations for Cox models are implemented. For Cox model, the computations by Schoenfeld (1983) is used.
+#' computations for Cox and logistic models are implemented. For the Cox model, the computation by
+#' Schoenfeld (1983) is used. For logistic models, Wald's z-test is used.
 #'
 #' @param population      A data frame describing the study population as created using the
 #'                        \code{\link{createStudyPopulation}} function. This should at least have these
@@ -46,14 +47,16 @@ computeMdrr <- function(population, alpha = 0.05, power = 0.8, twoSided = TRUE,
   checkmate::assertNumber(alpha, lower = 0, upper = 1, add = errorMessages)
   checkmate::assertNumber(power, lower = 0, upper = 1, add = errorMessages)
   checkmate::assertLogical(twoSided, len = 1, add = errorMessages)
-  checkmate::assertChoice(modelType, c("cox"), add = errorMessages)
+  checkmate::assertChoice(modelType, c("cox", "logistic"), add = errorMessages)
   checkmate::reportAssertions(collection = errorMessages)
 
   pTarget <- mean(population$treatment)
   totalEvents <- sum(population$outcomeCount != 0)
+  totalSubjects <- nrow(population)
   mdrr <- computeMdrrFromAggregateStats(
     pTarget = pTarget,
     totalEvents = totalEvents,
+    totalSubjects = totalSubjects,
     alpha = alpha,
     power = power,
     twoSided = twoSided,
@@ -74,7 +77,7 @@
   return(result)
 }
 
-computeMdrrFromAggregateStats <- function(pTarget, totalEvents, alpha = 0.05, power = 0.8, twoSided = TRUE, modelType = "cox") {
+computeMdrrFromAggregateStats <- function(pTarget, totalEvents, totalSubjects, alpha = 0.05, power = 0.8, twoSided = TRUE, modelType = "cox") {
   if (totalEvents == 0) {
     return(Inf)
   }
@@ -83,15 +86,22 @@ computeMdrrFromAggregateStats <- function(pTarget, totalEvents, alpha = 0.05, po
   } else {
     z1MinAlpha <- qnorm(1 - alpha)
   }
-  zBeta <- -qnorm(1 - power)
-
-  pComparator <- 1 - pTarget
   if (modelType == "cox") {
+    zBeta <- -qnorm(1 - power)
+    pComparator <- 1 - pTarget
     mdrr <- exp(sqrt((zBeta + z1MinAlpha)^2 / (totalEvents * pTarget * pComparator)))
+  } else if (modelType == "logistic") {
+    pBaseline <- totalEvents / totalSubjects
+    se <- sqrt((1 / (totalEvents * (1 - pBaseline))))
+    fun <- function(z) {
+      pnorm(z - z1MinAlpha) - power
+    }
+    z <- uniroot(fun, c(0, 1e+07))$root
+    mdrr <- exp(z * se)
   } else {
-    return(NA)
+    stop("Unknown model type: ", modelType)
   }
+  return(mdrr)
 }
 
 #' Get the distribution of follow-up time
diff --git a/R/RunAnalyses.R b/R/RunAnalyses.R
index 1899a3a..25c7c2e 100644
--- a/R/RunAnalyses.R
+++ b/R/RunAnalyses.R
@@ -1723,12 +1723,13 @@ summarizeResults <- function(referenceTable, outputFolder, mainFileName, interac
         twoSided = FALSE,
         upper = TRUE)
     }
-    pTarget <- outcomeModel$populationCounts$targetExposures /
-      (outcomeModel$populationCounts$targetExposures + outcomeModel$populationCounts$comparatorExposures)
+    totalSubjects <- outcomeModel$populationCounts$targetExposures + outcomeModel$populationCounts$comparatorExposures
     totalEvents <- outcomeModel$outcomeCounts$targetOutcomes + outcomeModel$outcomeCounts$comparatorOutcomes
+    pTarget <- outcomeModel$populationCounts$targetExposures / totalSubjects
     mdrr <- computeMdrrFromAggregateStats(
       pTarget = pTarget,
       totalEvents = totalEvents,
+      totalSubjects = totalSubjects,
       modelType = outcomeModel$outcomeModelType
     )
     attrition <- getAttritionTable(outcomeModel)
diff --git a/man/computeMdrr.Rd b/man/computeMdrr.Rd
index b15db16..b7fb9f6 100644
--- a/man/computeMdrr.Rd
+++ b/man/computeMdrr.Rd
@@ -35,7 +35,8 @@ Compute the minimum detectable relative risk
 \details{
 Compute the minimum detectable relative risk (MDRR) and expected standard error (SE) for a given
 study population, using the actual observed sample size and number of outcomes. Currently, only
-computations for Cox models are implemented. For Cox model, the computations by Schoenfeld (1983) is used.
+computations for Cox and logistic models are implemented. For the Cox model, the computation by
+Schoenfeld (1983) is used. For logistic models, Wald's z-test is used.
 }
 \references{
 Schoenfeld DA (1983) Sample-size formula for the proportional-hazards regression model, Biometrics,
diff --git a/tests/testthat/test-power.R b/tests/testthat/test-power.R
new file mode 100644
index 0000000..44ec069
--- /dev/null
+++ b/tests/testthat/test-power.R
@@ -0,0 +1,34 @@
+library(testthat)
+library(CohortMethod)
+
+test_that("Logistic regression power calculations", {
+  alpha <- 0.05  # Significance level
+  power <- 0.80  # Desired power
+  p_null <- 0.1  # Baseline probability of event
+  n <- 400       # Sample size
+
+  mdrr <- CohortMethod:::computeMdrrFromAggregateStats(pTarget = NA,
+                                                       totalEvents = n * p_null,
+                                                       totalSubjects = n,
+                                                       alpha = alpha,
+                                                       power = power,
+                                                       twoSided = TRUE,
+                                                       modelType = "logistic")
+
+
+
+  # Estimate the coefficient (log(odds ratio))
+  beta <- log(mdrr)
+
+  # Estimate the standard error (for a simple logistic model, it's based on p_null and sample size)
+  se_beta <- sqrt((1 / (n * p_null * (1 - p_null))))
+
+  # Wald Z statistic
+  z_value <- beta / se_beta
+
+  # Determine power using normal distribution
+  z_critical <- qnorm(1 - alpha / 2)
+  power_achieved <- pnorm(z_value - z_critical)
+
+  expect_equal(power_achieved, power, tolerance = 1e-3)
+})
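For context on the new logistic branch in computeMdrrFromAggregateStats(): the uniroot() call solves pnorm(z - z1MinAlpha) = power for z, which for a two-sided test reduces to z = qnorm(1 - alpha / 2) + qnorm(power), and the MDRR is then exp(z * se) with se = sqrt(1 / (totalEvents * (1 - pBaseline))). The R sketch below is illustrative only (it is not part of the patch) and assumes the same toy inputs as the new unit test: 400 subjects and a 10% baseline event probability.

alpha <- 0.05
power <- 0.80
totalSubjects <- 400
totalEvents <- 0.1 * totalSubjects               # 10% baseline event probability

pBaseline <- totalEvents / totalSubjects         # 0.1 in this toy example
se <- sqrt(1 / (totalEvents * (1 - pBaseline)))  # standard error used by the logistic branch

# Closed-form equivalent of the uniroot() search over pnorm(z - z1MinAlpha) - power:
z <- qnorm(1 - alpha / 2) + qnorm(power)
mdrr <- exp(z * se)
print(mdrr)  # ~1.60: smallest odds ratio detectable with 80% power at two-sided alpha = 0.05

Plugging the same numbers into CohortMethod:::computeMdrrFromAggregateStats(pTarget = NA, totalEvents = 40, totalSubjects = 400, modelType = "logistic") should return the same value; the new test verifies this indirectly by back-calculating the achieved power from the returned MDRR.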