diff --git a/NEWS.md b/NEWS.md index 03691f93..ad87a36b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,9 +7,17 @@ Changes: 2. Added the `getGeneralizabilityTable()` function. -3. Improved computation of overall standard deviation when computing covariate balance. Should produce more accurate balance estimations. +3. Improved computation of overall standard deviation when computing covariate balance (actually computing the SD instead of taking the mean of the target and comparator). Should produce more accurate balance estimations. -4. Generated population objects now keep track of likely target estimator (e.g. 'ATT'). Informs selection of base population when calling `getGeneralizabilityTable()`. +4. Generated population objects now keep track of likely target estimator (e.g. 'ATT' or 'ATE'). This informs selection of base population when calling `getGeneralizabilityTable()`. + +5. Deprecated the `attritionFractionThreshold` argument of the `createCmDiagnosticThresholds` function, and instead added the `generalizabilitySdmThreshold` argument. + +6. The results schema specification of the `exportToCsv()` function has changed: + - Removed the `attrition_fraction` and `attrition_diagnostic` fields from the `cm_diagnostics_summary` table. + - Added the `generalizability_max_sdm` and `generalizability_diagnostic` fields to the `cm_diagnostics_summary` table. + - Added the `mean_before`, `mean_after`, `target_std_diff`, `comparator_std_diff`, and `target_comparator_std_diff` fields to both the `cm_covariate_balance` and `cm_shared_covariate_balance` tables. + Bugfixes: diff --git a/R/Export.R b/R/Export.R index 0321f741..0782e35b 100644 --- a/R/Export.R +++ b/R/Export.R @@ -45,10 +45,11 @@ getResultsDataModel <- function() { #' any covariate has an SDM exceeding this threshold, the diagnostic will #' fail. #' @param equipoiseThreshold What is the minimum required equipoise? -#' @param attritionFractionThreshold What is the maximum allowed attrition fraction? 
If the attrition -#' between the input target cohort and the target cohort entering the -#' outcome model is greater than this fraction, the diagnostic will -#' fail. +#' @param attritionFractionThreshold DEPRECATED. See `generalizabilitySdmThreshold` instead. +#' @param generalizabilitySdmThreshold What is the maximum allowed standardized difference of mean +#' (SDM) when comparing the population before and after PS +#' adjustments? If the SDM is greater than this value, the diagnostic +#' will fail. #' #' @return #' An object of type `CmDiagnosticThresholds`. @@ -58,14 +59,19 @@ createCmDiagnosticThresholds <- function(mdrrThreshold = 10, easeThreshold = 0.25, sdmThreshold = 0.1, equipoiseThreshold = 0.2, - attritionFractionThreshold = 1) { + attritionFractionThreshold = NULL, + generalizabilitySdmThreshold = 1) { errorMessages <- checkmate::makeAssertCollection() checkmate::assertNumeric(mdrrThreshold, len = 1, lower = 0, add = errorMessages) checkmate::assertNumeric(easeThreshold, len = 1, lower = 0, add = errorMessages) checkmate::assertNumeric(sdmThreshold, len = 1, lower = 0, add = errorMessages) checkmate::assertNumeric(equipoiseThreshold, len = 1, lower = 0, add = errorMessages) - checkmate::assertNumeric(attritionFractionThreshold, len = 1, lower = 0, add = errorMessages) + checkmate::assertNumeric(generalizabilitySdmThreshold, len = 1, lower = 0, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) + if (!is.null(attritionFractionThreshold)) { + warning("The attritionFractionThreshold argument is deprecated and will be ignored. 
", + "See generalizabilitySdmThreshold instead.") + } thresholds <- list() for (name in names(formals(createCmDiagnosticThresholds))) { thresholds[[name]] <- get(name) @@ -535,7 +541,8 @@ exportCohortMethodResults <- function(outputFolder, "calibratedCi95Ub", "calibratedP", "calibratedLogRr", - "calibratedSeLogRr" + "calibratedSeLogRr", + "targetEstimator" ) %>% mutate(databaseId = !!databaseId) %>% enforceMinCellValue("targetSubjects", minCellCount) %>% @@ -579,7 +586,8 @@ exportCmInteractionResults <- function(outputFolder, "calibratedCi95Ub", "calibratedP", "calibratedLogRr", - "calibratedSeLogRr" + "calibratedSeLogRr", + "targetEstimator" ) %>% mutate(databaseId = !!databaseId) %>% enforceMinCellValue("targetSubjects", minCellCount) %>% @@ -730,7 +738,13 @@ tidyBalance <- function(balance, minCellCount) { stdDiffBefore = "beforeMatchingStdDiff", targetMeanAfter = "afterMatchingMeanTarget", comparatorMeanAfter = "afterMatchingMeanComparator", - stdDiffAfter = "afterMatchingStdDiff" + stdDiffAfter = "afterMatchingStdDiff", + meanBefore = "beforeMatchingMean", + meanAfter = "afterMatchingMean", + "targetStdDiff", + "comparatorStdDiff", + "targetComparatorStdDiff", + ) %>% mutate( targetMeanBefore = ifelse(is.na(.data$targetMeanBefore), 0, .data$targetMeanBefore), @@ -738,14 +752,25 @@ tidyBalance <- function(balance, minCellCount) { stdDiffBefore = ifelse(is.na(.data$stdDiffBefore), 0, .data$stdDiffBefore), targetMeanAfter = ifelse(is.na(.data$targetMeanAfter), 0, .data$targetMeanAfter), comparatorMeanAfter = ifelse(is.na(.data$comparatorMeanAfter), 0, .data$comparatorMeanAfter), - stdDiffAfter = ifelse(is.na(.data$stdDiffAfter), 0, .data$stdDiffAfter) + stdDiffAfter = ifelse(is.na(.data$stdDiffAfter), 0, .data$stdDiffAfter), + meanBefore = ifelse(is.na(.data$meanBefore), 0, .data$meanBefore), + meanAfter = ifelse(is.na(.data$meanAfter), 0, .data$meanAfter), + targetStdDiff = ifelse(is.na(.data$targetStdDiff), 0, .data$targetStdDiff), + comparatorStdDiff = 
ifelse(is.na(.data$comparatorStdDiff), 0, .data$comparatorStdDiff), + targetComparatorStdDiff = ifelse(is.na(.data$targetComparatorStdDiff), 0, .data$targetComparatorStdDiff) ) %>% filter(!(round(.data$targetMeanBefore) == 0 & round(.data$comparatorMeanBefore, 3) == 0 & round(.data$stdDiffBefore, 3) == 0 & round(.data$targetMeanAfter, 3) == 0 & round(.data$comparatorMeanAfter, 3) == 0 & - round(.data$stdDiffAfter, 3) == 0)) %>% + round(.data$stdDiffAfter, 3) == 0 & + round(.data$meanBefore, 3) == 0 & + round(.data$meanAfter, 3) == 0 & + round(.data$targetStdDiff, 3) == 0 & + round(.data$comparatorStdDiff, 3) == 0 & + round(.data$targetComparatorStdDiff, 3) == 0) + ) %>% enforceMinCellValue("targetMeanBefore", minCellCount / inferredTargetBeforeSize, silent = TRUE @@ -762,13 +787,26 @@ tidyBalance <- function(balance, minCellCount) { minCellCount / inferredComparatorAfterSize, silent = TRUE ) %>% + enforceMinCellValue("meanBefore", + minCellCount / inferredComparatorAfterSize, + silent = TRUE + ) %>% + enforceMinCellValue("meanAfter", + minCellCount / inferredComparatorAfterSize, + silent = TRUE + ) %>% mutate( targetMeanBefore = round(.data$targetMeanBefore, 3), comparatorMeanBefore = round(.data$comparatorMeanBefore, 3), stdDiffBefore = round(.data$stdDiffBefore, 3), targetMeanAfter = round(.data$targetMeanAfter, 3), comparatorMeanAfter = round(.data$comparatorMeanAfter, 3), - stdDiffAfter = round(.data$stdDiffAfter, 3) + stdDiffAfter = round(.data$stdDiffAfter, 3), + meanBefore = round(.data$meanBefore, 3), + meanAfter = round(.data$meanAfter, 3), + targetStdDiff = round(.data$targetStdDiff, 3), + comparatorStdDiff = round(.data$comparatorStdDiff, 3), + targetComparatorStdDiff = round(.data$targetComparatorStdDiff, 3) ) %>% return() } @@ -1119,59 +1157,64 @@ exportDiagnosticsSummary <- function(outputFolder, cmDiagnosticThresholds) { message("- diagnostics_summary table") reference <- getFileReference(outputFolder) + resultsSummary <- 
getResultsSummary(outputFolder) - getMaxSdm <- function(balanceFile) { + getMaxSdms <- function(balanceFile) { balance <- readRDS(file.path(outputFolder, balanceFile)) if (nrow(balance) == 0) { - return(as.numeric(NA)) + tibble(balanceFile = !!balanceFile, + maxSdm = as.numeric(NA), + maxTargetSdm = as.numeric(NA), + maxComparatorSdm = as.numeric(NA), + maxTargetComparatorSdm = as.numeric(NA)) %>% + return() } else { - return(max(abs(balance$afterMatchingStdDiff), na.rm = TRUE)) + tibble(balanceFile = !!balanceFile, + maxSdm = as.numeric(max(abs(balance$afterMatchingStdDiff), na.rm = TRUE)), + maxTargetSdm = as.numeric(max(abs(balance$targetStdDiff), na.rm = TRUE)), + maxComparatorSdm = as.numeric(max(abs(balance$comparatorStdDiff), na.rm = TRUE)), + maxTargetComparatorSdm = as.numeric(max(abs(balance$targetComparatorStdDiff), na.rm = TRUE))) %>% + return() } } - getEquipoise <- function(sharedPsFile) { ps <- readRDS(file.path(outputFolder, sharedPsFile)) - return(computeEquipoise(ps)) + tibble(sharedPsFile = !!sharedPsFile, + equipoise = computeEquipoise(ps)) %>% + return() } balanceFiles <- reference %>% filter(.data$balanceFile != "") %>% distinct(.data$balanceFile) %>% pull() - maxSdm <- as.numeric(sapply(balanceFiles, getMaxSdm)) - + maxSdm <- bind_rows(lapply(balanceFiles, getMaxSdms)) %>% + select("balanceFile", "maxSdm") sharedBalanceFiles <- reference %>% filter(.data$sharedBalanceFile != "") %>% distinct(.data$sharedBalanceFile) %>% pull() - sharedMaxSdm <- as.numeric(sapply(sharedBalanceFiles, getMaxSdm)) - + sharedMaxSdm <- bind_rows(lapply(sharedBalanceFiles, getMaxSdms)) %>% + rename(sharedBalanceFile = "balanceFile", + sharedMaxSdm = "maxSdm") sharedPsFiles <- reference %>% filter(.data$sharedPsFile != "") %>% distinct(.data$sharedPsFile) %>% pull() - equipoise <- as.numeric(sapply(sharedPsFiles, getEquipoise)) - - results1 <- reference %>% + equipoise <- bind_rows(lapply(sharedPsFiles, getEquipoise)) + results <- reference %>% 
filter(.data$outcomeOfInterest) %>% - left_join(tibble( - balanceFile = balanceFiles, - maxSdm = maxSdm - ), - by = "balanceFile" - ) %>% - left_join(tibble( - sharedBalanceFile = sharedBalanceFiles, - sharedMaxSdm = sharedMaxSdm - ), - by = "sharedBalanceFile" - ) %>% - left_join(tibble( - sharedPsFile = sharedPsFiles, - equipoise = equipoise - ), - by = "sharedPsFile" - ) %>% + inner_join( + resultsSummary, + by = join_by("analysisId", "targetId", "comparatorId", "outcomeId")) %>% + left_join(maxSdm, by = "balanceFile") %>% + left_join(sharedMaxSdm, by = "sharedBalanceFile") %>% + mutate(generalizabilityMaxSdm = if_else(.data$targetEstimator == "att", + .data$maxTargetSdm, + if_else(.data$targetEstimator == "atu", + .data$maxComparatorSdm, + .data$maxTargetComparatorSdm))) %>% + left_join(equipoise, by = "sharedPsFile") %>% select( "analysisId", "targetId", @@ -1179,22 +1222,14 @@ exportDiagnosticsSummary <- function(outputFolder, "outcomeId", "maxSdm", "sharedMaxSdm", - "equipoise" - ) - - results2 <- getResultsSummary(outputFolder) %>% - select( - "analysisId", - "targetId", - "comparatorId", - "outcomeId", + "equipoise", "mdrr", - "attritionFraction", + "generalizabilityMaxSdm", "ease" ) - results <- results1 %>% - inner_join(results2, by = c("analysisId", "targetId", "comparatorId", "outcomeId")) %>% + # Apply diagnostics thresholds: + results <- results %>% mutate(balanceDiagnostic = case_when( is.na(.data$maxSdm) ~ "NOT EVALUATED", .data$maxSdm < cmDiagnosticThresholds$sdmThreshold ~ "PASS", @@ -1215,9 +1250,9 @@ exportDiagnosticsSummary <- function(outputFolder, .data$mdrr < cmDiagnosticThresholds$mdrrThreshold ~ "PASS", TRUE ~ "FAIL" )) %>% - mutate(attritionDiagnostic = case_when( - is.na(.data$attritionFraction) ~ "NOT EVALUATED", - .data$attritionFraction < cmDiagnosticThresholds$attritionFractionThreshold ~ "PASS", + mutate(generalizabilityDiagnostic = case_when( + is.na(.data$generalizabilityMaxSdm) ~ "NOT EVALUATED", + .data$generalizabilityMaxSdm < 
cmDiagnosticThresholds$generalizabilitySdmThreshold ~ "PASS", TRUE ~ "FAIL" )) %>% mutate(easeDiagnostic = case_when( @@ -1226,13 +1261,18 @@ exportDiagnosticsSummary <- function(outputFolder, TRUE ~ "FAIL" )) %>% mutate(unblind = ifelse(.data$mdrrDiagnostic != "FAIL" & - .data$attritionDiagnostic != "FAIL" & + .data$generalizabilityDiagnostic != "FAIL" & .data$easeDiagnostic != "FAIL" & .data$equipoiseDiagnostic != "FAIL" & .data$balanceDiagnostic != "FAIL" & .data$sharedBalanceDiagnostic != "FAIL", 1, 0), databaseId = !!databaseId) + # Add deprecated fields: + results <- results %>% + mutate(attritionFraction = as.numeric(NA), + attritionDiagnostic = "NOT EVALUATED") + if (nrow(results) == 0) { results <- createEmptyResult("cm_diagnostics_summary") } diff --git a/R/RunAnalyses.R b/R/RunAnalyses.R index 4db53c98..f4dfc838 100644 --- a/R/RunAnalyses.R +++ b/R/RunAnalyses.R @@ -473,7 +473,7 @@ runCmAnalyses <- function(connectionDetails, tasks <- split(subset, subset$sharedPsFile) cluster <- ParallelLogger::makeCluster(min(length(tasks), multiThreadingSettings$trimMatchStratifyThreads)) ParallelLogger::clusterRequire(cluster, "CohortMethod") - dummy <- ParallelLogger::clusterApply(cluster, tasks, addPsToStudyPop, outputFolder = outputFolder) + dummy <- ParallelLogger::clusterApply(cluster, tasks, addPsToStudyPopForSubset, outputFolder = outputFolder) ParallelLogger::stopCluster(cluster) } } @@ -805,25 +805,37 @@ doFitSharedPsModel <- function(params, refitPsForEveryStudyPopulation) { return(NULL) } -addPsToStudyPop <- function(subset, outputFolder) { +addPsToStudyPopForSubset <- function(subset, outputFolder) { ps <- readRDS(file.path(outputFolder, subset$sharedPsFile[1])) addToStudyPop <- function(i) { refRow <- subset[i, ] studyPop <- readRDS(file.path(outputFolder, refRow$studyPopFile)) - newMetaData <- attr(studyPop, "metaData") - newMetaData$psModelCoef <- attr(ps, "metaData")$psModelCoef - newMetaData$psModelPriorVariance <- attr(ps, 
"metaData")$psModelPriorVariance - idx <- match(studyPop$rowId, ps$rowId) - studyPop$propensityScore <- ps$propensityScore[idx] - studyPop$iptw <- ps$iptw[idx] - attr(studyPop, "metaData") <- newMetaData + studyPop <- addPsToStudyPopulation(studyPop, ps) saveRDS(studyPop, file.path(outputFolder, refRow$psFile)) return(NULL) } plyr::l_ply(1:nrow(subset), addToStudyPop) } +addPsToStudyPopulation <- function(studyPopulation, ps) { + # Merge meta-data + newMetaData <- attr(studyPopulation, "metaData") + psMetaData <- attr(ps, "metaData") + missingColumns <- setdiff(names(psMetaData), names(newMetaData)) + newMetaData <- append(newMetaData, psMetaData[missingColumns]) + attr(studyPopulation, "metaData") <- newMetaData + + # Merge data + missingColumns <- setdiff(names(ps), names(studyPopulation)) + idx <- match(studyPopulation$rowId, ps$rowId) + studyPopulation <- bind_cols( + studyPopulation, + ps[idx, missingColumns] + ) + return(studyPopulation) +} + applyTrimMatchStratify <- function(ps, arguments) { if (!is.null(arguments$trimByPsArgs)) { functionArgs <- arguments$trimByPsArgs @@ -943,12 +955,8 @@ doFitOutcomeModelPlus <- function(params) { studyPop <- do.call("createStudyPopulation", args) if (!is.null(params$args$createPsArgs)) { - # Add PS ps <- getPs(params$sharedPsFile) - idx <- match(studyPop$rowId, ps$rowId) - studyPop$propensityScore <- ps$propensityScore[idx] - studyPop$iptw <- ps$iptw[idx] - ps <- studyPop + ps <- addPsToStudyPopulation(studyPop, ps) } else { ps <- studyPop } @@ -1724,7 +1732,7 @@ summarizeResults <- function(referenceTable, outputFolder, mainFileName, interac seLogRr = if (is.null(coefficient)) NA else outcomeModel$outcomeModelTreatmentEstimate$seLogRr, llr = if (is.null(coefficient)) NA else outcomeModel$outcomeModelTreatmentEstimate$llr, mdrr = !!mdrr, - attritionFraction = !!attritionFraction + targetEstimator = outcomeModel$targetEstimator ) mainResults[[i]] <- mainResult @@ -1741,7 +1749,8 @@ summarizeResults <- 
function(referenceTable, outputFolder, mainFileName, interac ci95Ub = exp(outcomeModel$outcomeModelInteractionEstimates$logUb95[j]), p = !!p, logRr = outcomeModel$outcomeModelInteractionEstimates$logRr[j], - seLogRr = outcomeModel$outcomeModelInteractionEstimates$seLogRr[j] + seLogRr = outcomeModel$outcomeModelInteractionEstimates$seLogRr[j], + targetEstimator = outcomeModel$targetEstimator ) } } diff --git a/R/Viewer.R b/R/Viewer.R index 02cd8c9a..9de4f2b6 100644 --- a/R/Viewer.R +++ b/R/Viewer.R @@ -99,12 +99,12 @@ uploadExportedResults <- function(connectionDetails, message(e) }) }) + rdmsFile <- system.file("csv", "resultsDataModelSpecification.csv", package = "CohortMethod") + specification <- readr::read_csv(file = rdmsFile, show_col_types = FALSE) %>% + SqlRender::snakeCaseToCamelCaseNames() if (!append) { # Create tables - rdmsFile <- system.file("csv", "resultsDataModelSpecification.csv", package = "CohortMethod") - specification <- readr::read_csv(file = rdmsFile, show_col_types = FALSE) %>% - SqlRender::snakeCaseToCamelCaseNames() sql <- ResultModelManager::generateSqlSchema(csvFilepath = rdmsFile) sql <- SqlRender::render( sql = sql, diff --git a/extras/MultiAnalysesVignetteDataFetch.R b/extras/MultiAnalysesVignetteDataFetch.R index da1232b5..238c4d20 100644 --- a/extras/MultiAnalysesVignetteDataFetch.R +++ b/extras/MultiAnalysesVignetteDataFetch.R @@ -237,6 +237,8 @@ cmAnalysis5 <- createCmAnalysis(analysisId = 5, createStudyPopArgs = createStudyPopArgs, createPsArgs = createPsArgs, matchOnPsArgs = matchOnPsArgs, + computeSharedCovariateBalanceArgs = computeSharedCovBalArgs, + computeCovariateBalanceArgs = computeCovBalArgs, fitOutcomeModelArgs = fitOutcomeModelArgs4) interactionCovariateIds <- c(8532001, 201826210, 21600960413) # Female, T2DM, concurent use of antithrombotic agents @@ -252,6 +254,8 @@ cmAnalysis6 <- createCmAnalysis(analysisId = 6, createStudyPopArgs = createStudyPopArgs, createPsArgs = createPsArgs, stratifyByPsArgs = 
stratifyByPsArgs, + computeSharedCovariateBalanceArgs = computeSharedCovBalArgs, + computeCovariateBalanceArgs = computeCovBalArgs, fitOutcomeModelArgs = fitOutcomeModelArgs5) cmAnalysisList <- list(cmAnalysis1, cmAnalysis2, cmAnalysis3, cmAnalysis4, cmAnalysis5, cmAnalysis6) @@ -285,9 +289,9 @@ exportToCsv( ) # Cleanup ---------------------------------------------------------------------- -sql <- "DROP TABLE @resultsDatabaseSchema.outcomes" +sql <- "DROP TABLE @cohortDatabaseSchema.@cohortTable;" connection <- DatabaseConnector::connect(connectionDetails) -DatabaseConnector::renderTranslateExecuteSql(connection, sql, resultsDatabaseSchema = resultsDatabaseSchema) +DatabaseConnector::renderTranslateExecuteSql(connection, sql, cohortDatabaseSchema = cohortDatabaseSchema, cohortTable = cohortTable) DatabaseConnector::disconnect(connection) # Shiny app -------------------------------------------------------------------- diff --git a/extras/SingleStudyVignetteDataFetch.R b/extras/SingleStudyVignetteDataFetch.R index ed49311b..26b864a7 100644 --- a/extras/SingleStudyVignetteDataFetch.R +++ b/extras/SingleStudyVignetteDataFetch.R @@ -36,8 +36,6 @@ cohortDatabaseSchema <- "scratch_mschuemi" cohortTable <- "cm_vignette" - - # Define exposure cohorts ------------------------------------------------------ library(Capr) diff --git a/inst/csv/resultsDataModelSpecification.csv b/inst/csv/resultsDataModelSpecification.csv index 306d557a..32868c08 100644 --- a/inst/csv/resultsDataModelSpecification.csv +++ b/inst/csv/resultsDataModelSpecification.csv @@ -1,172 +1,186 @@ -table_name,column_name,data_type,is_required,primary_key,min_cell_count,description -cm_attrition,sequence_number,int,Yes,Yes,No,The place in the sequence of steps defining the final analysis cohort. 1 indicates the original exposed population without any inclusion criteria. -cm_attrition,description,varchar,Yes,No,No,"A description of the last restriction, e.g. ""Removing persons with the outcome prior"". 
" -cm_attrition,subjects,int,Yes,No,Yes,The number of subjects in the cohort. -cm_attrition,exposure_id,int,Yes,Yes,No,The identifier of the exposure cohort to which the attrition applies. Can be either the target or comparator cohort ID. -cm_attrition,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_attrition,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_attrition,analysis_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_attrition,outcome_id,int,Yes,Yes,No,Foreign key referencing the cm_analysis table. -cm_attrition,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_follow_up_dist,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_follow_up_dist,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_follow_up_dist,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_follow_up_dist,analysis_id,int,Yes,Yes,No,Foreign key referencing the cm_analysis table. -cm_follow_up_dist,target_min_days,float,No,No,No,The minimum number of observation days for a person. -cm_follow_up_dist,target_p_10_days,float,No,No,No,The 10^th^ percentile of number of observation days for a person in the target group. -cm_follow_up_dist,target_p_25_days,float,No,No,No,The 25^th^ percentile of number of observation days for a person in the target group. -cm_follow_up_dist,target_median_days,float,No,No,No,The median number of observation days for a person in the target group. -cm_follow_up_dist,target_p_75_days,float,No,No,No,The 75^th^ percentile of number of observation days for a person in the target group. -cm_follow_up_dist,target_p_90_days,float,No,No,No,The 90^th^ percentile of number of observation days for a person in the target group. -cm_follow_up_dist,target_max_days,float,No,No,No,The maximum number of observation days for a person in the target group. 
-cm_follow_up_dist,comparator_min_days,float,No,No,No,The minimum number of observation days for a person in the comparator group. -cm_follow_up_dist,comparator_p_10_days,float,No,No,No,The 10^th^ percentile of number of observation days for a person in the comparator group. -cm_follow_up_dist,comparator_p_25_days,float,No,No,No,The 25^th^ percentile of number of observation days for a person in the comparator group. -cm_follow_up_dist,comparator_median_days,float,No,No,No,The median number of observation days for a person in the comparator group. -cm_follow_up_dist,comparator_p_75_days,float,No,No,No,The 75^th^ percentile of number of observation days for a person in the comparator group. -cm_follow_up_dist,comparator_p_90_days,float,No,No,No,The 90^th^ percentile of number of observation days for a person in the comparator group. -cm_follow_up_dist,comparator_max_days,float,No,No,No,The maximum number of observation days for a person in the comparator group. -cm_follow_up_dist,target_min_date,Date,No,No,No,The first start date of the target cohort observed in the data (after applying all restrictions). -cm_follow_up_dist,target_max_date,Date,No,No,No,The last start date of the target cohort observed in the data (after applying all restrictions). -cm_follow_up_dist,comparator_min_date,Date,No,No,No,The first start date of the comparator cohort observed in the data (after applying all restrictions). -cm_follow_up_dist,comparator_max_date,Date,No,No,No,The last start date of the comparator cohort observed in the data (after applying all restrictions). -cm_follow_up_dist,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_analysis,analysis_id,int,Yes,Yes,No,A unique identifier for an analysis. -cm_analysis,description,varchar,Yes,No,No,"A description for an analysis, e.g. 'On-treatment'." -cm_analysis,definition,varchar,Yes,No,No,A CohortMethod JSON object specifying the analysis. 
-cm_result,analysis_id,int,Yes,Yes,No,Foreign key referencing the cm_analysis table. -cm_result,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_result,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_result,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_result,rr,float,No,No,No,The estimated relative risk (e.g. the hazard ratio). -cm_result,ci_95_lb,float,No,No,No,The lower bound of the 95% confidence interval of the relative risk. -cm_result,ci_95_ub,float,No,No,No,The upper bound of the 95% confidence interval of the relative risk. -cm_result,p,float,No,No,No,The two-sided p-value considering the null hypothesis of no effect. -cm_result,target_subjects,int,Yes,No,Yes,The number of subject in the target cohort. -cm_result,comparator_subjects,int,Yes,No,Yes,The number of subject in the comparator cohort. -cm_result,target_days,int,Yes,No,No,The number of days observed in the target cohort. -cm_result,comparator_days,int,Yes,No,No,The number of days observed in the comparator cohort. -cm_result,target_outcomes,int,Yes,No,Yes,The number of outcomes observed in the target cohort. -cm_result,comparator_outcomes,int,Yes,No,Yes,The number of outcomes observed in the comparator cohort. -cm_result,log_rr,float,No,No,No,The log of the relative risk. -cm_result,se_log_rr,float,No,No,No,The standard error of the log of the relative risk. -cm_result,llr,float,No,No,No,The log of the likelihood ratio (of the MLE vs the null hypothesis of no effect). -cm_result,calibrated_rr,float,No,No,No,The calibrated relative risk. -cm_result,calibrated_ci_95_lb,float,No,No,No,The lower bound of the calibrated 95% confidence interval of the relative risk. -cm_result,calibrated_ci_95_ub,float,No,No,No,The upper bound of the calibrated 95% confidence interval of the relative risk. -cm_result,calibrated_p,float,No,No,No,The calibrated two-sided p-value. 
-cm_result,calibrated_log_rr,float,No,No,No,The log of the calibrated relative risk. -cm_result,calibrated_se_log_rr,float,No,No,No,The standard error of the log of the calibrated relative risk. -cm_result,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_interaction_result,analysis_id,int,Yes,Yes,No,Foreign key referencing the cm_analysis table. -cm_interaction_result,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_interaction_result,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_interaction_result,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_interaction_result,interaction_covariate_id,int,Yes,Yes,No,Foreign key referencing the cm_covariate table. -cm_interaction_result,rr,float,Yes,No,No,The estimated relative risk (e.g. the ratio of hazard ratios). -cm_interaction_result,ci_95_lb,float,Yes,No,No,The lower bound of the 95% confidence interval of the relative risk. -cm_interaction_result,ci_95_ub,float,Yes,No,No,The upper bound of the 95% confidence interval of the relative risk. -cm_interaction_result,p,float,Yes,No,No,The two-sided p-value considering the null hypothesis of no interaction effect. -cm_interaction_result,target_subjects,int,Yes,No,Yes,The number of subject in the target cohort. -cm_interaction_result,comparator_subjects,int,Yes,No,Yes,The number of subject in the comparator cohort. -cm_interaction_result,target_days,int,Yes,No,No,The number of days observed in the target cohort. -cm_interaction_result,comparator_days,int,Yes,No,No,The number of days observed in the comparator cohort. -cm_interaction_result,target_outcomes,int,Yes,No,Yes,The number of outcomes observed in the target cohort. -cm_interaction_result,comparator_outcomes,int,Yes,No,Yes,The number of outcomes observed in the comparator cohort. -cm_interaction_result,log_rr,float,No,No,No,The log of the relative risk. 
-cm_interaction_result,se_log_rr,float,No,No,No,The standard error of the log of the relative risk. -cm_interaction_result,calibrated_rr,float,No,No,No,The calibrated relative risk. -cm_interaction_result,calibrated_ci_95_lb,float,No,No,No,The lower bound of the calibrated 95% confidence interval of the relative risk. -cm_interaction_result,calibrated_ci_95_ub,float,No,No,No,The upper bound of the calibrated 95% confidence interval of the relative risk. -cm_interaction_result,calibrated_p,float,No,No,No,The calibrated two-sided p-value. -cm_interaction_result,calibrated_log_rr,float,No,No,No,The log of the calibrated relative risk. -cm_interaction_result,calibrated_se_log_rr,float,No,No,No,The standard error of the log of the calibrated relative risk. -cm_interaction_result,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_covariate,covariate_id,bigint,Yes,Yes,No,A unique identified for a covariate. -cm_covariate,covariate_name,varchar,Yes,No,No,"A name for a covariate, e.g. 'Age group: 20-25 years'." -cm_covariate,analysis_id,int,Yes,Yes,No,Foreign key referencing the cm_analysis table. -cm_covariate,covariate_analysis_id,int,Yes,No,No,Foreign key referencing the cm_covariate_analysis table. -cm_covariate,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_covariate_analysis,covariate_analysis_id,int,Yes,Yes,No,A unique identifier for a covariate analysis (only guaranteed to be unique for a given analysis_id and database_id). -cm_covariate_analysis,covariate_analysis_name,varchar,Yes,No,No,"A name for a covariate analysis, e.g. 'Demographics: age group'." -cm_covariate_analysis,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_covariate_balance,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_covariate_balance,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_covariate_balance,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. 
-cm_covariate_balance,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_covariate_balance,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_covariate_balance,covariate_id,bigint,Yes,Yes,No,A foreign key referencing the cm_covariate table. -cm_covariate_balance,target_mean_before,float,No,No,Yes,The mean value of the covariate in the target cohort before propensity score adjustment. -cm_covariate_balance,comparator_mean_before,float,No,No,Yes,The mean value of the covariate in the comparator cohort before propensity score adjustment. -cm_covariate_balance,std_diff_before,float,No,No,No,The standardized difference of the means between the target and comparator cohort before propensity score adjustment. -cm_covariate_balance,target_mean_after,float,No,No,Yes,The mean value of the covariate in the target cohort after propensity score adjustment. -cm_covariate_balance,comparator_mean_after,float,No,No,Yes,The mean value of the covariate in the comparator cohort after propensity score adjustment. -cm_covariate_balance,std_diff_after,float,No,No,No,The standardized difference of the means between the target and comparator cohort after propensity score adjustment. -cm_diagnostics_summary,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_diagnostics_summary,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_diagnostics_summary,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_diagnostics_summary,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_diagnostics_summary,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_diagnostics_summary,max_sdm,float,No,No,No,The maximum absolute standardized difference of mean. -cm_diagnostics_summary,shared_max_sdm,float,No,No,No,The maximum absolute standardized difference of mean of the shared balance (shared across outcomes). 
-cm_diagnostics_summary,equipoise,float,No,No,No,The fraction of the study population with a preference score between 0.3 and 0.7. -cm_diagnostics_summary,mdrr,float,No,No,No,The minimum detectable relative risk. -cm_diagnostics_summary,attrition_fraction,float,No,No,No,The fraction of the target population lost by between initial cohort and outcome model due to various restrictions. -cm_diagnostics_summary,ease,float,No,No,No,The expected absolute systematic error. -cm_diagnostics_summary,balance_diagnostic,varchar(20),Yes,No,No,Pass / warning / fail classification of the balance diagnostic (max_sdm). -cm_diagnostics_summary,shared_balance_diagnostic,varchar(20),Yes,No,No,Pass / warning / fail classification of the shared balance diagnostic (shared_max_sdm). -cm_diagnostics_summary,equipoise_diagnostic,varchar(20),Yes,No,No,Pass / warning / fail classification of the equipoise diagnostic. -cm_diagnostics_summary,mdrr_diagnostic,varchar(20),Yes,No,No,Pass / warning / fail classification of the MDRR diagnostic. -cm_diagnostics_summary,attrition_diagnostic,varchar(20),Yes,No,No,Pass / warning / fail classification of the attrition fraction diagnostic. -cm_diagnostics_summary,ease_diagnostic,varchar(20),Yes,No,No,Pass / warning / fail classification of the EASE diagnostic. -cm_diagnostics_summary,unblind,int,Yes,No,No,"Is unblinding the result recommended? (1 = yes, 0 = no)" -cm_target_comparator_outcome,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_target_comparator_outcome,outcome_of_interest,int,Yes,No,No,"Is the outcome of interest (1 = yes, 0 = no)" -cm_target_comparator_outcome,true_effect_size,float,No,No,No,The true effect size for the target-comparator-outcome. For negatitive controls this equals 1. -cm_target_comparator_outcome,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_target_comparator_outcome,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. 
-cm_kaplan_meier_dist,time_day,int,Yes,Yes,No,Time in days since cohort start. -cm_kaplan_meier_dist,target_survival,float,Yes,No,No,The estimated survival fraction in the target cohort. -cm_kaplan_meier_dist,target_survival_lb,float,Yes,No,No,The lower bound of the 95% confidence interval of the survival fraction in the target cohort. -cm_kaplan_meier_dist,target_survival_ub,float,Yes,No,No,The upper bound of the 95% confidence interval of the survival fraction in the target cohort. -cm_kaplan_meier_dist,comparator_survival,float,Yes,No,No,The estimated survival fraction in the comparator cohort. -cm_kaplan_meier_dist,comparator_survival_lb,float,Yes,No,No,The lower bound of the 95% confidence interval of the survival fraction in the comparator cohort. -cm_kaplan_meier_dist,comparator_survival_ub,float,Yes,No,No,The upper bound of the 95% confidence interval of the survival fraction in the comparator cohort. -cm_kaplan_meier_dist,target_at_risk,int,No,No,Yes,The number of subjects still at risk in the target cohort. -cm_kaplan_meier_dist,comparator_at_risk,int,No,No,Yes,The number of subjects still at risk in the comparator cohort. -cm_kaplan_meier_dist,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_kaplan_meier_dist,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_kaplan_meier_dist,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_kaplan_meier_dist,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_kaplan_meier_dist,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_likelihood_profile,log_rr,float,Yes,Yes,No,The log of the relative risk where the likelihood is sampled. -cm_likelihood_profile,log_likelihood,float,Yes,No,No,The normalized log likelihood. -cm_likelihood_profile,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_likelihood_profile,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. 
-cm_likelihood_profile,outcome_id,int,Yes,Yes,No,The identifier for the outcome cohort. -cm_likelihood_profile,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_likelihood_profile,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_preference_score_dist,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_preference_score_dist,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_preference_score_dist,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_preference_score_dist,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_preference_score_dist,preference_score,float,Yes,Yes,No,A preference score value. -cm_preference_score_dist,target_density,float,Yes,No,No,The distribution density for the target cohort at the given preference score. -cm_preference_score_dist,comparator_density,float,Yes,No,No,The distribution density for the comparator cohort at the given preference score. -cm_propensity_model,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_propensity_model,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. -cm_propensity_model,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_propensity_model,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_propensity_model,covariate_id,bigint,Yes,Yes,No,Foreign key referencing the cm_covariate table. 0 is reserved for the intercept. -cm_propensity_model,coefficient,float,Yes,No,No,The coefficient (beta) for the covariate in the propensity model. -cm_shared_covariate_balance,database_id,varchar,Yes,Yes,No,Foreign key referencing the database. -cm_shared_covariate_balance,target_id,int,Yes,Yes,No,The identifier for the target cohort. -cm_shared_covariate_balance,comparator_id,int,Yes,Yes,No,The identifier for the comparator cohort. 
-cm_shared_covariate_balance,analysis_id,int,Yes,Yes,No,A foreign key referencing the cm_analysis table. -cm_shared_covariate_balance,covariate_id,bigint,Yes,Yes,No,A foreign key referencing the cm_covariate table. -cm_shared_covariate_balance,target_mean_before,float,No,No,Yes,The mean value of the covariate in the target cohort before propensity score adjustment. -cm_shared_covariate_balance,comparator_mean_before,float,No,No,Yes,The mean value of the covariate in the comparator cohort before propensity score adjustment. -cm_shared_covariate_balance,std_diff_before,float,No,No,No,The standardized difference of the means between the target and comparator cohort before propensity score adjustment. -cm_shared_covariate_balance,target_mean_after,float,No,No,Yes,The mean value of the covariate in the target cohort after propensity score adjustment. -cm_shared_covariate_balance,comparator_mean_after,float,No,No,Yes,The mean value of the covariate in the comparator cohort after propensity score adjustment. -cm_shared_covariate_balance,std_diff_after,float,No,No,No,The standardized difference of the means between the target and comparator cohort after propensity score adjustment. +table_name,column_name,data_type,is_required,primary_key,min_cell_count,deprecated,description +cm_attrition,sequence_number,int,Yes,Yes,No,No,The place in the sequence of steps defining the final analysis cohort. 1 indicates the original exposed population without any inclusion criteria. +cm_attrition,description,varchar,Yes,No,No,No,"A description of the last restriction, e.g. ""Removing persons with the outcome prior"". " +cm_attrition,subjects,int,Yes,No,Yes,No,The number of subjects in the cohort. +cm_attrition,exposure_id,int,Yes,Yes,No,No,The identifier of the exposure cohort to which the attrition applies. Can be either the target or comparator cohort ID. +cm_attrition,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. 
+cm_attrition,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_attrition,analysis_id,int,Yes,Yes,No,No,Foreign key referencing the cm_analysis table. +cm_attrition,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_attrition,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_follow_up_dist,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_follow_up_dist,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_follow_up_dist,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_follow_up_dist,analysis_id,int,Yes,Yes,No,No,Foreign key referencing the cm_analysis table. +cm_follow_up_dist,target_min_days,float,No,No,No,No,The minimum number of observation days for a person in the target group. +cm_follow_up_dist,target_p_10_days,float,No,No,No,No,The 10^th^ percentile of number of observation days for a person in the target group. +cm_follow_up_dist,target_p_25_days,float,No,No,No,No,The 25^th^ percentile of number of observation days for a person in the target group. +cm_follow_up_dist,target_median_days,float,No,No,No,No,The median number of observation days for a person in the target group. +cm_follow_up_dist,target_p_75_days,float,No,No,No,No,The 75^th^ percentile of number of observation days for a person in the target group. +cm_follow_up_dist,target_p_90_days,float,No,No,No,No,The 90^th^ percentile of number of observation days for a person in the target group. +cm_follow_up_dist,target_max_days,float,No,No,No,No,The maximum number of observation days for a person in the target group. +cm_follow_up_dist,comparator_min_days,float,No,No,No,No,The minimum number of observation days for a person in the comparator group. +cm_follow_up_dist,comparator_p_10_days,float,No,No,No,No,The 10^th^ percentile of number of observation days for a person in the comparator group.
+cm_follow_up_dist,comparator_p_25_days,float,No,No,No,No,The 25^th^ percentile of number of observation days for a person in the comparator group. +cm_follow_up_dist,comparator_median_days,float,No,No,No,No,The median number of observation days for a person in the comparator group. +cm_follow_up_dist,comparator_p_75_days,float,No,No,No,No,The 75^th^ percentile of number of observation days for a person in the comparator group. +cm_follow_up_dist,comparator_p_90_days,float,No,No,No,No,The 90^th^ percentile of number of observation days for a person in the comparator group. +cm_follow_up_dist,comparator_max_days,float,No,No,No,No,The maximum number of observation days for a person in the comparator group. +cm_follow_up_dist,target_min_date,Date,No,No,No,No,The first start date of the target cohort observed in the data (after applying all restrictions). +cm_follow_up_dist,target_max_date,Date,No,No,No,No,The last start date of the target cohort observed in the data (after applying all restrictions). +cm_follow_up_dist,comparator_min_date,Date,No,No,No,No,The first start date of the comparator cohort observed in the data (after applying all restrictions). +cm_follow_up_dist,comparator_max_date,Date,No,No,No,No,The last start date of the comparator cohort observed in the data (after applying all restrictions). +cm_follow_up_dist,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_analysis,analysis_id,int,Yes,Yes,No,No,A unique identifier for an analysis. +cm_analysis,description,varchar,Yes,No,No,No,"A description for an analysis, e.g. 'On-treatment'." +cm_analysis,definition,varchar,Yes,No,No,No,A CohortMethod JSON object specifying the analysis. +cm_result,analysis_id,int,Yes,Yes,No,No,Foreign key referencing the cm_analysis table. +cm_result,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_result,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. 
+cm_result,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_result,rr,float,No,No,No,No,The estimated relative risk (e.g. the hazard ratio). +cm_result,ci_95_lb,float,No,No,No,No,The lower bound of the 95% confidence interval of the relative risk. +cm_result,ci_95_ub,float,No,No,No,No,The upper bound of the 95% confidence interval of the relative risk. +cm_result,p,float,No,No,No,No,The two-sided p-value considering the null hypothesis of no effect. +cm_result,target_subjects,int,Yes,No,Yes,No,The number of subjects in the target cohort. +cm_result,comparator_subjects,int,Yes,No,Yes,No,The number of subjects in the comparator cohort. +cm_result,target_days,int,Yes,No,No,No,The number of days observed in the target cohort. +cm_result,comparator_days,int,Yes,No,No,No,The number of days observed in the comparator cohort. +cm_result,target_outcomes,int,Yes,No,Yes,No,The number of outcomes observed in the target cohort. +cm_result,comparator_outcomes,int,Yes,No,Yes,No,The number of outcomes observed in the comparator cohort. +cm_result,log_rr,float,No,No,No,No,The log of the relative risk. +cm_result,se_log_rr,float,No,No,No,No,The standard error of the log of the relative risk. +cm_result,llr,float,No,No,No,No,The log of the likelihood ratio (of the MLE vs the null hypothesis of no effect). +cm_result,calibrated_rr,float,No,No,No,No,The calibrated relative risk. +cm_result,calibrated_ci_95_lb,float,No,No,No,No,The lower bound of the calibrated 95% confidence interval of the relative risk. +cm_result,calibrated_ci_95_ub,float,No,No,No,No,The upper bound of the calibrated 95% confidence interval of the relative risk. +cm_result,calibrated_p,float,No,No,No,No,The calibrated two-sided p-value. +cm_result,calibrated_log_rr,float,No,No,No,No,The log of the calibrated relative risk. +cm_result,calibrated_se_log_rr,float,No,No,No,No,The standard error of the log of the calibrated relative risk.
+cm_result,target_estimator,varchar,Yes,No,No,No,"The target estimator, for example ""att"", ""ate"", ""atu"" or ""ato""." +cm_result,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_interaction_result,analysis_id,int,Yes,Yes,No,No,Foreign key referencing the cm_analysis table. +cm_interaction_result,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_interaction_result,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_interaction_result,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_interaction_result,interaction_covariate_id,int,Yes,Yes,No,No,Foreign key referencing the cm_covariate table. +cm_interaction_result,rr,float,Yes,No,No,No,The estimated relative risk (e.g. the ratio of hazard ratios). +cm_interaction_result,ci_95_lb,float,Yes,No,No,No,The lower bound of the 95% confidence interval of the relative risk. +cm_interaction_result,ci_95_ub,float,Yes,No,No,No,The upper bound of the 95% confidence interval of the relative risk. +cm_interaction_result,p,float,Yes,No,No,No,The two-sided p-value considering the null hypothesis of no interaction effect. +cm_interaction_result,target_subjects,int,Yes,No,Yes,No,The number of subjects in the target cohort. +cm_interaction_result,comparator_subjects,int,Yes,No,Yes,No,The number of subjects in the comparator cohort. +cm_interaction_result,target_days,int,Yes,No,No,No,The number of days observed in the target cohort. +cm_interaction_result,comparator_days,int,Yes,No,No,No,The number of days observed in the comparator cohort. +cm_interaction_result,target_outcomes,int,Yes,No,Yes,No,The number of outcomes observed in the target cohort. +cm_interaction_result,comparator_outcomes,int,Yes,No,Yes,No,The number of outcomes observed in the comparator cohort. +cm_interaction_result,log_rr,float,No,No,No,No,The log of the relative risk.
+cm_interaction_result,se_log_rr,float,No,No,No,No,The standard error of the log of the relative risk. +cm_interaction_result,calibrated_rr,float,No,No,No,No,The calibrated relative risk. +cm_interaction_result,calibrated_ci_95_lb,float,No,No,No,No,The lower bound of the calibrated 95% confidence interval of the relative risk. +cm_interaction_result,calibrated_ci_95_ub,float,No,No,No,No,The upper bound of the calibrated 95% confidence interval of the relative risk. +cm_interaction_result,calibrated_p,float,No,No,No,No,The calibrated two-sided p-value. +cm_interaction_result,calibrated_log_rr,float,No,No,No,No,The log of the calibrated relative risk. +cm_interaction_result,calibrated_se_log_rr,float,No,No,No,No,The standard error of the log of the calibrated relative risk. +cm_interaction_result,target_estimator,varchar,Yes,No,No,No,"The target estimator, for example ""att"", ""ate"", ""atu"" or ""ato""." +cm_interaction_result,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_covariate,covariate_id,bigint,Yes,Yes,No,No,A unique identifier for a covariate. +cm_covariate,covariate_name,varchar,Yes,No,No,No,"A name for a covariate, e.g. 'Age group: 20-25 years'." +cm_covariate,analysis_id,int,Yes,Yes,No,No,Foreign key referencing the cm_analysis table. +cm_covariate,covariate_analysis_id,int,Yes,No,No,No,Foreign key referencing the cm_covariate_analysis table. +cm_covariate,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_covariate_analysis,covariate_analysis_id,int,Yes,Yes,No,No,A unique identifier for a covariate analysis (only guaranteed to be unique for a given analysis_id and database_id). +cm_covariate_analysis,covariate_analysis_name,varchar,Yes,No,No,No,"A name for a covariate analysis, e.g. 'Demographics: age group'." +cm_covariate_analysis,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table.
+cm_covariate_balance,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_covariate_balance,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_covariate_balance,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_covariate_balance,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_covariate_balance,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table. +cm_covariate_balance,covariate_id,bigint,Yes,Yes,No,No,A foreign key referencing the cm_covariate table. +cm_covariate_balance,target_mean_before,float,No,No,Yes,No,The mean value of the covariate in the target cohort before propensity score adjustment. +cm_covariate_balance,comparator_mean_before,float,No,No,Yes,No,The mean value of the covariate in the comparator cohort before propensity score adjustment. +cm_covariate_balance,mean_before,float,No,No,Yes,No,The mean value of the covariate in the union of the target and comparator cohort before propensity score adjustment. +cm_covariate_balance,std_diff_before,float,No,No,No,No,The standardized difference of the means between the target and comparator cohort before propensity score adjustment. +cm_covariate_balance,mean_after,float,No,No,Yes,No,The mean value of the covariate in the union of the target and comparator cohort after propensity score adjustment. +cm_covariate_balance,target_mean_after,float,No,No,Yes,No,The mean value of the covariate in the target cohort after propensity score adjustment. +cm_covariate_balance,comparator_mean_after,float,No,No,Yes,No,The mean value of the covariate in the comparator cohort after propensity score adjustment. +cm_covariate_balance,std_diff_after,float,No,No,No,No,The standardized difference of the means between the target and comparator cohort after propensity score adjustment. 
+cm_covariate_balance,target_std_diff,float,No,No,No,No,The standardized difference of the means before and after propensity score adjustment in the target cohort. +cm_covariate_balance,comparator_std_diff,float,No,No,No,No,The standardized difference of the means before and after propensity score adjustment in the comparator cohort. +cm_covariate_balance,target_comparator_std_diff,float,No,No,No,No,The standardized difference of the means before and after propensity score adjustment in the union of the target and comparator cohorts. +cm_diagnostics_summary,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table. +cm_diagnostics_summary,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_diagnostics_summary,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_diagnostics_summary,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_diagnostics_summary,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_diagnostics_summary,max_sdm,float,No,No,No,No,The maximum absolute standardized difference of mean. +cm_diagnostics_summary,shared_max_sdm,float,No,No,No,No,The maximum absolute standardized difference of mean of the shared balance (shared across outcomes). +cm_diagnostics_summary,equipoise,float,No,No,No,No,The fraction of the study population with a preference score between 0.3 and 0.7. +cm_diagnostics_summary,mdrr,float,No,No,No,No,The minimum detectable relative risk. +cm_diagnostics_summary,attrition_fraction,float,No,No,No,Yes,DEPRECATED: The fraction of the target population lost between the initial cohort and the outcome model due to various restrictions. +cm_diagnostics_summary,generalizability_max_sdm,float,No,No,No,No,The maximum absolute standardized difference of mean comparing before to after adjustment. +cm_diagnostics_summary,ease,float,No,No,No,No,The expected absolute systematic error.
+cm_diagnostics_summary,balance_diagnostic,varchar(20),Yes,No,No,No,Pass / warning / fail classification of the balance diagnostic (max_sdm). +cm_diagnostics_summary,shared_balance_diagnostic,varchar(20),Yes,No,No,No,Pass / warning / fail classification of the shared balance diagnostic (shared_max_sdm). +cm_diagnostics_summary,equipoise_diagnostic,varchar(20),Yes,No,No,No,Pass / warning / fail classification of the equipoise diagnostic. +cm_diagnostics_summary,mdrr_diagnostic,varchar(20),Yes,No,No,No,Pass / warning / fail classification of the MDRR diagnostic. +cm_diagnostics_summary,attrition_diagnostic,varchar(20),No,No,No,Yes,DEPRECATED: Pass / warning / fail classification of the attrition fraction diagnostic. +cm_diagnostics_summary,generalizability_diagnostic,varchar(20),Yes,No,No,No,Pass / warning / fail classification of the generalizability diagnostic. +cm_diagnostics_summary,ease_diagnostic,varchar(20),Yes,No,No,No,Pass / warning / fail classification of the EASE diagnostic. +cm_diagnostics_summary,unblind,int,Yes,No,No,No,"Is unblinding the result recommended? (1 = yes, 0 = no)" +cm_target_comparator_outcome,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_target_comparator_outcome,outcome_of_interest,int,Yes,No,No,No,"Is the outcome of interest (1 = yes, 0 = no)" +cm_target_comparator_outcome,true_effect_size,float,No,No,No,No,The true effect size for the target-comparator-outcome. For negative controls this equals 1. +cm_target_comparator_outcome,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_target_comparator_outcome,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_kaplan_meier_dist,time_day,int,Yes,Yes,No,No,Time in days since cohort start. +cm_kaplan_meier_dist,target_survival,float,Yes,No,No,No,The estimated survival fraction in the target cohort.
+cm_kaplan_meier_dist,target_survival_lb,float,Yes,No,No,No,The lower bound of the 95% confidence interval of the survival fraction in the target cohort. +cm_kaplan_meier_dist,target_survival_ub,float,Yes,No,No,No,The upper bound of the 95% confidence interval of the survival fraction in the target cohort. +cm_kaplan_meier_dist,comparator_survival,float,Yes,No,No,No,The estimated survival fraction in the comparator cohort. +cm_kaplan_meier_dist,comparator_survival_lb,float,Yes,No,No,No,The lower bound of the 95% confidence interval of the survival fraction in the comparator cohort. +cm_kaplan_meier_dist,comparator_survival_ub,float,Yes,No,No,No,The upper bound of the 95% confidence interval of the survival fraction in the comparator cohort. +cm_kaplan_meier_dist,target_at_risk,int,No,No,Yes,No,The number of subjects still at risk in the target cohort. +cm_kaplan_meier_dist,comparator_at_risk,int,No,No,Yes,No,The number of subjects still at risk in the comparator cohort. +cm_kaplan_meier_dist,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_kaplan_meier_dist,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_kaplan_meier_dist,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. +cm_kaplan_meier_dist,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table. +cm_kaplan_meier_dist,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_likelihood_profile,log_rr,float,Yes,Yes,No,No,The log of the relative risk where the likelihood is sampled. +cm_likelihood_profile,log_likelihood,float,Yes,No,No,No,The normalized log likelihood. +cm_likelihood_profile,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_likelihood_profile,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_likelihood_profile,outcome_id,int,Yes,Yes,No,No,The identifier for the outcome cohort. 
+cm_likelihood_profile,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table. +cm_likelihood_profile,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_preference_score_dist,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table. +cm_preference_score_dist,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_preference_score_dist,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_preference_score_dist,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_preference_score_dist,preference_score,float,Yes,Yes,No,No,A preference score value. +cm_preference_score_dist,target_density,float,Yes,No,No,No,The distribution density for the target cohort at the given preference score. +cm_preference_score_dist,comparator_density,float,Yes,No,No,No,The distribution density for the comparator cohort at the given preference score. +cm_propensity_model,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_propensity_model,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. +cm_propensity_model,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table. +cm_propensity_model,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_propensity_model,covariate_id,bigint,Yes,Yes,No,No,Foreign key referencing the cm_covariate table. 0 is reserved for the intercept. +cm_propensity_model,coefficient,float,Yes,No,No,No,The coefficient (beta) for the covariate in the propensity model. +cm_shared_covariate_balance,database_id,varchar,Yes,Yes,No,No,Foreign key referencing the database. +cm_shared_covariate_balance,target_id,int,Yes,Yes,No,No,The identifier for the target cohort. +cm_shared_covariate_balance,comparator_id,int,Yes,Yes,No,No,The identifier for the comparator cohort. 
+cm_shared_covariate_balance,analysis_id,int,Yes,Yes,No,No,A foreign key referencing the cm_analysis table. +cm_shared_covariate_balance,covariate_id,bigint,Yes,Yes,No,No,A foreign key referencing the cm_covariate table. +cm_shared_covariate_balance,mean_before,float,No,No,Yes,No,The mean value of the covariate in the union of the target and comparator cohort before propensity score adjustment. +cm_shared_covariate_balance,target_mean_before,float,No,No,Yes,No,The mean value of the covariate in the target cohort before propensity score adjustment. +cm_shared_covariate_balance,comparator_mean_before,float,No,No,Yes,No,The mean value of the covariate in the comparator cohort before propensity score adjustment. +cm_shared_covariate_balance,std_diff_before,float,No,No,No,No,The standardized difference of the means between the target and comparator cohort before propensity score adjustment. +cm_shared_covariate_balance,mean_after,float,No,No,Yes,No,The mean value of the covariate in the union of the target and comparator cohort after propensity score adjustment. +cm_shared_covariate_balance,target_mean_after,float,No,No,Yes,No,The mean value of the covariate in the target cohort after propensity score adjustment. +cm_shared_covariate_balance,comparator_mean_after,float,No,No,Yes,No,The mean value of the covariate in the comparator cohort after propensity score adjustment. +cm_shared_covariate_balance,std_diff_after,float,No,No,No,No,The standardized difference of the means between the target and comparator cohort after propensity score adjustment. +cm_shared_covariate_balance,target_std_diff,float,No,No,No,No,The standardized difference of the means before and after propensity score adjustment in the target cohort. +cm_shared_covariate_balance,comparator_std_diff,float,No,No,No,No,The standardized difference of the means before and after propensity score adjustment in the comparator cohort. 
+cm_shared_covariate_balance,target_comparator_std_diff,float,No,No,No,No,The standardized difference of the means before and after propensity score adjustment in the union of the target and comparator cohorts. diff --git a/man/createCmDiagnosticThresholds.Rd b/man/createCmDiagnosticThresholds.Rd index 17c40fc4..73a32024 100644 --- a/man/createCmDiagnosticThresholds.Rd +++ b/man/createCmDiagnosticThresholds.Rd @@ -9,7 +9,8 @@ createCmDiagnosticThresholds( easeThreshold = 0.25, sdmThreshold = 0.1, equipoiseThreshold = 0.2, - attritionFractionThreshold = 1 + attritionFractionThreshold = NULL, + generalizabilitySdmThreshold = 1 ) } \arguments{ @@ -25,10 +26,12 @@ fail.} \item{equipoiseThreshold}{What is the minimum required equipoise?} -\item{attritionFractionThreshold}{What is the maximum allowed attrition fraction? If the attrition -between the input target cohort and the target cohort entering the -outcome model is greater than this fraction, the diagnostic will -fail.} +\item{attritionFractionThreshold}{DEPRECATED. See \code{generalizabilitySdmThreshold} instead.} + +\item{generalizabilitySdmThreshold}{What is the maximum allowed standardized difference of mean +(SDM) when comparing the population before and after PS +adjustments? If the SDM is greater than this value, the diagnostic +will fail.} } \value{ An object of type \code{CmDiagnosticThresholds}.
diff --git a/tests/testthat/test-balance.R b/tests/testthat/test-balance.R new file mode 100644 index 00000000..1e406436 --- /dev/null +++ b/tests/testthat/test-balance.R @@ -0,0 +1,82 @@ +library("testthat") + +set.seed(1234) +data(cohortMethodDataSimulationProfile) +sampleSize <- 1000 +cohortMethodData <- simulateCohortMethodData(cohortMethodDataSimulationProfile, n = sampleSize) +# Enforce weighted calculation for after adjustment, but use same weight for +# all so (unweighted) gold standard applies: +studyPop <- cohortMethodData$cohorts %>% + collect() %>% + mutate(iptw = 0.1) +results <- computeCovariateBalance(studyPop, cohortMethodData) %>% + filter(!is.na(beforeMatchingMeanTarget), + !is.na(beforeMatchingMeanComparator)) + +test_that("Test computation of covariate means and SDs", { + # Too computationally expensive to test all, so randomly pick 5: + covariateIds <- sample(results$covariateId, 5) + covariates <- cohortMethodData$covariates %>% + filter(covariateId %in% covariateIds) %>% + compute() + # covariateId = covariateIds[1] + for (covariateId in covariateIds) { + result <- results %>% + filter(covariateId == !!covariateId) + denseData <- cohortMethodData$cohorts %>% + left_join(covariates %>% + filter(covariateId == !!covariateId), + by = join_by("rowId")) %>% + mutate(covariateValue = if_else(is.na(covariateValue), 0, covariateValue)) + + # Overall + gs <- denseData %>% + summarise(mean = mean(covariateValue), + sd = sd(covariateValue)) %>% + collect() + expect_equal(result$beforeMatchingMean, gs$mean, tolerance = 0.01) + expect_equal(result$beforeMatchingSd, gs$sd, tolerance = 0.01) + expect_equal(result$afterMatchingMean, gs$mean, tolerance = 0.01) + expect_equal(result$afterMatchingSd, gs$sd, tolerance = 0.01) + + # Target + gs <- denseData %>% + filter(treatment == 1) %>% + summarise(mean = mean(covariateValue), + sd = sd(covariateValue)) %>% + collect() + expect_equal(result$beforeMatchingMeanTarget, gs$mean, tolerance = 0.01) +
expect_equal(result$beforeMatchingSdTarget, gs$sd, tolerance = 0.01) + expect_equal(result$afterMatchingMeanTarget, gs$mean, tolerance = 0.01) + expect_equal(result$afterMatchingSdTarget, gs$sd, tolerance = 0.01) + + # Comparator + gs <- denseData %>% + filter(treatment == 0) %>% + summarise(mean = mean(covariateValue), + sd = sd(covariateValue)) %>% + collect() + expect_equal(result$beforeMatchingMeanComparator, gs$mean, tolerance = 0.01) + expect_equal(result$beforeMatchingSdComparator, gs$sd, tolerance = 0.01) + expect_equal(result$afterMatchingMeanComparator, gs$mean, tolerance = 0.01) + expect_equal(result$afterMatchingSdComparator, gs$sd, tolerance = 0.01) + } +}) + +test_that("Test computation of SDMs", { + sdm <- (results$beforeMatchingMeanTarget - results$beforeMatchingMeanComparator) / results$beforeMatchingSd + expect_equal(results$beforeMatchingStdDiff, sdm) + + sdm <- (results$afterMatchingMeanTarget - results$afterMatchingMeanComparator) / results$afterMatchingSd + expect_equal(results$afterMatchingStdDiff, sdm) + + sdm <- (results$beforeMatchingMean - results$afterMatchingMean) / results$beforeMatchingSd + expect_equal(results$targetComparatorStdDiff, sdm) + + sdm <- (results$beforeMatchingMeanTarget - results$afterMatchingMeanTarget) / results$beforeMatchingSdTarget + expect_equal(results$targetStdDiff, sdm) + + sdm <- (results$beforeMatchingMeanComparator - results$afterMatchingMeanComparator) / results$beforeMatchingSdComparator + expect_equal(results$comparatorStdDiff, sdm) + +}) diff --git a/tests/testthat/testthat-problems.rds b/tests/testthat/testthat-problems.rds new file mode 100644 index 00000000..2836d074 Binary files /dev/null and b/tests/testthat/testthat-problems.rds differ