diff --git a/.github/workflows/R_CMD_check_Hades.yml b/.github/workflows/R_CMD_check_Hades.yml index 55540db9..1d42cf1b 100644 --- a/.github/workflows/R_CMD_check_Hades.yml +++ b/.github/workflows/R_CMD_check_Hades.yml @@ -20,8 +20,6 @@ jobs: fail-fast: false matrix: config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - {os: windows-latest, r: '4.2.3'} - {os: macOS-latest, r: '4.2.3'} diff --git a/NAMESPACE b/NAMESPACE index eb4a066d..d8c310bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,7 @@ export(getDbCohortMethodData) export(getDefaultCmTable1Specifications) export(getFileReference) export(getFollowUpDistribution) +export(getGeneralizabilityTable) export(getInteractionResultsSummary) export(getOutcomeModel) export(getPsModel) diff --git a/R/Balance.R b/R/Balance.R index 57eeeaa2..158547e4 100644 --- a/R/Balance.R +++ b/R/Balance.R @@ -188,7 +188,7 @@ computeMeansPerGroup <- function(cohorts, cohortMethodData, covariateFilter) { -"overallSumWSqrTarget", -"overallSumWSqrComparator") - return(result) + return(result) } #' Compute covariate balance before and after PS adjustment @@ -221,7 +221,8 @@ computeMeansPerGroup <- function(cohorts, cohortMethodData, covariateFilter) { #' #' @return #' Returns a tibble describing the covariate balance before and after PS adjustment, -#' with one row per covariate and the following columns: +#' with one row per covariate, with the same data as the `covariateRef` table in the `CohortMethodData` object, +#' and the following additional columns: #' #' - beforeMatchingMeanTarget: The (weighted) mean value in the target before PS adjustment. #' - beforeMatchingMeanComparator: The (weighted) mean value in the comparator before PS adjustment. @@ -397,6 +398,10 @@ computeCovariateBalance <- function(population, ) %>% arrange(desc(abs(.data$beforeMatchingStdDiff))) + metaData <- attr(population, "metaData") + if (!is.null(metaData) && !is.null(metaData$targetEstimator)) { + attr(balance, "targetEstimator") <- metaData$targetEstimator + } delta <- Sys.time() - start message(paste("Computing covariate balance took", signif(delta, 3), attr(delta, "units"))) return(balance) @@ -761,3 +766,102 @@ plotCovariatePrevalence <- function(balance, } return(plot) } + +#' Get information on generalizability +#' +#' @description +#' to assess generalizability we compare the distribution of covariates before and after +#' any (propensity score) adjustments. We compute the standardized difference of mean as +#' our metric of generalizability. (Lipton et al., 2017) +#' +#' Depending on our target estimand, we need to consider a different base population for +#' generalizability. For example, if we aim to estimate the average treatment effect in +#' thetreated (ATT), our base population should be the target population, meaning we +#' should consider the covariate distribution before and after PS adjustment in the target +#' population only. By default this function will attempt to select the right base +#' population based on what operations have been performed on the population. For example, +#' if PS matching has been performed we assume the target estimand is the ATT, and the +#' target population is selected as base. +#' +#' Requires running [computeCovariateBalance()]` first. +#' +#' @param balance A data frame created by the `computeCovariateBalance` function. +#' @param baseSelection The selection of the population to consider for generalizability. +#' Options are "auto", "target", "comparator", and "both". The "auto" +#' option will attempt to use the balance meta-data to pick the most +#' appropriate population based on the target estimator. +#' +#' @return +#' A tibble with the following columns: +#' +#' - covariateId: The ID of the covariate. Can be linked to the `covariates` and `covariateRef` +#' tables in the `CohortMethodData` object. +#' - covariateName: The name of the covariate. +#' - beforeMatchingMean: The mean covariate value before any (propensity score) adjustment. +#' - afterMatchingMean: The mean covariate value after any (propensity score) adjustment. +#' - stdDiff: The standardized difference of means between before and after adjustment. +#' +#' The tibble also has a 'baseSelection' attribute, documenting the base population used +#' to assess generalizability. +#' +#' @references Tipton E, Hallberg K, Hedges LV, Chan W (2017) Implications of Small Samples +#' for Generalization: Adjustments and Rules of Thumb, Eval Rev. Oct;41(5):472-505. +#' +#' @export +getGeneralizabilityTable <- function(balance, baseSelection = "auto") { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertDataFrame(balance, add = errorMessages) + checkmate::assertCharacter(baseSelection, len = 1, add = errorMessages) + checkmate::assertChoice(baseSelection, c("auto", "target", "comparator", "both"), add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + + if (baseSelection == "auto") { + targetEstimator <- attr(balance, "targetEstimator") + if (is.null(targetEstimator)) { + stop("The baseSelection is set to 'auto' but the balance object does not contain a target estimator attribute. ", + "Please set the baseSelection manually.") + } + if (targetEstimator == "ate" | targetEstimator == "ato") { + baseSelection <- "both" + message("Selecting both target and comparator as base for generalizability") + } else if (targetEstimator == "att") { + baseSelection <- "target" + message("Selecting target as base for generalizability") + } else if (targetEstimator == "atu") { + baseSelection <- "comparator" + message("Selecting comparator as base for generalizability") + } else { + stop("Unkown target estimator: ", targetEstimator) + } + } + if (baseSelection == "target") { + generalizability <- balance %>% + mutate(absGeneralizabilityStdDiff = abs(.data$targetStdDiff)) %>% + arrange(desc(.data$absGeneralizabilityStdDiff)) %>% + select("covariateId", + "covariateName", + beforeMatchingMean = "beforeMatchingMeanTarget", + afterMatchingMean = "afterMatchingMeanTarget", + stdDiff = "targetStdDiff") + } else if (baseSelection == "comparator") { + generalizability <- balance %>% + mutate(absGeneralizabilityStdDiff = abs(.data$comparatorStdDiff)) %>% + arrange(desc(.data$absGeneralizabilityStdDiff)) %>% + select("covariateId", + "covariateName", + beforeMatchingMean = "beforeMatchingMeanComparator", + afterMatchingMean = "afterMatchingMeanComparator", + stdDiff = "comparatorStdDiff") + } else { + generalizability <- balance %>% + mutate(absGeneralizabilityStdDiff = abs(.data$targetComparatorStdDiff)) %>% + arrange(desc(.data$absGeneralizabilityStdDiff)) %>% + select("covariateId", + "covariateName", + "beforeMatchingMean", + "afterMatchingMean", + stdDiff = "targetComparatorStdDiff") + } + attr(generalizability, "baseSelection") <- baseSelection + return(generalizability) +} diff --git a/R/OutcomeModels.R b/R/OutcomeModels.R index 2163738d..ab1b403f 100644 --- a/R/OutcomeModels.R +++ b/R/OutcomeModels.R @@ -647,6 +647,7 @@ print.OutcomeModel <- function(x, ...) { writeLines(paste("Stratified:", x$outcomeModelStratified)) writeLines(paste("Use covariates:", x$outcomeModelUseCovariates)) writeLines(paste("Use inverse probability of treatment weighting:", x$inversePtWeighting)) + writeLines(paste("Target estimand:", x$targetEstimator)) writeLines(paste("Status:", x$outcomeModelStatus)) if (!is.null(x$outcomeModelPriorVariance) && !is.na(x$outcomeModelPriorVariance)) { writeLines(paste("Prior variance:", x$outcomeModelPriorVariance)) diff --git a/extras/SingleStudyVignetteDataFetch.R b/extras/SingleStudyVignetteDataFetch.R index 32ca79fe..ed49311b 100644 --- a/extras/SingleStudyVignetteDataFetch.R +++ b/extras/SingleStudyVignetteDataFetch.R @@ -36,14 +36,14 @@ cohortDatabaseSchema <- "scratch_mschuemi" cohortTable <- "cm_vignette" -osteoArthritisOfKneeConceptId <- 4079750 -celecoxibConceptId <- 1118084 -diclofenacConceptId <- 1124300 + # Define exposure cohorts ------------------------------------------------------ library(Capr) -library(CirceR) +osteoArthritisOfKneeConceptId <- 4079750 +celecoxibConceptId <- 1118084 +diclofenacConceptId <- 1124300 osteoArthritisOfKnee <- cs( descendants(osteoArthritisOfKneeConceptId), name = "Osteoarthritis of knee" @@ -81,22 +81,21 @@ diclofenacCohort <- cohort( persistenceWindow = 30, surveillanceWindow = 0)) ) +# Define outcome cohort -------------------------------------------------------- +library(PhenotypeLibrary) +outcomeCohorts <- getPlCohortDefinitionSet(77) # GI bleed + +# Generate cohorts ------------------------------------------------------------- +library(CirceR) exposureCohorts <- tibble(cohortId = c(1,2), cohortName = c("Celecoxib", "Diclofenac"), json = c(as.json(celecoxibCohort), as.json(diclofenacCohort))) exposureCohorts$sql <- sapply(exposureCohorts$json, buildCohortQuery, options = createGenerateOptions()) - -# Define outcome cohort -------------------------------------------------------- -library(PhenotypeLibrary) -outcomeCohorts <- getPlCohortDefinitionSet(77) # GI bleed - - -# Generate cohorts ------------------------------------------------------------- -library(CohortGenerator) allCohorts <- bind_rows(outcomeCohorts, exposureCohorts) +library(CohortGenerator) cohortTableNames <- getCohortTableNames(cohortTable = cohortTable) createCohortTables(connectionDetails = connectionDetails, cohortDatabaseSchema = cohortDatabaseSchema, @@ -216,6 +215,11 @@ saveRDS(balance, file = file.path(folder, "balance.rds")) table1 <- createCmTable1(balance) print(table1, row.names = FALSE, right = FALSE) plotCovariateBalanceScatterPlot(balance, showCovariateCountLabel = TRUE, showMaxLabel = TRUE, fileName = "extras/balanceScatterplot.png") +getGeneralizabilityTable(balance) + +balanceIptw <- computeCovariateBalance(ps, cohortMethodData) +saveRDS(balanceIptw, file = file.path(folder, "balanceIptw.rds")) +getGeneralizabilityTable(balanceIptw) outcomeModel <- fitOutcomeModel( population = studyPop, @@ -230,6 +234,15 @@ coef(outcomeModel) confint(outcomeModel) saveRDS(outcomeModel, file = file.path(folder, "OutcomeModel1.rds")) +outcomeModel <- fitOutcomeModel( + population = ps, + modelType = "cox", + stratified = FALSE, + useCovariates = FALSE, + inversePtWeighting = TRUE +) +saveRDS(outcomeModel, file = file.path(folder, "OutcomeModel1b.rds")) + outcomeModel <- fitOutcomeModel( population = matchedPop, modelType = "cox", diff --git a/extras/balanceScatterplot.png b/extras/balanceScatterplot.png index 3c126742..95415612 100644 Binary files a/extras/balanceScatterplot.png and b/extras/balanceScatterplot.png differ diff --git a/inst/sql/VignetteOutcomes.sql b/inst/sql/VignetteOutcomes.sql deleted file mode 100644 index 3f8c69f1..00000000 --- a/inst/sql/VignetteOutcomes.sql +++ /dev/null @@ -1,17 +0,0 @@ -/*********************************** -File VignetteOutcomes.sql -***********************************/ -DROP TABLE IF EXISTS @resultsDatabaseSchema.outcomes; - -SELECT ancestor_concept_id AS cohort_definition_id, - condition_start_date AS cohort_start_date, - condition_end_date AS cohort_end_date, - condition_occurrence.person_id AS subject_id -INTO @resultsDatabaseSchema.outcomes -FROM @cdmDatabaseSchema.condition_occurrence -INNER JOIN @cdmDatabaseSchema.visit_occurrence - ON condition_occurrence.visit_occurrence_id = visit_occurrence.visit_occurrence_id -INNER JOIN @cdmDatabaseSchema.concept_ancestor - ON condition_concept_id = descendant_concept_id -WHERE ancestor_concept_id IN (192671, 24609, 29735, 73754, 80004, 134718, 139099, 141932, 192367, 193739, 194997, 197236, 199074, 255573, 257007, 313459, 314658, 316084, 319843, 321596, 374366, 375292, 380094, 433753, 433811, 436665, 436676, 436940, 437784, 438134, 440358, 440374, 443617, 443800, 4084966, 4288310) - AND visit_occurrence.visit_concept_id IN (9201, 9203); diff --git a/man/computeCovariateBalance.Rd b/man/computeCovariateBalance.Rd index 66b04da1..f1ae47eb 100644 --- a/man/computeCovariateBalance.Rd +++ b/man/computeCovariateBalance.Rd @@ -33,7 +33,8 @@ balance will be computed for all variables found in the data.} } \value{ Returns a tibble describing the covariate balance before and after PS adjustment, -with one row per covariate and the following columns: +with one row per covariate, with the same data as the \code{covariateRef} table in the \code{CohortMethodData} object, +and the following additional columns: \itemize{ \item beforeMatchingMeanTarget: The (weighted) mean value in the target before PS adjustment. \item beforeMatchingMeanComparator: The (weighted) mean value in the comparator before PS adjustment. diff --git a/man/getGeneralizabilityTable.Rd b/man/getGeneralizabilityTable.Rd new file mode 100644 index 00000000..c70dae71 --- /dev/null +++ b/man/getGeneralizabilityTable.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Balance.R +\name{getGeneralizabilityTable} +\alias{getGeneralizabilityTable} +\title{Get information on generalizability} +\usage{ +getGeneralizabilityTable(balance, baseSelection = "auto") +} +\arguments{ +\item{balance}{A data frame created by the \code{computeCovariateBalance} function.} + +\item{baseSelection}{The selection of the population to consider for generalizability. +Options are "auto", "target", "comparator", and "both". The "auto" +option will attempt to use the balance meta-data to pick the most +appropriate population based on the target estimator.} +} +\value{ +A tibble with the following columns: +\itemize{ +\item covariateId: The ID of the covariate. Can be linked to the \code{covariates} and \code{covariateRef} +tables in the \code{CohortMethodData} object. +\item covariateName: The name of the covariate. +\item beforeMatchingMean: The mean covariate value before any (propensity score) adjustment. +\item afterMatchingMean: The mean covariate value after any (propensity score) adjustment. +\item stdDiff: The standardized difference of means between before and after adjustment. +} + +The tibble also has a 'baseSelection' attribute, documenting the base population used +to assess generalizability. +} +\description{ +to assess generalizability we compare the distribution of covariates before and after +any (propensity score) adjustments. We compute the standardized difference of mean as +our metric of generalizability. (Lipton et al., 2017) + +Depending on our target estimand, we need to consider a different base population for +generalizability. For example, if we aim to estimate the average treatment effect in +thetreated (ATT), our base population should be the target population, meaning we +should consider the covariate distribution before and after PS adjustment in the target +population only. By default this function will attempt to select the right base +population based on what operations have been performed on the population. For example, +if PS matching has been performed we assume the target estimand is the ATT, and the +target population is selected as base. + +Requires running \code{\link[=computeCovariateBalance]{computeCovariateBalance()}}` first. +} +\references{ +Tipton E, Hallberg K, Hedges LV, Chan W (2017) Implications of Small Samples +for Generalization: Adjustments and Rules of Thumb, Eval Rev. Oct;41(5):472-505. +} diff --git a/vignettes/SingleStudies.Rmd b/vignettes/SingleStudies.Rmd index 0bf00cb3..1368ee92 100644 --- a/vignettes/SingleStudies.Rmd +++ b/vignettes/SingleStudies.Rmd @@ -29,7 +29,7 @@ This vignette describes how you can use the `CohortMethod` package to perform a # Installation instructions -Before installing the `CohortMethod` package make sure you have Java available. Java can be downloaded from [www.java.com](http://www.java.com). For Windows users, RTools is also necessary. RTools can be downloaded from [CRAN](http://cran.r-project.org/bin/windows/Rtools/). +Before installing the `CohortMethod` package make sure you have Java available. For Windows users, RTools is also necessary. See [these instructions](https://ohdsi.github.io/Hades/rSetup.html) for properly configuring your R environment. The `CohortMethod` package is currently maintained in a [Github repository](https://github.com/OHDSI/CohortMethod), and has dependencies on other packages in Github. All of these packages can be downloaded and installed from within R using the `drat` package: @@ -55,122 +55,120 @@ connectionDetails <- createConnectionDetails(dbms = "postgresql", password = "supersecret") cdmDatabaseSchema <- "my_cdm_data" -resultsDatabaseSchema <- "my_results" +cohortDatabaseSchema <- "my_results" +cohortTable <- "my_cohorts" options(sqlRenderTempEmulationSchema = NULL) ``` -The last two lines define the `cdmDatabaseSchema` and `resultSchema` variables. We'll use these later to tell R where the data in CDM format live, and where we want to write intermediate tables. Note that for Microsoft SQL Server, databaseschemas need to specify both the database and the schema, so for example `cdmDatabaseSchema <- "my_cdm_data.dbo"`. +The last few lines define the `cdmDatabaseSchema`, `cohortDatabaseSchema`, and `cohortTable` variables. We'll use these later to tell R where the data in CDM format live, and where we want to write intermediate tables. Note that for Microsoft SQL Server, databaseschemas need to specify both the database and the schema, so for example `cdmDatabaseSchema <- "my_cdm_data.dbo"`. For database platforms that do not support temp tables, such as Oracle, it is also necessary to provide a schema where the user has write access that can be used to emulate temp tables. PostgreSQL supports temp tables, so we can set `options(sqlRenderTempEmulationSchema = NULL)` (or not set the `sqlRenderTempEmulationSchema` at all.) ## Preparing the exposures and outcome(s) -We need to define the exposures and outcomes for our study. One could use an external cohort definition tools, but in this example we do this by writing SQL statements against the OMOP CDM that populate a table of events in which we are interested. The resulting table should have the same structure as the `cohort` table in the CDM. This means it should have the fields `cohort_definition_id`, `cohort_start_date`, `cohort_end_date`,and `subject_id`. - -For our example study, we have created a file called *coxibVsNonselVsGiBleed.sql* with the following contents: - -```sql -/*********************************** -File coxibVsNonselVsGiBleed.sql -***********************************/ - -DROP TABLE IF EXISTS @resultsDatabaseSchema.coxibVsNonselVsGiBleed; - -CREATE TABLE @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( - cohort_definition_id INT, - cohort_start_date DATE, - cohort_end_date DATE, - subject_id BIGINT - ); - -INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( - cohort_definition_id, - cohort_start_date, - cohort_end_date, - subject_id - ) -SELECT 1, -- Exposure - drug_era_start_date, - drug_era_end_date, - person_id -FROM @cdmDatabaseSchema.drug_era -WHERE drug_concept_id = 1118084;-- celecoxib - -INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( - cohort_definition_id, - cohort_start_date, - cohort_end_date, - subject_id - ) -SELECT 2, -- Comparator - drug_era_start_date, - drug_era_end_date, - person_id -FROM @cdmDatabaseSchema.drug_era -WHERE drug_concept_id = 1124300; --diclofenac - -INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( - cohort_definition_id, - cohort_start_date, - cohort_end_date, - subject_id - ) -SELECT 3, -- Outcome - condition_start_date, - condition_end_date, - condition_occurrence.person_id -FROM @cdmDatabaseSchema.condition_occurrence -INNER JOIN @cdmDatabaseSchema.visit_occurrence - ON condition_occurrence.visit_occurrence_id = visit_occurrence.visit_occurrence_id -WHERE condition_concept_id IN ( - SELECT descendant_concept_id - FROM @cdmDatabaseSchema.concept_ancestor - WHERE ancestor_concept_id = 192671 -- GI - Gastrointestinal haemorrhage - ) - AND visit_occurrence.visit_concept_id IN (9201, 9203); -``` - -This is parameterized SQL which can be used by the `SqlRender` package. We use parameterized SQL so we do not have to pre-specify the names of the CDM and result schemas. That way, if we want to run the SQL on a different schema, we only need to change the parameter values; we do not have to change the SQL code. By also making use of translation functionality in `SqlRender`, we can make sure the SQL code can be run in many different environments. - -```{r eval=FALSE} -library(SqlRender) -sql <- readSql("coxibVsNonselVsGiBleed.sql") -sql <- render(sql, - cdmDatabaseSchema = cdmDatabaseSchema, - resultsDatabaseSchema = resultsDatabaseSchema) -sql <- translate(sql, targetDialect = connectionDetails$dbms) - -connection <- connect(connectionDetails) -executeSql(connection, sql) -``` - -In this code, we first read the SQL from the file into memory. In the next line, we replace the two parameter names with the actual values. We then translate the SQL into the dialect appropriate for the DBMS we already specified in the `connectionDetails`. Next, we connect to the server, and submit the rendered and translated SQL. - -If all went well, we now have a table with the events of interest. We can see how many events per type: - -```{r eval=FALSE} -sql <- paste("SELECT cohort_definition_id, COUNT(*) AS count", - "FROM @resultsDatabaseSchema.coxibVsNonselVsGiBleed", - "GROUP BY cohort_definition_id") -sql <- render(sql, resultsDatabaseSchema = resultsDatabaseSchema) -sql <- translate(sql, targetDialect = connectionDetails$dbms) - -querySql(connection, sql) +We need to define the exposures and outcomes for our study. Here, we will define our exposures using the OHDSI `Capr` package. We define two cohorts, one for celecoxib and one for diclofenac. For each cohort we require a prior diagnosis of 'osteoarthritis of knee', and 365 days of continuous prior observation. we restrict to the first exposure per person: + +```{r eval=FALSE} +library(Capr) + +osteoArthritisOfKneeConceptId <- 4079750 +celecoxibConceptId <- 1118084 +diclofenacConceptId <- 1124300 +osteoArthritisOfKnee <- cs( + descendants(osteoArthritisOfKneeConceptId), + name = "Osteoarthritis of knee" +) +attrition = attrition( + "prior osteoarthritis of knee" = withAll( + atLeast(1, condition(osteoArthritisOfKnee), duringInterval(eventStarts(-Inf, 0))) + ) +) +celecoxib <- cs( + descendants(celecoxibConceptId), + name = "Celecoxib" +) +diclofenac <- cs( + descendants(diclofenacConceptId), + name = "Diclofenac" +) +celecoxibCohort <- cohort( + entry = entry( + drug(celecoxib, firstOccurrence()), + observationWindow = continuousObservation(priorDays = 365) + ), + attrition = attrition, + exit = exit(endStrategy = drugExit(celecoxib, + persistenceWindow = 30, + surveillanceWindow = 0)) +) +diclofenacCohort <- cohort( + entry = entry( + drug(diclofenac, firstOccurrence()), + observationWindow = continuousObservation(priorDays = 365) + ), + attrition = attrition, + exit = exit(endStrategy = drugExit(diclofenac, + persistenceWindow = 30, + surveillanceWindow = 0)) +) +``` + +We'll pull the outcome definition from the OHDSI `PhenotypeLibrary`: + +```{r eval=FALSE} +library(PhenotypeLibrary) +outcomeCohorts <- getPlCohortDefinitionSet(77) # GI bleed +``` + +We combine the exposure and outcome cohort definitions, and use `CohortGenerator` to generate the cohorts: + +```{r eval=FALSE} +library(CirceR) +# For exposures, create a cohort definition set table as required by CohortGenerator: +exposureCohorts <- tibble(cohortId = c(1,2), + cohortName = c("Celecoxib", "Diclofenac"), + json = c(as.json(celecoxibCohort), + as.json(diclofenacCohort))) +exposureCohorts$sql <- sapply(exposureCohorts$json, + buildCohortQuery, + options = createGenerateOptions()) +allCohorts <- bind_rows(outcomeCohorts, + exposureCohorts) + +library(CohortGenerator) +cohortTableNames <- getCohortTableNames(cohortTable = cohortTable) +createCohortTables(connectionDetails = connectionDetails, + cohortDatabaseSchema = cohortDatabaseSchema, + cohortTableNames = cohortTableNames) +generateCohortSet(connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + cohortDatabaseSchema = cohortDatabaseSchema, + cohortTableNames = cohortTableNames, + cohortDefinitionSet = allCohorts) +``` + +If all went well, we now have a table with the cohorts of interest. We can see how many entries per cohort: + +```{r eval=FALSE} +connection <- DatabaseConnector::connect(connectionDetails) +sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @cohortDatabaseSchema.@cohortTable GROUP BY cohort_definition_id" +DatabaseConnector::renderTranslateQuerySql(connection, sql, cohortDatabaseSchema = cohortDatabaseSchema, cohortTable = cohortTable) +DatabaseConnector::disconnect(connection) ``` ```{r echo=FALSE,message=FALSE} -data.frame(cohort_concept_id = c(1,2,3),count = c(50000,50000,15000)) +data.frame(cohort_concept_id = c(1,2,77),count = c(109307,176675,733601)) ``` ## Extracting the data from the server -Now we can tell `CohortMethod` to define the cohorts based on our events, construct covariates, and extract all necessary data for our analysis. +Now we can tell `CohortMethod` to extract the cohorts, construct covariates, and extract all necessary data for our analysis. -**Important**: The target and comparator drug must not be included in the covariates, including any descendant concepts. You will need to manually add the drugs and descendants to the `excludedCovariateConceptIds` of the covariate settings. In this example code we exclude all NSAIDs from the covariates by pointing to the concept ID of the NSAID class and specifying `addDescendantsToExclude = TRUE`. +**Important**: The target and comparator drug must not be included in the covariates, including any descendant concepts. You will need to manually add the drugs and descendants to the `excludedCovariateConceptIds` of the covariate settings. In this example code we exclude the concepts for celecoxib and diclofenac and specify `addDescendantsToExclude = TRUE`: ```{r eval=FALSE} -nsaids <- 21603933 - # Define which types of covariates must be constructed: -covSettings <- createDefaultCovariateSettings(excludedCovariateConceptIds = nsaids, - addDescendantsToExclude = TRUE) +covSettings <- createDefaultCovariateSettings( + excludedCovariateConceptIds = c(diclofenacConceptId, celecoxibConceptId), + addDescendantsToExclude = TRUE +) #Load data: cohortMethodData <- getDbCohortMethodData( @@ -178,18 +176,11 @@ cohortMethodData <- getDbCohortMethodData( cdmDatabaseSchema = cdmDatabaseSchema, targetId = 1, comparatorId = 2, - outcomeIds = 3, - studyStartDate = "", - studyEndDate = "", - exposureDatabaseSchema = resultsDatabaseSchema, - exposureTable = "coxibVsNonselVsGiBleed", - outcomeDatabaseSchema = resultsDatabaseSchema, - outcomeTable = "coxibVsNonselVsGiBleed", - cdmVersion = cdmVersion, - firstExposureOnly = TRUE, - removeDuplicateSubjects = "remove all", - restrictToCommonPeriod = FALSE, - washoutPeriod = 180, + outcomeIds = 77, + exposureDatabaseSchema = cohortDatabaseSchema, + exposureTable = cohortTable, + outcomeDatabaseSchema = cohortDatabaseSchema, + outcomeTable = cohortTable, covariateSettings = covSettings ) cohortMethodData @@ -236,7 +227,7 @@ We can use the `loadCohortMethodData()` function to load the data in a future se Typically, a new user is defined as first time use of a drug (either target or comparator), and typically a washout period (a minimum number of days prior first use) is used to make sure it is truly first use. When using the `CohortMethod` package, you can enforce the necessary requirements for new use in three ways: -1. When creating the cohorts in the database, for example when using a cohort definition tool. +1. When creating the cohorts in the database, for example using `Capr`. 2. When loading the cohorts using the `getDbCohortMethodData` function, you can use the `firstExposureOnly`, `removeDuplicateSubjects`, `restrictToCommonPeriod`, and `washoutPeriod` arguments. (As shown in the example above). 3. When defining the study population using the `createStudyPopulation` function (see below) using the `firstExposureOnly`, `removeDuplicateSubjects`, `restrictToCommonPeriod`, and `washoutPeriod` arguments. @@ -352,11 +343,24 @@ if (folderExists) { } propensityModel$covariateName <- truncRight(as.character(propensityModel$covariateName), 40) head(propensityModel) - } +} ``` One advantage of using the regularization when fitting the propensity model is that most coefficients will shrink to zero and fall out of the model. It is a good idea to inspect the remaining variables for anything that should not be there, for example variations of the drugs of interest that we forgot to exclude. +Finally, we can inspect the percent of the population in equipoise, meaning they have a prefence score between 0.3 and 0.7: + +```{r eval=FALSE} +CohortMethod::computeEquipoise(ps) +``` +```{r echo=FALSE,message=FALSE} +if (folderExists) { + CohortMethod::computeEquipoise(ps) +} +``` + +A low equipoise indicates there is little overlap between the target and comparator populations. + ## Using the propensity score We can use the propensity scores to trim, stratify, match, or weigh our population. For example, one could trim to equipoise, meaning only subjects with a preference score between 0.25 and 0.75 are kept: @@ -485,24 +489,28 @@ if (folderExists) { \fontsize{10}{12} \selectfont -## Inserting the population cohort in the database ## +## Generalizability -For various reasons it might be necessary to insert the study population back into the database, for example because we want to use an external cohort characterization tool. We can use the `insertDbPopulation` function for this purpose: +The goal of any propensity score adjustments is typically to make the target and comparator cohorts comparably, to allow proper causal inference. However, in doing so, we often need to modify our population, for example dropping subjects that have no counterpart in the other exposure cohort. The population we end up estimating an effect for may end up being very different from the population we started with. An important question is: how different? And it what ways? If the populations before and after adjustment are very different, our estimated effect may not generalize to the original population (if effect modification is present). The `getGeneralizabilityTable()` function informs on these differences: ```{r eval=FALSE} -insertDbPopulation( - population = matchedPop, - cohortIds = c(101,100), - connectionDetails = connectionDetails, - cohortDatabaseSchema = resultsDatabaseSchema, - cohortTable = "coxibVsNonselVsGiBleed", - createTable = FALSE, - cdmVersion = cdmVersion -) +getGeneralizabilityTable(balance) ``` -This function will store the population in a table with the same structure as the `cohort` table in the CDM, in this case in the same table where we had created our original cohorts. +```{r comment=NA,echo=FALSE,message=FALSE} +if (folderExists) { + table <- getGeneralizabilityTable(balance) + truncRight <- function(x, n){ + nc <- nchar(x) + x[nc > (n - 3)] <- paste('...',substr(x[nc > (n - 3)], nc[nc > (n - 3)] - n + 1, nc[nc > (n - 3)]),sep = "") + x + } + table$covariateName <- truncRight(table$covariateName, 30) + table +} +``` +In this case, because we used PS matching, we are likely aiming to estimate the average treatment effect in the treated (ATT). For this reason, the `getGeneralizabilityTable()` function automatically selected the target cohort as the basis for evaluating generalizability: it shows, for each covariate, the mean value before and PS adjustment in the target cohort. Also shown is the standardized difference of mean, and the table is reverse sorted by the absolute standard difference of mean (ASDM). # Follow-up and power @@ -757,7 +765,7 @@ We can also plot time-to-event, showing both events before and after the index d ```{r eval=FALSE} plotTimeToEvent(cohortMethodData = cohortMethodData, - outcomeId = 3, + outcomeId = 77, firstExposureOnly = FALSE, washoutPeriod = 0, removeDuplicateSubjects = "keep all", @@ -771,7 +779,7 @@ plotTimeToEvent(cohortMethodData = cohortMethodData, ```{r echo=FALSE, message=FALSE, results='hide'} if (folderExists) { plotTimeToEvent(cohortMethodData = cohortMethodData, - outcomeId = 3, + outcomeId = 77, firstExposureOnly = FALSE, washoutPeriod = 0, removeDuplicateSubjects = "keep all",