diff --git a/CohortMethod.Rproj b/CohortMethod.Rproj index ac857c97..c0f72a6e 100644 --- a/CohortMethod.Rproj +++ b/CohortMethod.Rproj @@ -16,6 +16,7 @@ AutoAppendNewline: Yes StripTrailingWhitespace: Yes BuildType: Package +PackageCleanBeforeInstall: No PackageInstallArgs: --no-multiarch --with-keep.source PackageBuildArgs: --resave-data PackageRoxygenize: rd,collate,namespace diff --git a/DESCRIPTION b/DESCRIPTION index ac1b9f44..31159720 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: CohortMethod Type: Package Title: New-User Cohort Method with Large Scale Propensity and Outcome Models Version: 5.0.0 -Date: 2022-06-21 +Date: 2023-04-17 Authors@R: c( person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut", "cre")), person("Marc", "Suchard", role = c("aut")), diff --git a/R/Export.R b/R/Export.R index b96745b8..0f3fd1b3 100644 --- a/R/Export.R +++ b/R/Export.R @@ -59,6 +59,13 @@ createCmDiagnosticThresholds <- function(mdrrThreshold = 10, sdmThreshold = 0.1, equipoiseThreshold = 0.2, attritionFractionThreshold = 1) { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertNumeric(mdrrThreshold, len = 1, lower = 0, add = errorMessages) + checkmate::assertNumeric(easeThreshold, len = 1, lower = 0, add = errorMessages) + checkmate::assertNumeric(sdmThreshold, len = 1, lower = 0, add = errorMessages) + checkmate::assertNumeric(equipoiseThreshold, len = 1, lower = 0, add = errorMessages) + checkmate::assertNumeric(attritionFractionThreshold, len = 1, lower = 0, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) thresholds <- list() for (name in names(formals(createCmDiagnosticThresholds))) { thresholds[[name]] <- get(name) diff --git a/R/KaplanMeier.R b/R/KaplanMeier.R index 4f1898a8..c0b36a4d 100644 --- a/R/KaplanMeier.R +++ b/R/KaplanMeier.R @@ -16,7 +16,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- #' Plot the Kaplan-Meier curve #' #' @description diff --git a/R/RunAnalyses.R b/R/RunAnalyses.R index e71fd01c..9ea648f0 100644 --- a/R/RunAnalyses.R +++ b/R/RunAnalyses.R @@ -215,11 +215,7 @@ runCmAnalyses <- function(connectionDetails, refitPsForEveryStudyPopulation = TRUE, multiThreadingSettings = createMultiThreadingSettings()) { errorMessages <- checkmate::makeAssertCollection() - if (is(connectionDetails, "connectionDetails")) { - checkmate::assertClass(connectionDetails, "connectionDetails", add = errorMessages) - } else { - checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) - } + checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) checkmate::assertCharacter(cdmDatabaseSchema, len = 1, add = errorMessages) checkmate::assertCharacter(tempEmulationSchema, len = 1, null.ok = TRUE, add = errorMessages) checkmate::assertCharacter(exposureDatabaseSchema, len = 1, add = errorMessages) @@ -1623,7 +1619,7 @@ createReferenceTable <- function(cmAnalysisList, #' @param outputFolder Name of the folder where all the outputs have been written to. #' #' @return -#' A tibble containing file nams of artifacts generated for each target-comparator-outcome-analysis combination. +#' A tibble containing file names of artifacts generated for each target-comparator-outcome-analysis combination. 
#' #' @export getFileReference <- function(outputFolder) { diff --git a/R/Simulation.R b/R/Simulation.R index aef977ac..403a4f23 100644 --- a/R/Simulation.R +++ b/R/Simulation.R @@ -62,9 +62,6 @@ createCohortMethodDataSimulationProfile <- function(cohortMethodData) { select("covariateId", "prevalence") %>% collect() - # covariatePrevalence <- sums$prevalence - # names(covariatePrevalence) <- sums$covariateId - message("Computing propensity model") propensityScore <- createPs(cohortMethodData, maxCohortSizeForFitting = 25000, @@ -84,7 +81,6 @@ createCohortMethodDataSimulationProfile <- function(cohortMethodData) { removeSubjectsWithPriorOutcome = FALSE, outcomeId = outcomeId ) - # studyPop <- trimByPsToEquipoise(studyPop) studyPop <- matchOnPs(studyPop, caliper = 0.25, caliperScale = "standardized", maxRatio = 1) outcomeModel <- fitOutcomeModel( population = studyPop, @@ -172,7 +168,6 @@ simulateCohortMethodData <- function(profile, n = 10000) { checkmate::assertInt(n, lower = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - message("Generating covariates") # Treatment variable is generated elsewhere: covariatePrevalence <- profile$covariatePrevalence[names(profile$covariatePrevalence) != "1"] diff --git a/R/Viewer.R b/R/Viewer.R index 7a96e996..ed774c44 100644 --- a/R/Viewer.R +++ b/R/Viewer.R @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ # exportFolder = file.path(folder, "export") # library(dplyr) @@ -31,6 +32,13 @@ #' #' @export insertExportedResultsInSqlite <- function(sqliteFileName, exportFolder, cohorts) { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertCharacter(sqliteFileName, len = 1, add = errorMessages) + checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages) + checkmate::assertDataFrame(cohorts, add = errorMessages) + checkmate::assertNames(colnames(cohorts), must.include = c("cohortId", "cohortName"), add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + ensureInstalled("RSQLite") connectionDetails <- DatabaseConnector::createConnectionDetails( dbms = "sqlite", @@ -62,7 +70,6 @@ insertExportedResultsInSqlite <- function(sqliteFileName, exportFolder, cohorts) #' using the [exportToCsv()] function. #' @template Cohorts #' -#' #' @return #' Does not return anything. Is called for the side-effect of having the results uploaded #' to the server. 
@@ -73,7 +80,15 @@ uploadExportedResults <- function(connectionDetails, append = FALSE, exportFolder, cohorts) { - # ensureInstalled("CohortGenerator") + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) + checkmate::assertCharacter(databaseSchema, len = 1, add = errorMessages) + checkmate::assertLogical(append, len = 1, add = errorMessages) + checkmate::assertCharacter(exportFolder, len = 1, add = errorMessages) + checkmate::assertDataFrame(cohorts, add = errorMessages) + checkmate::assertNames(colnames(cohorts), must.include = c("cohortId", "cohortName"), add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + ensureInstalled("ResultModelManager") connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) @@ -81,7 +96,6 @@ uploadExportedResults <- function(connectionDetails, if (!append) { # Create tables rdmsFile <- system.file("csv", "resultsDataModelSpecification.csv", package = "CohortMethod") - # specification <- CohortGenerator::readCsv(file = rdmsFile) specification <- readr::read_csv(file = rdmsFile, show_col_types = FALSE) %>% SqlRender::snakeCaseToCamelCaseNames() sql <- ResultModelManager::generateSqlSchema(csvFilepath = rdmsFile) @@ -147,6 +161,10 @@ uploadExportedResults <- function(connectionDetails, #' #' @export launchResultsViewerUsingSqlite <- function(sqliteFileName) { + errorMessages <- checkmate::makeAssertCollection() + checkmate::assertCharacter(sqliteFileName, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + ensureInstalled("RSQLite") connectionDetails <- DatabaseConnector::createConnectionDetails( dbms = "sqlite", @@ -171,6 +189,11 @@ launchResultsViewerUsingSqlite <- function(sqliteFileName) { #' #' @export launchResultsViewer <- function(connectionDetails, databaseSchema) { + errorMessages <- checkmate::makeAssertCollection() + 
checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages) + checkmate::assertCharacter(databaseSchema, len = 1, add = errorMessages) + checkmate::reportAssertions(collection = errorMessages) + ensureInstalled("ShinyAppBuilder") ensureInstalled("markdown") aboutModule <- ShinyAppBuilder::createDefaultAboutConfig( diff --git a/README.md b/README.md index df80795d..9c2eee59 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ User Documentation Documentation can be found on the [package website](https://ohdsi.github.io/CohortMethod). PDF versions of the documentation are also available: + * Vignette: [Single studies using the CohortMethod package](https://raw.githubusercontent.com/OHDSI/CohortMethod/main/inst/doc/SingleStudies.pdf) * Vignette: [Running multiple analyses at once using the CohortMethod package](https://raw.githubusercontent.com/OHDSI/CohortMethod/main/inst/doc/MultipleAnalyses.pdf) * Package manual: [CohortMethod.pdf](https://raw.githubusercontent.com/OHDSI/CohortMethod/main/extras/CohortMethod.pdf) diff --git a/_pkgdown.yml b/_pkgdown.yml index e7fcd75a..407e84fd 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -74,11 +74,20 @@ reference: - createMultiThreadingSettings - createDefaultMultiThreadingSettings - runCmAnalyses + - title: "Results of multiple analyses" + desc: > + Functions for viewing results of multiple analyses. + contents: - getFileReference - getResultsSummary - getInteractionResultsSummary - createCmDiagnosticThresholds - exportToCsv + - getResultsDataModel + - insertExportedResultsInSqlite + - launchResultsViewerUsingSqlite + - uploadExportedResults + - launchResultsViewer - title: "Simulation" desc: > Functions for simulating cohort method data objects. diff --git a/docs/articles/MultipleAnalyses.html b/docs/articles/MultipleAnalyses.html index f71f0329..acf4fd72 100644 --- a/docs/articles/MultipleAnalyses.html +++ b/docs/articles/MultipleAnalyses.html @@ -89,7 +89,7 @@
vignettes/MultipleAnalyses.Rmd
MultipleAnalyses.Rmd
/***********************************
File VignetteOutcomes.sql
***********************************/
-IF OBJECT_ID('@resultsDatabaseSchema.outcomes', 'U') IS NOT NULL
- DROP TABLE @resultsDatabaseSchema.outcomes;
-
-SELECT ancestor_concept_id AS cohort_definition_id,
- condition_start_date AS cohort_start_date,
- condition_end_date AS cohort_end_date,
- condition_occurrence.person_id AS subject_id
-INTO @resultsDatabaseSchema.outcomes
-FROM @cdmDatabaseSchema.condition_occurrence
-INNER JOIN @cdmDatabaseSchema.visit_occurrence
- ON condition_occurrence.visit_occurrence_id = visit_occurrence.visit_occurrence_id
-INNER JOIN @cdmDatabaseSchema.concept_ancestor
- ON condition_concept_id = descendant_concept_id
-WHERE ancestor_concept_id IN (192671, 24609, 29735, 73754, 80004, 134718, 139099,
-141932, 192367, 193739, 194997, 197236, 199074, 255573, 257007, 313459, 314658,
-316084, 319843, 321596, 374366, 375292, 380094, 433753, 433811, 436665, 436676,
-436940, 437784, 438134, 440358, 440374, 443617, 443800, 4084966, 4288310)
- AND visit_occurrence.visit_concept_id IN (9201, 9203);
+DROP TABLE IF EXISTS @resultsDatabaseSchema.outcomes;
+
+SELECT ancestor_concept_id AS cohort_definition_id,
+ condition_start_date AS cohort_start_date,
+ condition_end_date AS cohort_end_date,
+ condition_occurrence.person_id AS subject_id
+INTO @resultsDatabaseSchema.outcomes
+FROM @cdmDatabaseSchema.condition_occurrence
+INNER JOIN @cdmDatabaseSchema.visit_occurrence
+ ON condition_occurrence.visit_occurrence_id = visit_occurrence.visit_occurrence_id
+INNER JOIN @cdmDatabaseSchema.concept_ancestor
+ ON condition_concept_id = descendant_concept_id
+WHERE ancestor_concept_id IN (192671, 24609, 29735, 73754, 80004, 134718, 139099,
+141932, 192367, 193739, 194997, 197236, 199074, 255573, 257007, 313459, 314658,
+316084, 319843, 321596, 374366, 375292, 380094, 433753, 433811, 436665, 436676,
+436940, 437784, 438134, 440358, 440374, 443617, 443800, 4084966, 4288310)
+ AND visit_occurrence.visit_concept_id IN (9201, 9203);
This is parameterized SQL which can be used by the
SqlRender
package. We use parameterized SQL so we do not
have to pre-specify the names of the CDM and result schemas. That way,
@@ -488,7 +487,7 @@
## # A tibble: 6 x 27
-## analysisId targetId comparat~1 outco~2 trueE~3 targe~4 compa~5 targe~6 compa~7
-## <int> <int> <int> <int> <dbl> <int> <int> <dbl> <dbl>
-## 1 1 1118084 1124300 29735 1 86294 99497 1.15e7 7116045
-## 2 1 1118084 1124300 140673 1 86718 99941 1.15e7 7152671
-## 3 1 1118084 1124300 192671 NA 84447 97429 1.12e7 6952965
-## 4 1 1118084 1124300 197494 1 86718 99941 1.15e7 7152671
-## 5 1 1118084 1124300 198185 1 86718 99941 1.15e7 7152671
-## 6 1 1118084 1124300 198199 1 86718 99941 1.15e7 7152671
-## # ... with 18 more variables: targetOutcomes <dbl>, comparatorOutcomes <dbl>,
+## analysisId targetId comparatorId outcomeId trueEffectSize targetSubjects
+## <int> <int> <int> <int> <dbl> <int>
+## 1 1 1118084 1124300 29735 1 86294
+## 2 1 1118084 1124300 140673 1 86718
+## 3 1 1118084 1124300 192671 NA 84447
+## 4 1 1118084 1124300 197494 1 86718
+## 5 1 1118084 1124300 198185 1 86718
+## 6 1 1118084 1124300 198199 1 86718
+## # i 21 more variables: comparatorSubjects <int>, targetDays <dbl>,
+## # comparatorDays <dbl>, targetOutcomes <dbl>, comparatorOutcomes <dbl>,
## # rr <dbl>, ci95Lb <dbl>, ci95Ub <dbl>, p <dbl>, logRr <dbl>, seLogRr <dbl>,
## # llr <dbl>, mdrr <dbl>, attritionFraction <dbl>, calibratedRr <dbl>,
## # calibratedCi95Lb <dbl>, calibratedCi95Ub <dbl>, calibratedP <dbl>,
-## # calibratedLogRr <dbl>, calibratedSeLogRr <dbl>, ease <dbl>, and abbreviated
-## # variable names 1: comparatorId, 2: outcomeId, 3: trueEffectSize,
-## # 4: targetSubjects, 5: comparatorSubjects, 6: targetDays, ...
+## # calibratedLogRr <dbl>, calibratedSeLogRr <dbl>, ease <dbl>
This tells us, per target-comparator-outcome-analysis combination, the estimated relative risk and 95% confidence interval, as well as the number of people in the treated and comparator group (after trimming and @@ -670,16 +668,102 @@
The results generated so far all reside in a binary object on your
+local file system, mixing aggregate statistics such as hazard ratios
+with patient-level data including propensity scores per person. How
+could we share our results with others, possibly outside our
+organization? This is where the exportToCsv()
function
+comes in. This function exports all results, including diagnostics to
+CSV (comma-separated values) files. These files only contain aggregate
+statistics, not patient-level data. The format is CSV files to enable
+human review.
+exportToCsv(
+ outputFolder,
+ exportFolder = file.path(folder, "export"),
+ databaseId = "My CDM",
+ minCellCount = 5,
+ maxCores = parallel::detectCores()
+)
Any person counts in the results that are smaller than the
+minCellCount
argument will be blinded, by replacing the
+count with the negative minCellCount
. For example, if the
+number of people with the outcome is 3, and
+minCellCount = 5
, the count will be reported to be -5,
+which in the Shiny app will be displayed as ‘<5’.
Information on the data model used to generate the CSV files can be
+retrieved using getResultsDataModel()
:
## # A tibble: 171 x 7
+## table_name column_name data_type is_required primary_key min_cell_count
+## <chr> <chr> <chr> <chr> <chr> <chr>
+## 1 cm_attrition sequence_n~ int Yes Yes No
+## 2 cm_attrition description varchar Yes No No
+## 3 cm_attrition subjects int Yes No Yes
+## 4 cm_attrition exposure_id int Yes Yes No
+## 5 cm_attrition target_id int Yes Yes No
+## 6 cm_attrition comparator~ int Yes Yes No
+## 7 cm_attrition analysis_id int Yes Yes No
+## 8 cm_attrition outcome_id int Yes Yes No
+## 9 cm_attrition database_id varchar Yes Yes No
+## 10 cm_follow_up_di~ target_id int Yes Yes No
+## # i 161 more rows
+## # i 1 more variable: description <chr>
+Finally, we can view the results in a Shiny app. For this we must
+first load the CSV files produced by exportToCsv()
into a
+database. We could use the uploadExportedResults()
function
+for this. However, if we just want to view the results ourselves we can
+create a small SQLite database ourselves without having to set up a
+database server. In any case we need to specify the names of the
+exposure and outcome cohorts we used in our study. We can create the
+SQLite database using:
+cohorts <- data.frame(
+ cohortId = c(
+ 1118084,
+ 1124300,
+ 192671),
+ cohortName = c(
+ "Celecoxib",
+ "Diclofenac",
+ "GI Bleed"
+ )
+)
+
+insertExportedResultsInSqlite(
+ sqliteFileName = file.path(folder, "myResults.sqlite"),
+ exportFolder = file.path(folder, "export"),
+ cohorts = cohorts
+)
Next we launch the Shiny app using:
+
+launchResultsViewerUsingSqlite(
+ sqliteFileName = file.path(folder, "myResults.sqlite")
+)
Considerable work has been dedicated to providing the
CohortMethod
package.
+citation("CohortMethod")
## ## To cite package 'CohortMethod' in publications use: ## -## Schuemie M, Suchard M, Ryan P (2022). _CohortMethod: New-User Cohort +## Schuemie M, Suchard M, Ryan P (2023). _CohortMethod: New-User Cohort ## Method with Large Scale Propensity and Outcome Models_. ## https://ohdsi.github.io/CohortMethod, ## https://github.com/OHDSI/CohortMethod. @@ -690,13 +774,13 @@
Acknowledgments## title = {CohortMethod: New-User Cohort Method with Large Scale Propensity and Outcome ## Models}, ## author = {Martijn Schuemie and Marc Suchard and Patrick Ryan}, -## year = {2022}, +## year = {2023}, ## note = {https://ohdsi.github.io/CohortMethod, ## https://github.com/OHDSI/CohortMethod}, ## }
Further,
-CohortMethod
makes extensive use of theCyclops
package.+DROP TABLE IF EXISTS @resultsDatabaseSchema.coxibVsNonselVsGiBleed; + +CREATE TABLE @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( +INT, + cohort_definition_id DATE, + cohort_start_date DATE, + cohort_end_date + subject_id BIGINT + ); +INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( + + cohort_definition_id, + cohort_start_date, + cohort_end_date, + subject_id + )SELECT 1, -- Exposure + + drug_era_start_date, + drug_era_end_date, + person_idFROM @cdmDatabaseSchema.drug_era +WHERE drug_concept_id = 1118084;-- celecoxib + +INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( + + cohort_definition_id, + cohort_start_date, + cohort_end_date, + subject_id + )SELECT 2, -- Comparator + + drug_era_start_date, + drug_era_end_date, + person_idFROM @cdmDatabaseSchema.drug_era +WHERE drug_concept_id = 1124300; --diclofenac + +INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( + + cohort_definition_id, + cohort_start_date, + cohort_end_date, + subject_id + )SELECT 3, -- Outcome + + condition_start_date, + condition_end_date, + condition_occurrence.person_idFROM @cdmDatabaseSchema.condition_occurrence +INNER JOIN @cdmDatabaseSchema.visit_occurrence +ON condition_occurrence.visit_occurrence_id = visit_occurrence.visit_occurrence_id + WHERE condition_concept_id IN ( +SELECT descendant_concept_id + FROM @cdmDatabaseSchema.concept_ancestor + WHERE ancestor_concept_id = 192671 -- GI - Gastrointestinal haemorrhage + + )AND visit_occurrence.visit_concept_id IN (9201, 9203);+citation("Cyclops")
## ## To cite Cyclops in publications use: diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-11-1.png index 9af08299..c0e9ef43 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-11-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-16-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-16-1.png index 64a45f26..fdc81535 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-16-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-16-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-18-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-18-1.png index e21b5efc..5bbda4de 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-18-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-18-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-20-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-20-1.png index ea9a588e..3fac1787 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-20-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-20-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-22-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-22-1.png index c8e24bdc..076a61a3 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-22-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-22-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-24-1.png 
b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-24-1.png index 44100932..03794db6 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-24-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-24-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-26-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-26-1.png index 97fec1f5..e63c3a49 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-26-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-26-1.png differ diff --git a/docs/articles/SingleStudies.html b/docs/articles/SingleStudies.html index 03cd3ffa..7c2e6d54 100644 --- a/docs/articles/SingleStudies.html +++ b/docs/articles/SingleStudies.html @@ -89,7 +89,7 @@
Single studies using the CohortMethod
Martijn J. Schuemie, Marc A. Suchard and Patrick Ryan
-2023-02-28
+2023-04-17
Source:vignettes/SingleStudies.Rmd
@@ -175,61 +175,60 @@SingleStudies.Rmd
Preparing the exposures and outcom File coxibVsNonselVsGiBleed.sql ***********************************/ -IF OBJECT_ID('@resultsDatabaseSchema.coxibVsNonselVsGiBleed', 'U') IS NOT NULL -DROP TABLE @resultsDatabaseSchema.coxibVsNonselVsGiBleed; - -CREATE TABLE @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( -INT, - cohort_definition_id DATE, - cohort_start_date DATE, - cohort_end_date - subject_id BIGINT - ); -INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( - - cohort_definition_id, - cohort_start_date, - cohort_end_date, - subject_id - )SELECT 1, -- Exposure - - drug_era_start_date, - drug_era_end_date, - person_idFROM @cdmDatabaseSchema.drug_era -WHERE drug_concept_id = 1118084;-- celecoxib - -INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( - - cohort_definition_id, - cohort_start_date, - cohort_end_date, - subject_id - )SELECT 2, -- Comparator - - drug_era_start_date, - drug_era_end_date, - person_idFROM @cdmDatabaseSchema.drug_era -WHERE drug_concept_id = 1124300; --diclofenac - -INSERT INTO @resultsDatabaseSchema.coxibVsNonselVsGiBleed ( - - cohort_definition_id, - cohort_start_date, - cohort_end_date, - subject_id - )SELECT 3, -- Outcome - - condition_start_date, - condition_end_date, - condition_occurrence.person_idFROM @cdmDatabaseSchema.condition_occurrence -INNER JOIN @cdmDatabaseSchema.visit_occurrence -ON condition_occurrence.visit_occurrence_id = visit_occurrence.visit_occurrence_id - WHERE condition_concept_id IN ( -SELECT descendant_concept_id - FROM @cdmDatabaseSchema.concept_ancestor - WHERE ancestor_concept_id = 192671 -- GI - Gastrointestinal haemorrhage - - )AND visit_occurrence.visit_concept_id IN (9201, 9203);
This is parameterized SQL which can be used by the
SqlRender
package. We use parameterized SQL so we do not
have to pre-specify the names of the CDM and result schemas. That way,
@@ -322,7 +321,7 @@
One additional filtering step that is often used is matching or trimming on propensity scores, as will be discussed next.
One advantage of using the regularization when fitting the propensity model is that most coefficients will shrink to zero and fall out of the model. It is a good idea to inspect the remaining variables for anything @@ -566,12 +565,12 @@
## # A tibble: 6 x 5
## description targetPersons comparatorPersons targetExposures comparatorExposures
## <chr> <dbl> <dbl> <dbl> <dbl>
-## 1 Original cohorts 190395 1006837 333945 1827883
-## 2 First exp. only & removed s ... 87178 734982 87178 734982
+## 1 Original cohorts 856973 915830 1946114 1786318
+## 2 First exp. only & removed s ... 373874 541386 373874 541386
## 3 Random sample 50000 50000 50000 50000
-## 4 No prior outcome 48681 48712 48681 48712
-## 5 Have at least 1 days at ris ... 48641 48683 48641 48683
-## 6 Matched on propensity score 25399 25399 25399 25399
+## 4 No prior outcome 48700 48715 48700 48715
+## 5 Have at least 1 days at ris ... 48667 48688 48667 48688
+## 6 Matched on propensity score 22339 22339 22339 22339
Or, if we like, we can plot an attrition diagram:
drawAttritionDiagram(matchedPop)
plotCovariateBalanceScatterPlot(balance, showCovariateCountLabel = TRUE, showMaxLabel = TRUE)
## Warning: Removed 25390 rows containing missing values (`geom_point()`).
+## Warning: Removed 23590 rows containing missing values (`geom_point()`).
plotCovariateBalanceOfTopVariables(balance)
## targetPersons comparatorPersons targetExposures comparatorExposures targetDays comparatorDays totalOutcomes mdrr se
-## 1 48641 48683 48641 48683 6440255 3434785 270 1.406351 0.1217161
+## targetPersons comparatorPersons targetExposures comparatorExposures targetDays comparatorDays totalOutcomes mdrr se
+## 1 48667 48688 48667 48688 7421404 3693928 554 1.26878 0.08497186
In this example we used the studyPop
object, so the
population before any matching or trimming. If we want to know the MDRR
after matching, we use the matchedPop
object we created
@@ -759,8 +747,8 @@
## targetPersons comparatorPersons targetExposures comparatorExposures targetDays comparatorDays totalOutcomes mdrr se
-## 1 25399 25399 25399 25399 3142203 1979636 141 1.602992 0.1684304
+## targetPersons comparatorPersons targetExposures comparatorExposures targetDays comparatorDays totalOutcomes mdrr se
+## 1 22339 22339 22339 22339 3118703 1801997 226 1.451674 0.133038
Even though the MDRR in the matched population is higher, meaning we have less power, we should of course not be fooled: matching most likely eliminates confounding, and is therefore preferred to not matching.
@@ -772,8 +760,8 @@
getFollowUpDistribution(population = matchedPop)
## 100% 75% 50% 25% 0% Treatment
-## 1 2 60 60 119 5006 1
-## 2 2 43 60 62 2771 0
+## 1 2 60 60 126 4184 1
+## 2 2 45 60 67 2996 0
The output is telling us number of days of follow-up each quantile of the study population has. We can also plot the distribution:
@@ -801,8 +789,8 @@+## Estimate lower .95 upper .95 logRr seLogRr +## treatment 1.25115 1.03524 1.51802 0.22406 0.0976Fitting a simple outcome model## Use inverse probability of treatment weighting: FALSE ## Status: OK ## -## Estimate lower .95 upper .95 logRr seLogRr -## treatment 0.82576 0.63952 1.06949 -0.19146 0.1312
But of course we want to make use of the matching done on the propensity score:
@@ -816,8 +804,8 @@+## Estimate lower .95 upper .95 logRr seLogRr +## treatment 0.9942982 0.7212731 1.3608869 -0.0057181 0.162Fitting a simple outcome model## Use inverse probability of treatment weighting: FALSE ## Status: OK ## -## Estimate lower .95 upper .95 logRr seLogRr -## treatment 0.87246 0.57923 1.29831 -0.13644 0.2059
Note that we define the sub-population to be only those in the
matchedPop
object, which we created earlier by matching on
the propensity score. We also now use a stratified Cox model,
@@ -835,8 +823,8 @@
Note that you can use the grepCovariateNames
to find
covariate IDs.
It is prudent to verify that covariate balance has also been achieved @@ -899,10 +887,10 @@
exp(coef(outcomeModel))
## 900000010805
-## 0.7967149
+## 0.9985318
-## [1] 0.4642065 1.3465141
+## [1] 0.6870719 1.4438456
We can also see the covariates that ended up in the outcome model:
getOutcomeModel(outcomeModel, cohortMethodData)
## coefficient id name
-## 1 -0.2272584 9e+11 Treatment
+## coefficient id name
+## 1 -0.001469294 9e+11 Treatment
##
## To cite package 'CohortMethod' in publications use:
##
-## Schuemie M, Suchard M, Ryan P (2022). _CohortMethod: New-User Cohort Method with Large Scale Propensity and Outcome Models_. https://ohdsi.github.io/CohortMethod,
+## Schuemie M, Suchard M, Ryan P (2023). _CohortMethod: New-User Cohort Method with Large Scale Propensity and Outcome Models_. https://ohdsi.github.io/CohortMethod,
## https://github.com/OHDSI/CohortMethod.
##
## A BibTeX entry for LaTeX users is
@@ -974,7 +962,7 @@ Acknowledgments## title = {CohortMethod: New-User Cohort Method with Large Scale Propensity and Outcome
## Models},
## author = {Martijn Schuemie and Marc Suchard and Patrick Ryan},
-## year = {2022},
+## year = {2023},
## note = {https://ohdsi.github.io/CohortMethod,
## https://github.com/OHDSI/CohortMethod},
## }
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-21-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-21-1.png
index bd891ffa..8eac343a 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-21-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-21-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-25-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-25-1.png
index 4dfda65d..64152d8e 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-25-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-25-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-27-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-27-1.png
index 4ee452c0..4900e013 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-27-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-27-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-29-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-29-1.png
index 16e35ec9..b57065b8 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-29-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-29-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-33-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-33-1.png
index 35761354..14ff8bbc 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-33-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-33-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-37-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-37-1.png
index ec6acfd7..00c43feb 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-37-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-37-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-39-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-39-1.png
index 1a244902..7a43a30f 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-39-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-39-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-50-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-50-1.png
index 58fad45b..eaf0d183 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-50-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-50-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-60-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-60-1.png
index 471adc95..cf409532 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-60-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-60-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-70-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-70-1.png
index 51cd3990..f9b7ffa1 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-70-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-70-1.png differ
diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-72-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-72-1.png
index 3b297634..80ab6608 100644
Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-72-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-72-1.png differ
diff --git a/docs/articles/shinyApp.png b/docs/articles/shinyApp.png
new file mode 100644
index 00000000..672f7c6f
Binary files /dev/null and b/docs/articles/shinyApp.png differ
diff --git a/docs/index.html b/docs/index.html
index fed59d44..c1cc2dbf 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -179,7 +179,15 @@ Documentation can be found on the package website.
-PDF versions of the documentation are also available: * Vignette: Single studies using the CohortMethod package * Vignette: Running multiple analyses at once using the CohortMethod package * Package manual: CohortMethod.pdf
+PDF versions of the documentation are also available:
+Removing deprecated excludeDrugsFromCovariates
argument from getDbCohortMethodData()
function.
Removing deprecated oracleTempSchema
argument from getDbCohortMethodData()
and runCmAnalyses()
functions.
Removing deprecated addExposureDaysToStart
and addExposureDaysToEnd
arguments from createStudyPopulation()
and plotTimeToEvent()
functions.
The removeDuplicateSubjects
argument of getDbCohortMethodData()
and createStudyPopulation()
is no longer allowed to be a boolean.
Adding computeEquipoise()
function.
Output likelihood profile as data frame instead of named vector for consistency with other HADES packages.
Added the covariateFilter
argument to the computeCovariateBalance
function, to allow balance to be computed only for a subset of covariates.
Now computing IPTW in createPs()
, and truncating IPTW can be done in truncateIptw()
. The computeCovariateBalance()
function now computes balance using IPTW if no stratumId
column is found in the population
argument.
Removing PS of exactly 0 and exactly 1 when computing the standard deviation of the logit for the matching caliper to allow matching when some subjects have perfectly predictable treatment assignment.
Adding maxRows
argument to computePsAuc()
function to improve speed for very large study populations.
Dropping support for CDM v4.
Major overhaul of the multiple-analyses framework:
Added the createOutcome()
function, to be used with createTargetComparatorOutcomes()
. This allows the priorOutcomeLookback
, riskWindowStart
, startAnchor
, riskWindowEnd
, and endAnchor
arguments to be specified per outcome. These settings (if provided) will override the settings created using the createCreateStudyPopulationArgs()
function. In addition, the createOutcome()
function has an outcomeOfInterest
and trueEffectSize
argument (see below).
Added empirical calibration to the getResultsSummary()
function. Controls can be identified by the trueEffectSize
argument in the createOutcome()
function.
Dropping arguments like createPs
and fitOutcomeModel
from the createCmAnalysis()
function. Instead, not providing createPsArgs
or fitOutcomeModelArgs
is assumed to mean skipping propensity score creation or outcome model fitting, respectively.
Added the exportToCsv()
function for exporting study results to CSV files that do not contain patient-level information and can therefore be shared between sites. The getResultsDataModel()
function returns the data model for these CSV files.
Added the uploadExportedResults()
and insertExportedResultsInSqlite()
functions for uploading the results from the CSV files in a database. The launchResultsViewer()
and launchResultsViewerUsingSqlite()
functions were added for launching a Shiny app to view the results in the (SQLite) database.
Bug fixes:
maxWeight
when performing IPTW.exportToCsv(
outputFolder,
exportFolder = file.path(outputFolder, "export"),
- databaseId = 1,
+ databaseId,
minCellCount = 5,
maxCores = 1,
cmDiagnosticThresholds = createCmDiagnosticThresholds()
diff --git a/docs/reference/getDbCohortMethodData.html b/docs/reference/getDbCohortMethodData.html
index e03ded1d..4d74ab8f 100644
--- a/docs/reference/getDbCohortMethodData.html
+++ b/docs/reference/getDbCohortMethodData.html
@@ -86,7 +86,7 @@ Get the cohort data from the server
outcomeTable = "condition_occurrence",
cdmVersion = "5",
firstExposureOnly = FALSE,
- removeDuplicateSubjects = FALSE,
+ removeDuplicateSubjects = "keep all",
restrictToCommonPeriod = FALSE,
washoutPeriod = 0,
maxCohortSize = 0,
diff --git a/docs/reference/getFileReference.html b/docs/reference/getFileReference.html
index e19c4af1..6898d041 100644
--- a/docs/reference/getFileReference.html
+++ b/docs/reference/getFileReference.html
@@ -82,7 +82,7 @@ Arguments
Value
-A tibble containing file nams of artifacts generated for each target-comparator-outcome-analysis combination.
+A tibble containing file names of artifacts generated for each target-comparator-outcome-analysis combination.
Get results data model
+getResultsDataModel()
A tibble specifying the tables and columns generated by the exportToCsv()
function.
Run a list of analyses
Functions for viewing results of multiple analyses.
+Get file reference
Export cohort method results to CSV files
Get results data model
Insert exported results into a SQLite database
Launch Shiny app using a SQLite database
Upload exported results to a database
Launch Shiny app using
Functions for simulating cohort method data objects.
diff --git a/docs/reference/insertExportedResultsInSqlite.html b/docs/reference/insertExportedResultsInSqlite.html new file mode 100644 index 00000000..1e8ffec6 --- /dev/null +++ b/docs/reference/insertExportedResultsInSqlite.html @@ -0,0 +1,125 @@ + +R/Viewer.R
+ insertExportedResultsInSqlite.Rd
Insert exported results into a SQLite database
+insertExportedResultsInSqlite(sqliteFileName, exportFolder, cohorts)
The name of the SQLite file to store the results in. If the +file does not exist it will be created.
The folder containing the CSV files to upload, as generated
+using the exportToCsv()
function.
A data frame describing the cohorts used in the study. Should
+include the target, comparator, and outcome of interest cohorts.
+The data frame should at least have cohortId
and cohortName
+columns.
Does not return anything. Called for the side effect of inserting data into the +SQLite database.
+Launch Shiny app using
+launchResultsViewer(connectionDetails, databaseSchema)
An R object of type connectionDetails
created using the
+DatabaseConnector::createConnectionDetails()
function.
The name of the database schema where the results were
+written using uploadExportedResults()
.
Does not return anything. Is called for the side-effect of launching the Shiny +app.
+R/Viewer.R
+ launchResultsViewerUsingSqlite.Rd
Launch Shiny app using a SQLite database
+launchResultsViewerUsingSqlite(sqliteFileName)
The name of the SQLite file where the results were stored
+using the insertExportedResultsInSqlite()
function.
Does not return anything. Is called for the side-effect of launching the Shiny +app.
+Upload exported results to a database
+uploadExportedResults(
+ connectionDetails,
+ databaseSchema,
+ append = FALSE,
+ exportFolder,
+ cohorts
+)
An R object of type connectionDetails
created using the
+DatabaseConnector::createConnectionDetails()
function.
The name of the database schema where the results will be +written.
Append the results to existing tables? Can be used for +uploading results from multiple databases into a single +results schema.
The folder containing the CSV files to upload, as generated
+using the exportToCsv()
function.
A data frame describing the cohorts used in the study. Should
+include the target, comparator, and outcome of interest cohorts.
+The data frame should at least have cohortId
and cohortName
+columns.
Does not return anything. Is called for the side-effect of having the results uploaded +to the server.
+