# --------------------------------------------------------------------------
# 01-phenotype-evaluation.R  (part 1: setup)
#
# Fetch cohort definitions from an ATLAS/WebAPI instance and create the
# cohort (and cohort-statistics) tables on a Databricks Spark CDM.
#
# NOTE(review): reconstructed from a console transcript; the original log
# output is preserved below as comments where it carries information.
# --------------------------------------------------------------------------

# install.packages("remotes")
# remotes::install_github("OHDSI/CohortGenerator")
# remotes::install_github("OHDSI/CohortDiagnostics")
# remotes::install_github("OHDSI/ROhdsiWebApi")
# remotes::install_github("OHDSI/OhdsiShinyModules")

# Attaching CohortGenerator also attaches DatabaseConnector and R6.
# (The transcript attached it three times; once is enough.)
library(CohortGenerator)

# --- environment used in the recorded run (optional checks) --------------
# R.Version()              # transcript: R 4.3.1 (2023-06-16 ucrt),
#                          # x86_64-w64-mingw32, nickname "Beagle Scouts"
# system("java -version")  # transcript: Java 1.8.0_261 (HotSpot 64-bit)
# --------------------------------------------------------------------------

# Retrieve the Databricks personal access token from the encrypted file
# keyring. The user is prompted for the keyring password on first access.
getToken <- function() {
  keyring::backend_file$new()$get(
    service = "databricks",
    user    = "token",
    keyring = "databricks_keyring"
  )
}

# Build the Databricks JDBC connection string with the token appended as
# the password (AuthMech=3, UID=token).
# NOTE(review): the token becomes part of the URL — avoid printing or
# logging the value returned by this function.
getUrl <- function() {
  url <- "jdbc:databricks://nachc-databricks.cloud.databricks.com:443/default;transportMode=http;ssl=1;httpPath=sql/protocolv1/o/3956472157536757/0123-223459-leafy532;AuthMech=3;UID=token;PWD="
  paste0(url, getToken())
}

# --- variables for the current execution ----------------------------------

baseUrl        <- "http://localhost:8080/WebAPI"
atlasCohortIds <- c(4, 5)

connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms             = "spark",
  connectionString = getUrl(),
  pathToDriver     = "D:\\_YES_2023-05-28\\workspace\\SosExamples\\_COVID\\02-data-diagnostics\\drivers\\databricks\\"
)

databaseId           <- "demo_cdm"
cdmDatabaseSchema    <- "demo_cdm"
cohortDatabaseSchema <- "demo_cdm"

dataFolder        <- "D:\\_YES_2023-05-28\\workspace\\SosExamples\\_COVID\\04-phenotype-evaluation\\output\\demo_db\\"
incrementalFolder <- paste0("incremental_", databaseId)

# --- phenotype evaluation: fetch definitions, create tables ---------------

# Export the cohort definitions (with inclusion-rule statistics enabled)
# from ATLAS. Transcript: "Fetching cohortId: 4 / Fetching cohortId: 5".
cohortDefinitionSet <- ROhdsiWebApi::exportCohortDefinitionSet(
  baseUrl       = baseUrl,
  cohortIds     = atlasCohortIds,
  generateStats = TRUE
)

# View(cohortDefinitionSet)

# Names for the cohort table and its companion statistics tables.
# (The transcript assigned this three times, once under the typo'd name
# "cohorTableNames"; a single correctly-spelled assignment suffices.)
cohortTableNames <- getCohortTableNames(cohortTable = "cohort")

# Create the cohort tables; incremental mode skips tables that already
# exist (transcript: all six tables already present, none recreated).
createCohortTables(
  connectionDetails    = connectionDetails,
  cohortDatabaseSchema = cohortDatabaseSchema,
  incremental          = TRUE
)
# --------------------------------------------------------------------------
# 01-phenotype-evaluation.R  (part 2: generate cohorts, run diagnostics)
#
# NOTE(review): reconstructed from a console transcript; the original log
# output is preserved below as comments where it carries information.
# --------------------------------------------------------------------------

# Generate the cohorts. Incremental mode skips cohorts whose definition is
# unchanged since the previous run (transcript: cohorts 4 and 5 skipped,
# "Generating cohort set took 0.62 secs").
generateCohortSet(
  connectionDetails   = connectionDetails,
  cohortDefinitionSet = cohortDefinitionSet,
  cohortTableNames    = cohortTableNames,
  incremental         = TRUE,
  incrementalFolder   = incrementalFolder,
  cdmDatabaseSchema   = cdmDatabaseSchema
)

# Attaching CohortDiagnostics also attaches FeatureExtraction, Andromeda,
# and dplyr; it masks CohortGenerator::getCohortCounts.
library(CohortDiagnostics)

# Run the full diagnostics suite. Results (csv files) are written under
# <dataFolder>/<databaseId>; minCellCount censors small counts.
#
# NOTE(review): in the recorded run this step FAILED at "Breaking down
# index events" with
#   "Cannot find required field index_event_breakdown - conceptId, ..."
# after warnings "No primary event criteria concept sets found for cohort
# id: 4" and "... id: 5". The visible cause is that these two cohorts
# define no concept sets in their primary event criteria, so the
# index-event breakdown has nothing to report. If that is expected for
# these cohort definitions, set runBreakdownIndexEvents = FALSE for this
# run — confirm against the cohort definitions in ATLAS before changing it.
executeDiagnostics(
  cohortDefinitionSet  = cohortDefinitionSet,
  connectionDetails    = connectionDetails,
  cdmDatabaseSchema    = cdmDatabaseSchema,
  cohortDatabaseSchema = cohortDatabaseSchema,
  cohortTableNames     = cohortTableNames,
  exportFolder         = file.path(dataFolder, databaseId),
  databaseId           = databaseId,
  incremental          = TRUE,
  incrementalFolder    = incrementalFolder,
  minCellCount         = 5,
  runInclusionStatistics            = TRUE,
  runIncludedSourceConcepts         = TRUE,
  runOrphanConcepts                 = TRUE,
  runTimeSeries                     = TRUE,
  runVisitContext                   = TRUE,
  runBreakdownIndexEvents           = TRUE,
  runIncidenceRate                  = TRUE,
  runCohortRelationship             = TRUE,
  runTemporalCohortCharacterization = TRUE
)