# --------------------------------------------------------------------------
# 01-phenotype-evaluation.R  (part 1: setup)
#
# Fetch cohort definitions from an ATLAS/WebAPI instance and create the
# cohort (and cohort-statistics) tables on a Databricks Spark CDM.
#
# NOTE(review): reconstructed from a console transcript; the original log
# output is preserved below as comments where it carries information.
# --------------------------------------------------------------------------

# install.packages("remotes")
# remotes::install_github("OHDSI/CohortGenerator")
# remotes::install_github("OHDSI/CohortDiagnostics")
# remotes::install_github("OHDSI/ROhdsiWebApi")
# remotes::install_github("OHDSI/OhdsiShinyModules")

# Attaching CohortGenerator also attaches DatabaseConnector and R6.
# (The transcript attached it three times; once is enough.)
library(CohortGenerator)

# --- environment used in the recorded run (optional checks) --------------
# R.Version()              # transcript: R 4.3.1 (2023-06-16 ucrt),
#                          # x86_64-w64-mingw32, nickname "Beagle Scouts"
# system("java -version")  # transcript: Java 1.8.0_261 (HotSpot 64-bit)
# --------------------------------------------------------------------------

# Retrieve the Databricks personal access token from the encrypted file
# keyring. The user is prompted for the keyring password on first access.
getToken <- function() {
  keyring::backend_file$new()$get(
    service = "databricks",
    user    = "token",
    keyring = "databricks_keyring"
  )
}

# Build the Databricks JDBC connection string with the token appended as
# the password (AuthMech=3, UID=token).
# NOTE(review): the token becomes part of the URL — avoid printing or
# logging the value returned by this function.
getUrl <- function() {
  url <- "jdbc:databricks://nachc-databricks.cloud.databricks.com:443/default;transportMode=http;ssl=1;httpPath=sql/protocolv1/o/3956472157536757/0123-223459-leafy532;AuthMech=3;UID=token;PWD="
  paste0(url, getToken())
}

# --- variables for the current execution ----------------------------------

baseUrl        <- "http://localhost:8080/WebAPI"
atlasCohortIds <- c(4, 5)

connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms             = "spark",
  connectionString = getUrl(),
  pathToDriver     = "D:\\_YES_2023-05-28\\workspace\\SosExamples\\_COVID\\02-data-diagnostics\\drivers\\databricks\\"
)

databaseId           <- "demo_cdm"
cdmDatabaseSchema    <- "demo_cdm"
cohortDatabaseSchema <- "demo_cdm"

dataFolder        <- "D:\\_YES_2023-05-28\\workspace\\SosExamples\\_COVID\\04-phenotype-evaluation\\output\\demo_db\\"
incrementalFolder <- paste0("incremental_", databaseId)

# --- phenotype evaluation: fetch definitions, create tables ---------------

# Export the cohort definitions (with inclusion-rule statistics enabled)
# from ATLAS. Transcript: "Fetching cohortId: 4 / Fetching cohortId: 5".
cohortDefinitionSet <- ROhdsiWebApi::exportCohortDefinitionSet(
  baseUrl       = baseUrl,
  cohortIds     = atlasCohortIds,
  generateStats = TRUE
)

# View(cohortDefinitionSet)

# Names for the cohort table and its companion statistics tables.
# (The transcript assigned this three times, once under the typo'd name
# "cohorTableNames"; a single correctly-spelled assignment suffices.)
cohortTableNames <- getCohortTableNames(cohortTable = "cohort")

# Create the cohort tables; incremental mode skips tables that already
# exist (transcript: all six tables already present, none recreated).
createCohortTables(
  connectionDetails    = connectionDetails,
  cohortDatabaseSchema = cohortDatabaseSchema,
  incremental          = TRUE
)
# --------------------------------------------------------------------------
# 01-phenotype-evaluation.R  (part 2: generate cohorts, run diagnostics)
#
# NOTE(review): reconstructed from a console transcript; the original log
# output is preserved below as comments where it carries information.
# --------------------------------------------------------------------------

# Generate the cohorts. Incremental mode skips cohorts whose definition is
# unchanged since the previous run (transcript: cohorts 4 and 5 skipped,
# "Generating cohort set took 0.62 secs").
generateCohortSet(
  connectionDetails   = connectionDetails,
  cohortDefinitionSet = cohortDefinitionSet,
  cohortTableNames    = cohortTableNames,
  incremental         = TRUE,
  incrementalFolder   = incrementalFolder,
  cdmDatabaseSchema   = cdmDatabaseSchema
)

# Attaching CohortDiagnostics also attaches FeatureExtraction, Andromeda,
# and dplyr; it masks CohortGenerator::getCohortCounts.
library(CohortDiagnostics)

# Run the full diagnostics suite. Results (csv files) are written under
# <dataFolder>/<databaseId>; minCellCount censors small counts.
#
# NOTE(review): in the recorded run this step FAILED at "Breaking down
# index events" with
#   "Cannot find required field index_event_breakdown - conceptId, ..."
# after warnings "No primary event criteria concept sets found for cohort
# id: 4" and "... id: 5". The visible cause is that these two cohorts
# define no concept sets in their primary event criteria, so the
# index-event breakdown has nothing to report. If that is expected for
# these cohort definitions, set runBreakdownIndexEvents = FALSE for this
# run — confirm against the cohort definitions in ATLAS before changing it.
executeDiagnostics(
  cohortDefinitionSet  = cohortDefinitionSet,
  connectionDetails    = connectionDetails,
  cdmDatabaseSchema    = cdmDatabaseSchema,
  cohortDatabaseSchema = cohortDatabaseSchema,
  cohortTableNames     = cohortTableNames,
  exportFolder         = file.path(dataFolder, databaseId),
  databaseId           = databaseId,
  incremental          = TRUE,
  incrementalFolder    = incrementalFolder,
  minCellCount         = 5,
  runInclusionStatistics            = TRUE,
  runIncludedSourceConcepts         = TRUE,
  runOrphanConcepts                 = TRUE,
  runTimeSeries                     = TRUE,
  runVisitContext                   = TRUE,
  runBreakdownIndexEvents           = TRUE,
  runIncidenceRate                  = TRUE,
  runCohortRelationship             = TRUE,
  runTemporalCohortCharacterization = TRUE
)