I’m getting the errors shown below when running the CharacterizationModule against a Databricks/Spark database. The failure occurs while reading back the dechallenge/rechallenge case-series results table: Spark's query planner reports it cannot resolve a `subject_id` column, even though the table's other columns (including `person_key`) are found — possibly a column-name mismatch between what the module writes and what it later selects. Posting it here in case the error looks familiar to someone; I’ll add more details as I find them.
DBMS:
spark
Error:
java.sql.SQLException: [Databricks][DatabricksJDBCDriver](500051) ERROR processing query/statement. Error Code: 0, SQL state: org.apache.hive.service.cli.HiveSQLException: Error running query: java.lang.IllegalStateException: Couldn't find subject_id#54861279 in [database_id#54861273,dechallenge_stop_interval#54861274,dechallenge_evaluation_window#54861275,target_cohort_definition_id#54861276L,outcome_cohort_definition_id#54861277L,person_key#54861278,dechallenge_exposure_number#54861280,dechallenge_exposure_start_date_offset#54861281,dechallenge_exposure_end_date_offset#54861282,dechallenge_outcome_number#54861283,dechallenge_outcome_start_date_offset#54861284,rechallenge_exposure_number#54861285,rechallenge_exposure_start_date_offset#54861286,rechallenge_exposure_end_date_offset#54861287,rechallenge_outcome_number#54861288,rechallenge_outcome_start_date_offset#54861289]
at org.apache.spark.sql.hive.thriftserver.HiveThriftServerErrors$.runningQueryError(HiveThriftServerErrors.scala:56)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:498)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.unity.EmptyHandle$.runWith(UCSHandle.scala:124)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:410)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:321)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties(ThriftLocalProperties.scala:149)
at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties$(ThriftLocalProperties.scala:49)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:54)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:299)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:284)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:333)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.IllegalStateException: Couldn't find subject_id#54861279 in [database_id#54861273,dechallenge_stop_interval#54861274,dechallenge_evaluation_window#54861275,target_cohort_definition_id#54861276L,outcome_cohort_definition_id#54861277L,person_key#54861278,dechallenge_exposure_number#54861280,dechallenge_exposure_start_date_offset#54861281,dechallenge_exposure_end_date_offset#54861282,dechallenge_outcome_number#54861283,dechallenge_outcome_start_date_offset#54861284,rechallenge_exposure_number#54861285,rechallenge_exposure_start_date_offset#54861286,rechallenge_exposure_end_date_offset#54861287,rechallenge_outcome_number#54861288,rechallenge_outcome_start_date_offset#54861289]
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:80)
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:73)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:99)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:517)
at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1174)
at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1173)
at org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:653)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:517)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:517)
at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1174)
at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1173)
at org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:653)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:517)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:456)
at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:73)
at org.apache.spark.sql.catalyst.expressions.BindReferences$.$anonfun$bindReferences$1(BoundAttribute.scala:94)
at scala.collection.immutable.List.map(List.scala:297)
at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReferences(BoundAttribute.scala:94)
at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:71)
at org.apache.spark.sql.execution.CodegenSupport.consume(WholeStageCodegenExec.scala:197)
at org.apache.spark.sql.execution.CodegenSupport.consume$(WholeStageCodegenExec.scala:152)
at org.apache.spark.sql.execution.ColumnarToRowExec.consume(Columnar.scala:71)
at org.apache.spark.sql.execution.ColumnarToRowExec.doProduce(Columnar.scala:202)
at org.apache.spark.sql.execution.CodegenSupport.$anonfun$produce$1(WholeStageCodegenExec.scala:98)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:271)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:267)
at org.apache.spark.sql.execution.CodegenSupport.produce(WholeStageCodegenExec.scala:93)
at org.apache.spark.sql.execution.CodegenSupport.produce$(WholeStageCodegenExec.scala:93)
at org.apache.spark.sql.execution.ColumnarToRowExec.produce(Columnar.scala:71)
at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:56)
at org.apache.spark.sql.execution.CodegenSupport.$anonfun$produce$1(WholeStageCodegenExec.scala:98)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:271)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:267)
at org.apache.spark.sql.execution.CodegenSupport.produce(WholeStageCodegenExec.scala:93)
at org.apache.spark.sql.execution.CodegenSupport.produce$(WholeStageCodegenExec.scala:93)
at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:46)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:661)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:724)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:227)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:271)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:165)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:267)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:223)
at com.databricks.sql.execution.arrowcollect.ArrowBatchCollector$.collect(ArrowBatchCollector.scala:93)
at com.databricks.sql.execution.arrowcollect.ArrowBatchCollector$.collect(ArrowBatchCollector.scala:125)
at org.apache.spark.sql.execution.qrc.CompressedArrowBatchFormat$.collect(cachedSparkResults.scala:158)
at org.apache.spark.sql.execution.qrc.CompressedArrowBatchFormat$.collect(cachedSparkResults.scala:142)
at org.apache.spark.sql.execution.qrc.ResultCacheManager.$anonfun$computeResult$1(ResultCacheManager.scala:531)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at org.apache.spark.sql.execution.qrc.ResultCacheManager.collectResult$1(ResultCacheManager.scala:519)
at org.apache.spark.sql.execution.qrc.ResultCacheManager.computeResult(ResultCacheManager.scala:539)
at org.apache.spark.sql.execution.qrc.ResultCacheManager.$anonfun$getOrComputeResultInternal$1(ResultCacheManager.scala:396)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.execution.qrc.ResultCacheManager.getOrComputeResultInternal(ResultCacheManager.scala:390)
at org.apache.spark.sql.execution.qrc.ResultCacheManager.getOrComputeArrowResult(ResultCacheManager.scala:316)
at org.apache.spark.sql.execution.SparkPlan.executeArrowCollectResult(SparkPlan.scala:443)
at org.apache.spark.sql.Dataset.$anonfun$collectToArrowBatches$1(Dataset.scala:3441)
at org.apache.spark.sql.Dataset.$anonfun$withAction$3(Dataset.scala:4292)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:763)
at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4290)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$8(SQLExecution.scala:243)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:392)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:188)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:985)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:142)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:342)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4290)
at org.apache.spark.sql.Dataset.collectToArrowBatches(Dataset.scala:3440)
at org.apache.spark.sql.hive.thriftserver.ArrowResultHandler.computeResult(ArrowResultHandler.scala:53)
at org.apache.spark.sql.hive.thriftserver.ArrowResultHandler.computeResult(ArrowResultHandler.scala:32)
at org.apache.spark.sql.hive.thriftserver.ResultCollector.collectResult(ResultCollector.scala:53)
at org.apache.spark.sql.hive.thriftserver.ResultCollector.collectResult$(ResultCollector.scala:29)
at org.apache.spark.sql.hive.thriftserver.ArrowResultHandler.collectResult(ArrowResultHandler.scala:32)
at org.apache.spark.sql.hive.thriftserver.ArrowResultHandler$$anon$1.iterator(ArrowResultHandler.scala:66)
at org.apache.spark.sql.hive.thriftserver.ArrowFetchIterator.<init>(ArrowFetchIterator.scala:24)
at org.apache.spark.sql.hive.thriftserver.ArrowResultHandler.org$apache$spark$sql$hive$thriftserver$ArrowResultHandler$$initFromDataFrame(ArrowResultHandler.scala:62)
at org.apache.spark.sql.hive.thriftserver.ArrowResultHandler$.createFromDataFrame(ArrowResultHandler.scala:180)
at org.apache.spark.sql.hive.thriftserver.ResultHandlerFactory.createResultHandler(ResultHandlerFactory.scala:258)
at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:469)
... 19 more
, Query: select * f***.
SQL:
select * from nu4puu48fail_case_series;
R version:
R version 4.3.2 (2023-10-31 ucrt)
Platform:
x86_64-w64-mingw32
Attached base packages:
- stats
- graphics
- grDevices
- datasets
- utils
- methods
- base
Other attached packages:
- Strategus (0.1.0)
- ParallelLogger (3.3.0)
- CohortGenerator (0.8.1)
- DatabaseConnector (6.2.4)
- keyring (1.3.1)
- openssl (2.1.0)
- dplyr (1.1.3)
- R6 (2.5.1)