To initiate our new tag for vocabulary, let me share my experience with the newest release.
I was able to download the terminologies via Athena (http://www.ohdsi.org/web/athena/).
(As of today, I did not experience the zip-file-size-0 bug; thank you, Athena team.)
I checked quite a lot of terminologies.
The download was 1.015 gigabytes.
For the community's benefit, if people want to explore it in R, they can use this R code:
# Load the downloaded vocabulary tables into data frames.
# read.delim() parses the files as tab-delimited text (its default separator).
# as.is = TRUE keeps text columns as character instead of converting them to
# factors; quote = "" turns off quote processing so that quote characters
# embedded in a field are read literally instead of swallowing later fields.
concept <- read.delim("inst/extdata/concept.csv", as.is = TRUE, quote = "")
vocabulary <- read.delim("inst/extdata/vocabulary.csv", as.is = TRUE, quote = "")
cRelationship <- read.delim(
  "inst/extdata/concept_relationship.csv",
  as.is = TRUE,
  quote = ""
)
relationship <- read.delim(
  "inst/extdata/relationship.csv",
  as.is = TRUE,
  quote = ""
)
# Left disabled in the original script; uncomment to load concept_ancestor too.
# cAncestor <- read.delim("inst/extdata/concept_ancestor.csv", as.is = TRUE, quote = "")
library(dplyr)

# What version: print the VOCABULARY_VERSION value of the row whose
# VOCABULARY_ID is "None".
print(filter(vocabulary, VOCABULARY_ID == "None")$VOCABULARY_VERSION)

# Number of concepts per vocabulary, largest vocabulary first.
stats <- concept %>%
  group_by(VOCABULARY_ID) %>%
  summarise(count = n()) %>%
  arrange(-count)

# Save the counts without row names or quoting; missing values become empty
# fields.
write.csv(stats, file = "stats.csv", row.names = FALSE, quote = FALSE, na = "")

# Show the full table (a plain data frame prints every row).
as.data.frame(stats)
# Relationships
library(dplyr)
library(tidyr)
# glimpse(cRelationship)

# Row count per relationship type, largest first, enriched with the columns of
# the relationship lookup table. The join key is named explicitly: after the
# summarise step the left side only has RELATIONSHIP_ID and count.
# RELATIONSHIP_NAME is then split at "(" into `rel` (text before the
# parenthesis) and `source` (text after it); remove = FALSE keeps the original
# name column alongside the two new ones.
rel_stats <- cRelationship %>%
  group_by(RELATIONSHIP_ID) %>%
  summarise(count = n()) %>%
  arrange(-count) %>%
  left_join(relationship, by = "RELATIONSHIP_ID") %>%
  separate(
    col = RELATIONSHIP_NAME,
    into = c("rel", "source"),
    sep = "\\(",
    remove = FALSE
  )

# Per `source`: how many distinct relationship types it has and how many
# concept_relationship rows they account for in total.
rel2 <- rel_stats %>%
  group_by(source) %>%
  summarise(distinct_rels = n(), row_cnt = sum(count))

# Save the summary without row names or quoting; missing values become empty
# fields.
write.csv(rel2, file = "statsTWO.csv", row.names = FALSE, quote = FALSE, na = "")