HPO disease onsets.
data("hpo_onsets")
data.table
# Build the `hpo_onsets` dataset from the HPO annotation file.
# NOTE(review): `load_phenotype_to_genes()` and `map_phenotypes()` are defined
# elsewhere; presumably the former parses the annotation file and the latter
# translates onset term IDs into readable names -- confirm against the helpers.
annot <- load_phenotype_to_genes(file = "phenotype.hpoa")

# Discard annotations whose onset field is the empty string.
annot <- annot[onset != "", ]

# Attach a label column derived from each row's onset term.
annot$onset_name <- map_phenotypes(terms = annot[["onset"]])

# Per disease: number of distinct phenotype IDs and number of distinct onsets.
counts <- dplyr::summarise(
  dplyr::group_by(annot, disease_id),
  hpo_ids = dplyr::n_distinct(hpo_id),
  onsets = dplyr::n_distinct(onset)
)

# Diagnostic only: how many onsets a disease has is partly driven by how many
# hpo_ids it has, which is why hpo_id is retained in the saved table below.
with(counts, cor(hpo_ids, onsets))

# Keep just the identifier and onset columns, then store as package data.
hpo_onsets <- annot[
  ,
  c(
    "disease_id",
    "hpo_id",
    "onset",
    "onset_name"
  )
]
usethis::use_data(hpo_onsets, overwrite = TRUE)