Functions to get data resources.
get_alphamissense(
types = c("canonical", "non_canonical", "merged"),
agg_fun = mean,
save_dir = cache_dir(),
force_new = FALSE
)
get_clinvar(as_granges = FALSE, annotate = FALSE)
get_data_package(name, package = "KGExplorer")
get_definitions(ont, from = "id", to = "definition")
get_gencc(
agg_by = c("disease_id", "gene_symbol"),
dict = c(Definitive = 6, Strong = 5, Moderate = 4, Supportive = 3, Limited = 2,
`Disputed Evidence` = 1, `Refuted Evidence` = 0, `No Known Disease Relationship` = 0),
save_dir = cache_dir(),
force_new = FALSE
)
get_gene_lengths(genes, keep_chr = c(seq(22), "X", "Y"), ensembl_version = 75)
get_genes_disease(
maps = list(c("causal_gene", "disease"), c("correlated_gene", "disease")),
run_map_mondo = FALSE,
to = c("OMIM", "Orphanet", "DECIPHER"),
method = c("kg", "tsv"),
...
)
get_graph_colnames(g, what = c("nodes", "edges"))
get_medgen_maps()
get_metadata_omim(save_dir = cache_dir())
get_metadata_orphanet(save_dir = cache_dir())
get_monarch(
queries = NULL,
maps = NULL,
domain = "https://data.monarchinitiative.org",
subdir = "monarch-kg/latest/tsv/all_associations/",
rbind = FALSE,
save_dir = cache_dir()
)
get_monarch_files(
maps = NULL,
queries = NULL,
domain = "https://data.monarchinitiative.org",
subdir = "monarch-kg/latest/tsv/all_associations/",
omit = c("...", "..", "md5sums", "index.html")
)
get_monarch_kg(as_graph = TRUE, save_dir = cache_dir(), force_new = FALSE, ...)
get_monarch_models(
maps = list(m2d = c("model", "disease")),
filters = list(disease = NULL, gene = NULL, variant = NULL),
input_col = "object",
to = NULL,
map_orthologs = TRUE,
as_graph = FALSE,
...
)
get_mondo_maps(
map_types = c("default", "broadmatch", "closematch", "exactmatch", "hasdbxref",
"narrowmatch", "relatedmatch"),
map_to = NULL,
map_type_order = c("default", "exactmatch", "closematch", "narrowmatch", "broadmatch",
"relatedmatch", "hasdbxref"),
top_n = NULL,
top_by = c("subject", "object"),
save_dir = cache_dir()
)
get_mondo_maps_files(map_types, map_to, save_dir)
get_ols_options(ol = rols::Ontologies())
get_ontology(
name = c("mondo", "hp", "upheno", "uberon", "cl"),
method = c("github", "rols"),
filetype = ".obo",
import_func = NULL,
terms = NULL,
add_metadata = TRUE,
lvl = 2,
add_n_edges = TRUE,
add_ontology_levels = TRUE,
save_dir = cache_dir(subdir = "ontologies"),
tag = NULL,
force_new = FALSE,
...
)
get_ontology_dict(
ont,
from = "short_id",
to = c("name", "label", "term"),
include_self = FALSE,
include_alternative_terms = FALSE,
as_datatable = FALSE
)
get_ontology_levels(
ont,
terms = NULL,
remove_terms = TRUE,
method = c("depth", "height"),
absolute = TRUE,
reverse = FALSE
)
get_pli(agg_fun = mean, save_dir = cache_dir(), force_new = FALSE)
get_prevalence(
method = c("orphanet", "oard"),
agg_by = c("mondo_id", "id", "Name"),
include_mondo = TRUE,
...
)
get_ttd(save_dir = cache_dir(), force_new = FALSE, run_map_genes = TRUE)
get_upheno(file = c("ontology", "bestmatches", "upheno_mapping"))
get_version(obj, return_version = FALSE, verbose = TRUE)
A character vector of types to return.
A function to aggregate multiple transcripts per gene.
Directory to save a file to.
If TRUE, force a new download.
Return the object as a GRanges.
Add variant annotations with map_variants.
Any ontology name from get_ols_options.
"hpo" Import the Human Phenotype Ontology. GitHub.
a character vector giving the package(s) to look
in for data sets, or NULL.
By default, all packages in the search path are used, then
the data subdirectory (if present) of the current working
directory.
An ontology of class ontology_DAG.
The designated from column in from-to mapping or relations.
A character string specifying the format to convert to.
Column names to aggregate results by.
A named vector of evidence score mappings. See here for more information.
A character vector of gene symbols
Which chromosomes to keep.
Which Ensembl database version to use.
A list of paired to/from types to filter Monarch association
files by. For example, list(c("gene","disease")) will return any
files that contain gene-disease associations.
Passes to get_monarch_files.
Run map_mondo to map MONDO IDs to disease IDs.
Compute ontology levels using:
"depth" (default) dag_depth.
"height" dag_height.
Arguments passed on to link_monarch, data.table::fread, data.table::fread, get_ontology_github
node_filters: A named list of filters to apply to the node data.
Names should be name of the metadata column, and values should be a vector of
valid options. For example, list("type" = c("gene","variant")) will
return any rows where the "type" column contains either "gene" or "variant".
input: A single character string. The value is inspected and deferred to either file= (if no \n present), text= (if at least one \n is present) or cmd= (if no \n is present, at least one space is present, and it isn't a file name). Exactly one of input=, file=, text=, or cmd= should be used in the same call.
text: The input data itself as a character vector of one or more lines, for example as returned by readLines().
cmd: A shell command that pre-processes the file; e.g. fread(cmd=paste("grep",word,"filename")). See Details.
sepThe separator between columns. Defaults to the character in the set [,\t |;:] that separates the sample of rows into the most number of lines with the same number of fields. Use NULL or "" to specify no separator; i.e. each line a single character column like base::readLines does.
sep2The separator within columns. A list column will be returned where each cell is a vector of values. This is much faster using less working memory than strsplit afterwards or similar techniques. For each column sep2 can be different and is the first character in the same set above [,\t |;], other than sep, that exists inside each field outside quoted regions in the sample. NB: sep2 is not yet implemented.
nrowsThe maximum number of rows to read. Unlike read.table, you do not need to set this to an estimate of the number of rows in the file for better speed because that is already automatically determined by fread almost instantly using the large sample of lines. nrows=0 returns the column names and typed empty columns determined by the large sample; useful for a dry run of a large file or to quickly check format consistency of a set of files before starting to read any of them.
headerDoes the first data line contain column names? Defaults according to whether every non-empty field on the first data line is type character. If so, or TRUE is supplied, any empty column names are given a default name.
na.stringsA character vector of strings which are to be interpreted as NA values. By default, ",," for columns of all types, including type character is read as NA for consistency. ,"", is unambiguous and read as an empty string. To read ,NA, as NA, set na.strings="NA". To read ,, as blank string "", set na.strings=NULL. When they occur in the file, the strings in na.strings should not appear quoted since that is how the string literal ,"NA", is distinguished from ,NA,, for example, when na.strings="NA".
stringsAsFactorsConvert all or some character columns to factors? Acceptable inputs are TRUE, FALSE, or a decimal value between 0.0 and 1.0. For stringsAsFactors = FALSE, all string columns are stored as character vs. all stored as factor when TRUE. When stringsAsFactors = p for 0 <= p <= 1, string columns col are stored as factor if uniqueN(col)/nrow < p.
skipIf 0 (default) start on the first line and from there finds the first row with a consistent number of columns. This automatically avoids irregular header information before the column names row. skip>0 means ignore the first skip rows manually. skip="string" searches for "string" in the file (e.g. a substring of the column names row) and starts on that line (inspired by read.xls in package gdata).
selectA vector of column names or numbers to keep, drop the rest. select may specify types too in the same way as colClasses; i.e., a vector of colname=type pairs, or a list of type=col(s) pairs. In all forms of select, the order that the columns are specified determines the order of the columns in the result.
dropVector of column names or numbers to drop, keep the rest.
colClassesAs in utils::read.csv; i.e., an unnamed vector of types corresponding to the columns in the file, or a named vector specifying types for a subset of the columns by name. The default, NULL means types are inferred from the data in the file. Further, data.table supports a named list of vectors of column names or numbers where the list names are the class names; see examples. The list form makes it easier to set a batch of columns to be a particular class. When column numbers are used in the list form, they refer to the column number in the file not the column number after select or drop has been applied.
If type coercion results in an error, introduces NAs, or would result in loss of accuracy, the coercion attempt is aborted for that column with warning and the column's type is left unchanged. If you really desire data loss (e.g. reading 3.14 as integer) you have to truncate such columns afterwards yourself explicitly so that this is clear to future readers of your code.
integer64"integer64" (default) reads columns detected as containing integers larger than 2^31 as type bit64::integer64. Alternatively, "double"|"numeric" reads as utils::read.csv does; i.e., possibly with loss of precision and if so silently. Or, "character".
decThe decimal separator as in utils::read.csv. When "auto" (the default), an attempt is made to decide whether "." or "," is more suitable for this input. See details.
col.namesA vector of optional names for the variables (columns). The default is to use the header column if present or detected, or if not "V" followed by the column number. This is applied after check.names and before key and index.
check.namesdefault is FALSE. If TRUE then the names of the variables in the data.table are checked to ensure that they are syntactically valid variable names. If necessary they are adjusted (by make.names) so that they are, and also to ensure that there are no duplicates.
encodingdefault is "unknown". Other possible options are "UTF-8" and "Latin-1". Note: it is not used to re-encode the input, rather enables handling of encoded strings in their native encoding.
quoteBy default ("\""), if a field starts with a double quote, fread handles embedded quotes robustly as explained under Details. If it fails, then another attempt is made to read the field as is, i.e., as if quotes are disabled. By setting quote="", the field is always read as if quotes are disabled. It is not expected to ever need to pass anything other than \"\" to quote; i.e., to turn it off.
strip.whiteLogical, default TRUE, in which case leading and trailing whitespace is stripped from unquoted "character" fields. "numeric" fields are always stripped of leading and trailing whitespace.
filllogical or integer (default is FALSE). If TRUE then in case the rows have unequal length, number of columns is estimated and blank fields are implicitly filled. If an integer is provided it is used as an upper bound for the number of columns. If fill=Inf then the whole file is read for detecting the number of columns.
blank.lines.skiplogical, default is FALSE. If TRUE blank lines in the input are ignored.
keyCharacter vector of one or more column names which is passed to setkey. Only valid when argument data.table=TRUE. Where applicable, this should refer to column names given in col.names.
indexCharacter vector or list of character vectors of one or more column names which is passed to setindexv. As with key, comma-separated notation like index="x,y,z" is accepted for convenience. Only valid when argument data.table=TRUE. Where applicable, this should refer to column names given in col.names.
showProgressTRUE displays progress on the console if the ETA is greater than 3 seconds. It is produced in fread's C code where the very nice (but R level) txtProgressBar and tkProgressBar are not easily available.
data.tableTRUE returns a data.table. FALSE returns a data.frame. The default for this argument can be changed with options(datatable.fread.datatable=FALSE).
nThreadThe number of threads to use. Experiment to see what works best for your data on your hardware.
logical01If TRUE a column containing only 0s and 1s will be read as logical, otherwise as integer.
logicalYNIf TRUE a column containing only Ys and Ns will be read as logical, otherwise as character.
keepLeadingZerosIf TRUE a column containing numeric data with leading zeros will be read as character, otherwise leading zeros will be removed and converted to numeric.
yamlIf TRUE, fread will attempt to parse (using yaml.load) the top of the input as YAML, and further to glean parameters relevant to improving the performance of fread on the data itself. The entire YAML section is returned as parsed into a list in the yaml_metadata attribute. See Details.
autostartDeprecated. Please use skip instead.
tmpdirDirectory to use as the tmpdir argument for any tempfile calls, e.g. when the input is a URL or a shell command. The default is tempdir() which can be controlled by setting TMPDIR before starting the R session; see base::tempdir.
tzRelevant to datetime values which have no Z or UTC-offset at the end, i.e. unmarked datetime, as written by utils::write.csv. The default tz="UTC" reads unmarked datetime as UTC POSIXct efficiently. tz="" reads unmarked datetime as type character (slowly) so that as.POSIXct can interpret (slowly) the character datetimes in local timezone; e.g. by using "POSIXct" in colClasses=. Note that fwrite() by default writes datetime in UTC including the final Z and therefore fwrite's output will be read by fread consistently and quickly without needing to use tz= or colClasses=. If the TZ environment variable is set to "UTC" (or "" on non-Windows where unset vs `""` is significant) then the R session's timezone is already UTC and tz="" will result in unmarked datetimes being read as UTC POSIXct. For more information, please see the news items from v1.13.0 and v1.14.0.
repo: Repository name in format "owner/repo". Defaults to guess_repo().
tbl_graph object.
What should get activated? Possible values are nodes or
edges.
A list of free-form substring queries to filter files by
(using any column in the metadata).
For example, list("gene_disease","variant_disease") will return any
files that contain either of the substrings
"gene_disease" or "variant_disease".
Passes to get_monarch_files.
Web domain to search for Monarch files.
Subdirectory path to search for Monarch files within
domain.
If TRUE, rbinds all data.tables
together. Otherwise, returns a named list of separated
data.tables.
Files to omit from results.
Return the object as a tbl_graph.
A named list, where each element in the list is the name of a column in the data, and the vector within each element represents the values to include in the final data.
Column name of input IDs.
Add gene-level data.
Mapping types to include.
Mapping outputs to include (from Mondo IDs to another database's IDs).
The order in which map_types will be prioritised
when filtering the top_n rows by groupings.
Top number of mappings to return per top_by grouping.
Set to NULL to skip this step.
Grouping columns when selecting top_n rows per grouping.
Can be a character vector of one or more column names.
An Ontologies object.
File type to search for.
Function to import the ontology with.
If NULL, automatically tries to choose the correct function.
A vector of ontology term IDs.
Add metadata to the resulting ontology object.
Depth of the ancestor terms to add. Will get the closest ancestor to this level if none have this exact level.
Add the number of edges (connections) for each term.
Add the ontology level for each term.
tag for the GitHub release to which this data should be attached.
For dag_offspring() and dag_ancestors(), this controls whether to also include the query term itself.
Include alternative terms in the dictionary.
Return as a data.table instead of a named vector.
Character vector of term IDs to exclude.
Make the levels absolute in the sense that they consider
the entire ontology (TRUE).
Otherwise, levels will be relative to only the terms that are in
the provided subset of terms AND are directly adjacent (connected)
to a given cluster of terms (FALSE).
If TRUE, ontology
level numbers will be reversed such that the level of the parent terms
are larger than the child terms.
Include MONDO IDs in the output.
Map genes to standardised HGNC symbols using map_genes.
Can be one of the following:
"ontology"Creates an ontology_DAG R object by importing the OBO file directly from the official uPheno GitHub repository.
"bestmatches"Returns a merged table with the best matches between human and non-human homologous phenotypes (from multiple species). Distributed by the official uPheno GitHub repository.
"upheno_mapping"Return a merged table with matches between human and non-human homologous phenotypes (from multiple species). Distributed by the Monarch Initiative server.
An object.
Return the version as a character string.
Print messages.
Data.
A named list of data.tables of AlphaMissense predictions.
data.table with columns:
"disease_id": Disease ID.
"gene_symbol": Gene symbol.
"evidence_score": Evidence score.
data.table
data.table of mappings.
A named vector of relative ontology level, where names are ontology term IDs and value is relative ontology level.
Data object release version a character string.
get_alphamissense(): get_
Get AlphaMissense predictions
Get gene-level AlphaMissense predictions for all canonical and non-canonical protein-coding gene transcripts.
get_clinvar(): get_
Get ClinVar variant data
ClinSigSimple integer, 0 = no current value of Likely pathogenic; Pathogenic; Likely pathogenic, low penetrance; Pathogenic, low penetrance; Likely risk allele; or Risk allele 1 = at least one current record submitted with an interpretation of Likely pathogenic; Pathogenic; Likely pathogenic, low penetrance; Pathogenic, low penetrance; Likely risk allele; or Risk allele (independent of whether that record includes assertion criteria and evidence). -1 = no values for clinical significance at all for this variant or set of variants; used for the "included" variants that are only in ClinVar because they are included in a haplotype or genotype with an interpretation NOTE: Now that the aggregate values of clinical significance give precedence to records with assertion criteria and evidence, the values in this column may appear to be in conflict with the value reported in ClinicalSignificance. In other words, if a submission without assertion criteria and evidence interpreted an allele as pathogenic, and those with assertion criteria and evidence interpreted as benign, then ClinicalSignificance would be reported as Benign and ClinSigSimple as 1.
get_data_package(): get_
get_definitions(): get_
Add ancestor
For each term, get its ancestor at a given level and add the ID and name of the ancestor to the ontology metadata.
get_gencc(): get_
Get GenCC
Get phenotype-gene evidence score from the
Gene Curation Coalition.
Note that the column "submitted_as_moi_id" indicates the mechanism of action
(e.g. "Autosomal dominant inheritance"), not specific HPO phenotypes.
Set agg_by=NULL to return raw unaggregated data.
Data downloaded from here.
NOTE: Due to licensing restrictions, a GenCC download does not
include OMIM data. OMIM data can be accessed and downloaded through
OMIM.
NOTE: GenCC does not currently have any systematic versioning.
Therefore, the attr(obj,"version") attribute is set to the date it was
downloaded and cached by get_gencc.
get_gene_lengths(): get_
get_genes_disease(): get_
Load disease genes
Load gene lists associated with each disease phenotype from:
OMIM
Orphanet
DECIPHER
get_graph_colnames(): get_
Get column names in the nodes and/or edges of a tbl_graph.
get_medgen_maps(): get_
Get MedGen maps.
get_metadata_omim(): get_
get_metadata_orphanet(): get_
get_monarch(): get_
Get Monarch
Get key datasets from the Monarch Initiative server. See here for all associations data, specifically.
get_monarch_files(): get_
Monarch files
Find files on the Monarch Initiative server.
get_monarch_kg(): get_
Get knowledge graph: Monarch
Imports the entire Monarch knowledge graph containing >500,000 nodes and >10,000,000 edges across many categories (e.g. Disease, Phenotypes, Cell Types, etc.).
Option 1: Use the biolink API to efficiently extract specific subset of data from the Monarch server. Option 2: Import the entire knowledge graph from the Monarch server.
get_monarch_models(): get_
Get Monarch models
Get disease-to-model mappings for multiple model species.
Additionally maps mondo IDs to OMIM and Orphanet IDs.
NOTE, adding additional maps
will drastically reduce the number of results.
get_mondo_maps(): get_
Get Mondo ID maps
Get mappings between Mondo IDs and IDs in other databases/ontologies. All mappings stored on the official Mondo GitHub.
get_mondo_maps_files(): get_
get_ols_options(): get_
Get a complete up-to-date list of ontologies available via the
EMBL-EBI Ontology Lookup Service API.
get_ontology(): get_ontology
Get ontology
Import an up-to-date ontology directly from the creators or via the EMBL-EBI Ontology Lookup Service API.
get_ontology_dict(): get_
get_ontology_levels(): get_
Get ontology level for ontology terms
For a given set of HPO terms, get their level
within the hierarchically organised ontology.
Ontology level can be computed either absolute mode (absolute=TRUE)
where the entire ontology is considered when assigning levels, or
relative mode (absolute=FALSE) where only a subset of the ontology
that is connected to a given term is considered when assigning levels.
Relative mode can be helpful when trying to make plot where nodes are
scaled to the ontology level.
get_pli(): get_
Get pLI
Get gene-level pLI scores for all canonical and non-canonical protein-coding gene transcripts. NOTE: The MANE Select set consists of one transcript at each protein-coding locus across the genome that is representative of biology at that locus. NOTE: Mapping genes with map_genes only reduces the number of mapped genes compared to the provided "gene" column.
get_prevalence(): get_
Get prevalence
Get epidemiological disease and phenotype prevalence data.
get_ttd(): get_
get_upheno(): get_
Get uPheno
Get data from the Unified Phenotype Ontology (uPheno).
get_version(): get_
Get version
For a given ontology, extract the precise version of the Release that the data object was built from. For Human Phenotype Ontology specifically, all Releases can be found at the official HPO GitHub Releases page.
if (FALSE) { # \dontrun{
am <- get_alphamissense()
} # }
ont <- get_ontology("hp", terms=10)
#> Using cached ontology file (1/1):
#> /github/home/.cache/R/KGExplorer/ontologies/github/hp_v2025-05-06.rds
#> Randomly sampling 10 term(s).
def <- get_definitions(ont)
d <- get_gencc()
#> Gathering data from GenCC.
#> Evidence scores for:
#> - 11050 diseases
#> - 5533 genes
#> + Version: 2025-08-02
genes_diseases <- get_genes_disease(method="tsv")
#> Filtering with `maps`.
#> Files found: 2
#> Constructing data: causal_gene <--> disease
#> Constructing data: correlated_gene <--> disease
#> genes(s): 0
dat <- get_monarch(maps=list(c("causal_gene","disease")))
#> Filtering with `maps`.
#> Files found: 1
#> Importing 1 Monarch files.
#> - 1/1: causal_gene_to_disease_association.all
files <- get_monarch_files()
#> Files found: 24
if (FALSE) { # \dontrun{
g <- get_monarch_kg(save_dir=tempdir(), nrows=100)
} # }
if (FALSE) { # \dontrun{
models <- get_monarch_models()
} # }
map <- get_mondo_maps("default")
mondo <- get_ontology(name="mondo")
#> ⠙ iterating 8 done (3.8/s) | 2.1s
#> ⠙ iterating 14 done (4.8/s) | 2.9s
#> Importing ontology via GitHub.
#> Identifying latest release for: monarch-initiative/mondo
#> Preparing ontology_index object from: https://github.com/monarch-initiative/mondo/releases/download/v2025-07-01/mondo.obo
#> Parsing [Typedef] sections in the obo file [276/276]
#> remove 2 obsolete terms
#> Parsing [Term] sections in the obo file [1000/56509]
#> Parsing [Term] sections in the obo file [2000/56509]
#> Parsing [Term] sections in the obo file [3000/56509]
#> Parsing [Term] sections in the obo file [4000/56509]
#> Parsing [Term] sections in the obo file [5000/56509]
#> Parsing [Term] sections in the obo file [6000/56509]
#> Parsing [Term] sections in the obo file [7000/56509]
#> Parsing [Term] sections in the obo file [8000/56509]
#> Parsing [Term] sections in the obo file [9000/56509]
#> Parsing [Term] sections in the obo file [10000/56509]
#> Parsing [Term] sections in the obo file [11000/56509]
#> Parsing [Term] sections in the obo file [12000/56509]
#> Parsing [Term] sections in the obo file [13000/56509]
#> Parsing [Term] sections in the obo file [14000/56509]
#> Parsing [Term] sections in the obo file [15000/56509]
#> Parsing [Term] sections in the obo file [16000/56509]
#> Parsing [Term] sections in the obo file [17000/56509]
#> Parsing [Term] sections in the obo file [18000/56509]
#> Parsing [Term] sections in the obo file [19000/56509]
#> Parsing [Term] sections in the obo file [20000/56509]
#> Parsing [Term] sections in the obo file [21000/56509]
#> Parsing [Term] sections in the obo file [22000/56509]
#> Parsing [Term] sections in the obo file [23000/56509]
#> Parsing [Term] sections in the obo file [24000/56509]
#> Parsing [Term] sections in the obo file [25000/56509]
#> Parsing [Term] sections in the obo file [26000/56509]
#> Parsing [Term] sections in the obo file [27000/56509]
#> Parsing [Term] sections in the obo file [28000/56509]
#> Parsing [Term] sections in the obo file [29000/56509]
#> Parsing [Term] sections in the obo file [30000/56509]
#> Parsing [Term] sections in the obo file [31000/56509]
#> Parsing [Term] sections in the obo file [32000/56509]
#> Parsing [Term] sections in the obo file [33000/56509]
#> Parsing [Term] sections in the obo file [34000/56509]
#> Parsing [Term] sections in the obo file [35000/56509]
#> Parsing [Term] sections in the obo file [36000/56509]
#> Parsing [Term] sections in the obo file [37000/56509]
#> Parsing [Term] sections in the obo file [38000/56509]
#> Parsing [Term] sections in the obo file [39000/56509]
#> Parsing [Term] sections in the obo file [40000/56509]
#> Parsing [Term] sections in the obo file [41000/56509]
#> Parsing [Term] sections in the obo file [42000/56509]
#> Parsing [Term] sections in the obo file [43000/56509]
#> Parsing [Term] sections in the obo file [44000/56509]
#> Parsing [Term] sections in the obo file [45000/56509]
#> Parsing [Term] sections in the obo file [46000/56509]
#> Parsing [Term] sections in the obo file [47000/56509]
#> Parsing [Term] sections in the obo file [48000/56509]
#> Parsing [Term] sections in the obo file [49000/56509]
#> Parsing [Term] sections in the obo file [50000/56509]
#> Parsing [Term] sections in the obo file [51000/56509]
#> Parsing [Term] sections in the obo file [52000/56509]
#> Parsing [Term] sections in the obo file [53000/56509]
#> Parsing [Term] sections in the obo file [54000/56509]
#> Parsing [Term] sections in the obo file [55000/56509]
#> Parsing [Term] sections in the obo file [56000/56509]
#> Parsing [Term] sections in the obo file [56509/56509]
#> remove 4346 obsolete terms
#> There are more than one root:
#> BFO:0000001, CHEBI:24431, CHEBI:36342, CHEBI:50906, ECTO:0000015,
#> and other 29 terms ...
#> A super root (~~all~~) is added.
#> Adding term metadata.
#> IC_method: IC_offspring
#> Adding ancestor metadata.
#> Getting absolute ontology level for 52,155 IDs.
#> 219 ancestors found at level 2
#> Translating ontology terms to names.
#> Translating ontology terms to ids.
#> Converted ontology to: adjacency
#> Getting absolute ontology level for 52,155 IDs.
#> Caching file --> /github/home/.cache/R/KGExplorer/ontologies/github/mondo_v2025-07-01.rds
if (FALSE) { # \dontrun{
hp <- get_ontology(name="hp")
upheno <- get_ontology(name="upheno")
} # }
ont <- get_ontology("hp", terms=10)
#> Using cached ontology file (1/1):
#> /github/home/.cache/R/KGExplorer/ontologies/github/hp_v2025-05-06.rds
#> Randomly sampling 10 term(s).
dict <- get_ontology_dict(ont)
ont <- get_ontology("hp")
#> Using cached ontology file (1/1):
#> /github/home/.cache/R/KGExplorer/ontologies/github/hp_v2025-05-06.rds
terms <- ont@terms[1:10]
lvls <- get_ontology_levels(ont, terms = terms)
#> Getting absolute ontology level for 10 IDs.
lvls_rel <- get_ontology_levels(ont, terms = terms, absolute=FALSE)
#> Getting relative ontology level for 10 IDs.
#> Translating ontology terms to ids.
if (FALSE) { # \dontrun{
pli <- get_pli()
} # }
if (FALSE) { # \dontrun{
get_prevalence()
} # }
if(.Platform$OS.type!="windows"){
ttd <- get_ttd()
}
#> Loading required namespace: readxl
#> Retrieving all organisms available in gprofiler.
#> Using stored `gprofiler_orgs`.
#> Mapping species name: hsapiens
#> 1 organism identified from search: hsapiens
#> 13,793 / 27,118 (50.86%) genes mapped.
upheno <- get_upheno()
#> ⠙ iterating 10 done (4.9/s) | 2s
#> ⠙ iterating 14 done (5.5/s) | 2.6s
#> Parsing 341 <owl:ObjectProperty> ...
#> remove 2 obsolete terms
#> Parsing 193247 <owl:Class> ...
#> Parsing 199799 <rdf:Description> ...
#> remove 17592 obsolete terms
#> There are more than one root:
#> BFO:0000001, BSPO:0000005, BSPO:0000010, BSPO:0000070, BSPO:0000086,
#> and other 14 terms ...
#> A super root (~~all~~) is added.
#> Adding term metadata.
#> IC_method: IC_offspring
#> Adding ancestor metadata.
#> Getting absolute ontology level for 175,653 IDs.
#> 76 ancestors found at level 2
#> Translating ontology terms to names.
#> Translating ontology terms to ids.
#> Converted ontology to: adjacency
#> Getting absolute ontology level for 175,653 IDs.
#> Caching file --> /github/home/.cache/R/KGExplorer/ontologies/github/upheno.rds
obj <- get_ontology("hp")
#> Using cached ontology file (1/1):
#> /github/home/.cache/R/KGExplorer/ontologies/github/hp_v2025-05-06.rds
get_version(obj=obj)
#> + Version: 2025-05-06