Functions to get data resources.
get_alphamissense(
types = c("canonical", "non_canonical", "merged"),
agg_fun = mean,
save_dir = cache_dir(),
force_new = FALSE
)
get_clinvar(as_granges = FALSE, annotate = FALSE)
get_data_package(name, package = "KGExplorer")
get_definitions(ont, from = "id", to = "definition")
get_gencc(
agg_by = c("disease_id", "gene_symbol"),
dict = c(Definitive = 6, Strong = 5, Moderate = 4, Supportive = 3, Limited = 2,
`Disputed Evidence` = 1, `Refuted Evidence` = 0, `No Known Disease Relationship` = 0),
save_dir = cache_dir(),
force_new = FALSE
)
get_gene_lengths(genes, keep_chr = c(seq(22), "X", "Y"), ensembl_version = 75)
get_genes_disease(
maps = list(c("gene", "disease")),
run_map_mondo = FALSE,
to = c("OMIM", "Orphanet", "DECIPHER"),
...
)
get_graph_colnames(g, what = c("nodes", "edges"))
get_hpo(
lvl = 2,
force_new = FALSE,
terms = NULL,
method = "github",
save_dir = cache_dir(package = "KGExplorer"),
...
)
get_medgen_maps()
get_metadata_omim(save_dir = cache_dir())
get_metadata_orphanet(save_dir = cache_dir())
get_monarch(
queries = NULL,
maps = NULL,
domain = "https://data.monarchinitiative.org",
subdir = "latest/tsv/all_associations/",
rbind = FALSE,
save_dir = cache_dir()
)
get_monarch_files(
maps = NULL,
queries = NULL,
domain = "https://data.monarchinitiative.org",
subdir = "latest/tsv/all_associations/",
omit = c("...", "md5sums", "index.html")
)
get_monarch_kg(as_graph = TRUE, save_dir = cache_dir(), force_new = FALSE, ...)
get_monarch_models(
maps = list(m2d = c("model", "disease")),
filters = list(disease = NULL, gene = NULL, variant = NULL),
input_col = "object",
to = NULL,
map_orthologs = TRUE,
as_graph = FALSE,
...
)
get_mondo_maps(
map_types = c("default", "broadmatch", "closematch", "exactmatch", "hasdbxref",
"narrowmatch", "relatedmatch"),
map_to = NULL,
map_type_order = c("default", "exactmatch", "closematch", "narrowmatch", "broadmatch",
"relatedmatch", "hasdbxref"),
top_n = NULL,
top_by = c("subject", "object"),
save_dir = cache_dir()
)
get_mondo_maps_files(map_types, map_to, save_dir)
get_ols_options(ol = rols::Ontologies())
get_ontology(
name = c("mondo", "hp", "upheno", "uberon", "cl")[1],
method = c("github", "rols")[1],
filetype = ".obo",
import_func = NULL,
terms = NULL,
add_metadata = TRUE,
lvl = 2,
add_n_edges = TRUE,
add_ontology_levels = TRUE,
save_dir = cache_dir(),
force_new = FALSE,
...
)
get_ontology_dict(
ont,
from = "short_id",
to = c("name", "label", "term"),
include_self = FALSE,
include_alternative_terms = FALSE,
as_datatable = FALSE
)
get_ontology_levels(
ont,
terms = NULL,
remove_terms = TRUE,
method = c("depth", "height"),
absolute = TRUE,
reverse = FALSE
)
get_pli(agg_fun = mean, save_dir = cache_dir(), force_new = FALSE)
get_prevalence(
method = c("orphanet", "oard"),
agg_by = c("mondo_id", "id", "Name"),
include_mondo = TRUE,
...
)
get_ttd(save_dir = cache_dir(), force_new = FALSE, run_map_genes = TRUE)
get_upheno(file = c("ontology", "bestmatches", "upheno_mapping"))
get_version(obj, return_version = FALSE, verbose = TRUE)
A character vector of types to return.
A function to aggregate multiple transcripts per gene.
Directory to save a file to.
If TRUE, force a new download.
Return the object as a GRanges.
Add variant annotations with map_variants.
<...>Any ontology name from get_ols_options
"hpo" Import the Human Phenotype Ontology. GitHub.
a character vector giving the package(s) to look
in for data sets, or NULL
.
By default, all packages in the search path are used, then
the data
subdirectory (if present) of the current working
directory.
An ontology of class ontology_DAG.
The designated from column in from-to mapping or relations.
A character string specifying the format to convert to.
Column names to aggregate results by.
A named vector of evidence score mappings. See here for more information.
A character vector of gene symbols
Which chromosomes to keep.
Which Ensembl database version to use.
A list of paired to/from types to filter Monarch association
files by. For example, list(c("gene","disease"))
will return any
files that contain gene-disease associations.
Passes to get_monarch_files.
Run map_mondo to map MONDO IDs to disease IDs.
Arguments passed on to link_monarch
, get_ontology
, data.table::fread
, data.table::fread
, get_ontology_github
node_filters
A named list of filters to apply to the node data.
Names should be name of the metadata column, and values should be a vector of
valid options. For example, list("type" = c("gene","variant"))
will
return any rows where the "type" column contains either "gene" or "variant".
input
A single character string. The value is inspected and deferred to either file=
(if no \n present), text=
(if at least one \n is present) or cmd=
(if no \n is present, at least one space is present, and it isn't a file name). Exactly one of input=
, file=
, text=
, or cmd=
should be used in the same call.
text
The input data itself as a character vector of one or more lines, for example as returned by readLines()
.
cmd
A shell command that pre-processes the file; e.g. fread(cmd=paste("grep",word,"filename"))
. See Details.
sep
The separator between columns. Defaults to the character in the set [,\t |;:]
that separates the sample of rows into the most number of lines with the same number of fields. Use NULL
or ""
to specify no separator; i.e. each line a single character column like base::readLines
does.
sep2
The separator within columns. A list
column will be returned where each cell is a vector of values. This is much faster using less working memory than strsplit
afterwards or similar techniques. For each column sep2
can be different and is the first character in the same set above [,\t |;
], other than sep
, that exists inside each field outside quoted regions in the sample. NB: sep2
is not yet implemented.
nrows
The maximum number of rows to read. Unlike read.table
, you do not need to set this to an estimate of the number of rows in the file for better speed because that is already automatically determined by fread
almost instantly using the large sample of lines. nrows=0
returns the column names and typed empty columns determined by the large sample; useful for a dry run of a large file or to quickly check format consistency of a set of files before starting to read any of them.
header
Does the first data line contain column names? Defaults according to whether every non-empty field on the first data line is type character. If so, or TRUE is supplied, any empty column names are given a default name.
na.strings
A character vector of strings which are to be interpreted as NA
values. By default, ",,"
for columns of all types, including type character
is read as NA
for consistency. ,"",
is unambiguous and read as an empty string. To read ,NA,
as NA
, set na.strings="NA"
. To read ,,
as blank string ""
, set na.strings=NULL
. When they occur in the file, the strings in na.strings
should not appear quoted since that is how the string literal ,"NA",
is distinguished from ,NA,
, for example, when na.strings="NA"
.
stringsAsFactors
Convert all or some character columns to factors? Acceptable inputs are TRUE
, FALSE
, or a decimal value between 0.0 and 1.0. For stringsAsFactors = FALSE
, all string columns are stored as character
vs. all stored as factor
when TRUE
. When stringsAsFactors = p
for 0 <= p <= 1
, string columns col
are stored as factor
if uniqueN(col)/nrow < p
.
skip
If 0 (default) start on the first line and from there finds the first row with a consistent number of columns. This automatically avoids irregular header information before the column names row. skip>0
means ignore the first skip
rows manually. skip="string"
searches for "string"
in the file (e.g. a substring of the column names row) and starts on that line (inspired by read.xls in package gdata).
select
A vector of column names or numbers to keep, drop the rest. select
may specify types too in the same way as colClasses
; i.e., a vector of colname=type
pairs, or a list
of type=col(s)
pairs. In all forms of select
, the order that the columns are specified determines the order of the columns in the result.
drop
Vector of column names or numbers to drop, keep the rest.
colClasses
As in utils::read.csv
; i.e., an unnamed vector of types corresponding to the columns in the file, or a named vector specifying types for a subset of the columns by name. The default, NULL
means types are inferred from the data in the file. Further, data.table
supports a named list
of vectors of column names or numbers where the list
names are the class names; see examples. The list
form makes it easier to set a batch of columns to be a particular class. When column numbers are used in the list
form, they refer to the column number in the file not the column number after select
or drop
has been applied.
If type coercion results in an error, introduces NA
s, or would result in loss of accuracy, the coercion attempt is aborted for that column with warning and the column's type is left unchanged. If you really desire data loss (e.g. reading 3.14
as integer
) you have to truncate such columns afterwards yourself explicitly so that this is clear to future readers of your code.
integer64
"integer64" (default) reads columns detected as containing integers larger than 2^31 as type bit64::integer64
. Alternatively, "double"|"numeric"
reads as utils::read.csv
does; i.e., possibly with loss of precision and if so silently. Or, "character".
dec
The decimal separator as in utils::read.csv
. When "auto"
(the default), an attempt is made to decide whether "."
or ","
is more suitable for this input. See details.
col.names
A vector of optional names for the variables (columns). The default is to use the header column if present or detected, or if not "V" followed by the column number. This is applied after check.names
and before key
and index
.
check.names
default is FALSE
. If TRUE
then the names of the variables in the data.table
are checked to ensure that they are syntactically valid variable names. If necessary they are adjusted (by make.names
) so that they are, and also to ensure that there are no duplicates.
encoding
default is "unknown"
. Other possible options are "UTF-8"
and "Latin-1"
. Note: it is not used to re-encode the input, rather enables handling of encoded strings in their native encoding.
quote
By default ("\""
), if a field starts with a double quote, fread
handles embedded quotes robustly as explained under Details
. If it fails, then another attempt is made to read the field as is, i.e., as if quotes are disabled. By setting quote=""
, the field is always read as if quotes are disabled. It is not expected to ever need to pass anything other than \"\" to quote; i.e., to turn it off.
strip.white
Logical, default TRUE
, in which case leading and trailing whitespace is stripped from unquoted "character"
fields. "numeric"
fields are always stripped of leading and trailing whitespace.
fill
logical or integer (default is FALSE
). If TRUE
then in case the rows have unequal length, number of columns is estimated and blank fields are implicitly filled. If an integer is provided it is used as an upper bound for the number of columns. If fill=Inf
then the whole file is read for detecting the number of columns.
blank.lines.skip
logical
, default is FALSE
. If TRUE
blank lines in the input are ignored.
key
Character vector of one or more column names which is passed to setkey
. Only valid when argument data.table=TRUE
. Where applicable, this should refer to column names given in col.names
.
index
Character vector or list of character vectors of one or more column names which is passed to setindexv
. As with key
, comma-separated notation like index="x,y,z"
is accepted for convenience. Only valid when argument data.table=TRUE
. Where applicable, this should refer to column names given in col.names
.
showProgress
TRUE
displays progress on the console if the ETA is greater than 3 seconds. It is produced in fread's C code where the very nice (but R level) txtProgressBar and tkProgressBar are not easily available.
data.table
TRUE returns a data.table
. FALSE returns a data.frame
. The default for this argument can be changed with options(datatable.fread.datatable=FALSE)
.
nThread
The number of threads to use. Experiment to see what works best for your data on your hardware.
logical01
If TRUE a column containing only 0s and 1s will be read as logical, otherwise as integer.
keepLeadingZeros
If TRUE a column containing numeric data with leading zeros will be read as character, otherwise leading zeros will be removed and converted to numeric.
yaml
If TRUE
, fread
will attempt to parse (using yaml.load
) the top of the input as YAML, and further to glean parameters relevant to improving the performance of fread
on the data itself. The entire YAML section is returned as parsed into a list
in the yaml_metadata
attribute. See Details
.
autostart
Deprecated and ignored with warning. Please use skip
instead.
tmpdir
Directory to use as the tmpdir
argument for any tempfile
calls, e.g. when the input is a URL or a shell command. The default is tempdir()
which can be controlled by setting TMPDIR
before starting the R session; see base::tempdir
.
tz
Relevant to datetime values which have no Z or UTC-offset at the end, i.e. unmarked datetime, as written by utils::write.csv
. The default tz="UTC"
reads unmarked datetime as UTC POSIXct efficiently. tz=""
reads unmarked datetime as type character (slowly) so that as.POSIXct
can interpret (slowly) the character datetimes in local timezone; e.g. by using "POSIXct"
in colClasses=
. Note that fwrite()
by default writes datetime in UTC including the final Z and therefore fwrite
's output will be read by fread
consistently and quickly without needing to use tz=
or colClasses=
. If the TZ
environment variable is set to "UTC"
(or ""
on non-Windows where unset vs `""` is significant) then the R session's timezone is already UTC and tz=""
will result in unmarked datetimes being read as UTC POSIXct. For more information, please see the news items from v1.13.0 and v1.14.0.
repo
Repository name in format "owner/repo". Defaults to guess_repo()
.
tag
tag for the GitHub release to which this data should be attached.
tbl_graph object.
What should get activated? Possible values are nodes
or
edges
.
Depth of the ancestor terms to add. Will get the closest ancestor to this level if none have this exact level.
A vector of ontology term IDs.
Compute ontology levels using:
"height" dag_height.
"depth" (default) dag_depth.
A list of free-form substring queries to filter files by
(using any column in the metadata).
For example, list("gene_disease","variant_disease")
will return any
files that contain either of the substrings
"gene_disease" or "variant_disease".
Passes to get_monarch_files.
Web domain to search for Monarch files.
Subdirectory path to search for Monarch files within
domain
.
If TRUE
, rbinds all data.tables
together. Otherwise, returns a named list of separated
data.tables.
Files to omit from results.
Return the object as a tbl_graph.
A named list, where each element in the list is the name of a column in the data, and the vector within each element represents the values to include in the final data.
Column name of input IDs.
Add gene-level data.
Mapping types to include.
Mapping outputs to include (from Mondo IDs to another database's IDs).
The order in which map_types
will be prioritised
when filtering the top_n
rows by groupings.
Top number of mappings to return per top_by
grouping.
Set to NULL
to skip this step.
Grouping columns when selecting top_n
rows per grouping.
Can be a character vector of one or more column names.
An Ontologies object.
File type to search for.
Function to import the ontology with.
If NULL
, automatically tries to choose the correct function.
Add metadata to the resulting ontology object.
Add the number of edges (connections) for each term.
Add the ontology level for each term.
For dag_offspring()
and dag_ancestors()
, this controls whether to also include the query term itself.
Include alternative terms in the dictionary.
Return as a data.table instead of a named vector.
Character vector of term IDs to exclude.
Make the levels absolute in the sense that they consider
the entire ontology (TRUE
).
Otherwise, levels will be relative to only the terms that are in
the provided subset of terms
AND are directly adjacent (connected)
to a given cluster of terms (FALSE
).
If TRUE
, ontology
level numbers will be reversed such that the level of the parent terms
are larger than the child terms.
Include MONDO IDs in the output.
Map genes to standardised HGNC symbols using map_genes.
Can be one of the following:
"ontology": Creates an ontology_DAG R object by importing the OBO file directly from the official uPheno GitHub repository.
"bestmatches": Returns a merged table with the best matches between human and non-human homologous phenotypes (from multiple species). Distributed by the official uPheno GitHub repository.
"upheno_mapping": Returns a merged table with matches between human and non-human homologous phenotypes (from multiple species). Distributed by the Monarch Initiative server.
An object.
Return the version as a character string.
Print messages.
Data.
A named list of data.tables of AlphaMissense predictions.
data.table with columns:
"disease_id": Disease ID.
"gene_symbol": Gene symbol.
"evidence_score": Evidence score.
data.table
ontology_DAG object.
data.table of mappings.
A named vector of relative ontology level, where names are ontology term IDs and value is relative ontology level.
Data object release version as a character string.
get_alphamissense()
: get_
Get AlphaMissense predictions
Get gene-level AlphaMissense predictions for all canonical and non-canonical protein-coding gene transcripts.
get_clinvar()
: get_
Get ClinVar variant data
ClinSigSimple integer, 0 = no current value of Likely pathogenic; Pathogenic; Likely pathogenic, low penetrance; Pathogenic, low penetrance; Likely risk allele; or Risk allele 1 = at least one current record submitted with an interpretation of Likely pathogenic; Pathogenic; Likely pathogenic, low penetrance; Pathogenic, low penetrance; Likely risk allele; or Risk allele (independent of whether that record includes assertion criteria and evidence). -1 = no values for clinical significance at all for this variant or set of variants; used for the "included" variants that are only in ClinVar because they are included in a haplotype or genotype with an interpretation NOTE: Now that the aggregate values of clinical significance give precedence to records with assertion criteria and evidence, the values in this column may appear to be in conflict with the value reported in ClinicalSignificance. In other words, if a submission without assertion criteria and evidence interpreted an allele as pathogenic, and those with assertion criteria and evidence interpreted as benign, then ClinicalSignificance would be reported as Benign and ClinSigSimple as 1.
get_data_package()
: get_
get_definitions()
: get_
Add ancestor
For each term, get its ancestor at a given level and add the ID and name of the ancestor to the ontology metadata.
get_gencc()
: get_
Get GenCC
Get phenotype-gene evidence score from the
Gene Curation Coalition.
Note that the column "submitted_as_moi_id" indicates the mode of inheritance
(e.g. "Autosomal dominant inheritance"), not specific HPO phenotypes.
Set agg_by=NULL
to return raw unaggregated data.
Data downloaded from here.
NOTE: Due to licensing restrictions, a GenCC download does not
include OMIM data. OMIM data can be accessed and downloaded through
OMIM.
NOTE: GenCC does not currently have any systematic versioning.
Therefore the attr(obj,"version")
attribute is set to the date it was
downloaded and cached by get_gencc.
get_gene_lengths()
: get_
get_genes_disease()
: get_
Load disease genes
Load gene lists associated with each disease phenotype from:
OMIM
Orphanet
DECIPHER
get_graph_colnames()
: get_
Get column names in the nodes and/or edges of a tbl_graph.
get_hpo()
: get_
Get Human Phenotype Ontology (HPO)
Updated version of Human Phenotype Ontology (HPO). Created from the OBO files distributed by the HPO project's GitHub. Adapted from get_hpo.
By comparison, the hpo
data from ontologyIndex is from 2016.
Note that the maximum ontology level depth in the 2016 version was 14,
whereas in the 2023 version the maximum ontology level depth is 16
(due to an expansion of the HPO).
get_medgen_maps()
: get_
Get MedGen maps.
get_metadata_omim()
: get_
get_metadata_orphanet()
: get_
get_monarch()
: get_
Get Monarch
Get key datasets from the Monarch Initiative server. See here for all associations data, specifically.
get_monarch_files()
: get_
Monarch files
Find files on the Monarch Initiative server.
get_monarch_kg()
: get_
Get knowledge graph: Monarch
Imports the entire Monarch knowledge graph containing >500,000 nodes and >10,000,000 edges across many categories (e.g. Disease, Phenotypes, Cell Types, etc.).
Option 1: Use the biolink API to efficiently extract a specific subset of data from the Monarch server. Option 2: Import the entire knowledge graph from the Monarch server.
get_monarch_models()
: get_
Get Monarch models
Get disease-to-model mappings for multiple model species.
Additionally maps mondo IDs to OMIM and Orphanet IDs.
NOTE, adding additional maps
will drastically reduce the number of results.
get_mondo_maps()
: get_
Get Mondo ID maps
Get mappings between Mondo IDs and IDs in other databases/ontologies. All mappings stored on the official Mondo GitHub.
get_mondo_maps_files()
: get_
get_ols_options()
: get_
Get a complete up-to-date list of ontologies available via the
EMBL-EBI Ontology Lookup Service API.
get_ontology()
: get_ontology
Get ontology
Import an up-to-date ontology directly from the creators or via the EMBL-EBI Ontology Lookup Service API.
get_ontology_dict()
: get_
get_ontology_levels()
: get_
Get ontology level for ontology terms
For a given set of HPO terms, get their level
within the hierarchically organised ontology.
Ontology level can be computed in either absolute mode (absolute=TRUE
)
where the entire ontology is considered when assigning levels, or
relative mode (absolute=FALSE
) where only a subset of the ontology
that is connected to a given term is considered when assigning levels.
Relative mode can be helpful when trying to make a plot where nodes are
scaled to the ontology level.
get_pli()
: get_
Get pLI
Get gene-level pLI scores for all canonical and non-canonical protein-coding gene transcripts. NOTE: The MANE Select set consists of one transcript at each protein-coding locus across the genome that is representative of biology at that locus. NOTE: Mapping genes with map_genes only reduces the number of mapped genes compared to the provided "gene" column.
get_prevalence()
: get_
Get prevalence
Get epidemiological disease and phenotype prevalence data.
get_ttd()
: get_
get_upheno()
: get_
Get uPheno
Get data from the Unified Phenotype Ontology (uPheno).
get_version()
: get_
Get version
For a given ontology, extract the precise version of the Release that the data object was built from. For Human Phenotype Ontology specifically, all Releases can be found at the official HPO GitHub Releases page.
if (FALSE) { # \dontrun{
am <- get_alphamissense()
} # }
ont <- get_ontology("hp", terms=10)
#> Loading cached ontology: /github/home/.cache/R/KGExplorer/hp.rds
#> Randomly sampling 10 term(s).
def <- get_definitions(ont)
d <- get_gencc()
#> Gathering data from GenCC.
#> Evidence scores for:
#> - 10514 diseases
#> - 5171 genes
#> + Version: 2024-12-19
genes <- get_genes_disease()
#> Filtering with `maps`.
#> Files found: 1
#> Constructing data: gene <--> disease
#> genes(s): 0
hpo <- get_hpo()
dat <- get_monarch(maps=list(c("gene","disease")))
#> Filtering with `maps`.
#> Files found: 1
#> Importing 1 Monarch files.
#> - 1/1: gene_disease.all
files <- get_monarch_files()
#> Files found: 35
if (FALSE) { # \dontrun{
g <- get_monarch_kg(save_dir=tempdir(), nrows=100)
} # }
models <- get_monarch_models()
#> Filtering with `maps`.
#> Files found: 1
#> Constructing data: model <--> disease
#> Model species: 21
map <- get_mondo_maps("default")
#> Loading required namespace: downloadR
#> Downloading with download.file.
#> download.file download successful.
#> Time difference of 0.4 secs
mondo <- get_ontology(name="mondo")
#> ⠙ Iterating 9 done (4.5/s) | 2s
#> ⠙ Iterating 14 done (4.9/s) | 2.9s
#> Preparing ontology_index object from: https://github.com/monarch-initiative/mondo/releases/download/v2024-12-03/mondo.obo
#> Parsing [Typedef] sections in the obo file [268/268]
#> remove 2 obsolete terms
#> Parsing [Term] sections in the obo file [1000/54683]
#> Parsing [Term] sections in the obo file [2000/54683]
#> Parsing [Term] sections in the obo file [3000/54683]
#> Parsing [Term] sections in the obo file [4000/54683]
#> Parsing [Term] sections in the obo file [5000/54683]
#> Parsing [Term] sections in the obo file [6000/54683]
#> Parsing [Term] sections in the obo file [7000/54683]
#> Parsing [Term] sections in the obo file [8000/54683]
#> Parsing [Term] sections in the obo file [9000/54683]
#> Parsing [Term] sections in the obo file [10000/54683]
#> Parsing [Term] sections in the obo file [11000/54683]
#> Parsing [Term] sections in the obo file [12000/54683]
#> Parsing [Term] sections in the obo file [13000/54683]
#> Parsing [Term] sections in the obo file [14000/54683]
#> Parsing [Term] sections in the obo file [15000/54683]
#> Parsing [Term] sections in the obo file [16000/54683]
#> Parsing [Term] sections in the obo file [17000/54683]
#> Parsing [Term] sections in the obo file [18000/54683]
#> Parsing [Term] sections in the obo file [19000/54683]
#> Parsing [Term] sections in the obo file [20000/54683]
#> Parsing [Term] sections in the obo file [21000/54683]
#> Parsing [Term] sections in the obo file [22000/54683]
#> Parsing [Term] sections in the obo file [23000/54683]
#> Parsing [Term] sections in the obo file [24000/54683]
#> Parsing [Term] sections in the obo file [25000/54683]
#> Parsing [Term] sections in the obo file [26000/54683]
#> Parsing [Term] sections in the obo file [27000/54683]
#> Parsing [Term] sections in the obo file [28000/54683]
#> Parsing [Term] sections in the obo file [29000/54683]
#> Parsing [Term] sections in the obo file [30000/54683]
#> Parsing [Term] sections in the obo file [31000/54683]
#> Parsing [Term] sections in the obo file [32000/54683]
#> Parsing [Term] sections in the obo file [33000/54683]
#> Parsing [Term] sections in the obo file [34000/54683]
#> Parsing [Term] sections in the obo file [35000/54683]
#> Parsing [Term] sections in the obo file [36000/54683]
#> Parsing [Term] sections in the obo file [37000/54683]
#> Parsing [Term] sections in the obo file [38000/54683]
#> Parsing [Term] sections in the obo file [39000/54683]
#> Parsing [Term] sections in the obo file [40000/54683]
#> Parsing [Term] sections in the obo file [41000/54683]
#> Parsing [Term] sections in the obo file [42000/54683]
#> Parsing [Term] sections in the obo file [43000/54683]
#> Parsing [Term] sections in the obo file [44000/54683]
#> Parsing [Term] sections in the obo file [45000/54683]
#> Parsing [Term] sections in the obo file [46000/54683]
#> Parsing [Term] sections in the obo file [47000/54683]
#> Parsing [Term] sections in the obo file [48000/54683]
#> Parsing [Term] sections in the obo file [49000/54683]
#> Parsing [Term] sections in the obo file [50000/54683]
#> Parsing [Term] sections in the obo file [51000/54683]
#> Parsing [Term] sections in the obo file [52000/54683]
#> Parsing [Term] sections in the obo file [53000/54683]
#> Parsing [Term] sections in the obo file [54000/54683]
#> Parsing [Term] sections in the obo file [54683/54683]
#> remove 4250 obsolete terms
#> There are more than one root:
#> BFO:0000001, CHEBI:24431, CHEBI:36342, CHEBI:50906, ECTO:0000015,
#> and other 29 terms ...
#> A super root (~~all~~) is added.
#> Adding term metadata.
#> IC_method: IC_offspring
#> Adding ancestor metadata.
#> Getting absolute ontology level for 50,425 IDs.
#> 206 ancestors found at level 2
#> Translating ontology terms to names.
#> Translating ontology terms to ids.
#> Converted ontology to: adjacency
#> Getting absolute ontology level for 50,425 IDs.
#> Saving ontology --> /github/home/.cache/R/KGExplorer/mondo.rds
if (FALSE) { # \dontrun{
hp <- get_ontology(name="hp")
upheno <- get_ontology(name="upheno")
} # }
ont <- get_ontology("hp", terms=10)
#> Loading cached ontology: /github/home/.cache/R/KGExplorer/hp.rds
#> Randomly sampling 10 term(s).
dict <- get_ontology_dict(ont)
ont <- get_ontology("hp")
#> Loading cached ontology: /github/home/.cache/R/KGExplorer/hp.rds
terms <- ont@terms[1:10]
lvls <- get_ontology_levels(ont, terms = terms)
#> Getting absolute ontology level for 10 IDs.
lvls_rel <- get_ontology_levels(ont, terms = terms, absolute=FALSE)
#> Getting relative ontology level for 10 IDs.
#> Translating ontology terms to ids.
if (FALSE) { # \dontrun{
pli <- get_pli()
} # }
if (FALSE) { # \dontrun{
get_prevalence()
} # }
ttd <- get_ttd()
#> Loading required namespace: readxl
#> Retrieving all organisms available in gprofiler.
#> Using stored `gprofiler_orgs`.
#> Mapping species name: hsapiens
#> 1 organism identified from search: hsapiens
#> 13,815 / 27,118 (50.94%) genes mapped.
upheno <- get_upheno()
#> ⠙ Iterating 9 done (4.2/s) | 2.1s
#> ⠙ Iterating 14 done (4.7/s) | 3s
#> Parsing 279 <owl:ObjectProperty> ...
#> remove 2 obsolete terms
#> Parsing 190124 <owl:Class> ...
#> Parsing 87972 <rdf:Description> ...
#> remove 17351 obsolete terms
#> There are more than one root:
#> BFO:0000001, BSPO:0000005, BSPO:0000010, BSPO:0000070, BSPO:0000086,
#> and other 14 terms ...
#> A super root (~~all~~) is added.
#> Adding term metadata.
#> IC_method: IC_offspring
#> Adding ancestor metadata.
#> Getting absolute ontology level for 172,772 IDs.
#> 114 ancestors found at level 2
#> Translating ontology terms to names.
#> Translating ontology terms to ids.
#> Converted ontology to: adjacency
#> Getting absolute ontology level for 172,772 IDs.
#> Saving ontology --> /github/home/.cache/R/KGExplorer/upheno.rds
obj <- get_ontology("hp")
#> Loading cached ontology: /github/home/.cache/R/KGExplorer/hp.rds
get_version(obj=obj)
#> + Version: releases