library(poweranalysis)
library(SingleCellExperiment)
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: generics
#>
#> Attaching package: 'generics'
#> The following objects are masked from 'package:base':
#>
#> as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
#> setequal, union
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, aperm, append, as.data.frame, basename, cbind,
#> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
#> get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#> rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
#> unsplit, which.max, which.min
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:utils':
#>
#> findMatches
#> The following objects are masked from 'package:base':
#>
#> expand.grid, I, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#>
#> rowMedians
#> The following objects are masked from 'package:matrixStats':
#>
#> anyMissing, rowMedians
The correlation_analysis
function quantifies agreement
in effect sizes (log₂ fold-changes) of differentially expressed genes
(DEGs) across and within scRNA-seq datasets by computing Spearman’s rank
correlations. A user-specified reference dataset is used to define
significant DEGs, which are then compared across other studies,
independently sampled subsets, and random permutations. Comparing these
patterns to correlations from random permutations helps distinguish true
biological signals from noise.
Below is an example demonstrating how to apply this
analysis using a list of SingleCellExperiment
objects:
# Load SCE objects (replace with actual file paths or data)
tsai_path <- system.file("extdata", "Tsai_Micro.qs", package="poweranalysis")
tsai_micro <- qs::qread(tsai_path)
zhou_path <- system.file("extdata", "Zhou_Micro.qs", package="poweranalysis")
zhou_micro <- qs::qread(zhou_path)
# List of SCE datasets
SCEs <- list(tsai_micro, zhou_micro)
# Dataset names (used in plots and output files)
dataset_names <- c("Tsai", "Zhou")
# Cell type mapping
celltype_corr <- list(Micro=c("Micro", "Micro"))
# Metadata column names per SCE
celltypeIDs <- c("cluster_celltype","cluster_celltype")
sampleIDs <- c("sample_id","sample_id")
# list of P-value thresholds
pvals <- c(0.05,0.01,0.001)
# Output path (must be a clean directory with no files or subdirectories)
output_corr <- file.path(getwd(), "output_corr")
if (!dir.exists(output_corr)) {
dir.create(output_corr, recursive = TRUE)
}
# Run correlation analysis between single-cell datasets
correlation_analysis(
main_dataset = "Tsai", # Name of the dataset used to select significant DEGs from
SCEs = SCEs,
dataset_names = dataset_names,
celltype_corr = celltype_corr,
celltypeIDs = celltypeIDs,
sampleIDs = sampleIDs,
pvals = pvals,
N_randperms = 3,
N_subsets = 3,
output_path = output_corr
)
The correlation_analysis
function generates two key
outputs:
P-value = 0.01:
P-value = 0.001:
P-value = 0.01:
P-value = 0.001: