knitr::opts_chunk$set(echo = TRUE,
message = params$debug,
warning = params$debug,
cache = FALSE,
error = params$debug,
results = "asis")
### Parse input ###
genome_build <- check_genome_build(params$genome_build)
result_len <- length(params$peak_files)
motif_len <- length(params$motif_files)
if (is.null(params$exp_type)) {
exp_types <- rep("unknown", result_len)
} else {
exp_types <- params$exp_type
}
peak_files_encode <- Vectorize(check_ENCODE, "encode_id")(
params$peak_files, expect_format = c("narrowPeak", "bed"))
alignment_files_encode <- Vectorize(check_ENCODE, "encode_id")(
params$alignment_files, expect_format = "bam")
result <- list(
peaks = Vectorize(read_peak_file, "peak_file")(peak_files_encode),
alignments = lapply(alignment_files_encode, Rsamtools::BamFile),
exp_labels = params$exp_labels,
exp_type = unname(Vectorize(format_exptype, "exp_type")(exp_types))
)
motif_files_jaspar <- Vectorize(check_JASPAR, "motif_id")(params$motif_files)
user_motifs <- list(
motifs = Vectorize(read_motif_file, "motif_file")(motif_files_jaspar),
motif_labels = params$motif_labels
)
result$alignments <- unname(result$alignments)
result$peaks <- unname(result$peaks)
user_motifs$motifs <- unname(user_motifs$motifs)
temp_dir <- file.path(tempdir(), paste0("MotifPeeker_temp_", random_string(5)))
out_dir_extra <- ifelse(params$save_runfiles, params$out_dir, temp_dir)
label_colours <- c("#c83e73", "#2b83ba") # No, Yes
## Flags for optional plots
alignment_metrics <- ifelse((length(result$alignments) == 0), FALSE, TRUE)
cellcount_metrics <- ifelse((length(params$cell_counts) == 0), FALSE, TRUE)
user_motif_metrics <- ifelse((length(user_motifs$motifs) == 0 ||
result_len == 1), FALSE, TRUE)
comparison_metrics <- ifelse(result_len == 1, FALSE, TRUE)
denovo_metrics <- params$denovo_motif_discovery && comparison_metrics
## Misc
ex_emo <- ifelse(requireNamespace("emoji", quietly = TRUE),
emoji::emoji("exclamation"), "!!")
## Check motif_db
using_jaspar_db <- if (denovo_metrics) ifelse(is.null(params$motif_db),
TRUE, FALSE)
motif_db <- if (denovo_metrics) ifelse(is.null(params$motif_db),
get_JASPARCORE(), params$motif_db)
### General Metrics ###
## Peak Counts
result$peak_count <- vapply(
result$peaks, function(x) length(x), integer(1)
)
if (alignment_metrics) {
## Read Counts
result$read_count <- vapply(result$alignments, function(x) {
Rsamtools::countBam(x)$records
}, integer(1))
## FRiP scores
result$frip <- Vectorize(calc_frip, c("read_file",
"peak_file"))(result$alignments, result$peaks)
frip_df <- data.frame(exp_type = result$exp_type,
frip = result$frip,
exp_label = result$exp_labels)
}
## Peak Widths
result$peak_widths <- lapply(result$peaks, function(x){
x@ranges@width
})
peak_width_len <- vapply(result$peak_widths, length, integer(1))
peak_width_df <-
data.frame(exp_label = rep(result$exp_labels, peak_width_len),
exp_type = rep(result$exp_type, peak_width_len),
peak_width = unlist(result$peak_widths))
## Motif-Summit Distances
motif_summit_dist_df <- get_df_distances(
result, user_motifs, genome_build, out_dir_extra, params$workers,
params$meme_path, params$debug
)
if (comparison_metrics) {
### Known-motif Analysis ###
comparison_indices <- setdiff(seq_along(result$peaks),
params$reference_index)
segregated_peaks <- lapply(comparison_indices, function(x) {
segregate_seqs(result$peaks[[params$reference_index]],
result$peaks[[x]])
})
## Calculate enrichment for segregated peaks
if (user_motif_metrics) {
enrichment_df <- get_df_enrichment(
result, segregated_peaks, user_motifs, genome_build,
params$reference_index, out_dir_extra, params$workers,
params$meme_path, params$verbose
)
}
}
### De-Novo Motif Analysis ###
if (denovo_metrics) {
denovo_res <- list()
## Run STREME
denovo_res$streme <- denovo_motifs(
unlist(segregated_peaks), params$trim_seq_width, genome_build,
params$denovo_motifs, filter_n = params$filter_n,
out_dir = out_dir_extra, meme_path = params$meme_path,
workers = params$workers, verbose = params$verbose, debug = params$debug
)
## Run TOMTOM
denovo_res$similar_motifs <- find_motifs(
denovo_res$streme, motif_db, out_dir = out_dir_extra,
meme_path = params$meme_path, workers = params$workers,
verbose = params$verbose, debug = params$debug
)
## Compare motifs
denovo_res$comparisons <- motif_similarity(
denovo_res$streme, workers = params$workers
)
}
MotifPeeker
compares and analyses datasets from different epigenomic profiling
methods using motif enrichment as a key benchmark.
The report consists of the following sections:
General Metrics: Provides an overview of metrics related to dataset peaks, including FRiP scores, peak widths, and motif-to-summit distances.
Known Motif Enrichment Analysis: Presents statistics on the frequency of enriched user-supplied motifs in the datasets and compares them between the common and unique peaks from comparison and reference datasets.
De-Novo Motif Enrichment Analysis: Details the statistics of de-novo discovered motifs in common and unique peaks from comparison and reference datasets. Examines motif similarities and identifies the closest known motifs in the JASPAR or the provided database.
Experimental dataset labels used:
User-provided motifs used:
MotifPeeker
report was generated with the following
command:
cat(report_command(params))
MotifPeeker(use_cache = NULL,
peak_files = ...,
reference_index = 2,
alignment_files = ...,
exp_labels = list("ChIP", "TIP"),
exp_type = list("chipseq", "tipseq"),
genome_build = "hg38",
motif_files = ...,
motif_labels = NULL,
cell_counts = NULL,
denovo_motif_discovery = TRUE,
denovo_motifs = 3,
filter_n = 6,
motif_db = NULL,
trim_seq_width = NULL,
download_buttons = TRUE,
meme_path = NULL,
out_dir = "/Users/hdash/code/icl_project2/MotifPeeker-report/built_in_data/output/MotifPeeker_20240716_104747",
save_runfiles = TRUE,
workers = 2,
debug = FALSE,
verbose = TRUE)
Fraction of Reads in Peaks (FRiP) is the proportion
of sequencing reads within identified peak regions. Higher FRiP scores
suggest higher signal-to-noise ratios in the dataset.
\[\text{FRiP (Fraction of Reads in Peaks)} =
\frac{\text{Reads in Peaks}}{\text{Total Reads}}\]
if (!alignment_metrics) {
cat("\n", ex_emo,
" *Skipping FRiP calculation as alignment files were not provided.*",
" \n")
} else {
if (length(result$alignments) != length(result$peaks)) {
stp_msg <- "Number of alignment files and peak files do not match."
stopper(stp_msg)
}
cat("\n### By Experiment Type {- .unlisted} \n")
frip_exp_plot <- frip_df %>%
ggplot2::ggplot(aes(x = exp_type, y = frip, fill = exp_type,
text = paste0(
"<b>Experiment Label</b>: ", exp_label, "<br>",
"<b>Experiment Type</b>: ", exp_type, "<br>",
"<b>FRiP Score</b>: ", round(frip, 4)))) +
geom_boxplot(outlier.shape = NA) +
geom_jitter() +
labs(
x = "Experiment Type",
y = "FRiP Score",
fill = "Experiment Type"
) +
scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(frip_exp_plot))
}
if (alignment_metrics) {
frip_df %>%
group_by(exp_type) %>%
rename("Experiment Type" = exp_type) %>%
summarise(
"Mean score" = round(mean(frip), 3),
"Median Score" = round(median(frip), 3),
"Standard Deviation" = round(sd(frip), 3)
) %>%
arrange(`Experiment Type`) %>%
print_DT()
}
if (alignment_metrics) {
cat("\n### By Individual Dataset {- .unlisted} \n")
frip_individual_plot <- frip_df %>%
ggplot(aes(x = exp_label, y = frip, fill = exp_type, text = paste0(
"<b>Experiment Label</b>: ", exp_label, "<br>",
"<b>Experiment Type</b>: ", exp_type, "<br>",
"<b>FRiP Score</b>: ", round(frip, 4)))) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_col() +
labs(
x = "Experiment Label",
y = "FRiP Score",
fill = "Experiment Type"
) +
scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(frip_individual_plot))
}
if (alignment_metrics) {
frip_df$read_count <- result$read_count
frip_individual_df <- frip_df %>%
mutate(frip = round(frip, 3)) %>%
select(exp_label, exp_type, read_count, frip) %>%
rename(
"Experiment Label" = exp_label,
"Experiment Type" = exp_type,
"FRiP Score" = frip,
"Read Count" = read_count
) %>%
arrange(`Experiment Type`, `Experiment Label`)
if (length(params$cell_counts) > 0) {
frip_individual_df$cell_count <- params$cell_counts
frip_individual_df <- frip_individual_df %>%
rename(
"Cell Count" = cell_count
)
}
frip_individual_df %>%
print_DT()
}
if (alignment_metrics) {
if (!cellcount_metrics) {
cat("\n", ex_emo,
" *Skipping cell count plot as cell counts were not provided.*\n")
} else {
cat("\n### By Cell Count {- .unlisted} \n")
frip_df$cell_count <- as.numeric(params$cell_counts)
frip_df_cellcount <- frip_df %>%
group_by(cell_count, exp_type) %>%
summarise(
frip = mean(frip),
.groups = "drop"
)
frip_cellcount_plot <- frip_df %>%
ggplot(aes(
x = cell_count,
y = frip,
shape = exp_type,
color = exp_type,
text = paste0("<b>Experiment Type</b>: ", exp_type, "<br>",
"<b>FRiP Score</b>: ", round(frip, 4),
"<br><b>Cell Count</b>: ", cell_count))) +
geom_point() +
geom_line(data = frip_df_cellcount) +
labs(
x = "Cell Count (log10 scale)",
y = "FRiP Score",
fill = "Experiment Type",
shape = "Experiment Type",
color = "Experiment Type"
) +
scale_x_continuous(trans="log10") +
scale_color_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(frip_cellcount_plot))
}
}
❗ Skipping cell count plot as cell counts were not provided.
This section presents the peak width distribution for each experiment type, as well as for individual experiments and cell counts, reported in base pairs.
The data plotted represents the central 95% of the overall data, with values below the 2.5th percentile and above the 97.5th percentile excluded.
peak_width_plt1 <- peak_width_df %>%
filter(peak_width >= stats::quantile(peak_width, 0.025) &
peak_width <= stats::quantile(peak_width, 0.975)) %>%
ggplot(aes(x = peak_width, fill = exp_type)) +
geom_histogram(bins = 30, alpha = 0.5, position = "identity") +
labs(
x = "Peak Width",
y = "Peak Count",
fill = "Experiment Type"
) +
scale_fill_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(peak_width_plt1))
peak_width_plt2 <- peak_width_df %>%
filter(peak_width >= stats::quantile(peak_width, 0.025) &
peak_width <= stats::quantile(peak_width, 0.975)) %>%
ggplot(aes(x = exp_type, y = peak_width, fill = exp_type)) +
geom_boxplot(alpha = 0.5) +
labs(
x = "Experiment Type",
y = "Peak Width",
fill = "Experiment Type"
) +
scale_fill_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(peak_width_plt2))
peak_width_df %>%
group_by(exp_type) %>%
rename("Experiment Type" = exp_type) %>%
summarise(
"Mean Peak Width" = round(mean(peak_width), 1),
"Median Peak Width" = round(median(peak_width), 3),
"2.5th Quantile" = round(quantile(peak_width, 0.025), 3),
"97.5th Quantile" = round(quantile(peak_width, 0.975), 3),
"Standard Deviation" = round(sd(peak_width), 1)
) %>%
arrange(`Experiment Type`) %>%
print_DT()
peak_width_plt3 <- peak_width_df %>%
filter(peak_width >= stats::quantile(peak_width, 0.025) &
peak_width <= stats::quantile(peak_width, 0.975)) %>%
ggplot(aes(x = exp_label, y = peak_width, fill = exp_type)) +
geom_boxplot(alpha = 0.5) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
x = "Experiment Label",
y = "Peak Width",
fill = "Experiment Type"
) +
scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(peak_width_plt3))
peak_width_df$cell_count <- rep(params$cell_counts, peak_width_len)
peak_width_df_all <- if (is.null(peak_width_df$cell_count)) {
peak_width_df %>%
group_by(exp_label, exp_type)
} else {
peak_width_df %>%
group_by(exp_label, exp_type, cell_count) %>%
rename("Cell Count" = cell_count,)
}
peak_width_df_all <- peak_width_df_all %>%
summarise(
"Mean Peak Width" = round(mean(peak_width), 1),
"Median Peak Width" = round(median(peak_width), 3),
"2.5th Quantile" = round(stats::quantile(peak_width, 0.025), 3),
"97.5th Quantile" = round(stats::quantile(peak_width, 0.975), 3),
"Standard Deviation" = round(stats::sd(peak_width), 1),
.groups = "drop"
) %>%
rename(
"Experiment Label" = exp_label,
"Experiment Type" = exp_type
) %>%
arrange(`Experiment Type`, `Experiment Label`)
print_DT(peak_width_df_all)
if (!cellcount_metrics) {
cat("\n", ex_emo,
" *Skipping cell count plot as cell counts were not provided.*\n")
} else {
cat("\n### By Cell Count {- .unlisted} \n")
peak_width_df_cellcount <- peak_width_df %>%
group_by(exp_label, exp_type, cell_count) %>%
drop_na(cell_count) %>%
summarise(
mean_width = mean(peak_width),
.groups = "drop"
) %>%
group_by(exp_type, cell_count) %>%
summarise(
mean_width = mean(mean_width),
.groups = "drop"
)
peak_width_cellcount_plot <- peak_width_df %>%
group_by(exp_label, exp_type) %>%
drop_na(cell_count) %>%
summarise(
mean_width = mean(peak_width),
cell_count = mean(cell_count),
.groups = "drop"
) %>%
ggplot(aes(
x = cell_count,
y = mean_width,
shape = exp_type,
color = exp_type,
text = paste0("<b>Experiment Type</b>: ", exp_type, "<br>",
"<b>Mean Peak Width</b>: ", round(mean_width, 1),
"<br><b>Cell Count</b>: ", cell_count))) +
geom_point() +
geom_line(data = peak_width_df_cellcount) +
labs(
x = "Cell Count (log10 scale)",
y = "Mean Peak Width",
fill = "Experiment Type",
shape = "Experiment Type",
color = "Experiment Type"
) +
scale_x_continuous(trans="log10") +
scale_color_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(peak_width_cellcount_plot))
}
❗ Skipping cell count plot as cell counts were not provided.
This section reports the distances between the peak summit and the centre of the nearest motif. MEME suite’s FIMO tool is used to scan the sequences in the peak file to identify all occurrences of the provided motifs, and the distances between the centres of each motif occurrence and the closest peak summit are calculated.
The data plotted represents the central 95% of the overall data, with values below the 2.5th percentile and above the 97.5th percentile excluded.
if (!user_motif_metrics) {
cat("\n", ex_emo,
" *Skipping motif-summit distance plots as no motifs files",
" were provided.*\n")
} else {
tab_header_flag <- FALSE # Print tabset header just once
## Loop through all motifs
for (motif_i in seq_len(motif_len)) {
if (!tab_header_flag) {
cat("**Select Motif:** \n")
tab_header_flag <- TRUE
}
cat(paste0("\n### ", user_motifs$motifs[[motif_i]]@name,
" {- .unlisted} \n"))
cat(paste0("**Motif Consensus Sequence:** ",
user_motifs$motifs[[motif_i]]@consensus, " (Length: ",
nchar(user_motifs$motifs[[motif_i]]@consensus), " bp) \n"))
cat("\n#### By Experiment Type {- .unlisted} \n")
cat("The top graph shows peak counts by distance from the motif",
"center, while the bottom graph depicts the probability density of",
"these distances. Different colors represent experiment types, and",
"each density curve sums to 1. \n")
## Limits
motif_summit_dist_lims <- motif_summit_dist_df %>%
filter(motif_indice == motif_i) %>%
pull(distance) %>%
stats::quantile(c(0.025, 0.975))
## Raw count plot
motif_summit_dist_plt1 <- motif_summit_dist_df %>%
filter(motif_indice == motif_i) %>%
filter(distance >= motif_summit_dist_lims[1] &
distance <= motif_summit_dist_lims[2]) %>%
ggplot(aes(x = distance, color = exp_type,
text = paste("<b>Experiment Type</b>:", exp_type))) +
geom_freqpoly(bins = 30, linewidth = 1) +
scale_color_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
option = "A") +
labs(color = "Experiment Type",
x = "Distance (bp)",
y = "Count")
motif_summit_dist_plt1 <- to_plotly(motif_summit_dist_plt1,
html_tags = FALSE)
## Density plot
motif_summit_dist_plt2 <- motif_summit_dist_df %>%
filter(motif_indice == motif_i) %>%
filter(distance >= motif_summit_dist_lims[1] &
distance <= motif_summit_dist_lims[2]) %>%
ggplot(aes(x = distance, fill = exp_type, color = exp_type,
text = paste("<b>Experiment Type</b>:", exp_type))) +
geom_density(alpha = 0.5) +
scale_fill_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
option = "A") +
scale_color_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
option = "A") +
labs(color = "Experiment Type",
fill = "Experiment Type",
x = "Distance (bp)",
y = "Count")
motif_summit_dist_plt2 <- to_plotly(motif_summit_dist_plt2,
html_tags = FALSE)
motif_summit_dist_exp_plt <-
subplot(motif_summit_dist_plt1, motif_summit_dist_plt2,
nrows = 2, margin = 0.04) %>%
layout(
xaxis2 = list(title = "Distance (bp)",
titlefont = list(size = 16)),
yaxis = list(title = "Peak Count",
titlefont = list(size = 16)),
yaxis2 = list(title = "Peak Density",
titlefont = list(size = 16))
)
print(htmltools::tagList(motif_summit_dist_exp_plt))
## DT
motif_summit_dist_df %>%
filter(motif_indice == motif_i) %>%
group_by(exp_type) %>%
summarise(
"Mean Distance" = round(mean(distance), 2),
"Absolute Mean Distance" = round(mean(abs(distance)), 2),
"Median Distance" = median(distance),
"Absolute Median Distance" = median(abs(distance)),
"Standard Deviation" = round(sd(distance), 2)
) %>%
rename("Experiment Type" = exp_type) %>%
print_DT(html_tags = TRUE) %>%
print()
cat("\n#### By Individual Dataset {- .unlisted}\n")
cat("This plot shows the average of the absolute distance between peak",
"summits and the nearest motif center for each input experiment. \n")
## Absolute mean distance plot
motif_summit_dist_ind_plt <- motif_summit_dist_df %>%
filter(motif_indice == motif_i) %>%
group_by(exp_label, exp_type) %>%
summarise(abs_mean_dist = round(mean(abs(distance)), 2),
.groups = "drop") %>%
ggplot(aes(x = exp_label, y = abs_mean_dist, fill = exp_type,
text = paste0(
"<b>Experiment Label</b>: ", exp_label, "<br>",
"<b>Experiment Type</b>: ", exp_type, "<br>",
"<b>Absolute Mean Distance</b>: ", abs_mean_dist
))) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_col() +
labs(
x = "Experiment Label",
y = "Absolute Mean Distance (bp)",
fill = "Experiment Type"
) +
scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
option = "A")
print(to_plotly(motif_summit_dist_ind_plt, html_tags = TRUE))
## DT
motif_summit_dist_df %>%
filter(motif_indice == motif_i) %>%
group_by(exp_label, exp_type) %>%
summarise(
"Mean Distance" = round(mean(distance), 2),
"Absolute Mean Distance" = round(mean(abs(distance)), 2),
"Median Distance" = median(distance),
"Absolute Median Distance" = median(abs(distance)),
"Standard Deviation" = round(sd(distance), 2),
.groups = "keep"
) %>%
arrange(exp_type, exp_label) %>%
rename(
"Experiment Label" = exp_label,
"Experiment Type" = exp_type
) %>%
print_DT(html_tags = TRUE) %>%
print()
}
}
Select Motif:
Motif Consensus Sequence: CAGGGGGC (Length: 8 bp)
Motif Consensus Sequence: CTGCAGTNCNNNNNNNNNNCCASYAGRKGGCRS (Length: 33 bp)
Summary of all the general metrics for the input datasets.
dplyr::tibble(
"Experiment Label" = result$exp_label,
"Experiment Type" = result$exp_type,
"Cell Count" = ifelse((cellcount_metrics), params$cell_count, NA),
"Read Count" = result$read_count,
"Peak Count" = result$peak_count,
"FRiP Score" = ifelse((alignment_metrics), round(frip_df$frip, 3), NA),
"Mean Peak Width" = peak_width_df_all$`Mean Peak Width`,
) %>%
arrange(`Experiment Type`, `Experiment Label`) %>%
print_DT(extra = TRUE)
This section provides a comprehensive pair-wise comparison of motif enrichment patterns between the reference and comparison experiments. The presence of user-provided motif in the peaks was determined using AME tool from MEME suite.
The overall summary plot compacts all the individual comparisons into one for quick assessment of relative motif enrichment across experiments. The individual comparison plots provide a detailed view of the proportion of peaks with the motif of interest for all, common and unique peaks in the respective reference and comparison plots.
Peaks are considered common to both comparison and reference datasets if they overlap by at least 1 base pair. All other peaks are classified as unique.
NOTE: Discrepancies in motif enrichment in common peaks between experiments may arise due to variations in peak lengths. Experiment with longer peaks may have a higher proportion of peaks with the motif of interest purely by chance.
if (!user_motif_metrics) {
cat("\n", ex_emo,
" *Skipping motif-summit distance plots as either no motifs files",
" was not provided, or only a single dataset was provided.*\n")
} else {
tab_header_flag <- FALSE # Print tabset header just once
## Loop through all motifs
for (motif_i in seq_len(motif_len)) {
if (!tab_header_flag) {
cat("**Select Motif:** \n")
tab_header_flag <- TRUE
}
cat(paste0("\n## ", user_motifs$motifs[[motif_i]]@name,
" {- .unlisted .tabset} \n"))
cat(paste0("**Motif Consensus Sequence:** ",
user_motifs$motifs[[motif_i]]@consensus, " (Length: ",
nchar(user_motifs$motifs[[motif_i]]@consensus), " bp) \n"))
cat("<br>**Select Comparison Group:** \n")
### Overall Summary Plot ###
cat("\n### **Overall** {- .unlisted} \n")
cat("These plots are organised in a 2x2 grid comparing motif",
"enrichment: top row for the comparison experiment and bottom row",
"for the reference experiment, while columns represent common",
"peaks on the left and unique peaks on the right. \n<br>")
cat("<details><summary>**Show Comparison-Reference Pair Labels**",
"</summary> \n")
cat("(Pair Number. Comparison Experiment - Reference Experiment) \n ")
for (pair_i in seq_along(comparison_indices)) {
cat(paste0(
pair_i, ". ", result$exp_labels[comparison_indices][pair_i], " - ",
result$exp_labels[params$reference_index], " \n"
))
}
cat(" \n</details> \n<br> \n")
enrichment_overall_plots <- plot_enrichment_overall(
enrichment_df, motif_i, label_colours,
reference_label = result$exp_labels[params$reference_index]
)
cat("\n#### Counts of Peaks Containing Motif Across All Experiments ",
"{- .unlisted} \n")
print(enrichment_overall_plots$count_plt)
cat("\n#### Percentages of Peaks Containing Motif Across All ",
"Experiments {- .unlisted} \n")
print(enrichment_overall_plots$perc_plt)
### Individual Comparison Plots ###
for (i in comparison_indices) {
print_labels(result$exp_labels, params$reference_index, i,
"known_motif", result$read_count)
## Plot
plot_enrichment_individual(result, enrichment_df, i, motif_i,
label_colours, params$reference_index) %>%
print()
## DT
cat(" \n ")
dt_enrichment_individual(result, enrichment_df, i, motif_i,
params$reference_index) %>%
print()
cat(" \n ")
}
}
}
Select Motif:
Motif Consensus Sequence: CAGGGGGC (Length: 8
bp)
Select Comparison Group:
(Pair Number. Comparison Experiment - Reference Experiment)
1. ChIP - TIP
Motif Consensus Sequence:
CTGCAGTNCNNNNNNNNNNCCASYAGRKGGCRS (Length: 33 bp)
Select Comparison Group:
(Pair Number. Comparison Experiment - Reference Experiment)
1. ChIP - TIP
De-novo motif discovery identifies novel motif sequences in the common and unique peaks of the reference and comparison peak files. The Position Weight Matrix (PWM) of these motifs, representing the probability of each base at each position in the motif, are compared across different peak sets to identify similarities.
Similarity is quantified using the Pearson correlation coefficient (PCC) score, ranging from -1 to +1, with higher values indicating greater similarity. The scores are also normalised to penalise disparities between motifs of different lengths. De-novo motifs were discovered using the STREME tool from MEME suite.
Explanation of the plots:
Common Motif Comparison
: High correlation between
motifs in this plot validates that the common peaks from both the
reference and comparison datasets are enriched for similar motifs. This
suggests consistent binding of shared regulatory factors across the
experiments.Unique Motif Comparison
: This plot examines whether
unique peaks from both experiments capture similar motifs. The presence
of highly correlated motifs here indicates that both experiments miss
out on capturing peaks with a likely legitimate motif. This may be
caused by strict peak calling thresholds, potentially excluding true
positive signals.Cross Motif Comparison A/B
: These plots compare motifs
from unique peaks in one experiment to motifs from common peaks in the
other experiment (refer to axis labels). Higher correlation scores
suggests that the experiment contributing the unique peaks captures a
higher proportion of motifs that are actually shared between the two
experiments, suggesting higher sensitivity of the experiment with unique
peaks.The discovered motifs are also compared to motifs from the JASPAR or the provided database to identify any similar known motifs using TOMTOM from MEME suite.
NOTE: Motifs with poly-nucleotide repeats (such as
“AAAAA”) should be avoided due to inflated correlation scores. Motifs
containing filter_n
or more repeats are not included in the
results.
tab_header_flag <- FALSE # Print tabset header just once
for (i in seq_along(comparison_indices)) {
comparison_i <- comparison_indices[i]
if (!tab_header_flag) {
cat("**Select Comparison Group:** \n")
tab_header_flag <- TRUE
}
print_labels(result$exp_labels, params$reference_index, comparison_i,
"denovo_motif", result$read_count)
cat("<br>**Select Plot:** \n")
start_i <- 1 + (i - 1) * 4
end_i <- i * 4
streme_motifs <- denovo_res$streme[seq(start_i, end_i)]
similar_motifs <- denovo_res$similar_motifs[seq(start_i, end_i)]
comparison_matrices <- denovo_res$comparisons[seq(start_i, end_i)]
denovo_plts <- plot_motif_comparison(comparison_matrices, exp_labels = c(
result$exp_labels[params$reference_index],
result$exp_labels[comparison_i]
), width = 650, height = 500)
download_btns <-
get_download_buttons(i, start_i, segregated_peaks, out_dir = out_dir_extra,
add_buttons = params$download_buttons, verbose = params$verbose)
.print_denovo_sections_i <- function(x, y) {
print_denovo_sections(streme_motifs, similar_motifs, segregated_peaks[[i]],
c(x, y), jaspar_link = using_jaspar_db,
download_buttons = download_btns)
}
cat("\n### Common Motif Comparison {- .unlisted} \n")
cat("\n#### Motif Similarity Plot {- .unlisted} \n")
print(denovo_plts[[1]])
cat("\n#### Motif Details {- .unlisted} \n")
.print_denovo_sections_i(1, 2)
cat("\n### Unique Motif Comparison {- .unlisted} \n")
cat("\n#### Motif Similarity Plot {- .unlisted} \n")
print(denovo_plts[[2]])
cat("\n#### Motif Details {- .unlisted} \n")
.print_denovo_sections_i(3, 4)
cat("\n### Cross Motif Comparison A {- .unlisted} \n")
cat("\n#### Motif Similarity Plot {- .unlisted} \n")
print(denovo_plts[[3]])
cat("\n#### Motif Details {- .unlisted} \n")
.print_denovo_sections_i(3, 2)
cat("\n### Cross Motif Comparison B {- .unlisted} \n")
cat("\n#### Motif Similarity Plot {- .unlisted} \n")
print(denovo_plts[[4]])
cat("\n#### Motif Details {- .unlisted} \n")
.print_denovo_sections_i(4, 1)
}
Select Comparison Group:
Reference Experiment Label: TIP (Total Reads:
16.06K)
Comparison Experiment Label: ChIP (Total Reads:
49.83K)
Select Plot:
Reference Group - Common Peaks
Total peaks in group: 127
Comparison Group - Common Peaks
Total peaks in group: 127
Reference Group - Unique Peaks
Total peaks in group: 57
Comparison Group - Unique Peaks
Total peaks in group: 82
Reference Group - Unique Peaks
Total peaks in group: 57
Comparison Group - Common Peaks
Total peaks in group: 127
Comparison Group - Unique Peaks
Total peaks in group: 82
Reference Group - Common Peaks
Total peaks in group: 127
utils::sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.5
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MotifPeeker_0.99.0 devtools_2.4.5 usethis_2.2.3
##
## loaded via a namespace (and not attached):
## [1] later_1.3.2 BiocIO_1.15.0 bitops_1.0-7
## [4] filelock_1.0.3 tibble_3.2.1 R.oo_1.26.0
## [7] XML_3.99-0.17 lifecycle_1.0.4 rprojroot_2.0.4
## [10] lattice_0.22-6 MASS_7.3-61 crosstalk_1.2.1
## [13] dendextend_1.17.1 magrittr_2.0.3 plotly_4.10.4
## [16] sass_0.4.9 rmarkdown_2.27 jquerylib_0.1.4
## [19] yaml_2.3.9 BSgenome.Hsapiens.UCSC.hg38_1.4.5 remotes_2.5.0
## [22] httpuv_1.6.15 zip_2.3.1 sessioninfo_1.2.2
## [25] pkgbuild_1.4.4 DBI_1.2.3 RColorBrewer_1.1-3
## [28] lubridate_1.9.3 abind_1.4-5 pkgload_1.4.0
## [31] zlibbioc_1.51.1 GenomicRanges_1.57.1 purrr_1.0.2
## [34] R.utils_2.12.3 BiocGenerics_0.51.0 RCurl_1.98-1.14
## [37] seriation_1.5.5 GenomeInfoDbData_1.2.12 IRanges_2.39.0
## [40] S4Vectors_0.43.0 testthat_3.2.1.1 cmdfun_1.0.2
## [43] codetools_0.2-20 DelayedArray_0.31.3 DT_0.33
## [46] tidyselect_1.2.1 ggseqlogo_0.2 UCSC.utils_1.1.0
## [49] farver_2.1.2 viridis_0.6.5 TSP_1.2-4
## [52] universalmotif_1.23.0 base64enc_0.1-3 matrixStats_1.3.0
## [55] stats4_4.4.1 BiocFileCache_2.13.0 webshot_0.5.5
## [58] GenomicAlignments_1.41.0 jsonlite_1.8.8 ellipsis_0.3.2
## [61] iterators_1.0.14 foreach_1.5.2 tools_4.4.1
## [64] emoji_15.0 Rcpp_1.0.12 glue_1.7.0
## [67] gridExtra_2.3 SparseArray_1.5.11 xfun_0.45
## [70] MatrixGenerics_1.17.0 GenomeInfoDb_1.41.1 dplyr_1.1.4
## [73] ca_0.71.1 withr_3.0.0 fastmap_1.2.0
## [76] fansi_1.0.6 digest_0.6.36 timechange_0.3.0
## [79] R6_2.5.1 mime_0.12 colorspace_2.1-0
## [82] RSQLite_2.3.7 R.methodsS3_1.8.2 utf8_1.2.4
## [85] tidyr_1.3.1 generics_0.1.3 data.table_1.15.4
## [88] rtracklayer_1.65.0 bsplus_0.1.4 httr_1.4.7
## [91] htmlwidgets_1.6.4 S4Arrays_1.5.2 downloadthis_0.3.3
## [94] pkgconfig_2.0.3 gtable_0.3.5 blob_1.2.4
## [97] registry_0.5-1 XVector_0.45.0 brio_1.1.5
## [100] htmltools_0.5.8.1 profvis_0.3.8 scales_1.3.0
## [103] Biobase_2.65.0 knitr_1.48 rstudioapi_0.16.0
## [106] tzdb_0.4.0 reshape2_1.4.4 rjson_0.2.21
## [109] curl_5.2.1 memes_1.13.0 cachem_1.1.0
## [112] stringr_1.5.1 parallel_4.4.1 miniUI_0.1.1.1
## [115] restfulr_0.0.15 desc_1.4.3 pillar_1.9.0
## [118] grid_4.4.1 vctrs_0.6.5 urlchecker_1.0.1
## [121] promises_1.3.0 dbplyr_2.5.0 xtable_1.8-4
## [124] evaluate_0.24.0 readr_2.1.5 cli_3.6.3
## [127] compiler_4.4.1 Rsamtools_2.21.0 rlang_1.1.4
## [130] crayon_1.5.3 heatmaply_1.5.0 labeling_0.4.3
## [133] plyr_1.8.9 fs_1.6.4 stringi_1.8.4
## [136] viridisLite_0.4.2 BiocParallel_1.39.0 assertthat_0.2.1
## [139] munsell_0.5.1 Biostrings_2.73.1 lazyeval_0.2.2
## [142] Matrix_1.7-0 BSgenome_1.73.0 hms_1.1.3
## [145] bit64_4.0.5 ggplot2_3.5.1 shiny_1.8.1.1
## [148] SummarizedExperiment_1.35.1 memoise_2.0.1 bslib_0.7.0
## [151] bit_4.0.5