knitr::opts_chunk$set(echo = TRUE,
                    message = params$debug,
                    warning = params$debug,
                    cache = FALSE, 
                    error = params$debug,
                    results = "asis")

### Parse input ###
genome_build <- check_genome_build(params$genome_build)
result_len <- length(params$peak_files)
motif_len <- length(params$motif_files)
if (is.null(params$exp_type)) {
    exp_types <- rep("unknown", result_len)
} else {
    exp_types <- params$exp_type
}
peak_files_encode <- Vectorize(check_ENCODE, "encode_id")(
    params$peak_files, expect_format = c("narrowPeak", "bed"))
alignment_files_encode <- Vectorize(check_ENCODE, "encode_id")(
    params$alignment_files, expect_format = "bam")
result <- list(
    peaks = Vectorize(read_peak_file, "peak_file")(peak_files_encode),
    alignments = lapply(alignment_files_encode, Rsamtools::BamFile),
    exp_labels = params$exp_labels,
    exp_type = unname(Vectorize(format_exptype, "exp_type")(exp_types))
)
motif_files_jaspar <- Vectorize(check_JASPAR, "motif_id")(params$motif_files)
user_motifs <- list(
    motifs = Vectorize(read_motif_file, "motif_file")(motif_files_jaspar),
    motif_labels = params$motif_labels
)
result$alignments <- unname(result$alignments)
result$peaks <- unname(result$peaks)
user_motifs$motifs <- unname(user_motifs$motifs)

temp_dir <- file.path(tempdir(), paste0("MotifPeeker_temp_", random_string(5)))
out_dir_extra <- ifelse(params$save_runfiles, params$out_dir, temp_dir)
label_colours <- c("#c83e73", "#2b83ba") # No, Yes

## Flags for optional plots
alignment_metrics <- ifelse((length(result$alignments) == 0), FALSE, TRUE)
cellcount_metrics <- ifelse((length(params$cell_counts) == 0), FALSE, TRUE)
user_motif_metrics <- ifelse((length(user_motifs$motifs) == 0 ||
                                    result_len == 1), FALSE, TRUE)
comparison_metrics <- ifelse(result_len == 1, FALSE, TRUE)
denovo_metrics <- params$denovo_motif_discovery && comparison_metrics

## Misc
ex_emo <- ifelse(requireNamespace("emoji", quietly = TRUE),
                emoji::emoji("exclamation"), "!!")

## Check motif_db
using_jaspar_db <- if (denovo_metrics) ifelse(is.null(params$motif_db),
                                       TRUE, FALSE)
motif_db <- if (denovo_metrics) ifelse(is.null(params$motif_db),
                                       get_JASPARCORE(), params$motif_db)

### General Metrics ###
## Peak Counts
result$peak_count <- vapply(
        result$peaks, function(x) length(x), integer(1)
    )
if (alignment_metrics) {
    ## Read Counts
    result$read_count <- vapply(result$alignments, function(x) {
        Rsamtools::countBam(x)$records
    }, integer(1))
    
    ## FRiP scores
    result$frip <- Vectorize(calc_frip, c("read_file",
                                "peak_file"))(result$alignments, result$peaks)
    frip_df <- data.frame(exp_type = result$exp_type,
                            frip = result$frip,
                            exp_label = result$exp_labels)
}
    
## Peak Widths
result$peak_widths <- lapply(result$peaks, function(x){
    x@ranges@width
})
peak_width_len <- vapply(result$peak_widths, length, integer(1))
peak_width_df <- 
    data.frame(exp_label = rep(result$exp_labels, peak_width_len),
                exp_type = rep(result$exp_type, peak_width_len),
                peak_width = unlist(result$peak_widths))

## Motif-Summit Distances
motif_summit_dist_df <- get_df_distances(
        result, user_motifs, genome_build, out_dir_extra, params$workers,
        params$meme_path, params$debug
    )

if (comparison_metrics) {
    ### Known-motif Analysis ###
    comparison_indices <- setdiff(seq_along(result$peaks),
                                  params$reference_index)
    segregated_peaks <- lapply(comparison_indices, function(x) {
        segregate_seqs(result$peaks[[params$reference_index]],
                      result$peaks[[x]])
    })
    
    ## Calculate enrichment for segregated peaks
    if (user_motif_metrics) {
      enrichment_df <- get_df_enrichment(
              result, segregated_peaks, user_motifs, genome_build,
              params$reference_index, out_dir_extra, params$workers,
              params$meme_path, params$verbose
          )
    }
}
    
### De-Novo Motif Analysis ###
if (denovo_metrics) {
    denovo_res <- list()
    ## Run STREME
    denovo_res$streme <- denovo_motifs(
      unlist(segregated_peaks), params$trim_seq_width, genome_build,
      params$denovo_motifs, filter_n = params$filter_n,
      out_dir = out_dir_extra, meme_path = params$meme_path,
      workers = params$workers, verbose = params$verbose, debug = params$debug
    )
    ## Run TOMTOM
    denovo_res$similar_motifs <- find_motifs(
        denovo_res$streme, motif_db, out_dir = out_dir_extra,
        meme_path = params$meme_path, workers = params$workers,
        verbose = params$verbose, debug = params$debug
    )
    ## Compare motifs
    denovo_res$comparisons <- motif_similarity(
        denovo_res$streme, workers = params$workers
    )
}

MotifPeeker compares and analyses datasets from different epigenomic profiling methods using motif enrichment as a key benchmark.

Summary

The report consists of the following sections:

General Metrics: Provides an overview of metrics related to dataset peaks, including FRiP scores, peak widths, and motif-to-summit distances.
Known Motif Enrichment Analysis: Presents statistics on the frequency of enriched user-supplied motifs in the datasets and compares them between the common and unique peaks from comparison and reference datasets.
De-Novo Motif Enrichment Analysis: Details the statistics of de-novo discovered motifs in common and unique peaks from comparison and reference datasets. Examines motif similarities and identifies the closest known motifs in the JASPAR or the provided database.

Input Datasets

Experimental dataset labels used:

Dataset 1: ChIP
- Experiment Type: ChIP-Seq
Dataset 2: TIP
- Experiment Type: TIP-Seq
Reference dataset: TIP (Index: 2)

User-provided motifs used:

Motif 1: MA1102.3
Motif 2: MA1930.2

Command

MotifPeeker report was generated with the following command:

cat(report_command(params))

MotifPeeker(use_cache = NULL,
            peak_files = ...,
            reference_index = 2,
            alignment_files = ...,
            exp_labels = list("ChIP", "TIP"),
            exp_type = list("chipseq", "tipseq"),
            genome_build = "hg38",
            motif_files = ...,
            motif_labels = NULL,
            cell_counts = NULL,
            denovo_motif_discovery = TRUE,
            denovo_motifs = 3,
            filter_n = 6,
            motif_db = NULL,
            trim_seq_width = NULL,
            download_buttons = TRUE,
            meme_path = NULL,
            out_dir = "/Users/hdash/code/icl_project2/MotifPeeker-report/built_in_data/output/MotifPeeker_20240716_104747",
            save_runfiles = TRUE,
            workers = 2,
            debug = FALSE,
            verbose = TRUE)

1 General Metrics

FRiP Score

Fraction of Reads in Peaks (FRiP) is the proportion of sequencing reads within identified peak regions. Higher FRiP scores suggest higher signal-to-noise ratios in the dataset.
\[\text{FRiP (Fraction of Reads in Peaks)} = \frac{\text{Reads in Peaks}}{\text{Total Reads}}\]

if (!alignment_metrics) {
    cat("\n", ex_emo,
    " *Skipping FRiP calculation as alignment files were not provided.*",
        "  \n")
} else {
    if (length(result$alignments) != length(result$peaks)) {
        stp_msg <- "Number of alignment files and peak files do not match."
        stopper(stp_msg)
    }
    cat("\n### By Experiment Type {- .unlisted}  \n")
    frip_exp_plot <- frip_df %>%
        ggplot2::ggplot(aes(x = exp_type, y = frip, fill = exp_type,
                            text = paste0(
                                "<b>Experiment Label</b>: ", exp_label, "<br>",
                                "<b>Experiment Type</b>: ", exp_type, "<br>",
                                "<b>FRiP Score</b>: ", round(frip, 4)))) +
            geom_boxplot(outlier.shape = NA) +
            geom_jitter() +
            labs(
              x = "Experiment Type",
              y = "FRiP Score",
              fill = "Experiment Type"
            ) +
        scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
                           option = "A")
    
    print(to_plotly(frip_exp_plot))
}

By Experiment Type

if (alignment_metrics) {
    frip_df %>%
        group_by(exp_type) %>%
        rename("Experiment Type" = exp_type) %>%
        summarise(
          "Mean score" = round(mean(frip), 3),
          "Median Score" = round(median(frip), 3),
          "Standard Deviation" = round(sd(frip), 3)
        ) %>%
        arrange(`Experiment Type`) %>%
    print_DT()
}

if (alignment_metrics) {
    cat("\n### By Individual Dataset {- .unlisted}  \n")
    
    frip_individual_plot <- frip_df %>%
        ggplot(aes(x = exp_label, y = frip, fill = exp_type, text = paste0(
                                "<b>Experiment Label</b>: ", exp_label, "<br>",
                                "<b>Experiment Type</b>: ", exp_type, "<br>",
                                "<b>FRiP Score</b>: ", round(frip, 4)))) +
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
            geom_col() +
            labs(
              x = "Experiment Label",
              y = "FRiP Score",
              fill = "Experiment Type"
            ) +
        scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
                           option = "A")
    
    print(to_plotly(frip_individual_plot))
}

By Individual Dataset

if (alignment_metrics) {
    frip_df$read_count <- result$read_count
    frip_individual_df <- frip_df %>%
        mutate(frip = round(frip, 3)) %>%
        select(exp_label, exp_type, read_count, frip) %>%
        rename(
          "Experiment Label" = exp_label,
          "Experiment Type" = exp_type,
          "FRiP Score" = frip,
          "Read Count" = read_count
        ) %>%
        arrange(`Experiment Type`, `Experiment Label`)
    
    if (length(params$cell_counts) > 0) {
        frip_individual_df$cell_count <- params$cell_counts
        frip_individual_df <- frip_individual_df %>%
            rename(
              "Cell Count" = cell_count
            )
    }
    frip_individual_df %>%
        print_DT()
}

if (alignment_metrics) {
    if (!cellcount_metrics) {
        cat("\n", ex_emo,
        " *Skipping cell count plot as cell counts were not provided.*\n")
    } else {
        cat("\n### By Cell Count {- .unlisted}  \n")
        
        frip_df$cell_count <- as.numeric(params$cell_counts)
        frip_df_cellcount <- frip_df %>%
            group_by(cell_count, exp_type) %>%
                summarise(
                    frip = mean(frip),
                    .groups = "drop"
                )
            
        frip_cellcount_plot <- frip_df %>%
            ggplot(aes(
                x = cell_count,
                y = frip,
                shape = exp_type,
                color = exp_type,
                text = paste0("<b>Experiment Type</b>: ", exp_type, "<br>",
                                "<b>FRiP Score</b>: ", round(frip, 4),
                                "<br><b>Cell Count</b>: ", cell_count))) +
            geom_point() +
            geom_line(data = frip_df_cellcount) +
            labs(
                x = "Cell Count (log10 scale)",
                y = "FRiP Score",
                fill = "Experiment Type",
                shape = "Experiment Type",
                color = "Experiment Type"
            ) +
            scale_x_continuous(trans="log10") +
            scale_color_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
                           option = "A")
        
        print(to_plotly(frip_cellcount_plot))
    }
}

❗ Skipping cell count plot as cell counts were not provided.

Peak Width Distribution

This section presents the peak width distribution for each experiment type, as well as for individual experiments and cell counts, reported in base pairs.

The data plotted represents the central 95% of the overall data, with values below the 2.5th percentile and above the 97.5th percentile excluded.

By Experiment Type

peak_width_plt1 <- peak_width_df %>%
    filter(peak_width >= stats::quantile(peak_width, 0.025) &
           peak_width <= stats::quantile(peak_width, 0.975)) %>%
    ggplot(aes(x = peak_width, fill = exp_type)) +
        geom_histogram(bins = 30, alpha = 0.5, position = "identity") +
        labs(
            x = "Peak Width",
            y = "Peak Count",
            fill = "Experiment Type"
        ) +
        scale_fill_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
                            option = "A")
    
    print(to_plotly(peak_width_plt1))

peak_width_plt2 <- peak_width_df %>%
    filter(peak_width >= stats::quantile(peak_width, 0.025) &
           peak_width <= stats::quantile(peak_width, 0.975)) %>%
    ggplot(aes(x = exp_type, y = peak_width, fill = exp_type)) +
        geom_boxplot(alpha = 0.5) +
        labs(
            x = "Experiment Type",
            y = "Peak Width",
            fill = "Experiment Type"
        ) +
        scale_fill_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
                            option = "A")

    print(to_plotly(peak_width_plt2))

peak_width_df %>%
    group_by(exp_type) %>%
    rename("Experiment Type" = exp_type) %>%
    summarise(
        "Mean Peak Width" = round(mean(peak_width), 1),
        "Median Peak Width" = round(median(peak_width), 3),
        "2.5th Quantile" = round(quantile(peak_width, 0.025), 3),
        "97.5th Quantile" = round(quantile(peak_width, 0.975), 3),
        "Standard Deviation" = round(sd(peak_width), 1)
    ) %>%
    arrange(`Experiment Type`) %>%
    print_DT()

By Individual Dataset

peak_width_plt3 <- peak_width_df %>%
    filter(peak_width >= stats::quantile(peak_width, 0.025) &
           peak_width <= stats::quantile(peak_width, 0.975)) %>%
    ggplot(aes(x = exp_label, y = peak_width, fill = exp_type)) +
        geom_boxplot(alpha = 0.5) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        labs(
        x = "Experiment Label",
        y = "Peak Width",
        fill = "Experiment Type"
        ) +
        scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
                            option = "A")

    print(to_plotly(peak_width_plt3))

peak_width_df$cell_count <- rep(params$cell_counts, peak_width_len)

peak_width_df_all <- if (is.null(peak_width_df$cell_count)) {
    peak_width_df %>%
    group_by(exp_label, exp_type)
} else {
    peak_width_df %>%
    group_by(exp_label, exp_type, cell_count) %>%
    rename("Cell Count" = cell_count,) 
}

peak_width_df_all <- peak_width_df_all %>%
    summarise(
        "Mean Peak Width" = round(mean(peak_width), 1),
        "Median Peak Width" = round(median(peak_width), 3),
        "2.5th Quantile" = round(stats::quantile(peak_width, 0.025), 3),
        "97.5th Quantile" = round(stats::quantile(peak_width, 0.975), 3),
        "Standard Deviation" = round(stats::sd(peak_width), 1),
        .groups = "drop"
    ) %>%
    rename(
        "Experiment Label" = exp_label,
        "Experiment Type" = exp_type
    ) %>%
    arrange(`Experiment Type`, `Experiment Label`)

    print_DT(peak_width_df_all)

if (!cellcount_metrics) {
    cat("\n", ex_emo,
    " *Skipping cell count plot as cell counts were not provided.*\n")
} else {
    cat("\n### By Cell Count {- .unlisted}  \n")
    
    peak_width_df_cellcount <- peak_width_df %>%
        group_by(exp_label, exp_type, cell_count) %>%
        drop_na(cell_count) %>%
        summarise(
            mean_width = mean(peak_width),
            .groups = "drop"
        ) %>%
        group_by(exp_type, cell_count) %>%
        summarise(
            mean_width = mean(mean_width),
            .groups = "drop"
        )
        
    peak_width_cellcount_plot <- peak_width_df %>%
        group_by(exp_label, exp_type) %>%
        drop_na(cell_count) %>%
        summarise(
            mean_width = mean(peak_width),
            cell_count = mean(cell_count),
            .groups = "drop"
        ) %>%
        ggplot(aes(
            x = cell_count,
            y = mean_width,
            shape = exp_type,
            color = exp_type,
            text = paste0("<b>Experiment Type</b>: ", exp_type, "<br>",
                            "<b>Mean Peak Width</b>: ", round(mean_width, 1),
                            "<br><b>Cell Count</b>: ", cell_count))) +
        geom_point() +
        geom_line(data = peak_width_df_cellcount) +
        labs(
            x = "Cell Count (log10 scale)",
            y = "Mean Peak Width",
            fill = "Experiment Type",
            shape = "Experiment Type",
            color = "Experiment Type"
        ) +
        scale_x_continuous(trans="log10") +
        scale_color_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
                       option = "A")
    
    print(to_plotly(peak_width_cellcount_plot))
}

❗ Skipping cell count plot as cell counts were not provided.

Motif-Summit Distance

This section reports the distances between the peak summit and the centre of the nearest motif. MEME suite’s FIMO tool is used to scan the sequences in the peak file to identify all occurrences of the provided motifs, and the distances between the centres of each motif occurrence and the closest peak summit are calculated.

The data plotted represents the central 95% of the overall data, with values below the 2.5th percentile and above the 97.5th percentile excluded.

if (!user_motif_metrics) {
    cat("\n", ex_emo,
    " *Skipping motif-summit distance plots as no motifs files",
    " were provided.*\n")
} else {
    tab_header_flag <- FALSE # Print tabset header just once
    
    ## Loop through all motifs
    for (motif_i in seq_len(motif_len)) {
        if (!tab_header_flag) {
            cat("**Select Motif:**  \n")
            tab_header_flag <- TRUE
        }
        cat(paste0("\n### ", user_motifs$motifs[[motif_i]]@name,
                  " {- .unlisted}  \n"))
        cat(paste0("**Motif Consensus Sequence:** ",
                  user_motifs$motifs[[motif_i]]@consensus, " (Length: ",
                  nchar(user_motifs$motifs[[motif_i]]@consensus), " bp)  \n"))
        
        cat("\n#### By Experiment Type {- .unlisted}  \n")
        cat("The top graph shows peak counts by distance from the motif",
        "center, while the bottom graph depicts the probability density of",
        "these distances. Different colors represent experiment types, and",
        "each density curve sums to 1.  \n")
        ## Limits
        motif_summit_dist_lims <- motif_summit_dist_df %>%
                      filter(motif_indice == motif_i) %>%
                      pull(distance) %>%
                      stats::quantile(c(0.025, 0.975))
        
        ## Raw count plot
        motif_summit_dist_plt1 <- motif_summit_dist_df %>%
            filter(motif_indice == motif_i) %>%
            filter(distance >= motif_summit_dist_lims[1] &
                    distance <= motif_summit_dist_lims[2]) %>%
            ggplot(aes(x = distance, color = exp_type,
                        text = paste("<b>Experiment Type</b>:", exp_type))) +
            geom_freqpoly(bins = 30, linewidth = 1) +
            scale_color_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
                            option = "A") +
            labs(color = "Experiment Type",
                x = "Distance (bp)", 
                y = "Count")
        motif_summit_dist_plt1 <- to_plotly(motif_summit_dist_plt1,
                                            html_tags = FALSE)
            
        ## Density plot
        motif_summit_dist_plt2 <- motif_summit_dist_df %>%
            filter(motif_indice == motif_i) %>%
            filter(distance >= motif_summit_dist_lims[1] &
                    distance <= motif_summit_dist_lims[2]) %>%
            ggplot(aes(x = distance, fill = exp_type, color = exp_type,
                        text = paste("<b>Experiment Type</b>:", exp_type))) +
            geom_density(alpha = 0.5) +
            scale_fill_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
                            option = "A") +
            scale_color_viridis(begin = 0.2, end = 0.6, discrete = TRUE,
                            option = "A") +
            labs(color = "Experiment Type",
                fill = "Experiment Type",
                x = "Distance (bp)", 
                y = "Count")
        motif_summit_dist_plt2 <- to_plotly(motif_summit_dist_plt2,
                                            html_tags = FALSE)
        
        motif_summit_dist_exp_plt <-
            subplot(motif_summit_dist_plt1, motif_summit_dist_plt2,
                    nrows = 2, margin = 0.04) %>%
            layout(
                xaxis2 = list(title = "Distance (bp)",
                              titlefont = list(size = 16)),
                yaxis = list(title = "Peak Count",
                             titlefont = list(size = 16)),
                yaxis2 = list(title = "Peak Density",
                              titlefont = list(size = 16))
            )
        print(htmltools::tagList(motif_summit_dist_exp_plt))
        
        ## DT
        motif_summit_dist_df %>%
            filter(motif_indice == motif_i) %>%
            group_by(exp_type) %>%
            summarise(
                "Mean Distance" = round(mean(distance), 2),
                "Absolute Mean Distance" = round(mean(abs(distance)), 2),
                "Median Distance" = median(distance),
                "Absolute Median Distance" = median(abs(distance)),
                "Standard Deviation" = round(sd(distance), 2)
            ) %>%
            rename("Experiment Type" = exp_type) %>%
            print_DT(html_tags = TRUE) %>%
            print()
        
        cat("\n#### By Individual Dataset {- .unlisted}\n")
        cat("This plot shows the average of the absolute distance between peak",
        "summits and the nearest motif center for each input experiment.  \n")
        ## Absolute mean distance plot
        motif_summit_dist_ind_plt <- motif_summit_dist_df %>%
            filter(motif_indice == motif_i) %>%
            group_by(exp_label, exp_type) %>%
            summarise(abs_mean_dist = round(mean(abs(distance)), 2),
                      .groups = "drop") %>%
            ggplot(aes(x = exp_label, y = abs_mean_dist, fill = exp_type,
                        text = paste0(
                            "<b>Experiment Label</b>: ", exp_label, "<br>",
                            "<b>Experiment Type</b>: ", exp_type, "<br>",
                            "<b>Absolute Mean Distance</b>: ", abs_mean_dist
                        ))) +
                theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
                geom_col() +
                labs(
                  x = "Experiment Label",
                  y = "Absolute Mean Distance (bp)",
                  fill = "Experiment Type"
                ) +
            scale_fill_viridis(begin = 0.15, end = 0.6, discrete = TRUE,
                               option = "A")
        
            print(to_plotly(motif_summit_dist_ind_plt, html_tags = TRUE))
        
        ## DT
        motif_summit_dist_df %>%
            filter(motif_indice == motif_i) %>%
            group_by(exp_label, exp_type) %>%
            summarise(
                "Mean Distance" = round(mean(distance), 2),
                "Absolute Mean Distance" = round(mean(abs(distance)), 2),
                "Median Distance" = median(distance),
                "Absolute Median Distance" = median(abs(distance)),
                "Standard Deviation" = round(sd(distance), 2),
                .groups = "keep"
            ) %>%
            arrange(exp_type, exp_label) %>%
            rename(
                "Experiment Label" = exp_label,
                "Experiment Type" = exp_type
            ) %>%
            print_DT(html_tags = TRUE) %>%
            print()
    }
}

Select Motif:

MA1102.3

Motif Consensus Sequence: CAGGGGGC (Length: 8 bp)

By Experiment Type

The top graph shows peak counts by distance from the motif center, while the bottom graph depicts the probability density of these distances. Different colors represent experiment types, and each density curve sums to 1.

By Individual Dataset

This plot shows the average of the absolute distance between peak summits and the nearest motif center for each input experiment.

MA1930.2

Motif Consensus Sequence: CTGCAGTNCNNNNNNNNNNCCASYAGRKGGCRS (Length: 33 bp)

By Experiment Type

By Individual Dataset

This plot shows the average of the absolute distance between peak summits and the nearest motif center for each input experiment.

Summary Table

Summary of all the general metrics for the input datasets.

dplyr::tibble(
    "Experiment Label" = result$exp_label,
    "Experiment Type" = result$exp_type,
    "Cell Count" = ifelse((cellcount_metrics), params$cell_count, NA),
    "Read Count" = result$read_count,
    "Peak Count" = result$peak_count,
    "FRiP Score" = ifelse((alignment_metrics), round(frip_df$frip, 3), NA),
    "Mean Peak Width" = peak_width_df_all$`Mean Peak Width`,
) %>%
    arrange(`Experiment Type`, `Experiment Label`) %>%
    print_DT(extra = TRUE)

2 Known Motif Enrichment Analysis

This section provides a comprehensive pair-wise comparison of motif enrichment patterns between the reference and comparison experiments. The presence of user-provided motif in the peaks was determined using AME tool from MEME suite.

The overall summary plot compacts all the individual comparisons into one for quick assessment of relative motif enrichment across experiments. The individual comparison plots provide a detailed view of the proportion of peaks with the motif of interest for all, common and unique peaks in the respective reference and comparison plots.

Peaks are considered common to both comparison and reference datasets if they overlap by at least 1 base pair. All other peaks are classified as unique.

NOTE: Discrepancies in motif enrichment in common peaks between experiments may arise due to variations in peak lengths. Experiment with longer peaks may have a higher proportion of peaks with the motif of interest purely by chance.

if (!user_motif_metrics) {
    cat("\n", ex_emo,
    " *Skipping motif-summit distance plots as either no motifs files",
    " was not provided, or only a single dataset was provided.*\n")
} else {
    tab_header_flag <- FALSE # Print tabset header just once
    
    ## Loop through all motifs
    for (motif_i in seq_len(motif_len)) {
        if (!tab_header_flag) {
            cat("**Select Motif:**  \n")
            tab_header_flag <- TRUE
        }
        cat(paste0("\n## ", user_motifs$motifs[[motif_i]]@name,
                   " {- .unlisted .tabset}  \n"))
        cat(paste0("**Motif Consensus Sequence:** ",
                  user_motifs$motifs[[motif_i]]@consensus, " (Length: ",
                  nchar(user_motifs$motifs[[motif_i]]@consensus), " bp)  \n"))
        
        cat("<br>**Select Comparison Group:**  \n")
        
        ### Overall Summary Plot ###
        cat("\n### **Overall** {- .unlisted}  \n")
        cat("These plots are organised in a 2x2 grid comparing motif",
            "enrichment: top row for the comparison experiment and bottom row",
            "for the reference experiment, while columns represent common",
            "peaks on the left and unique peaks on the right.  \n<br>")
        cat("<details><summary>**Show Comparison-Reference Pair Labels**",
        "</summary>  \n")
        cat("(Pair Number. Comparison Experiment - Reference Experiment)  \n  ")
        for (pair_i in seq_along(comparison_indices)) {
          cat(paste0(
            pair_i, ". ", result$exp_labels[comparison_indices][pair_i], " - ",
            result$exp_labels[params$reference_index], "  \n"
          ))
        }
        cat("  \n</details>  \n<br>  \n")
        enrichment_overall_plots <- plot_enrichment_overall(
            enrichment_df, motif_i, label_colours,
            reference_label = result$exp_labels[params$reference_index]
        )
        cat("\n#### Counts of Peaks Containing Motif Across All Experiments ",
        "{- .unlisted}  \n")
        print(enrichment_overall_plots$count_plt)
        
        cat("\n#### Percentages of Peaks Containing Motif Across All ",
        "Experiments {- .unlisted}  \n")
        print(enrichment_overall_plots$perc_plt)
        
        ### Individual Comparison Plots ###
        for (i in comparison_indices) {
            print_labels(result$exp_labels, params$reference_index, i,
                        "known_motif", result$read_count)
            ## Plot
            plot_enrichment_individual(result, enrichment_df, i, motif_i,
                                    label_colours, params$reference_index) %>%
                print()
            ## DT
            cat("  \n  ")
            dt_enrichment_individual(result, enrichment_df, i, motif_i,
                                    params$reference_index) %>%
                print()
            cat("  \n  ")
        }
    }
}

Select Motif:

MA1102.3

Motif Consensus Sequence: CAGGGGGC (Length: 8 bp)

Select Comparison Group:

Overall

These plots are organised in a 2x2 grid comparing motif enrichment: top row for the comparison experiment and bottom row for the reference experiment, while columns represent common peaks on the left and unique peaks on the right.

Show Comparison-Reference Pair Labels

(Pair Number. Comparison Experiment - Reference Experiment)
1. ChIP - TIP

Counts of Peaks Containing Motif Across All Experiments

Percentages of Peaks Containing Motif Across All Experiments

ChIP

Reference Experiment Label: TIP (Total Reads: 16.06K)
Comparison Experiment Label: ChIP (Total Reads: 49.83K)

MA1930.2

Motif Consensus Sequence: CTGCAGTNCNNNNNNNNNNCCASYAGRKGGCRS (Length: 33 bp)

Select Comparison Group:

Overall

Show Comparison-Reference Pair Labels

(Pair Number. Comparison Experiment - Reference Experiment)
1. ChIP - TIP

Counts of Peaks Containing Motif Across All Experiments

Percentages of Peaks Containing Motif Across All Experiments

ChIP

Reference Experiment Label: TIP (Total Reads: 16.06K)
Comparison Experiment Label: ChIP (Total Reads: 49.83K)

3 De-novo Motif Analysis

De-novo motif discovery identifies novel motif sequences in the common and unique peaks of the reference and comparison peak files. The Position Weight Matrix (PWM) of these motifs, representing the probability of each base at each position in the motif, are compared across different peak sets to identify similarities.

Similarity is quantified using the Pearson correlation coefficient (PCC) score, ranging from -1 to +1, with higher values indicating greater similarity. The scores are also normalised to penalise disparities between motifs of different lengths. De-novo motifs were discovered using the STREME tool from MEME suite.

Explanation of the plots:

Common Motif Comparison: High correlation between motifs in this plot validates that the common peaks from both the reference and comparison datasets are enriched for similar motifs. This suggests consistent binding of shared regulatory factors across the experiments.
Unique Motif Comparison: This plot examines whether unique peaks from both experiments capture similar motifs. The presence of highly correlated motifs here indicates that both experiments miss out on capturing peaks with a likely legitimate motif. This may be caused by strict peak calling thresholds, potentially excluding true positive signals.
Cross Motif Comparison A/B: These plots compare motifs from unique peaks in one experiment to motifs from common peaks in the other experiment (refer to axis labels). Higher correlation scores suggests that the experiment contributing the unique peaks captures a higher proportion of motifs that are actually shared between the two experiments, suggesting higher sensitivity of the experiment with unique peaks.

The discovered motifs are also compared to motifs from the JASPAR or the provided database to identify any similar known motifs using TOMTOM from MEME suite.

NOTE: Motifs with poly-nucleotide repeats (such as “AAAAA”) should be avoided due to inflated correlation scores. Motifs containing filter_n or more repeats are not included in the results.

tab_header_flag <- FALSE # Print tabset header just once
for (i in seq_along(comparison_indices)) {
    comparison_i <- comparison_indices[i]
    if (!tab_header_flag) {
        cat("**Select Comparison Group:**  \n")
        tab_header_flag <- TRUE
    }
  print_labels(result$exp_labels, params$reference_index, comparison_i,
              "denovo_motif", result$read_count)
  cat("<br>**Select Plot:**  \n")
  start_i <- 1 + (i - 1) * 4
  end_i <- i * 4
  streme_motifs <- denovo_res$streme[seq(start_i, end_i)]
  similar_motifs <- denovo_res$similar_motifs[seq(start_i, end_i)]
  comparison_matrices <- denovo_res$comparisons[seq(start_i, end_i)]
  denovo_plts <- plot_motif_comparison(comparison_matrices, exp_labels = c(
      result$exp_labels[params$reference_index],
      result$exp_labels[comparison_i]
  ), width = 650, height = 500)
  download_btns <-
    get_download_buttons(i, start_i, segregated_peaks, out_dir = out_dir_extra,
          add_buttons = params$download_buttons, verbose = params$verbose)
  .print_denovo_sections_i <- function(x, y) {
    print_denovo_sections(streme_motifs, similar_motifs, segregated_peaks[[i]],
                      c(x, y), jaspar_link = using_jaspar_db,
                      download_buttons = download_btns)
  }
  
  cat("\n### Common Motif Comparison  {- .unlisted}  \n")
  cat("\n#### Motif Similarity Plot  {- .unlisted}  \n")
  print(denovo_plts[[1]])
  cat("\n#### Motif Details  {- .unlisted}  \n")
  .print_denovo_sections_i(1, 2)
  
  cat("\n### Unique Motif Comparison {- .unlisted}  \n")
  cat("\n#### Motif Similarity Plot  {- .unlisted}  \n")
  print(denovo_plts[[2]])
  cat("\n#### Motif Details  {- .unlisted}  \n")
  .print_denovo_sections_i(3, 4)
  
  cat("\n### Cross Motif Comparison A {- .unlisted}  \n")
  cat("\n#### Motif Similarity Plot  {- .unlisted}  \n")
  print(denovo_plts[[3]])
  cat("\n#### Motif Details  {- .unlisted}  \n")
  .print_denovo_sections_i(3, 2)
  
  cat("\n### Cross Motif Comparison B {- .unlisted}  \n")
  cat("\n#### Motif Similarity Plot  {- .unlisted}  \n")
  print(denovo_plts[[4]])
  cat("\n#### Motif Details  {- .unlisted}  \n")
  .print_denovo_sections_i(4, 1)
}

Select Comparison Group:

ChIP

Reference Experiment Label: TIP (Total Reads: 16.06K)
Comparison Experiment Label: ChIP (Total Reads: 49.83K)

Select Plot:

Common Motif Comparison

Motif Similarity Plot

Motif Details

Reference Group - Common Peaks
Total peaks in group: 127

Comparison Group - Common Peaks
Total peaks in group: 127

Unique Motif Comparison

Motif Similarity Plot

Motif Details

Reference Group - Unique Peaks
Total peaks in group: 57

Comparison Group - Unique Peaks
Total peaks in group: 82

Cross Motif Comparison A

Motif Similarity Plot

Motif Details

Reference Group - Unique Peaks
Total peaks in group: 57

Comparison Group - Common Peaks
Total peaks in group: 127

Cross Motif Comparison B

Motif Similarity Plot

Motif Details

Comparison Group - Unique Peaks
Total peaks in group: 82

Reference Group - Common Peaks
Total peaks in group: 127

Session Info

utils::sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.5
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] MotifPeeker_0.99.0 devtools_2.4.5     usethis_2.2.3     
## 
## loaded via a namespace (and not attached):
##   [1] later_1.3.2                       BiocIO_1.15.0                     bitops_1.0-7                     
##   [4] filelock_1.0.3                    tibble_3.2.1                      R.oo_1.26.0                      
##   [7] XML_3.99-0.17                     lifecycle_1.0.4                   rprojroot_2.0.4                  
##  [10] lattice_0.22-6                    MASS_7.3-61                       crosstalk_1.2.1                  
##  [13] dendextend_1.17.1                 magrittr_2.0.3                    plotly_4.10.4                    
##  [16] sass_0.4.9                        rmarkdown_2.27                    jquerylib_0.1.4                  
##  [19] yaml_2.3.9                        BSgenome.Hsapiens.UCSC.hg38_1.4.5 remotes_2.5.0                    
##  [22] httpuv_1.6.15                     zip_2.3.1                         sessioninfo_1.2.2                
##  [25] pkgbuild_1.4.4                    DBI_1.2.3                         RColorBrewer_1.1-3               
##  [28] lubridate_1.9.3                   abind_1.4-5                       pkgload_1.4.0                    
##  [31] zlibbioc_1.51.1                   GenomicRanges_1.57.1              purrr_1.0.2                      
##  [34] R.utils_2.12.3                    BiocGenerics_0.51.0               RCurl_1.98-1.14                  
##  [37] seriation_1.5.5                   GenomeInfoDbData_1.2.12           IRanges_2.39.0                   
##  [40] S4Vectors_0.43.0                  testthat_3.2.1.1                  cmdfun_1.0.2                     
##  [43] codetools_0.2-20                  DelayedArray_0.31.3               DT_0.33                          
##  [46] tidyselect_1.2.1                  ggseqlogo_0.2                     UCSC.utils_1.1.0                 
##  [49] farver_2.1.2                      viridis_0.6.5                     TSP_1.2-4                        
##  [52] universalmotif_1.23.0             base64enc_0.1-3                   matrixStats_1.3.0                
##  [55] stats4_4.4.1                      BiocFileCache_2.13.0              webshot_0.5.5                    
##  [58] GenomicAlignments_1.41.0          jsonlite_1.8.8                    ellipsis_0.3.2                   
##  [61] iterators_1.0.14                  foreach_1.5.2                     tools_4.4.1                      
##  [64] emoji_15.0                        Rcpp_1.0.12                       glue_1.7.0                       
##  [67] gridExtra_2.3                     SparseArray_1.5.11                xfun_0.45                        
##  [70] MatrixGenerics_1.17.0             GenomeInfoDb_1.41.1               dplyr_1.1.4                      
##  [73] ca_0.71.1                         withr_3.0.0                       fastmap_1.2.0                    
##  [76] fansi_1.0.6                       digest_0.6.36                     timechange_0.3.0                 
##  [79] R6_2.5.1                          mime_0.12                         colorspace_2.1-0                 
##  [82] RSQLite_2.3.7                     R.methodsS3_1.8.2                 utf8_1.2.4                       
##  [85] tidyr_1.3.1                       generics_0.1.3                    data.table_1.15.4                
##  [88] rtracklayer_1.65.0                bsplus_0.1.4                      httr_1.4.7                       
##  [91] htmlwidgets_1.6.4                 S4Arrays_1.5.2                    downloadthis_0.3.3               
##  [94] pkgconfig_2.0.3                   gtable_0.3.5                      blob_1.2.4                       
##  [97] registry_0.5-1                    XVector_0.45.0                    brio_1.1.5                       
## [100] htmltools_0.5.8.1                 profvis_0.3.8                     scales_1.3.0                     
## [103] Biobase_2.65.0                    knitr_1.48                        rstudioapi_0.16.0                
## [106] tzdb_0.4.0                        reshape2_1.4.4                    rjson_0.2.21                     
## [109] curl_5.2.1                        memes_1.13.0                      cachem_1.1.0                     
## [112] stringr_1.5.1                     parallel_4.4.1                    miniUI_0.1.1.1                   
## [115] restfulr_0.0.15                   desc_1.4.3                        pillar_1.9.0                     
## [118] grid_4.4.1                        vctrs_0.6.5                       urlchecker_1.0.1                 
## [121] promises_1.3.0                    dbplyr_2.5.0                      xtable_1.8-4                     
## [124] evaluate_0.24.0                   readr_2.1.5                       cli_3.6.3                        
## [127] compiler_4.4.1                    Rsamtools_2.21.0                  rlang_1.1.4                      
## [130] crayon_1.5.3                      heatmaply_1.5.0                   labeling_0.4.3                   
## [133] plyr_1.8.9                        fs_1.6.4                          stringi_1.8.4                    
## [136] viridisLite_0.4.2                 BiocParallel_1.39.0               assertthat_0.2.1                 
## [139] munsell_0.5.1                     Biostrings_2.73.1                 lazyeval_0.2.2                   
## [142] Matrix_1.7-0                      BSgenome_1.73.0                   hms_1.1.3                        
## [145] bit64_4.0.5                       ggplot2_3.5.1                     shiny_1.8.1.1                    
## [148] SummarizedExperiment_1.35.1       memoise_2.0.1                     bslib_0.7.0                      
## [151] bit_4.0.5

MotifPeekerReport

Jul-16-2024

Summary

Table of Contents

Input Datasets

Command

1 General Metrics

FRiP Score

By Experiment Type

By Individual Dataset

Peak Width Distribution

By Experiment Type

By Individual Dataset

Motif-Summit Distance

MA1102.3

By Experiment Type

By Individual Dataset

MA1930.2

By Experiment Type

By Individual Dataset

Summary Table

2 Known Motif Enrichment Analysis

MA1102.3

Overall

Counts of Peaks Containing Motif Across All Experiments

Percentages of Peaks Containing Motif Across All Experiments

ChIP

MA1930.2

Overall

Counts of Peaks Containing Motif Across All Experiments

Percentages of Peaks Containing Motif Across All Experiments

ChIP

3 De-novo Motif Analysis

ChIP

Common Motif Comparison

Motif Similarity Plot

Motif Details

Unique Motif Comparison

Motif Similarity Plot

Motif Details

Cross Motif Comparison A

Motif Similarity Plot

Motif Details

Cross Motif Comparison B

Motif Similarity Plot

Motif Details

Session Info

`MotifPeekerReport`