Skip to content

Commit b6ded76

Browse files
committed
Updated vignette
1 parent 75d4075 commit b6ded76

141 files changed

Lines changed: 1307 additions & 1151 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ Imports:
3737
farver,
3838
ggraph,
3939
grDevices,
40-
mvtnorm
40+
mvtnorm,
41+
ggpubr
4142
Depends:
4243
R (>= 3.5.0)
4344
LazyData: true

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export(calculateEdgeDivergence)
88
export(calculateEigengenes)
99
export(calculatePresStats)
1010
export(calculateTreeStats)
11+
export(comparePresStats)
1112
export(convertPresToDist)
1213
export(convertToDT)
1314
export(convertToGraph)

R/addDirectionality.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
#'
99
#' The calculation of directionality relies on the approximate version of the Spearman's rho, significance testing and blocking implemented by \code{\link{correlatePairs}}. The results are summarized as 3 new edge attributes in the \code{\link{igraph}} object: rho (approximate Spearman's correlation coefficient), p.adj (BH-corrected approximate p-value) and direction ("+" or "-").
1010
#'
11-
#' @param network An \code{\link{igraph}} object containing the consensus network or the network of a clone.
12-
#' @param sce A \code{\link{SingleCellExperiment}} object containing the expression data either for all clones (in case \code{network} is the consensus network) or for the clone of interest (in case \code{network} is a clonewise network). If \code{sce} contains the expression data of all clones, it is also expected to have a metadata column "clone" specifying which clone each cell belongs to; this will be used to define the blocking levels.
11+
#' @param network An \code{\link{igraph}} object containing the consensus network or the network of a replicate.
12+
#' @param sce A \code{\link{SingleCellExperiment}} object containing the expression data either for all replicates (in case \code{network} is the consensus network) or for the replicate of interest (in case \code{network} is a replicate-wise network). If \code{sce} contains the expression data of all replicates, it is also expected to have a metadata column "replicate" specifying which replicate each cell belongs to; this will be used to define the blocking levels.
1313
#' @param assay Character, the name of the assay in \code{sce} that should be used for the calculation of gene-gene correlations (default: "logcounts").
1414
#' @param n_cores Integer, the number of cores (default: 1).
1515
#'
@@ -24,7 +24,7 @@
2424
#'
2525
#' @examples
2626
#' consensus_network <- network_list %>%
27-
#' createConsensus(clone2species, tree) %>%
27+
#' createConsensus(replicate2species, tree) %>%
2828
#' addDirectionality(sce)
2929
addDirectionality <- function(network, sce, assay = "logcounts", n_cores = 1L) {
3030

@@ -58,7 +58,7 @@ addDirectionality <- function(network, sce, assay = "logcounts", n_cores = 1L) {
5858
pairings <- as.matrix(dt[, .(from, to)])
5959

6060
# block info (if present)
61-
block <- sce$clone
61+
block <- sce$replicate
6262
if (!is.null(block)) block <- as.factor(block)
6363

6464
# set seed and schedule the restoration of the old seed

R/assignInitialModules.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@
88
#'
99
#' The function creates as many modules as there are regulators, each containing the regulator and its \code{N} best target genes. When choosing the best targets, the genes are ranked based on how strongly they are connected to the regulator (regulator-target edge weight/adjacency). \code{N} should be greater than or equal to the minimum number of targets per regulator across all regulators.
1010
#'
11-
#' @param consensus_network \code{\link{igraph}} object, the consensus network across all species and clones.
11+
#' @param consensus_network \code{\link{igraph}} object, the consensus network across all species and replicates.
1212
#' @param regulators Character vector of transcriptional regulators (default: transcriptional regulators with at least 1 annotated motif in the JASPAR 2024 vertebrate core collection that are present as network nodes in the consensus network).
1313
#' @param N Integer, the initial module size, i.e. the number of target genes to keep for each regulator.
1414
#'
1515
#' @return A data frame of initial modules with 3 core columns:
1616
#'\describe{
1717
#' \item{regulator}{Character, transcriptional regulator.}
1818
#' \item{target}{Character, member gene of the regulator's initial module.}
19-
#' \item{weight}{Numeric, consensus edge weight/adjacency, the weighted average of clonewise edge weights.}
19+
#' \item{weight}{Numeric, consensus edge weight/adjacency, the weighted average of replicate-wise edge weights.}
2020
#' }
2121
#' If the input \code{consensus_network} had more edge attributes, those appear as additional columns in the data frame.
2222
#' @export

R/calculatePresStats.R

Lines changed: 65 additions & 129 deletions
Large diffs are not rendered by default.

R/calculateTreeStats.R

Lines changed: 54 additions & 62 deletions
Large diffs are not rendered by default.

R/comparePresStats.R

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#' Compare preservation statistics
2+
#'
3+
#' Compares the two topology-based preservation statistics - correlation of adjacencies (cor_adj) and correlation of intramodular connectivities (cor_kIM) - in terms of their ability to distinguish actual modules from random ones and to capture the expected decrease in module preservation with increasing phylogenetic distance.
4+
#'
5+
#' As part of the CroCoNet approach, pairwise module preservation scores are calculated between replicates, both within and across species (see \code{\link{calculatePresStats}}) to gain information about the cross-species differences but also about the within-species diversity of the modules. These correlation-based preservation statistics quantify how well the module topology is preserved between the networks of two replicates. While cor_adj compares fine-grained topology at the level of adjacencies per edge, cor_kIM compares higher-level topology based on intramodular connectivities, gene-level summaries of the individual adjacencies. The statistics are calculated not just for the actual, biologically meaningful modules, but also for random modules with matching sizes.
6+
#'
7+
#' The function plots two distributions for both cor_adj and cor_kIM: 1) the difference in preservation between each actual and corresponding random module, and 2) the inverse correlation between preservation and phylogenetic distance for each actual module. The higher these values are, the better the preservation statistic perfoms, since the actual modules are expected to be more preserved than the random modules, and all modules, but especially the actual ones, are expected to be more preserved between closely related species than between phylogenetically distant species. By comparing the distributions between cor_adj and cor_kIM, the user can select the better preservation statistic for the downstream steps of the workflow (tree reconstruction and quantification of module conservation).
8+
#'
9+
#' @param pres_stats Data frame of the preservation statistics for the actual (pruned) modules. Required columns:
10+
#' \describe{
11+
#' \item{regulator}{Character, transcriptional regulator.}
12+
#' \item{replicate1, replicate2}{Character, the names of the replicates compared.}
13+
#' \item{species1, species2}{Character, the names of the species \code{replicate1} and \code{replicate2} belongs to, respectively.}
14+
#' \item{cor_adj}{Numeric, correlation of adjacencies per module and replicate pair.}
15+
#' \item{cor_kIM}{Numeric, correlation of intramodular connectivities per module and replicate pair.}
16+
#' }
17+
#' @param random_pres_stats Data frame of the preservation statistics for the random modules. Required columns:
18+
#' \describe{
19+
#' \item{regulator}{Character, transcriptional regulator.}
20+
#' \item{replicate1, replicate2}{The names of the replicates compared.}
21+
#' \item{species1, species2}{The names of the species \code{replicate1} and \code{replicate2} belongs to, respectively.}
22+
#' \item{cor_adj}{Numeric, correlation of adjacencies per random module and replicate pair.}
23+
#' \item{cor_kIM}{Numeric, correlation of intramodular connectivities per random module and replicate pair.}
24+
#' }
25+
#' @param tree Object of class \code{\link{phylo}}, the phylogenetic tree of the species.
26+
#' @param colors Character vector of length 2, the colors for \code{cor_adj} and \code{cor_kIM}.
27+
#' @param font_size Numeric, font size (default: 14).
28+
#'
29+
#' @return A boxplot as a \code{\link{ggplot}} object comparing how well each preservation statistic can distinguish actual and random modules and capture phylogenetic information.
30+
#' @export
31+
#'
32+
#' @examples comparePresStats(pres_stats, random_pres_stats, tree)
33+
#' @family functions to plot preservation statistics
34+
comparePresStats <- function(pres_stats, random_pres_stats, tree, colors = NULL, font_size = 14) {
35+
36+
# check input data
37+
if (!is.data.frame(pres_stats))
38+
stop("The argument \"pres_stats\" should be a data frame.")
39+
40+
if (any(!c("regulator", "replicate1", "replicate2", "species1", "species2", "cor_adj", "cor_kIM") %in% colnames(pres_stats)))
41+
stop("The argument \"pres_stats\" should contain the columns \"regulator\", \"replicate1\", \"replicate2\", \"species1\", \"species2\", \"cor_adj\" and \"cor_kIM\".")
42+
43+
if (!is.data.frame(random_pres_stats))
44+
stop("The argument \"random_pres_stats\" should be a data frame.")
45+
46+
if (any(!c("regulator", "replicate1", "replicate2", "species1", "species2", "cor_adj", "cor_kIM") %in% colnames(random_pres_stats)))
47+
stop("The argument \"random_pres_stats\" should contain the columns \"regulator\", \"replicate1\", \"replicate2\", \"species1\", \"species2\", \"cor_adj\" and \"cor_kIM\".")
48+
49+
if (!inherits(tree, "phylo"))
50+
stop("The argument \"tree\" should be a phylo object.")
51+
52+
if (any(!unique(c(pres_stats$species1, pres_stats$species2)) %in% tree$tip.label))
53+
stop("One or more species in \"pres_stats\" are not present in the phylogenetic tree. Please make sure the the species names in the columns \"species1\" and \"species2\" of \"pres_stats\" match the tip labels of \"tree\".")
54+
55+
if (any(!unique(c(random_pres_stats$species1, random_pres_stats$species2)) %in% tree$tip.label))
56+
stop("One or more species in \"random_pres_stats\" are not present in the phylogenetic tree. Please make sure the the species names in the columns \"species1\" and \"species2\" of \"random_pres_stats\" match the tip labels of \"tree\".")
57+
58+
if (!is.null(colors) && (!inherits(colors, "character") || any(!areColors(colors)) || length(colors) != 2))
59+
stop("The argument \"colors\" should be a character vector of valid color representations, with length of 2.")
60+
61+
if (!inherits(font_size, "numeric") || length(font_size) != 1 || font_size <= 0)
62+
stop("The argument \"font_size\" should be a positive numeric value.")
63+
64+
# avoid NSE notes in R CMD check
65+
. = NULL
66+
67+
# if no colors are provided, take the default
68+
if (is.null(colors))
69+
colors <- c(cor_adj = "palevioletred", cor_kIM = "deeppink4")
70+
71+
# differences between actual and rando modules
72+
random_vs_actual_pres <- dplyr::bind_rows("actual_module" = pres_stats,
73+
"random_module" = random_pres_stats,
74+
.id = "module_set") %>%
75+
tidyr::pivot_longer(cols = c("cor_kIM", "cor_adj"), names_to = "statistic", values_to = "value") %>%
76+
dplyr::select(dplyr::all_of(c("module_set", "regulator", "replicate1", "replicate2", "species1", "species2", "statistic", "value"))) %>%
77+
tidyr::pivot_wider(names_from = "module_set", values_from = "value") %>%
78+
dplyr::mutate(actual_random_diff = .data[["actual_module"]] - .data[["random_module"]])
79+
80+
# plot differences
81+
p1 <- random_vs_actual_pres %>%
82+
ggplot2::ggplot(ggplot2::aes(x = .data[["statistic"]], y = .data[["actual_random_diff"]], fill = .data[["statistic"]])) +
83+
ggplot2::geom_boxplot(color = "grey20", linewidth = 0.1, outlier.alpha = 0.5, outlier.size = 0.2) +
84+
ggplot2::theme_bw(base_size = 14) +
85+
ggplot2::scale_fill_manual(values = colors, guide = "none") +
86+
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
87+
axis.text.x = ggplot2::element_text(color = "black", size = 14)) +
88+
ggplot2::ylab(expression(Delta * ~preservation~score[~actual - random])) +
89+
ggpubr::geom_signif(comparisons = list(c("cor_kIM", "cor_adj")), map_signif_level = c("***"=0.001, "**"=0.01, "*"=0.05, "n.s." =1), textsize = 4, tip_length = 0.01, size = 0.3, vjust = 0.3) +
90+
ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0.05, 0.08)))
91+
92+
# phylogenetic distances
93+
species_names <- levels(pres_stats$species1)
94+
phylo_dists <- ape::cophenetic.phylo(tree) %>%
95+
as.data.frame() %>%
96+
tibble::rownames_to_column("species1") %>%
97+
tidyr::pivot_longer(cols = 2:ncol(.), names_to = "species2", values_to = "distance") %>%
98+
dplyr::mutate(species1 = factor(.data[["species1"]], species_names),
99+
species2 = factor(.data[["species2"]], species_names)) %>%
100+
dplyr::filter(as.integer(.data[["species1"]]) < as.integer( .data[["species2"]])) %>%
101+
dplyr::transmute(species_pair = paste0(.data[["species1"]], "_", .data[["species2"]]), .data[["distance"]]) %>%
102+
tibble::deframe()
103+
104+
# calculate correlations between preservation and phylogenetic distance
105+
pres_stats_to_plot <- pres_stats %>%
106+
tidyr::pivot_longer(cols = c("cor_adj", "cor_kIM"), names_to = "statistic", values_to = "value") %>%
107+
dplyr::mutate(species_pair = ifelse(.data[["species1"]] == .data[["species2"]], "within-\nspecies", paste0( .data[["species1"]], '_', .data[["species2"]])),
108+
phylo_dist = phylo_dists[.data[["species_pair"]]],
109+
phylo_dist = tidyr::replace_na(.data[["phylo_dist"]], 0)) %>%
110+
dplyr::group_by(dplyr::across(c("regulator", "statistic"))) %>%
111+
dplyr::summarize(corr_pres_phyl = stats::cor(.data[["value"]], .data[["phylo_dist"]], method = "pearson"))
112+
113+
# plot inverse correlations
114+
p2 <- (pres_stats_to_plot %>%
115+
ggplot2::ggplot(ggplot2::aes(x = .data[["statistic"]], y = -.data[["corr_pres_phyl"]] , fill = .data[["statistic"]])) +
116+
ggplot2::geom_boxplot(color = "grey20", linewidth = 0.1, outlier.alpha = 0.5, outlier.size = 0.2) +
117+
ggplot2::theme_bw(base_size = 14) +
118+
ggplot2::scale_fill_manual(values = colors, guide = "none") +
119+
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
120+
axis.text.x = ggplot2::element_text(color = "black", size = 14)) +
121+
ggplot2::ylab(expression(-italic(r)[preservation~score * ", " * phylogenetic~distance]))) +
122+
ggpubr::geom_signif(comparisons = list(c("cor_kIM", "cor_adj")), map_signif_level = c("***"=0.001, "**"=0.01, "*"=0.05, "n.s." =1), textsize = 4, tip_length = 0.01, size = 0.3, vjust = 0.3) +
123+
ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0.05, 0.08)))
124+
125+
p1 + p2
126+
127+
}

0 commit comments

Comments
 (0)