From 77a23b481eec40722bdfb9fa483a70e8897b441d Mon Sep 17 00:00:00 2001
From: veitveit <veits@bmb.sdu.dk>
Date: Fri, 19 Jun 2026 17:14:58 +0200
Subject: [PATCH] ptm site specific occupancy

---
 DESCRIPTION                         |   2 +-
 R/04_DataAnalysis.R                 | 433 ++++++++++++----------
 tests/testthat/test-ptm-occupancy.R |  82 ++++-
 vignettes/PTMOccupancy.html         | 532 ++++++++++++++++++++++++++++
 4 files changed, 841 insertions(+), 208 deletions(-)
 create mode 100644 vignettes/PTMOccupancy.html

diff --git a/DESCRIPTION b/DESCRIPTION
index 230c83a..df25f63 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: ProteoMaker
 Type: Package
 Title: Simulations of LC-MS Data from Proteoforms
-Version: 0.6.28
+Version: 0.6.29
 Date: 2026-02-28
 Author: Veit Schwämmle, Marie Locard-Paulet
 Maintainer: Your Name <veits@bmb.sdu.dk>
diff --git a/R/04_DataAnalysis.R b/R/04_DataAnalysis.R
index 3855200..9a24c48 100644
--- a/R/04_DataAnalysis.R
+++ b/R/04_DataAnalysis.R
@@ -311,25 +311,26 @@ proteinSummarisation <- function(peptable, parameters) {
 #' Calculate PTM Site Occupancy
 #'
 #' Estimates the per-condition PTM-site occupancy (stoichiometry) using a
-#' mass-balance three-ratio approach.  For each modified peptidoform the
-#' method uses three between-condition fold-change ratios:
+#' mass-balance three-ratio approach.  For each protein PTM site the method
+#' uses three between-condition fold-change ratios:
 #' \describe{
-#'   \item{\eqn{R_{m,c}}}{Fold-change of the \emph{modified} peptidoform,
+#'   \item{\eqn{R_{m,c}}}{Fold-change of all peptidoforms carrying the modified
+#'     site,
 #'     \eqn{R_{m,c} = 2^{\bar{l}_{m,c} - \bar{l}_{m,1}}}.}
-#'   \item{\eqn{R_{u,c}}}{Fold-change of the \emph{same-sequence unmodified}
-#'     counterpart peptidoform,
+#'   \item{\eqn{R_{u,c}}}{Fold-change of all unmodified peptidoforms covering
+#'     the same protein site,
 #'     \eqn{R_{u,c} = 2^{\bar{l}_{u,c} - \bar{l}_{u,1}}}.}
 #'   \item{\eqn{R_{prot,c}}}{Fold-change of the protein, estimated from the
 #'     geometric mean (in linear space) of all unmodified peptides from the
-#'     same protein accession whose sequence does \emph{not} appear in any
-#'     modified form in the data,
+#'     same protein accession that do not cover any modified site on that
+#'     protein,
 #'     \eqn{R_{prot,c} = 2^{\bar{l}_{prot,c} - \bar{l}_{prot,1}}}.}
 #' }
 #' Here \eqn{\bar{l}_{x,c}} is the mean log2 intensity across replicates of
-#' condition \eqn{c} and subscript 1 denotes the reference condition.  For the
-#' protein ratio, log2 intensities from multiple non-counterpart unmodified rows
-#' are pooled first via \code{colMeans} (geometric mean in linear space) and
-#' then averaged across replicates.  Missing values are removed
+#' condition \eqn{c} and subscript 1 denotes the reference condition.  When a
+#' ratio is supported by multiple peptidoform rows, log2 intensities are pooled
+#' first via \code{colMeans} (geometric mean in linear space) and then averaged
+#' across replicates.  Missing values are removed
 #' (\code{na.rm = TRUE}) at every averaging step; consequently, an individual
 #' missing replicate is silently excluded from the mean for that condition.
 #' However, if \emph{all} replicates of a condition are missing, the condition
@@ -351,22 +352,18 @@ proteinSummarisation <- function(peptable, parameters) {
 #' columns contain \eqn{occ_c}.  When \eqn{R_{m,c} = R_{u,c}} for any
 #' condition (denominator of the \eqn{occ_1} estimator is zero), the
 #' per-condition term yields \code{NaN}, which propagates through the mean to
-#' \eqn{occ_1} and consequently to \emph{all} output columns.  A peptide
-#' sequence is omitted from the result when: (a) no same-sequence unmodified
-#' row is present; (b) no non-counterpart unmodified row from the same protein
-#' is available to estimate \eqn{R_{prot}}; or (c) more than one modified or
-#' more than one unmodified row exists for that sequence (a warning is issued
-#' and the sequence is skipped). If peptide start positions are available,
-#' modified peptides with more than one possible protein start position are
-#' also skipped because their protein-level PTM position is ambiguous.
+#' \eqn{occ_1} and consequently to \emph{all} output columns.  A PTM site is
+#' omitted from the result when no unmodified peptide covers the site, when no
+#' protein-background peptide is available for \eqn{R_{prot}}, or when the
+#' modified peptide maps to more than one accession or protein start position.
 #'
 #' @param peptable A data frame of peptidoform-level data as produced by the
 #'   ProteoMaker pipeline (output of \code{MSRunSim} or \code{runPolySTest}).
 #'   Required columns: \code{Sequence} (character), \code{PTMType} (list),
-#'   \code{PTMPos} (list), \code{Accession} (list), and one column per sample
+#'   \code{PTMPos} (list), \code{Accession} (list), \code{Start} and \code{Stop}
+#'   (protein positions of the peptide's first and last residue), and one column per sample
 #'   as given by \code{parameters$QuantColnames}.  Quantification values must
-#'   be on the log2 scale. If \code{Start} is present, it is used to report
-#'   protein-level PTM positions in \code{ProteinPTMPos}.
+#'   be on the log2 scale.
 #' @param parameters A named list of analysis parameters.  Must contain:
 #'   \describe{
 #'     \item{QuantColnames}{Character vector of column names holding per-sample
@@ -376,11 +373,11 @@ proteinSummarisation <- function(peptable, parameters) {
 #'     \item{NumReps}{Integer.  Number of replicates per condition.}
 #'   }
 #'
-#' @return A data frame with one row per modified peptidoform that has both a
-#'   same-sequence unmodified counterpart and at least one non-counterpart
-#'   unmodified peptide from the same protein.  Columns are:
+#' @return A data frame with one row per protein PTM site that has a
+#'   modified-site signal, at least one unmodified peptide covering the same
+#'   site, and at least one protein-background peptide.  Columns are:
 #'   \describe{
-#'     \item{Sequence}{Stripped peptide sequence.}
+#'     \item{Sequence}{Stripped modified peptide sequence(s), joined by \code{";"}.}
 #'     \item{Accession}{Protein accession(s) (list column).}
 #'     \item{PTMPos}{Modification positions within the peptide (list column).}
 #'     \item{ProteinPTMPos}{Modification positions within the protein sequence
@@ -395,7 +392,7 @@ proteinSummarisation <- function(peptable, parameters) {
 #'       with the mass-balance model (e.g.\ \eqn{R_{m,c} = R_{u,c}}).}
 #'   }
 #'   Returns an empty \code{data.frame} when no modified peptides are present,
-#'   when \code{NumCond < 2}, or when no qualifying modified peptide is found.
+#'   when \code{NumCond < 2}, when \code{Start} or \code{Stop} is missing, or when no PTM site qualifies.
 #'
 #' @references
 #' Sharma, K. et al. (2014) Ultradeep Human Phosphoproteome Reveals a Distinct
@@ -437,197 +434,247 @@ calcPTMOccupancy <- function(peptable, parameters) {
 
   message(" + Calculating PTM site occupancy")
 
+  if (!all(c("Start", "Stop") %in% names(peptable))) {
+    message("calcPTMOccupancy: Start and Stop columns required for PTM site occupancy; returning empty table.")
+    return(data.frame())
+  }
+
   unique_values <- function(x) unique(unlist(x))
+  protein_ptm_pos <- function(start, ptm_pos) {
+    start <- unique_values(start)
+    if (length(start) != 1L) return(rep(NA_integer_, length(unlist(ptm_pos))))
+    start + unlist(ptm_pos) - 1L
+  }
+  site_key <- function(accession, ptm_type, protein_pos) {
+    paste(accession, ptm_type, protein_pos, sep = "|")
+  }
+  is_unambiguous_row <- function(i) {
+    length(unique_values(peptable$Accession[[i]])) == 1L &&
+      (!"Start" %in% names(peptable) || length(unique_values(peptable$Start[[i]])) == 1L)
+  }
 
   # Group QuantColnames into per-condition replicate blocks
   cond_cols      <- lapply(seq_len(NumCond), function(c) {
     QuantColnames[seq.int((c - 1L) * NumReps + 1L, c * NumReps)]
   })
+  quant_mat <- as.matrix(peptable[, QuantColnames, drop = FALSE])
   out_cond_names <- paste0("C_", seq_len(NumCond))
   out_comp_names <- paste0("prob_", out_cond_names[seq_len(NumCond-1)+1], "_vs_C1")
 
-  seqs          <- peptable$Sequence
-  uniq_mod_seqs <- unique(seqs[has_ptm])
-
-  # Row-index maps for fast sequence lookup inside the loop
-  mod_seq_idx   <- split(which(has_ptm),  seqs[has_ptm])
-  unmod_seq_idx <- split(which(!has_ptm), seqs[!has_ptm])
+  mod_site_records <- lapply(which(has_ptm), function(i) {
+    if (!"Start" %in% names(peptable)) return(NULL)
+    if (!is_unambiguous_row(i)) return(NULL)
+    ptm_pos <- unlist(peptable$PTMPos[[i]])
+    ptm_type <- unlist(peptable$PTMType[[i]])
+    if (length(ptm_pos) == 0L || length(ptm_pos) != length(ptm_type)) return(NULL)
+
+    protein_pos <- protein_ptm_pos(peptable$Start[[i]], ptm_pos)
+    if (any(is.na(protein_pos))) return(NULL)
+
+    acc <- unique_values(peptable$Accession[[i]])
+    data.frame(
+      row_index = rep(i, length(ptm_pos)),
+      Sequence = rep(peptable$Sequence[[i]], length(ptm_pos)),
+      Accession = rep(acc, length(ptm_pos)),
+      PTMType = ptm_type,
+      PTMPos = ptm_pos,
+      ProteinPTMPos = protein_pos,
+      SiteKey = site_key(acc, ptm_type, protein_pos),
+      stringsAsFactors = FALSE
+    )
+  })
+  mod_sites <- do.call(rbind, mod_site_records[!vapply(mod_site_records, is.null, logical(1))])
+  site_groups <- if (!is.null(mod_sites) && nrow(mod_sites) > 0L) {
+    split(mod_sites, mod_sites$SiteKey)
+  } else {
+    list()
+  }
+  unmod_row_records <- lapply(which(!has_ptm), function(i) {
+    if (!is_unambiguous_row(i)) return(NULL)
+    start <- unique_values(peptable$Start[[i]])
+    stop <- unique_values(peptable$Stop[[i]])
+    if (length(start) != 1L || length(stop) != 1L) return(NULL)
+
+    data.frame(
+      row_index = i,
+      Accession = unique_values(peptable$Accession[[i]]),
+      Start = start,
+      Stop = stop,
+      stringsAsFactors = FALSE
+    )
+  })
+  unmod_rows <- do.call(rbind, unmod_row_records[!vapply(unmod_row_records, is.null, logical(1))])
+  unmod_by_acc <- if (!is.null(unmod_rows) && nrow(unmod_rows) > 0L) {
+    split(unmod_rows, unmod_rows$Accession)
+  } else {
+    list()
+  }
+  mod_sites_by_acc <- if (!is.null(mod_sites) && nrow(mod_sites) > 0L) {
+    lapply(split(mod_sites$ProteinPTMPos, mod_sites$Accession), unique)
+  } else {
+    list()
+  }
+  background_by_acc <- lapply(names(unmod_by_acc), function(acc) {
+    rows <- unmod_by_acc[[acc]]
+    acc_mod_sites <- mod_sites_by_acc[[acc]]
+    if (is.null(acc_mod_sites) || length(acc_mod_sites) == 0L) return(rows$row_index)
+
+    covers_modified_site <- vapply(seq_len(nrow(rows)), function(j) {
+      any(rows$Start[j] <= acc_mod_sites & rows$Stop[j] >= acc_mod_sites)
+    }, logical(1))
+    rows$row_index[!covers_modified_site]
+  })
+  names(background_by_acc) <- names(unmod_by_acc)
+  supporting_modified <- function(site_group) {
+    row_idx <- unique(site_group$row_index)
+    list(
+      row_idx = row_idx,
+      sequences = unique(peptable$Sequence[row_idx])
+    )
+  }
+  matching_unmodified <- function(site_group) {
+    acc <- unique(site_group$Accession)
+    protein_pos <- unique(site_group$ProteinPTMPos)
+    if (length(acc) != 1L || length(protein_pos) != 1L) return(integer(0))
+
+    rows <- unmod_by_acc[[acc]]
+    if (is.null(rows)) return(integer(0))
+    rows$row_index[rows$Start <= protein_pos & rows$Stop >= protein_pos]
+  }
+  site_condition_means <- function(site_group) {
+    mod_rows <- supporting_modified(site_group)$row_idx
+    unmod_rows <- matching_unmodified(site_group)
+    if (length(mod_rows) == 0L || length(unmod_rows) == 0L) return(NULL)
 
-  # Accession -> row index for unmodified peptides whose sequence never appears modified;
-  # these are the only rows that contribute to the protein background ratio (Rprot)
-  non_counterpart_unmod_idx <- which(!has_ptm & !(seqs %in% uniq_mod_seqs))
-  acc_to_rows <- list()
-  for (.idx in non_counterpart_unmod_idx) {
-    for (.acc in unlist(peptable$Accession[[.idx]])) {
-      acc_to_rows[[.acc]] <- c(acc_to_rows[[.acc]], .idx)
+    condition_mean <- function(rows, cols) {
+      mean(colMeans(quant_mat[rows, cols, drop = FALSE], na.rm = TRUE), na.rm = TRUE)
     }
+    list(
+      mod_mean = sapply(cond_cols, function(cols) condition_mean(mod_rows, cols)),
+      unmod_mean = sapply(cond_cols, function(cols) condition_mean(unmod_rows, cols)),
+      mod_rows = mod_rows,
+      unmod_rows = unmod_rows
+    )
   }
 
-  # Pre-allocate output vectors to avoid repeated memory reallocation
-  n_mod <- length(uniq_mod_seqs)
-  out_seq        <- vector("character", n_mod)
-  out_acc        <- vector("list", n_mod)
-  out_ptmpos     <- vector("list", n_mod)
-  out_protptmpos <- vector("list", n_mod)
-  out_ptmtype    <- vector("list", n_mod)
-  out_quant      <- vector("list", n_mod)
-  out_prob       <- vector("list", n_mod)
-  out_n <- 0L
-
-  for (seq in uniq_mod_seqs) {
-    mod_idx   <- mod_seq_idx[[seq]]
-    unmod_idx <- unmod_seq_idx[[seq]]
-
-    if (is.null(unmod_idx) || length(unmod_idx) == 0) next
+  site_protein_background <- function(site_group) {
+    acc <- unique(site_group$Accession)
+    if (length(acc) != 1L) return(integer(0))
+    rows <- background_by_acc[[acc]]
+    if (is.null(rows)) return(integer(0))
+    rows
+  }
+  site_estimate <- function(site_group) {
+    means <- site_condition_means(site_group)
+    if (is.null(means)) return(NULL)
 
-    if (length(mod_idx) > 1) {
-      mod_keys <- vapply(mod_idx, function(i) {
-        paste(unlist(peptable$PTMType[[i]]), unlist(peptable$PTMPos[[i]]), sep = "@", collapse = ";")
-      }, character(1))
-      if (length(unique(mod_keys)) > 1) {
-        warning("Found more than one modified peptidoform for peptide ", seq, "!!")
-      }
-      next
-    }
+    prot_rows <- site_protein_background(site_group)
+    if (length(prot_rows) == 0L) return(NULL)
 
-    if (length(unmod_idx) > 1) {
-      next
+    prot_mean <- sapply(cond_cols, function(cols) {
+      mean(colMeans(quant_mat[prot_rows, cols, drop = FALSE], na.rm = TRUE), na.rm = TRUE)
+    })
+    condition_sd <- function(rows) {
+      mean(sapply(cond_cols, function(cols) {
+        sd(colMeans(quant_mat[rows, cols, drop = FALSE], na.rm = TRUE), na.rm = TRUE)
+      }), na.rm = TRUE)
     }
-
-    if ("Accession" %in% names(peptable)) {
-      mod_acc <- unique_values(peptable$Accession[[mod_idx]])
-      unmod_acc <- unique_values(peptable$Accession[[unmod_idx]])
-      if (length(mod_acc) != 1 || length(unmod_acc) != 1 || mod_acc != unmod_acc) {
-        next
-      }
+    condition_ratio <- function(condition_mean) {
+      log_ratio <- condition_mean - condition_mean[1L]
+      2^log_ratio[seq_len(NumCond - 1L) + 1L]
     }
-
-    if ("Start" %in% names(peptable)) {
-      mod_start <- unique_values(peptable$Start[[mod_idx]])
-      if (length(mod_start) != 1) {
-        next
-      }
+    log_Rm <- means$mod_mean - means$mod_mean[1L]
+    log_Ru <- means$unmod_mean - means$unmod_mean[1L]
+    log_Rprot <- prot_mean - prot_mean[1L]
+    Rm <- condition_ratio(means$mod_mean)
+    Ru <- condition_ratio(means$unmod_mean)
+    Rprot <- condition_ratio(prot_mean)
+    occ <- mean((Rprot - Ru) / (Rm - Ru))
+
+    mod_sd <- condition_sd(means$mod_rows)
+    unmod_sd <- condition_sd(means$unmod_rows)
+    prot_sd <- condition_sd(prot_rows)
+    if (!isTRUE(mod_sd > 0)) mod_sd <- NA_real_
+    if (!isTRUE(unmod_sd > 0)) unmod_sd <- NA_real_
+    if (!isTRUE(prot_sd > 0)) prot_sd <- NA_real_
+
+    valid_sds <- c(mod_sd, unmod_sd, prot_sd)
+    valid_sds <- valid_sds[is.finite(valid_sds)]
+    if (length(valid_sds) > 0L) {
+      ref_sd <- max(valid_sds) * 10
+      if (is.na(mod_sd)) mod_sd <- ref_sd
+      if (is.na(unmod_sd)) unmod_sd <- ref_sd
+      if (is.na(prot_sd)) prot_sd <- ref_sd
     }
 
-    # Per-condition mean log2 of the same-sequence unmodified peptidoform.
-    # Used only to compute occ_ref (site-specific reference occupancy).
-    unmod_mean <- sapply(cond_cols, function(cols) {
-      mean(as.numeric(peptable[unmod_idx, cols]), na.rm=TRUE)
-    })
-
-    # Sd across replicates averaged across conditions
-    unmod_sd <- mean(sapply(cond_cols, function(cols) {
-      sd(as.numeric(peptable[unmod_idx, cols]), na.rm=TRUE)
-    }))
-
-
-    for (mi in mod_idx) {
-      # Per-condition mean log2 modified intensity
-      mod_mean <- sapply(cond_cols, function(cols) {
-        mean(as.numeric(peptable[mi, cols]), na.rm=TRUE)
-      })
-      mod_sd <- mean(sapply(cond_cols, function(cols) {
-        sd(as.numeric(peptable[mi, cols]), na.rm=TRUE)
-      }))
-
-      # Protein ratio (Rprot): use only unmodified peptides from the same protein
-      # accession whose sequence does NOT appear as modified (i.e., exclude any
-      # counterpart unmodified peptides).  This follows requirement (c): only
-      # unmodified peptides that do not have any modified version contribute to the
-      # protein background ratio.  colMeans() pools rows in log2 space (geometric
-      # mean in linear); mean() averages replicates within each condition.
-      # Fix 1: O(1) lookup via precomputed index instead of O(N) vapply scan
-      acc_vec        <- unlist(peptable$Accession[[mi]])
-      prot_unmod_idx <- unique(unlist(acc_to_rows[acc_vec]))
-      if (length(prot_unmod_idx) == 0L) next
-      prot_mean <- sapply(cond_cols, function(cols) {
-        mean(colMeans(as.matrix(peptable[prot_unmod_idx, cols, drop = FALSE]), na.rm=TRUE), na.rm = TRUE)
-      })
-      prot_sd <- mean(sapply(cond_cols, function(cols) {
-        sd(colMeans(as.matrix(peptable[prot_unmod_idx, cols, drop = FALSE]), na.rm=TRUE), na.rm = TRUE)
-      }))
-
-      log_Rm    <- mod_mean  - mod_mean[1L]   # 0 for c = 1 (reference)
-      log_Ru    <- unmod_mean  - unmod_mean[1L]   # 0 for c = 1 (reference)
-      log_Rprot <- prot_mean - prot_mean[1L]  # 0 for c = 1 (reference)
-
-      Rm        <- 2^log_Rm[seq_len(NumCond-1)+1]
-      Ru        <- 2^log_Ru[seq_len(NumCond-1)+1]
-      Rprot     <- 2^log_Rprot[seq_len(NumCond-1)+1]
-
-      occ    <- mean((Rprot - Ru) / (Rm - Ru))
-      occ        <- c(occ, occ * Rm / Rprot)
-
-      # Treat non-positive and NA SDs as unavailable (e.g. single replicate or
-      # identical replicates produce sd = NA or sd = 0).
-      if (!isTRUE(mod_sd   > 0)) mod_sd   <- NA_real_
-      if (!isTRUE(unmod_sd > 0)) unmod_sd <- NA_real_
-      if (!isTRUE(prot_sd  > 0)) prot_sd  <- NA_real_
-
-      # Fill missing SDs from the available ones (×10 as uncertainty inflation).
-      # Only attempt this when at least one valid SD exists; otherwise all remain
-      # NA, Sigma_ab will contain NAs, and the pmvnorm block is safely skipped.
-      valid_sds <- c(mod_sd, unmod_sd, prot_sd)
-      valid_sds <- valid_sds[is.finite(valid_sds)]
-      if (length(valid_sds) > 0) {
-        ref_sd <- max(valid_sds) * 10
-        if (is.na(mod_sd))   mod_sd   <- ref_sd
-        if (is.na(unmod_sd)) unmod_sd <- ref_sd
-        if (is.na(prot_sd))  prot_sd  <- ref_sd
-      }
-
-
-      # Calculating the probabilities from a multi-variate normal distirbution
-      Sigma_ab <- matrix(c(
-        prot_sd^2 + unmod_sd^2,   -prot_sd^2,
-        -prot_sd^2,           mod_sd^2 + prot_sd^2
-      ), nrow = 2, byrow = TRUE) * 2 / NumReps
-
-      mu_ab <- cbind(log_Rprot - log_Ru, log_Rm - log_Rprot)
-      p_both_neg <- p_both_pos <- vector(length=nrow(mu_ab))
-      if (all(is.finite(Sigma_ab))) {
-        for (i in seq_len(nrow(mu_ab))) {
-          if (!any(is.na(mu_ab[i,]))) {
-            p_both_neg[i] <- mvtnorm::pmvnorm(
-              lower = c(-Inf, -Inf),
-              upper = c(0, 0),
-              mean  = mu_ab[i, ],
-              sigma = Sigma_ab
-            )[1]
-
-            # P(A >= 0, B >= 0)
-            p_both_pos[i] <- mvtnorm::pmvnorm(
-              lower = c(0, 0),
-              upper = c(Inf, Inf),
-              mean  = mu_ab[i, ],
-              sigma = Sigma_ab
-            )[1]
-          }
+    Sigma_ab <- matrix(c(
+      prot_sd^2 + unmod_sd^2, -prot_sd^2,
+      -prot_sd^2, mod_sd^2 + prot_sd^2
+    ), nrow = 2, byrow = TRUE) * 2 / NumReps
+
+    ratio_idx <- seq_len(NumCond - 1L) + 1L
+    mu_ab <- cbind(log_Rprot[ratio_idx] - log_Ru[ratio_idx],
+                   log_Rm[ratio_idx] - log_Rprot[ratio_idx])
+    p_both_neg <- p_both_pos <- numeric(nrow(mu_ab))
+    if (all(is.finite(Sigma_ab))) {
+      for (i in seq_len(nrow(mu_ab))) {
+        if (!any(is.na(mu_ab[i, ]))) {
+          p_both_neg[i] <- as.numeric(mvtnorm::pmvnorm(
+            lower = c(-Inf, -Inf),
+            upper = c(0, 0),
+            mean = mu_ab[i, ],
+            sigma = Sigma_ab
+          )[1])
+
+          p_both_pos[i] <- as.numeric(mvtnorm::pmvnorm(
+            lower = c(0, 0),
+            upper = c(Inf, Inf),
+            mean = mu_ab[i, ],
+            sigma = Sigma_ab
+          )[1])
         }
       }
+    }
 
-      occ_prob <- p_both_neg + p_both_pos
-
-      occ_prob <- occ_prob[-1]
+    list(
+      occ = c(occ, occ * Rm / Rprot),
+      prob = p_both_neg + p_both_pos,
+      Rm = Rm,
+      Ru = Ru,
+      Rprot = Rprot,
+      mod_rows = means$mod_rows,
+      unmod_rows = means$unmod_rows,
+      prot_rows = prot_rows
+    )
+  }
 
-      protein_ptmpos <- NA_integer_
-      if ("Start" %in% names(peptable)) {
-        protein_ptmpos <- mod_start + unlist(peptable$PTMPos[[mi]]) - 1L
-      }
+  # Pre-allocate output vectors to avoid repeated memory reallocation
+  n_sites <- length(site_groups)
+  out_seq        <- vector("character", n_sites)
+  out_acc        <- vector("list", n_sites)
+  out_ptmpos     <- vector("list", n_sites)
+  out_protptmpos <- vector("list", n_sites)
+  out_ptmtype    <- vector("list", n_sites)
+  out_quant      <- vector("list", n_sites)
+  out_prob       <- vector("list", n_sites)
+  out_n <- 0L
 
-      out_n <- out_n + 1L
-      out_seq[[out_n]]        <- seq
-      out_acc[[out_n]]        <- peptable$Accession[[mi]]
-      out_ptmpos[[out_n]]     <- peptable$PTMPos[[mi]]
-      out_protptmpos[[out_n]] <- protein_ptmpos
-      out_ptmtype[[out_n]]    <- peptable$PTMType[[mi]]
-      out_quant[[out_n]]      <- setNames(as.list(occ), out_cond_names)
-      out_prob[[out_n]]       <- setNames(as.list(occ_prob), out_comp_names)
-    }
+  for (site_group in site_groups) {
+    estimate <- site_estimate(site_group)
+    if (is.null(estimate)) next
+
+    out_n <- out_n + 1L
+    out_seq[[out_n]]        <- paste(unique(site_group$Sequence), collapse = ";")
+    out_acc[[out_n]]        <- unique(site_group$Accession)
+    out_ptmpos[[out_n]]     <- unique(site_group$PTMPos)
+    out_protptmpos[[out_n]] <- unique(site_group$ProteinPTMPos)
+    out_ptmtype[[out_n]]    <- unique(site_group$PTMType)
+    out_quant[[out_n]]      <- setNames(as.list(estimate$occ), out_cond_names)
+    out_prob[[out_n]]       <- setNames(as.list(estimate$prob), out_comp_names)
   }
 
   if (out_n == 0L) {
-    message("calcPTMOccupancy: no peptides with both modified and unmodified forms found.")
+    message("calcPTMOccupancy: no PTM sites with modified and covering unmodified peptides found.")
     return(data.frame())
   }
 
@@ -650,7 +697,7 @@ calcPTMOccupancy <- function(peptable, parameters) {
   result$ProteinPTMPos   <- out_protptmpos
   result$PTMType         <- out_ptmtype
 
-  message("  - Occupancy calculated for ", nrow(result), " modified peptidoforms across ",
-          length(unique(out_seq)), " unique peptide sequences")
+  message("  - Occupancy calculated for ", nrow(result), " PTM sites across ",
+          length(unique(out_seq)), " modified peptide sequence groups")
   result
 }
diff --git a/tests/testthat/test-ptm-occupancy.R b/tests/testthat/test-ptm-occupancy.R
index b261f9a..de7d6ff 100644
--- a/tests/testthat/test-ptm-occupancy.R
+++ b/tests/testthat/test-ptm-occupancy.R
@@ -32,12 +32,16 @@ make_peptable <- function(mod_vals, unmod_vals, other_vals = unmod_vals,
   mod_row$PTMType    <- ptmtype
   mod_row$PTMPos     <- ptmpos
   mod_row$Accession  <- list("P12345")
+  mod_row$Start      <- list(10L)
+  mod_row$Stop       <- list(16L)
 
   unmod_row <- data.frame(Sequence = "AASPEPR", stringsAsFactors = FALSE)
   unmod_row[quant_cols] <- as.list(unmod_vals)
   unmod_row$PTMType  <- list(character(0))
   unmod_row$PTMPos   <- list(integer(0))
   unmod_row$Accession <- list("P12345")
+  unmod_row$Start    <- list(10L)
+  unmod_row$Stop     <- list(16L)
 
   # Non-counterpart unmodified peptide: different sequence, same protein.
   # Required for Rprot computation.
@@ -46,6 +50,8 @@ make_peptable <- function(mod_vals, unmod_vals, other_vals = unmod_vals,
   other_row$PTMType  <- list(character(0))
   other_row$PTMPos   <- list(integer(0))
   other_row$Accession <- list("P12345")
+  other_row$Start    <- list(30L)
+  other_row$Stop     <- list(38L)
 
   rbind(mod_row, unmod_row, other_row)
 }
@@ -54,7 +60,7 @@ make_peptable <- function(mod_vals, unmod_vals, other_vals = unmod_vals,
 # Basic correctness  (2 conditions × 2 replicates)
 # ──────────────────────────────────────────────────────────────────────────────
 
-test_that("calcPTMOccupancy returns one row per modified peptidoform", {
+test_that("calcPTMOccupancy returns one row per PTM site", {
   pep <- make_peptable(mod_vals = c(1, 1, 2, 2), unmod_vals = c(1, 1, 1, 1))
   occ <- calcPTMOccupancy(pep, make_params())
 
@@ -293,6 +299,8 @@ test_that("NA in non-counterpart background propagates via occ_1 to all output c
   other_row$Accession <- list("P33333")
 
   pep <- rbind(mod_row, unmod_same, other_row)
+  pep$Start <- I(list(10L, 10L, 30L))
+  pep$Stop <- I(list(16L, 16L, 37L))
   occ <- calcPTMOccupancy(pep, params)
 
   expect_true(is.na(occ[1, "C_1"]))   # occ_1 = NaN (is.na(NaN) == TRUE)
@@ -304,9 +312,9 @@ test_that("NA in non-counterpart background propagates via occ_1 to all output c
 # Multiple unmodified rows: geometric mean per condition
 # ──────────────────────────────────────────────────────────────────────────────
 
-test_that("sequence with multiple unmodified counterpart rows is skipped without warning", {
-  # The code requires exactly one unmodified row per sequence.  When two unmodified
-  # rows exist for the same sequence, the sequence is skipped without a peptidoform warning.
+test_that("site with multiple covering unmodified peptides is averaged without warning", {
+  # Multiple unmodified peptides covering the same protein site are valid site
+  # support rows and are averaged into Ru.
   params <- make_params(num_cond = 2, num_reps = 1)
   qc     <- params$QuantColnames   # C_1_R_1, C_2_R_1
 
@@ -332,18 +340,55 @@ test_that("sequence with multiple unmodified counterpart rows is skipped without
   other$Accession <- list("P11111")
 
   pep <- rbind(mod_row, unmod1, unmod2, other)
+  pep$Start <- I(list(10L, 10L, 10L, 30L))
+  pep$Stop <- I(list(18L, 18L, 18L, 37L))
 
   expect_warning(occ <- calcPTMOccupancy(pep, params), NA)
-  expect_equal(nrow(occ), 0L)
+  expect_equal(nrow(occ), 1L)
+})
+
+test_that("different unmodified peptide covering the PTM site contributes to Ru", {
+  # The unmodified peptide has a different stripped sequence, e.g. due to a
+  # missed cleavage, but still covers the same protein residue.
+  params <- make_params(num_cond = 2, num_reps = 1)
+  qc     <- params$QuantColnames
+
+  mod_row <- data.frame(Sequence = "MODSEQ", stringsAsFactors = FALSE)
+  mod_row[qc] <- as.list(c(0, 2))
+  mod_row$PTMType   <- list("ph")
+  mod_row$PTMPos    <- list(3L)
+  mod_row$Accession <- list("P11111")
+
+  unmod_covering <- data.frame(Sequence = "XXMODSEQK", stringsAsFactors = FALSE)
+  unmod_covering[qc] <- as.list(c(0, 0))
+  unmod_covering$PTMType   <- list(character(0))
+  unmod_covering$PTMPos    <- list(integer(0))
+  unmod_covering$Accession <- list("P11111")
+
+  other <- data.frame(Sequence = "OTHERPEP", stringsAsFactors = FALSE)
+  other[qc] <- as.list(c(0, 1))
+  other$PTMType   <- list(character(0))
+  other$PTMPos    <- list(integer(0))
+  other$Accession <- list("P11111")
+
+  pep <- rbind(mod_row, unmod_covering, other)
+  pep$Start <- I(list(10L, 8L, 30L))
+  pep$Stop <- I(list(15L, 16L, 37L))
+
+  occ <- calcPTMOccupancy(pep, params)
+
+  expect_equal(nrow(occ), 1L)
+  expect_equal(as.numeric(occ[1, "C_1"]), 1/3, tolerance = 1e-9)
+  expect_equal(as.numeric(occ[1, "C_2"]), 2/3, tolerance = 1e-9)
 })
 
 # ──────────────────────────────────────────────────────────────────────────────
 # Multiple modified peptidoforms for the same sequence
 # ──────────────────────────────────────────────────────────────────────────────
 
-test_that("sequence with multiple modified peptidoforms is skipped with a warning", {
-  # The code requires exactly one modified peptidoform per sequence.  When two
-  # modified peptidoforms exist for the same sequence a warning is issued and the sequence is skipped.
+test_that("sequence with multiple modified peptidoforms returns one row per PTM site", {
+  # Different modified peptidoforms on the same peptide sequence can represent
+  # different protein sites, so they are estimated separately.
   params <- make_params(num_cond = 2, num_reps = 1)
   qc     <- params$QuantColnames
 
@@ -369,12 +414,11 @@ test_that("sequence with multiple modified peptidoforms is skipped with a warnin
   other$Accession <- list("P22222")
 
   pep <- rbind(mod1, mod2, unmod, other)
+  pep$Start <- I(list(10L, 10L, 10L, 30L))
+  pep$Stop <- I(list(15L, 15L, 15L, 37L))
 
-  expect_warning(
-    occ <- calcPTMOccupancy(pep, params),
-    regexp = "more than one modified peptidoform"
-  )
-  expect_equal(nrow(occ), 0L)
+  expect_warning(occ <- calcPTMOccupancy(pep, params), NA)
+  expect_equal(nrow(occ), 2L)
 })
 
 # ──────────────────────────────────────────────────────────────────────────────
@@ -463,6 +507,8 @@ test_that("protein ratio uses only non-counterpart unmodified peptides", {
   unmod_other$Accession <- list("P12345")
 
   pep <- rbind(mod_row, unmod_same, unmod_other)
+  pep$Start <- I(list(10L, 10L, 30L))
+  pep$Stop <- I(list(15L, 15L, 37L))
   occ <- calcPTMOccupancy(pep, params)
 
   expect_equal(nrow(occ), 1L)
@@ -488,7 +534,11 @@ test_that("returns empty data.frame when modified peptide has no non-counterpart
   unmod_row$PTMPos   <- list(integer(0))
   unmod_row$Accession <- list("P00001")
 
-  occ <- calcPTMOccupancy(rbind(mod_row, unmod_row), params)
+  pep <- rbind(mod_row, unmod_row)
+  pep$Start <- I(list(10L, 10L))
+  pep$Stop <- I(list(18L, 18L))
+
+  occ <- calcPTMOccupancy(pep, params)
   expect_equal(nrow(occ), 0L)
 })
 
@@ -525,6 +575,8 @@ test_that("accession index is not contaminated across proteins", {
     r$PTMType   <- list(ptmtype)
     r$PTMPos    <- list(ptmpos)
     r$Accession <- list(acc)
+    r$Start     <- list(if (startsWith(seq, "SEQ")) 10L else 30L)
+    r$Stop      <- list(if (startsWith(seq, "SEQ")) 13L else 36L)
     r
   }
 
@@ -563,6 +615,8 @@ test_that("modified peptide with no same-protein background is skipped even with
     r$PTMType   <- list(ptmtype)
     r$PTMPos    <- list(ptmpos)
     r$Accession <- list(acc)
+    r$Start     <- list(if (startsWith(seq, "SEQ")) 10L else 30L)
+    r$Stop      <- list(if (startsWith(seq, "SEQ")) 13L else 36L)
     r
   }
 
diff --git a/vignettes/PTMOccupancy.html b/vignettes/PTMOccupancy.html
new file mode 100644
index 0000000..c2b0cd0
--- /dev/null
+++ b/vignettes/PTMOccupancy.html
@@ -0,0 +1,532 @@
+<!DOCTYPE html>
+
+<html>
+
+<head>
+
+<meta charset="utf-8" />
+<meta name="generator" content="pandoc" />
+<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+
+<meta name="author" content />
+
+<meta name="date" content="2026-05-12" />
+
+<title>PTM Occupancy Ground Truth</title>
+
+<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {  // run once the document has been parsed
+  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");  // first child of every level-N section div
+  var i, h, a;
+  for (i = 0; i < hs.length; i++) {
+    h = hs[i];
+    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // skip first children that are not h1-h6 headings
+    a = h.attributes;  // live list: it shrinks as attributes are removed below
+    while (a.length > 0) h.removeAttribute(a[0].name);  // strip every attribute from the heading
+  }
+});
+</script>
+
+<style type="text/css">
+code{white-space: pre-wrap;}
+span.smallcaps{font-variant: small-caps;}
+span.underline{text-decoration: underline;}
+div.column{display: inline-block; vertical-align: top; width: 50%;}
+div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
+ul.task-list{list-style: none;}
+</style>
+
+
+
+<style type="text/css">
+code {
+white-space: pre;
+}
+.sourceCode {
+overflow: visible;
+}
+</style>
+<style type="text/css" data-origin="pandoc">
+html { -webkit-text-size-adjust: 100%; }
+pre > code.sourceCode { white-space: pre; position: relative; }
+pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
+pre > code.sourceCode > span:empty { height: 1.2em; }
+.sourceCode { overflow: visible; }
+code.sourceCode > span { color: inherit; text-decoration: inherit; }
+div.sourceCode { margin: 1em 0; }
+pre.sourceCode { margin: 0; }
+@media screen {
+div.sourceCode { overflow: auto; }
+}
+@media print {
+pre > code.sourceCode { white-space: pre-wrap; }
+pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+}
+pre.numberSource code
+{ counter-reset: source-line 0; }
+pre.numberSource code > span
+{ position: relative; left: -4em; counter-increment: source-line; }
+pre.numberSource code > span > a:first-child::before
+{ content: counter(source-line);
+position: relative; left: -1em; text-align: right; vertical-align: baseline;
+border: none; display: inline-block;
+-webkit-touch-callout: none; -webkit-user-select: none;
+-khtml-user-select: none; -moz-user-select: none;
+-ms-user-select: none; user-select: none;
+padding: 0 4px; width: 4em;
+color: #aaaaaa;
+}
+pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
+div.sourceCode
+{ }
+@media screen {
+pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+}
+code span.al { color: #ff0000; font-weight: bold; } 
+code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } 
+code span.at { color: #7d9029; } 
+code span.bn { color: #40a070; } 
+code span.bu { color: #008000; } 
+code span.cf { color: #007020; font-weight: bold; } 
+code span.ch { color: #4070a0; } 
+code span.cn { color: #880000; } 
+code span.co { color: #60a0b0; font-style: italic; } 
+code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } 
+code span.do { color: #ba2121; font-style: italic; } 
+code span.dt { color: #902000; } 
+code span.dv { color: #40a070; } 
+code span.er { color: #ff0000; font-weight: bold; } 
+code span.ex { } 
+code span.fl { color: #40a070; } 
+code span.fu { color: #06287e; } 
+code span.im { color: #008000; font-weight: bold; } 
+code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } 
+code span.kw { color: #007020; font-weight: bold; } 
+code span.op { color: #666666; } 
+code span.ot { color: #007020; } 
+code span.pp { color: #bc7a00; } 
+code span.sc { color: #4070a0; } 
+code span.ss { color: #bb6688; } 
+code span.st { color: #4070a0; } 
+code span.va { color: #19177c; } 
+code span.vs { color: #4070a0; } 
+code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } 
+</style>
+<script>
+// apply pandoc div.sourceCode style to pre.sourceCode instead
+(function() {
+  var sheets = document.styleSheets;
+  for (var i = 0; i < sheets.length; i++) {
+    if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;  // only the stylesheet whose data-origin is pandoc
+    try { var rules = sheets[i].cssRules; } catch (e) { continue; }  // skip sheets whose rules cannot be read
+    var j = 0;
+    while (j < rules.length) {
+      var rule = rules[j];
+      // skip anything that is not a style rule whose selector is exactly div.sourceCode
+      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
+        j++;
+        continue;
+      }
+      var style = rule.style.cssText;
+      // skip rules that set neither color nor background-color
+      if (rule.style.color === '' && rule.style.backgroundColor === '') {
+        j++;
+        continue;
+      }
+      // replace the div.sourceCode rule by a pre.sourceCode rule at the same index
+      sheets[i].deleteRule(j);
+      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);  // j is not advanced: the new rule is re-read and then skipped by the selector check
+    }
+  }
+})();
+</script>
+
+
+
+
+<style type="text/css">body {
+background-color: #fff;
+margin: 1em auto;
+max-width: 700px;
+overflow: visible;
+padding-left: 2em;
+padding-right: 2em;
+font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
+font-size: 14px;
+line-height: 1.35;
+}
+#TOC {
+clear: both;
+margin: 0 0 10px 10px;
+padding: 4px;
+width: 400px;
+border: 1px solid #CCCCCC;
+border-radius: 5px;
+background-color: #f6f6f6;
+font-size: 13px;
+line-height: 1.3;
+}
+#TOC .toctitle {
+font-weight: bold;
+font-size: 15px;
+margin-left: 5px;
+}
+#TOC ul {
+padding-left: 40px;
+margin-left: -1.5em;
+margin-top: 5px;
+margin-bottom: 5px;
+}
+#TOC ul ul {
+margin-left: -2em;
+}
+#TOC li {
+line-height: 16px;
+}
+table {
+margin: 1em auto;
+border-width: 1px;
+border-color: #DDDDDD;
+border-style: outset;
+border-collapse: collapse;
+}
+table th {
+border-width: 2px;
+padding: 5px;
+border-style: inset;
+}
+table td {
+border-width: 1px;
+border-style: inset;
+line-height: 18px;
+padding: 5px 5px;
+}
+table, table th, table td {
+border-left-style: none;
+border-right-style: none;
+}
+table thead, table tr.even {
+background-color: #f7f7f7;
+}
+p {
+margin: 0.5em 0;
+}
+blockquote {
+background-color: #f6f6f6;
+padding: 0.25em 0.75em;
+}
+hr {
+border-style: solid;
+border: none;
+border-top: 1px solid #777;
+margin: 28px 0;
+}
+dl {
+margin-left: 0;
+}
+dl dd {
+margin-bottom: 13px;
+margin-left: 13px;
+}
+dl dt {
+font-weight: bold;
+}
+ul {
+margin-top: 0;
+}
+ul li {
+list-style: circle outside;
+}
+ul ul {
+margin-bottom: 0;
+}
+pre, code {
+background-color: #f7f7f7;
+border-radius: 3px;
+color: #333;
+white-space: pre-wrap; 
+}
+pre {
+border-radius: 3px;
+margin: 5px 0px 10px 0px;
+padding: 10px;
+}
+pre:not([class]) {
+background-color: #f7f7f7;
+}
+code {
+font-family: Consolas, Monaco, 'Courier New', monospace;
+font-size: 85%;
+}
+p > code, li > code {
+padding: 2px 0px;
+}
+div.figure {
+text-align: center;
+}
+img {
+background-color: #FFFFFF;
+padding: 2px;
+border: 1px solid #DDDDDD;
+border-radius: 3px;
+border: 1px solid #CCCCCC;
+margin: 0 5px;
+}
+h1 {
+margin-top: 0;
+font-size: 35px;
+line-height: 40px;
+}
+h2 {
+border-bottom: 4px solid #f7f7f7;
+padding-top: 10px;
+padding-bottom: 2px;
+font-size: 145%;
+}
+h3 {
+border-bottom: 2px solid #f7f7f7;
+padding-top: 10px;
+font-size: 120%;
+}
+h4 {
+border-bottom: 1px solid #f7f7f7;
+margin-left: 8px;
+font-size: 105%;
+}
+h5, h6 {
+border-bottom: 1px solid #ccc;
+font-size: 105%;
+}
+a {
+color: #0033dd;
+text-decoration: none;
+}
+a:hover {
+color: #6666ff; }
+a:visited {
+color: #800080; }
+a:visited:hover {
+color: #BB00BB; }
+a[href^="http:"] {
+text-decoration: underline; }
+a[href^="https:"] {
+text-decoration: underline; }
+
+code > span.kw { color: #555; font-weight: bold; } 
+code > span.dt { color: #902000; } 
+code > span.dv { color: #40a070; } 
+code > span.bn { color: #d14; } 
+code > span.fl { color: #d14; } 
+code > span.ch { color: #d14; } 
+code > span.st { color: #d14; } 
+code > span.co { color: #888888; font-style: italic; } 
+code > span.ot { color: #007020; } 
+code > span.al { color: #ff0000; font-weight: bold; } 
+code > span.fu { color: #900; font-weight: bold; } 
+code > span.er { color: #a61717; background-color: #e3d2d2; } 
+</style>
+
+
+
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">PTM Occupancy Ground Truth</h1>
+<h4 class="author"></h4>
+<h4 class="date">2026-05-12</h4>
+
+
+
+<div id="introduction" class="section level2">
+<h2>Introduction</h2>
+<p>This vignette runs a small ProteoMaker simulation with
+phosphorylation-like PTMs and compares the PTM occupancies estimated
+from peptide-level data with the ground-truth occupancies calculated
+from proteoform abundances.</p>
+<p>The estimated occupancy table reports both peptide-local and
+protein-level PTM positions. The comparison below joins estimated and
+ground-truth occupancies by accession, PTM type, and protein-level PTM site.</p>
+</div>
+<div id="configure-a-small-ptm-simulation" class="section level2">
+<h2>Configure a Small PTM Simulation</h2>
+<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">library</span>(ProteoMaker)</span>
+<span id="cb1-2"><a href="#cb1-2" tabindex="-1"></a></span>
+<span id="cb1-3"><a href="#cb1-3" tabindex="-1"></a>proteomaker_config <span class="ot">&lt;-</span> <span class="fu">set_proteomaker</span>(</span>
+<span id="cb1-4"><a href="#cb1-4" tabindex="-1"></a>  <span class="at">fastaFilePath =</span> <span class="fu">system.file</span>(<span class="st">&quot;Proteomes&quot;</span>, <span class="at">package =</span> <span class="st">&quot;ProteoMaker&quot;</span>),</span>
+<span id="cb1-5"><a href="#cb1-5" tabindex="-1"></a>  <span class="at">resultFilePath =</span> <span class="fu">file.path</span>(<span class="fu">tempdir</span>(), <span class="st">&quot;ProteoMaker_PTM_occupancy&quot;</span>),</span>
+<span id="cb1-6"><a href="#cb1-6" tabindex="-1"></a>  <span class="at">cores =</span> <span class="dv">1</span>,</span>
+<span id="cb1-7"><a href="#cb1-7" tabindex="-1"></a>  <span class="at">clusterType =</span> <span class="st">&quot;PSOCK&quot;</span>,</span>
+<span id="cb1-8"><a href="#cb1-8" tabindex="-1"></a>  <span class="at">runStatTests =</span> <span class="cn">TRUE</span>,</span>
+<span id="cb1-9"><a href="#cb1-9" tabindex="-1"></a>  <span class="at">calcAllBenchmarks =</span> <span class="cn">FALSE</span></span>
+<span id="cb1-10"><a href="#cb1-10" tabindex="-1"></a>)</span>
+<span id="cb1-11"><a href="#cb1-11" tabindex="-1"></a></span>
+<span id="cb1-12"><a href="#cb1-12" tabindex="-1"></a>Param <span class="ot">&lt;-</span> <span class="fu">def_param</span>()</span>
+<span id="cb1-13"><a href="#cb1-13" tabindex="-1"></a></span>
+<span id="cb1-14"><a href="#cb1-14" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>PathToFasta <span class="ot">&lt;-</span> <span class="st">&quot;fasta_example.fasta&quot;</span></span>
+<span id="cb1-15"><a href="#cb1-15" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>NumCond <span class="ot">&lt;-</span> <span class="dv">2</span></span>
+<span id="cb1-16"><a href="#cb1-16" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>NumReps <span class="ot">&lt;-</span> <span class="dv">3</span></span>
+<span id="cb1-17"><a href="#cb1-17" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>PercExpressedProt <span class="ot">&lt;-</span> <span class="dv">1</span></span>
+<span id="cb1-18"><a href="#cb1-18" tabindex="-1"></a></span>
+<span id="cb1-19"><a href="#cb1-19" tabindex="-1"></a><span class="co"># Generate modified proteins and keep their unmodified counterparts.</span></span>
+<span id="cb1-20"><a href="#cb1-20" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>FracModProt <span class="ot">&lt;-</span> <span class="fl">0.5</span></span>
+<span id="cb1-21"><a href="#cb1-21" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>PropModPerProt <span class="ot">&lt;-</span> <span class="dv">1</span></span>
+<span id="cb1-22"><a href="#cb1-22" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>PTMMultipleLambda <span class="ot">&lt;-</span> <span class="fl">0.0</span></span>
+<span id="cb1-23"><a href="#cb1-23" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>RemoveNonModFormFrac <span class="ot">&lt;-</span> <span class="dv">0</span></span>
+<span id="cb1-24"><a href="#cb1-24" tabindex="-1"></a></span>
+<span id="cb1-25"><a href="#cb1-25" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>PTMTypes <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">mods =</span> <span class="fu">c</span>(<span class="st">&quot;ph&quot;</span>))</span>
+<span id="cb1-26"><a href="#cb1-26" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>PTMTypesMass <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">ph =</span> <span class="fl">79.966331</span>)</span>
+<span id="cb1-27"><a href="#cb1-27" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>PTMTypesDistr <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">ph =</span> <span class="dv">1</span>)</span>
+<span id="cb1-28"><a href="#cb1-28" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>ModifiableResidues <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">mods =</span> <span class="fu">list</span>(<span class="at">ph =</span> <span class="fu">c</span>(<span class="st">&quot;S&quot;</span>, <span class="st">&quot;T&quot;</span>, <span class="st">&quot;Y&quot;</span>)))</span>
+<span id="cb1-29"><a href="#cb1-29" tabindex="-1"></a>Param<span class="sc">$</span>paramGroundTruth<span class="sc">$</span>ModifiableResiduesDistr <span class="ot">&lt;-</span> <span class="fu">list</span>(</span>
+<span id="cb1-30"><a href="#cb1-30" tabindex="-1"></a>  <span class="at">mods =</span> <span class="fu">list</span>(<span class="at">ph =</span> <span class="fu">c</span>(<span class="at">S =</span> <span class="fl">0.86</span>, <span class="at">T =</span> <span class="fl">0.13</span>, <span class="at">Y =</span> <span class="fl">0.01</span>))</span>
+<span id="cb1-31"><a href="#cb1-31" tabindex="-1"></a>)</span>
+<span id="cb1-32"><a href="#cb1-32" tabindex="-1"></a></span>
+<span id="cb1-33"><a href="#cb1-33" tabindex="-1"></a><span class="co"># Keep the MSRun simple so the comparison is easy to inspect.</span></span>
+<span id="cb1-34"><a href="#cb1-34" tabindex="-1"></a>Param<span class="sc">$</span>paramMSRun<span class="sc">$</span>PercDetectability <span class="ot">&lt;-</span> <span class="dv">1</span></span>
+<span id="cb1-35"><a href="#cb1-35" tabindex="-1"></a>Param<span class="sc">$</span>paramMSRun<span class="sc">$</span>PercDetectedVal <span class="ot">&lt;-</span> <span class="dv">1</span></span>
+<span id="cb1-36"><a href="#cb1-36" tabindex="-1"></a>Param<span class="sc">$</span>paramMSRun<span class="sc">$</span>MSNoise <span class="ot">&lt;-</span> <span class="dv">0</span></span>
+<span id="cb1-37"><a href="#cb1-37" tabindex="-1"></a>Param<span class="sc">$</span>paramMSRun<span class="sc">$</span>WrongIDs <span class="ot">&lt;-</span> <span class="fl">0.0</span></span>
+<span id="cb1-38"><a href="#cb1-38" tabindex="-1"></a>Param<span class="sc">$</span>paramMSRun<span class="sc">$</span>WrongLocalizations <span class="ot">&lt;-</span> <span class="fl">0.0</span></span>
+<span id="cb1-39"><a href="#cb1-39" tabindex="-1"></a>Param<span class="sc">$</span>paramMSRun<span class="sc">$</span>MaxNAPerPep <span class="ot">&lt;-</span> <span class="dv">100</span></span></code></pre></div>
+</div>
+<div id="run-the-simulation-and-retrieve-outputs" class="section level2">
+<h2>Run the Simulation and Retrieve Outputs</h2>
+<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>allBs <span class="ot">&lt;-</span> <span class="fu">run_sims</span>(Param, proteomaker_config, <span class="at">overwrite =</span> <span class="cn">TRUE</span>)</span>
+<span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a></span>
+<span id="cb2-3"><a href="#cb2-3" tabindex="-1"></a>sim_param <span class="ot">&lt;-</span> allBs[[<span class="dv">1</span>]]<span class="sc">$</span>Param</span>
+<span id="cb2-4"><a href="#cb2-4" tabindex="-1"></a>data_analysis <span class="ot">&lt;-</span> <span class="fu">get_simulation</span>(sim_param, proteomaker_config, <span class="at">stage =</span> <span class="st">&quot;DataAnalysis&quot;</span>)</span>
+<span id="cb2-5"><a href="#cb2-5" tabindex="-1"></a>proteoform_ab <span class="ot">&lt;-</span> <span class="fu">get_simulation</span>(sim_param, proteomaker_config, <span class="at">stage =</span> <span class="st">&quot;ProteoformAb&quot;</span>)<span class="sc">$</span>proteoformAb</span>
+<span id="cb2-6"><a href="#cb2-6" tabindex="-1"></a></span>
+<span id="cb2-7"><a href="#cb2-7" tabindex="-1"></a>estimated_occ <span class="ot">&lt;-</span> data_analysis<span class="sc">$</span>Occupancies</span>
+<span id="cb2-8"><a href="#cb2-8" tabindex="-1"></a>ground_truth_occ <span class="ot">&lt;-</span> <span class="fu">calcGroundTruthPTMOccupancy</span>(proteoform_ab, sim_param)</span></code></pre></div>
+</div>
+<div id="site-counts" class="section level2">
+<h2>Site Counts</h2>
+<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>condition_cols <span class="ot">&lt;-</span> <span class="fu">paste0</span>(<span class="st">&quot;C_&quot;</span>, <span class="fu">seq_len</span>(sim_param<span class="sc">$</span>NumCond))</span>
+<span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a>condition_replicates <span class="ot">&lt;-</span> <span class="fu">split</span>(sim_param<span class="sc">$</span>QuantColnames, <span class="fu">rep</span>(condition_cols, <span class="at">each =</span> sim_param<span class="sc">$</span>NumReps))</span>
+<span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a></span>
+<span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a><span class="cf">for</span> (cond <span class="cf">in</span> condition_cols) {</span>
+<span id="cb3-5"><a href="#cb3-5" tabindex="-1"></a>  ground_truth_occ[[cond]] <span class="ot">&lt;-</span> <span class="fu">rowMeans</span>(</span>
+<span id="cb3-6"><a href="#cb3-6" tabindex="-1"></a>    ground_truth_occ[, condition_replicates[[cond]], <span class="at">drop =</span> <span class="cn">FALSE</span>],</span>
+<span id="cb3-7"><a href="#cb3-7" tabindex="-1"></a>    <span class="at">na.rm =</span> <span class="cn">TRUE</span></span>
+<span id="cb3-8"><a href="#cb3-8" tabindex="-1"></a>  )</span>
+<span id="cb3-9"><a href="#cb3-9" tabindex="-1"></a>}</span>
+<span id="cb3-10"><a href="#cb3-10" tabindex="-1"></a></span>
+<span id="cb3-11"><a href="#cb3-11" tabindex="-1"></a>estimated_sites <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(</span>
+<span id="cb3-12"><a href="#cb3-12" tabindex="-1"></a>  <span class="at">Accession =</span> <span class="fu">vapply</span>(estimated_occ<span class="sc">$</span>Accession, <span class="cf">function</span>(x) <span class="fu">unlist</span>(x)[<span class="dv">1</span>], <span class="fu">character</span>(<span class="dv">1</span>)),</span>
+<span id="cb3-13"><a href="#cb3-13" tabindex="-1"></a>  <span class="at">PTMType =</span> <span class="fu">vapply</span>(estimated_occ<span class="sc">$</span>PTMType, <span class="cf">function</span>(x) <span class="fu">unlist</span>(x)[<span class="dv">1</span>], <span class="fu">character</span>(<span class="dv">1</span>)),</span>
+<span id="cb3-14"><a href="#cb3-14" tabindex="-1"></a>  <span class="at">PTMPos =</span> <span class="fu">vapply</span>(estimated_occ<span class="sc">$</span>ProteinPTMPos, <span class="cf">function</span>(x) <span class="fu">as.integer</span>(<span class="fu">unlist</span>(x)[<span class="dv">1</span>]), <span class="fu">integer</span>(<span class="dv">1</span>)),</span>
+<span id="cb3-15"><a href="#cb3-15" tabindex="-1"></a>  estimated_occ[, condition_cols, <span class="at">drop =</span> <span class="cn">FALSE</span>]</span>
+<span id="cb3-16"><a href="#cb3-16" tabindex="-1"></a>)</span>
+<span id="cb3-17"><a href="#cb3-17" tabindex="-1"></a></span>
+<span id="cb3-18"><a href="#cb3-18" tabindex="-1"></a>ground_truth_sites <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(</span>
+<span id="cb3-19"><a href="#cb3-19" tabindex="-1"></a>  <span class="at">Accession =</span> <span class="fu">vapply</span>(ground_truth_occ<span class="sc">$</span>Accession, <span class="cf">function</span>(x) <span class="fu">unlist</span>(x)[<span class="dv">1</span>], <span class="fu">character</span>(<span class="dv">1</span>)),</span>
+<span id="cb3-20"><a href="#cb3-20" tabindex="-1"></a>  <span class="at">PTMType =</span> <span class="fu">vapply</span>(ground_truth_occ<span class="sc">$</span>PTMType, <span class="cf">function</span>(x) <span class="fu">unlist</span>(x)[<span class="dv">1</span>], <span class="fu">character</span>(<span class="dv">1</span>)),</span>
+<span id="cb3-21"><a href="#cb3-21" tabindex="-1"></a>  <span class="at">PTMPos =</span> <span class="fu">vapply</span>(ground_truth_occ<span class="sc">$</span>PTMPos, <span class="cf">function</span>(x) <span class="fu">unlist</span>(x)[<span class="dv">1</span>], <span class="fu">integer</span>(<span class="dv">1</span>)),</span>
+<span id="cb3-22"><a href="#cb3-22" tabindex="-1"></a>  ground_truth_occ[, condition_cols, <span class="at">drop =</span> <span class="cn">FALSE</span>]</span>
+<span id="cb3-23"><a href="#cb3-23" tabindex="-1"></a>)</span>
+<span id="cb3-24"><a href="#cb3-24" tabindex="-1"></a></span>
+<span id="cb3-25"><a href="#cb3-25" tabindex="-1"></a>comparison <span class="ot">&lt;-</span> <span class="fu">merge</span>(</span>
+<span id="cb3-26"><a href="#cb3-26" tabindex="-1"></a>  estimated_sites,</span>
+<span id="cb3-27"><a href="#cb3-27" tabindex="-1"></a>  ground_truth_sites,</span>
+<span id="cb3-28"><a href="#cb3-28" tabindex="-1"></a>  <span class="at">by =</span> <span class="fu">c</span>(<span class="st">&quot;Accession&quot;</span>, <span class="st">&quot;PTMType&quot;</span>, <span class="st">&quot;PTMPos&quot;</span>),</span>
+<span id="cb3-29"><a href="#cb3-29" tabindex="-1"></a>  <span class="at">suffixes =</span> <span class="fu">c</span>(<span class="st">&quot;_estimated&quot;</span>, <span class="st">&quot;_ground_truth&quot;</span>)</span>
+<span id="cb3-30"><a href="#cb3-30" tabindex="-1"></a>)</span>
+<span id="cb3-31"><a href="#cb3-31" tabindex="-1"></a></span>
+<span id="cb3-32"><a href="#cb3-32" tabindex="-1"></a>site_cols <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">&quot;Accession&quot;</span>, <span class="st">&quot;PTMType&quot;</span>, <span class="st">&quot;PTMPos&quot;</span>)</span>
+<span id="cb3-33"><a href="#cb3-33" tabindex="-1"></a>estimated_site_ids <span class="ot">&lt;-</span> <span class="fu">unique</span>(estimated_sites[, site_cols])</span>
+<span id="cb3-34"><a href="#cb3-34" tabindex="-1"></a>ground_truth_site_ids <span class="ot">&lt;-</span> <span class="fu">unique</span>(ground_truth_sites[, site_cols])</span>
+<span id="cb3-35"><a href="#cb3-35" tabindex="-1"></a>matched_site_ids <span class="ot">&lt;-</span> <span class="fu">unique</span>(comparison[, site_cols])</span>
+<span id="cb3-36"><a href="#cb3-36" tabindex="-1"></a></span>
+<span id="cb3-37"><a href="#cb3-37" tabindex="-1"></a>counts <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(</span>
+<span id="cb3-38"><a href="#cb3-38" tabindex="-1"></a>  <span class="at">Metric =</span> <span class="fu">c</span>(</span>
+<span id="cb3-39"><a href="#cb3-39" tabindex="-1"></a>    <span class="st">&quot;Ground-truth PTM sites&quot;</span>,</span>
+<span id="cb3-40"><a href="#cb3-40" tabindex="-1"></a>    <span class="st">&quot;Estimated PTM sites&quot;</span>,</span>
+<span id="cb3-41"><a href="#cb3-41" tabindex="-1"></a>    <span class="st">&quot;Matched PTM sites&quot;</span>,</span>
+<span id="cb3-42"><a href="#cb3-42" tabindex="-1"></a>    <span class="st">&quot;Ground truth only&quot;</span>,</span>
+<span id="cb3-43"><a href="#cb3-43" tabindex="-1"></a>    <span class="st">&quot;Estimated only&quot;</span></span>
+<span id="cb3-44"><a href="#cb3-44" tabindex="-1"></a>  ),</span>
+<span id="cb3-45"><a href="#cb3-45" tabindex="-1"></a>  <span class="at">Count =</span> <span class="fu">c</span>(</span>
+<span id="cb3-46"><a href="#cb3-46" tabindex="-1"></a>    <span class="fu">nrow</span>(ground_truth_site_ids),</span>
+<span id="cb3-47"><a href="#cb3-47" tabindex="-1"></a>    <span class="fu">nrow</span>(estimated_site_ids),</span>
+<span id="cb3-48"><a href="#cb3-48" tabindex="-1"></a>    <span class="fu">nrow</span>(matched_site_ids),</span>
+<span id="cb3-49"><a href="#cb3-49" tabindex="-1"></a>    <span class="fu">nrow</span>(ground_truth_site_ids) <span class="sc">-</span> <span class="fu">nrow</span>(matched_site_ids),</span>
+<span id="cb3-50"><a href="#cb3-50" tabindex="-1"></a>    <span class="fu">nrow</span>(estimated_site_ids) <span class="sc">-</span> <span class="fu">nrow</span>(matched_site_ids)</span>
+<span id="cb3-51"><a href="#cb3-51" tabindex="-1"></a>  )</span>
+<span id="cb3-52"><a href="#cb3-52" tabindex="-1"></a>)</span>
+<span id="cb3-53"><a href="#cb3-53" tabindex="-1"></a></span>
+<span id="cb3-54"><a href="#cb3-54" tabindex="-1"></a>counts</span></code></pre></div>
+</div>
+<div id="occupancy-distributions" class="section level2">
+<h2>Occupancy Distributions</h2>
+<p>This section checks whether the estimated and ground-truth occupancies
+span similar ranges. Ground-truth distributions are shown after averaging
+replicates within each condition.</p>
+<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a>estimated_distribution <span class="ot">&lt;-</span> <span class="fu">summary</span>(estimated_occ[, condition_cols, <span class="at">drop =</span> <span class="cn">FALSE</span>])</span>
+<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a>ground_truth_distribution <span class="ot">&lt;-</span> <span class="fu">summary</span>(ground_truth_occ[, condition_cols, <span class="at">drop =</span> <span class="cn">FALSE</span>])</span>
+<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a></span>
+<span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a>estimated_distribution</span>
+<span id="cb4-5"><a href="#cb4-5" tabindex="-1"></a>ground_truth_distribution</span></code></pre></div>
+<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a><span class="co"># histograms </span></span>
+<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> condition_cols) {</span>
+<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a>  <span class="fu">hist</span>(estimated_occ[[i]], <span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;Estimated occupancy -&quot;</span>, i), <span class="at">xlab =</span> <span class="st">&quot;Occupancy&quot;</span>, </span>
+<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a>       <span class="at">breaks =</span> <span class="dv">20</span><span class="sc">*</span><span class="fu">diff</span>(<span class="fu">range</span>(estimated_occ[[i]])), <span class="at">xlim =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">1</span>, <span class="dv">2</span>), <span class="at">border =</span> <span class="cn">NA</span>, <span class="at">col =</span> <span class="st">&quot;#1b9e77&quot;</span>)</span>
+<span id="cb5-5"><a href="#cb5-5" tabindex="-1"></a>  <span class="fu">hist</span>(ground_truth_occ[[i]], <span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;Ground truth occupancy -&quot;</span>, i), <span class="at">xlab =</span> <span class="st">&quot;Occupancy&quot;</span>, </span>
+<span id="cb5-6"><a href="#cb5-6" tabindex="-1"></a>       <span class="at">xlim=</span><span class="fu">c</span>(<span class="sc">-</span><span class="dv">1</span>,<span class="dv">2</span>), <span class="at">breaks =</span> <span class="dv">20</span>, <span class="at">border =</span> <span class="cn">NA</span>, <span class="at">col=</span><span class="st">&quot;#d95f02&quot;</span>)</span>
+<span id="cb5-7"><a href="#cb5-7" tabindex="-1"></a>}</span></code></pre></div>
+</div>
+<div id="direct-value-comparison" class="section level2">
+<h2>Direct Value Comparison</h2>
+<p>For matched PTM sites, compare the estimated and ground-truth occupancy
+values per condition, on both the linear and the log10 scale.</p>
+<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a><span class="cf">for</span> (i <span class="cf">in</span> condition_cols) {</span>
+<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a>  <span class="fu">plot</span>(</span>
+<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a>    comparison[[<span class="fu">paste0</span>(i, <span class="st">&quot;_estimated&quot;</span>)]],</span>
+<span id="cb6-4"><a href="#cb6-4" tabindex="-1"></a>    comparison[[<span class="fu">paste0</span>(i, <span class="st">&quot;_ground_truth&quot;</span>)]],</span>
+<span id="cb6-5"><a href="#cb6-5" tabindex="-1"></a>    <span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;Estimated vs Ground Truth Occupancy -&quot;</span>, i),</span>
+<span id="cb6-6"><a href="#cb6-6" tabindex="-1"></a>    <span class="at">xlab =</span> <span class="st">&quot;Estimated Occupancy&quot;</span>,</span>
+<span id="cb6-7"><a href="#cb6-7" tabindex="-1"></a>    <span class="at">ylab =</span> <span class="st">&quot;Ground Truth Occupancy&quot;</span>,</span>
+<span id="cb6-8"><a href="#cb6-8" tabindex="-1"></a>    <span class="at">pch =</span> <span class="dv">16</span>, <span class="at">col =</span> <span class="fu">rgb</span>(<span class="fl">0.1</span>, <span class="fl">0.1</span>, <span class="fl">0.1</span>, <span class="fl">0.5</span>)</span>
+<span id="cb6-9"><a href="#cb6-9" tabindex="-1"></a>  )</span>
+<span id="cb6-10"><a href="#cb6-10" tabindex="-1"></a>  <span class="fu">abline</span>(<span class="at">a =</span> <span class="dv">0</span>, <span class="at">b =</span> <span class="dv">1</span>, <span class="at">col =</span> <span class="st">&quot;red&quot;</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span>
+<span id="cb6-11"><a href="#cb6-11" tabindex="-1"></a>}</span>
+<span id="cb6-12"><a href="#cb6-12" tabindex="-1"></a></span>
+<span id="cb6-13"><a href="#cb6-13" tabindex="-1"></a><span class="co"># on log-scale</span></span>
+<span id="cb6-14"><a href="#cb6-14" tabindex="-1"></a><span class="cf">for</span> (i <span class="cf">in</span> condition_cols) {</span>
+<span id="cb6-15"><a href="#cb6-15" tabindex="-1"></a>  <span class="fu">plot</span>(</span>
+<span id="cb6-16"><a href="#cb6-16" tabindex="-1"></a>    <span class="fu">log10</span>(comparison[[<span class="fu">paste0</span>(i, <span class="st">&quot;_estimated&quot;</span>)]] <span class="sc">+</span> <span class="fl">1e-6</span>),</span>
+<span id="cb6-17"><a href="#cb6-17" tabindex="-1"></a>    <span class="fu">log10</span>(comparison[[<span class="fu">paste0</span>(i, <span class="st">&quot;_ground_truth&quot;</span>)]] <span class="sc">+</span> <span class="fl">1e-6</span>),</span>
+<span id="cb6-18"><a href="#cb6-18" tabindex="-1"></a>    <span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;Estimated vs Ground Truth Occupancy (log10) -&quot;</span>, i),</span>
+<span id="cb6-19"><a href="#cb6-19" tabindex="-1"></a>    <span class="at">xlab =</span> <span class="st">&quot;Estimated Occupancy (log10)&quot;</span>,</span>
+<span id="cb6-20"><a href="#cb6-20" tabindex="-1"></a>    <span class="at">ylab =</span> <span class="st">&quot;Ground Truth Occupancy (log10)&quot;</span>,</span>
+<span id="cb6-21"><a href="#cb6-21" tabindex="-1"></a>    <span class="at">pch =</span> <span class="dv">16</span>, <span class="at">col =</span> <span class="fu">rgb</span>(<span class="fl">0.1</span>, <span class="fl">0.1</span>, <span class="fl">0.1</span>, <span class="fl">0.5</span>)</span>
+<span id="cb6-22"><a href="#cb6-22" tabindex="-1"></a>  )</span>
+<span id="cb6-23"><a href="#cb6-23" tabindex="-1"></a>  <span class="fu">abline</span>(<span class="at">a =</span> <span class="dv">0</span>, <span class="at">b =</span> <span class="dv">1</span>, <span class="at">col =</span> <span class="st">&quot;red&quot;</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span>
+<span id="cb6-24"><a href="#cb6-24" tabindex="-1"></a>}</span></code></pre></div>
+</div>
+
+
+
+<!-- code folding -->
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+  (function () {
+    var script = document.createElement("script");  // build the script element at runtime instead of embedding it in the page
+    script.type = "text/javascript";
+    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+    document.getElementsByTagName("head")[0].appendChild(script);  // attach it to the first head element
+  })();
+</script>
+
+</body>
+</html>