seqeralabs · ewels · Apr 28, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -13,6 +13,10 @@ categories = ["command-line-utilities", "science"]
 homepage = "https://seqeralabs.github.io/RustQC/"
 exclude = ["benchmark/", "docs/", "paper/", "tests/", ".github/", "Dockerfile", ".dockerignore", ".pre-commit-config.yaml", "netlify.toml", "CONTRIBUTING.md", "AGENTS.md"]
 
+[lib]
+name = "rustqc"
+path = "src/lib.rs"
+
 [[bin]]
 name = "rustqc"
 path = "src/main.rs"

diff --git a/src/cli.rs b/src/cli.rs
@@ -10,34 +10,9 @@
 //!
 //! A GTF gene annotation file is required for all analyses.
 
-use clap::{CommandFactory, Parser, Subcommand, ValueEnum};
-use serde::Deserialize;
+use clap::{CommandFactory, Parser, Subcommand};
 
-/// Library strandedness protocol.
-///
-/// Determines how read strand is interpreted relative to the gene annotation
-/// strand during counting. Accepted CLI values: `unstranded`, `forward`, `reverse`.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum Strandedness {
-    /// Count reads on either strand (library is not strand-specific).
-    #[default]
-    Unstranded,
-    /// Forward stranded: read 1 maps to the transcript strand.
-    Forward,
-    /// Reverse stranded: read 2 maps to the transcript strand (e.g. dUTP).
-    Reverse,
-}
-
-impl std::fmt::Display for Strandedness {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Strandedness::Unstranded => write!(f, "unstranded"),
-            Strandedness::Forward => write!(f, "forward"),
-            Strandedness::Reverse => write!(f, "reverse"),
-        }
-    }
-}
+use rustqc::Strandedness;
 
 /// Fast quality control tools for sequencing data, written in Rust.
 #[derive(Parser, Debug)]
@@ -407,7 +382,7 @@ pub fn parse_args() -> Cli {
             env!("CARGO_PKG_VERSION"),
             env!("GIT_SHORT_HASH"),
             env!("BUILD_TIMESTAMP"),
-            crate::cpu::cpu_info_line(),
+            rustqc::cpu::cpu_info_line(),
         )
         .into_boxed_str(),
     );

diff --git a/src/config.rs b/src/config.rs
@@ -4,7 +4,7 @@
 //! like chromosome name mappings between alignment file and GTF references,
 //! per-tool output configuration, and tool enable/disable toggles.
 
-use crate::cli::Strandedness;
+use crate::Strandedness;
 use anyhow::{Context, Result};
 use serde::Deserialize;
 use serde_yaml_ng::Value;

diff --git a/src/io.rs b/src/io.rs
@@ -9,6 +9,7 @@ use flate2::read::GzDecoder;
 use std::fs::File;
 use std::io::{BufRead, BufReader, Read, Seek};
 use std::path::Path;
+use std::time::Duration;
 
 /// Gzip magic bytes: the first two bytes of any gzip-compressed file.
 const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];
@@ -101,6 +102,56 @@ pub fn format_with_commas(n: u64) -> String {
     result
 }
 
+/// Format a count with SI suffixes (e.g. "1.5K", "48.2M", "2.3G").
+///
+/// Counts below 1000 are printed verbatim; larger counts are scaled by powers
+/// of 1000 and shown with one decimal place and a K/M/G/T/P/E suffix. A value
+/// that would round up to "1000.0" is promoted to the next unit (999_950 is
+/// "1.0M"), so the mantissa always stays below 1000.
+pub fn format_count(n: u64) -> String {
+    const SUFFIXES: [&str; 6] = ["K", "M", "G", "T", "P", "E"];
+    if n < 1000 {
+        return n.to_string();
+    }
+    let mut scaled = n as f64 / 1000.0;
+    let mut unit = 0;
+    // 999.95 is the smallest mantissa that `{:.1}` would render as "1000.0".
+    while scaled >= 999.95 && unit + 1 < SUFFIXES.len() {
+        scaled /= 1000.0;
+        unit += 1;
+    }
+    format!("{scaled:.1}{}", SUFFIXES[unit])
+}
+
+/// Percentage of `n` out of `total` in parentheses, e.g. "(83.3%)"; "(0.0%)" when `total` is 0.
+pub fn format_pct(n: u64, total: u64) -> String {
+    match total {
+        0 => String::from("(0.0%)"),
+        _ => format!("({:.1}%)", n as f64 / total as f64 * 100.0),
+    }
+}
+
+/// Format a duration as human-friendly seconds, m:ss or h:mm:ss.
+///
+/// - Under 60s: `"45.2s"` (values that would round to `"60.0s"` give `"1:00"`)
+/// - Under 1h: `"1:23"` (fractional seconds are truncated)
+/// - Over 1h: `"1:02:34"`
+pub fn format_duration(d: Duration) -> String {
+    let secs = d.as_secs_f64();
+    if secs < 59.95 {
+        return format!("{secs:.1}s");
+    }
+    let total_secs = d.as_secs().max(60); // 59.95..60s rounds up to a full minute
+    let hours = total_secs / 3600;
+    let minutes = (total_secs % 3600) / 60;
+    let seconds = total_secs % 60;
+    if hours > 0 {
+        format!("{hours}:{minutes:02}:{seconds:02}")
+    } else {
+        format!("{minutes}:{seconds:02}")
+    }
+}
+
 // ============================================================
 // Numeric helpers
 // ============================================================
@@ -181,6 +232,60 @@ mod tests {
         assert_eq!(format_with_commas(1234567), "1,234,567");
     }
 
+    #[test]
+    fn test_format_count_small() {
+        for (n, want) in [(0, "0"), (42, "42"), (999, "999")] {
+            assert_eq!(format_count(n), want);
+        }
+    }
+
+    #[test]
+    fn test_format_count_thousands() {
+        for (n, want) in [(1000, "1.0K"), (1500, "1.5K"), (50000, "50.0K")] {
+            assert_eq!(format_count(n), want);
+        }
+    }
+
+    #[test]
+    fn test_format_count_millions() {
+        let got = [1_000_000, 48_200_000, 50_000_000].map(format_count);
+        // Whole and fractional multiples of one million.
+        assert_eq!(got, ["1.0M", "48.2M", "50.0M"]);
+    }
+
+    #[test]
+    fn test_format_count_billions() {
+        let got = [1_000_000_000, 5_000_000_000].map(format_count);
+        assert_eq!(got, ["1.0G", "5.0G"]);
+    }
+
+    #[test]
+    fn test_format_pct() {
+        // Covers a typical ratio, the zero-total guard, and the 100% case.
+        let got = [(833, 1000), (0, 0), (1000, 1000)].map(|(n, total)| format_pct(n, total));
+        assert_eq!(got, ["(83.3%)", "(0.0%)", "(100.0%)"]);
+    }
+
+    #[test]
+    fn test_format_duration_seconds() {
+        for (secs, want) in [(0.5, "0.5s"), (45.2, "45.2s"), (59.9, "59.9s")] {
+            assert_eq!(format_duration(Duration::from_secs_f64(secs)), want);
+        }
+    }
+
+    #[test]
+    fn test_format_duration_minutes() {
+        for (secs, want) in [(60, "1:00"), (83, "1:23"), (3599, "59:59")] {
+            assert_eq!(format_duration(Duration::from_secs(secs)), want);
+        }
+    }
+
+    #[test]
+    fn test_format_duration_hours() {
+        let got = [3600, 3754].map(|s| format_duration(Duration::from_secs(s)));
+        assert_eq!(got, ["1:00:00", "1:02:34"]);
+    }
+
     #[test]
     fn test_open_reader_plain() {
         let content = "line1\nline2\nline3\n";

diff --git a/src/lib.rs b/src/lib.rs
@@ -0,0 +1,56 @@
+//! RustQC — fast quality control tools for sequencing data.
+//!
+//! This is the library API. The companion CLI (`rustqc` binary) is built on
+//! top of these same modules and provides a single-pass RNA-Seq QC pipeline
+//! that runs dupRadar, featureCounts, RSeQC tools, preseq, samtools-style
+//! outputs, and Qualimap analyses.
+//!
+//! Library consumers can drive individual analyses directly. The submodules
+//! are organised by tool family:
+//!
+//! - [`gtf`] — GTF gene-annotation parsing.
+//! - [`io`] — shared I/O helpers (transparent gzip decompression, etc.).
+//! - [`config`] — configuration types (mirrors the CLI's YAML config).
+//! - [`summary`] — serializable types for the JSON run summary.
+//! - [`cpu`] — CPU feature detection and binary-target identification.
+//! - [`rna`] — the RNA-Seq QC analysis modules (dupRadar, featureCounts,
+//!   RSeQC, Qualimap, preseq, samtools-style outputs).
+//!
+//! The [`Strandedness`] enum lives at the crate root because it is used
+//! across most analysis modules.
+
+use clap::ValueEnum;
+use serde::Deserialize;
+
+pub mod config;
+pub mod cpu;
+pub mod gtf;
+pub mod io;
+pub mod rna;
+pub mod summary;
+
+/// Library strandedness protocol (defaults to `Unstranded`).
+///
+/// Determines how read strand is interpreted relative to the gene annotation
+/// strand during counting. Accepted CLI/config values: `unstranded`, `forward`, `reverse`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum Strandedness {
+    /// Count reads on either strand (library is not strand-specific).
+    #[default]
+    Unstranded,
+    /// Forward stranded: read 1 maps to the transcript strand.
+    Forward,
+    /// Reverse stranded: read 2 maps to the transcript strand (e.g. dUTP).
+    Reverse,
+}
+
+impl std::fmt::Display for Strandedness {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Unstranded => "unstranded",
+            Self::Forward => "forward",
+            Self::Reverse => "reverse",
+        })
+    }
+}
diff --git a/src/main.rs b/src/main.rs
@@ -11,12 +11,6 @@
 
 mod citations;
 mod cli;
-mod config;
-mod cpu;
-mod gtf;
-mod io;
-mod rna;
-mod summary;
 mod ui;
 
 use anyhow::{ensure, Context, Result};
@@ -27,7 +21,10 @@ use std::collections::{HashMap, HashSet};
 use std::path::Path;
 use std::time::{Instant, SystemTime, UNIX_EPOCH};
 
-use ui::{format_count, format_duration, format_pct, Ui, Verbosity};
+use rustqc::io::{format_count, format_duration, format_pct};
+use rustqc::{config, cpu, gtf, rna, summary};
+
+use ui::{Ui, Verbosity};
 
 use rust_htslib::bam::Read as BamRead;
 
@@ -310,7 +307,7 @@ fn run_rna(args: cli::RnaArgs, ui: &Ui) -> Result<()> {
     let effective_stranded = args
         .stranded
         .or(config.stranded)
-        .unwrap_or(cli::Strandedness::Unstranded);
+        .unwrap_or(rustqc::Strandedness::Unstranded);
     let effective_paired = args.paired || config.paired.unwrap_or(false);
 
     if n_bams == 1 {
@@ -749,7 +746,7 @@ struct SharedParams<'a> {
     /// Terminal UI handle.
     ui: &'a Ui,
     /// Library strandedness.
-    stranded: cli::Strandedness,
+    stranded: rustqc::Strandedness,
     /// Whether the library is paired-end.
     paired: bool,
     /// Alignment-to-GTF chromosome name mapping.

diff --git a/src/rna/dupradar/counting.rs b/src/rna/dupradar/counting.rs
@@ -9,11 +9,11 @@
 //!
 //! This implements a simplified featureCounts-compatible counting strategy.
 
-use crate::cli::Strandedness;
 use crate::gtf::Gene;
+use crate::io::format_count;
 use crate::rna::qualimap::QualimapAccum;
 use crate::rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig};
-use crate::ui::format_count;
+use crate::Strandedness;
 use anyhow::{Context, Result};
 use coitrees::{COITree, Interval, IntervalTree};
 use indexmap::IndexMap;

diff --git a/src/rna/qualimap/accumulator.rs b/src/rna/qualimap/accumulator.rs
@@ -13,7 +13,7 @@ use coitrees::IntervalTree;
 use rust_htslib::bam;
 use rust_htslib::bam::record::Cigar;
 
-use crate::cli::Strandedness;
+use crate::Strandedness;
 
 use super::coverage::TranscriptCoverage;
 use super::index::QualimapIndex;

diff --git a/src/rna/qualimap/output.rs b/src/rna/qualimap/output.rs
@@ -13,7 +13,7 @@ use super::coverage::TranscriptCoverage;
 use super::index::QualimapIndex;
 use super::plots;
 use super::QualimapResult;
-use crate::cli::Strandedness;
+use crate::Strandedness;
 
 // ============================= Constants =======================================
 

diff --git a/src/rna/qualimap/report.rs b/src/rna/qualimap/report.rs
@@ -9,7 +9,7 @@ use std::path::Path;
 
 use anyhow::{Context, Result};
 
-use crate::cli::Strandedness;
+use crate::Strandedness;
 use log::debug;
 
 // ===================================================================

diff --git a/src/rna/rseqc/infer_experiment.rs b/src/rna/rseqc/infer_experiment.rs
@@ -4,8 +4,8 @@
 //! gene models (from GTF annotation) and determines the fraction consistent with
 //! each strand protocol.
 
-use crate::cli::Strandedness;
 use crate::gtf::Gene;
+use crate::Strandedness;
 use anyhow::{Context, Result};
 use indexmap::IndexMap;
 use log::debug;

diff --git a/src/rna/rseqc/tin.rs b/src/rna/rseqc/tin.rs
@@ -127,6 +127,11 @@ impl TinResults {
     pub fn len(&self) -> usize {
         self.transcripts.len()
     }
+
+    /// Returns `true` when no transcripts have computed TIN scores (`len() == 0`).
+    pub fn is_empty(&self) -> bool {
+        self.transcripts.is_empty()
+    }
 }
 
 // ===================================================================
@@ -331,7 +336,7 @@ pub struct TinAccum {
     /// Per-transcript unique read start positions, capped at `min_cov + 1`.
     /// Once exceeded, the set is drained and `exceeded_threshold[tx_idx]`
     /// is set instead.
-    pub unique_starts: Vec<HashSet<u64, TinHashState>>,
+    pub(crate) unique_starts: Vec<HashSet<u64, TinHashState>>,
     /// Per-transcript flag: true once unique start count exceeded `min_cov`.
     /// Avoids further HashSet inserts for high-coverage transcripts.
     pub exceeded_threshold: Vec<bool>,