Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ categories = ["command-line-utilities", "science"]
homepage = "https://seqeralabs.github.io/RustQC/"
exclude = ["benchmark/", "docs/", "paper/", "tests/", ".github/", "Dockerfile", ".dockerignore", ".pre-commit-config.yaml", "netlify.toml", "CONTRIBUTING.md", "AGENTS.md"]

[lib]
name = "rustqc"
path = "src/lib.rs"

[[bin]]
name = "rustqc"
path = "src/main.rs"
Expand Down
31 changes: 3 additions & 28 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,9 @@
//!
//! A GTF gene annotation file is required for all analyses.

use clap::{CommandFactory, Parser, Subcommand, ValueEnum};
use serde::Deserialize;
use clap::{CommandFactory, Parser, Subcommand};

/// Library strandedness protocol.
///
/// Determines how read strand is interpreted relative to the gene annotation
/// strand during counting. Accepted CLI values: `unstranded`, `forward`, `reverse`.
///
/// Deserialized from the lowercase variant names (`rename_all = "lowercase"`);
/// [`Strandedness::Unstranded`] is the `Default` value.
// NOTE(review): clap's `ValueEnum` derive presumably reuses the variant doc
// comments below as CLI help text, so treat them as user-facing — confirm
// before rewording.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Strandedness {
/// Count reads on either strand (library is not strand-specific).
#[default]
Unstranded,
/// Forward stranded: read 1 maps to the transcript strand.
Forward,
/// Reverse stranded: read 2 maps to the transcript strand (e.g. dUTP).
Reverse,
}

impl std::fmt::Display for Strandedness {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Strandedness::Unstranded => write!(f, "unstranded"),
Strandedness::Forward => write!(f, "forward"),
Strandedness::Reverse => write!(f, "reverse"),
}
}
}
use rustqc::Strandedness;

/// Fast quality control tools for sequencing data, written in Rust.
#[derive(Parser, Debug)]
Expand Down Expand Up @@ -407,7 +382,7 @@ pub fn parse_args() -> Cli {
env!("CARGO_PKG_VERSION"),
env!("GIT_SHORT_HASH"),
env!("BUILD_TIMESTAMP"),
crate::cpu::cpu_info_line(),
rustqc::cpu::cpu_info_line(),
)
.into_boxed_str(),
);
Expand Down
2 changes: 1 addition & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! like chromosome name mappings between alignment file and GTF references,
//! per-tool output configuration, and tool enable/disable toggles.

use crate::cli::Strandedness;
use crate::Strandedness;
use anyhow::{Context, Result};
use serde::Deserialize;
use serde_yaml_ng::Value;
Expand Down
105 changes: 105 additions & 0 deletions src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Seek};
use std::path::Path;
use std::time::Duration;

/// Gzip magic bytes: the first two bytes of any gzip-compressed file.
const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];
Expand Down Expand Up @@ -101,6 +102,56 @@ pub fn format_with_commas(n: u64) -> String {
result
}

/// Format a count with a short magnitude suffix (e.g. "1.5K", "48.2M", "2.3G").
///
/// Used for compact human-readable counts in progress messages and summaries.
///
/// - Values below 1000 are printed verbatim (e.g. `"999"`).
/// - Larger values are scaled by powers of 1000 and printed with one decimal
///   place followed by `K`, `M`, `G`, `T`, `P` or `E`.
///
/// The unit is chosen with rounding taken into account, so 999,999 is rendered
/// as `"1.0M"` rather than `"1000.0K"`.
pub fn format_count(n: u64) -> String {
    // Suffixes for 10^3 through 10^18; `u64::MAX` (~1.8e19) still fits in `E`.
    const SUFFIXES: [&str; 6] = ["K", "M", "G", "T", "P", "E"];
    // Smallest mantissa that `{:.1}` would print as "1000.0".
    const ROLLOVER: f64 = 999.95;

    if n < 1000 {
        return n.to_string();
    }

    let mut scaled = n as f64 / 1000.0;
    let mut unit = 0;
    // Promote to the next unit while the printed mantissa would reach 1000.0.
    while scaled >= ROLLOVER && unit + 1 < SUFFIXES.len() {
        scaled /= 1000.0;
        unit += 1;
    }
    format!("{scaled:.1}{}", SUFFIXES[unit])
}

/// Render `n` as a parenthesised percentage of `total`, e.g. "(83.3%)".
///
/// One decimal place is always shown. A `total` of zero yields "(0.0%)"
/// instead of dividing by zero.
pub fn format_pct(n: u64, total: u64) -> String {
    match total {
        0 => "(0.0%)".to_string(),
        _ => {
            let fraction = n as f64 / total as f64;
            format!("({:.1}%)", fraction * 100.0)
        }
    }
}

/// Format a duration as human-friendly seconds, m:ss or h:mm:ss.
///
/// - Under 60s: `"45.2s"` (one decimal place)
/// - Under 1h: `"1:23"`
/// - Over 1h: `"1:02:34"`
///
/// Durations in `[59.95s, 60s)` would round up to `"60.0s"` in the seconds
/// form, so they are reported as `"1:00"` instead. In the minute and hour
/// forms, fractional seconds are truncated.
pub fn format_duration(d: Duration) -> String {
    // Smallest sub-minute value that `{:.1}` would print as "60.0".
    const ROLLOVER_SECS: f64 = 59.95;

    let fractional_secs = d.as_secs_f64();
    if fractional_secs < ROLLOVER_SECS {
        return format!("{fractional_secs:.1}s");
    }

    // Only whole seconds are shown from here on. `max(60)` promotes the
    // rollover window just below one minute to exactly one minute.
    let total_secs = d.as_secs().max(60);
    let hours = total_secs / 3600;
    let minutes = (total_secs % 3600) / 60;
    let seconds = total_secs % 60;
    if hours > 0 {
        format!("{hours}:{minutes:02}:{seconds:02}")
    } else {
        format!("{minutes}:{seconds:02}")
    }
}

// ============================================================
// Numeric helpers
// ============================================================
Expand Down Expand Up @@ -181,6 +232,60 @@ mod tests {
assert_eq!(format_with_commas(1234567), "1,234,567");
}

#[test]
fn test_format_count_small() {
    // Counts below one thousand are printed without a suffix.
    for (input, expected) in [(0, "0"), (42, "42"), (999, "999")] {
        assert_eq!(format_count(input), expected);
    }
}

#[test]
fn test_format_count_thousands() {
    // Thousands use the "K" suffix with one decimal place.
    for (input, expected) in [(1000, "1.0K"), (1500, "1.5K"), (50000, "50.0K")] {
        assert_eq!(format_count(input), expected);
    }
}

#[test]
fn test_format_count_millions() {
    // Millions use the "M" suffix with one decimal place.
    let cases = [
        (1_000_000, "1.0M"),
        (48_200_000, "48.2M"),
        (50_000_000, "50.0M"),
    ];
    for (input, expected) in cases {
        assert_eq!(format_count(input), expected);
    }
}

#[test]
fn test_format_count_billions() {
    // Billions use the "G" suffix with one decimal place.
    for (input, expected) in [(1_000_000_000, "1.0G"), (5_000_000_000, "5.0G")] {
        assert_eq!(format_count(input), expected);
    }
}

#[test]
fn test_format_pct() {
    // Covers a typical ratio, the zero-total guard, and the 100% case.
    let cases = [
        (833, 1000, "(83.3%)"),
        (0, 0, "(0.0%)"),
        (1000, 1000, "(100.0%)"),
    ];
    for (n, total, expected) in cases {
        assert_eq!(format_pct(n, total), expected);
    }
}

#[test]
fn test_format_duration_seconds() {
    // Sub-minute durations are shown as fractional seconds.
    for (secs, expected) in [(0.5, "0.5s"), (45.2, "45.2s"), (59.9, "59.9s")] {
        assert_eq!(format_duration(Duration::from_secs_f64(secs)), expected);
    }
}

#[test]
fn test_format_duration_minutes() {
    // From one minute up to just under an hour the m:ss form is used.
    for (secs, expected) in [(60, "1:00"), (83, "1:23"), (3599, "59:59")] {
        assert_eq!(format_duration(Duration::from_secs(secs)), expected);
    }
}

#[test]
fn test_format_duration_hours() {
    // One hour and above switches to the h:mm:ss form.
    for (secs, expected) in [(3600, "1:00:00"), (3754, "1:02:34")] {
        assert_eq!(format_duration(Duration::from_secs(secs)), expected);
    }
}

#[test]
fn test_open_reader_plain() {
let content = "line1\nline2\nline3\n";
Expand Down
56 changes: 56 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//! RustQC — fast quality control tools for sequencing data.
//!
//! This is the library API. The companion CLI (`rustqc` binary) is built on
//! top of these same modules and provides a single-pass RNA-Seq QC pipeline
//! that runs dupRadar, featureCounts, RSeQC tools, preseq, samtools-style
//! outputs, and Qualimap analyses.
//!
//! Library consumers can drive individual analyses directly. The submodules
//! are organised by tool family:
//!
//! - [`gtf`] — GTF gene-annotation parsing.
//! - [`io`] — shared I/O helpers (transparent gzip decompression, etc.).
//! - [`config`] — configuration types (mirrors the CLI's YAML config).
//! - [`summary`] — serializable types for the JSON run summary.
//! - [`cpu`] — CPU feature detection and binary-target identification.
//! - [`rna`] — the RNA-Seq QC analysis modules (dupRadar, featureCounts,
//! RSeQC, Qualimap, preseq, samtools-style outputs).
//!
//! The [`Strandedness`] enum lives at the crate root because it is used
//! across most analysis modules.

use clap::ValueEnum;
use serde::Deserialize;

pub mod config;
pub mod cpu;
pub mod gtf;
pub mod io;
pub mod rna;
pub mod summary;

/// Library strandedness protocol.
///
/// Determines how read strand is interpreted relative to the gene annotation
/// strand during counting. Accepted CLI values: `unstranded`, `forward`, `reverse`.
///
/// Deserialized from the lowercase variant names (`rename_all = "lowercase"`);
/// [`Strandedness::Unstranded`] is the `Default` value.
// NOTE(review): clap's `ValueEnum` derive presumably reuses the variant doc
// comments below as CLI help text, so treat them as user-facing — confirm
// before rewording.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Strandedness {
/// Count reads on either strand (library is not strand-specific).
#[default]
Unstranded,
/// Forward stranded: read 1 maps to the transcript strand.
Forward,
/// Reverse stranded: read 2 maps to the transcript strand (e.g. dUTP).
Reverse,
}

impl std::fmt::Display for Strandedness {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Strandedness::Unstranded => write!(f, "unstranded"),
Strandedness::Forward => write!(f, "forward"),
Strandedness::Reverse => write!(f, "reverse"),
}
}
}
15 changes: 6 additions & 9 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,6 @@

mod citations;
mod cli;
mod config;
mod cpu;
mod gtf;
mod io;
mod rna;
mod summary;
mod ui;

use anyhow::{ensure, Context, Result};
Expand All @@ -27,7 +21,10 @@ use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::time::{Instant, SystemTime, UNIX_EPOCH};

use ui::{format_count, format_duration, format_pct, Ui, Verbosity};
use rustqc::io::{format_count, format_duration, format_pct};
use rustqc::{config, cpu, gtf, rna, summary};

use ui::{Ui, Verbosity};

use rust_htslib::bam::Read as BamRead;

Expand Down Expand Up @@ -310,7 +307,7 @@ fn run_rna(args: cli::RnaArgs, ui: &Ui) -> Result<()> {
let effective_stranded = args
.stranded
.or(config.stranded)
.unwrap_or(cli::Strandedness::Unstranded);
.unwrap_or(rustqc::Strandedness::Unstranded);
let effective_paired = args.paired || config.paired.unwrap_or(false);

if n_bams == 1 {
Expand Down Expand Up @@ -749,7 +746,7 @@ struct SharedParams<'a> {
/// Terminal UI handle.
ui: &'a Ui,
/// Library strandedness.
stranded: cli::Strandedness,
stranded: rustqc::Strandedness,
/// Whether the library is paired-end.
paired: bool,
/// Alignment-to-GTF chromosome name mapping.
Expand Down
4 changes: 2 additions & 2 deletions src/rna/dupradar/counting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
//!
//! This implements a simplified featureCounts-compatible counting strategy.

use crate::cli::Strandedness;
use crate::gtf::Gene;
use crate::io::format_count;
use crate::rna::qualimap::QualimapAccum;
use crate::rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig};
use crate::ui::format_count;
use crate::Strandedness;
use anyhow::{Context, Result};
use coitrees::{COITree, Interval, IntervalTree};
use indexmap::IndexMap;
Expand Down
2 changes: 1 addition & 1 deletion src/rna/qualimap/accumulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use coitrees::IntervalTree;
use rust_htslib::bam;
use rust_htslib::bam::record::Cigar;

use crate::cli::Strandedness;
use crate::Strandedness;

use super::coverage::TranscriptCoverage;
use super::index::QualimapIndex;
Expand Down
2 changes: 1 addition & 1 deletion src/rna/qualimap/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use super::coverage::TranscriptCoverage;
use super::index::QualimapIndex;
use super::plots;
use super::QualimapResult;
use crate::cli::Strandedness;
use crate::Strandedness;

// ============================= Constants =======================================

Expand Down
2 changes: 1 addition & 1 deletion src/rna/qualimap/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::path::Path;

use anyhow::{Context, Result};

use crate::cli::Strandedness;
use crate::Strandedness;
use log::debug;

// ===================================================================
Expand Down
2 changes: 1 addition & 1 deletion src/rna/rseqc/infer_experiment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
//! gene models (from GTF annotation) and determines the fraction consistent with
//! each strand protocol.

use crate::cli::Strandedness;
use crate::gtf::Gene;
use crate::Strandedness;
use anyhow::{Context, Result};
use indexmap::IndexMap;
use log::debug;
Expand Down
7 changes: 6 additions & 1 deletion src/rna/rseqc/tin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ impl TinResults {
pub fn len(&self) -> usize {
self.transcripts.len()
}

/// Whether there are no transcripts with computed TIN scores.
///
/// Returns `true` exactly when the `transcripts` collection reports itself
/// as empty.
pub fn is_empty(&self) -> bool {
self.transcripts.is_empty()
}
}

// ===================================================================
Expand Down Expand Up @@ -331,7 +336,7 @@ pub struct TinAccum {
/// Per-transcript unique read start positions, capped at `min_cov + 1`.
/// Once exceeded, the set is drained and `exceeded_threshold[tx_idx]`
/// is set instead.
pub unique_starts: Vec<HashSet<u64, TinHashState>>,
pub(crate) unique_starts: Vec<HashSet<u64, TinHashState>>,
/// Per-transcript flag: true once unique start count exceeded `min_cov`.
/// Avoids further HashSet inserts for high-coverage transcripts.
pub exceeded_threshold: Vec<bool>,
Expand Down
Loading
Loading