diff --git a/Cargo.toml b/Cargo.toml
index c297033..ebf02f0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,10 @@ categories = ["command-line-utilities", "science"]
homepage = "https://seqeralabs.github.io/RustQC/"
exclude = ["benchmark/", "docs/", "paper/", "tests/", ".github/", "Dockerfile", ".dockerignore", ".pre-commit-config.yaml", "netlify.toml", "CONTRIBUTING.md", "AGENTS.md"]
+[lib]
+name = "rustqc"
+path = "src/lib.rs"
+
[[bin]]
name = "rustqc"
path = "src/main.rs"
diff --git a/README.md b/README.md
index d0f48d3..f576fc7 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,17 @@ cargo install rustqc
See the [documentation](https://seqeralabs.github.io/RustQC/) for full usage details, configuration options, output file descriptions, and benchmark results.
+## Use as a Rust library
+
+The crate is also published as a library, so the QC analysis modules (GTF parsing, dupRadar, featureCounts, RSeQC, Qualimap, preseq, samtools-style outputs) can be embedded into other Rust programs:
+
+```toml
+[dependencies]
+rustqc = "0.2"
+```
+
+See the [library guide](https://seqeralabs.github.io/RustQC/usage/library/) and the full API reference on [docs.rs/rustqc](https://docs.rs/rustqc).
+
## AI & Provenance
RustQC was developed with substantial assistance from AI coding agents (primarily [Claude](https://claude.ai/)), using the upstream tool source code as reference. Correctness is validated by comparing output against the original tools on real sequencing data, not by manual code review alone. See the [AI & Provenance](https://seqeralabs.github.io/RustQC/about/ai-statement/) documentation for full details, including known validation gaps.
diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs
index f58fb4e..4e1c2f4 100644
--- a/docs/astro.config.mjs
+++ b/docs/astro.config.mjs
@@ -57,6 +57,7 @@ export default defineConfig({
slug: "usage/configuration",
},
{ label: "Performance & Tuning", slug: "usage/performance" },
+ { label: "Rust Library", slug: "usage/library" },
],
},
{
diff --git a/docs/src/content/docs/usage/library.mdx b/docs/src/content/docs/usage/library.mdx
new file mode 100644
index 0000000..dd74d60
--- /dev/null
+++ b/docs/src/content/docs/usage/library.mdx
@@ -0,0 +1,102 @@
+---
+title: Rust Library
+description: Use RustQC as a Rust library crate, embedding its QC analysis modules in your own programs.
+---
+
+import { Aside } from "@astrojs/starlight/components";
+
+RustQC is published on [crates.io](https://crates.io/crates/rustqc) as both a
+binary and a library. The CLI (`rustqc rna ...`) is the primary interface, but
+the same analysis modules are also exposed as a library so they can be embedded
+into other Rust programs.
+
+Full API reference: **[docs.rs/rustqc](https://docs.rs/rustqc)**.
+
+## Adding RustQC as a dependency
+
+```toml
+[dependencies]
+rustqc = "0.2"
+```
+
+`rust-htslib` is linked statically and a small C++ component (used by the preseq
+tool) is built from source, so a working C/C++ toolchain (`cc`, `c++`) is
+required when building. No runtime dependencies are added beyond what the binary
+already needs.
+
+## What's in the library
+
+The crate exposes these modules:
+
+| Module | Contents |
+| ----------------------------- | --------------------------------------------------------------------------------------------------------- |
+| [`gtf`][docs-gtf] | GTF gene-annotation parsing. `Gene`, `Transcript`, `Exon`, `parse_gtf`. |
+| [`io`][docs-io] | Transparent gzip-aware reader, FNV-1a hashing, number formatters. |
+| [`config`][docs-config] | Configuration types mirroring the CLI's YAML config file. |
+| [`summary`][docs-summary] | Serializable types for the JSON run summary. |
+| [`cpu`][docs-cpu] | CPU feature detection and binary-target identification. |
+| [`rna`][docs-rna] | RNA-Seq analyses: `dupradar`, `featurecounts`, `qualimap`, `preseq`, `rseqc`. |
+
+[`Strandedness`][docs-strandedness] lives at the crate root because it is used
+across most analysis modules.
+
+[docs-gtf]: https://docs.rs/rustqc/latest/rustqc/gtf/
+[docs-io]: https://docs.rs/rustqc/latest/rustqc/io/
+[docs-config]: https://docs.rs/rustqc/latest/rustqc/config/
+[docs-summary]: https://docs.rs/rustqc/latest/rustqc/summary/
+[docs-cpu]: https://docs.rs/rustqc/latest/rustqc/cpu/
+[docs-rna]: https://docs.rs/rustqc/latest/rustqc/rna/
+[docs-strandedness]: https://docs.rs/rustqc/latest/rustqc/enum.Strandedness.html
+
+## Quick examples
+
+Parse a GTF file:
+
+```rust
+use rustqc::gtf;
+
+let genes = gtf::parse_gtf("genes.gtf", &[])?;
+println!("{} genes parsed", genes.len());
+for (gene_id, gene) in genes.iter().take(3) {
+ println!("{gene_id}: {} transcripts", gene.transcripts.len());
+}
+Ok::<(), anyhow::Error>(())
+```
+
+Open a possibly-gzipped annotation or output file with one call:
+
+```rust
+use std::io::BufRead;
+use rustqc::io::open_reader;
+
+let reader = open_reader("counts.tsv.gz")?;
+for line in reader.lines() {
+ println!("{}", line?);
+}
+Ok::<(), anyhow::Error>(())
+```
+
+Use the `Strandedness` enum (it derives `serde::Deserialize` and clap's
+`ValueEnum`, so it integrates with both YAML configs and CLI parsers):
+
+```rust
+use rustqc::Strandedness;
+
+let s = Strandedness::Reverse;
+assert_eq!(s.to_string(), "reverse");
+```
+
+## Stability
+
+The library is at `0.2.x` and the public surface is intentionally small. Expect
+breaking changes in minor releases until `1.0`. Module visibility may be
+narrowed in future versions if internal types are inadvertently exposed.
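+
+<Aside type="note">
+The full single-pass RNA-Seq pipeline (the `run_rna` orchestrator that the
+binary uses) is not yet exposed as a library entry point, so for now library
+consumers drive individual analyses themselves. Pipeline-level orchestration
+may be exposed in a future release; see [issue #72][issue-72].
+</Aside>
+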
+[issue-72]: https://github.com/seqeralabs/RustQC/issues/72
diff --git a/src/cli.rs b/src/cli.rs
index 07e50a1..6e6459e 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -10,34 +10,9 @@
//!
//! A GTF gene annotation file is required for all analyses.
-use clap::{CommandFactory, Parser, Subcommand, ValueEnum};
-use serde::Deserialize;
+use clap::{CommandFactory, Parser, Subcommand};
-/// Library strandedness protocol.
-///
-/// Determines how read strand is interpreted relative to the gene annotation
-/// strand during counting. Accepted CLI values: `unstranded`, `forward`, `reverse`.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum Strandedness {
- /// Count reads on either strand (library is not strand-specific).
- #[default]
- Unstranded,
- /// Forward stranded: read 1 maps to the transcript strand.
- Forward,
- /// Reverse stranded: read 2 maps to the transcript strand (e.g. dUTP).
- Reverse,
-}
-
-impl std::fmt::Display for Strandedness {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- match self {
- Strandedness::Unstranded => write!(f, "unstranded"),
- Strandedness::Forward => write!(f, "forward"),
- Strandedness::Reverse => write!(f, "reverse"),
- }
- }
-}
+use rustqc::Strandedness;
/// Fast quality control tools for sequencing data, written in Rust.
#[derive(Parser, Debug)]
@@ -407,7 +382,7 @@ pub fn parse_args() -> Cli {
env!("CARGO_PKG_VERSION"),
env!("GIT_SHORT_HASH"),
env!("BUILD_TIMESTAMP"),
- crate::cpu::cpu_info_line(),
+ rustqc::cpu::cpu_info_line(),
)
.into_boxed_str(),
);
diff --git a/src/config.rs b/src/config.rs
index 7dd93bc..4952a1f 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -4,7 +4,7 @@
//! like chromosome name mappings between alignment file and GTF references,
//! per-tool output configuration, and tool enable/disable toggles.
-use crate::cli::Strandedness;
+use crate::Strandedness;
use anyhow::{Context, Result};
use serde::Deserialize;
use serde_yaml_ng::Value;
@@ -1213,7 +1213,7 @@ preseq:
deep_merge(&mut base, overlay);
let m = base.as_mapping().unwrap();
let items = m
- .get(&Value::String("items".into()))
+ .get(Value::String("items".into()))
.unwrap()
.as_sequence()
.unwrap();
@@ -1268,7 +1268,7 @@ preseq:
let paths = collect_config_paths(Some("/tmp/nonexistent.yml"));
// The -c flag should always be last
- assert!(paths.last().unwrap().0 == PathBuf::from("/tmp/nonexistent.yml"));
+ assert!(paths.last().unwrap().0 == Path::new("/tmp/nonexistent.yml"));
assert_eq!(paths.last().unwrap().1, "-c flag");
// Restore
diff --git a/src/io.rs b/src/io.rs
index 9177ae2..9030aff 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -9,6 +9,7 @@ use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Seek};
use std::path::Path;
+use std::time::Duration;
/// Gzip magic bytes: the first two bytes of any gzip-compressed file.
const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];
@@ -101,6 +102,56 @@ pub fn format_with_commas(n: u64) -> String {
result
}
+/// Format a count with short SI suffixes (e.g. "1.5K", "48.2M", "2.3G").
+///
+/// Values below 1000 are printed as-is; larger ones get one decimal place and a suffix.
+pub fn format_count(n: u64) -> String {
+    use number_prefix::NumberPrefix;
+    match NumberPrefix::decimal(n as f64) {
+        NumberPrefix::Standalone(n) => format!("{n}"),
+        NumberPrefix::Prefixed(prefix, n) => {
+            // Upper-case "K" (SI uses "k"); larger prefixes use their short symbol.
+            let suffix = match prefix {
+                number_prefix::Prefix::Kilo => "K",
+                number_prefix::Prefix::Mega => "M",
+                number_prefix::Prefix::Giga => "G",
+                number_prefix::Prefix::Tera => "T",
+                other => other.symbol(), // "P", "E", ... instead of the Debug name ("Peta")
+            };
+            format!("{n:.1}{suffix}")
+        }
+    }
+}
+
+/// Format `n / total` as a parenthesised percentage, e.g. "(83.3%)"; "(0.0%)" if `total` is 0.
+pub fn format_pct(n: u64, total: u64) -> String {
+ if total == 0 {
+ return "(0.0%)".to_string();
+ }
+ format!("({:.1}%)", n as f64 / total as f64 * 100.0)
+}
+
+/// Format a duration as human-friendly seconds, m:ss, or h:mm:ss.
+///
+/// - Under 60s: `"45.2s"` (one decimal place)
+/// - Under 1h: `"1:23"` (whole seconds; the fractional part is dropped)
+/// - 1h or more: `"1:02:34"`
+pub fn format_duration(d: Duration) -> String {
+ let total_secs = d.as_secs_f64();
+ if total_secs < 60.0 {
+ return format!("{total_secs:.1}s");
+ }
+ let total_secs = d.as_secs();
+ let hours = total_secs / 3600;
+ let minutes = (total_secs % 3600) / 60;
+ let seconds = total_secs % 60;
+ if hours > 0 {
+ format!("{hours}:{minutes:02}:{seconds:02}")
+ } else {
+ format!("{minutes}:{seconds:02}")
+ }
+}
+
// ============================================================
// Numeric helpers
// ============================================================
@@ -181,6 +232,60 @@ mod tests {
assert_eq!(format_with_commas(1234567), "1,234,567");
}
+ #[test]
+ fn test_format_count_small() {
+ assert_eq!(format_count(0), "0");
+ assert_eq!(format_count(42), "42");
+ assert_eq!(format_count(999), "999");
+ }
+
+ #[test]
+ fn test_format_count_thousands() {
+ assert_eq!(format_count(1000), "1.0K");
+ assert_eq!(format_count(1500), "1.5K");
+ assert_eq!(format_count(50000), "50.0K");
+ }
+
+ #[test]
+ fn test_format_count_millions() {
+ assert_eq!(format_count(1_000_000), "1.0M");
+ assert_eq!(format_count(48_200_000), "48.2M");
+ assert_eq!(format_count(50_000_000), "50.0M");
+ }
+
+ #[test]
+ fn test_format_count_billions() {
+ assert_eq!(format_count(1_000_000_000), "1.0G");
+ assert_eq!(format_count(5_000_000_000), "5.0G");
+ }
+
+ #[test]
+ fn test_format_pct() {
+ assert_eq!(format_pct(833, 1000), "(83.3%)");
+ assert_eq!(format_pct(0, 0), "(0.0%)");
+ assert_eq!(format_pct(1000, 1000), "(100.0%)");
+ }
+
+ #[test]
+ fn test_format_duration_seconds() {
+ assert_eq!(format_duration(Duration::from_secs_f64(0.5)), "0.5s");
+ assert_eq!(format_duration(Duration::from_secs_f64(45.2)), "45.2s");
+ assert_eq!(format_duration(Duration::from_secs_f64(59.9)), "59.9s");
+ }
+
+ #[test]
+ fn test_format_duration_minutes() {
+ assert_eq!(format_duration(Duration::from_secs(60)), "1:00");
+ assert_eq!(format_duration(Duration::from_secs(83)), "1:23");
+ assert_eq!(format_duration(Duration::from_secs(3599)), "59:59");
+ }
+
+ #[test]
+ fn test_format_duration_hours() {
+ assert_eq!(format_duration(Duration::from_secs(3600)), "1:00:00");
+ assert_eq!(format_duration(Duration::from_secs(3754)), "1:02:34");
+ }
+
#[test]
fn test_open_reader_plain() {
let content = "line1\nline2\nline3\n";
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..9a228ca
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,99 @@
+//! RustQC — fast quality control tools for sequencing data.
+//!
+//! RustQC is primarily a CLI (`rustqc rna ...`) that runs a single-pass
+//! RNA-Seq QC pipeline (dupRadar, featureCounts, 8 RSeQC tools, Qualimap,
+//! preseq, samtools-style outputs). The same analysis modules are also
+//! exposed as a library so they can be embedded into other Rust programs.
+//!
+//! # Adding RustQC as a dependency
+//!
+//! ```toml
+//! [dependencies]
+//! rustqc = "0.2"
+//! ```
+//!
+//! The library pulls in `rust-htslib` (linked statically), `plotters`, and
+//! a small C++ component used by the preseq tool (built via `build.rs`),
+//! so a working C/C++ toolchain is required at build time.
+//!
+//! # Modules
+//!
+//! - [`gtf`] — GTF gene-annotation parsing into [`gtf::Gene`] / [`gtf::Transcript`] / [`gtf::Exon`].
+//! - [`io`] — shared I/O helpers (transparent gzip decompression, FNV-1a, number formatting).
+//! - [`config`] — configuration types that mirror the CLI's YAML config file.
+//! - [`summary`] — serializable types for the JSON run summary.
+//! - [`cpu`] — CPU feature detection and binary-target identification.
+//! - [`rna`] — the RNA-Seq analysis modules:
+//! - [`rna::dupradar`], [`rna::featurecounts`], [`rna::qualimap`],
+//! [`rna::preseq`], [`rna::rseqc`].
+//!
+//! [`Strandedness`] lives at the crate root because it is used across most
+//! analysis modules.
+//!
+//! # Stability
+//!
+//! The library is at `0.2.x` and the public surface is intentionally small
+//! at this stage. Expect breaking changes in minor releases until `1.0`.
+//! The full single-pass RNA-Seq pipeline (the `run_rna` orchestrator that
+//! the binary uses) is not yet exposed as a library entry point — for now
+//! library consumers drive individual analyses themselves. Pipeline-level
+//! orchestration may be exposed in a future release; see issue
+//! [#72](https://github.com/seqeralabs/RustQC/issues/72).
+//!
+//! # Examples
+//!
+//! Parse a GTF file and inspect the first gene:
+//!
+//! ```no_run
+//! use rustqc::gtf;
+//!
+//! let genes = gtf::parse_gtf("genes.gtf", &[]).unwrap();
+//! if let Some((gene_id, gene)) = genes.iter().next() {
+//! println!("{gene_id}: {} transcripts", gene.transcripts.len());
+//! }
+//! ```
+//!
+//! Use the [`Strandedness`] enum (also accepted by `serde` for YAML configs):
+//!
+//! ```
+//! use rustqc::Strandedness;
+//!
+//! let s = Strandedness::Reverse;
+//! assert_eq!(s.to_string(), "reverse");
+//! ```
+
+use clap::ValueEnum;
+use serde::Deserialize;
+
+pub mod config;
+pub mod cpu;
+pub mod gtf;
+pub mod io;
+pub mod rna;
+pub mod summary;
+
+/// Sequencing-library strandedness protocol.
+///
+/// Determines how read strand is interpreted relative to the gene annotation
+/// strand during counting. CLI/`serde` names and `Display` output: `unstranded`, `forward`, `reverse`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum Strandedness {
+ /// Count reads on either strand (library is not strand-specific).
+ #[default]
+ Unstranded,
+ /// Forward stranded: read 1 maps to the transcript strand.
+ Forward,
+ /// Reverse stranded: read 2 maps to the transcript strand (e.g. dUTP).
+ Reverse,
+}
+
+impl std::fmt::Display for Strandedness {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Strandedness::Unstranded => write!(f, "unstranded"),
+ Strandedness::Forward => write!(f, "forward"),
+ Strandedness::Reverse => write!(f, "reverse"),
+ }
+ }
+}
diff --git a/src/main.rs b/src/main.rs
index d78152b..4c66c17 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,12 +11,6 @@
mod citations;
mod cli;
-mod config;
-mod cpu;
-mod gtf;
-mod io;
-mod rna;
-mod summary;
mod ui;
use anyhow::{ensure, Context, Result};
@@ -27,7 +21,10 @@ use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::time::{Instant, SystemTime, UNIX_EPOCH};
-use ui::{format_count, format_duration, format_pct, Ui, Verbosity};
+use rustqc::io::{format_count, format_duration, format_pct};
+use rustqc::{config, cpu, gtf, rna, summary};
+
+use ui::{Ui, Verbosity};
use rust_htslib::bam::Read as BamRead;
@@ -310,7 +307,7 @@ fn run_rna(args: cli::RnaArgs, ui: &Ui) -> Result<()> {
let effective_stranded = args
.stranded
.or(config.stranded)
- .unwrap_or(cli::Strandedness::Unstranded);
+ .unwrap_or(rustqc::Strandedness::Unstranded);
let effective_paired = args.paired || config.paired.unwrap_or(false);
if n_bams == 1 {
@@ -749,7 +746,7 @@ struct SharedParams<'a> {
/// Terminal UI handle.
ui: &'a Ui,
/// Library strandedness.
- stranded: cli::Strandedness,
+ stranded: rustqc::Strandedness,
/// Whether the library is paired-end.
paired: bool,
/// Alignment-to-GTF chromosome name mapping.
diff --git a/src/rna/dupradar/counting.rs b/src/rna/dupradar/counting.rs
index ff165dd..785115c 100644
--- a/src/rna/dupradar/counting.rs
+++ b/src/rna/dupradar/counting.rs
@@ -9,11 +9,11 @@
//!
//! This implements a simplified featureCounts-compatible counting strategy.
-use crate::cli::Strandedness;
use crate::gtf::Gene;
+use crate::io::format_count;
use crate::rna::qualimap::QualimapAccum;
use crate::rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig};
-use crate::ui::format_count;
+use crate::Strandedness;
use anyhow::{Context, Result};
use coitrees::{COITree, Interval, IntervalTree};
use indexmap::IndexMap;
@@ -1273,11 +1273,11 @@ pub fn count_reads(
// with BGZF I/O. When total threads exceed num_workers the extra
// threads are distributed evenly; when threads == num_workers every
// worker still gets 1 dedicated decompression thread.
- let htslib_threads = if num_workers > 0 {
- ((threads.saturating_sub(num_workers)) / num_workers).max(1)
- } else {
- 0
- };
+ let htslib_threads = threads
+ .saturating_sub(num_workers)
+ .checked_div(num_workers)
+ .map(|n| n.max(1))
+ .unwrap_or(0);
// Process chromosome batches in parallel
let results: Vec)>> = pool.install(|| {
diff --git a/src/rna/qualimap/accumulator.rs b/src/rna/qualimap/accumulator.rs
index bbecbbf..f8e82fd 100644
--- a/src/rna/qualimap/accumulator.rs
+++ b/src/rna/qualimap/accumulator.rs
@@ -13,7 +13,7 @@ use coitrees::IntervalTree;
use rust_htslib::bam;
use rust_htslib::bam::record::Cigar;
-use crate::cli::Strandedness;
+use crate::Strandedness;
use super::coverage::TranscriptCoverage;
use super::index::QualimapIndex;
diff --git a/src/rna/qualimap/index.rs b/src/rna/qualimap/index.rs
index 36ae11c..3e164f8 100644
--- a/src/rna/qualimap/index.rs
+++ b/src/rna/qualimap/index.rs
@@ -322,7 +322,7 @@ impl QualimapIndex {
// work (interval tree, intron gaps) is done.
let coverage_exons = if tx.strand == '-' {
let mut desc = exons_0based.clone();
- desc.sort_unstable_by(|a, b| b.0.cmp(&a.0));
+ desc.sort_unstable_by_key(|e| std::cmp::Reverse(e.0));
desc
} else {
exons_0based
diff --git a/src/rna/qualimap/output.rs b/src/rna/qualimap/output.rs
index 9081eb4..5d53082 100644
--- a/src/rna/qualimap/output.rs
+++ b/src/rna/qualimap/output.rs
@@ -13,7 +13,7 @@ use super::coverage::TranscriptCoverage;
use super::index::QualimapIndex;
use super::plots;
use super::QualimapResult;
-use crate::cli::Strandedness;
+use crate::Strandedness;
// ============================= Constants =======================================
diff --git a/src/rna/qualimap/report.rs b/src/rna/qualimap/report.rs
index 284aa7e..bf0b488 100644
--- a/src/rna/qualimap/report.rs
+++ b/src/rna/qualimap/report.rs
@@ -9,7 +9,7 @@ use std::path::Path;
use anyhow::{Context, Result};
-use crate::cli::Strandedness;
+use crate::Strandedness;
use log::debug;
// ===================================================================
diff --git a/src/rna/rseqc/accumulators.rs b/src/rna/rseqc/accumulators.rs
index 0692ba9..b91a409 100644
--- a/src/rna/rseqc/accumulators.rs
+++ b/src/rna/rseqc/accumulators.rs
@@ -781,13 +781,12 @@ impl BamStatAccum {
// GC content: cumulative step function with ngc=200 bins.
// Matches samtools stats.c:925-941. For a read with gc_count G/C
// bases out of read_len total, increment bins gc_idx_min..gc_idx_max.
- if read_len > 0 {
- let ngc: usize = 200;
- let gc_idx_min = gc_count as usize * (ngc - 1) / read_len;
- let mut gc_idx_max = (gc_count as usize + 1) * (ngc - 1) / read_len;
- if gc_idx_max >= ngc {
- gc_idx_max = ngc - 1;
- }
+ let ngc: usize = 200;
+ if let (Some(gc_idx_min), Some(gc_idx_max)) = (
+ (gc_count as usize * (ngc - 1)).checked_div(read_len),
+ ((gc_count as usize + 1) * (ngc - 1)).checked_div(read_len),
+ ) {
+ let gc_idx_max = gc_idx_max.min(ngc - 1);
for item in gc_arr.iter_mut().take(gc_idx_max).skip(gc_idx_min) {
*item += 1;
}
diff --git a/src/rna/rseqc/infer_experiment.rs b/src/rna/rseqc/infer_experiment.rs
index 4153b8a..bb25788 100644
--- a/src/rna/rseqc/infer_experiment.rs
+++ b/src/rna/rseqc/infer_experiment.rs
@@ -4,8 +4,8 @@
//! gene models (from GTF annotation) and determines the fraction consistent with
//! each strand protocol.
-use crate::cli::Strandedness;
use crate::gtf::Gene;
+use crate::Strandedness;
use anyhow::{Context, Result};
use indexmap::IndexMap;
use log::debug;
diff --git a/src/rna/rseqc/read_distribution.rs b/src/rna/rseqc/read_distribution.rs
index f6e2283..0973f63 100644
--- a/src/rna/rseqc/read_distribution.rs
+++ b/src/rna/rseqc/read_distribution.rs
@@ -204,17 +204,13 @@ pub fn build_regions_from_genes(genes: &IndexMap) -> RegionSets {
// 5' UTR: exon portions before CDS (strand-aware)
for (&es, &ee) in exon_starts.iter().zip(exon_ends.iter()) {
match strand {
- '+' => {
- if es < cds_start {
- let e = ee.min(cds_start);
- regions.utr_5.entry(chrom.clone()).or_default().add(es, e);
- }
+ '+' if es < cds_start => {
+ let e = ee.min(cds_start);
+ regions.utr_5.entry(chrom.clone()).or_default().add(es, e);
}
- '-' => {
- if ee > cds_end {
- let s = es.max(cds_end);
- regions.utr_5.entry(chrom.clone()).or_default().add(s, ee);
- }
+ '-' if ee > cds_end => {
+ let s = es.max(cds_end);
+ regions.utr_5.entry(chrom.clone()).or_default().add(s, ee);
}
_ => {}
}
@@ -223,17 +219,13 @@ pub fn build_regions_from_genes(genes: &IndexMap) -> RegionSets {
// 3' UTR: exon portions after CDS (strand-aware)
for (&es, &ee) in exon_starts.iter().zip(exon_ends.iter()) {
match strand {
- '+' => {
- if ee > cds_end {
- let s = es.max(cds_end);
- regions.utr_3.entry(chrom.clone()).or_default().add(s, ee);
- }
+ '+' if ee > cds_end => {
+ let s = es.max(cds_end);
+ regions.utr_3.entry(chrom.clone()).or_default().add(s, ee);
}
- '-' => {
- if es < cds_start {
- let e = ee.min(cds_start);
- regions.utr_3.entry(chrom.clone()).or_default().add(es, e);
- }
+ '-' if es < cds_start => {
+ let e = ee.min(cds_start);
+ regions.utr_3.entry(chrom.clone()).or_default().add(es, e);
}
_ => {}
}
diff --git a/src/rna/rseqc/tin.rs b/src/rna/rseqc/tin.rs
index 341c648..4ce2dda 100644
--- a/src/rna/rseqc/tin.rs
+++ b/src/rna/rseqc/tin.rs
@@ -127,6 +127,11 @@ impl TinResults {
pub fn len(&self) -> usize {
self.transcripts.len()
}
+
+ /// Whether there are no transcripts with computed TIN scores.
+ pub fn is_empty(&self) -> bool {
+ self.transcripts.is_empty()
+ }
}
// ===================================================================
@@ -331,7 +336,7 @@ pub struct TinAccum {
/// Per-transcript unique read start positions, capped at `min_cov + 1`.
/// Once exceeded, the set is drained and `exceeded_threshold[tx_idx]`
/// is set instead.
- pub unique_starts: Vec>,
+ pub(crate) unique_starts: Vec>,
/// Per-transcript flag: true once unique start count exceeded `min_cov`.
/// Avoids further HashSet inserts for high-coverage transcripts.
pub exceeded_threshold: Vec,
diff --git a/src/ui.rs b/src/ui.rs
index c07be01..fb92edd 100644
--- a/src/ui.rs
+++ b/src/ui.rs
@@ -8,6 +8,8 @@ use console::Style;
use indicatif::{ProgressBar, ProgressStyle};
use std::time::Duration;
+use rustqc::io::{format_count, format_duration};
+
// ============================================================================
// Verbosity
// ============================================================================
@@ -444,113 +446,3 @@ fn format_summary_row(
content
}
}
-
-/// Format a count with SI prefix (e.g. 48200000 → "48.2M").
-///
-/// Values below 1000 are shown as-is. Values above use K/M/G/T suffixes
-/// with one decimal place.
-pub fn format_count(n: u64) -> String {
- use number_prefix::NumberPrefix;
- match NumberPrefix::decimal(n as f64) {
- NumberPrefix::Standalone(n) => format!("{n}"),
- NumberPrefix::Prefixed(prefix, n) => {
- // Map SI prefixes to short single-char suffixes
- let suffix = match prefix {
- number_prefix::Prefix::Kilo => "K",
- number_prefix::Prefix::Mega => "M",
- number_prefix::Prefix::Giga => "G",
- number_prefix::Prefix::Tera => "T",
- _ => return format!("{:.1}{prefix:?}", n),
- };
- format!("{n:.1}{suffix}")
- }
- }
-}
-
-/// Format a percentage string (e.g. "83.3%").
-pub fn format_pct(n: u64, total: u64) -> String {
- if total == 0 {
- return "(0.0%)".to_string();
- }
- format!("({:.1}%)", n as f64 / total as f64 * 100.0)
-}
-
-/// Format a duration as human-friendly mm:ss or h:mm:ss.
-///
-/// - Under 60s: "45.2s"
-/// - Under 1h: "1:23"
-/// - Over 1h: "1:02:34"
-pub fn format_duration(d: Duration) -> String {
- let total_secs = d.as_secs_f64();
- if total_secs < 60.0 {
- return format!("{total_secs:.1}s");
- }
- let total_secs = d.as_secs();
- let hours = total_secs / 3600;
- let minutes = (total_secs % 3600) / 60;
- let seconds = total_secs % 60;
- if hours > 0 {
- format!("{hours}:{minutes:02}:{seconds:02}")
- } else {
- format!("{minutes}:{seconds:02}")
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_format_count_small() {
- assert_eq!(format_count(0), "0");
- assert_eq!(format_count(42), "42");
- assert_eq!(format_count(999), "999");
- }
-
- #[test]
- fn test_format_count_thousands() {
- assert_eq!(format_count(1000), "1.0K");
- assert_eq!(format_count(1500), "1.5K");
- assert_eq!(format_count(50000), "50.0K");
- }
-
- #[test]
- fn test_format_count_millions() {
- assert_eq!(format_count(1_000_000), "1.0M");
- assert_eq!(format_count(48_200_000), "48.2M");
- assert_eq!(format_count(50_000_000), "50.0M");
- }
-
- #[test]
- fn test_format_count_billions() {
- assert_eq!(format_count(1_000_000_000), "1.0G");
- assert_eq!(format_count(5_000_000_000), "5.0G");
- }
-
- #[test]
- fn test_format_pct() {
- assert_eq!(format_pct(833, 1000), "(83.3%)");
- assert_eq!(format_pct(0, 0), "(0.0%)");
- assert_eq!(format_pct(1000, 1000), "(100.0%)");
- }
-
- #[test]
- fn test_format_duration_seconds() {
- assert_eq!(format_duration(Duration::from_secs_f64(0.5)), "0.5s");
- assert_eq!(format_duration(Duration::from_secs_f64(45.2)), "45.2s");
- assert_eq!(format_duration(Duration::from_secs_f64(59.9)), "59.9s");
- }
-
- #[test]
- fn test_format_duration_minutes() {
- assert_eq!(format_duration(Duration::from_secs(60)), "1:00");
- assert_eq!(format_duration(Duration::from_secs(83)), "1:23");
- assert_eq!(format_duration(Duration::from_secs(3599)), "59:59");
- }
-
- #[test]
- fn test_format_duration_hours() {
- assert_eq!(format_duration(Duration::from_secs(3600)), "1:00:00");
- assert_eq!(format_duration(Duration::from_secs(3754)), "1:02:34");
- }
-}