diff --git a/src/modules/overrepresented_seqs.rs b/src/modules/overrepresented_seqs.rs
index 2f718bb..c6b2f0d 100644
--- a/src/modules/overrepresented_seqs.rs
+++ b/src/modules/overrepresented_seqs.rs
@@ -416,17 +416,18 @@ impl QCModule for OverRepresentedSeqs {
Some(hit) => hit.to_string(),
None => "No Hit".to_string(),
};
- // The Java ResultsTable.getValueAt() for percentage does
- // JAVA COMPAT: Math.round(percentage * 100.0) / 100.0, rounding to 2 decimal places.
- // The text report then calls String.valueOf() on the Double, producing
- // Java's Double.toString() format.
- let rounded_pct = (s.percentage * 100.0).round() / 100.0;
+ // JAVA COMPAT: Java's OverRepresentedSeqs.OverrepresentedSeq stores
+ // the raw double percentage without rounding (see OverRepresentedSeqs.java:253),
+ // and AbstractQCModule.writeTable serializes it via String.valueOf(getValueAt(...))
+ // (AbstractQCModule.java:159), which returns Java's Double.toString() of the
+ // unrounded value (e.g. "7.160449112640348"). Pass the raw percentage to the
+ // formatter; do not round to 2 decimals.
writeln!(
writer,
"{}\t{}\t{}\t{}",
s.seq,
s.count,
- java_format_double(rounded_pct),
+ java_format_double(s.percentage),
source
)?;
}
diff --git a/tests/data/gen_realistic.py b/tests/data/gen_realistic.py
new file mode 100755
index 0000000..167ef8b
--- /dev/null
+++ b/tests/data/gen_realistic.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+"""Deterministically generate a realistic test FASTQ for fastqc-rust equivalence tests.
+
+Produces 1009 reads of 50bp, all with quality 'I' (Phred 40 as Phred+33, but FastQC
+detects Illumina 1.5 and reports Q9), with 5 deliberately overrepresented sequences at
+non-round percentages. Background reads are pseudo-random (fixed seed), below the 0.1% overrepresented threshold.
+
+Designed to expose the percentage-precision bug fixed by ewels/FastQC-Rust#2.
+
+Output is written gzipped (~20 KB vs ~120 KB plain) to keep the repo lean.
+The gzip stream uses mtime=0, so regeneration is byte-identical across machines as
+long as the output file name (its basename is stored in the gzip header) is unchanged.
+"""
+import gzip
+import random
+import sys
+
+random.seed(20260426)
+
+QUAL = "I" * 50 # Phred 40
+LEN = 50
+TOTAL = 1009
+
+OVERREP = [
+ ("OVERREP_A_HIGH", 73), # 73/1009 = 7.23488602...%
+ ("OVERREP_B_MID", 37), # 37/1009 = 3.66699702...%
+ ("OVERREP_C_LOW", 11), # 11/1009 = 1.09018830...%
+ ("OVERREP_D_TINY", 5), # 5/1009 = 0.49554014...%
+ ("OVERREP_E_EDGE", 2), # 2/1009 = 0.19821605...% (just above 0.1% threshold)
+]
+
+def random_seq(rng):
+ return "".join(rng.choice("ACGT") for _ in range(LEN))
+
+# Build the five overrepresented sequences. Use deterministic RNG, but reject
+# any candidate that collides with a previous sequence to keep counts exact.
+overrep_seqs = []
+seen = set()
+seq_rng = random.Random(20260426)
+for label, _count in OVERREP:
+ while True:
+ s = random_seq(seq_rng)
+ if s not in seen:
+ overrep_seqs.append(s)
+ seen.add(s)
+ break
+
+# Build background reads. Each must be unique AND not collide with any
+# overrepresented sequence (otherwise the percentages drift).
+n_overrep = sum(c for _, c in OVERREP)
+n_background = TOTAL - n_overrep
+assert n_background > 0
+background_seqs = []
+while len(background_seqs) < n_background:
+ s = random_seq(seq_rng)
+ if s in seen:
+ continue
+ seen.add(s)
+ background_seqs.append(s)
+
+# Assemble reads grouped by sequence (overrepresented first, then background); shuffled below.
+reads = []
+for s, (label, count) in zip(overrep_seqs, OVERREP):
+ for i in range(count):
+ reads.append((f"{label}_{i+1}", s))
+for i, s in enumerate(background_seqs):
+ reads.append((f"BACKGROUND_{i+1}", s))
+
+# Shuffle deterministically so overrepresented reads aren't all clustered.
+order_rng = random.Random(99)
+order_rng.shuffle(reads)
+assert len(reads) == TOTAL
+
+out_path = sys.argv[1]
+# mtime=0 removes the timestamp from the gzip header; the header still stores the
+# basename of out_path, so output is byte-identical only for the same file name.
+with gzip.GzipFile(filename=out_path, mode="wb", mtime=0) as f:
+ for header, seq in reads:
+ f.write(f"@{header}\n{seq}\n+\n{QUAL}\n".encode("ascii"))
+
+print(f"wrote {out_path}: {TOTAL} reads, {LEN}bp, {len(OVERREP)} overrepresented sequences", file=sys.stderr)
+for label, count in OVERREP:
+ pct = count * 100 / TOTAL
+ print(f" {label}: {count}/{TOTAL} = {pct}%", file=sys.stderr)
diff --git a/tests/data/realistic.fastq.gz b/tests/data/realistic.fastq.gz
new file mode 100644
index 0000000..386166d
Binary files /dev/null and b/tests/data/realistic.fastq.gz differ
diff --git a/tests/equivalence/patches/realistic_default_adapter_content_svg.patch b/tests/equivalence/patches/realistic_default_adapter_content_svg.patch
new file mode 100644
index 0000000..a905e30
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_adapter_content_svg.patch
@@ -0,0 +1,11 @@
+--- java/Images/adapter_content.svg
++++ rust/Images/adapter_content.svg
+@@ -17,7 +17,7 @@
+ % Adapter
+
+
+-Position in read (bp)
++Position in read (bp)
+ 1
+
+ 2
diff --git a/tests/equivalence/patches/realistic_default_duplication_levels_svg.patch b/tests/equivalence/patches/realistic_default_duplication_levels_svg.patch
new file mode 100644
index 0000000..8739842
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_duplication_levels_svg.patch
@@ -0,0 +1,22 @@
+--- java/Images/duplication_levels.svg
++++ rust/Images/duplication_levels.svg
+@@ -17,7 +17,7 @@
+ Percent of seqs remaining if deduplicated 87.81%
+
+
+-Sequence Duplication Level
++Sequence Duplication Level
+ 1
+
+ 2
+@@ -68,7 +68,7 @@
+
+
+
+-
+-
+-% Total sequences
++
++
++% Total sequences
+
diff --git a/tests/equivalence/patches/realistic_default_per_base_n_content_svg.patch b/tests/equivalence/patches/realistic_default_per_base_n_content_svg.patch
new file mode 100644
index 0000000..4c30aab
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_per_base_n_content_svg.patch
@@ -0,0 +1,21 @@
+--- java/Images/per_base_n_content.svg
++++ rust/Images/per_base_n_content.svg
+@@ -17,7 +17,7 @@
+ N content across all bases
+
+
+-Position in read (bp)
++Position in read (bp)
+ 1
+
+ 2
+@@ -132,7 +132,7 @@
+
+
+
+-
+-
++
++
+ %N
+
diff --git a/tests/equivalence/patches/realistic_default_per_base_quality_svg.patch b/tests/equivalence/patches/realistic_default_per_base_quality_svg.patch
new file mode 100644
index 0000000..eaaef17
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_per_base_quality_svg.patch
@@ -0,0 +1,20 @@
+--- java/Images/per_base_quality.svg
++++ rust/Images/per_base_quality.svg
+@@ -21,7 +21,7 @@
+ 30
+ 32
+ 34
+-Quality scores across all bases (Illumina 1.5 encoding)
++Quality scores across all bases (Illumina 1.5 encoding)
+
+
+
+@@ -203,7 +203,7 @@
+
+
+
+-Position in read (bp)
++Position in read (bp)
+
+
+
diff --git a/tests/equivalence/patches/realistic_default_per_base_sequence_content_svg.patch b/tests/equivalence/patches/realistic_default_per_base_sequence_content_svg.patch
new file mode 100644
index 0000000..17a07ea
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_per_base_sequence_content_svg.patch
@@ -0,0 +1,22 @@
+--- java/Images/per_base_sequence_content.svg
++++ rust/Images/per_base_sequence_content.svg
+@@ -17,7 +17,7 @@
+ Sequence content across all bases
+
+
+-Position in read (bp)
++Position in read (bp)
+ 1
+
+ 2
+@@ -279,8 +279,8 @@
+
+
+
+-
+-
++
++
+ %T
+ %C
+ %A
diff --git a/tests/equivalence/patches/realistic_default_per_sequence_gc_content_svg.patch b/tests/equivalence/patches/realistic_default_per_sequence_gc_content_svg.patch
new file mode 100644
index 0000000..48fabba
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_per_sequence_gc_content_svg.patch
@@ -0,0 +1,22 @@
+--- java/Images/per_sequence_gc_content.svg
++++ rust/Images/per_sequence_gc_content.svg
+@@ -15,7 +15,7 @@
+ GC distribution over all sequences
+
+
+-Mean GC content (%)
++Mean GC content (%)
+ 0
+
+ 2
+@@ -303,8 +303,8 @@
+
+
+
+-
+-
++
++
+ GC count per read
+ Theoretical Distribution
+
diff --git a/tests/equivalence/patches/realistic_default_per_sequence_quality_svg.patch b/tests/equivalence/patches/realistic_default_per_sequence_quality_svg.patch
new file mode 100644
index 0000000..1b0b9ed
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_per_sequence_quality_svg.patch
@@ -0,0 +1,26 @@
+--- java/Images/per_sequence_quality.svg
++++ rust/Images/per_sequence_quality.svg
+@@ -9,10 +9,10 @@
+ 600
+ 800
+ 1000
+-Quality score distribution over all sequences
++Quality score distribution over all sequences
+
+
+-Mean Sequence Quality (Phred Score)
++Mean Sequence Quality (Phred Score)
+ 9
+
+
+@@ -20,7 +20,7 @@
+
+
+
+-
+-
+-Average Quality per read
++
++
++Average Quality per read
+
diff --git a/tests/equivalence/patches/realistic_default_sequence_length_distribution_svg.patch b/tests/equivalence/patches/realistic_default_sequence_length_distribution_svg.patch
new file mode 100644
index 0000000..9a182c7
--- /dev/null
+++ b/tests/equivalence/patches/realistic_default_sequence_length_distribution_svg.patch
@@ -0,0 +1,13 @@
+--- java/Images/sequence_length_distribution.svg
++++ rust/Images/sequence_length_distribution.svg
+@@ -25,7 +25,7 @@
+
+
+
+-
+-
+-Sequence Length
++
++
++Sequence Length
+
diff --git a/tests/equivalence/reference/realistic_default/Images/adapter_content.png b/tests/equivalence/reference/realistic_default/Images/adapter_content.png
new file mode 100644
index 0000000..8d90857
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/adapter_content.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/adapter_content.svg b/tests/equivalence/reference/realistic_default/Images/adapter_content.svg
new file mode 100644
index 0000000..ca428d2
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/adapter_content.svg
@@ -0,0 +1,311 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/Images/duplication_levels.png b/tests/equivalence/reference/realistic_default/Images/duplication_levels.png
new file mode 100644
index 0000000..87c570b
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/duplication_levels.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/duplication_levels.svg b/tests/equivalence/reference/realistic_default/Images/duplication_levels.svg
new file mode 100644
index 0000000..8f44b39
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/duplication_levels.svg
@@ -0,0 +1,74 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/Images/per_base_n_content.png b/tests/equivalence/reference/realistic_default/Images/per_base_n_content.png
new file mode 100644
index 0000000..0211043
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/per_base_n_content.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/per_base_n_content.svg b/tests/equivalence/reference/realistic_default/Images/per_base_n_content.svg
new file mode 100644
index 0000000..0981699
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/per_base_n_content.svg
@@ -0,0 +1,138 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/Images/per_base_quality.png b/tests/equivalence/reference/realistic_default/Images/per_base_quality.png
new file mode 100644
index 0000000..f82cd15
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/per_base_quality.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/per_base_quality.svg b/tests/equivalence/reference/realistic_default/Images/per_base_quality.svg
new file mode 100644
index 0000000..795faca
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/per_base_quality.svg
@@ -0,0 +1,606 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/Images/per_base_sequence_content.png b/tests/equivalence/reference/realistic_default/Images/per_base_sequence_content.png
new file mode 100644
index 0000000..8e80320
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/per_base_sequence_content.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/per_base_sequence_content.svg b/tests/equivalence/reference/realistic_default/Images/per_base_sequence_content.svg
new file mode 100644
index 0000000..b676512
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/per_base_sequence_content.svg
@@ -0,0 +1,288 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/Images/per_sequence_gc_content.png b/tests/equivalence/reference/realistic_default/Images/per_sequence_gc_content.png
new file mode 100644
index 0000000..9954155
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/per_sequence_gc_content.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/per_sequence_gc_content.svg b/tests/equivalence/reference/realistic_default/Images/per_sequence_gc_content.svg
new file mode 100644
index 0000000..de767aa
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/per_sequence_gc_content.svg
@@ -0,0 +1,310 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/Images/per_sequence_quality.png b/tests/equivalence/reference/realistic_default/Images/per_sequence_quality.png
new file mode 100644
index 0000000..f7eddef
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/per_sequence_quality.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/per_sequence_quality.svg b/tests/equivalence/reference/realistic_default/Images/per_sequence_quality.svg
new file mode 100644
index 0000000..b528334
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/per_sequence_quality.svg
@@ -0,0 +1,26 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/Images/sequence_length_distribution.png b/tests/equivalence/reference/realistic_default/Images/sequence_length_distribution.png
new file mode 100644
index 0000000..5625625
Binary files /dev/null and b/tests/equivalence/reference/realistic_default/Images/sequence_length_distribution.png differ
diff --git a/tests/equivalence/reference/realistic_default/Images/sequence_length_distribution.svg b/tests/equivalence/reference/realistic_default/Images/sequence_length_distribution.svg
new file mode 100644
index 0000000..45ba1ab
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/Images/sequence_length_distribution.svg
@@ -0,0 +1,31 @@
+
+
+
diff --git a/tests/equivalence/reference/realistic_default/fastqc.fo b/tests/equivalence/reference/realistic_default/fastqc.fo
new file mode 100644
index 0000000..1c0c5da
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/fastqc.fo
@@ -0,0 +1,3 @@
+
+ FASTQC-Report
+ Basic StatisticsFilenamerealistic.fastq.gzFile typeConventional base callsEncodingIllumina 1.5Total Sequences1009Total Bases50.4 kbpSequences flagged as poor quality0Sequence length50%GC49Per base sequence qualityPer sequence quality scoresPer base sequence contentPer sequence GC contentPer base N contentSequence Length DistributionSequence Duplication LevelsOverrepresented sequencesGCAGGACCTCTAGATTGTATCACTCTGGACCGAAGATATTGACCCTCAAA737.234886025768088No HitGTAACAGTAACCGACAACCCGATCACAAGGTTCAAAGACTCCGTGAAAAA373.6669970267591676No HitCAGCAAGTGTGGTCTTTGTTCAAGTAAGCTTGCACCTGAGTTTGCGCTGC111.0901883052527255No HitCCACATCTCTCTCCCATTTGATCTATACGTAGACAGGTTCTAGATCCGGT50.4955401387512388No HitCAGAGTAAATCCTTGAGTGGCCTCTAGGCTCACATAATAGAAGTCAATCC20.19821605550049554No HitAdapter Content
\ No newline at end of file
diff --git a/tests/equivalence/reference/realistic_default/fastqc_data.txt b/tests/equivalence/reference/realistic_default/fastqc_data.txt
new file mode 100644
index 0000000..ab75b6e
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/fastqc_data.txt
@@ -0,0 +1,353 @@
+##FastQC 0.12.1
+>>Basic Statistics pass
+#Measure Value
+Filename realistic.fastq.gz
+File type Conventional base calls
+Encoding Illumina 1.5
+Total Sequences 1009
+Total Bases 50.4 kbp
+Sequences flagged as poor quality 0
+Sequence length 50
+%GC 49
+>>END_MODULE
+>>Per base sequence quality fail
+#Base Mean Median Lower Quartile Upper Quartile 10th Percentile 90th Percentile
+1 9.0 9.0 9.0 9.0 9.0 9.0
+2 9.0 9.0 9.0 9.0 9.0 9.0
+3 9.0 9.0 9.0 9.0 9.0 9.0
+4 9.0 9.0 9.0 9.0 9.0 9.0
+5 9.0 9.0 9.0 9.0 9.0 9.0
+6 9.0 9.0 9.0 9.0 9.0 9.0
+7 9.0 9.0 9.0 9.0 9.0 9.0
+8 9.0 9.0 9.0 9.0 9.0 9.0
+9 9.0 9.0 9.0 9.0 9.0 9.0
+10 9.0 9.0 9.0 9.0 9.0 9.0
+11 9.0 9.0 9.0 9.0 9.0 9.0
+12 9.0 9.0 9.0 9.0 9.0 9.0
+13 9.0 9.0 9.0 9.0 9.0 9.0
+14 9.0 9.0 9.0 9.0 9.0 9.0
+15 9.0 9.0 9.0 9.0 9.0 9.0
+16 9.0 9.0 9.0 9.0 9.0 9.0
+17 9.0 9.0 9.0 9.0 9.0 9.0
+18 9.0 9.0 9.0 9.0 9.0 9.0
+19 9.0 9.0 9.0 9.0 9.0 9.0
+20 9.0 9.0 9.0 9.0 9.0 9.0
+21 9.0 9.0 9.0 9.0 9.0 9.0
+22 9.0 9.0 9.0 9.0 9.0 9.0
+23 9.0 9.0 9.0 9.0 9.0 9.0
+24 9.0 9.0 9.0 9.0 9.0 9.0
+25 9.0 9.0 9.0 9.0 9.0 9.0
+26 9.0 9.0 9.0 9.0 9.0 9.0
+27 9.0 9.0 9.0 9.0 9.0 9.0
+28 9.0 9.0 9.0 9.0 9.0 9.0
+29 9.0 9.0 9.0 9.0 9.0 9.0
+30 9.0 9.0 9.0 9.0 9.0 9.0
+31 9.0 9.0 9.0 9.0 9.0 9.0
+32 9.0 9.0 9.0 9.0 9.0 9.0
+33 9.0 9.0 9.0 9.0 9.0 9.0
+34 9.0 9.0 9.0 9.0 9.0 9.0
+35 9.0 9.0 9.0 9.0 9.0 9.0
+36 9.0 9.0 9.0 9.0 9.0 9.0
+37 9.0 9.0 9.0 9.0 9.0 9.0
+38 9.0 9.0 9.0 9.0 9.0 9.0
+39 9.0 9.0 9.0 9.0 9.0 9.0
+40 9.0 9.0 9.0 9.0 9.0 9.0
+41 9.0 9.0 9.0 9.0 9.0 9.0
+42 9.0 9.0 9.0 9.0 9.0 9.0
+43 9.0 9.0 9.0 9.0 9.0 9.0
+44 9.0 9.0 9.0 9.0 9.0 9.0
+45 9.0 9.0 9.0 9.0 9.0 9.0
+46 9.0 9.0 9.0 9.0 9.0 9.0
+47 9.0 9.0 9.0 9.0 9.0 9.0
+48 9.0 9.0 9.0 9.0 9.0 9.0
+49 9.0 9.0 9.0 9.0 9.0 9.0
+50 9.0 9.0 9.0 9.0 9.0 9.0
+>>END_MODULE
+>>Per sequence quality scores fail
+#Quality Count
+9 1009.0
+>>END_MODULE
+>>Per base sequence content warn
+#Base G A T C
+1 33.10208126858276 21.902874132804754 21.605550049554015 23.389494549058472
+2 22.2001982160555 21.407333994053516 25.272547076313177 31.119920713577798
+3 23.984142715559962 33.79583746283449 20.713577799801783 21.506442021803764
+4 28.543111992071356 23.785926660059463 22.497522299306244 25.173439048562933
+5 29.83151635282458 24.677898909811695 21.110009910802773 24.380574826560952
+6 22.101090188305253 34.093161546085234 19.920713577799802 23.88503468780971
+7 27.056491575817642 19.028741328047573 22.2001982160555 31.714568880079284
+8 20.812685827552034 21.010901883052526 28.93954410307235 29.23686818632309
+9 25.768087215064423 25.074331020812686 28.344895936570865 20.812685827552034
+10 22.794846382556987 23.48860257680872 24.2814667988107 29.43508424182359
+11 23.48860257680872 22.69573835480674 29.038652130822594 24.777006937561943
+12 21.80376610505451 30.426164519326065 20.41625371655104 27.35381565906839
+13 33.0029732408325 22.001982160555006 22.398414271555996 22.59663032705649
+14 20.911793855302278 33.3994053518335 21.80376610505451 23.88503468780971
+15 22.497522299306244 21.506442021803764 27.452923686818632 28.543111992071356
+16 21.605550049554015 26.362735381565905 29.43508424182359 22.59663032705649
+17 28.14667988107037 25.074331020812686 24.4796828543112 22.29930624380575
+18 22.893954410307234 21.110009910802773 31.020812685827554 24.975222993062438
+19 20.317145688800792 29.23686818632309 24.08325074331021 26.362735381565905
+20 21.407333994053516 22.101090188305253 31.020812685827554 25.47076313181368
+21 28.14667988107037 19.127849355797817 22.398414271555996 30.327056491575817
+22 22.398414271555996 33.30029732408325 23.290386521308225 21.010901883052526
+23 22.398414271555996 22.101090188305253 26.362735381565905 29.137760158572846
+24 20.515361744301288 23.19127849355798 30.22794846382557 26.065411298315162
+25 20.01982160555005 29.33597621407334 20.812685827552034 29.83151635282458
+26 23.785926660059463 23.290386521308225 29.137760158572846 23.785926660059463
+27 28.44400396432111 26.461843409316156 25.074331020812686 20.01982160555005
+28 32.70564915758176 26.065411298315162 21.308225966303272 19.920713577799802
+29 25.569871159563924 30.22794846382557 21.308225966303272 22.893954410307234
+30 22.2001982160555 22.101090188305253 26.7591674925669 28.93954410307235
+31 23.389494549058472 21.902874132804754 26.560951437066404 28.14667988107037
+32 31.41724479682854 20.614469772051535 26.957383548067394 21.010901883052526
+33 20.812685827552034 32.30921704658078 20.118929633300297 26.7591674925669
+34 22.69573835480674 33.201189296333 21.902874132804754 22.2001982160555
+35 29.038652130822594 26.461843409316156 23.290386521308225 21.209117938553025
+36 22.2001982160555 33.3994053518335 20.614469772051535 23.785926660059463
+37 26.26362735381566 21.80376610505451 30.426164519326065 21.506442021803764
+38 22.101090188305253 32.01189296333003 22.993062438057482 22.893954410307234
+39 20.515361744301288 23.68681863230922 30.327056491575817 25.47076313181368
+40 22.794846382556987 21.902874132804754 32.21010901883052 23.09217046580773
+41 27.94846382556987 22.398414271555996 24.182358771060457 25.47076313181368
+42 22.497522299306244 28.93954410307235 21.605550049554015 26.957383548067394
+43 26.16451932606541 19.920713577799802 23.19127849355798 30.723488602576808
+44 22.398414271555996 22.29930624380575 26.461843409316156 28.840436075322103
+45 26.16451932606541 24.4796828543112 19.920713577799802 29.43508424182359
+46 21.506442021803764 26.16451932606541 31.813676907829535 20.515361744301288
+47 22.59663032705649 23.984142715559962 23.88503468780971 29.534192269573833
+48 22.101090188305253 34.489593657086225 23.09217046580773 20.317145688800792
+49 22.59663032705649 34.19226957383548 19.722497522299307 23.48860257680872
+50 21.605550049554015 33.597621407333996 22.101090188305253 22.69573835480674
+>>END_MODULE
+>>Per sequence GC content fail
+#GC Content Count
+0 0.0
+1 0.0
+2 0.0
+3 0.0
+4 0.0
+5 0.0
+6 0.0
+7 0.0
+8 0.0
+9 0.0
+10 0.0
+11 0.0
+12 0.0
+13 0.0
+14 0.0
+15 0.0
+16 0.0
+17 0.0
+18 0.0
+19 0.0
+20 0.0
+21 0.0
+22 0.0
+23 0.0
+24 0.0
+25 0.0
+26 0.0
+27 0.0
+28 0.0
+29 1.0
+30 2.0
+31 4.5
+32 7.0
+33 8.0
+34 9.0
+35 10.5
+36 12.0
+37 16.5
+38 21.0
+39 31.0
+40 41.0
+41 46.0
+42 51.0
+43 77.0
+44 103.0
+45 139.5
+46 176.0
+47 129.5
+48 83.0
+49 103.5
+50 124.0
+51 106.5
+52 89.0
+53 81.5
+54 74.0
+55 73.0
+56 72.0
+57 67.0
+58 62.0
+59 48.0
+60 34.0
+61 26.5
+62 19.0
+63 15.5
+64 12.0
+65 12.0
+66 12.0
+67 7.5
+68 3.0
+69 2.5
+70 2.0
+71 1.5
+72 1.0
+73 0.5
+74 0.0
+75 0.0
+76 0.0
+77 0.0
+78 0.0
+79 0.0
+80 0.0
+81 0.0
+82 0.0
+83 0.0
+84 0.0
+85 0.0
+86 0.0
+87 0.0
+88 0.0
+89 0.0
+90 0.0
+91 0.0
+92 0.0
+93 0.0
+94 0.0
+95 0.0
+96 0.0
+97 0.0
+98 0.0
+99 0.0
+100 0.0
+>>END_MODULE
+>>Per base N content pass
+#Base N-Count
+1 0.0
+2 0.0
+3 0.0
+4 0.0
+5 0.0
+6 0.0
+7 0.0
+8 0.0
+9 0.0
+10 0.0
+11 0.0
+12 0.0
+13 0.0
+14 0.0
+15 0.0
+16 0.0
+17 0.0
+18 0.0
+19 0.0
+20 0.0
+21 0.0
+22 0.0
+23 0.0
+24 0.0
+25 0.0
+26 0.0
+27 0.0
+28 0.0
+29 0.0
+30 0.0
+31 0.0
+32 0.0
+33 0.0
+34 0.0
+35 0.0
+36 0.0
+37 0.0
+38 0.0
+39 0.0
+40 0.0
+41 0.0
+42 0.0
+43 0.0
+44 0.0
+45 0.0
+46 0.0
+47 0.0
+48 0.0
+49 0.0
+50 0.0
+>>END_MODULE
+>>Sequence Length Distribution pass
+#Length Count
+50 1009.0
+>>END_MODULE
+>>Sequence Duplication Levels pass
+#Total Deduplicated Percentage 87.80971258671953
+#Duplication Level Percentage of total
+1 87.31417244796829
+2 0.19821605550049554
+3 0.0
+4 0.0
+5 0.4955401387512388
+6 0.0
+7 0.0
+8 0.0
+9 0.0
+>10 4.757185332011893
+>50 7.234886025768088
+>100 0.0
+>500 0.0
+>1k 0.0
+>5k 0.0
+>10k+ 0.0
+>>END_MODULE
+>>Overrepresented sequences fail
+#Sequence Count Percentage Possible Source
+GCAGGACCTCTAGATTGTATCACTCTGGACCGAAGATATTGACCCTCAAA 73 7.234886025768088 No Hit
+GTAACAGTAACCGACAACCCGATCACAAGGTTCAAAGACTCCGTGAAAAA 37 3.6669970267591676 No Hit
+CAGCAAGTGTGGTCTTTGTTCAAGTAAGCTTGCACCTGAGTTTGCGCTGC 11 1.0901883052527255 No Hit
+CCACATCTCTCTCCCATTTGATCTATACGTAGACAGGTTCTAGATCCGGT 5 0.4955401387512388 No Hit
+CAGAGTAAATCCTTGAGTGGCCTCTAGGCTCACATAATAGAAGTCAATCC 2 0.19821605550049554 No Hit
+>>END_MODULE
+>>Adapter Content pass
+#Position Illumina Universal Adapter Illumina Small RNA 3' Adapter Illumina Small RNA 5' Adapter Nextera Transposase Sequence PolyA PolyG
+1 0.0 0.0 0.0 0.0 0.0 0.0
+2 0.0 0.0 0.0 0.0 0.0 0.0
+3 0.0 0.0 0.0 0.0 0.0 0.0
+4 0.0 0.0 0.0 0.0 0.0 0.0
+5 0.0 0.0 0.0 0.0 0.0 0.0
+6 0.0 0.0 0.0 0.0 0.0 0.0
+7 0.0 0.0 0.0 0.0 0.0 0.0
+8 0.0 0.0 0.0 0.0 0.0 0.0
+9 0.0 0.0 0.0 0.0 0.0 0.0
+10 0.0 0.0 0.0 0.0 0.0 0.0
+11 0.0 0.0 0.0 0.0 0.0 0.0
+12 0.0 0.0 0.0 0.0 0.0 0.0
+13 0.0 0.0 0.0 0.0 0.0 0.0
+14 0.0 0.0 0.0 0.0 0.0 0.0
+15 0.0 0.0 0.0 0.0 0.0 0.0
+16 0.0 0.0 0.0 0.0 0.0 0.0
+17 0.0 0.0 0.0 0.0 0.0 0.0
+18 0.0 0.0 0.0 0.0 0.0 0.0
+19 0.0 0.0 0.0 0.0 0.0 0.0
+20 0.0 0.0 0.0 0.0 0.0 0.0
+21 0.0 0.0 0.0 0.0 0.0 0.0
+22 0.0 0.0 0.0 0.0 0.0 0.0
+23 0.0 0.0 0.0 0.0 0.0 0.0
+24 0.0 0.0 0.0 0.0 0.0 0.0
+25 0.0 0.0 0.0 0.0 0.0 0.0
+26 0.0 0.0 0.0 0.0 0.0 0.0
+27 0.0 0.0 0.0 0.0 0.0 0.0
+28 0.0 0.0 0.0 0.0 0.0 0.0
+29 0.0 0.0 0.0 0.0 0.0 0.0
+30 0.0 0.0 0.0 0.0 0.0 0.0
+31 0.0 0.0 0.0 0.0 0.0 0.0
+32 0.0 0.0 0.0 0.0 0.0 0.0
+33 0.0 0.0 0.0 0.0 0.0 0.0
+34 0.0 0.0 0.0 0.0 0.0 0.0
+35 0.0 0.0 0.0 0.0 0.0 0.0
+36 0.0 0.0 0.0 0.0 0.0 0.0
+37 0.0 0.0 0.0 0.0 0.0 0.0
+38 0.0 0.0 0.0 0.0 0.0 0.0
+39 0.0 0.0 0.0 0.0 0.0 0.0
+>>END_MODULE
diff --git a/tests/equivalence/reference/realistic_default/fastqc_report.html b/tests/equivalence/reference/realistic_default/fastqc_report.html
new file mode 100644
index 0000000..b830093
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/fastqc_report.html
@@ -0,0 +1,187 @@
+
realistic.fastq.gz FastQC Report
\ No newline at end of file
diff --git a/tests/equivalence/reference/realistic_default/summary.txt b/tests/equivalence/reference/realistic_default/summary.txt
new file mode 100644
index 0000000..1069074
--- /dev/null
+++ b/tests/equivalence/reference/realistic_default/summary.txt
@@ -0,0 +1,10 @@
+PASS Basic Statistics realistic.fastq.gz
+FAIL Per base sequence quality realistic.fastq.gz
+FAIL Per sequence quality scores realistic.fastq.gz
+WARN Per base sequence content realistic.fastq.gz
+FAIL Per sequence GC content realistic.fastq.gz
+PASS Per base N content realistic.fastq.gz
+PASS Sequence Length Distribution realistic.fastq.gz
+PASS Sequence Duplication Levels realistic.fastq.gz
+FAIL Overrepresented sequences realistic.fastq.gz
+PASS Adapter Content realistic.fastq.gz
diff --git a/tests/equivalence/test_cases.yaml b/tests/equivalence/test_cases.yaml
index 7e2b758..6ea8f00 100644
--- a/tests/equivalence/test_cases.yaml
+++ b/tests/equivalence/test_cases.yaml
@@ -76,3 +76,14 @@
- name: complex_dup_length_10
file: complex.fastq
args: [--dup_length, "10"]
+
+# Realistic mid-size input designed to exercise non-round overrepresented
+# percentages. 1009 reads (prime), 50bp uniform, 5 deliberately-overrepresented
+# sequences at counts (73, 37, 11, 5, 2) producing percentages with full
+# Double.toString() precision (e.g. 7.234886025768088). Background reads are
+# pseudo-random and below the 0.1% overrepresented threshold so they don't
+# pollute the section. Generated deterministically by the script
+# tests/data/gen_realistic.py (see its module docstring).
+- name: realistic_default
+ file: realistic.fastq.gz
+ args: []