From f590fe91569f0c7e51b2bf41cd45bfbbeb631d54 Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Wed, 18 Mar 2026 10:27:36 +0100 Subject: [PATCH 1/3] Feat: Use nucleotide position instead of integer. Therefore, the interger is already validated --- src/GenomicPosition.php | 10 +++------- src/GenomicRegion.php | 27 ++++++++++----------------- src/NucleotidePosition.php | 18 ++++++++++++++++++ tests/GenomicPositionTest.php | 9 +++++---- tests/GenomicRegionTest.php | 2 +- 5 files changed, 37 insertions(+), 29 deletions(-) create mode 100644 src/NucleotidePosition.php diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index bdf4eb3..4c859ee 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -10,14 +10,10 @@ class GenomicPosition public int $position; - public function __construct(Chromosome $chromosome, int $position) + public function __construct(Chromosome $chromosome, NucleotidePosition $position) { - if ($position < 1) { - throw new \InvalidArgumentException("Position must be positive, got: {$position}."); - } - $this->chromosome = $chromosome; - $this->position = $position; + $this->position = $position->value; } /** @example GenomicPosition::parse('chr1:123456') */ @@ -27,7 +23,7 @@ public static function parse(string $value): self throw new \InvalidArgumentException("Invalid genomic position format: {$value}. Expected format: chr1:123456."); } - return new self(new Chromosome($matches[1]), (int) $matches[3]); + return new self(new Chromosome($matches[1]), new NucleotidePosition($matches[3])); } public function equals(self $other): bool diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 3acf3a1..4a80bc0 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -14,24 +14,17 @@ class GenomicRegion public function __construct( Chromosome $chromosome, - int $start, - int $end + NucleotidePosition $start, + NucleotidePosition $end ) { - if ($start < 1) { - throw new \InvalidArgumentException("Start must be positive, got: {$start}."); - } - - if ($end < 1) { - throw new \InvalidArgumentException("End must be positive, got: {$end}."); - } - if ($start > $end) { - throw new \InvalidArgumentException("End ({$end}) must not be less than start ({$start})."); + if ($start->value > $end->value) { + throw new \InvalidArgumentException("End ({$end->value}) must not be less than start ({$start->value})."); } $this->chromosome = $chromosome; - $this->start = $start; - $this->end = $end; + $this->start = $start->value; + $this->end = $end->value; } public static function parse(string $value): self @@ -42,8 +35,8 @@ public static function parse(string $value): self return new self( new Chromosome($matches[1]), - (int) $matches[3], - (int) ($matches[5] ?? $matches[3]) + new NucleotidePosition($matches[3]), + new NucleotidePosition($matches[5] ?? $matches[3]) ); } @@ -98,8 +91,8 @@ public function intersection(self $other): ?self return new self( $this->chromosome, - max($this->start, $other->start), - min($this->end, $other->end) + new NucleotidePosition(max($this->start, $other->start)), + new NucleotidePosition(min($this->end, $other->end)) ); } diff --git a/src/NucleotidePosition.php b/src/NucleotidePosition.php new file mode 100644 index 0000000..97ca571 --- /dev/null +++ b/src/NucleotidePosition.php @@ -0,0 +1,18 @@ +value = $position; + } +} \ No newline at end of file diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index 6523235..e6d9524 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -3,6 +3,7 @@ use MLL\Utils\Chromosome; use MLL\Utils\GenomicPosition; use MLL\Utils\NamingConvention; +use MLL\Utils\NucleotidePosition; use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; @@ -46,18 +47,18 @@ public function testEquals(): void ); } - public function testConstructorRejectsNonPositivePosition(): void + public function testConstructorRejectsNegativePosition(): void { self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage('Position must be positive, got: 0.'); - new GenomicPosition(new Chromosome('chr1'), 0); + self::expectExceptionMessage('Position must be positive, got: -1.'); + new NucleotidePosition(-1); } /** @return iterable */ public static function invalidFormats(): iterable { yield ['11:1test']; - yield ['chr1:0']; + yield ['chr1:-1']; yield ['chr1:']; yield ['chr1']; } diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index feced0c..ac17281 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -195,7 +195,7 @@ public function testDifferentChromosomesNeverMatch(): void public function testParseRejectsPositionZero(): void { self::expectException(\InvalidArgumentException::class); - GenomicRegion::parse('chr1:0-10'); + GenomicRegion::parse('chr1:-1-10'); } public function testContainsRegionIsInverseOfIsCoveredBy(): void From 0f5b019efc87c48ef0dd6357f3cc3319ba915eb6 Mon Sep 17 00:00:00 2001 From: KingKong1213 <168984406+KingKong1213@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:55:53 +0000 Subject: [PATCH 2/3] Apply php-cs-fixer changes --- src/GenomicRegion.php | 1 - src/NucleotidePosition.php | 4 ++-- tests/GenomicPositionTest.php | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 4a80bc0..6ff3085 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -17,7 +17,6 @@ public function __construct( NucleotidePosition $start, NucleotidePosition $end ) { - if ($start->value > $end->value) { throw new \InvalidArgumentException("End ({$end->value}) must not be less than start ({$start->value})."); } diff --git a/src/NucleotidePosition.php b/src/NucleotidePosition.php index 97ca571..47b4ee9 100644 --- a/src/NucleotidePosition.php +++ b/src/NucleotidePosition.php @@ -1,4 +1,4 @@ -value = $position; } -} \ No newline at end of file +} diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index e6d9524..86cfc12 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -1,6 +1,5 @@ Date: Wed, 18 Mar 2026 12:08:22 +0100 Subject: [PATCH 3/3] fix: enforce 1-based coordinates, add 0-based half-open conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NucleotidePosition must reject position 0 to stay consistent with the 1-based closed coordinate system used by length(), containsPosition(), intersection() and HGVS g. notation. Add GenomicRegion::fromZeroBasedHalfOpen() and toZeroBasedHalfOpen() for BED/BAM/bigWig interoperability: convert at the I/O boundary, work internally in 1-based closed coordinates. 🤖 Generated with Claude Code --- src/GenomicRegion.php | 16 +++++++++++++ src/NucleotidePosition.php | 2 +- tests/GenomicPositionTest.php | 8 +++++++ tests/GenomicRegionTest.php | 45 ++++++++++++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 6ff3085..84f496e 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -95,6 +95,22 @@ public function intersection(self $other): ?self ); } + /** Constructs a 1-based closed region from 0-based half-open coordinates (BED, BAM, bigWig). */ + public static function fromZeroBasedHalfOpen(string $chromosome, int $start, int $end): self + { + return new self( + new Chromosome($chromosome), + new NucleotidePosition($start + 1), + new NucleotidePosition($end) + ); + } + + /** @return array{Chromosome, int, int} Chromosome, 0-based start, half-open end. */ + public function toZeroBasedHalfOpen(): array + { + return [$this->chromosome, $this->start - 1, $this->end]; + } + private function containsCoordinate(int $position): bool { return $position >= $this->start && $position <= $this->end; diff --git a/src/NucleotidePosition.php b/src/NucleotidePosition.php index 47b4ee9..c455e47 100644 --- a/src/NucleotidePosition.php +++ b/src/NucleotidePosition.php @@ -10,7 +10,7 @@ class NucleotidePosition public function __construct($positionAsMixed) { $position = SafeCast::toInt($positionAsMixed); - if ($position < 0) { + if ($position < 1) { throw new \InvalidArgumentException("Position must be positive, got: {$position}."); } $this->value = $position; diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index 86cfc12..788d5d5 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -46,6 +46,13 @@ public function testEquals(): void ); } + public function testConstructorRejectsZeroPosition(): void + { + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage('Position must be positive, got: 0.'); + new NucleotidePosition(0); + } + public function testConstructorRejectsNegativePosition(): void { self::expectException(\InvalidArgumentException::class); @@ -57,6 +64,7 @@ public function testConstructorRejectsNegativePosition(): void public static function invalidFormats(): iterable { yield ['11:1test']; + yield ['chr1:0']; yield ['chr1:-1']; yield ['chr1:']; yield ['chr1']; diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index ac17281..796d888 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -195,7 +195,7 @@ public function testDifferentChromosomesNeverMatch(): void public function testParseRejectsPositionZero(): void { self::expectException(\InvalidArgumentException::class); - GenomicRegion::parse('chr1:-1-10'); + GenomicRegion::parse('chr1:0-10'); } public function testContainsRegionIsInverseOfIsCoveredBy(): void @@ -219,6 +219,49 @@ public function testIsCoveredByDifferentChromosomes(): void self::assertFalse($region->isCoveredBy(GenomicRegion::parse('chr12:1-100'))); } + public function testFromZeroBasedHalfOpenConvertsToOneBased(): void + { + // BED: chr7 55249070 55249171 (EGFR Exon 19, 0-based half-open) + $region = GenomicRegion::fromZeroBasedHalfOpen('chr7', 55249070, 55249171); + + self::assertSame(55249071, $region->start); + self::assertSame(55249171, $region->end); + self::assertSame(101, $region->length()); + } + + public function testFromZeroBasedHalfOpenSingleBase(): void + { + // BED single base: chr1 99 100 → 1-based chr1:100-100 + $region = GenomicRegion::fromZeroBasedHalfOpen('chr1', 99, 100); + + self::assertSame(100, $region->start); + self::assertSame(100, $region->end); + self::assertSame(1, $region->length()); + } + + public function testToZeroBasedHalfOpenRoundTrips(): void + { + $region = GenomicRegion::fromZeroBasedHalfOpen('chr7', 55249070, 55249171); + + [$chromosome, $start, $end] = $region->toZeroBasedHalfOpen(); + + self::assertSame('7', $chromosome->value()); + self::assertSame(55249070, $start); + self::assertSame(55249171, $end); + } + + public function testFromZeroBasedHalfOpenLengthMatchesBedFormula(): void + { + $bedStart = 1000; + $bedEnd = 2000; + + $region = GenomicRegion::fromZeroBasedHalfOpen('chr1', $bedStart, $bedEnd); + + // BED length = end - start; 1-based length = end - start + 1 + // Both must agree on the actual number of bases + self::assertSame($bedEnd - $bedStart, $region->length()); + } + public function testIntersectionIsCommutative(): void { $a = GenomicRegion::parse('chr11:10-20');