diff --git a/src/IlluminaRunFolder.php b/src/IlluminaRunFolder.php index 6e9c73a..6875d62 100644 --- a/src/IlluminaRunFolder.php +++ b/src/IlluminaRunFolder.php @@ -2,21 +2,27 @@ namespace MLL\Utils; -use Carbon\CarbonImmutable; +use Carbon\Carbon; use function Safe\preg_match; +/** + * Parses Illumina sequencer run folder names (YYYYMMDD_InstrumentID_RunNumber_FlowcellSegment). + */ class IlluminaRunFolder { - public CarbonImmutable $date; + private const FLOWCELL_ID_PATTERN = '/\d*-?([A-Z].+)$/'; + + public Carbon $date; public string $instrumentID; public int $runNumber; + /** Strips optional zero-prefix from raw segment: 000000000-AGKG7 → AGKG7. */ public string $flowcellID; - public function __construct(CarbonImmutable $date, string $instrumentID, int $runNumber, string $flowcellID) + public function __construct(Carbon $date, string $instrumentID, int $runNumber, string $flowcellID) { $this->date = $date; $this->instrumentID = $instrumentID; @@ -24,40 +30,43 @@ public function __construct(CarbonImmutable $date, string $instrumentID, int $ru $this->flowcellID = $flowcellID; } - /** @example IlluminaRunFolder::parse('/path/to/20260205_SH01038_0007_ASC2139476-SC3') */ + /** + * Accepts both bare folder names and paths with forward or backslashes. + * + * @example IlluminaRunFolder::parse('foo\bar\260310_M02074_1219_000000000-MB4RJ') + * @example IlluminaRunFolder::parse('/path/to/20260205_SH01038_0007_ASC2139476-SC3') + */ public static function parse(string $runFolder): self { - $folderName = basename($runFolder); + // Normalize backslashes so basename() works on Linux with Windows-style paths + $normalized = str_replace('\\', '/', $runFolder); + $folderName = basename($normalized); + $parts = explode('_', $folderName, 4); if (count($parts) !== 4) { throw new \InvalidArgumentException("Invalid run folder format: {$runFolder}. Expected format: YYYYMMDD_InstrumentID_RunNumber_FlowcellID."); } - [$dateString, $instrumentID, $runNumberString, $flowcellID] = $parts; + [$dateString, $instrumentID, $runNumberString, $flowcellSegment] = $parts; - if (preg_match('/^\d{8}$/', $dateString) === 0) { - throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}. Expected format: YYYYMMDD."); + if (preg_match('/^(\d{6}|\d{8})$/', $dateString) === 0) { + throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}. Expected 6 or 8 digit date."); } - $date = CarbonImmutable::createFromFormat('!Ymd', $dateString); - if (! $date instanceof \Carbon\CarbonImmutable) { - throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}. Expected format: YYYYMMDD."); + $format = strlen($dateString) === 8 ? '!Ymd' : '!ymd'; + $date = Carbon::createFromFormat($format, $dateString); + if (! $date instanceof Carbon) { + throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}."); } if ($runNumberString === '' || ! ctype_digit($runNumberString)) { throw new \InvalidArgumentException("Invalid run number in run folder: {$runNumberString}. Expected a numeric value."); } - return new self($date, $instrumentID, (int) $runNumberString, $flowcellID); - } + if (preg_match(self::FLOWCELL_ID_PATTERN, $flowcellSegment, $matches) !== 1) { + throw new \InvalidArgumentException("Cannot extract flowcell ID from: {$flowcellSegment}"); + } - public function toString(): string - { - return implode('_', [ - $this->date->format('Ymd'), - $this->instrumentID, - str_pad((string) $this->runNumber, 4, '0', STR_PAD_LEFT), - $this->flowcellID, - ]); + return new self($date, $instrumentID, (int) $runNumberString, $matches[1]); } } diff --git a/tests/IlluminaRunFolderTest.php b/tests/IlluminaRunFolderTest.php index 03a80bf..20d3d85 100644 --- a/tests/IlluminaRunFolderTest.php +++ b/tests/IlluminaRunFolderTest.php @@ -1,12 +1,14 @@ flowcellID); } - public function testParseFromPath(): void + public function testParseMiSeqWithZeroPrefixedFlowcell(): void { - $folder = IlluminaRunFolder::parse('/data/sequencing/20260205_SH01038_0007_ASC2139476-SC3'); + $folder = IlluminaRunFolder::parse('151231_M01261_0163_000000000-AGKG7'); + + self::assertSame('2015-12-31', $folder->date->format('Y-m-d')); + self::assertSame('M01261', $folder->instrumentID); + self::assertSame(163, $folder->runNumber); + self::assertSame('AGKG7', $folder->flowcellID); + } + + public function testParseNextSeq(): void + { + $folder = IlluminaRunFolder::parse('160205_NB501352_0003_AH7LFFAFXX'); + + self::assertSame('2016-02-05', $folder->date->format('Y-m-d')); + self::assertSame('NB501352', $folder->instrumentID); + self::assertSame(3, $folder->runNumber); + self::assertSame('AH7LFFAFXX', $folder->flowcellID); + } + + public function testParseMiSeqNanoFlowcell(): void + { + $folder = IlluminaRunFolder::parse('160315_M01111_0231_000000000-D0WDA'); + + self::assertSame('D0WDA', $folder->flowcellID); + } + + public function testParseFlowcellStartingWithL(): void + { + $folder = IlluminaRunFolder::parse('231013_M02074_0918_000000000-L6G7G'); + + self::assertSame('L6G7G', $folder->flowcellID); + } + + public function testParseFromForwardSlashPath(): void + { + $folder = IlluminaRunFolder::parse('/path/to/20260205_SH01038_0007_ASC2139476-SC3'); - self::assertSame('2026-02-05', $folder->date->format('Y-m-d')); self::assertSame('SH01038', $folder->instrumentID); - self::assertSame(7, $folder->runNumber); self::assertSame('ASC2139476-SC3', $folder->flowcellID); } - public function testToString(): void + public function testParseFromBackslashPath(): void { - $folder = IlluminaRunFolder::parse('20260205_SH01038_0007_ASC2139476-SC3'); + $folder = IlluminaRunFolder::parse('foo\bar\260310_M02074_1219_000000000-MB4RJ'); - self::assertSame('20260205_SH01038_0007_ASC2139476-SC3', $folder->toString()); + self::assertSame('2026-03-10', $folder->date->format('Y-m-d')); + self::assertSame('M02074', $folder->instrumentID); + self::assertSame(1219, $folder->runNumber); + self::assertSame('MB4RJ', $folder->flowcellID); } public function testParseRejectsInvalidPartCount(): void @@ -43,7 +80,7 @@ public function testParseRejectsInvalidPartCount(): void public function testParseRejectsInvalidDate(): void { self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage('Expected format: YYYYMMDD.'); + self::expectExceptionMessage('Expected 6 or 8 digit date.'); IlluminaRunFolder::parse('not-a-date_SH01038_0007_ASC2139476-SC3'); } @@ -62,4 +99,11 @@ public function testParseRejectsInvalidRunNumber(string $value): void self::expectExceptionMessage('Expected a numeric value.'); IlluminaRunFolder::parse($value); } + + public function testParseRejectsUnparsableFlowcellID(): void + { + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage('Cannot extract flowcell ID from: 12345'); + IlluminaRunFolder::parse('20260205_SH01038_0007_12345'); + } }