Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 30 additions & 21 deletions src/IlluminaRunFolder.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,62 +2,71 @@

namespace MLL\Utils;

use Carbon\CarbonImmutable;
use Carbon\Carbon;

use function Safe\preg_match;

/**
* Parses Illumina sequencer run folder names (YYYYMMDD_InstrumentID_RunNumber_FlowcellSegment, or the same layout with a 6-digit YYMMDD date).
*/
class IlluminaRunFolder
{
public CarbonImmutable $date;
private const FLOWCELL_ID_PATTERN = '/\d*-?([A-Z].+)$/';

public Carbon $date;

public string $instrumentID;

public int $runNumber;

/** Flowcell ID with the optional zero-prefix of the raw segment stripped, e.g. 000000000-AGKG7 → AGKG7. */
public string $flowcellID;

public function __construct(CarbonImmutable $date, string $instrumentID, int $runNumber, string $flowcellID)
public function __construct(Carbon $date, string $instrumentID, int $runNumber, string $flowcellID)
{
$this->date = $date;
$this->instrumentID = $instrumentID;
$this->runNumber = $runNumber;
$this->flowcellID = $flowcellID;
}

/** @example IlluminaRunFolder::parse('/path/to/20260205_SH01038_0007_ASC2139476-SC3') */
/**
* Accepts both bare folder names and paths with forward or backslashes.
*
* @example IlluminaRunFolder::parse('foo\bar\260310_M02074_1219_000000000-MB4RJ')
* @example IlluminaRunFolder::parse('/path/to/20260205_SH01038_0007_ASC2139476-SC3')
*/
public static function parse(string $runFolder): self
{
$folderName = basename($runFolder);
// Normalize backslashes so basename() works on Linux with Windows-style paths
$normalized = str_replace('\\', '/', $runFolder);
$folderName = basename($normalized);

$parts = explode('_', $folderName, 4);
if (count($parts) !== 4) {
throw new \InvalidArgumentException("Invalid run folder format: {$runFolder}. Expected format: YYYYMMDD_InstrumentID_RunNumber_FlowcellID.");
}

[$dateString, $instrumentID, $runNumberString, $flowcellID] = $parts;
[$dateString, $instrumentID, $runNumberString, $flowcellSegment] = $parts;

if (preg_match('/^\d{8}$/', $dateString) === 0) {
throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}. Expected format: YYYYMMDD.");
if (preg_match('/^(\d{6}|\d{8})$/', $dateString) === 0) {
throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}. Expected 6 or 8 digit date.");
}

$date = CarbonImmutable::createFromFormat('!Ymd', $dateString);
if (! $date instanceof \Carbon\CarbonImmutable) {
throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}. Expected format: YYYYMMDD.");
$format = strlen($dateString) === 8 ? '!Ymd' : '!ymd';
$date = Carbon::createFromFormat($format, $dateString);
if (! $date instanceof Carbon) {
throw new \InvalidArgumentException("Invalid date in run folder: {$dateString}.");
}

if ($runNumberString === '' || ! ctype_digit($runNumberString)) {
throw new \InvalidArgumentException("Invalid run number in run folder: {$runNumberString}. Expected a numeric value.");
}

return new self($date, $instrumentID, (int) $runNumberString, $flowcellID);
}
if (preg_match(self::FLOWCELL_ID_PATTERN, $flowcellSegment, $matches) !== 1) {
throw new \InvalidArgumentException("Cannot extract flowcell ID from: {$flowcellSegment}");
}

public function toString(): string
{
return implode('_', [
$this->date->format('Ymd'),
$this->instrumentID,
str_pad((string) $this->runNumber, 4, '0', STR_PAD_LEFT),
$this->flowcellID,
]);
return new self($date, $instrumentID, (int) $runNumberString, $matches[1]);
}
}
62 changes: 53 additions & 9 deletions tests/IlluminaRunFolderTest.php
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
<?php declare(strict_types=1);

namespace MLL\Utils\Tests;

use MLL\Utils\IlluminaRunFolder;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;

final class IlluminaRunFolderTest extends TestCase
{
public function testParse(): void
public function testParseMiSeqI100(): void
{
$folder = IlluminaRunFolder::parse('20260205_SH01038_0007_ASC2139476-SC3');

Expand All @@ -16,21 +18,56 @@ public function testParse(): void
self::assertSame('ASC2139476-SC3', $folder->flowcellID);
}

public function testParseFromPath(): void
public function testParseMiSeqWithZeroPrefixedFlowcell(): void
{
$folder = IlluminaRunFolder::parse('/data/sequencing/20260205_SH01038_0007_ASC2139476-SC3');
$folder = IlluminaRunFolder::parse('151231_M01261_0163_000000000-AGKG7');

self::assertSame('2015-12-31', $folder->date->format('Y-m-d'));
self::assertSame('M01261', $folder->instrumentID);
self::assertSame(163, $folder->runNumber);
self::assertSame('AGKG7', $folder->flowcellID);
}

/** A folder name with a 6-digit date and an unprefixed flowcell segment yields all four fields unchanged. */
public function testParseNextSeq(): void
{
    $runFolder = IlluminaRunFolder::parse('160205_NB501352_0003_AH7LFFAFXX');

    // The 6-digit date 160205 is expected to resolve to 2016-02-05.
    self::assertSame('2016-02-05', $runFolder->date->format('Y-m-d'));
    self::assertSame('NB501352', $runFolder->instrumentID);
    // Leading zeros of the run number are dropped by the integer conversion.
    self::assertSame(3, $runFolder->runNumber);
    self::assertSame('AH7LFFAFXX', $runFolder->flowcellID);
}

/** The "000000000-" prefix is removed even when the remaining flowcell ID has a digit right after its first letter. */
public function testParseMiSeqNanoFlowcell(): void
{
    $runFolder = IlluminaRunFolder::parse('160315_M01111_0231_000000000-D0WDA');

    self::assertSame('D0WDA', $runFolder->flowcellID);
}

/** A zero-prefixed segment whose flowcell ID begins with "L" is reduced to that ID. */
public function testParseFlowcellStartingWithL(): void
{
    $runFolder = IlluminaRunFolder::parse('231013_M02074_0918_000000000-L6G7G');

    self::assertSame('L6G7G', $runFolder->flowcellID);
}

/** Only the last component of a forward-slash path is parsed; the leading directories are ignored. */
public function testParseFromForwardSlashPath(): void
{
    $runFolder = IlluminaRunFolder::parse('/path/to/20260205_SH01038_0007_ASC2139476-SC3');

    self::assertSame('2026-02-05', $runFolder->date->format('Y-m-d'));
    self::assertSame('SH01038', $runFolder->instrumentID);
    self::assertSame(7, $runFolder->runNumber);
    // This segment has no zero-prefix, so it is expected back in full.
    self::assertSame('ASC2139476-SC3', $runFolder->flowcellID);
}

public function testToString(): void
public function testParseFromBackslashPath(): void
{
$folder = IlluminaRunFolder::parse('20260205_SH01038_0007_ASC2139476-SC3');
$folder = IlluminaRunFolder::parse('foo\bar\260310_M02074_1219_000000000-MB4RJ');

self::assertSame('20260205_SH01038_0007_ASC2139476-SC3', $folder->toString());
self::assertSame('2026-03-10', $folder->date->format('Y-m-d'));
self::assertSame('M02074', $folder->instrumentID);
self::assertSame(1219, $folder->runNumber);
self::assertSame('MB4RJ', $folder->flowcellID);
}

public function testParseRejectsInvalidPartCount(): void
Expand All @@ -43,7 +80,7 @@ public function testParseRejectsInvalidPartCount(): void
public function testParseRejectsInvalidDate(): void
{
self::expectException(\InvalidArgumentException::class);
self::expectExceptionMessage('Expected format: YYYYMMDD.');
self::expectExceptionMessage('Expected 6 or 8 digit date.');
IlluminaRunFolder::parse('not-a-date_SH01038_0007_ASC2139476-SC3');
}

Expand All @@ -62,4 +99,11 @@ public function testParseRejectsInvalidRunNumber(string $value): void
self::expectExceptionMessage('Expected a numeric value.');
IlluminaRunFolder::parse($value);
}

/** A purely numeric flowcell segment must be rejected with an InvalidArgumentException naming that segment. */
public function testParseRejectsUnparsableFlowcellID(): void
{
    // Expectations have to be registered before the call that throws.
    self::expectException(\InvalidArgumentException::class);
    self::expectExceptionMessage('Cannot extract flowcell ID from: 12345');

    $digitsOnlyFlowcell = '20260205_SH01038_0007_12345';
    IlluminaRunFolder::parse($digitsOnlyFlowcell);
}
}