diff --git a/src/Concentration/MolarityConverter.php b/src/Concentration/MolarityConverter.php new file mode 100644 index 0000000..cecb89d --- /dev/null +++ b/src/Concentration/MolarityConverter.php @@ -0,0 +1,39 @@ + */ + public static function parse(string $csvContent): Collection + { + $csvContent = StringUtil::toUTF8($csvContent); + $delimiter = self::detectDelimiter($csvContent); + + $rows = CSVArray::toArray($csvContent, $delimiter); + + return (new Collection($rows)) + ->map(static fn (array $row): CompactRegionTableRecord => self::recordFromRow($row)); + } + + /** @param array $row One parsed CSV row keyed by column header; it is screened for High Sensitivity headers first, missing columns fall back to an empty string before conversion, and the size columns accept either the [bp] or the [nt] header. */ + private static function recordFromRow(array $row): CompactRegionTableRecord + { + self::rejectHighSensitivityAssay($row); + + return new CompactRegionTableRecord( + $row['FileName'] ?? '', + Coordinates::fromString($row['WellId'] ?? '', new CoordinateSystem12x8()), + $row['Sample Description'] ?? '', + self::parseNullableInt($row, 'From [bp]', 'From [nt]'), + SafeCast::toInt($row['To [bp]'] ?? $row['To [nt]'] ?? ''), + SafeCast::toInt($row['Average Size [bp]'] ?? $row['Average Size [nt]'] ?? ''), + self::parseConcentration($row), + SafeCast::toFloat($row[self::MOLARITY_KEY] ?? ''), + SafeCast::toFloat($row['% of Total'] ?? ''), + $row['Region Comment'] ?? '' + ); + } + + /** + * HS assays use pg/µl + pmol/l (1000× smaller than ng/µl + nmol/l). + * Silently parsing those would produce dangerously wrong results. + * + * @param array $row Row whose column headers are inspected; a \RuntimeException is thrown when the exact pmol/l region molarity key exists or when any key starts with the pg concentration prefix. + */ + private static function rejectHighSensitivityAssay(array $row): void + { + if (array_key_exists('Region Molarity [pmol/l]', $row)) { + throw new \RuntimeException('High Sensitivity assay detected (pmol/l). This parser only supports standard assays (nmol/l).'); + } + + foreach (array_keys($row) as $key) { + if (strpos($key, 'Conc. [pg/') === 0) { + throw new \RuntimeException('High Sensitivity assay detected (pg/µl). 
This parser only supports standard assays (ng/µl).'); + } + } + } + + /** @param array $row Row scanned in iteration order; the value of the first key that starts with CONCENTRATION_KEY_PREFIX is converted to float, and a \RuntimeException is thrown when no key matches. */ + private static function parseConcentration(array $row): float + { + foreach ($row as $key => $value) { + if (strpos($key, self::CONCENTRATION_KEY_PREFIX) === 0) { + return SafeCast::toFloat($value); + } + } + + throw new \RuntimeException('Concentration column not found. Expected column starting with "' . self::CONCENTRATION_KEY_PREFIX . '"'); + } + + /** @param array $row Row to read from; the result is null when both keys are absent or the resolved value is an empty string, otherwise the primary key is preferred over the fallback key and its value is converted to int. */ + private static function parseNullableInt(array $row, string $primaryKey, string $fallbackKey): ?int + { + if (! array_key_exists($primaryKey, $row) && ! array_key_exists($fallbackKey, $row)) { + return null; + } + + $value = $row[$primaryKey] ?? $row[$fallbackKey] ?? ''; + + return $value === '' ? null : SafeCast::toInt($value); + } + + /** Chooses the semicolon only when the first line contains strictly more semicolons than commas; otherwise the comma is used, ties included. */ private static function detectDelimiter(string $csvContent): string + { + $firstLine = explode("\n", $csvContent, 2)[0]; + + $semicolonCount = substr_count($firstLine, ';'); + $commaCount = substr_count($firstLine, ','); + + return $semicolonCount > $commaCount ? ';' : ','; + } +} diff --git a/src/TapeStation/CompactRegionTableRecord.php b/src/TapeStation/CompactRegionTableRecord.php new file mode 100644 index 0000000..a6abdb0 --- /dev/null +++ b/src/TapeStation/CompactRegionTableRecord.php @@ -0,0 +1,57 @@ + */ + public Coordinates $coordinates; + + public string $sampleDescription; + + /** Null when the column is absent from the export. The parser also stores null when the cell is an empty string. */ + public ?int $from; + + public int $to; + + /** Center of mass, not peak maximum. 
*/ + public int $averageSize; + + public float $concentrationNgPerUl; + + public float $regionMolarityNmolPerL; + + public float $percentOfTotal; + + public string $regionComment; + + /** @param Coordinates $coordinates */ + public function __construct( + string $fileName, + Coordinates $coordinates, + string $sampleDescription, + ?int $from, + int $to, + int $averageSize, + float $concentrationNgPerUl, + float $regionMolarityNmolPerL, + float $percentOfTotal, + string $regionComment + ) { + $this->fileName = $fileName; + $this->coordinates = $coordinates; + $this->sampleDescription = $sampleDescription; + $this->from = $from; + $this->to = $to; + $this->averageSize = $averageSize; + $this->concentrationNgPerUl = $concentrationNgPerUl; + $this->regionMolarityNmolPerL = $regionMolarityNmolPerL; + $this->percentOfTotal = $percentOfTotal; + $this->regionComment = $regionComment; + } +} diff --git a/tests/Concentration/MolarityConverterTest.php b/tests/Concentration/MolarityConverterTest.php new file mode 100644 index 0000000..3ee0c6e --- /dev/null +++ b/tests/Concentration/MolarityConverterTest.php @@ -0,0 +1,62 @@ +expectException(\InvalidArgumentException::class); + $this->expectExceptionMessage('Fragment size must be positive'); + + MolarityConverter::massConcentrationToMolarity(10.0, 0, MolarityConverter::DALTONS_PER_BASE_PAIR_DSDNA); + } + + public function testThrowsOnNegativeFragmentSize(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessage('Fragment size must be positive'); + + MolarityConverter::molarityToMassConcentration(10.0, -100, MolarityConverter::DALTONS_PER_BASE_PAIR_DSDNA); + } + + public function testThrowsOnZeroDaltonsPerUnit(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessage('Daltons per unit must be positive'); + + MolarityConverter::massConcentrationToMolarity(10.0, 400, 0.0); + } + + /** @return iterable Named datasets of three numbers each. NOTE(review): judging by the dataset labels the order looks like molarity, mass concentration in ng/µl, fragment size — confirm against the consuming test, which is not visible here. */ + public static function 
conversionPairs(): iterable + { + // Values verified against TapeStation Excel pooling sheet + yield 'FLT3-ITD sample 11.4 ng/µl, 489 bp' => [35.3, 11.4, 489]; + yield 'FLT3-ITD sample 9.35 ng/µl, 491 bp' => [28.9, 9.35, 491]; + yield 'Immunoreceptor TRB 400 bp' => [37.9, 10.0, 400]; + yield 'Immunoreceptor TRG 300 bp' => [50.5, 10.0, 300]; + yield 'Low concentration' => [0.09, 0.03, 488]; + } +} diff --git a/tests/TapeStation/CompactRegionTableParserTest.php b/tests/TapeStation/CompactRegionTableParserTest.php new file mode 100644 index 0000000..093e7f3 --- /dev/null +++ b/tests/TapeStation/CompactRegionTableParserTest.php @@ -0,0 +1,170 @@ +first(); + self::assertInstanceOf(CompactRegionTableRecord::class, $first); + self::assertSame('A1', $first->coordinates->toString()); + self::assertSame('Poko_FLT3-ITD_A1', $first->sampleDescription); + self::assertSame(200, $first->from); + self::assertSame(1000, $first->to); + self::assertSame(505, $first->averageSize); + self::assertSame(15.0, $first->concentrationNgPerUl); + self::assertSame(46.6, $first->regionMolarityNmolPerL); + self::assertEqualsWithDelta(87.94, $first->percentOfTotal, 0.01); + self::assertSame('FLT3-ITD MRD', $first->regionComment); + } + + public function testParseCommaDelimited(): void + { + $csv = <<<'CSV' + FileName,WellId,Sample Description,From [bp],To [bp],Average Size [bp],Conc. 
[ng/µl],Region Molarity [nmol/l],% of Total,Region Comment + 2026-02-25.D1000,A8,22-000001,200,700,320,7.61,36.1,92.5,IDT + CSV; + + $records = CompactRegionTableParser::parse($csv); + + self::assertCount(1, $records); + + $record = $records->first(); + self::assertInstanceOf(CompactRegionTableRecord::class, $record); + self::assertSame('A8', $record->coordinates->toString()); + self::assertSame('22-000001', $record->sampleDescription); + self::assertSame(320, $record->averageSize); + self::assertSame(7.61, $record->concentrationNgPerUl); + self::assertSame(36.1, $record->regionMolarityNmolPerL); + } + + public function testParseWithNtUnits(): void + { + $csv = <<<'CSV' + FileName,WellId,Sample Description,From [nt],To [nt],Average Size [nt],Conc. [ng/µl],Region Molarity [nmol/l],% of Total,Region Comment + export.D1000,B2,RNA_179_23-025829_F2,200,4000,1800,34.7,29.5,85.0,WTS + CSV; + + $records = CompactRegionTableParser::parse($csv); + + self::assertCount(1, $records); + + $record = $records->first(); + self::assertInstanceOf(CompactRegionTableRecord::class, $record); + self::assertSame(1800, $record->averageSize); + self::assertSame(34.7, $record->concentrationNgPerUl); + } + + public function testParseWithMuAsLatin1Byte(): void + { + // Latin-1 µ (0xB5) without UTF-8 prefix — occurs when files are saved as ISO-8859-1 + $latin1Header = "FileName;WellId;Sample Description;From [bp];To [bp];Average Size [bp];Conc. [ng/\xB5l];Region Molarity [nmol/l];% of Total;Region Comment"; + $csv = $latin1Header . "\n" . '2026-02-25.D1000;A1;Sample1;200;1000;500;12.5;38.0;90.0;MRD'; + + $records = CompactRegionTableParser::parse($csv); + + self::assertCount(1, $records); + $record = $records->first(); + self::assertInstanceOf(CompactRegionTableRecord::class, $record); + self::assertSame(12.5, $record->concentrationNgPerUl); + } + + public function testSkipsEmptyLines(): void + { + $csv = <<<'CSV' + FileName;WellId;Sample Description;From [bp];To [bp];Average Size [bp];Conc. 
[ng/µl];Region Molarity [nmol/l];% of Total;Region Comment + 2026-02-25.D1000;A1;Sample1;200;1000;500;12.5;38.0;90.0;MRD + + 2026-02-25.D1000;B1;Sample2;200;1000;490;8.3;25.1;85.0;MRD + + CSV; + + $records = CompactRegionTableParser::parse($csv); + + self::assertCount(2, $records); + } + + public function testHeadersOnlyReturnsEmptyCollection(): void + { + $csv = <<<'CSV' + FileName;WellId;Sample Description;From [bp];To [bp];Average Size [bp];Conc. [ng/µl];Region Molarity [nmol/l];% of Total;Region Comment + CSV; + + $records = CompactRegionTableParser::parse($csv); + + self::assertCount(0, $records); + } + + public function testParseWithMissingFromColumn(): void + { + $csv = <<<'CSV' + FileName,WellId,Sample Description,To [bp],Average Size [bp],Conc. [ng/µl],Region Molarity [nmol/l],% of Total,Region Comment + 2026-02-25.D1000,A8,22-000001,550,336,7.61,36.1,80.91,IDT + CSV; + + $records = CompactRegionTableParser::parse($csv); + + self::assertCount(1, $records); + + $record = $records->first(); + self::assertInstanceOf(CompactRegionTableRecord::class, $record); + self::assertNull($record->from); + self::assertSame(550, $record->to); + self::assertSame(336, $record->averageSize); + self::assertSame(7.61, $record->concentrationNgPerUl); + } + + public function testThrowsOnMissingConcentrationColumn(): void + { + $csv = <<<'CSV' + FileName;WellId;Sample Description;From [bp];To [bp];Average Size [bp];Region Molarity [nmol/l];% of Total;Region Comment + 2026-02-25.D1000;A1;Sample1;200;1000;500;38.0;90.0;MRD + CSV; + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Concentration column not found'); + + CompactRegionTableParser::parse($csv); + } + + public function testThrowsOnHighSensitivityConcentration(): void + { + $csv = <<<'CSV' + FileName,WellId,Sample Description,From [bp],To [bp],Average Size [bp],Conc. 
[pg/µl],Region Molarity [nmol/l],% of Total,Region Comment + 2026-02-25.HSD1000,A1,Sample1,200,1000,500,125.0,38.0,90.0,MRD + CSV; + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('High Sensitivity assay detected (pg/µl)'); + + CompactRegionTableParser::parse($csv); + } + + public function testThrowsOnHighSensitivityMolarity(): void + { + $csv = <<<'CSV' + FileName,WellId,Sample Description,From [bp],To [bp],Average Size [bp],Conc. [ng/µl],Region Molarity [pmol/l],% of Total,Region Comment + 2026-02-25.HSD1000,A1,Sample1,200,1000,500,12.5,38000.0,90.0,MRD + CSV; + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('High Sensitivity assay detected (pmol/l)'); + + CompactRegionTableParser::parse($csv); + } +}