From 56cce4ac5bd3eb2def7e3350d78ab09a9c45a70f Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Fri, 22 May 2026 10:51:53 -0400 Subject: [PATCH 1/7] Add SpliceOutcomeSet.if_splicing_unchanged accessor (#391) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First step of the splice unification: the canonical "alternative outcome" accessor. Surfaces the coding consequence that applies if splicing proceeds normally — the NormalSplicing candidate's coding_effect — and unlike the old ExonicSpliceSite.alternate_effect works for intronic disruptions too (returns None when there's no coding consequence). Together with the existing most_likely_effect (top predicted mechanism) and candidates (full possibility set), this gives the three-accessor surface the unified design calls for. alternate_effect is kept as a deprecated alias for now; it's removed when the flag and the lightweight ExonicSpliceSite.alternate_effect form go away. Additive only — no behavior change yet. --- tests/test_splice_outcomes.py | 30 +++++++++++++++++++++++++++++ varcode/splice_outcomes.py | 36 +++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/tests/test_splice_outcomes.py b/tests/test_splice_outcomes.py index 7dc2d55..1d5f3d5 100644 --- a/tests/test_splice_outcomes.py +++ b/tests/test_splice_outcomes.py @@ -248,6 +248,36 @@ def test_normal_splicing_for_intronic_disruption_has_no_coding_effect(): assert normal.effect.mutant_protein_sequence is None +def test_if_splicing_unchanged_returns_normal_splicing_coding_effect(): + """SpliceOutcomeSet.if_splicing_unchanged surfaces the coding + consequence that applies when splicing proceeds — the + NormalSplicing candidate's coding_effect — and the deprecated + alternate_effect alias agrees.""" + variant = Variant("7", 117531114, "G", "T", ensembl_grch38) + transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) + splice_set = next( + e for e in variant.effects(splice_outcomes=True) + if e.transcript is transcript) + normal = _candidate_of_type(splice_set, NormalSplicing) + assert splice_set.if_splicing_unchanged is normal.effect.coding_effect + assert splice_set.if_splicing_unchanged is not None + # Deprecated alias delegates to the canonical accessor. + assert splice_set.alternate_effect is splice_set.if_splicing_unchanged + + +def test_if_splicing_unchanged_is_none_for_intronic_disruption(): + """Unlike the old ExonicSpliceSite.alternate_effect, the accessor + works for intronic disruptions — returning None when there's no + coding consequence if splicing proceeds.""" + variant = Variant("7", 117531115, "G", "A", ensembl_grch38) + transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) + splice_set = next( + e for e in variant.effects(splice_outcomes=True) + if e.transcript is transcript) + assert splice_set.if_splicing_unchanged is None + assert splice_set.alternate_effect is None + + def test_in_frame_exon_skipping_carries_aa_ref_and_protein_sequence(): """A resolved in-frame ExonSkipping has its own ``aa_ref`` / ``mutant_protein_sequence`` populated — no wrapper indirection.""" diff --git a/varcode/splice_outcomes.py b/varcode/splice_outcomes.py index a3f4c43..6ee609e 100644 --- a/varcode/splice_outcomes.py +++ b/varcode/splice_outcomes.py @@ -315,22 +315,38 @@ def priority_class(self): return self.disrupted_signal_class @property - def alternate_effect(self): - """Back-compat shim matching :attr:`ExonicSpliceSite.alternate_effect`. - - Resolves to the underlying coding change carried by the - :class:`NormalSplicing` candidate (its - :attr:`NormalSplicing.coding_effect`) — i.e. "the coding - change that applies when splicing proceeds normally." - Returns ``None`` when there is no :class:`NormalSplicing` - candidate or when it carries ``coding_effect=None`` (e.g. - the variant is purely intronic). + def if_splicing_unchanged(self): + """The coding effect that applies *if the spliceosome still + splices normally* despite the disrupted signal — i.e. the + protein consequence of the underlying nucleotide change on its + own. + + Resolves to the :class:`NormalSplicing` candidate's + :attr:`NormalSplicing.coding_effect`. Returns ``None`` when + there is no :class:`NormalSplicing` candidate or it carries no + coding consequence (e.g. a purely intronic disruption, where a + nucleotide change outside the CDS leaves the protein + untouched if splicing proceeds). + + This is the canonical "alternative outcome" accessor, alongside + :attr:`most_likely_effect` (the top predicted splicing + mechanism) and :attr:`candidates` (every plausible mechanism). + Unlike the old ``ExonicSpliceSite.alternate_effect`` it works + for intronic disruptions too. """ for candidate in self._candidates: if isinstance(candidate.effect, NormalSplicing): return candidate.effect.coding_effect return None + @property + def alternate_effect(self): + """Deprecated alias for :attr:`if_splicing_unchanged`, kept for + parity with the (also deprecated) + ``ExonicSpliceSite.alternate_effect``. Prefer + :attr:`if_splicing_unchanged`.""" + return self.if_splicing_unchanged + @property def candidate_proteins(self): """Mapping from each mechanism class to its computed mutant From bc844d3a0fffec1e9ce3376428cebe73f6af11ba Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Sun, 24 May 2026 18:32:19 -0400 Subject: [PATCH 2/7] Finalize effect_if_splicing_unchanged + expand splice docs (#391) Follow-up to 56cce4a. Two changes on top of the first-pass landing: - Rename `if_splicing_unchanged` -> `effect_if_splicing_unchanged` and delete the deprecated `alternate_effect` alias outright (no shim, no warning). The previous commit kept the alias for one release; since the PR has not shipped, the alias is dropped in the same PR rather than carried as a deprecation tail. - Read the underlying `NormalSplicing` candidate from the DNA-only `_candidates` set, not the public `candidates` (which also picks up RNA-attached outcomes). "If splicing proceeds normally" is a DNA property. Docs (docs/effect_annotation.md): - New "When splice disruption is in play" section enumerates the canonical window and the four splice signal classes (ExonicSpliceSite / SpliceDonor / SpliceAcceptor / IntronicSpliceSite), with a table of which class fires where. - New "Splice and coding effects can co-occur" section explains the ExonicSpliceSite duality (last/first 3 exonic bases hit both signal and codon), the intronic-only case (no coding effect), and the outside-the-window case (plain coding effect). - New "Common questions" cheat sheet maps each simple use case (is variant splice-disrupting, coding consequence if splicing proceeds, most likely mechanism, all candidates, most disruptive, candidate proteins, splice signal location) to the right accessor on both shapes. Tests: - Parametrize the "intronic classes lack alternate_effect" check over SpliceDonor / SpliceAcceptor / IntronicSpliceSite so the docstring claim is verified across all three. - Add `test_highest_priority_effect_returns_inner_effect` to cover the `_effect` accessor (`_candidate` was already covered). - Add `test_effect_if_splicing_unchanged_returns_coding_effect` and `test_effect_if_splicing_unchanged_none_for_intronic` on the renamed accessor. - Drop the two old `alternate_effect` alias tests (the alias is gone). - Shorten the four long test names flagged in review. 1230 tests pass. --- CHANGELOG.md | 9 + docs/effect_annotation.md | 162 +++++++++++++++++- .../test_exonic_splice_site_multi_outcome.py | 56 +++--- tests/test_splice_outcomes.py | 86 ++++++---- varcode/splice_outcomes.py | 22 +-- 5 files changed, 253 insertions(+), 82 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2869cc8..dbcf364 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,15 @@ through `MolecularPhaseResolver(source)` and lives behind the optional `varcode[rna]` / `pysam` dependency. `ReadPhaseResolver` remains as the varcode 5.0 compatibility name. +- `SpliceOutcomeSet.effect_if_splicing_unchanged` — the canonical + "alternative outcome" accessor: the coding consequence that applies + if splicing proceeds normally (the `NormalSplicing` candidate's + `coding_effect`). Unlike the legacy `ExonicSpliceSite.alternate_effect`, + it works for intronic splice disruptions — returning `None` when + the nucleotide change leaves the protein untouched (i.e. there is + no coding consequence to attach). Sits alongside `most_likely_effect` + and `candidates` as the three-accessor surface on `SpliceOutcomeSet` + ([#391](https://github.com/openvax/varcode/issues/391)). **Breaking** - Unified the multi-outcome machinery: `SpliceCandidate` deleted; diff --git a/docs/effect_annotation.md b/docs/effect_annotation.md index 5fa94db..aaff1c2 100644 --- a/docs/effect_annotation.md +++ b/docs/effect_annotation.md @@ -47,12 +47,76 @@ an `EffectCollection`. Each element is a `MutationEffect` subclass — `Substitution`, `Silent`, `PrematureStop`, and so on. -## Splice-disrupting variants: two representations +## Splice-disrupting variants -When a variant sits in the canonical splice window (last 3 -exonic bases, first 3–6 intronic, canonical donor/acceptor), -varcode recognizes it as splice-disrupting. Two ways the -effect is expressed, at different richness levels. +A single nucleotide change near an exon-intron boundary can hit +the splice signal *and* the coding sequence at the same time. +The splice surface captures both possibilities, gives every +splice-disrupting variant a uniform candidate-set shape, and +exposes accessors for the "what if splicing still proceeds?" +question. + +### When splice disruption is in play + +The classifier is **position-based**: it fires when a variant +lands in the canonical splice window around an exon-intron +boundary. The window covers: + +- the last 3 exonic bases of an exon (donor side) or the first + 3 exonic bases of the next exon (acceptor side) — these + straddle the boundary and sit on top of the canonical signal +- the intronic +1..+6 bases on the donor side and -1..-3 bases + on the acceptor side — the intronic bases the spliceosome + reads directly, including the canonical `GT` (+1/+2) and `AG` + (-2/-1) dinucleotides + +Four classes record *where* in this window the variant landed: + +| Class | Position | +|---|---| +| `ExonicSpliceSite` | Exonic side of either boundary (last 3 / first 3 of an exon) | +| `SpliceDonor` | Canonical `GT` at intronic +1 / +2 | +| `SpliceAcceptor` | Canonical `AG` at intronic -2 / -1 | +| `IntronicSpliceSite` | Other intronic positions in the window | + +Variants outside this window are **not** flagged as +splice-disrupting, even when they may affect splicing +biologically — ESE/ESS motifs mid-exon, branch points ~20–50 bp +upstream of the acceptor, deep intronic cryptic activation. +Detecting those requires ML predictors or direct RNA evidence; +see [Limitations](#limitations). + +### Splice and coding effects can co-occur + +A variant in an exon sits on a coding base by definition — it +rewrites a codon. If that same exonic base is **also** in the +splice window (i.e. within the last 3 / first 3 of the exon), +the same nucleotide change disrupts the splice signal *and* +changes the protein. varcode represents this duality as +**`ExonicSpliceSite`**: + +- on the default 2-outcome shape, splice disruption is the + primary effect; the coding consequence (a `Substitution`, + `Silent`, etc.) hangs off `.alternate_effect` +- on the opt-in `SpliceOutcomeSet` shape, the same coding + consequence is the `coding_effect` of the `NormalSplicing` + candidate, reachable through + `splice_set.effect_if_splicing_unchanged` + +For purely **intronic** disruptions (`SpliceDonor`, +`SpliceAcceptor`, `IntronicSpliceSite`), there is no codon to +rewrite — the variant doesn't change a coding base. The default +shape doesn't expose `alternate_effect` on these classes; the +opt-in shape's `effect_if_splicing_unchanged` returns `None`. + +For coding variants **outside** the splice window, varcode emits +a plain coding effect (`Substitution`, `Silent`, `FrameShift`, +…) with no splice annotation attached. The variant may still +disrupt splicing through a non-canonical mechanism, but varcode +won't flag it — see Limitations. + +Two splice-effect richness levels are available; the rest of +this section walks through both. ### Default: lightweight 2-outcome form @@ -109,6 +173,14 @@ for c in splice_set.candidates: print(c.effect.side, c.effect.retained_intron_start) ``` +For the common "what if splicing still proceeds?" question, use +`splice_set.effect_if_splicing_unchanged` — the `NormalSplicing` +candidate's `coding_effect` (a `Substitution`, `Silent`, etc., or +`None` for purely intronic disruptions). This is the +`SpliceOutcomeSet` analogue of the legacy +`ExonicSpliceSite.alternate_effect` and works for intronic +disruptions too. + When you opt in, `SpliceDonor` / `SpliceAcceptor` / `IntronicSpliceSite` also get wrapped, so every splice- disrupting variant produces a `SpliceOutcomeSet`. @@ -171,6 +243,86 @@ agree, which is common — but for clinical / functional filtering `highest_priority_*`: a disruptive candidate behind a less-disruptive primary candidate should still light up. +### Common questions + +A cheat sheet for the simple splice use cases. Each example +assumes `effect` is a single splice-disrupting effect, or +`splice_set` is a `SpliceOutcomeSet` (opt-in form). + +**Is this variant splice-disrupting?** + +```python +from varcode import MultiOutcomeEffect, SpliceOutcomeSet +from varcode.effects import ( + ExonicSpliceSite, SpliceDonor, SpliceAcceptor, IntronicSpliceSite, +) + +# Splice-specific (covers both shapes): +isinstance(effect, (SpliceOutcomeSet, ExonicSpliceSite, + SpliceDonor, SpliceAcceptor, IntronicSpliceSite)) + +# Broader: any multi-outcome effect, including SV outcomes +# (LargeDeletion, GeneFusion, ...) — use when you want one +# uniform handler for splice + SV ambiguity. +isinstance(effect, MultiOutcomeEffect) +``` + +**What coding consequence applies if splicing still proceeds?** + +```python +# Default (2-outcome) shape — guard, since intronic classes don't expose it: +if isinstance(effect, ExonicSpliceSite): + coding = effect.alternate_effect + +# Opt-in (SpliceOutcomeSet) shape — uniform for any disrupted signal: +coding = splice_set.effect_if_splicing_unchanged +``` + +**What's the most likely splice mechanism?** + +```python +splice_set.most_likely_effect # SpliceMechanismEffect +splice_set.most_likely_candidate # EffectCandidate (.effect + .source/.evidence) +``` + +**What are all candidate outcomes?** + +```python +for candidate in splice_set.candidates: + candidate.effect # SpliceMechanismEffect (ExonSkipping, IntronRetention, ...) + candidate.source # producer name + candidate.evidence # opaque dict of provenance fields +``` + +**Which outcome is the most disruptive?** + +```python +splice_set.highest_priority_effect # most protein-disruptive +splice_set.highest_priority_candidate +``` + +Use this for clinical / functional filtering ("flag if any +candidate is at least a frameshift") — a disruptive candidate +ranked below a less-disruptive primary should still light up. + +**What protein sequences could result?** + +```python +splice_set.candidate_proteins # {ExonSkipping: "MA...", IntronRetention: "", ...} +splice_set.mutant_protein_sequences # set[str] of distinct non-empty sequences +``` + +Empty string means the mechanism's protein math couldn't resolve +(typically: no `genomic_sequence` provider, so `IntronRetention` +and `CrypticDonor` stay predicted-only). + +**Where on the transcript is the splice signal?** + +```python +for candidate in splice_set.candidates: + candidate.effect.splice_signal # SpliceDonor / SpliceAcceptor / IntronicSpliceSite / ExonicSpliceSite +``` + ### Limitations The splice classifier is **position-based** — it fires on the diff --git a/tests/test_exonic_splice_site_multi_outcome.py b/tests/test_exonic_splice_site_multi_outcome.py index 545d7f9..9f1ac17 100644 --- a/tests/test_exonic_splice_site_multi_outcome.py +++ b/tests/test_exonic_splice_site_multi_outcome.py @@ -10,20 +10,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for the ExonicSpliceSite → MultiOutcomeEffect retrofit -(openvax/varcode#299 Part 1). - -The retrofit is a class-hierarchy change only: - -* ExonicSpliceSite inherits from MultiOutcomeEffect and exposes - the ``candidates`` / ``most_likely`` / ``priority_class`` - protocol. -* SpliceOutcomeSet gains a back-compat ``alternate_effect`` - property that resolves to the NormalSplicing candidate's - coding_effect. - -No behaviour change — the existing test suite (tests/test_splice_*) -continues to lock in byte-for-byte output. +"""Tests for the ExonicSpliceSite → MultiOutcomeEffect retrofit and +the SpliceOutcomeSet accessor surface +(openvax/varcode#299, #391, #392). + +The retrofit makes ExonicSpliceSite a MultiOutcomeEffect — it +exposes the ``candidates`` / ``most_likely`` / ``priority_class`` +protocol uniformly with SpliceOutcomeSet. The SpliceOutcomeSet +side exposes ``effect_if_splicing_unchanged``, which resolves to +the NormalSplicing candidate's coding_effect (the opt-in shape's +analogue of the legacy ``ExonicSpliceSite.alternate_effect``). """ from pyensembl import cached_release @@ -97,14 +93,15 @@ def test_exonic_splice_site_alternate_effect_still_first_class(): # ==================================================================== -# SpliceOutcomeSet gets alternate_effect back-compat property +# SpliceOutcomeSet exposes effect_if_splicing_unchanged # ==================================================================== -def test_splice_outcome_set_alternate_effect_resolves_to_normal_splicing(): +def test_effect_if_splicing_unchanged_matches_alternate_effect(): # ExonicSpliceSite wrapped under splice_outcomes=True becomes a - # SpliceOutcomeSet. Its .alternate_effect should return the same - # coding_effect that ExonicSpliceSite.alternate_effect would. + # SpliceOutcomeSet. Its .effect_if_splicing_unchanged should + # return the same coding_effect that ExonicSpliceSite.alternate_effect + # would. variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) @@ -116,14 +113,16 @@ def test_splice_outcome_set_alternate_effect_resolves_to_normal_splicing(): wrapped = next(e for e in wrapped_effects if e.transcript is transcript) # Same underlying coding-effect class; don't require object # identity since the wrapped path may rebuild it. - assert type(wrapped.alternate_effect) is type(bare_alt) - assert wrapped.alternate_effect.short_description == bare_alt.short_description + assert type(wrapped.effect_if_splicing_unchanged) is type(bare_alt) + assert ( + wrapped.effect_if_splicing_unchanged.short_description + == bare_alt.short_description) -def test_splice_outcome_set_alternate_effect_none_when_no_normal_splicing(): +def test_effect_if_splicing_unchanged_none_for_intronic_donor(): # SpliceDonor-backed SpliceOutcomeSet: the NormalSplicing candidate # exists but its coding_effect is None (intronic variant, no - # underlying coding change). alternate_effect should be None. + # underlying coding change). effect_if_splicing_unchanged should be None. variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) wrapped_effects = variant.effects(splice_outcomes=True) @@ -132,7 +131,7 @@ def test_splice_outcome_set_alternate_effect_none_when_no_normal_splicing(): c for c in wrapped.candidates if isinstance(c.effect, NormalSplicing)) assert normal.effect.coding_effect is None - assert wrapped.alternate_effect is None + assert wrapped.effect_if_splicing_unchanged is None # ==================================================================== @@ -154,7 +153,12 @@ def test_multi_outcome_effect_check_works_on_both_shapes(): wrapped = next(e for e in wrapped_effects if e.transcript is transcript) assert isinstance(wrapped, MultiOutcomeEffect) - # alternate_effect is callable on both shapes post-retrofit. + # The "if splicing unchanged" coding consequence is reachable on + # both shapes (under their respective names — ExonicSpliceSite + # still carries the legacy ``alternate_effect``; SpliceOutcomeSet + # uses ``effect_if_splicing_unchanged``). assert bare.alternate_effect is not None - assert wrapped.alternate_effect is not None - assert type(bare.alternate_effect) is type(wrapped.alternate_effect) + assert wrapped.effect_if_splicing_unchanged is not None + assert ( + type(bare.alternate_effect) + is type(wrapped.effect_if_splicing_unchanged)) diff --git a/tests/test_splice_outcomes.py b/tests/test_splice_outcomes.py index 1d5f3d5..d72d59e 100644 --- a/tests/test_splice_outcomes.py +++ b/tests/test_splice_outcomes.py @@ -82,6 +82,29 @@ def test_default_for_collection_unchanged(): assert SpliceOutcomeSet not in classes +@pytest.mark.parametrize( + "position,ref,alt,expected_class", + [ + (117531115, "G", "A", SpliceDonor), + (117530898, "G", "A", SpliceAcceptor), + (117531115, "A", "G", IntronicSpliceSite), + ], +) +def test_intronic_splice_classes_lack_alternate_effect( + position, ref, alt, expected_class): + """The default-shape intronic splice classes (SpliceDonor, + SpliceAcceptor, IntronicSpliceSite) don't expose alternate_effect + — the variant is intronic, so there's no coding consequence to + attach. Only ExonicSpliceSite carries alternate_effect on the + default shape.""" + variant = Variant("7", position, ref, alt, ensembl_grch38) + transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) + effect = variant.effect_on_transcript(transcript) + assert isinstance(effect, expected_class) + with pytest.raises(AttributeError): + effect.alternate_effect + + # -------------------------------------------------------------------- # Opt-in wraps splice effects # -------------------------------------------------------------------- @@ -248,24 +271,23 @@ def test_normal_splicing_for_intronic_disruption_has_no_coding_effect(): assert normal.effect.mutant_protein_sequence is None -def test_if_splicing_unchanged_returns_normal_splicing_coding_effect(): - """SpliceOutcomeSet.if_splicing_unchanged surfaces the coding - consequence that applies when splicing proceeds — the - NormalSplicing candidate's coding_effect — and the deprecated - alternate_effect alias agrees.""" +def test_effect_if_splicing_unchanged_returns_coding_effect(): + """SpliceOutcomeSet.effect_if_splicing_unchanged surfaces the + coding consequence that applies when splicing proceeds — the + NormalSplicing candidate's coding_effect.""" variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( e for e in variant.effects(splice_outcomes=True) if e.transcript is transcript) normal = _candidate_of_type(splice_set, NormalSplicing) - assert splice_set.if_splicing_unchanged is normal.effect.coding_effect - assert splice_set.if_splicing_unchanged is not None - # Deprecated alias delegates to the canonical accessor. - assert splice_set.alternate_effect is splice_set.if_splicing_unchanged + assert ( + splice_set.effect_if_splicing_unchanged + is normal.effect.coding_effect) + assert splice_set.effect_if_splicing_unchanged is not None -def test_if_splicing_unchanged_is_none_for_intronic_disruption(): +def test_effect_if_splicing_unchanged_none_for_intronic(): """Unlike the old ExonicSpliceSite.alternate_effect, the accessor works for intronic disruptions — returning None when there's no coding consequence if splicing proceeds.""" @@ -274,8 +296,7 @@ def test_if_splicing_unchanged_is_none_for_intronic_disruption(): splice_set = next( e for e in variant.effects(splice_outcomes=True) if e.transcript is transcript) - assert splice_set.if_splicing_unchanged is None - assert splice_set.alternate_effect is None + assert splice_set.effect_if_splicing_unchanged is None def test_in_frame_exon_skipping_carries_aa_ref_and_protein_sequence(): @@ -701,6 +722,21 @@ def test_highest_priority_candidate_returns_most_disruptive(): assert effect_priority(top.effect) == expected +def test_highest_priority_effect_returns_inner_effect(): + """highest_priority_effect peels the EffectCandidate wrapper off + highest_priority_candidate.""" + variant = Variant("7", 117531115, "G", "A", ensembl_grch38) + transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) + splice_set = next( + e for e in variant.effects(splice_outcomes=True) + if e.transcript is transcript) + assert ( + splice_set.highest_priority_effect + is splice_set.highest_priority_candidate.effect) + assert isinstance( + splice_set.highest_priority_effect, SpliceMechanismEffect) + + def test_resolved_splice_mechanism_delegates_priority_to_protein_effect(): from varcode.effects import Deletion, effect_priority variant = Variant("7", 117531115, "G", "A", ensembl_grch38) @@ -856,32 +892,6 @@ def test_rna_evidence_support_is_tracked_per_splice_candidate(): assert donor_1_junction not in reconciled.rna_evidence_for(donor_2_junction) -# -------------------------------------------------------------------- -# alternate_effect back-compat shim -# -------------------------------------------------------------------- - - -def test_alternate_effect_for_exonic_splice_site_resolves_to_coding_effect(): - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - bare = variant.effect_on_transcript(transcript) - assert isinstance(bare, ExonicSpliceSite) - bare_alt = bare.alternate_effect - splice_set = next( - e for e in variant.effects(splice_outcomes=True) - if e.transcript is transcript) - assert type(splice_set.alternate_effect) is type(bare_alt) - - -def test_alternate_effect_none_when_no_underlying_coding_change(): - variant = Variant("7", 117531115, "G", "A", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - splice_set = next( - e for e in variant.effects(splice_outcomes=True) - if e.transcript is transcript) - assert splice_set.alternate_effect is None - - # -------------------------------------------------------------------- # Protein sequence prediction — biological validation. # diff --git a/varcode/splice_outcomes.py b/varcode/splice_outcomes.py index 6ee609e..a30e2bd 100644 --- a/varcode/splice_outcomes.py +++ b/varcode/splice_outcomes.py @@ -315,11 +315,12 @@ def priority_class(self): return self.disrupted_signal_class @property - def if_splicing_unchanged(self): - """The coding effect that applies *if the spliceosome still - splices normally* despite the disrupted signal — i.e. the - protein consequence of the underlying nucleotide change on its - own. + def effect_if_splicing_unchanged(self): + """The coding consequence that applies *if the spliceosome + still splices normally* despite the disrupted signal — i.e. + the protein consequence of the underlying nucleotide change + on its own (a :class:`MutationEffect` such as + :class:`Substitution`, not a :class:`SpliceMechanismEffect`). Resolves to the :class:`NormalSplicing` candidate's :attr:`NormalSplicing.coding_effect`. Returns ``None`` when @@ -334,19 +335,14 @@ def if_splicing_unchanged(self): Unlike the old ``ExonicSpliceSite.alternate_effect`` it works for intronic disruptions too. """ + # Iterate DNA-derived candidates only; ``candidates`` also + # includes RNA-attached outcomes, but "if splicing proceeds" + # is a DNA property. for candidate in self._candidates: if isinstance(candidate.effect, NormalSplicing): return candidate.effect.coding_effect return None - @property - def alternate_effect(self): - """Deprecated alias for :attr:`if_splicing_unchanged`, kept for - parity with the (also deprecated) - ``ExonicSpliceSite.alternate_effect``. Prefer - :attr:`if_splicing_unchanged`.""" - return self.if_splicing_unchanged - @property def candidate_proteins(self): """Mapping from each mechanism class to its computed mutant From 03c9596deb94d58be6ab8eb8b1d2099ca5a7ca69 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Sun, 24 May 2026 21:10:06 -0400 Subject: [PATCH 3/7] Correct splice-window docs + clarify test fixtures (#391) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review found that the new splice-window docs over-claimed the acceptor-side ExonicSpliceSite range. Verified against the classifier at varcode/effects/effect_helpers.py:119 and varcode/effects/effect_prediction.py:398-430: - Donor-side ExonicSpliceSite fires on the last 3 exonic bases of an exon (interval [exon_end - 2, exon_end]). ✓ docs match. - Acceptor-side ExonicSpliceSite fires on only the first exonic base (transcript_offset == exon_start_offset). Docs said "first 3 exonic bases" — corrected to "first base". - The asymmetry follows the consensus motifs (MAG|GURAGU on donor, YAG|R on acceptor). - IntronicSpliceSite also fires at +1/+2 or -1/-2 when the reference base isn't the canonical GT/AG (non-canonical signal downgrade in effect_prediction.py:403-409). Added to the table for completeness. Other review fixes in the same commit: - Add a comment on the parametrized intronic-classes test explaining the synthetic ref/alt at position 117531115 for the IntronicSpliceSite case (it exercises the non-canonical downgrade path; varcode classifies on supplied ref/alt + position, not the actual GRCh38 reference base). - Refine the test's docstring gloss: "the variant doesn't change a coding base" instead of "the variant is intronic" (more precise — IntronicSpliceSite is itself intronic, but the load-bearing property is the lack of coding consequence). - TODO marker on test_effect_if_splicing_unchanged_matches_alternate_effect so the breaking-6.0 work knows to revisit it when ExonicSpliceSite.alternate_effect goes away. - Drop a duplicate transition sentence ("Two splice-effect richness levels are available; the rest of this section walks through both.") — the next H3 already signals that. 1230 tests pass. --- docs/effect_annotation.md | 31 +++++++++---------- .../test_exonic_splice_site_multi_outcome.py | 6 ++++ tests/test_splice_outcomes.py | 11 +++++-- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/docs/effect_annotation.md b/docs/effect_annotation.md index aaff1c2..56bddac 100644 --- a/docs/effect_annotation.md +++ b/docs/effect_annotation.md @@ -60,24 +60,24 @@ question. The classifier is **position-based**: it fires when a variant lands in the canonical splice window around an exon-intron -boundary. The window covers: +boundary. The window is asymmetric — the donor consensus +(`MAG|GURAGU`) is wider on both sides than the acceptor +consensus (`YAG|R`): -- the last 3 exonic bases of an exon (donor side) or the first - 3 exonic bases of the next exon (acceptor side) — these - straddle the boundary and sit on top of the canonical signal -- the intronic +1..+6 bases on the donor side and -1..-3 bases - on the acceptor side — the intronic bases the spliceosome - reads directly, including the canonical `GT` (+1/+2) and `AG` - (-2/-1) dinucleotides +- **exonic side**: the last 3 bases of an exon (donor side) or + the first base of the next exon (acceptor side) +- **intronic side**: positions +1..+6 of the intron (donor side) + and positions -3..-1 (acceptor side), including the canonical + `GT` at +1/+2 and `AG` at -2/-1 Four classes record *where* in this window the variant landed: | Class | Position | |---|---| -| `ExonicSpliceSite` | Exonic side of either boundary (last 3 / first 3 of an exon) | +| `ExonicSpliceSite` | Last 3 bases of an exon (donor side) or the first base of the next exon (acceptor side) | | `SpliceDonor` | Canonical `GT` at intronic +1 / +2 | | `SpliceAcceptor` | Canonical `AG` at intronic -2 / -1 | -| `IntronicSpliceSite` | Other intronic positions in the window | +| `IntronicSpliceSite` | Intronic +3..+6 (donor side) or -3 (acceptor side); also `+1/+2` or `-1/-2` when the reference base isn't the canonical `GT` / `AG` | Variants outside this window are **not** flagged as splice-disrupting, even when they may affect splicing @@ -90,10 +90,10 @@ see [Limitations](#limitations). A variant in an exon sits on a coding base by definition — it rewrites a codon. If that same exonic base is **also** in the -splice window (i.e. within the last 3 / first 3 of the exon), -the same nucleotide change disrupts the splice signal *and* -changes the protein. varcode represents this duality as -**`ExonicSpliceSite`**: +splice window — the last 3 bases of an exon (donor side) or +the first base (acceptor side) — the same nucleotide change +disrupts the splice signal *and* changes the protein. varcode +represents this duality as **`ExonicSpliceSite`**: - on the default 2-outcome shape, splice disruption is the primary effect; the coding consequence (a `Substitution`, @@ -115,9 +115,6 @@ a plain coding effect (`Substitution`, `Silent`, `FrameShift`, disrupt splicing through a non-canonical mechanism, but varcode won't flag it — see Limitations. -Two splice-effect richness levels are available; the rest of -this section walks through both. - ### Default: lightweight 2-outcome form ```python diff --git a/tests/test_exonic_splice_site_multi_outcome.py b/tests/test_exonic_splice_site_multi_outcome.py index 9f1ac17..edff536 100644 --- a/tests/test_exonic_splice_site_multi_outcome.py +++ b/tests/test_exonic_splice_site_multi_outcome.py @@ -98,6 +98,12 @@ def test_exonic_splice_site_alternate_effect_still_first_class(): def test_effect_if_splicing_unchanged_matches_alternate_effect(): + # TODO: revisit when ExonicSpliceSite.alternate_effect is removed + # in the breaking 6.0 work (#391) — this test asserts equivalence + # between the legacy default-shape attribute and the new opt-in + # accessor, so it will need to retire or pivot to a snapshot + # comparison once the legacy attribute goes away. + # # ExonicSpliceSite wrapped under splice_outcomes=True becomes a # SpliceOutcomeSet. Its .effect_if_splicing_unchanged should # return the same coding_effect that ExonicSpliceSite.alternate_effect diff --git a/tests/test_splice_outcomes.py b/tests/test_splice_outcomes.py index d72d59e..b0157df 100644 --- a/tests/test_splice_outcomes.py +++ b/tests/test_splice_outcomes.py @@ -87,6 +87,11 @@ def test_default_for_collection_unchanged(): [ (117531115, "G", "A", SpliceDonor), (117530898, "G", "A", SpliceAcceptor), + # IntronicSpliceSite via the "non-canonical signal" downgrade + # path: at intronic +1 of CFTR exon 3, supplying A as ref + # means the reference signal isn't the canonical GT. varcode + # classifies on supplied ref/alt + position, not the actual + # GRCh38 reference base. (117531115, "A", "G", IntronicSpliceSite), ], ) @@ -94,9 +99,9 @@ def test_intronic_splice_classes_lack_alternate_effect( position, ref, alt, expected_class): """The default-shape intronic splice classes (SpliceDonor, SpliceAcceptor, IntronicSpliceSite) don't expose alternate_effect - — the variant is intronic, so there's no coding consequence to - attach. Only ExonicSpliceSite carries alternate_effect on the - default shape.""" + — the variant doesn't change a coding base, so there's no coding + consequence to attach. Only ExonicSpliceSite carries + alternate_effect on the default shape.""" variant = Variant("7", position, ref, alt, ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) effect = variant.effect_on_transcript(transcript) From a30146bb11f11589add3cb7375820a6782cc2b9b Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Sun, 24 May 2026 22:47:54 -0400 Subject: [PATCH 4/7] Tidy splice docs: fix stale paragraphs, lift cheat sheet (#391) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review noted two stale paragraphs that survived earlier passes, plus structural noise from accumulated edits. Bug fixes: - "Relationship between the two" claimed `alternate_effect` worked on both shapes; `SpliceOutcomeSet.alternate_effect` was deleted in this PR. Rewrote to map each shape to its actual accessor. - "Limitations" still summarized the window as "first 3-6 intronic, donor/acceptor", dropping the donor/acceptor asymmetry now documented in "When splice disruption is in play". Removed the now-wrong summary; kept the "what's not detected" content. Structural cleanups: - Moved "Common questions" up — now sits right after "Opt-in: full possibility set" rather than after the deep Provenance/Picking subsections. Practical readers reach the cheat sheet without scrolling past 200 lines of prose. - Replaced the duplicate `effect_if_splicing_unchanged` paragraph at the end of "Opt-in" with a pointer to the cheat sheet. The accessor was being documented three times. - Replaced the inline window description in "Splice and coding effects can co-occur" with a back-reference to the table above. The window definition now lives in exactly one place. 1230 tests pass. --- docs/effect_annotation.md | 148 ++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 78 deletions(-) diff --git a/docs/effect_annotation.md b/docs/effect_annotation.md index 56bddac..d1e1c7e 100644 --- a/docs/effect_annotation.md +++ b/docs/effect_annotation.md @@ -90,10 +90,10 @@ see [Limitations](#limitations). A variant in an exon sits on a coding base by definition — it rewrites a codon. If that same exonic base is **also** in the -splice window — the last 3 bases of an exon (donor side) or -the first base (acceptor side) — the same nucleotide change -disrupts the splice signal *and* changes the protein. varcode -represents this duality as **`ExonicSpliceSite`**: +splice window (the exonic positions in the table above), the +same nucleotide change disrupts the splice signal *and* changes +the protein. varcode represents this duality as +**`ExonicSpliceSite`**: - on the default 2-outcome shape, splice disruption is the primary effect; the coding consequence (a `Substitution`, @@ -170,81 +170,16 @@ for c in splice_set.candidates: print(c.effect.side, c.effect.retained_intron_start) ``` -For the common "what if splicing still proceeds?" question, use -`splice_set.effect_if_splicing_unchanged` — the `NormalSplicing` -candidate's `coding_effect` (a `Substitution`, `Silent`, etc., or -`None` for purely intronic disruptions). This is the -`SpliceOutcomeSet` analogue of the legacy -`ExonicSpliceSite.alternate_effect` and works for intronic -disruptions too. - When you opt in, `SpliceDonor` / `SpliceAcceptor` / `IntronicSpliceSite` also get wrapped, so every splice- disrupting variant produces a `SpliceOutcomeSet`. -### Relationship between the two - -| # candidates | Class | -|---|---| -| 1 | plain `Substitution` / `Silent` / etc. — not wrapped | -| 2 | `ExonicSpliceSite` with `alternate_effect` | -| N | `SpliceOutcomeSet` (opt-in via `splice_outcomes=True`) | - -Both `ExonicSpliceSite` and `SpliceOutcomeSet` are `MultiOutcomeEffect` -subclasses, so consumers iterate `.candidates` (a tuple of -`EffectCandidate` objects) uniformly without caring about which form -they're holding. `alternate_effect` works on both: on -`ExonicSpliceSite` it's the splicing-proceeds outcome directly; on -`SpliceOutcomeSet` it resolves to the inner effect of the -`NormalSplicing` candidate (or `None` when that candidate is just -a placeholder). `candidate.effect.short_description` is uniform -across both forms. - -With RNA evidence, splice sets are reconciled rather than merely -extended. `SpliceOutcomeSet.with_rna_evidence(...)` returns a new set -whose `candidates` are the RNA-observed mechanisms, while -`dna_candidates`, `rna_evidence`, `excluded_candidates`, -`added_candidates`, and `candidate_rna_evidence` preserve the audit -trail. Use `splice_set.rna_evidence_for(candidate)` to inspect the -observations supporting one current candidate. - -### Candidate provenance - -There is no `plausibility` or `probability` field in the shared -candidate wrapper. The old splice-specific `plausibility` value was a -DNA-only ordering heuristic, not evidence. Varcode now keeps that -ordering only as producer order. - -Producer-specific support belongs in `candidate.evidence` under -explicit names: `read_count`, `junction_id`, `psi`, `motif_score`, -`donor_score`, `acceptor_score`, and so on. Varcode stores evidence -as opaque provenance and does not normalize it into a probability. - -### Picking a single candidate - -When you need to collapse a multi-outcome effect to one Effect, two -notions of "best" are available — pick consciously: - -| Accessor | Returns | Meaning | -|---|---|---| -| `.most_likely_candidate` | `EffectCandidate` | First candidate after producer ordering | -| `.most_likely_effect` | `MutationEffect` | Inner effect of the above | -| `.highest_priority_candidate` | `EffectCandidate` | Top by `effect_priority` (most protein-disruptive) | -| `.highest_priority_effect` | `MutationEffect` | Inner effect of the above | - -The `_candidate` accessors keep the provenance wrapper (`.source`, -`.evidence`); the `_effect` accessors peel it off. The two "top by" -notions coincide whenever producer ordering and priority ranking -agree, which is common — but for clinical / functional filtering -("flag if any candidate is at least a frameshift") prefer -`highest_priority_*`: a disruptive candidate behind a less-disruptive -primary candidate should still light up. - ### Common questions A cheat sheet for the simple splice use cases. Each example -assumes `effect` is a single splice-disrupting effect, or -`splice_set` is a `SpliceOutcomeSet` (opt-in form). +assumes `effect` is a single splice-disrupting effect (default +shape) or `splice_set` is a `SpliceOutcomeSet` (opt-in shape; +see [Opt-in: full possibility set](#opt-in-full-possibility-set)). **Is this variant splice-disrupting?** @@ -301,6 +236,8 @@ splice_set.highest_priority_candidate Use this for clinical / functional filtering ("flag if any candidate is at least a frameshift") — a disruptive candidate ranked below a less-disruptive primary should still light up. +See [Picking a single candidate](#picking-a-single-candidate) +for the "most likely" vs "most disruptive" distinction. **What protein sequences could result?** @@ -320,14 +257,69 @@ for candidate in splice_set.candidates: candidate.effect.splice_signal # SpliceDonor / SpliceAcceptor / IntronicSpliceSite / ExonicSpliceSite ``` +### Relationship between the two + +| # candidates | Class | +|---|---| +| 1 | plain `Substitution` / `Silent` / etc. — not wrapped | +| 2 | `ExonicSpliceSite` with `alternate_effect` | +| N | `SpliceOutcomeSet` (opt-in via `splice_outcomes=True`) | + +Both `ExonicSpliceSite` and `SpliceOutcomeSet` are `MultiOutcomeEffect` +subclasses, so consumers iterate `.candidates` (a tuple of +`EffectCandidate` objects) uniformly without caring about which form +they're holding. The "if splicing proceeds" coding consequence is +reachable on both shapes — `ExonicSpliceSite.alternate_effect` on +the default shape, `SpliceOutcomeSet.effect_if_splicing_unchanged` +on the opt-in shape. `candidate.effect.short_description` is uniform +across both forms. + +With RNA evidence, splice sets are reconciled rather than merely +extended. `SpliceOutcomeSet.with_rna_evidence(...)` returns a new set +whose `candidates` are the RNA-observed mechanisms, while +`dna_candidates`, `rna_evidence`, `excluded_candidates`, +`added_candidates`, and `candidate_rna_evidence` preserve the audit +trail. Use `splice_set.rna_evidence_for(candidate)` to inspect the +observations supporting one current candidate. + +### Candidate provenance + +There is no `plausibility` or `probability` field in the shared +candidate wrapper. The old splice-specific `plausibility` value was a +DNA-only ordering heuristic, not evidence. Varcode now keeps that +ordering only as producer order. + +Producer-specific support belongs in `candidate.evidence` under +explicit names: `read_count`, `junction_id`, `psi`, `motif_score`, +`donor_score`, `acceptor_score`, and so on. Varcode stores evidence +as opaque provenance and does not normalize it into a probability. + +### Picking a single candidate + +When you need to collapse a multi-outcome effect to one Effect, two +notions of "best" are available — pick consciously: + +| Accessor | Returns | Meaning | +|---|---|---| +| `.most_likely_candidate` | `EffectCandidate` | First candidate after producer ordering | +| `.most_likely_effect` | `MutationEffect` | Inner effect of the above | +| `.highest_priority_candidate` | `EffectCandidate` | Top by `effect_priority` (most protein-disruptive) | +| `.highest_priority_effect` | `MutationEffect` | Inner effect of the above | + +The `_candidate` accessors keep the provenance wrapper (`.source`, +`.evidence`); the `_effect` accessors peel it off. The two "top by" +notions coincide whenever producer ordering and priority ranking +agree, which is common — but for clinical / functional filtering +("flag if any candidate is at least a frameshift") prefer +`highest_priority_*`: a disruptive candidate behind a less-disruptive +primary candidate should still light up. + ### Limitations -The splice classifier is **position-based** — it fires on the -canonical window (last 3 exonic, first 3-6 intronic, donor/acceptor) -and nothing else. Sequence-based signals are not flagged: exonic -splicing enhancer/silencer disruption mid-exon (~6-10nt SR-protein -motifs), branch points (~20-50nt upstream of the acceptor), deep -intronic cryptic sites. Detecting these needs ML predictors (SpliceAI, +Sequence-based splice signals are not flagged: exonic splicing +enhancer/silencer disruption mid-exon (~6-10nt SR-protein motifs), +branch points (~20-50nt upstream of the acceptor), deep intronic +cryptic sites. Detecting these needs ML predictors (SpliceAI, Pangolin, MMSplice, SpliceTransformer) or direct RNA evidence; tracked in [#297][i297]. From 7c6437a1db859fc4d9fcf8b0c99b723a94b8a681 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Mon, 25 May 2026 11:18:09 -0400 Subject: [PATCH 5/7] Always-on SpliceOutcomeSet with lazy candidates + compat shims (#391) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lands the breaking-6.0 splice unification that the previous commits prepared the ground for. Every splice-disrupting variant now wraps in a SpliceOutcomeSet at the user-facing API — no more splice_outcomes=True opt-in flag. Cost is amortised via lazy candidate computation; legacy access patterns keep working through compat shims. Core changes ------------ * `SpliceOutcomeSet.__init__` gains a lazy construction path: - Materialized path (explicit `candidates=...`) still works for `with_rna_evidence` rebuilds, deserialization, and tests. - Lazy path (used by `enumerate_splice_outcomes`) stores the raw `splice_effect` + `mechanism_order` + `genomic_sequence` and eagerly builds only the cheap `NormalSplicing` candidate; `ExonSkipping` / `IntronRetention` / cryptic candidates materialize on first `.candidates` access via a property, then cache. * `effect_if_splicing_unchanged` (and its `alternate_effect` back-compat alias) read from the eager `NormalSplicing` slot — no full materialization for the common "if splicing proceeds" question. * `SpliceOutcomeSet.modifies_protein_sequence = True` (class attr, shadows the inherited property). Fixes the silent-splice filter bug: variants whose `NormalSplicing.coding_effect` happens to be Silent were previously dropped by `drop_silent_and_noncoding()`. A splice disruption is always *potentially* protein-modifying via a non-NormalSplicing candidate. * `SpliceOutcomeSet` is now a `TranscriptMutationEffect` subclass (in addition to `MultiOutcomeEffect`), so it carries `gene` and matches the standard `isinstance(effect, TranscriptMutationEffect)` filter used by downstream consumers (dbnsfp validation, etc.). * `enumerate_splice_outcomes(splice_set, genomic_sequence=...)` accepts an already-wrapped SpliceOutcomeSet and rebuilds with the new provider. Lets test fixtures and downstream users inject a genomic-sequence source post-classification. Switch ------ * `splice_outcomes=` parameter removed from `Variant.effects()` / `VariantCollection.effects()` / `predict_variant_effects()`. The collection-level wrap (`wrap_splice_effects_in_collection`) is unconditional. * `predict_variant_effect_on_transcript()` refactored: the body moves to `_predict_variant_effect_on_transcript_raw()` (internal helper that returns raw splice classes); the public function wraps via `enumerate_splice_outcomes`. * `FastEffectAnnotator.annotate_on_transcript` now calls the raw helper directly, returning the unwrapped splice class. The collection-level wrap converts to SpliceOutcomeSet at the boundary. This preserves protein_diff's dual-dispatch (`isinstance(fast_effect, ExonicSpliceSite)` keeps working inside the annotator) while still wrapping at the user-facing surface. Compat shims ------------ * `SpliceOutcomeSet.alternate_effect` aliases `effect_if_splicing_unchanged`. Existing callers reading `effect.alternate_effect` (when `effect` was an `ExonicSpliceSite`) keep working when the effect is now a SpliceOutcomeSet wrapping ExonicSpliceSite. * `select_between_exonic_splice_site_and_alternate_effect` now unwraps SpliceOutcomeSet to its `alternate_effect` when (a) the disrupted signal class is ExonicSpliceSite and (b) the alternate has higher effect_priority than the splice signal. Preserves the legacy `top_priority_effect` behavior for variants that are splice-disrupting AND introduce a PrematureStop / FrameShift / etc. — the more disruptive coding consequence still surfaces as the canonical top. Tests ----- * Existing tests updated to the new shape — removed `splice_outcomes=True` from ~60 callsites; replaced `effect.__class__ is SpliceDonor` with `effect.disrupted_signal_class is SpliceDonor`; updated COSMIC and dbNSFP test helpers to handle SpliceOutcomeSet via the `alternate_effect` compat alias. * Deleted `tests/test_exonic_splice_site_multi_outcome.py` — its tests covered the #299 retrofit (making ExonicSpliceSite a MultiOutcomeEffect), which is superseded by always-on SpliceOutcomeSet. The relevant assertions are covered in test_splice_outcomes.py. * Parametrized `test_intronic_splice_classes_lack_alternate_effect` retired in favour of `test_intronic_disruptions_have_no_coding_alternate` — same parametrize, new semantics (both `effect.alternate_effect is None` and `effect.effect_if_splicing_unchanged is None`). Docs ---- * docs/effect_annotation.md: collapsed "Default lightweight 2-outcome form" + "Opt-in full possibility set" + "Relationship between the two" into a single "The SpliceOutcomeSet shape" section that documents the lazy construction and the `alternate_effect` back-compat alias. "Common questions" updated to drop the dual-shape `isinstance` guard. * README.md: splice-site section reworded to say the four SpliceSite classes survive as `disrupted_signal_class` / `splice_signal` rather than as top-level effects. Position ranges in the table refined to match the classifier. * CHANGELOG.md: Breaking entry documents the removed flag, the isinstance discriminator migration, the silent-splice filter fix, the lazy candidates, and the new TranscriptMutationEffect inheritance. 1219 tests pass. --- CHANGELOG.md | 35 ++++ README.md | 31 +-- docs/effect_annotation.md | 120 +++++------ tests/common.py | 15 +- tests/test_annotators.py | 33 ++- tests/test_cosmic_mutations.py | 12 +- tests/test_dbnsfp_validation.py | 12 +- tests/test_effect_candidates.py | 24 +-- tests/test_exonic_splice_site.py | 9 +- .../test_exonic_splice_site_multi_outcome.py | 170 ---------------- tests/test_splice_outcomes.py | 190 +++++++++--------- tests/test_splice_site_effects.py | 56 ++++-- varcode/annotators/fast.py | 14 +- varcode/effects/effect_classes.py | 10 +- varcode/effects/effect_ordering.py | 38 +++- varcode/effects/effect_prediction.py | 46 +++-- varcode/splice_outcomes.py | 175 ++++++++++++---- varcode/variant.py | 17 +- varcode/variant_collection.py | 16 +- 19 files changed, 535 insertions(+), 488 deletions(-) delete mode 100644 tests/test_exonic_splice_site_multi_outcome.py diff --git a/CHANGELOG.md b/CHANGELOG.md index dbcf364..df6b23a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,41 @@ ([#391](https://github.com/openvax/varcode/issues/391)). **Breaking** +- `SpliceOutcomeSet` is now always-on for splice-disrupting variants + ([#391](https://github.com/openvax/varcode/issues/391)). Every variant + that lands in the canonical splice window — `SpliceDonor`, + `SpliceAcceptor`, `ExonicSpliceSite`, `IntronicSpliceSite` — is + wrapped in a `SpliceOutcomeSet` carrying the candidate mechanisms. + Specifically: + - The `splice_outcomes=True` flag on `Variant.effects()` / + `VariantCollection.effects()` / `predict_variant_effects()` is + **removed**. Callers passing it explicitly get a `TypeError`. + Migration: drop the keyword — wrapping is unconditional. + - `Variant.effect_on_transcript(transcript)` and the + `FastEffectAnnotator` / `ProteinDiffEffectAnnotator` per-transcript + paths return a `SpliceOutcomeSet` for splice-disrupting variants + instead of the raw `ExonicSpliceSite` / `SpliceDonor` / etc. class. + Migration: replace `isinstance(effect, ExonicSpliceSite)` with + `isinstance(effect, SpliceOutcomeSet) and effect.disrupted_signal_class is ExonicSpliceSite`. + `effect.alternate_effect` still works as a back-compat alias for + `effect.effect_if_splicing_unchanged`, so attribute access keeps + working through the wrapper. + - `SpliceOutcomeSet.modifies_protein_sequence` is hardcoded to + `True` (a splice disruption is always *potentially* protein- + modifying via a non-`NormalSplicing` candidate). Closes a long- + standing filter bug where `drop_silent_and_noncoding()` silently + dropped exonic-splice-site variants whose `NormalSplicing.coding_effect` + happened to be `Silent`. + - Candidate construction is lazy: only the cheap `NormalSplicing` + candidate is built eagerly; `ExonSkipping`, `IntronRetention`, + `CrypticDonor` / `CrypticAcceptor` materialise on first + `.candidates` access. Pipelines that filter on + `modifies_protein_sequence` / `effect_priority` and never read + `.candidates` pay only the eager cost. + - `SpliceOutcomeSet` is now a `TranscriptMutationEffect` subclass + (alongside `MultiOutcomeEffect`), so it carries `gene` / + `transcript` and matches the standard `isinstance(effect, TranscriptMutationEffect)` + filter used by downstream consumers. - Unified the multi-outcome machinery: `SpliceCandidate` deleted; `MultiOutcomeEffect.outcomes` accessor + `_with_extra_outcomes` helper + `_extra_outcomes` slot removed diff --git a/README.md b/README.md index 4951fbf..a464a77 100644 --- a/README.md +++ b/README.md @@ -170,30 +170,33 @@ and ### Splice-site disruption — *where* the signal was hit -DNA-level locations: these effects say a variant landed on or near a -splice signal, but **don't themselves carry a protein consequence** — -they say nothing about how the spliceosome responds (see the next -table for that). All four share the +DNA-level locations: these classes identify *where* a variant landed +in the canonical splice window. They no longer appear as top-level +effects — every splice-disrupting variant is wrapped in a +`SpliceOutcomeSet` (varcode 6.0+), and these classes survive as the +wrapper's `disrupted_signal_class` (a type) and as the +`splice_signal` reference on each candidate mechanism (an instance). +All four share the [`SpliceSite`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20SpliceSite%28) -base, so `from varcode import SpliceSite; isinstance(effect, SpliceSite)` -matches any of them. (The four leaf classes are exported from the -package root too.) +base. | Effect type | Description | | --- | --- | -| [`SpliceDonor`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20SpliceDonor%28) | Mutation in the first two nucleotides of an intron, likely to affect splicing. | -| [`SpliceAcceptor`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20SpliceAcceptor%28) | Mutation in the last two nucleotides of an intron, likely to affect splicing. | -| [`IntronicSpliceSite`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20IntronicSpliceSite%28) | Mutation near the beginning or end of an intron but less likely to affect splicing than donor/acceptor mutations. | -| [`ExonicSpliceSite`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20ExonicSpliceSite%28) | Mutation at the beginning or end of an exon, may affect splicing; itself a `MultiOutcomeEffect` wrapping the alternate exonic coding effect alongside the splice candidates. | +| [`SpliceDonor`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20SpliceDonor%28) | Mutation at canonical donor `GT` (intronic +1/+2). | +| [`SpliceAcceptor`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20SpliceAcceptor%28) | Mutation at canonical acceptor `AG` (intronic -2/-1). | +| [`IntronicSpliceSite`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20IntronicSpliceSite%28) | Other intronic positions in the splice window (+3..+6 donor, -3 acceptor; also +1/+2 or -1/-2 when the reference signal isn't canonical). | +| [`ExonicSpliceSite`](https://github.com/openvax/varcode/blob/main/varcode/effects/effect_classes.py#:~:text=class%20ExonicSpliceSite%28) | Last 3 bases of an exon (donor side) or first base of an exon (acceptor side); changes a codon *and* disrupts the splice signal. Carries `alternate_effect` (the coding consequence if splicing proceeds). | ### Splice mechanism — *what the spliceosome does* in response **These are the splice effects that carry a protein consequence.** The protein-level outcome of a splice-signal hit is not deterministic from -DNA alone, so (when you opt in with `splice_outcomes=True`) varcode -emits these as candidates inside a +DNA alone, so varcode wraps every splice-signal disruption in a [`SpliceOutcomeSet`](https://github.com/openvax/varcode/blob/main/varcode/splice_outcomes.py#:~:text=class%20SpliceOutcomeSet%28) -(a `MultiOutcomeEffect`). Each mechanism carries the originating +(a `MultiOutcomeEffect`) carrying these mechanisms as candidates. +Wrapping is always-on as of varcode 6.0 and lazy — only the cheap +`NormalSplicing` candidate is built eagerly; the rest materialise on +`.candidates` access. Each mechanism carries the originating disruption on its `.splice_signal` attribute (a `SpliceSite` *instance*), so you can always recover *where* the hit was off any mechanism. The set also records the disruption's *class* on diff --git a/docs/effect_annotation.md b/docs/effect_annotation.md index d1e1c7e..7e876ab 100644 --- a/docs/effect_annotation.md +++ b/docs/effect_annotation.md @@ -115,50 +115,56 @@ a plain coding effect (`Substitution`, `Silent`, `FrameShift`, disrupt splicing through a non-canonical mechanism, but varcode won't flag it — see Limitations. -### Default: lightweight 2-outcome form +### The `SpliceOutcomeSet` shape -```python -variant = Variant("17", 43082575 - 5, "C", "T", "GRCh38") -effect = variant.effect_on_transcript(transcript) -# ExonicSpliceSite(...) -# .alternate_effect -> Substitution(...) # if splicing proceeds -``` - -`ExonicSpliceSite` carries `alternate_effect`: the coding -consequence that applies *if splicing still works*. Exactly -two outcomes, represented as a primary effect + one -alternate field. Cheap. Ships unconditionally. - -`SpliceDonor`, `SpliceAcceptor`, and `IntronicSpliceSite` -don't expose `alternate_effect` today because the variant -is intronic — there's no coding consequence to attach. - -### Opt-in: full possibility set +Every splice-disrupting variant emits a `SpliceOutcomeSet` — there +is no "bare splice class" path at the user-facing API as of +varcode 6.0. ```python -effects = variant.effects(splice_outcomes=True) -# SpliceOutcomeSet(...) replaces the splice effect. -# .candidates is a tuple[EffectCandidate, ...], in producer order. +variant = Variant("17", 43082575 - 5, "C", "T", "GRCh38") +splice_set = variant.effect_on_transcript(transcript) +# SpliceOutcomeSet(disrupted_signal_class=ExonicSpliceSite, ...) +# .candidates is a tuple[EffectCandidate, ...] in producer order. # Each candidate's .effect is a SpliceMechanismEffect subclass: +# EffectCandidate(effect=NormalSplicing(coding_effect=Substitution(...))) # EffectCandidate(effect=ExonSkipping(affected_exon=..., in_frame=True, # aa_ref="KGYK...", ...)) # EffectCandidate(effect=IntronRetention(retained_intron_start=..., # side="donor", ...)) # EffectCandidate(effect=CrypticDonor(affected_exon=..., ...)) -# EffectCandidate(effect=NormalSplicing(coding_effect=Substitution(...))) ``` -`SpliceOutcomeSet` replaces the splice effect with a set of -candidate mechanisms. Class identity = mechanism — `NormalSplicing`, -`ExonSkipping`, `IntronRetention`, `CrypticDonor`, `CrypticAcceptor`. -Each is a `SpliceMechanismEffect` subclass that carries its own -protein vocab on the instance (`aa_ref`, `aa_alt`, -`mutant_protein_sequence`, `mutant_transcript`); these are `None` -when the protein math couldn't resolve (e.g. intron retention -without a genomic-sequence provider), populated otherwise. Each -mechanism also carries `splice_signal` — the underlying -`SpliceDonor` / `SpliceAcceptor` / `IntronicSpliceSite` / -`ExonicSpliceSite` effect describing *where* the disruption was. +`SpliceOutcomeSet` carries: + +- `disrupted_signal_class` — the `SpliceSite` subclass (`SpliceDonor`, + `SpliceAcceptor`, `ExonicSpliceSite`, or `IntronicSpliceSite`) + identifying where in the splice window the variant landed +- `candidates` — a tuple of `EffectCandidate` objects in producer + order, one per plausible mechanism +- `effect_if_splicing_unchanged` — the coding consequence that + applies if the spliceosome still splices normally (the + `NormalSplicing` candidate's `coding_effect`), or `None` for + purely intronic disruptions where the nucleotide change doesn't + touch a coding base. Also exposed as `alternate_effect` for + back-compat with code that read `ExonicSpliceSite.alternate_effect` + +Each candidate's `.effect` is a `SpliceMechanismEffect` subclass +that carries its own protein vocab on the instance (`aa_ref`, +`aa_alt`, `mutant_protein_sequence`, `mutant_transcript`). Fields +are `None` when the protein math couldn't resolve (e.g. intron +retention without a `genomic_sequence` provider), populated +otherwise. Each mechanism also exposes `splice_signal` — the +underlying raw `SpliceDonor` / `SpliceAcceptor` / +`IntronicSpliceSite` / `ExonicSpliceSite` effect describing *where* +the disruption was. + +**Lazy construction.** Only the cheap `NormalSplicing` candidate +is built eagerly when the set is constructed; `ExonSkipping`, +`IntronRetention`, and `CrypticDonor`/`CrypticAcceptor` materialise +on first `.candidates` access and are cached. Filter pipelines +that drop variants early via `modifies_protein_sequence` / +`effect_priority` never trigger the expensive candidates. Downstream consumers dispatch by class: @@ -170,28 +176,21 @@ for c in splice_set.candidates: print(c.effect.side, c.effect.retained_intron_start) ``` -When you opt in, `SpliceDonor` / `SpliceAcceptor` / -`IntronicSpliceSite` also get wrapped, so every splice- -disrupting variant produces a `SpliceOutcomeSet`. - ### Common questions -A cheat sheet for the simple splice use cases. Each example -assumes `effect` is a single splice-disrupting effect (default -shape) or `splice_set` is a `SpliceOutcomeSet` (opt-in shape; -see [Opt-in: full possibility set](#opt-in-full-possibility-set)). +A cheat sheet for the simple splice use cases. `splice_set` is a +`SpliceOutcomeSet` (every splice-disrupting variant produces one). **Is this variant splice-disrupting?** ```python from varcode import MultiOutcomeEffect, SpliceOutcomeSet -from varcode.effects import ( - ExonicSpliceSite, SpliceDonor, SpliceAcceptor, IntronicSpliceSite, -) -# Splice-specific (covers both shapes): -isinstance(effect, (SpliceOutcomeSet, ExonicSpliceSite, - SpliceDonor, SpliceAcceptor, IntronicSpliceSite)) +# Splice-specific check: +isinstance(effect, SpliceOutcomeSet) + +# Or by disrupted signal class: +isinstance(effect, SpliceOutcomeSet) and effect.disrupted_signal_class is SpliceDonor # Broader: any multi-outcome effect, including SV outcomes # (LargeDeletion, GeneFusion, ...) — use when you want one @@ -202,12 +201,12 @@ isinstance(effect, MultiOutcomeEffect) **What coding consequence applies if splicing still proceeds?** ```python -# Default (2-outcome) shape — guard, since intronic classes don't expose it: -if isinstance(effect, ExonicSpliceSite): - coding = effect.alternate_effect +coding = splice_set.effect_if_splicing_unchanged # canonical +coding = splice_set.alternate_effect # back-compat alias -# Opt-in (SpliceOutcomeSet) shape — uniform for any disrupted signal: -coding = splice_set.effect_if_splicing_unchanged +# Either returns the NormalSplicing candidate's coding_effect (a +# Substitution / Silent / PrematureStop / ...), or None for purely +# intronic disruptions where the variant doesn't change a coding base. ``` **What's the most likely splice mechanism?** @@ -257,22 +256,7 @@ for candidate in splice_set.candidates: candidate.effect.splice_signal # SpliceDonor / SpliceAcceptor / IntronicSpliceSite / ExonicSpliceSite ``` -### Relationship between the two - -| # candidates | Class | -|---|---| -| 1 | plain `Substitution` / `Silent` / etc. — not wrapped | -| 2 | `ExonicSpliceSite` with `alternate_effect` | -| N | `SpliceOutcomeSet` (opt-in via `splice_outcomes=True`) | - -Both `ExonicSpliceSite` and `SpliceOutcomeSet` are `MultiOutcomeEffect` -subclasses, so consumers iterate `.candidates` (a tuple of -`EffectCandidate` objects) uniformly without caring about which form -they're holding. The "if splicing proceeds" coding consequence is -reachable on both shapes — `ExonicSpliceSite.alternate_effect` on -the default shape, `SpliceOutcomeSet.effect_if_splicing_unchanged` -on the opt-in shape. `candidate.effect.short_description` is uniform -across both forms. +### RNA evidence reconciliation With RNA evidence, splice sets are reconciled rather than merely extended. `SpliceOutcomeSet.with_rna_evidence(...)` returns a new set diff --git a/tests/common.py b/tests/common.py index c917721..126eac3 100644 --- a/tests/common.py +++ b/tests/common.py @@ -54,10 +54,21 @@ def expect_effect( effect = variant.effect_on_transcript(transcript) check_effect_properties(effect) if effect_class is not None: - assert effect.__class__ is effect_class, \ + # Splice-disrupting effects are always wrapped in a + # SpliceOutcomeSet (varcode 6.0+); when the caller asks for a + # splice-signal class (ExonicSpliceSite, SpliceDonor, ...), + # match against the wrapper's `disrupted_signal_class`. + from varcode import SpliceOutcomeSet + actual_class = effect.__class__ + matches = ( + actual_class is effect_class + or (actual_class is SpliceOutcomeSet + and getattr(effect, "disrupted_signal_class", None) + is effect_class)) + assert matches, \ "Expected effect class %s but got %s" % ( effect_class.__name__, - effect.__class__.__name__) + actual_class.__name__) if protein_sequence is not None: assert effect.mutant_protein_sequence == protein_sequence, \ "Expected protein sequence %s but got %s" % ( diff --git a/tests/test_annotators.py b/tests/test_annotators.py index 2c9f2af..bd45038 100644 --- a/tests/test_annotators.py +++ b/tests/test_annotators.py @@ -124,13 +124,19 @@ def test_set_default_annotator_rejects_unknown_name(): def test_fast_annotator_annotator_matches_effect_on_transcript(): + """FastEffectAnnotator returns the raw splice-signal class for + internal consumers (notably protein_diff's dual dispatch); + Variant.effect_on_transcript wraps the same raw class in a + SpliceOutcomeSet at the user-facing boundary. Both surface the + same underlying classification.""" + from varcode import SpliceOutcomeSet variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id("ENST00000003084") direct = variant.effect_on_transcript(transcript) annotated = FastEffectAnnotator().annotate_on_transcript( variant, transcript) - assert type(annotated) is type(direct) - assert annotated.short_description == direct.short_description + assert isinstance(direct, SpliceOutcomeSet) + assert type(annotated) is direct.disrupted_signal_class # ==================================================================== @@ -363,19 +369,28 @@ def test_protein_diff_parity_on_splice_donor(): def test_protein_diff_parity_on_exonic_splice_site(): # ExonicSpliceSite: dual-dispatch. Splice class from fast, - # alternate_effect from protein-diff's protein diff. + # alternate_effect from protein-diff's protein diff. The + # annotator returns a raw ExonicSpliceSite (the collection-level + # wrap converts to SpliceOutcomeSet); Variant.effect_on_transcript + # wraps directly. + from varcode import SpliceOutcomeSet + from varcode.annotators import ProteinDiffEffectAnnotator + from varcode.effects import ExonicSpliceSite variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id("ENST00000003084") - legacy = variant.effect_on_transcript(transcript) - from varcode.annotators import ProteinDiffEffectAnnotator + direct = variant.effect_on_transcript(transcript) sdiff = ProteinDiffEffectAnnotator().annotate_on_transcript( variant, transcript) - from varcode.effects import ExonicSpliceSite - assert isinstance(legacy, ExonicSpliceSite) + assert isinstance(direct, SpliceOutcomeSet) + assert direct.disrupted_signal_class is ExonicSpliceSite assert isinstance(sdiff, ExonicSpliceSite) - # alternate_effect comes from protein-diff in the new annotator. + # alternate_effect comes from protein-diff in the new annotator, + # and the SpliceOutcomeSet wrap surfaces it via the compat shim. assert sdiff.alternate_effect is not None - assert type(sdiff.alternate_effect).__name__ == type(legacy.alternate_effect).__name__ + assert direct.alternate_effect is not None + assert ( + type(sdiff.alternate_effect).__name__ + == type(direct.alternate_effect).__name__) def test_protein_diff_parity_on_reverse_strand_mnv(): diff --git a/tests/test_cosmic_mutations.py b/tests/test_cosmic_mutations.py index 743c3b1..e8e64ce 100644 --- a/tests/test_cosmic_mutations.py +++ b/tests/test_cosmic_mutations.py @@ -11,7 +11,7 @@ # limitations under the License. from pyensembl import ensembl_grch37 as ensembl -from varcode import Variant +from varcode import Variant, SpliceOutcomeSet from varcode.effects import ( Substitution, Deletion, @@ -30,11 +30,15 @@ def _get_effect(chrom, pos, dna_ref, dna_alt, transcript_id): transcript_id, variant, transcript_dict) effect = transcript_dict[transcript_id] - # COSMIC seems to ignore exonic splice sites + # COSMIC seems to ignore exonic splice sites — unwrap to the + # underlying coding consequence. Works for both the legacy + # ExonicSpliceSite default shape and the SpliceOutcomeSet wrapper + # (compat alias resolves to NormalSplicing.coding_effect). if isinstance(effect, ExonicSpliceSite): return effect.alternate_effect - else: - return effect + if isinstance(effect, SpliceOutcomeSet): + return effect.alternate_effect + return effect def _substitution(chrom, pos, dna_ref, dna_alt, transcript_id, aa_ref, aa_alt): effect = _get_effect(chrom, pos, dna_ref, dna_alt, transcript_id) diff --git a/tests/test_dbnsfp_validation.py b/tests/test_dbnsfp_validation.py index 4b1b3dd..5270378 100644 --- a/tests/test_dbnsfp_validation.py +++ b/tests/test_dbnsfp_validation.py @@ -12,7 +12,7 @@ import pytest from pyensembl import ensembl_grch37 -from varcode import Variant +from varcode import Variant, SpliceOutcomeSet from varcode.effects import ( ExonicSpliceSite, Substitution, @@ -81,10 +81,12 @@ def test_dbnsfp_validation_set_transcript_mutation( "%s not found in %s" % (ensembl_transcript_id, transcript_id_dict) effect = transcript_id_dict[ensembl_transcript_id] - if isinstance(effect, ExonicSpliceSite): - # exonic splice site mutations carry with them an alternate effect - # which is what we check against dbNSFP (since that database seemed - # to ignore exonic splicing mutations) + if isinstance(effect, (ExonicSpliceSite, SpliceOutcomeSet)): + # Exonic-splice-site mutations carry an alternate effect (the + # coding consequence if splicing proceeds) which is what we + # check against dbNSFP (the database ignores exonic splice + # disruption). Both the legacy ExonicSpliceSite default shape + # and the SpliceOutcomeSet wrapper expose ``alternate_effect``. effect = effect.alternate_effect assert isinstance(effect, Substitution), \ diff --git a/tests/test_effect_candidates.py b/tests/test_effect_candidates.py index 205cc14..957f541 100644 --- a/tests/test_effect_candidates.py +++ b/tests/test_effect_candidates.py @@ -74,26 +74,24 @@ class _C: def test_exonic_splice_site_exposes_outcomes(): - """Real integration: an SNV at the last base of an exon yields - ``ExonicSpliceSite``, which is a ``MultiOutcomeEffect``. Its - ``.candidates`` should return two :class:`EffectCandidate` entries (the - splice-disruption outcome and the coding-change alternate).""" + """Real integration: an SNV at the last base of an exon yields a + SpliceOutcomeSet wrapping an ExonicSpliceSite. The set is a + ``MultiOutcomeEffect``; its ``.candidates`` returns the splicing + mechanism candidates (NormalSplicing, ExonSkipping, IntronRetention, + cryptic) as :class:`EffectCandidate` entries.""" + from varcode import SpliceOutcomeSet # CFTR exon 3 ends at 117531114 (last exon base). variant = Variant("7", 117531114, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id("ENST00000003084") effect = variant.effect_on_transcript(transcript) assert isinstance(effect, MultiOutcomeEffect) - assert isinstance(effect, ExonicSpliceSite) + assert isinstance(effect, SpliceOutcomeSet) + assert effect.disrupted_signal_class is ExonicSpliceSite outcomes = effect.candidates - assert len(outcomes) == 2 + assert len(outcomes) >= 2 # NormalSplicing + at least one mechanism assert all(isinstance(o, EffectCandidate) for o in outcomes) - # First outcome: the splice-disruption classification (the - # ExonicSpliceSite itself). - assert outcomes[0].effect is effect - # Second outcome: the coding-change alternate. - assert outcomes[1].effect is effect.alternate_effect - # Both default to varcode-source. + # All candidates default to varcode-source. assert all(o.source == "varcode" for o in outcomes) @@ -135,7 +133,7 @@ def test_uniform_iteration_sv_and_splice_outcomes(): sv, transcript) splice_variant = Variant("7", 117531115, "G", "A", ensembl_grch38) - splice_effects = splice_variant.effects(splice_outcomes=True) + splice_effects = splice_variant.effects() splice_effect = next( e for e in splice_effects if e.transcript is transcript) diff --git a/tests/test_exonic_splice_site.py b/tests/test_exonic_splice_site.py index 63b53dd..924b0fe 100644 --- a/tests/test_exonic_splice_site.py +++ b/tests/test_exonic_splice_site.py @@ -10,17 +10,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from varcode import Variant +from varcode import Variant, SpliceOutcomeSet from varcode.effects import ExonicSpliceSite, PrematureStop def test_STAT1_stop_gain_at_exon_boundary(): # top priority effect for this variant should be PrematureStop, - # even though it's also ExonicSpliceSite + # even though it's also an exonic splice site disruption stat1_variant = Variant("2", "191872291", "G", "A", "GRCh37") effects = stat1_variant.effects() print(effects) - assert any([e.__class__ is ExonicSpliceSite for e in effects]) + assert any( + isinstance(e, SpliceOutcomeSet) + and e.disrupted_signal_class is ExonicSpliceSite + for e in effects) top_effect = effects.top_priority_effect() print(top_effect) assert top_effect.__class__ is PrematureStop diff --git a/tests/test_exonic_splice_site_multi_outcome.py b/tests/test_exonic_splice_site_multi_outcome.py deleted file mode 100644 index edff536..0000000 --- a/tests/test_exonic_splice_site_multi_outcome.py +++ /dev/null @@ -1,170 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for the ExonicSpliceSite → MultiOutcomeEffect retrofit and -the SpliceOutcomeSet accessor surface -(openvax/varcode#299, #391, #392). - -The retrofit makes ExonicSpliceSite a MultiOutcomeEffect — it -exposes the ``candidates`` / ``most_likely`` / ``priority_class`` -protocol uniformly with SpliceOutcomeSet. The SpliceOutcomeSet -side exposes ``effect_if_splicing_unchanged``, which resolves to -the NormalSplicing candidate's coding_effect (the opt-in shape's -analogue of the legacy ``ExonicSpliceSite.alternate_effect``). -""" - -from pyensembl import cached_release - -from varcode import MultiOutcomeEffect, NormalSplicing, Variant -from varcode.effects import ExonicSpliceSite - - -ensembl_grch38 = cached_release(81) -CFTR_TRANSCRIPT_ID = "ENST00000003084" - - -# ==================================================================== -# ExonicSpliceSite is now a MultiOutcomeEffect -# ==================================================================== - - -def test_exonic_splice_site_is_multi_outcome_effect(): - # CFTR exon 4 ends in ...AAG. A G->T at the last exonic base is a - # canonical ExonicSpliceSite case. - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effect = variant.effect_on_transcript(transcript) - assert isinstance(effect, ExonicSpliceSite) - assert isinstance(effect, MultiOutcomeEffect), ( - "ExonicSpliceSite should pass isinstance(e, MultiOutcomeEffect) " - "after #299 Part 1 retrofit") - - -def test_exonic_splice_site_candidates_include_self_and_alternate(): - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effect = variant.effect_on_transcript(transcript) - assert effect.alternate_effect is not None - # 2-candidate form: splice-disruption (self) + coding consequence. - # Each entry is an EffectCandidate wrapping the inner effect. - inners = tuple(c.effect for c in effect.candidates) - assert inners == (effect, effect.alternate_effect) - - -def test_exonic_splice_site_most_likely_is_self(): - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effect = variant.effect_on_transcript(transcript) - # most_likely_effect is the inner MutationEffect, which IS the - # splice-disruption outcome (this effect itself). - assert effect.most_likely_effect is effect - # most_likely_candidate wraps it in an EffectCandidate. - assert effect.most_likely_candidate.effect is effect - - -def test_exonic_splice_site_priority_class_is_self_class(): - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effect = variant.effect_on_transcript(transcript) - assert effect.priority_class is ExonicSpliceSite - - -def test_exonic_splice_site_alternate_effect_still_first_class(): - # Back-compat: the attribute access that downstream code uses - # today must keep working. The retrofit adds MultiOutcomeEffect - # machinery alongside it, not in place of it. - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effect = variant.effect_on_transcript(transcript) - alt = effect.alternate_effect - assert alt is not None - # alternate_effect is a MutationEffect (not a SpliceCandidate). - from varcode.effects import MutationEffect - assert isinstance(alt, MutationEffect) - - -# ==================================================================== -# SpliceOutcomeSet exposes effect_if_splicing_unchanged -# ==================================================================== - - -def test_effect_if_splicing_unchanged_matches_alternate_effect(): - # TODO: revisit when ExonicSpliceSite.alternate_effect is removed - # in the breaking 6.0 work (#391) — this test asserts equivalence - # between the legacy default-shape attribute and the new opt-in - # accessor, so it will need to retire or pivot to a snapshot - # comparison once the legacy attribute goes away. - # - # ExonicSpliceSite wrapped under splice_outcomes=True becomes a - # SpliceOutcomeSet. Its .effect_if_splicing_unchanged should - # return the same coding_effect that ExonicSpliceSite.alternate_effect - # would. - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - - bare = variant.effect_on_transcript(transcript) - assert isinstance(bare, ExonicSpliceSite) - bare_alt = bare.alternate_effect - - wrapped_effects = variant.effects(splice_outcomes=True) - wrapped = next(e for e in wrapped_effects if e.transcript is transcript) - # Same underlying coding-effect class; don't require object - # identity since the wrapped path may rebuild it. - assert type(wrapped.effect_if_splicing_unchanged) is type(bare_alt) - assert ( - wrapped.effect_if_splicing_unchanged.short_description - == bare_alt.short_description) - - -def test_effect_if_splicing_unchanged_none_for_intronic_donor(): - # SpliceDonor-backed SpliceOutcomeSet: the NormalSplicing candidate - # exists but its coding_effect is None (intronic variant, no - # underlying coding change). effect_if_splicing_unchanged should be None. - variant = Variant("7", 117531115, "G", "A", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - wrapped_effects = variant.effects(splice_outcomes=True) - wrapped = next(e for e in wrapped_effects if e.transcript is transcript) - normal = next( - c for c in wrapped.candidates - if isinstance(c.effect, NormalSplicing)) - assert normal.effect.coding_effect is None - assert wrapped.effect_if_splicing_unchanged is None - - -# ==================================================================== -# Unified downstream pattern: isinstance(e, MultiOutcomeEffect) works -# across both shapes. -# ==================================================================== - - -def test_multi_outcome_effect_check_works_on_both_shapes(): - # Same variant expressed two ways; both match the MultiOutcomeEffect - # isinstance check so downstream code doesn't need to special-case. - variant = Variant("7", 117531114, "G", "T", ensembl_grch38) - transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - - bare = variant.effect_on_transcript(transcript) - assert isinstance(bare, MultiOutcomeEffect) - - wrapped_effects = variant.effects(splice_outcomes=True) - wrapped = next(e for e in wrapped_effects if e.transcript is transcript) - assert isinstance(wrapped, MultiOutcomeEffect) - - # The "if splicing unchanged" coding consequence is reachable on - # both shapes (under their respective names — ExonicSpliceSite - # still carries the legacy ``alternate_effect``; SpliceOutcomeSet - # uses ``effect_if_splicing_unchanged``). - assert bare.alternate_effect is not None - assert wrapped.effect_if_splicing_unchanged is not None - assert ( - type(bare.alternate_effect) - is type(wrapped.effect_if_splicing_unchanged)) diff --git a/tests/test_splice_outcomes.py b/tests/test_splice_outcomes.py index b0157df..e536e36 100644 --- a/tests/test_splice_outcomes.py +++ b/tests/test_splice_outcomes.py @@ -67,23 +67,26 @@ def _candidate_of_type(splice_set, mechanism_cls): # -------------------------------------------------------------------- -def test_default_behavior_unchanged(): +def test_splice_donor_wraps_as_outcome_set(): + """Splice disruptions are always wrapped in SpliceOutcomeSet + (varcode 6.0+); the disrupted_signal_class records which signal + was hit.""" variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) effect = variant.effect_on_transcript(transcript) - assert effect.__class__ is SpliceDonor - assert not isinstance(effect, SpliceOutcomeSet) + assert isinstance(effect, SpliceOutcomeSet) + assert effect.disrupted_signal_class is SpliceDonor -def test_default_for_collection_unchanged(): +def test_collection_wraps_splice_effects(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) effects = variant.effects() classes = {type(e) for e in effects} - assert SpliceOutcomeSet not in classes + assert SpliceOutcomeSet in classes @pytest.mark.parametrize( - "position,ref,alt,expected_class", + "position,ref,alt,expected_signal_class", [ (117531115, "G", "A", SpliceDonor), (117530898, "G", "A", SpliceAcceptor), @@ -95,19 +98,19 @@ def test_default_for_collection_unchanged(): (117531115, "A", "G", IntronicSpliceSite), ], ) -def test_intronic_splice_classes_lack_alternate_effect( - position, ref, alt, expected_class): - """The default-shape intronic splice classes (SpliceDonor, - SpliceAcceptor, IntronicSpliceSite) don't expose alternate_effect - — the variant doesn't change a coding base, so there's no coding - consequence to attach. Only ExonicSpliceSite carries - alternate_effect on the default shape.""" +def test_intronic_disruptions_have_no_coding_alternate( + position, ref, alt, expected_signal_class): + """Purely intronic splice disruptions wrap as SpliceOutcomeSet with + no coding consequence — alternate_effect and + effect_if_splicing_unchanged both return None (the nucleotide + change doesn't touch a coding base).""" variant = Variant("7", position, ref, alt, ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) effect = variant.effect_on_transcript(transcript) - assert isinstance(effect, expected_class) - with pytest.raises(AttributeError): - effect.alternate_effect + assert isinstance(effect, SpliceOutcomeSet) + assert effect.disrupted_signal_class is expected_signal_class + assert effect.alternate_effect is None + assert effect.effect_if_splicing_unchanged is None # -------------------------------------------------------------------- @@ -117,7 +120,7 @@ def test_intronic_splice_classes_lack_alternate_effect( def test_opt_in_wraps_splice_donor(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() cftr_effect = next( e for e in effects if getattr(e, "transcript", None) is not None @@ -130,7 +133,7 @@ def test_opt_in_wraps_splice_donor(): def test_opt_in_wraps_splice_acceptor(): variant = Variant("7", 117530898, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert isinstance(target, SpliceOutcomeSet) assert target.disrupted_signal_class is SpliceAcceptor @@ -139,7 +142,7 @@ def test_opt_in_wraps_splice_acceptor(): def test_opt_in_wraps_exonic_splice_site(): variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert isinstance(target, SpliceOutcomeSet) assert target.disrupted_signal_class is ExonicSpliceSite @@ -148,7 +151,7 @@ def test_opt_in_wraps_exonic_splice_site(): def test_opt_in_wraps_intronic_splice_site(): variant = Variant("7", 117531115, "A", "G", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert isinstance(target, SpliceOutcomeSet) assert target.disrupted_signal_class is IntronicSpliceSite @@ -156,7 +159,7 @@ def test_opt_in_wraps_intronic_splice_site(): def test_opt_in_passes_through_non_splice_effects(): variant = Variant("17", 43082575 - 5, "CCT", "GGG", ensembl_grch38) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() classes = [type(e).__name__ for e in effects] assert "Substitution" in classes @@ -169,7 +172,7 @@ def test_opt_in_passes_through_non_splice_effects(): def test_splice_donor_candidate_set_has_expected_mechanisms(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) mechanisms = {type(c.effect) for c in target.candidates} assert mechanisms == { @@ -179,7 +182,7 @@ def test_splice_donor_candidate_set_has_expected_mechanisms(): def test_splice_acceptor_candidate_set_uses_cryptic_acceptor(): variant = Variant("7", 117530898, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) mechanisms = {type(c.effect) for c in target.candidates} assert CrypticAcceptor in mechanisms @@ -191,7 +194,7 @@ def test_all_candidates_are_splice_mechanism_effects(): subclass — uniform shape, no enum / class-identity drift.""" variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) for c in target.candidates: assert isinstance(c.effect, SpliceMechanismEffect) @@ -200,7 +203,7 @@ def test_all_candidates_are_splice_mechanism_effects(): def test_varcode_splice_candidates_are_unscored_but_ordered(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert [type(c.effect) for c in target.candidates] == [ ExonSkipping, @@ -213,7 +216,7 @@ def test_varcode_splice_candidates_are_unscored_but_ordered(): def test_most_likely_for_splice_donor_is_exon_skipping(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert isinstance(target.most_likely_effect, ExonSkipping) @@ -221,7 +224,7 @@ def test_most_likely_for_splice_donor_is_exon_skipping(): def test_most_likely_for_exonic_splice_site_is_normal_splicing(): variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert isinstance(target.most_likely_effect, NormalSplicing) @@ -235,13 +238,16 @@ def test_most_likely_for_exonic_splice_site_is_normal_splicing(): def test_splice_mechanism_effects_reference_the_splice_signal(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - bare = variant.effect_on_transcript(transcript) - assert isinstance(bare, SpliceDonor) - splice_set = enumerate_splice_outcomes(bare) - for c in splice_set.candidates: - assert c.effect.splice_signal is bare - # Reachable from the mechanism: where in the transcript was hit. - assert c.effect.splice_signal.nearest_exon is not None + splice_set = variant.effect_on_transcript(transcript) + assert isinstance(splice_set, SpliceOutcomeSet) + assert splice_set.disrupted_signal_class is SpliceDonor + # Every candidate's splice_signal points back to the same raw + # SpliceDonor instance. + splice_signals = {c.effect.splice_signal for c in splice_set.candidates} + assert len(splice_signals) == 1 + raw = next(iter(splice_signals)) + assert isinstance(raw, SpliceDonor) + assert raw.nearest_exon is not None # -------------------------------------------------------------------- @@ -256,7 +262,7 @@ def test_normal_splicing_delegates_protein_to_coding_effect(): variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) normal = _candidate_of_type(splice_set, NormalSplicing) assert normal.effect.coding_effect is not None @@ -269,7 +275,7 @@ def test_normal_splicing_for_intronic_disruption_has_no_coding_effect(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) normal = _candidate_of_type(splice_set, NormalSplicing) assert normal.effect.coding_effect is None @@ -283,7 +289,7 @@ def test_effect_if_splicing_unchanged_returns_coding_effect(): variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) normal = _candidate_of_type(splice_set, NormalSplicing) assert ( @@ -299,7 +305,7 @@ def test_effect_if_splicing_unchanged_none_for_intronic(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) assert splice_set.effect_if_splicing_unchanged is None @@ -310,7 +316,7 @@ def test_in_frame_exon_skipping_carries_aa_ref_and_protein_sequence(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) assert skip.effect.in_frame is True @@ -333,7 +339,7 @@ def test_intron_retention_predicted_state_carries_intron_coords(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) ir = _candidate_of_type(splice_set, IntronRetention) assert not ir.effect.resolved @@ -348,7 +354,7 @@ def test_cryptic_donor_predicted_state_carries_affected_exon(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) cryptic = _candidate_of_type(splice_set, CrypticDonor) assert not cryptic.effect.resolved @@ -419,7 +425,7 @@ def test_cryptic_donor_with_provider_populates_protein_vocab(): def test_collection_iteration_after_wrapping(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() items = list(effects) assert len(items) > 0 splice_set_count = sum(1 for e in items if isinstance(e, SpliceOutcomeSet)) @@ -429,7 +435,7 @@ def test_collection_iteration_after_wrapping(): def test_short_description_uses_most_likely_inner_effect(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) desc = target.short_description assert desc.startswith("splice-set:") @@ -445,7 +451,7 @@ def test_short_description_uses_most_likely_inner_effect(): def test_opt_in_works_on_reverse_strand_donor(): variant = Variant("17", 43082403, "C", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(BRCA1_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert isinstance(target, SpliceOutcomeSet) assert target.disrupted_signal_class is SpliceDonor @@ -489,7 +495,7 @@ def test_splice_outcome_set_is_a_multi_outcome_effect(): from varcode import MultiOutcomeEffect variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) assert isinstance(target, MultiOutcomeEffect) assert all(isinstance(c, EffectCandidate) for c in target.candidates) @@ -513,7 +519,7 @@ def test_mid_codon_in_frame_exon_skip_reshapes_boundary(): "Canonical donor G not present at 117536674; got %s." % type(bare).__name__) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) # ExonSkipping with a reshaped boundary still carries aa_ref/alt @@ -525,7 +531,7 @@ def test_mid_codon_in_frame_exon_skip_reshapes_boundary(): def test_codon_aligned_exon_skip_is_in_frame(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() splice_set = next(e for e in effects if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) # CFTR exon 3 is codon-aligned (216 bp / 72 codons). @@ -549,7 +555,7 @@ def test_out_of_frame_exon_skip_marked_out_of_frame(): "Canonical donor G not present at %d; got %s." % ( donor_plus_1, type(bare).__name__)) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) assert skip.effect.in_frame is False @@ -568,7 +574,7 @@ def test_out_of_frame_exon_skip_marked_out_of_frame(): def test_splice_outcome_set_json_round_trip(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) payload = target.to_json() @@ -586,7 +592,7 @@ def test_splice_outcome_set_round_trip_preserves_priority_class(): from varcode.effects import effect_priority variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() target = next(e for e in effects if e.transcript is transcript) restored = SpliceOutcomeSet.from_json(target.to_json()) assert effect_priority(restored) == effect_priority(target) @@ -605,20 +611,26 @@ def test_non_splice_effects_are_not_multi_outcome(): def test_splice_outcome_set_sorts_as_disrupted_signal_class(): + """SpliceOutcomeSet.priority_class delegates to disrupted_signal_class, + so the wrapper sorts at the same priority as the raw splice signal + would.""" from varcode.effects import effect_priority + from varcode.effects.effect_prediction import ( + _predict_variant_effect_on_transcript_raw, + ) variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - bare_effect = variant.effect_on_transcript(transcript) - assert isinstance(bare_effect, SpliceDonor) - bare_priority = effect_priority(bare_effect) - wrapped_effects = variant.effects(splice_outcomes=True) - wrapped = next(e for e in wrapped_effects if e.transcript is transcript) - assert effect_priority(wrapped) == bare_priority + raw = _predict_variant_effect_on_transcript_raw(variant, transcript) + assert isinstance(raw, SpliceDonor) + splice_set = variant.effect_on_transcript(transcript) + assert isinstance(splice_set, SpliceOutcomeSet) + assert splice_set.disrupted_signal_class is SpliceDonor + assert effect_priority(splice_set) == effect_priority(raw) def test_splice_outcome_set_top_priority_works(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) - effects = variant.effects(splice_outcomes=True) + effects = variant.effects() top = effects.top_priority_effect() top_class_name = type(top).__name__ assert top_class_name in ("SpliceOutcomeSet", "SpliceDonor") @@ -632,12 +644,9 @@ def test_splice_outcome_set_top_priority_works(): def test_acceptor_side_intronic_splice_site_uses_cryptic_acceptor(): variant = Variant("7", 117530896, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - bare = variant.effect_on_transcript(transcript) - assert isinstance(bare, IntronicSpliceSite) and \ - not isinstance(bare, (SpliceDonor, SpliceAcceptor)) - splice_set = next( - e for e in variant.effects(splice_outcomes=True) - if e.transcript is transcript) + splice_set = variant.effect_on_transcript(transcript) + assert isinstance(splice_set, SpliceOutcomeSet) + assert splice_set.disrupted_signal_class is IntronicSpliceSite mechanisms = {type(c.effect) for c in splice_set.candidates} assert CrypticAcceptor in mechanisms assert CrypticDonor not in mechanisms @@ -646,12 +655,9 @@ def test_acceptor_side_intronic_splice_site_uses_cryptic_acceptor(): def test_donor_side_intronic_splice_site_uses_cryptic_donor(): variant = Variant("7", 117531117, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) - bare = variant.effect_on_transcript(transcript) - assert isinstance(bare, IntronicSpliceSite) and \ - not isinstance(bare, (SpliceDonor, SpliceAcceptor)) - splice_set = next( - e for e in variant.effects(splice_outcomes=True) - if e.transcript is transcript) + splice_set = variant.effect_on_transcript(transcript) + assert isinstance(splice_set, SpliceOutcomeSet) + assert splice_set.disrupted_signal_class is IntronicSpliceSite mechanisms = {type(c.effect) for c in splice_set.candidates} assert CrypticDonor in mechanisms assert CrypticAcceptor not in mechanisms @@ -666,7 +672,7 @@ def test_candidate_proteins_keyed_by_mechanism_class(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) proteins = splice_set.candidate_proteins # Keys are mechanism classes, values are protein strings (or ""). @@ -680,7 +686,7 @@ def test_mutant_protein_sequences_collects_distinct_proteins(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) proteins = splice_set.mutant_protein_sequences assert isinstance(proteins, set) @@ -699,7 +705,7 @@ def test_most_likely_candidate_returns_effect_candidate(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) assert isinstance(splice_set.most_likely_candidate, EffectCandidate) @@ -708,7 +714,7 @@ def test_most_likely_effect_returns_splice_mechanism_effect(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) assert isinstance(splice_set.most_likely_effect, SpliceMechanismEffect) assert splice_set.most_likely_effect is splice_set.most_likely_candidate.effect @@ -718,7 +724,7 @@ def test_highest_priority_candidate_returns_most_disruptive(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) from varcode.effects import effect_priority top = splice_set.highest_priority_candidate @@ -733,7 +739,7 @@ def test_highest_priority_effect_returns_inner_effect(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) assert ( splice_set.highest_priority_effect @@ -747,7 +753,7 @@ def test_resolved_splice_mechanism_delegates_priority_to_protein_effect(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) assert isinstance(skip.effect.protein_effect, Deletion) @@ -761,7 +767,7 @@ def test_effects_unwraps_candidate_tuple(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) inner = splice_set.effects assert all(isinstance(e, SpliceMechanismEffect) for e in inner) @@ -777,7 +783,7 @@ def test_rna_evidence_reconciles_splice_set_by_excluding_unobserved_mechanisms() variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) original = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(original, ExonSkipping) observed = make_rna_outcome( @@ -809,7 +815,7 @@ def test_rna_evidence_records_added_splice_mechanisms(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) original = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(original, ExonSkipping) added_effect = CrypticAcceptor( @@ -839,7 +845,7 @@ def test_rna_evidence_support_is_tracked_per_splice_candidate(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) original = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(original, ExonSkipping) @@ -925,7 +931,7 @@ def test_cftr_exon_3_skip_protein_equals_reference_minus_skipped_aas(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) assert skip.effect.in_frame @@ -953,7 +959,7 @@ def test_cftr_exon_3_skip_short_description_uses_hgvs_del_notation(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) desc = skip.effect.short_description @@ -977,7 +983,7 @@ def test_cftr_out_of_frame_exon_skip_produces_frameshift_protein(): if not isinstance(bare, SpliceDonor): pytest.skip("g.117509143 G>A not classified as SpliceDonor") splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) assert skip.effect.in_frame is False @@ -1005,7 +1011,7 @@ def test_cftr_exon_3_skip_aa_alt_describes_junction(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) skip = _candidate_of_type(splice_set, ExonSkipping) assert skip.effect.in_frame @@ -1025,10 +1031,10 @@ def test_cftr_acceptor_disruption_skips_same_exon_as_donor_disruption(): transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) donor_set = next( - e for e in donor_variant.effects(splice_outcomes=True) + e for e in donor_variant.effects() if e.transcript is transcript) acceptor_set = next( - e for e in acceptor_variant.effects(splice_outcomes=True) + e for e in acceptor_variant.effects() if e.transcript is transcript) donor_skip = _candidate_of_type(donor_set, ExonSkipping) acceptor_skip = _candidate_of_type(acceptor_set, ExonSkipping) @@ -1110,7 +1116,7 @@ def test_exon_skipping_serialization_preserves_affected_exon_and_protein(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) restored = SpliceOutcomeSet.from_json(splice_set.to_json()) og = _candidate_of_type(splice_set, ExonSkipping) @@ -1128,7 +1134,7 @@ def test_intron_retention_serialization_preserves_intron_coords(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) restored = SpliceOutcomeSet.from_json(splice_set.to_json()) og = _candidate_of_type(splice_set, IntronRetention) @@ -1142,7 +1148,7 @@ def test_cryptic_donor_serialization_preserves_direction(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) restored = SpliceOutcomeSet.from_json(splice_set.to_json()) og = _candidate_of_type(splice_set, CrypticDonor) @@ -1155,7 +1161,7 @@ def test_normal_splicing_serialization_preserves_coding_effect_class(): variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) restored = SpliceOutcomeSet.from_json(splice_set.to_json()) og = _candidate_of_type(splice_set, NormalSplicing) @@ -1180,7 +1186,7 @@ def test_predicted_exon_skipping_short_description_includes_predicted_tag(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) # The IntronRetention candidate is predicted (no provider). ir = _candidate_of_type(splice_set, IntronRetention) @@ -1197,7 +1203,7 @@ def test_intron_retention_predicted_side_is_donor_or_acceptor_or_unknown(): variant = Variant("7", 117531115, "G", "A", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) splice_set = next( - e for e in variant.effects(splice_outcomes=True) + e for e in variant.effects() if e.transcript is transcript) ir = _candidate_of_type(splice_set, IntronRetention) # For a SpliceDonor-classed disruption we expect side="donor". diff --git a/tests/test_splice_site_effects.py b/tests/test_splice_site_effects.py index 1ff8fcc..917a539 100644 --- a/tests/test_splice_site_effects.py +++ b/tests/test_splice_site_effects.py @@ -88,19 +88,18 @@ def test_exonic_splice_disrupting_mutation_classified_as_splice_site(): def test_exonic_splice_disrupting_mutation_has_coding_alternate(): - """ExonicSpliceSite should carry the coding effect as alternate_effect. - - When a variant both disrupts a splice signal and changes coding sequence, - the ExonicSpliceSite.alternate_effect should be the coding effect (e.g., - Substitution), so both effects are accessible. - """ + """Splice-and-coding variants wrap as SpliceOutcomeSet with the + coding consequence reachable via the ``alternate_effect`` compat + shim (the legacy ``ExonicSpliceSite.alternate_effect`` access + pattern keeps working through the wrapper).""" + from varcode import SpliceOutcomeSet variant = Variant("7", 117531114, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) effect = variant.effect_on_transcript(transcript) - assert effect.__class__ is ExonicSpliceSite, \ - "Expected ExonicSpliceSite, got %s" % effect.__class__.__name__ + assert isinstance(effect, SpliceOutcomeSet) + assert effect.disrupted_signal_class is ExonicSpliceSite assert effect.alternate_effect is not None, \ - "ExonicSpliceSite should have an alternate_effect" + "ExonicSpliceSite wrap should expose alternate_effect" assert effect.alternate_effect.__class__ is Substitution, \ "Expected alternate_effect to be Substitution, got %s" % ( effect.alternate_effect.__class__.__name__,) @@ -211,14 +210,18 @@ def test_intronic_splice_site_at_plus_3_to_6(): These positions are implicated in splicing but not as confidently as +1/+2. """ + from varcode import SpliceOutcomeSet for offset in [3, 4, 5, 6]: pos = 117531114 + offset # exon 4 end + offset variant = Variant("7", pos, "G", "T", ensembl_grch38) transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID) effect = variant.effect_on_transcript(transcript) - assert effect.__class__ is IntronicSpliceSite, \ - "Expected IntronicSpliceSite at +%d, got %s" % ( + assert isinstance(effect, SpliceOutcomeSet), \ + "Expected SpliceOutcomeSet at +%d, got %s" % ( offset, effect.__class__.__name__) + assert effect.disrupted_signal_class is IntronicSpliceSite, \ + "Expected disrupted_signal_class IntronicSpliceSite at +%d, got %s" % ( + offset, effect.disrupted_signal_class.__name__) def test_intronic_splice_site_at_minus_3(): @@ -252,24 +255,33 @@ def test_deep_intronic_variant_is_intronic(): def test_exonic_splice_site_not_dropped_by_drop_silent_and_noncoding(): - """ExonicSpliceSite with a coding alternate should survive - drop_silent_and_noncoding(). - - Regression test for https://github.com/openvax/varcode/issues/136 + """ExonicSpliceSite (wrapped as SpliceOutcomeSet) with a coding + alternate should survive drop_silent_and_noncoding(). + + Regression test for https://github.com/openvax/varcode/issues/136. + Also covers the silent-splice filter bug surfaced during the + always-on switch: SpliceOutcomeSet now reports + ``modifies_protein_sequence = True`` unconditionally so splice + variants whose NormalSplicing coding effect happens to be Silent + are still kept. """ - # Use a variant that produces ExonicSpliceSite with Substitution alternate + from varcode import SpliceOutcomeSet variant = Variant("7", 117531114, "G", "T", ensembl_grch38) effects = variant.effects() - # Verify there's at least one ExonicSpliceSite - splice_effects = [e for e in effects if e.__class__ is ExonicSpliceSite] - assert len(splice_effects) > 0, "Expected at least one ExonicSpliceSite" + # The variant wraps as SpliceOutcomeSet with ExonicSpliceSite as the + # disrupted signal class. + splice_effects = [ + e for e in effects + if isinstance(e, SpliceOutcomeSet) + and e.disrupted_signal_class is ExonicSpliceSite] + assert len(splice_effects) > 0, \ + "Expected at least one SpliceOutcomeSet wrapping ExonicSpliceSite" - # After dropping silent and noncoding, the ExonicSpliceSite with a coding - # alternate should remain + # After dropping silent and noncoding, the splice variant should remain. remaining = effects.drop_silent_and_noncoding() assert len(remaining) > 0, \ - "drop_silent_and_noncoding() should not drop ExonicSpliceSite with coding alternate" + "drop_silent_and_noncoding() should not drop a splice-disrupting variant" # ----------------------------------------------------------------------- diff --git a/varcode/annotators/fast.py b/varcode/annotators/fast.py index a8c6b86..eb402c8 100644 --- a/varcode/annotators/fast.py +++ b/varcode/annotators/fast.py @@ -43,9 +43,15 @@ class FastEffectAnnotator: def annotate_on_transcript(self, variant, transcript): """Delegate to the existing per-transcript prediction. - No fast-path / slow-path dispatch at this stage; that lives - on the protein-diff annotator once it exists. + Returns the raw effect class (``ExonicSpliceSite`` / + ``SpliceDonor`` / etc. for splice disruptions), **not** wrapped + in ``SpliceOutcomeSet``. The wrap is applied at the collection + boundary in :func:`predict_variant_effects` so internal + consumers (notably the ``protein_diff`` annotator's dual + dispatch) can still pattern-match on the raw class. """ # Lazy import avoids a circular dep at package import time. - from ..effects import predict_variant_effect_on_transcript - return predict_variant_effect_on_transcript(variant, transcript) + from ..effects.effect_prediction import ( + _predict_variant_effect_on_transcript_raw, + ) + return _predict_variant_effect_on_transcript_raw(variant, transcript) diff --git a/varcode/effects/effect_classes.py b/varcode/effects/effect_classes.py index 2a76329..9bbad60 100644 --- a/varcode/effects/effect_classes.py +++ b/varcode/effects/effect_classes.py @@ -808,10 +808,12 @@ class ExonicSpliceSite(Exonic, SpliceSite, MultiOutcomeEffect): ``alternate_effect`` stays on the instance as a first-class field for back-compat with callers that depended on it. - This is the **lightweight 2-outcome form**. Callers that want - the richer exon-skipping / intron-retention / cryptic-splice - candidate set opt into ``splice_outcomes=True`` and get a - :class:`~varcode.splice_outcomes.SpliceOutcomeSet` instead. + Note: as of varcode 6.0, ``ExonicSpliceSite`` no longer surfaces + as the top-level effect at the user-facing API — splice-disrupting + variants are always wrapped in + :class:`~varcode.splice_outcomes.SpliceOutcomeSet`. ``ExonicSpliceSite`` + survives as ``splice_set.disrupted_signal_class`` (a type) and as + the ``splice_signal`` reference on each candidate (an instance). """ def __init__(self, variant, transcript, exon, alternate_effect): Exonic.__init__(self, variant, transcript) diff --git a/varcode/effects/effect_ordering.py b/varcode/effects/effect_ordering.py index 3a7d0e7..c166df1 100644 --- a/varcode/effects/effect_ordering.py +++ b/varcode/effects/effect_ordering.py @@ -371,17 +371,33 @@ def multi_gene_effect_sort_key(effect): def select_between_exonic_splice_site_and_alternate_effect(effect): """ - If the given effect is an ExonicSpliceSite then it might contain - an alternate effect of higher priority. In that case, return the - alternate effect. Otherwise, this acts as an identity function. - - An exact-class check (not ``isinstance``) is used because the - function is only meant to unwrap bare ``ExonicSpliceSite`` - instances. This has a useful side effect under - ``splice_outcomes=True``: the wrapping ``SpliceOutcomeSet`` passes - through unchanged (so every plausible outcome stays visible) and - sorts by its ``priority_class``. - """ + If the given effect is an ExonicSpliceSite (or the always-on + SpliceOutcomeSet wrapper around one) it might carry a more + disruptive coding consequence on its ``alternate_effect``. In + that case, return the alternate. Otherwise this is the identity. + + For SpliceOutcomeSet, "alternate" is the highest-priority + candidate's inner effect — that's the worst case if any of the + candidate mechanisms fires, including ExonSkipping / PrematureStop + introduced by skipping the disrupted exon. + """ + # Always-on SpliceOutcomeSet: match the legacy unwrap semantics — + # only ``ExonicSpliceSite``-disruption had a coding alternate worth + # surfacing as the canonical top effect. Intronic disruptions + # (SpliceDonor / SpliceAcceptor / IntronicSpliceSite) keep the + # SpliceOutcomeSet as the canonical top. + from ..splice_outcomes import SpliceOutcomeSet + if isinstance(effect, SpliceOutcomeSet): + if effect.disrupted_signal_class is not ExonicSpliceSite: + return effect + alt = effect.alternate_effect + if alt is None: + return effect + signal_priority = effect_priority(effect) + alt_priority = effect_priority(alt) + if alt_priority > signal_priority: + return alt + return effect if effect.__class__ is not ExonicSpliceSite: return effect if effect.alternate_effect is None: diff --git a/varcode/effects/effect_prediction.py b/varcode/effects/effect_prediction.py index 451890f..e197f87 100644 --- a/varcode/effects/effect_prediction.py +++ b/varcode/effects/effect_prediction.py @@ -46,13 +46,21 @@ def predict_variant_effects( variant, raise_on_error=False, - splice_outcomes=False, annotator=None, germline=None, phase_resolver=None): """Determine the effects of a variant on any transcripts it overlaps. Returns an EffectCollection object. + Splice-disrupting effects (where the variant lands in the canonical + splice window — see :class:`SpliceDonor` / :class:`SpliceAcceptor` / + :class:`ExonicSpliceSite` / :class:`IntronicSpliceSite`) are always + returned as a :class:`SpliceOutcomeSet` wrapping the candidate + mechanisms. Candidates are computed lazily — only the cheap + ``NormalSplicing`` candidate is built eagerly; ``ExonSkipping`` / + ``IntronRetention`` / cryptic candidates materialise on first + ``.candidates`` access. See ``varcode.splice_outcomes`` for details. + Parameters ---------- variant : Variant @@ -62,14 +70,6 @@ def predict_variant_effects( determine the effect of this variant on a transcript, or simply log the error and continue. - splice_outcomes : bool - When True, splice-disrupting effects (SpliceDonor, - SpliceAcceptor, ExonicSpliceSite, IntronicSpliceSite) are - wrapped in a :class:`SpliceOutcomeSet` carrying multiple - plausible outcomes (normal splicing, exon skipping, intron - retention, cryptic splice). Default False for back-compat. - See ``varcode.splice_outcomes`` for details. - annotator : str, EffectAnnotator, or None Which registered :class:`EffectAnnotator` to use per-transcript. ``None`` (default) picks up whatever is currently set via @@ -185,11 +185,10 @@ def predict_variant_effects( annotator_version=annotator_version, annotated_at=annotated_at, ) - if splice_outcomes: - # Lazy import to avoid circular deps; splice_outcomes lives at - # the package root and consumes effect_classes. - from ..splice_outcomes import wrap_splice_effects_in_collection - collection = wrap_splice_effects_in_collection(collection) + # SpliceOutcomeSet is always-on. Lazy import to avoid circular deps; + # splice_outcomes lives at the package root and consumes effect_classes. + from ..splice_outcomes import wrap_splice_effects_in_collection + collection = wrap_splice_effects_in_collection(collection) return collection @@ -216,11 +215,30 @@ def predict_variant_effect_on_transcript(variant, transcript): """Return the transcript effect (such as FrameShift) that results from applying this genomic variant to a particular transcript. + Splice-disrupting effects are wrapped in a + :class:`varcode.splice_outcomes.SpliceOutcomeSet` carrying the + candidate mechanisms (always-on as of varcode 6.0). The + wrapper is lazy — only the cheap ``NormalSplicing`` candidate + is built eagerly; the rest materialise on ``.candidates`` + access. + Parameters ---------- transcript : Transcript Transcript we're going to apply mutation to. """ + from ..splice_outcomes import enumerate_splice_outcomes + raw_effect = _predict_variant_effect_on_transcript_raw( + variant, transcript) + return enumerate_splice_outcomes(raw_effect) + + +def _predict_variant_effect_on_transcript_raw(variant, transcript): + """The raw classifier — returns the unwrapped splice-signal + class (``ExonicSpliceSite`` / ``SpliceDonor`` etc.) for + splice-disrupting variants. The public entry point wraps the + result in a ``SpliceOutcomeSet``. + """ if transcript.__class__ is not Transcript: raise TypeError( diff --git a/varcode/splice_outcomes.py b/varcode/splice_outcomes.py index a30e2bd..3d74cda 100644 --- a/varcode/splice_outcomes.py +++ b/varcode/splice_outcomes.py @@ -73,6 +73,7 @@ SpliceDonor, SpliceMechanismEffect, SpliceSite, + TranscriptMutationEffect, ) from .mutant_transcript import MutantTranscript, TranscriptEdit from .effect_candidates import EffectCandidate @@ -126,7 +127,7 @@ def _splice_candidate_signature(candidate): ) -class SpliceOutcomeSet(MultiOutcomeEffect): +class SpliceOutcomeSet(MultiOutcomeEffect, TranscriptMutationEffect): """A splice-disrupting variant's effect, expressed as a set of plausible mechanisms rather than a single Effect. @@ -158,9 +159,9 @@ class SpliceOutcomeSet(MultiOutcomeEffect): For back-compat, :attr:`short_description` delegates to the most likely candidate. Constructed by - :func:`enumerate_splice_outcomes` when a caller passes - ``splice_outcomes=True`` to ``Variant.effects()`` or - ``VariantCollection.effects()``. + :func:`enumerate_splice_outcomes` as part of every + ``Variant.effects()`` / ``Variant.effect_on_transcript(...)`` / + ``VariantCollection.effects()`` call (always-on as of varcode 6.0). RNA evidence reconciliation --------------------------- @@ -176,30 +177,99 @@ class SpliceOutcomeSet(MultiOutcomeEffect): overrides needed now that there's no enum to stringify. """ + # Class-level filter shortcuts. A splice disruption is always + # *potentially* protein-modifying — any non-NormalSplicing candidate + # (ExonSkipping, IntronRetention, CrypticDonor/Acceptor) can rewrite + # the protein. Hardcoded so ``drop_silent_and_noncoding()`` doesn't + # silently drop splice variants whose NormalSplicing candidate + # happens to carry a Silent coding_effect, and so the filter path + # doesn't trigger lazy candidate materialization. + modifies_protein_sequence = True + modifies_coding_sequence = True + def __init__( - self, variant, transcript, candidates, disrupted_signal_class=None, + self, variant, transcript, candidates=None, + disrupted_signal_class=None, + *, + splice_effect=None, + genomic_sequence=None, + mechanism_order=None, dna_candidates=None, rna_evidence=(), added_candidates=(), excluded_candidates=(), candidate_rna_evidence=()): - MultiOutcomeEffect.__init__(self, variant) - self.transcript = transcript - self._candidates = tuple(candidates) + # SpliceOutcomeSet is both a MultiOutcomeEffect (candidate + # set) and a TranscriptMutationEffect (carries variant, gene, + # transcript). Initialising via TranscriptMutationEffect sets + # all three; MultiOutcomeEffect carries no extra state. + TranscriptMutationEffect.__init__(self, variant, transcript) # The SpliceSite *subclass* (a type, not an instance) of the # disruption this set replaced — SpliceDonor, SpliceAcceptor, # ExonicSpliceSite, or IntronicSpliceSite. Used for priority # lookup; the disruption *instance* lives on each candidate's # effect.splice_signal. self.disrupted_signal_class = disrupted_signal_class - self.dna_candidates = tuple( - self._candidates if dna_candidates is None else dna_candidates) + if candidates is not None: + # Materialized path — RNA-reconciliation rebuilds, + # deserialization, tests with explicit candidate lists. + self._candidates_cache = tuple(candidates) + self._normal_splicing_eager = next( + (c for c in self._candidates_cache + if isinstance(c.effect, NormalSplicing)), + None) + self._lazy_state = None + else: + # Lazy path — enumerate_splice_outcomes uses this. The + # NormalSplicing candidate is computed eagerly because it's + # cheap (just wraps the existing coding effect) and is the + # fast path for ``effect_if_splicing_unchanged`` / + # ``alternate_effect``. Expensive candidates (ExonSkipping, + # IntronRetention, cryptic) are deferred until ``.candidates`` + # is read. + if splice_effect is None or mechanism_order is None: + raise ValueError( + "SpliceOutcomeSet needs either explicit ``candidates`` " + "or (``splice_effect`` + ``mechanism_order``) for " + "lazy construction.") + self._normal_splicing_eager = ( + _build_normal_splicing_candidate(splice_effect)) + self._candidates_cache = None + self._lazy_state = (splice_effect, mechanism_order, genomic_sequence) + self._explicit_dna_candidates = ( + None if dna_candidates is None else tuple(dna_candidates)) self.rna_evidence = tuple(rna_evidence) self.added_candidates = tuple(added_candidates) self.excluded_candidates = tuple(excluded_candidates) self.candidate_rna_evidence = tuple(candidate_rna_evidence) + @property + def _candidates(self): + if self._candidates_cache is None: + splice_effect, mechanism_order, genomic_sequence = self._lazy_state + materialized = [] + for mech_cls in mechanism_order: + if mech_cls is NormalSplicing: + materialized.append(self._normal_splicing_eager) + else: + materialized.append(_build_candidate_for( + mech_cls, splice_effect, genomic_sequence)) + self._candidates_cache = tuple(materialized) + self._lazy_state = None + return self._candidates_cache + @property def candidates(self): return self._combine_with_extra_candidates(self._candidates) + @property + def dna_candidates(self): + # When not explicitly set, defaults to the DNA-derived candidate + # set — same as ``_candidates`` here (RNA-attached outcomes live + # only on the public ``candidates`` property via + # ``_extra_candidates``). Reading this triggers full + # materialization if the set was constructed lazily. + if self._explicit_dna_candidates is not None: + return self._explicit_dna_candidates + return self._candidates + def with_rna_evidence(self, observed_candidates): """Return a new set reconciled with RNA-observed splice evidence. @@ -335,13 +405,19 @@ def effect_if_splicing_unchanged(self): Unlike the old ``ExonicSpliceSite.alternate_effect`` it works for intronic disruptions too. """ - # Iterate DNA-derived candidates only; ``candidates`` also - # includes RNA-attached outcomes, but "if splicing proceeds" - # is a DNA property. - for candidate in self._candidates: - if isinstance(candidate.effect, NormalSplicing): - return candidate.effect.coding_effect - return None + # Fast path: NormalSplicing is computed eagerly and stored on + # the instance, so this read never triggers full materialization + # of the expensive candidates (ExonSkipping, IntronRetention, + # cryptic). + if self._normal_splicing_eager is None: + return None + return self._normal_splicing_eager.effect.coding_effect + + # Back-compat alias. Existing callers reading ``effect.alternate_effect`` + # on the default 2-outcome shape (``ExonicSpliceSite``) keep working + # when the effect is now wrapped in a ``SpliceOutcomeSet``. New code + # should use ``effect_if_splicing_unchanged``. + alternate_effect = effect_if_splicing_unchanged @property def candidate_proteins(self): @@ -1254,6 +1330,26 @@ def enumerate_splice_outcomes(splice_effect, genomic_sequence=None): input is a :class:`SpliceSite` disruption; otherwise the input unchanged. """ + # Already-wrapped: idempotent for the no-provider case; with a + # provider, rebuild so the new sequence flows through to lazy + # IntronRetention / cryptic candidates. + if isinstance(splice_effect, SpliceOutcomeSet): + if genomic_sequence is None: + return splice_effect + if splice_effect._lazy_state is None: + raise RuntimeError( + "enumerate_splice_outcomes(splice_set, genomic_sequence=...) " + "needs a lazy SpliceOutcomeSet; this one has already " + "materialized its candidates.") + raw, mechanism_order, _ = splice_effect._lazy_state + return SpliceOutcomeSet( + variant=splice_effect.variant, + transcript=splice_effect.transcript, + disrupted_signal_class=splice_effect.disrupted_signal_class, + splice_effect=raw, + mechanism_order=mechanism_order, + genomic_sequence=genomic_sequence, + ) # Only splice-signal disruptions (the SpliceSite subclasses) get # wrapped; everything else passes through untouched. if not isinstance(splice_effect, SpliceSite): @@ -1266,31 +1362,40 @@ def enumerate_splice_outcomes(splice_effect, genomic_sequence=None): # through rather than crashing on a None mechanism order. return splice_effect - candidates = [] - for mechanism_cls in mechanism_order: - if mechanism_cls is NormalSplicing: - candidates.append(_build_normal_splicing_candidate( - splice_effect)) - elif mechanism_cls is ExonSkipping: - candidates.append(_build_exon_skipping_candidate( - splice_effect)) - elif mechanism_cls is IntronRetention: - candidates.append(_build_intron_retention_candidate( - splice_effect, - genomic_sequence=genomic_sequence)) - elif mechanism_cls in (CrypticDonor, CrypticAcceptor): - candidates.append(_build_cryptic_splice_candidate( - splice_effect, mechanism_cls, - genomic_sequence=genomic_sequence)) - + # Lazy construction: the SpliceOutcomeSet eagerly builds only the + # cheap NormalSplicing candidate (used by the fast + # effect_if_splicing_unchanged / alternate_effect path) and defers + # ExonSkipping / IntronRetention / cryptic until ``.candidates`` is + # accessed. return SpliceOutcomeSet( variant=splice_effect.variant, transcript=getattr(splice_effect, "transcript", None), - candidates=candidates, disrupted_signal_class=type(splice_effect), + splice_effect=splice_effect, + mechanism_order=mechanism_order, + genomic_sequence=genomic_sequence, ) +def _build_candidate_for(mechanism_cls, splice_effect, genomic_sequence): + """Dispatch from a mechanism class to its builder. Used by the lazy + materialization path in :class:`SpliceOutcomeSet`. + """ + if mechanism_cls is NormalSplicing: + return _build_normal_splicing_candidate(splice_effect) + if mechanism_cls is ExonSkipping: + return _build_exon_skipping_candidate(splice_effect) + if mechanism_cls is IntronRetention: + return _build_intron_retention_candidate( + splice_effect, genomic_sequence=genomic_sequence) + if mechanism_cls in (CrypticDonor, CrypticAcceptor): + return _build_cryptic_splice_candidate( + splice_effect, mechanism_cls, + genomic_sequence=genomic_sequence) + raise ValueError( + "Unknown splice mechanism class: %r" % (mechanism_cls,)) + + def wrap_splice_effects_in_collection(effect_collection): """Apply :func:`enumerate_splice_outcomes` to every splice-related Effect in an EffectCollection. Non-splice effects pass through. diff --git a/varcode/variant.py b/varcode/variant.py index 7a15f3a..6ce67a1 100644 --- a/varcode/variant.py +++ b/varcode/variant.py @@ -452,25 +452,23 @@ def coding_genes(self): ] def effects( - self, raise_on_error=True, splice_outcomes=False, + self, raise_on_error=True, annotator=None, phase_resolver=None, rna_resolver=None, germline=None): """Predict the variant's effects on overlapping transcripts. + Splice-disrupting effects are always wrapped in a + :class:`varcode.splice_outcomes.SpliceOutcomeSet` carrying the + candidate mechanisms (normal splicing, exon skipping, intron + retention, cryptic splice). Mechanism candidates are computed + lazily, so the wrap is cheap. See openvax/varcode#262, #391. + Parameters ---------- raise_on_error : bool If True, raise on annotation errors; if False, capture them as Failure effects. - splice_outcomes : bool - If True, splice-disrupting effects are wrapped in a - :class:`varcode.splice_outcomes.SpliceOutcomeSet` carrying - multiple plausible outcomes (normal splicing, exon - skipping, intron retention, cryptic splice). Opt-in; - default False preserves existing behaviour. See - openvax/varcode#262. - annotator : str, EffectAnnotator, or None Per-call annotator override. ``None`` uses the currently configured default (``"fast"`` today; swappable via @@ -515,7 +513,6 @@ def effects( effects = predict_variant_effects( variant=self, raise_on_error=raise_on_error, - splice_outcomes=splice_outcomes, annotator=annotator, germline=germline, phase_resolver=phase_resolver, diff --git a/varcode/variant_collection.py b/varcode/variant_collection.py index 309f600..7f67371 100644 --- a/varcode/variant_collection.py +++ b/varcode/variant_collection.py @@ -115,10 +115,17 @@ def clone_with_new_elements(self, new_elements): return self.from_dict(kwargs) def effects( - self, raise_on_error=True, splice_outcomes=False, + self, raise_on_error=True, annotator=None, phase_resolver=None, rna_resolver=None, germline=None, validate_reference=True): """ + Splice-disrupting effects are always wrapped in a + :class:`varcode.splice_outcomes.SpliceOutcomeSet` carrying the + candidate mechanisms (always-on as of varcode 6.0). Mechanism + candidates are computed lazily — only the cheap + ``NormalSplicing`` candidate is built eagerly. See + openvax/varcode#262, #391. + Parameters ---------- raise_on_error : bool, optional @@ -126,12 +133,6 @@ def effects( transcript, should it be raised? This default is True, meaning errors result in raised exceptions, otherwise they are only logged. - splice_outcomes : bool, optional - If True, splice-disrupting effects are wrapped in a - :class:`varcode.splice_outcomes.SpliceOutcomeSet` carrying - multiple plausible outcomes. Opt-in; default False - preserves existing behaviour. See openvax/varcode#262. - annotator : str, EffectAnnotator, or None Per-call annotator override applied to every variant in the collection. See :meth:`Variant.effects` and @@ -189,7 +190,6 @@ def effects( for variant in self for effect in variant.effects( raise_on_error=raise_on_error, - splice_outcomes=splice_outcomes, annotator=annotator, germline=germline, phase_resolver=phase_resolver, From c210f13ac7dd9cd6f5a7cfca503761cc614f0478 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Mon, 25 May 2026 15:16:43 -0400 Subject: [PATCH 6/7] Fix select_between_exonic_splice_site_and_alternate_effect docstring Review pointed out the docstring claimed "highest-priority candidate's inner effect" but the implementation reads ``alternate_effect`` (i.e. ``NormalSplicing.coding_effect``). Implementation is the intended behaviour (preserves legacy ``top_priority_effect`` semantics, keeps the SpliceOutcomeSet as the canonical top for mechanism-only outcomes like ExonSkipping); docstring was misleading. Rewrote the docstring to be explicit: - "Alternate" is the if-splicing-proceeds coding effect, not the highest-priority across mechanisms. - Behavior contract: exonic variant that introduces PrematureStop in-codon surfaces as PrematureStop; mechanism-only outcomes leave the SpliceOutcomeSet as the top so the full possibility set stays visible. --- varcode/effects/effect_ordering.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/varcode/effects/effect_ordering.py b/varcode/effects/effect_ordering.py index c166df1..a9a8f12 100644 --- a/varcode/effects/effect_ordering.py +++ b/varcode/effects/effect_ordering.py @@ -376,10 +376,15 @@ def select_between_exonic_splice_site_and_alternate_effect(effect): disruptive coding consequence on its ``alternate_effect``. In that case, return the alternate. Otherwise this is the identity. - For SpliceOutcomeSet, "alternate" is the highest-priority - candidate's inner effect — that's the worst case if any of the - candidate mechanisms fires, including ExonSkipping / PrematureStop - introduced by skipping the disrupted exon. + "Alternate" here is specifically the *if-splicing-proceeds* + coding effect (``NormalSplicing.coding_effect`` on the wrapper, + ``ExonicSpliceSite.alternate_effect`` on the legacy raw class) — + **not** the highest-priority across all mechanism candidates. + This preserves the legacy ``top_priority_effect`` behavior: an + exonic variant that introduces a PrematureStop in-codon still + surfaces as PrematureStop, but mechanism-only outcomes like + ExonSkipping leave the SpliceOutcomeSet as the canonical top + (so the full possibility set stays visible to consumers). """ # Always-on SpliceOutcomeSet: match the legacy unwrap semantics — # only ``ExonicSpliceSite``-disruption had a coding alternate worth From 85f0432e0b9eb8b0ae53b87bb4ba66eac9f45bc7 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Tue, 26 May 2026 11:33:06 -0400 Subject: [PATCH 7/7] Bump version to 6.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Promotes the splice-unification work (#391, #392) to 6.0.0: SpliceOutcomeSet always-on, splice_outcomes= flag removed, discriminator migration documented in the Breaking changelog. CHANGELOG: move the [Unreleased] section to [v6.0.0] with today's date. All accumulated entries since 5.0.6 — splice unification + RNAReadPhasingSource + IUPAC ambiguity-code fix + apply_variants_to_transcript insertion-abutting fix — ship together. --- CHANGELOG.md | 2 +- varcode/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df6b23a..27b5d12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Change Log -## [Unreleased] +## [v6.0.0](https://github.com/openvax/varcode/tree/v6.0.0) (2026-05-26) **Fixed** - `apply_variants_to_transcript` now refuses an insertion abutting diff --git a/varcode/version.py b/varcode/version.py index 599b2cd..0f607a5 100644 --- a/varcode/version.py +++ b/varcode/version.py @@ -1 +1 @@ -__version__ = "5.0.6" +__version__ = "6.0.0"