innate2adaptive · mvcowley · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
@@ -1,9 +1,10 @@
 name: publish-to-pypi-and-test-pypi
-
 on:
   push:
     tags:
       - v*
+    branches:
+      - master
   workflow_dispatch:
     inputs:
       tag:
@@ -15,12 +16,10 @@ on:
         required: false
         type: boolean
         default: false
-
 jobs:
   build:
     name: build-dist 📦
     runs-on: ubuntu-latest
-
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python
@@ -40,7 +39,6 @@ jobs:
       with:
         name: python-package-distributions
         path: dist/
-
   publish-to-pypi:
     name: >-
       publish-to-pypi 📢
@@ -53,7 +51,6 @@ jobs:
       url: https://pypi.org/p/decombinator
     permissions:
       id-token: write  # IMPORTANT: mandatory for trusted publishing
-
     steps:
     - name: download-dists
       uses: actions/download-artifact@v4
@@ -62,18 +59,16 @@ jobs:
         path: dist/
     - name: publish-dist-to-pypi 
       uses: pypa/gh-action-pypi-publish@release/v1
-
   github-release:
     name: >-
       signed-github-release ✍️
     needs:
     - build
+    if: ${{ startsWith(github.ref, 'refs/tags/') }}
     runs-on: ubuntu-latest
-
     permissions:
       contents: write  # IMPORTANT: mandatory for making GitHub Releases
       id-token: write  # IMPORTANT: mandatory for sigstore
-
     steps:
     - name: download-dists
       uses: actions/download-artifact@v4
@@ -97,28 +92,21 @@ jobs:
     - name: Upload artifact signatures to GitHub Release
       env:
         GITHUB_TOKEN: ${{ github.token }}
-      # Upload to GitHub Release using the `gh` CLI.
-      # `dist/` contains the built packages, and the
-      # sigstore-produced signatures and certificates.
       run: >-
         gh release upload
         '${{ github.ref_name }}' dist/**
         --repo '${{ github.repository }}'
-
   publish-to-testpypi:
     name: publish-to-testpypi 🔨
-    if: ${{startsWith(github.ref, 'refs/tags/') && contains(github.ref, '.dev')}}  # only publish to test-PyPI on dev tag pushes
+    if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
     needs:
     - build
     runs-on: ubuntu-latest
-
     environment:
       name: test-pypi
       url: https://test.pypi.org/p/decombinator
-
     permissions:
       id-token: write  # IMPORTANT: mandatory for trusted publishing
-
     steps:
     - name: download-dists
       uses: actions/download-artifact@v4

diff --git a/.zenodo.json b/.zenodo.json
@@ -0,0 +1,53 @@
+{
+  "upload_type": "software",
+  "description": "<p><code>decombinator</code> is a fast and efficient tool for the analysis of T-cell receptor (TCR) repertoire sequences produced by deep sequencing. It accepts TCR repertoire sequencing (TCRseq) FASTQ data and returns <a href=\"https://docs.airr-community.org/en/stable/datarep/rearrangements.html\">AIRR</a> compliant files detailing TCR V(D)J recombination and counts.</p><p>TCRseq offers a powerful means to investigate biological samples to observe sequence distributions. However current high-throughput sequencing (HTS) technologies can produce large amounts of raw data which will also unavoidably contain errors relative to the original input molecules.</p><p><code>decombinator</code> addresses the problem of large datasets through speed - employing a rapid and <a href=\"https://figshare.com/articles/Aho_Corasick_String_Matching_Video/771968\">highly efficient string matching algorithm</a> to search the FASTQ files produced by HTS machines for rearranged TCR sequences. The central algorithm searches for 'tag' sequences, the presence of which uniquely indicates the inclusion of particular V or J genes in a recombination. If V and J tags are found, <code>decombinator</code> can then deduce where the ends of the germline V and J gene sections are (i.e. how much nucleotide removal occurred during V(D)J recombination), and what nucleotide sequence (the 'insert sequence') remains between the two.</p><p>These five pieces of information - the V and J genes used, how many deletions each had and the insert sequence - contain all of the information required to reconstruct the whole TCR nucleotide sequence, in a more readily stored and analysed way.</p><p>The pipeline also handles FASTQ data with many reads of the same molecule due to over-amplification. It does this by using unique molecular identifiers to \"collapse\" down duplicate reads.</p><p>Running <code>decombinator</code> is easy. All that is required is HTS read data and a few arguments specifying the chemistry of your prepared library:</p><pre><code>decombinator pipeline -in XXXX.fq -c b -br R2 -bl 42 -ol M13</code></pre>",
+  "license": "cc-by-4.0",
+  "creators": [
+    {
+      "name": "Cowley, Matthew V.",
+      "affiliation": "University College London",
+      "orcid": "0000-0002-5258-8024"
+    },
+    {
+      "name": "Heather, James M.",
+      "affiliation": "University College London"
+    },
+    {
+      "name": "Best, Katherine",
+      "affiliation": "University College London"
+    },
+    {
+      "name": "Ismail, Mazlina",
+      "affiliation": "University College London"
+    },
+    {
+      "name": "Thomas, Niclas",
+      "affiliation": "University College London"
+    },
+    {
+      "name": "Ronel, Tahel",
+      "affiliation": "University College London"
+    },
+    {
+      "name": "Oakes, Theres",
+      "affiliation": "University College London"
+    },
+    {
+      "name": "Peacock, Thomas",
+      "affiliation": "University College London"
+    },
+    {
+      "name": "Chain, Benny",
+      "affiliation": "University College London",
+      "orcid": "0000-0002-7417-3970"
+    }
+  ],
+  "related_identifiers": [
+    {
+      "identifier": "https://github.com/innate2adaptive/decombinator/tree/v5.0.0",
+      "relation": "isSupplementTo",
+      "resource_type": "software",
+      "scheme": "url"
+    }
+  ]
+}