From 05c942691610b4f41acd31f8f50d1aec3f5eb629 Mon Sep 17 00:00:00 2001 From: MVCowley <51127523+MVCowley@users.noreply.github.com> Date: Fri, 20 Mar 2026 17:10:02 +0000 Subject: [PATCH 1/2] build(zenodo): add new description file for zenodo --- .zenodo.json | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 .zenodo.json diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 0000000..f85f58f --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,53 @@ +{ + "upload_type": "software", + "description": "

decombinator is a fast and efficient tool for the analysis of T-cell receptor (TCR) repertoire sequences produced by deep sequencing. It accepts TCR repertoire sequencing (TCRseq) FASTQ data and returns AIRR-compliant files detailing TCR V(D)J recombination and counts.

TCRseq offers a powerful means to investigate biological samples to observe sequence distributions. However, current high-throughput sequencing (HTS) technologies can produce large amounts of raw data, which will also unavoidably contain errors relative to the original input molecules.

decombinator addresses the problem of large datasets through speed - employing a rapid and highly efficient string matching algorithm to search the FASTQ files produced by HTS machines for rearranged TCR sequences. The central algorithm searches for 'tag' sequences, the presence of which uniquely indicates the inclusion of particular V or J genes in a recombination. If V and J tags are found, decombinator can then deduce where the ends of the germline V and J gene sections are (i.e. how much nucleotide removal occurred during V(D)J recombination), and what nucleotide sequence (the 'insert sequence') remains between the two.

These five pieces of information - the V gene and the J gene used, the number of deletions from each, and the insert sequence - contain all of the information required to reconstruct the whole TCR nucleotide sequence, in a form that is more readily stored and analysed.

The pipeline also handles FASTQ data with many reads of the same molecule due to over-amplification. It does this by using unique molecular identifiers to \"collapse\" down duplicate reads.

Running decombinator is easy. All that is required is HTS read data and a few arguments specifying the chemistry of your prepared library:

decombinator pipeline -in XXXX.fq -c b -br R2 -bl 42 -ol M13
", + "license": "cc-by-4.0", + "creators": [ + { + "name": "Cowley, Matthew V.", + "affiliation": "University College London", + "orcid": "0000-0002-5258-8024" + }, + { + "name": "Heather, James M.", + "affiliation": "University College London" + }, + { + "name": "Best, Katherine", + "affiliation": "University College London" + }, + { + "name": "Ismail, Mazlina", + "affiliation": "University College London" + }, + { + "name": "Thomas, Niclas", + "affiliation": "University College London" + }, + { + "name": "Ronel, Tahel", + "affiliation": "University College London" + }, + { + "name": "Oakes, Theres", + "affiliation": "University College London" + }, + { + "name": "Peacock, Thomas", + "affiliation": "University College London" + }, + { + "name": "Chain, Benny", + "affiliation": "University College London", + "orcid": "0000-0002-7417-3970" + } + ], + "related_identifiers": [ + { + "identifier": "https://github.com/innate2adaptive/decombinator/tree/v5.0.0", + "relation": "isSupplementTo", + "resource_type": "software", + "scheme": "url" + } + ] +} From d23255c35535c1bb5add6085ac0c2eeaf8dcfc5d Mon Sep 17 00:00:00 2001 From: MVCowley <51127523+MVCowley@users.noreply.github.com> Date: Fri, 20 Mar 2026 17:19:20 +0000 Subject: [PATCH 2/2] build(workflow): make releases only on tag push and make test PyPI upload on merge to main --- .github/workflows/publish-to-pypi.yml | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 9eef9c2..97931cb 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -1,9 +1,10 @@ name: publish-to-pypi-and-test-pypi - on: push: tags: - v* + branches: + - master workflow_dispatch: inputs: tag: @@ -15,12 +16,10 @@ on: required: false type: boolean default: false - jobs: build: name: build-dist 📦 runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 - name: Set up Python @@ -40,7 +39,6 
@@ jobs: with: name: python-package-distributions path: dist/ - publish-to-pypi: name: >- publish-to-pypi 📢 @@ -53,7 +51,6 @@ jobs: url: https://pypi.org/p/decombinator permissions: id-token: write # IMPORTANT: mandatory for trusted publishing - steps: - name: download-dists uses: actions/download-artifact@v4 @@ -62,18 +59,16 @@ jobs: path: dist/ - name: publish-dist-to-pypi uses: pypa/gh-action-pypi-publish@release/v1 - github-release: name: >- signed-github-release ✍️ needs: - build + if: ${{ startsWith(github.ref, 'refs/tags/') }} runs-on: ubuntu-latest - permissions: contents: write # IMPORTANT: mandatory for making GitHub Releases id-token: write # IMPORTANT: mandatory for sigstore - steps: - name: download-dists uses: actions/download-artifact@v4 @@ -97,28 +92,21 @@ jobs: - name: Upload artifact signatures to GitHub Release env: GITHUB_TOKEN: ${{ github.token }} - # Upload to GitHub Release using the `gh` CLI. - # `dist/` contains the built packages, and the - # sigstore-produced signatures and certificates. run: >- gh release upload '${{ github.ref_name }}' dist/** --repo '${{ github.repository }}' - publish-to-testpypi: name: publish-to-testpypi 🔨 - if: ${{startsWith(github.ref, 'refs/tags/') && contains(github.ref, '.dev')}} # only publish to test-PyPI on dev tag pushes + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} needs: - build runs-on: ubuntu-latest - environment: name: test-pypi url: https://test.pypi.org/p/decombinator - permissions: id-token: write # IMPORTANT: mandatory for trusted publishing - steps: - name: download-dists uses: actions/download-artifact@v4