Skip to content
2 changes: 2 additions & 0 deletions beagle/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,8 @@

RIDGEBACK_URL = os.environ.get('BEAGLE_RIDGEBACK_URL', 'http://localhost:5003')

MPATH_URL = os.environ.get('BEAGLE_MPATH_URL', 'http://localhost:7331')

LOG_PATH = os.environ.get('BEAGLE_LOG_PATH', 'beagle-server.log')

LOGGING = {
Expand Down
12 changes: 12 additions & 0 deletions beagle_etl/fixtures/beagle_etl.operator.json
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,17 @@
"version": "v2.0.0",
"slug": "AccessQCOperator"
}
},
{
"model": "beagle_etl.operator",
"pk": 12,
"fields": {
"active": true,
"recipes": "[\"None\"]",
"class_name": "runner.operator.access.v1_0_0.sample_sheet.AccessSampleSheetOperator",
"version": "v1.0.0",
"slug": "AccessSampleSheetOperator"
}
}

]
10 changes: 10 additions & 0 deletions fixtures/tests/10075_D.filemetadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -93,6 +94,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": "10075_D_4_1_1_1",
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -168,6 +170,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_2",
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -244,6 +247,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -320,6 +324,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": null,
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -395,6 +400,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L009_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -471,6 +477,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L008_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -547,6 +554,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L007_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -622,6 +630,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L006_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": "10075_D_1_1_1_1",
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -697,6 +706,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L005_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": null,
"patientId": "C-DRKHP7",
Expand Down
8 changes: 8 additions & 0 deletions fixtures/tests/10075_D_single_TN_pair.filemetadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
Expand All @@ -31,6 +32,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "email@internet.com",
"oncoTreeCode": "MEL",
Expand Down Expand Up @@ -95,6 +97,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
Expand All @@ -107,6 +110,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "email@internet.com",
"oncoTreeCode": "MEL",
Expand Down Expand Up @@ -171,6 +175,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
Expand All @@ -183,6 +188,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "email@internet.com",
"oncoTreeCode": null,
Expand Down Expand Up @@ -248,6 +254,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
Expand All @@ -260,6 +267,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "email@internet.com",
"oncoTreeCode": null,
Expand Down
16 changes: 16 additions & 0 deletions runner/fixtures/runner.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,22 @@
"default": true
}
},
{
"model": "runner.pipeline",
"pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbffb",
"fields": {
"created_date": "2019-11-18T17:46:45.118Z",
"modified_date": "2019-12-05T01:12:39.854Z",
"name": "sample sheet",
"github": "git@github.com:mskcc/ACCESS-Pipeline",
"version": "ij/output_bam_files_instead_of_directory",
"entrypoint": "cwl_tools/sample_sheet/sample_sheet.cwl",
"output_file_group": "a975f490-1b02-4575-abae-a4f8e3667733",
"output_directory": "/work/access/production/runs/voyager/sample_sheets",
"operator": 10,
"default": true
}
},
{
"model": "runner.pipeline",
"pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbabc",
Expand Down
141 changes: 141 additions & 0 deletions runner/operator/access/v1_0_0/mpath_submitter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""
" This operator submits each downstream pipeline to MPath
" An operator trigger, for each downstream pipeline (MSI/CNV/SV/SNV),
" _when all runs are complete_, should be created.
" For additional information on the API and how MPath ACCESS server
" is set-up, see https://app.gitbook.com/@mskcc-1/s/voyager/mpath/
"""
import requests

from beagle.settings import MPATH_URL
from runner.operator.operator import Operator
from runner.models import PortType, Run, RunStatus, File

PIPELINE_NAME_TO_MPATH_TYPE = {
"access legacy MSI": "admie_microsatellite_instability",
"access legacy SNV": "snp_indel_variants",
"access legacy SV": "structural_variants",
"access legacy CNV": "copy_number_variants",
}

PIPELINE_NAME_TO_MPATH_LOCATION_KEY = {
"access legacy MSI": "msi_fs_location",
"access legacy SNV": "snp_fs_location",
"access legacy SV": "sv_fs_location",
"access legacy CNV": "cnv_fs_location",
}

PIPELINE_NAME_TO_FILES = {
"access legacy MSI": ["msi_results.txt"],
"access legacy SNV": [], #TODO
"access legacy SV": [],
"access legacy CNV": []
}


def get_sample_sheet(request_id, job_group_id):
sample_sheet_run = Run.objects.filter(
app__name="sample sheet",
status=RunStatus.COMPLETED,

# Using job_group_id is better but in order to trigger MPath Submitter
# for the massive backlog where Sample Sheet was generated in a different
# job_group we have to use request ID
# To trigger this for the backlog see:
# https://app.gitbook.com/@mskcc-1/s/voyager/debugging/using-the-django-shell
# Once the backlog is submitted this should be reverted

# job_group_id=job_group_id,

tags__requestId=request_id
).order_by('-created_date').first()

sample_sheet = File.objects.filter(
port__run=sample_sheet_run,
port__port_type=PortType.OUTPUT
).first()

return sample_sheet.path


def juno_path_to_mpath(path):
[_, p] = path.split("/voyager")
return "/voyager" + p


# This will return 400 bad request if the project already exists.
def submit_project(request_id):
payload = {
"data": [
{
"comments": "",
"dmp_alys_task_name": "ACCESSv1-" + request_id,
"dmp_alys_task_type_cv_id": 7,
# TODO
"analyst_cv_id": None,
"dmp_dms_at_id": None,
"dmp_dms_id": None,
"dmp_lims_id": None,
"fellow_cv_id": None,
"fs_location": "N/A",
"is_clinical": 0,
"pathologist_cv_id": None
}
]
}

requests.post(MPATH_URL + "/ngs/projects", json=payload)


def submit_pipeline(request_id, pipeline_name, files, sample_sheet_path):
mpath_type = PIPELINE_NAME_TO_MPATH_TYPE[pipeline_name]
location_key = PIPELINE_NAME_TO_MPATH_LOCATION_KEY[pipeline_name]

data = {
"dmp_alys_task_name": "ACCESSv1-" + request_id,
"ss_location": [
juno_path_to_mpath(sample_sheet_path)
],
# This shouldn't be required. We can't use READONLY dirs so pointing to
# /voyager does not work. Talk with Anoop on what should be done.
"fs_location": "/srv",
"options": [
mpath_type,
"samples"
],
}
data[location_key] = [juno_path_to_mpath(f.path) for f in files]

payload = {
"data": [data]
}

requests.post(MPATH_URL + "/ngs/", json=payload)


def get_files(pipeline_name, runs):
return File.objects.filter(
port__run__in=runs,
port__run__status=RunStatus.COMPLETED,
port__port_type=PortType.OUTPUT,
file_name__in=PIPELINE_NAME_TO_FILES[pipeline_name]
).all()


class AccessMPathSubmitter(Operator):
def get_jobs(self):
runs = Run.objects.filter(id__in=self.run_ids)
meta_run = runs[0]

request_id = meta_run.metadata["requestId"]
pipeline_name = meta_run.app.name
job_group_id = meta_run.job_group_id

sample_sheet_path = get_sample_sheet(request_id, job_group_id)

files = get_files(pipeline_name, runs)

submit_project(request_id)
submit_pipeline(request_id, pipeline_name, files, sample_sheet_path)

return []
Loading