@pytest.fixture
def file_28_fastq_nanopore(testapp, lab, award, experiment, base_replicate, platform4):
    """Create a fastq file record linked to ``platform4`` and return it.

    The payload is POSTed to ``/file``; the fixture yields the first entry of
    the response's ``@graph`` so dependants can read the stored ``uuid``.
    NOTE(review): ``platform4`` is presumably a Nanopore platform (going by the
    fixture name) -- confirm against the platform fixtures.
    """
    payload = {
        'dataset': experiment['@id'],
        'file_format': 'fastq',
        'md5sum': '15dd66b6f21515393507f4ebfa55e77c',
        'replicate': base_replicate['@id'],
        'output_type': 'reads',
        'file_size': 800,
        'platform': platform4['uuid'],
        'lab': lab['@id'],
        'award': award['@id'],
        'status': 'in progress'
    }
    response = testapp.post_json('/file', payload)
    created = response.json['@graph']
    return created[0]
# Platform UUIDs whose derived bam files must not carry mapped_run_type /
# mapped_read_length.  The schema-29 changelog describes these as the Pacific
# Biosciences and Nanopore platforms; the UUID-to-platform mapping itself
# cannot be verified from this module.
_FILE_28_29_LONG_READ_PLATFORMS = frozenset((
    'ced61406-dcc6-43c4-bddd-4c977cc676e8',
    'c7564b38-ab4f-4c42-a401-3de48689a998',
    'e2be5728-5744-4da4-8881-cb9526d0389e',
    '7cc06b8c-5535-4a77-b719-4c23644e767d',
    '8f1a9a8c-3392-4032-92a8-5d196c9d7810',
    '6c275b37-018d-4bf8-85f6-6e3b830524a9',
    '6ce511d5-eeb3-41fc-bea7-8c38301e88c1',
))


@upgrade_step('file', '28', '29')
def file_28_29(value, system):
    """Drop mapped_* properties from bams derived from long-read fastqs.

    A bam file loses ``mapped_read_length`` and ``mapped_run_type`` when at
    least one file in its ``derived_from`` list is a fastq whose ``platform``
    is one of ``_FILE_28_29_LONG_READ_PLATFORMS``.  All other files are left
    untouched.

    Args:
        value: The file's property dict; modified in place.
        system: Upgrade context.  ``system['registry'][CONNECTION]`` is only
            read when the file is a bam with a non-empty ``derived_from``.

    Returns:
        None.
    """
    # https://encodedcc.atlassian.net/browse/ENCD-5950
    if value.get('file_format', '') != 'bam':
        return
    derived_from = value.get('derived_from')
    if not derived_from:
        return

    # Resolve the connection lazily so files that need no lookup can be
    # upgraded even when no registry was supplied to the upgrader.
    conn = system['registry'][CONNECTION]
    for parent_uuid in derived_from:
        parent = conn.get_by_uuid(parent_uuid)
        if parent is None:
            # NOTE(review): assumes get_by_uuid returns None for an unknown
            # uuid -- a dangling link is skipped instead of raising.
            continue
        parent_props = parent.properties
        if parent_props.get('file_format') != 'fastq':
            continue
        if parent_props.get('platform') in _FILE_28_29_LONG_READ_PLATFORMS:
            value.pop('mapped_read_length', None)
            value.pop('mapped_run_type', None)
            # One matching parent is enough; nothing further to remove.
            return