diff --git a/pipelines/nf-atacseq/assets/email_template.html b/pipelines/nf-atacseq/assets/email_template.html new file mode 100644 index 0000000..161c1e4 --- /dev/null +++ b/pipelines/nf-atacseq/assets/email_template.html @@ -0,0 +1,48 @@ + + + + + + ${workflow.manifest.name} Pipeline Report + + +
+ + + +

${workflow.manifest.name} v${workflow.manifest.version}

+

Run Name: $runName

+ +<% if (!success) { %> +
+

⚠️ ${workflow.manifest.name} execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+<% } else { %> +
+${workflow.manifest.name} execution completed successfully! +
+<% } %> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + + +<% if (summary.size() > 0) { %> +<% for (e in summary) { %> +<% } %> +<% } %> +
Nextflow Version$nextflowVersion
Run Name$runName
Session ID$sessionId
${e.key}${e.value}
+ +

--- ${workflow.manifest.name}

+ +
+ + diff --git a/pipelines/nf-atacseq/assets/nf-core-pipeline_logo_light.png b/pipelines/nf-atacseq/assets/nf-core-pipeline_logo_light.png new file mode 100644 index 0000000..476ce66 Binary files /dev/null and b/pipelines/nf-atacseq/assets/nf-core-pipeline_logo_light.png differ diff --git a/pipelines/nf-atacseq/environment.yml b/pipelines/nf-atacseq/environment.yml new file mode 100644 index 0000000..8ba8147 --- /dev/null +++ b/pipelines/nf-atacseq/environment.yml @@ -0,0 +1,23 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# Conda environment for nf-atacseq local Python/WASP2 modules +# (wasp2_make_reads, wasp2_filter_remapped, wasp2_count_variants, wasp2_find_imbalance) +channels: + - conda-forge + - bioconda +dependencies: + - python>=3.10 + - numpy>=1.21,<2.0 + - pandas>=2.0 + - polars>=0.19 + - scipy>=1.10 + - pysam + - pybedtools + - samtools + - bcftools + - bedtools + - typer + - rich + - pip + - pip: + - wasp2==1.2.1 diff --git a/pipelines/nf-atacseq/modules.json b/pipelines/nf-atacseq/modules.json index 0d78430..bb4626c 100644 --- a/pipelines/nf-atacseq/modules.json +++ b/pipelines/nf-atacseq/modules.json @@ -1,5 +1,5 @@ { "name": "wasp2/nf-atacseq", - "homePage": "", + "homePage": "https://github.com/mcvickerlab/WASP2", "repos": {} } diff --git a/pipelines/nf-atacseq/nextflow.config b/pipelines/nf-atacseq/nextflow.config index e844cbf..8e709e1 100644 --- a/pipelines/nf-atacseq/nextflow.config +++ b/pipelines/nf-atacseq/nextflow.config @@ -15,6 +15,8 @@ plugins { manifest { name = 'wasp2/nf-atacseq' author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' description = 'ATAC-seq Allelic Imbalance Pipeline with WASP2 mapping bias correction' mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' @@ -73,6 +75,7 @@ params { help = false version = false tracedir = "${params.outdir}/pipeline_info" + validate_params = 
true } // Load configuration files @@ -97,7 +100,6 @@ profiles { conda.enabled = true docker.enabled = false singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" } docker { docker.enabled = true @@ -153,15 +155,15 @@ profiles { def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.tracedir}/timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.tracedir}/report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.tracedir}/trace_${trace_timestamp}.txt" } dag { enabled = true @@ -184,7 +186,7 @@ process { withName: 'WASP2_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_VARIANTS|WASP2_FIND_IMBALANCE' { container = wasp2_container } - withName: 'BWA_MEM' { + withName: 'BWA_INDEX|BWA_MEM' { container = bwa_samtools_container } withName: 'SAMTOOLS_INDEX|SAMTOOLS_FAIDX|SAMTOOLS_STATS|SAMTOOLS_FLAGSTAT|SAMTOOLS_IDXSTATS|SAMTOOLS_SORT' { @@ -201,27 +203,24 @@ def check_max(obj, type) { try { if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println "WARNING: Invalid max_memory '${params.max_memory}', using default" + else return obj + } catch (Exception e) { + log.warn "Invalid memory config (${obj}, max=${params.max_memory}): ${e.message}. 
Using ${obj}" return obj } } else if (type == 'time') { try { if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println "WARNING: Invalid max_time '${params.max_time}', using default" + else return obj + } catch (Exception e) { + log.warn "Invalid time config (${obj}, max=${params.max_time}): ${e.message}. Using ${obj}" return obj } } else if (type == 'cpus') { - try { - return Math.min(obj, params.max_cpus as int) - } catch (all) { - println "WARNING: Invalid max_cpus '${params.max_cpus}', using default" + try { return Math.min(obj, params.max_cpus as int) } + catch (Exception e) { + log.warn "Invalid CPU config (${obj}, max=${params.max_cpus}): ${e.message}. Using ${obj}" return obj } } diff --git a/pipelines/nf-atacseq/tests/data/annotation.gtf b/pipelines/nf-atacseq/tests/data/annotation.gtf deleted file mode 120000 index 993462d..0000000 --- a/pipelines/nf-atacseq/tests/data/annotation.gtf +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/annotation.gtf \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa index 923c055..182b3f7 100644 --- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa +++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa @@ -1,331 +1,335 @@ >chr_test -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT 
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT 
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC 
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT 
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC 
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT 
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC 
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA 
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG +GAAAGGCATAATAAGTAGCACGTACTAACGCGTCTTCGCTGAAAATAGTTAACGGAGATC +GTGCGAATAACCTGTCTAATAGCTACTAAAGCTATCTCCAGGTAGATTCCATACCTGGAG +TGTATACCCTACCATAGGATTACTATGATCGTTAATGAAAGACCAAGAACTTGCAATTTG +GCATTCAATTAACTCTACCCCATATATCAGTTCCTGATCTTGAGTCACAAGGAACAGGTG +TCAGATGTTGATCCAAACCCTACGGCGACTGCAAATAGGAGATCCATAAGGAGTTAACCT +CGAATCCCCAAAGCTGACCCCAGTCCCCAGACCACTTCAAATCCAGTCTCACACAATGTG +TTTAGACTGGGTAGTTCGTTTTATCGCGTTAATTGTTATCCAATGTCGGAAAATCATGAG +TAGAGGATACTAACTCGCGCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCGGCGTATG +CCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGACCAA +TGCTGAGGAGCGACGTTTTCCACGCGTGCACTTTGACCACATGTACAACTCGAACAGTGG +GTCAAGTGTTTGTGAAAAGGAATGCTAAAATTACTGACTCTTTAACTCTAGAATTCAGGC +ATTTCCTGGGCAAGAAAATGTAGGTGCGGGCTTGCCAATGTAAGGCTTAATTAACCTCCG +AAGTGCAGGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTACTGTACC +ATTCTCCACTCTCATGCTATTTTAAGTAGAGGCTGCCTATGCCTTTGTGATCTGGCCCTT +GGCAAGCCGTAGCTGCACTTATTCAACGACATAAACCGATTGGTACATTATTCTCGATGG +AGTCACGTGGGGCGCGTTTGATGAATCTCCACTCGTACACCGCCCTCATTGGGCCAAACT +CAACCTTACTTACATGGCTGATATTCATTCCAGTCTTAACTGGGAGAATAGAACTACACA +AAAGAAGATAAGTGTGTATCAGCTTCATTGTCAAGAAGTTCTTGAGCGGGATATTTATGT +ACACAAGCTGTTATGGCGCGTTAGAACTGTCCCCGGACCAAGTTACTTAGAGATTTGGTA +AAGGAGTTAGATAGTGATGATAAATAGATGTCCACAACCTTGTAATCGCCACAGTTTTAT +ATCTGCCAAAGGGAGTGGTCTGGCGAATTAATTTACACCGTTTCCTCGTTAAACTGTAAT +TTATATTGGGAAGAGGCCTGATCGTGCTTCCGCGGTGTTTAACTAAACAGCCATGATACG +CATTGATAGTTTCTCTCCTATTCCCAAGCTACCAGACATCATTAATACTACCGCAACGAG +TAAACTGTAATATCTACGATAATGATGACATTCTTTGCAAGTGGGGTATCAGTGGCAGTT +AAACTCTAGATGCTATCGCTCTTCTCGAGCTTAGTGTGTACTCACCAGTCGCAGGAAGTT +TGGCTGTTTGAAGTTTAATCACTACTCTAGCTTATCCGCGCTAAACATTCTGATCGTGCA +CGTGTCGGACTCAAAATGTCCCAGTATTTACAGGGCTCAAGTGGTGTGACTCGTAATTAG +TGGCCATTTTAAATTGACATTTGTTTTACTCATATCGTTCTCGGTTTATATGACGACTCT +CGATTAATTTGTTGACGTTCTGTCTGCGCGGATCGGTGGAGGCAGACAATAGTGCCGAAA +TGTTACTTGGGGAATACTAAGTTCCAAGTCCCCTAGTTATATCGAGGAGTGATGAGATCT +CCTACTGCATTGCCACACCTTCCCCATACACTTCCTAAATAAGCTGACCCTAGAATAAAG 
+CTGAGGAATTTCGTACTGAAAGGTTTTGAAGCATGATATTTATTAAGATCTTTATCGTCG +TATACCACATGGCGTCTCCTGGTGTATTGAAATGTTCATACGACTGCAAAAGGAGTAACA +TACGTGGTTAGATACCCGTTCCGGTTATGTCTGCCTCTAAAGCCAGAAGGCAGGTTCTCA +CCACTAGACTGTTTATTACTCCTTTAAACTTATTCTGGACCGTACAGTCTGAACCGGTCA +GATTGGGTTATATACACGCCAAAATCATTTTCAGCGCGATTAAATTGTCATAACCTAACC +TACTCGGGTAAACTCTGACGTCATCTGCTGAACTTCTGGAGCGAAGGGTAATTAAATTTA +TAGTTTTACCCTATATTATTTAAAGGAATCTGCTTCCCATCATCCTGTTATCTATGTGTC +TGTTGCCTTGAGGGACTTTCGTCTCTGAGGTGACGTGCTAATTGTTTGGTTAATCACATT +ATTTGTTCACGGACAAATCATAGTAGAGTGAGCAACATTACTGGGGTCGCGTGAAATAGT +TATAGGGCTTATTATAACCTTGTCTAAGTATATGGTAAGCTCAGTCACGTCTTCTCGACG +TGGAAAATATTGAACCGACGCCCACAGCGGTTATTGCATACTCTAGGGTGTATATAACTT +TTGAAGTACTACAGAGACAGATCATTGAGGATAAGAGCCTAATGATCAGGACATAGTGGA +TGCAAGGTCTAAATGGGGCGTTTGTACCTATGTCCCACTTGGCGAAAACTGTTGATGATT +ACTTGCGAGGCAATTGTGGAGGACTGGAAGACGACAAGTATTTTAATGATACATTACCTC +GTTTGAATTCACCCATACTTAATTGTGTGACGAATATCCCAGCGATATACGACCTGTCAA +ACATTCAATCGGTAAAGGAATTTCATAAAGCGACTAATTGACATTGATCAACCACTGGGA +CAACTACCTATATCTAGAAAACAGATTTAAAACTGCCCGTTTCTTATACGACTGCCAGAC +CACACCTCCAGCGCAGCTTACCTTTAAATACAAGCCTAGCGCCCTCTATAACCCGACGCG +AGATGAGCCTCCAGCCATCAGACACAGGCTAAAATTGCCTTTATCGGAACTTCAATGTCA +GGTACACAAAAGGGAAAATCATTTGGAAATACTTTGATACTTATAAAGGATTCGTCCTTC +TCTACGTCCGGAGACCCATCTCGCACCATTTATCGGTTTAGGCCTAATTTTGAAAGGACT +AGCCACTATGACACTCATGAACGGCCTATTACCAACCATCGACTGAATGACGTACGGATA +TCCGGATAGGACGGAACTCGTTTATGCTATGCTGGTAACGCAGCTAGCCCGGGGCATTAG +TAGATGCGTCCCAAAACGAGTATGTGTATCTCGCACTCTTACAATTCTTGGTGAGAAGAG +TGAGGTCTAATATCAGGAGTATGACTTGGTCCTCTACCTAGAGGATGACATACGGAGTTT +TAGGTGGAGACAGAAAATTAGTATACTAGCCGAATGAAACTTAAATCTGAGACGATTGCA +CATCATCCGCAGACATGCGATTAGCCACATAATGGGTTCGTTGAGATGTCTCAGACCCAT +ACAAGTATCTCTATGATTAAGGTTAGCTAATTGTGGAGATCCTTGAAAGGAGACTTGGAT +CCGGTGCATTACCTTCATGATGCTTCCGACCTATGGTGCGCGAGTTGCGCTGTATTTGTG +CACCTAAGAGAAACGTGACACGCGTAGCAGCTCCTTAAGGCCCGGGTGGCTAGAATTTTA +GATGAATACGGTTTGTAAATTTAAATTAGTCCCAGTCGGCGTCCTTACCTCTACATCACT +AAGGCTATGCGGCGATTAACTTAATGTAGTGGGGACAGTAGTTGTTATCTCAGCCGTCTT 
+AAGTCTGCTTGTAACAACCCCTTTAAGTTAGAGCTTGTGTTTTAAAGTCAGCTTTTAGCC +ATACAAATAGTGCTTCTGTAGGTTTTGCCGATTACGCGTTATATAACTTTACTGTCCATA +GTGCTTCTTCTTGTAAAGAATGAACGTTAACAATAGATAAACGTAGGAATCCACGCCAGA +GTTGATAACTTAATGAGTATAGCCGGTTATACGTGGGGAATACACTAGGTAAGGTTAGAC +TTAGGTGTTTATTGGCGGTGAATTTGGACAAACTAAAATCGTGGCCGTAGCAAGTAAAAT +CGTTGTGAAACCTCAGACTATAATCCCCTGCTGGCTTGAAAGCGATCTACAAGCACTTCA +CGCTAGCAAAGAACGGGGTATGTCCCTCCAATACTTTTGACGTGAAGTGATATGTTAGTC +AAATAAAATTACACATCCTGGTTTTGACTGTTTTCAAACCATGAGTGTGCTAGAACTGTC +AAATTAGATCTGCTAAGGCGAAAACTATGAAAGCTAAGACAGCTTCTATCGAGGGTTGTT +TCTTATACCTTACCTATTAATTTTAGTTATAGCCGAGCTCAAGGAGAAATAAAGGAATTT +CCTCTCCAGATACCCAGAGTGATGTCTGTTGACTAGACCAAGTAAAGAAGTGTAAAGCCG +AGGCAACGGCTAGTACTTTGAATGACCTAATATAGTAACGAGGTTTTGTGATACACATAT +CGTGATGACATCACATCTTGCAAATCCAGTATAGAGTAGTTGCAATTACTTTCTTGTGGT +AGCACTTGCGTCTTACACGATTCAATATGACATCGGCACGTCGTGTAAGTCTCCAGGAGT +TATATAAGTTGTAATAATATATGAATTGAGGAAGTCAGTTTGATCGCTAACATGCAACCC +CAGATAATATATGAGAGGAAAGGAGATACGCACGATCATCTATTCAATTTATTGACTCGC +CCATAACGATCGGAAACCTTAATCCTGTACCACCTTCATCGGCTTTCCCAGAAGGATAAG +TGTTGGTCTAAAGAATGCGACCCTTTATAGTTGGGTCGTTCACTTGTTGATTTCTTGATA +CTGAGCGATTAGGATAGCCGAATTTTCTCTTGCTGACAGTTGTGAAAGATCTACAGTTAG +ATGTCAAGACGCTCATAGGGGATTCATTTATTTAGATTGGAGGCTGCCAGTTCTATTGTA +GGCAAGACCCTTTGAAACTTTAGTGGAATTGCCGTGCTTGTGCTGTTAGCCTCAACGCTT +GCGGTATTATCATAGGCTATTACGTGACCCGAGTGTACGGATATGTTTCTAATTAAAAGT +ATTAGAAAGTTATGAATAGGCGGTCGGTCGTACCTTGGTAACGCTGGGCTATTTAGGAAC +CTGCTTTGTCTTCGGTGTAGACTTGTTCACAACGTTGACCCGAAATTTAGTTCTCTCTAA +CTATTTAGCTCCAGTTTTGTATCCACGAAAGTTCAGTTGGTATTTTAGTCATTTTCTGAT +GAGCCGTACATGCAGCTATGTTTGTCCAACGGTATAACCGAATCAAACAAAGATCAGTCC +TAACATCGATGAGTGGAATTGGTTGTACACTGCGACGCTCCTAAGTGGGGATGATGCAAA +TAAAACGCCGGACAGCTCCGATCGCATCGTAAGTTACATTCGATAGAGCGAATATCAGCG +AGCTTCTTCGGTACCTTCTGTGCATCATGGAATAGCGTAGGAAGGTATTTCTCAAGAACG +TGCATCAAGTCAGAAATCTAGCATCACTCCGTCTACCGGTAATGTTCAACGGATAAAGCT +CGGAGTTCGAATCGGTAAATATGTAGGAACGCTAGAGATTCGAGCAGTACGGTAGTGTAG +CTATTCACTTAGGCAAGAACTATCGGGGACCACTCGCAGGATTCGATACATGATTCCTAT 
+AGCATGATTGCGATGCTGTTGCACTATACTCGACGACGCATGTATAGACAATCGCAGATA +GAATTTAGGTTGCCCCACTACACAAGTCTGTCTATTGTACACGTTGTGGCTTAGAATCGA +TTACGACCGGAAATAAATATTTTATCTTATTAGCTGTACCTATCTGGCATTTCTAAGGAC +AATTGATATGCCTACTTATCCAGTCCACCTCAGAATCCACGATCTTGGAATTACCTTTAA +ACCTGCTTGAAACAGGTCGTGATTCAATCAAATCTATCTGAAGTCCGTGGAGCATTTTCA +AAACGCTTTGATACCTTTCCGGTGACACAAAAGGAGGAACTAAAAGGGCACATACCCTAT +GATATAAAACTCAATGTGTCATTAAACAAAGGTATAAGTCTTTCAACTGACTATGAATGA +CCACTGCACGAGGAGGTTGTTAGAATGAAAAGCTGAGAAGGCAGTATCTCATCTTTTATC +TGTAGTAGGGTTCTTTCGTCTAACTGACTATTTGAGGCATTATTCTCAGGCTTTCAGTTG +TGTTTCGCTAACTAGACATACTACGTCTTATGTGAAGCTACGTCTGGTTGTTAAGTTTCA +ATCGAGTAAACTTTGAAAACGACCTACAGCCTTGACGAAGCTCCCACAACTGTGATAACT +AGTTCTTGCCCTGCACGCGCGGATTCTCACCTCTCAACAACCGCGTACCCTTCGCCCGTT +GCGTAAGGCATGTAATCCGCGCTTGAGCCATACCCACCGGCCAGATTAATCAGTCTGAGA +CGATACGCAGTTATAGCTGTAATGGGGAAATACCCCGGAAGTTTCTGATCCATTAAAACC +GCACGGATCTCGACGCAAAACTCCATGTTCCAACAATACGGCTTTAGGCAGGTGCCAACG +TCGACGCTGGCTAAGTAACTTACCACAGAGGATTCTGAGCTTCTTTGCGTTATTAGATGT +TTCTAACCTTAAAATAGTAAATAGAATACTGTGGACCAAGGCATAAATGCCGTGCTGGTT +AAAACCAGGTGCATTTAAAGCTCGATCAAGGCCGGTTTTGGGCTGTTTACTTTCTGAAAT +AACTGCGATGCCGGCCCGAGGAAGATCTAAACTACCAATGAAATTACAAGTGGCTTCAAG +GCCAAGCCATTTGAGTACTTGACTTATGTGAGTACTTTCCTAAACCATCAAGGGCAGGGT +TTGTTGCAATCGTATGGGCGTATATGGACAATTGAACGAGGCAATGTAGATGTCCCTCGT +GTAGGGGTATGCTAGCAACTTTTGTTATTTCTCCAAGAGCAATGCTCGTATAATCTTCAG +ACCACTATCTTTCGTGGGTTTTCTCGTATTCCGGCGTCGTATAGTATATCACAAGAGCTC +GTACATTCTAAAATATTAGTAATTTTCAAGGTGTAATTTTACACGATGTTAGACTCGTTC +TATCACACTGCTTGGTAGTTTAATATGCTGTAGTACTTGAGGATCGTCGGTGGAACGGTC +CTAGGATCTAAACTAGTGATTACGAACTCTTTGTGTAAAATATGAGCGTATTCGCACTCA +GTTGCAATTAAATAGCTAAATGATCGGTAAATATCCGGGGTAAATCAACTTGAGTTTAGA +GGATCCGTCGTTAAGAGATGATGTACATTCGTCGATTTAGGATCCTAACGTGGCGTTCGT +ATGAAAAGAGCTGAACTAAATAGGAAAACGTTAACCAGTGACTACGCCCCAACCATTGCA +AGATGTACCCCAATGATGGTTTTGGTATCGAAACTTCTCTTAATTGTGTTTCTTAAGTAC +TGGCAAAATTCGAGCCGGCATCGTTTGTTGATAGTTGGGTCTAGGATTTTACACCTTGTG +TTAGCACTGGGCCATTAATTCAATAGTAACAAGAATACTAATTACCAATGTGCGTGAAAA 
+TCTCCTTGACTGGTGCAACGTCATTCACAGTCGGATCTCAAGTTATTAGGTGCTAACTGT +ATACACCAAATTTAGGATAAGAGCCGGCTTAAGGCTAATCTAGACCCAATATTAATCAAT +ATTTTACGTAATGCATCCACGCGGCGTGCTCTTGGTGAGCAGCTGGGATTAAACGCGTAG +GTCGAACTATCGAGGGTTTACAAGAAAGCCAAGTGAAAATGAGACTATTGGCCATCGCGA +GATTTGAATAAATGTCCCTTGGTACTTATACGTTGGGCGAACGGGGATGAGCCAGGCTGC +TATCATCGTTTCGAGGTAGCTTCCAAGTGGATGAACTCAAAGACTGGCATTATGTGAAGA +GCATAGCGCTTTTCCCCGTATTATGGCAGCAGCTGGTTACCCATACTTGTGATCCCCGTA +ATTCTACTGTCATAGAAGGATGACCGAATCAATGAGCCGGGTGGTGTCCAAAAGCGATCC +TAATCCTTGCTGATTTACCTTGAGCGGTCACGTCTGTCTCAGCGACATTCGCCTTGCGTT +AGACTAGGCCGTAAGTAAGGAGTGCACTCCACAACGGCGTAATGCGTGCGGCGAGTAATG +TATTAGCATGTTAACCACATTCTTGGCAGCCAGATCAAAATCACTTTTCATCTGGTTGTC +TTAACAATCCGATAGAATCTAATGTAGCGATGCGTACTAGAAATAGTTACAATCTACAGT +CTTGCTGCACTTGCTGCTAATAATGAGCGAGGACCTATCCCTCCTTAAGCAAGTTCCTTG +TTCCGTGCGGGGAGCCCTGGCGCTAACTCTTTACATGATTAGTATCGCATGTTGTTACAT +ATATAATAGATTTACATCATTTCAAATGCAATGATTCGTGCTCCTAAAATGAGTCGTATG +AATAGCCACAGCGTACGGAAACCTGAATTGATTTGTAATTTAAAGATCAACTTAATCTGT +GTTGATCAGAGCGAGCATTGCAGAATACCCCTGCATCTAGGAATCGGTGCCAGTGTAAAA +GCCTGTTAGTAAAACCACGACTATGTAGTGTGTACCACACTCGGAGTGCGTCAAGCGAAG +TCAAACATGGAAATGAAACCATGCGTACGGAAAAGACCAGTGATTTATAAGGACATTCAC +ATAGACTCCAAAACTGACCCGATGGAGTCTACGCCGAACAGTTGGTATCAACATTTGTCT +CGATTTTCTGTTGGGAACATCCATCCCTACCCACAACGTACTGGACCATAATCAAGGGTT +TGGAACAGTACGCTCCTGTACTCAAGAAGTCCTTGCACGAAAGCAATAGGTTGAACTTCA +TCATATAGGCGATGACAGTGCTATCAGCCGGACTGGCTGTTCTCGTAGAAGTCACTCGAA +TCAATAAGATACGAATACTCCATCCTGTACGGGGACACTATATTATGCTAGCCGATTCTG +TAAATGTAGTCTTTACCGAGAATTGCTGACACTGATTTGAGTGTAGGAGGTCCGGTATAC +ACTTATCATCAACTTATTCCTACACTCGGTTTTCAATAGTTCGTAGCCCCAGGTTGCATG +AATATTATACCTCGGATAACACCTACTAATCCGTCCACAGCCTAGCACTTACTGGCGATC +AATGGAGCATGATGTACTTAGGGGACGGTATGAACATTCTTAACAGTTCCAAATGACCTG +TAGCAAATACAATAGCATCTTTGTTTAAGCATGGTCCTCTGCGGTTTGAAATGTCGCTAA +TCTAGTGATATTCCTTGTAAGCCACTGTTACTCTAATTTAGCCCACTCCAGAACGAGTTT +GTGTCCATGAAAATGTAACTCCCCAGACATGCAAATACGCCTTATTGCTGAATATCGGAA +CAAACAAAGTCGTTATCATCCTGAAATCGACGACAAGTACATATTAAAGGTTTGTTTGGC 
+AAAATAGGTAGCAAGTAGGATGTTCATAACAATTAAAGCGCGTAACTCCTAAATGTGCAT +TATGCGCCGAGGACCGATAGCTGACGCCGCTCTAGCTTCTATTGTTCCACTGTACGGTAC +AAAGATTGAATACGGAAACAGAATTCGTCAATTTGTTGAATTATGTTCTATTCGTTTTAT +CTGGTATATTTGTTACCTAACGTATTTAGGGAAAGTAGCTTCATGAAGAAATCTAATCCC +TCGCGTGACGAGTTTGCTGTGATTATTATGCGACCTGACTCTTGTAGTGTGGAGTTCGTT +GTCGTATCTGTACAAACTGCCGACACGTAGACAGGCCTGTCTAATAAACCAGGGACCTTT +AAGCGTCTTTGTAATTAAGTAAGTACCAGACCATCCTTAGATCAATATGATGCGCAACCG +GACCGGATCAAATGTTCCAAGCTCGGTAGGTTATCCTATAAGAGCCTCAGCAAAATGATG +TAAATTGTCAGCGTGTAGTACGGAAACAGATCACGGTATAATCAAGTCTAAATATTTAGC +CCCGGTCTTGGAATGGCCTTTTATGCAACCAATTTGTGGCGATTAATTTCTCAACAGTAA +GACAGAGAAAGCTAGAGAAGCTGGTATTATTCTGCATGTTGTCGAACCAGCTGTGTACAG +TCAACATTTTGCTATTTACTAAGTTGAAGCTTTCGGTTTCATGTGAAATATCTGGCCAAA +TCGAATGCACCCTTTGACCGGCAGTTTTCATAAGCCACGTGTTTGCATTTCTCTTTAACG +CATTGAAAATCACCGCGAACGACCTCACAACTGTCTAGCTTACCGATACGTTAGTGGTCT +CCTCGCAGAATCGAACGAACCCGAATAATATGGTGATATTCTTTAACGACTGATTAGGGT +CTTATTCGAGATTTTCAGTCTTTAAGCGTGAGCAGCGTGTTAATCACCTAGCAACATTAT +AGAAAGGAGAAAGGTACGAGCAGTTTAAAAGTTACTTCTAATTTTAACTATTGTCCAACT +AAGTGTAGATTATTTAGGCTTGTGTCCAAGTGAGATCATACTGTTTTCGTGTGATAGGTA +TCCGCATCATAACTAGTTATATTAGCACCGTGTATGAAGAAACGGTGGACCGTAGCACAA +CTCATTGTTATTTTGTCCCCTCTTGGTTTATTGGATCCTAGATTATATACGAATAGAGCC +CCTTTCGCAACAGCATCAGAATCAGACCTGCGCTCTCGACTGATAATAGCAATTTGTTAA +GAGCGGATAGACGCAGAAGAATAACATGATTTGTGCACTTAGTCCAGTCCAGATAAGAAG +TTGAGGCATTGACTTAACTTTTCATTGTCCGCTTGCTATCCCCACGATCCTGCTAAACTA +AAAGCTTTTGGCGCGGAAGAGCCGTTATGGAGGTTCGGCGAAATTGTATCACTAGCTAGA +CCATTTTCTGTAGGCTTTTAGCTTGATCGACGTAAATTCGATTCTATATGGTAGAAAGGT +ACGACCGTTATACGCTCACGTACAGCCTAAATTCACTTGTGGAGGCGATATAAGCTAATA +AGCGGTTCATTTTGAGGAACCGTTACTTTGAGATTCACTTACAGCAACTAAGGTTGTGTT +ACCGTTTCTTCTCAATTTACTGCTGGAGCGGCTATTATGCGTCCATCACCTTCATAGCCC +TAGTCATCAAGCCCATAGAGGTATGTTCGTGTGTAAACGAATTCCAAGACTAATTGGTGG +AAATTTCAGTTTGGATTGAATGAGGCTGATACTTCTATACACTTAAGGGTTCCCCGTAAG +TATATTGCCATAAGGGAGTAGTAACACTAAGGTTGTGAAAATATTGCACGACGTAGGTAT +TCTCAATTTCCTTCTAATTCTGTAGGATTTATGTAAGGCGACCGGGACTCTATTGTTTTG 
+TCTCCGAGAGTTTCTTAATCAATTGTCAGGCTAGTAGATCAAGTGTAATAAATGATTAGA +GGTCCTCATTTGGAGAATTTATCTATATCCTTGGTCGTCCACGCGGTATCGGAGTTGCTA +TACAATAAGTTGGTTCCAGAAAGCGTCTTAATTACATACTCTTGGTTTATCAACGAGATG +GTACCTAATACTCTCCTCTCAGTTCAGTAATAAGGACCGTTAACCGCACAATTGCATGTC +ACCATGTAACACATCCTAGGTTCAGTGGTGCAAACAAATCAAAGTCGTTCGATGTCACTA +AAACATTTTGCTTAGTAAGCTCACTTGGTTATGCAATATTCTTCACTTCCACAAGTGACT +CTACTTAAGGCGACGCACCTCCCTACAATTCGCATACGCCAGGTACACACAGCATGGAAT +AGTGTAGTACCTACTCATGCGCGAACGGTCGCCTGCAGAATTCCAACATGGAGGTCTTCT +GGCCTAGTGCTTGTGCTTCCGGGATACACCGCACTCATATCACAGTTTTCCCTGGCACAG +GTTATAGTCCGCTAGCGTGTTGAAGCTAGTTCACCCTTACTATGATCCAAGAAAAGCTTT +TCGGCCGGCCATCCTTCACCATACGTTTCGGGGTCTTAGTTCATTATCAGAGTCGGTGCC +ATTGTTCCATGTAGGTACGTGGAGGAAGTAACTCTTGATATGCTATACGTGTAGCATACT +ATACTCCAGAATCCGTCGCAACAATCCCTTTATCTGCCCCTTTATTTACATTCCCCGCAT +GTTTTGATTACTTAAATGTCGGGTACTGCTGGTATACACCGTATGCACCGAAAGACAGCA +ACCCCTCAAAGCTTCGACGAGTTACCTGGTGTGAGACTATCAGCTTATAACCCTTACTAA +CAGCAGTAGACGAATTCTCCTAGTATAAAGTCAATTACAGTTGACTAAATTCGAAGTAGC +CGAGTGGGTCTCATTAGACCCTACATGTATCTCTTGTTTTCAAAACGGCTGTGAAAGTCG +GAATATTATGTGAGTATGATTCACTCGGCGGAACACTCAAACTCGCTGAATCATTGATTC +GCCGATGATTAAGCCGACCCTCCCAATTACCGCTGCAGCACTACAATCTCAATTTAGGTA +TACGGATCTAGGTCCGTTCGTTACCAGTTACCAATACGCAACCGAGCTCGAAGAGAACAC +AAATTTACGAAGCAAAATTCGGAATCAGGGTATCGTGCAGAATGGCAGGAGAGCTGGAAC +TGTTGTCAGATTTCCCTCTAGTAATCGTACGAGAATATATTCTATGTCACACATTAACCT +ATAGGTAAAGCCTCATTATACTCCGTTTAATGCAGACTTATAGGATGCCATGCAACAAGT +CTAATCGTCGCGAGGACACTCAAAAGGATCAGTGGAAAGTAACACTTTGTGGTTCAATTC +AGAAAATCAGCTTGTTTGTACCTACAAGTACAAAACTTGGAGTGGTAGAGAGGTCAATCG +ATTAAGTTAAAAGGTTAACGCATGCGCCTAGTCATTAATTGGTTGCTGCGCAAAATAATG +CATGCGTAGTAAATCCCAGCCCCAAGTCGAATAGATTATTAACGCCGGAAGCAGCCATCT +GCGGAATCTTCGTTGTGTCGAGCGTCAAACGTTGCTCCATGGCTCCCTCCCTTTATCGGG +TTCTCTCATTGAGTCCAACTAAACATCTACAAAAGAACTTTGTTATGTGATATAGCTTAG +GTCTAATCTTAGGCTGACATGCATAACGCTTTGTCGAGGTCTATTAACATAGCCGAATGC +ATGCAAGCTTTGATGGATATTAACTTCCCAATGTCTAAGATTAAAGAAGAGGACACCCAT +TATGTCAATCATCTAGCTAAATCGAGCTGCGAGCCGGAGAGTAAACAGTTTCCTTTTCTT 
+CGGCGGTTATTTGAAAATTCCTTTCTTATGGCAGTGTTTCGAGCGAGCAGTATATTAGAC +CCAACCTCGATAATCGTTAATCACATAGCGACTATGATAGTATCATTACCAGCAGCATAC +ATAAAATTGTAAAGTGTGTTACTGTTTGCGTGGGTGATTATAGTACAGTCTTTTGCAAAT +CTACGGCCCTGACAGAACTTCACATTAAAGGCCATCCACAGAACAATGGACAACGTATAA +AACCTAAAAGGATATCGTTTTCCTGGGGTTTTCAGTTGTTTTAATGACCGGTAAATTTTC +TTACCCTATTGTGTTTCCTTACACAGAAATATCTGAATATTGAGGTACCTGTGAACATTA +TCATTCATACAACATATCCTATCGCCCATCCTGTGCGGCGACTACTCCAGCACTCACTAA +TTGTTAATCATCTCATACAACTCGTCAGAATTAACATTACCGCAAACTGCTTACTAGCGC +AATCAGGTCAAGAGGAGGACGGCTTTGTCACTTAAAAGAATAAGGTGTAGCTGCATAAAA +CAATGTGTATCTTCTGAGCTTCACAGCCGTGGGCTATCTATGGTTCCGGTCCTGTTGATT +GCTCCCGATGTTGAACAATACTTTCCACTTTCCGTGACAGAAACTTTAGAGCAAGAGGTC +AAACTTTACCCAAGCCCATAGGTAGAAGTTACGCGCGCATTGACGTTTGATCAAGGGACA +GCTGTGAATATCCGTCCCACGTAATCGTGACTTCTCATCAATATTATATTACTGCCGCTA +ATCAACAACTTCCTTGTTTCGACTGAAACGATTTTAGTCAAGTCGAAGACCTCATACGAT +AAGATTTGCAACATGTCTAAAAGAGAACGGGAACTGGCAAAAGGCTTGGTAGATCCGTCT +ATAGCGTAAAACTGATTAACCCATTAGGTCTGAATAACTTTACACAACCCTCCGCACTGT +TAAATGACGGGCTTTGCTCTGTTTTGACACATCAGCTAGAAACTCGCCACGAAGGCATAA +GGCTCCCATATAGCGTAGCTGACAAACATATGAGGTGGCTGCATAAACTAAATTGAGGCT +CGCGTTCGGATACTTGCCCATGTAGCAAGTCTTGGCAACCAACTATATAATCATCACGAA +TTGAGTGCTAAAGACATGCGAACAGTTGGGGCTGCTATATAGTATGACAGATATAGAAAT +TTTATAAAATGTCGTAGGAATCTGGAGGCCAAAATCATTAGACACTCTTGTAAAAGGTAT +GGTAATGTGTATGACCTCTTGGCATAGTGTCCAATTATTCTCGGTTTACTCTCAGAGACA +CAGTCATGTAAAAGTGGTGAGGAATTACCGCCGTGTTTTGCCAACCAAGAAGCATTGAAC +AGTAGATCAATAATGATATTCGGTAGCGTATTTACGCTTTGCGGTTTTCAGAAGAAACTA +TCACAATTGAAACTCTATTCTTCGCCTCATTCCGTACCGTTAGGAATGACTCGAATCGTA +CTGTCTGCCGCGGGGCATAGTGTATTGCTCCCCACCAGGTTCAGATAGTTCGAATCAGTG +CGCTGTACAATTGCCTTACGTGTAGATTTGCATCACCGCTTCACGTAGGCACCCAGAGTG +CTCACTAAAGCCACTAGAGAGATAGAGTTAGAAATTAAGTATCGGTTACGCCCCTCAGAC +GACATAACTCACTTCTACCGAATATCCTTTCTATCTTGGATACTACTAATGCTTCCGTTC +ACGCCGCAATCATGTGGATCCTCCAGTAAGCAGGGTGCTGTCATGACTATACAGTACGGA +TCCGTAAGCATTTTGAGGATGATAACATAGGGTCGGTTACTGTGGATTTCCGTTACTTAG +GAGAGCAGCTTTAGCTGACTTTGCTGAGGCTGCGCGTGTTAGACAGCAATTTACGAACGG 
+CGCACTCTATAGCAGGCACTCACAGTGGACCAGTAGTCCTATTGCAAGAGTTCATTATGG +AACATTTTAGTCCTCTATCACACGGACCATTGCAGTAGATAACTCTAATCCTATGTCTTT +ATTTGGTTGCCTGGAACCCCTTACCACTAGACACCCCAATAAGTAATCTTGCTTCCATGT +CGAATTGATACTCATCGAAAACATATAAAACTAATTATGCTTGTGTTCCTGTGGTCTGTT +ATATAGAGGCGCCCTATTGGCCGCGGGATAAGGATCATTTTGGCACACTAACGGGATCCT +AAAACTTTATCTTTCAACGACTCCTACATGCCTTTTAGGTTAGTACGCGAATCGCCTAAC +AAGCCAATGGGTATTGGAGAATTAGACAAAATGGTTGAGGAATAAAGTGGCGCAGGATTT +TGTCCGAGAAAGGGATAGCAAACGGTCGCAGGCAGGAGTAACAATTTTCAACCGACCTTA +ATAGAGCTCAAAAGCTACCGGAGAAAGCTTCGTCTATGCTTAATACATATGCTAACCTAT +GAATTTCGTAAGCGTAATATAAACTTATCAGATATTTTAAAAGCATCCTATTCAGTCGTA +CTTTTGGCAGGAAAGGTCAGGCGAAACAGAGTCTCCCTGCGGAGGCTTTTAAAATAAATA +GCGGGCCTAGCATCGATTCTAAAAGACGACCCCAGGTGCGTAACCGTGCCTCCCCAAGTC +TTCTTTTAACAATTACCTAGAGAACGGCGTCAGTCGCGAATGACCTTACGAACGTTTACG +CGGAGCCGAGTAAGATTAATAACTGCTTATTGATTTGCAATCGTTTGATACGGGTGGCCC +GAAGCTCAATATCAACATAAATAAAATTAGTCGGAATGGTCGCTTAAATCGCGCGCTGTC +ACTGTCTTCATATGAGGGAGTTGTGTAAGACTGCATTGATATATAGGTATGATTTCGGTT +TAGAACTTTGTCTGTTAGCAACTCCGCATGATTGAAGGAAATCCTCGTTGGTAAGATCTC +TTTAGCATTTGCACAGCTGACTCTAACAGCATAGTATGTGATCGTATTATGTCTGCAGTT +TGTAACACAGTGGGCGGCATGGATGGTACTTAATGGACGTAATGAGCAGTAGACCACCGG +TGTTACCTAACCATCATTAGAGTAGGCGAGATTGCGCTTGTACGACTTATATATAAGGGT +AACCGGAATACCGTTCCTCTTATCAACAACAGTTACTGGTCTTAATTCACATCGGATATT +GCGATCGCCAAGACTATCCCGTAAGTCGTAAGCTAACCAACTAGCGGTTAGGTTTATTGA +GGTTTTGATGGGAACTTCTCAGACACGTCGTCAACTACCTAATTTCTTGGATGGAGCTAG +GCTAACTGTCCCAGAACTTTCTGACACTCGAGATCCTCTAACTAATTGGAATCCAGGAAT +TCCCTTATTGCATCGCCACAAACGACCATAAATTACAGCATGTTTCATTGTCTAACGTGC +CTATCCACGAAATTGAATTCGGTTCACATTATATATCCCCTTCTACCGCTAATTTAATGT +TTAACGTTGATGGGGCAAAGCACATTCGAGAAGTACCGAAAAGTCTCAATCCAAAGACCG +GAGGAACTGGCTTCGGTAAGAATCGCGAGTATCCTTGGATGCCCTGCCTGATTATAACTT +GTTCCATGTAGATAGGCGTAGCTAATTCATAGCAATACAATAAACGAGTCAGAACTGTAG +TCTAACATAACAGCCTGCTCTCCAGGTAACAGCCCATTATTAGATATAGTATCACGATCG +TCGGTTGTATTAGTGGTGATAACTATCGATTCTGCCACTAATAGAATGTGCAGAAATAAA +GTATCTGAAAGAAAACGAAGTCACAGAGAATAAAGCTCACTTCATAAAAGTCGGTTGCAG 
+TAGACGCATATCAATTTTCCCTGCTGCATTTTAGAGTTCGGAATAGTTAAACATAATACT +GGAAGCGCTTCCGGCAATCAGGAATAACCCCATATAAACCAACCTTTGTTGCTATTGCCA +GCGCTATTCTCGTCAAAATTTCTCCCTATGGTCTTCACATCATGCATCACCGGACCCTTT +GATAGACGATGACCCAATTACAATCACTCCACGGATGAGCATCCCATTTTATACGAGGCC +CACTGGAAACAATTGCAATCGACGTGACCAAGTAGAGGAGCGTGCTCGAAAGGTGATGAT +TGCCGAATTCTAACAAGGATACTATAAGCCACGGAACGCTGACGTTGAACAGACCTGGTC +TCCTGGGCACTTCGCAGCACCTCAGTAGTAATTCCGGTAGATTAGGACTTAGCATTCCGT +TGATCTTACAGGATTTATAAATAAGGAGATCTGTCTTGTTTAATTAGGAGGACGCTTTTC +CCGCGTAAGTACGGGAAAACGTTCTTCTGATTTTGTTTGCCACTTGACATTGTAGCTGCT +AGGAGAAGGGATAATATCCGCGTTTTCTTTTACCGTAACGTCGGAGCATACCATGGTAAT +TGTCCGTGTCAAAACTAGATATCTAGGTTGCAAAATTCAGTCAGTAAGTCCTGAGGCCTT +CCGCATTATTAATTCTACAGACATATGAATTTGCTCCACCGGCTAGCACAGTCAACTCAA +CCCACGATAGGGGAACGAAATCACAAATAGGTTCACATGGTCAATACAAGGCAAACCATT +CCCCATAACTCACGCACTGACGGTAAGGCCATTTCAGGTCAAGCGGTGAATGCTGTGAAA +AGCAGCTCGACCACCTGCCGTGGATGGCAAACCGATAACAAAGGACTCCGATACTTCATT +TGTAAACGTTTGCAGTGCTGACGTAACTCATATCTACAGTCAAACCGAATGGTTTGATCG +GCATTATGTAAAGGAATCGACACACGTTGCGTCTTCTAGATTATTACACACCTGTCTGCG +ACGGATATAGGTAAATAAGTCAGCCTCCACTCTGCAGAAGATACTAGAAACGTATCAGTA +ATAGCTATCAGGATTTCGCCATCCTCGCACTGTGCCCGGATATCACAGCAAGATTCTAGG +ATGGCACTTGTGTGACTAGAGGTTTTACTCGTTGAGCCATTCTTACTATAGGCATGGGAT +TACAATGTGCATGTTTGTGATGTTATCCCATATCTTGCATGTATCAGCCTACCAATTAGA +CATATGACTAGATGTAGTCGATCAACGCAAGGGTGCGGACTTTGATTCCTTTTGAATTGA +AGTCAACTCAGATGCTCCTTAAGACGTTTTACAGTAGGTATTTTGTGGTACAAACCAGAA +CCAGTGCCAGTCGGTAGTTATTGTAGTGTGTTCTTAATACATATTTGGTATTGGAGTTTC +TAACATTTAAAAGGAGCCTATTACACTTACTTAATTTGCGTCTATATTTCTGTTACGATA +TGTCGTCTGTCGATTTTACGAGTTTCATACGTGCGGGTTCCCTGTTCGCAATGGGCCCCT +TGCTAATGTCCCGCATCTTTAGGATGCAAACTTACTCACGCCTCCTTTACCGAGACTTGG +TGGGAGAGAAGACTCCTGTAGAATCCCGATCTGAATGGTTTCAGTGTAAGGGTCCCTTCT +AGCCATATCATTGAATATTCTTGTACTTTAAGTAACTCGATCCTACCAGTACAATTCTAG +GTTTGCCTTATAGCCGGAATGAGTATCAGCGTCATTCACCCCGGCCGGATATTATTTGCA +ATGTCAGGGACACCCAAAATAGACCGGTTAGAAGGCATATGCGATGAGAGTTGGTGCCTA +AATTAAACGATACAATTGATATGACAAGGACTATACGATGAAATCCATGAGATAATTATC 
+GTAACTCGGCCAACCTAAAACCGTGCAAGATAGGAGCGGTCCTAGAAGTACTATCGACAC +CTTAAATACTCACTTGAGTTTTCCGATCCTATAGTGCCAATCATATGGCGCAGGAATATT +ACAAACTAAGAAAGTCAACAAAAGATGTAAATTGCAACACCTGGCATCGGTGGGGTTGTC +CCCTTAAACCCTGAAACCAACTGTTATGCTCAACATTATATCGAGGCTAAAACGCGTATC +GTGGCACATTAATAACGATCACATAAGCTTTGCGGCTAGCAATAATAATTTAGGACAGCT +TAGATTTTGACCCGTGCTAATCCTCAGTATGGAGTAATTTTACGGATCTCTCGTTGTAAC +CGTCCTCAGTCGTGTACATTTTAACCTTTGTAAACTAGTTTACGAACGAGTATTTAGAAG +GTCCGTACTCTCACCCAACTGACACATTGTACTAGCTCAAGATCGCAAACACTAAGGGTG +TGAGTCGCGGGATAGCGCTTAAATATGACTGCTAATGGTCAAGAGCACGCGCATAATATT +CCACTGGTTCTAGGTCACCACTACGGTCAGACGTTGACCTGCATGCCCTACATCCGGCAC +GGGCTACTAACGGCCTAATATTCTTTGAGCCATATCCATACTCGTCTATGCATATTCAGG +TATACGGCTATAGTGCGTTATTAACTTCGTCGTGATTAAATCCTTTAATTGTTCCATTAT +AAGTATACATGCTTAGATGCGTGAACTTGAGGGATATCGTTGCTCTAAAGTTGTCTTATA +GACTAAATCTAAACAAGCCGTGCAAGACTACTTAAATTACAAATCTTACAGACATCTCGC +CACTGCGCTAACACTAACAA diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb index 0719bfe..5d6da8b 100644 --- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb +++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb @@ -1 +1 @@ -19800 1 0 +20000 1 0 diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann index 01f4a1e..a633aab 100644 --- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann +++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann @@ -1,3 +1,3 @@ -19800 1 11 +20000 1 11 0 chr_test (null) -0 19800 0 +0 20000 0 diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt index 7b2e7ab..9ed4852 100644 Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt differ diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac 
b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac index dd39245..d99d805 100644 Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac differ diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa index 76e12a6..b19e11c 100644 Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa differ diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa b/pipelines/nf-atacseq/tests/data/chr_test.fa deleted file mode 120000 index 60a78a3..0000000 --- a/pipelines/nf-atacseq/tests/data/chr_test.fa +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/chr_test.fa \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa b/pipelines/nf-atacseq/tests/data/chr_test.fa new file mode 100644 index 0000000..182b3f7 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/chr_test.fa @@ -0,0 +1,335 @@ +>chr_test +GAAAGGCATAATAAGTAGCACGTACTAACGCGTCTTCGCTGAAAATAGTTAACGGAGATC +GTGCGAATAACCTGTCTAATAGCTACTAAAGCTATCTCCAGGTAGATTCCATACCTGGAG +TGTATACCCTACCATAGGATTACTATGATCGTTAATGAAAGACCAAGAACTTGCAATTTG +GCATTCAATTAACTCTACCCCATATATCAGTTCCTGATCTTGAGTCACAAGGAACAGGTG +TCAGATGTTGATCCAAACCCTACGGCGACTGCAAATAGGAGATCCATAAGGAGTTAACCT +CGAATCCCCAAAGCTGACCCCAGTCCCCAGACCACTTCAAATCCAGTCTCACACAATGTG +TTTAGACTGGGTAGTTCGTTTTATCGCGTTAATTGTTATCCAATGTCGGAAAATCATGAG +TAGAGGATACTAACTCGCGCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCGGCGTATG +CCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGACCAA +TGCTGAGGAGCGACGTTTTCCACGCGTGCACTTTGACCACATGTACAACTCGAACAGTGG +GTCAAGTGTTTGTGAAAAGGAATGCTAAAATTACTGACTCTTTAACTCTAGAATTCAGGC +ATTTCCTGGGCAAGAAAATGTAGGTGCGGGCTTGCCAATGTAAGGCTTAATTAACCTCCG +AAGTGCAGGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTACTGTACC +ATTCTCCACTCTCATGCTATTTTAAGTAGAGGCTGCCTATGCCTTTGTGATCTGGCCCTT 
+GGCAAGCCGTAGCTGCACTTATTCAACGACATAAACCGATTGGTACATTATTCTCGATGG +AGTCACGTGGGGCGCGTTTGATGAATCTCCACTCGTACACCGCCCTCATTGGGCCAAACT +CAACCTTACTTACATGGCTGATATTCATTCCAGTCTTAACTGGGAGAATAGAACTACACA +AAAGAAGATAAGTGTGTATCAGCTTCATTGTCAAGAAGTTCTTGAGCGGGATATTTATGT +ACACAAGCTGTTATGGCGCGTTAGAACTGTCCCCGGACCAAGTTACTTAGAGATTTGGTA +AAGGAGTTAGATAGTGATGATAAATAGATGTCCACAACCTTGTAATCGCCACAGTTTTAT +ATCTGCCAAAGGGAGTGGTCTGGCGAATTAATTTACACCGTTTCCTCGTTAAACTGTAAT +TTATATTGGGAAGAGGCCTGATCGTGCTTCCGCGGTGTTTAACTAAACAGCCATGATACG +CATTGATAGTTTCTCTCCTATTCCCAAGCTACCAGACATCATTAATACTACCGCAACGAG +TAAACTGTAATATCTACGATAATGATGACATTCTTTGCAAGTGGGGTATCAGTGGCAGTT +AAACTCTAGATGCTATCGCTCTTCTCGAGCTTAGTGTGTACTCACCAGTCGCAGGAAGTT +TGGCTGTTTGAAGTTTAATCACTACTCTAGCTTATCCGCGCTAAACATTCTGATCGTGCA +CGTGTCGGACTCAAAATGTCCCAGTATTTACAGGGCTCAAGTGGTGTGACTCGTAATTAG +TGGCCATTTTAAATTGACATTTGTTTTACTCATATCGTTCTCGGTTTATATGACGACTCT +CGATTAATTTGTTGACGTTCTGTCTGCGCGGATCGGTGGAGGCAGACAATAGTGCCGAAA +TGTTACTTGGGGAATACTAAGTTCCAAGTCCCCTAGTTATATCGAGGAGTGATGAGATCT +CCTACTGCATTGCCACACCTTCCCCATACACTTCCTAAATAAGCTGACCCTAGAATAAAG +CTGAGGAATTTCGTACTGAAAGGTTTTGAAGCATGATATTTATTAAGATCTTTATCGTCG +TATACCACATGGCGTCTCCTGGTGTATTGAAATGTTCATACGACTGCAAAAGGAGTAACA +TACGTGGTTAGATACCCGTTCCGGTTATGTCTGCCTCTAAAGCCAGAAGGCAGGTTCTCA +CCACTAGACTGTTTATTACTCCTTTAAACTTATTCTGGACCGTACAGTCTGAACCGGTCA +GATTGGGTTATATACACGCCAAAATCATTTTCAGCGCGATTAAATTGTCATAACCTAACC +TACTCGGGTAAACTCTGACGTCATCTGCTGAACTTCTGGAGCGAAGGGTAATTAAATTTA +TAGTTTTACCCTATATTATTTAAAGGAATCTGCTTCCCATCATCCTGTTATCTATGTGTC +TGTTGCCTTGAGGGACTTTCGTCTCTGAGGTGACGTGCTAATTGTTTGGTTAATCACATT +ATTTGTTCACGGACAAATCATAGTAGAGTGAGCAACATTACTGGGGTCGCGTGAAATAGT +TATAGGGCTTATTATAACCTTGTCTAAGTATATGGTAAGCTCAGTCACGTCTTCTCGACG +TGGAAAATATTGAACCGACGCCCACAGCGGTTATTGCATACTCTAGGGTGTATATAACTT +TTGAAGTACTACAGAGACAGATCATTGAGGATAAGAGCCTAATGATCAGGACATAGTGGA +TGCAAGGTCTAAATGGGGCGTTTGTACCTATGTCCCACTTGGCGAAAACTGTTGATGATT +ACTTGCGAGGCAATTGTGGAGGACTGGAAGACGACAAGTATTTTAATGATACATTACCTC +GTTTGAATTCACCCATACTTAATTGTGTGACGAATATCCCAGCGATATACGACCTGTCAA 
+ACATTCAATCGGTAAAGGAATTTCATAAAGCGACTAATTGACATTGATCAACCACTGGGA +CAACTACCTATATCTAGAAAACAGATTTAAAACTGCCCGTTTCTTATACGACTGCCAGAC +CACACCTCCAGCGCAGCTTACCTTTAAATACAAGCCTAGCGCCCTCTATAACCCGACGCG +AGATGAGCCTCCAGCCATCAGACACAGGCTAAAATTGCCTTTATCGGAACTTCAATGTCA +GGTACACAAAAGGGAAAATCATTTGGAAATACTTTGATACTTATAAAGGATTCGTCCTTC +TCTACGTCCGGAGACCCATCTCGCACCATTTATCGGTTTAGGCCTAATTTTGAAAGGACT +AGCCACTATGACACTCATGAACGGCCTATTACCAACCATCGACTGAATGACGTACGGATA +TCCGGATAGGACGGAACTCGTTTATGCTATGCTGGTAACGCAGCTAGCCCGGGGCATTAG +TAGATGCGTCCCAAAACGAGTATGTGTATCTCGCACTCTTACAATTCTTGGTGAGAAGAG +TGAGGTCTAATATCAGGAGTATGACTTGGTCCTCTACCTAGAGGATGACATACGGAGTTT +TAGGTGGAGACAGAAAATTAGTATACTAGCCGAATGAAACTTAAATCTGAGACGATTGCA +CATCATCCGCAGACATGCGATTAGCCACATAATGGGTTCGTTGAGATGTCTCAGACCCAT +ACAAGTATCTCTATGATTAAGGTTAGCTAATTGTGGAGATCCTTGAAAGGAGACTTGGAT +CCGGTGCATTACCTTCATGATGCTTCCGACCTATGGTGCGCGAGTTGCGCTGTATTTGTG +CACCTAAGAGAAACGTGACACGCGTAGCAGCTCCTTAAGGCCCGGGTGGCTAGAATTTTA +GATGAATACGGTTTGTAAATTTAAATTAGTCCCAGTCGGCGTCCTTACCTCTACATCACT +AAGGCTATGCGGCGATTAACTTAATGTAGTGGGGACAGTAGTTGTTATCTCAGCCGTCTT +AAGTCTGCTTGTAACAACCCCTTTAAGTTAGAGCTTGTGTTTTAAAGTCAGCTTTTAGCC +ATACAAATAGTGCTTCTGTAGGTTTTGCCGATTACGCGTTATATAACTTTACTGTCCATA +GTGCTTCTTCTTGTAAAGAATGAACGTTAACAATAGATAAACGTAGGAATCCACGCCAGA +GTTGATAACTTAATGAGTATAGCCGGTTATACGTGGGGAATACACTAGGTAAGGTTAGAC +TTAGGTGTTTATTGGCGGTGAATTTGGACAAACTAAAATCGTGGCCGTAGCAAGTAAAAT +CGTTGTGAAACCTCAGACTATAATCCCCTGCTGGCTTGAAAGCGATCTACAAGCACTTCA +CGCTAGCAAAGAACGGGGTATGTCCCTCCAATACTTTTGACGTGAAGTGATATGTTAGTC +AAATAAAATTACACATCCTGGTTTTGACTGTTTTCAAACCATGAGTGTGCTAGAACTGTC +AAATTAGATCTGCTAAGGCGAAAACTATGAAAGCTAAGACAGCTTCTATCGAGGGTTGTT +TCTTATACCTTACCTATTAATTTTAGTTATAGCCGAGCTCAAGGAGAAATAAAGGAATTT +CCTCTCCAGATACCCAGAGTGATGTCTGTTGACTAGACCAAGTAAAGAAGTGTAAAGCCG +AGGCAACGGCTAGTACTTTGAATGACCTAATATAGTAACGAGGTTTTGTGATACACATAT +CGTGATGACATCACATCTTGCAAATCCAGTATAGAGTAGTTGCAATTACTTTCTTGTGGT +AGCACTTGCGTCTTACACGATTCAATATGACATCGGCACGTCGTGTAAGTCTCCAGGAGT +TATATAAGTTGTAATAATATATGAATTGAGGAAGTCAGTTTGATCGCTAACATGCAACCC 
+CAGATAATATATGAGAGGAAAGGAGATACGCACGATCATCTATTCAATTTATTGACTCGC +CCATAACGATCGGAAACCTTAATCCTGTACCACCTTCATCGGCTTTCCCAGAAGGATAAG +TGTTGGTCTAAAGAATGCGACCCTTTATAGTTGGGTCGTTCACTTGTTGATTTCTTGATA +CTGAGCGATTAGGATAGCCGAATTTTCTCTTGCTGACAGTTGTGAAAGATCTACAGTTAG +ATGTCAAGACGCTCATAGGGGATTCATTTATTTAGATTGGAGGCTGCCAGTTCTATTGTA +GGCAAGACCCTTTGAAACTTTAGTGGAATTGCCGTGCTTGTGCTGTTAGCCTCAACGCTT +GCGGTATTATCATAGGCTATTACGTGACCCGAGTGTACGGATATGTTTCTAATTAAAAGT +ATTAGAAAGTTATGAATAGGCGGTCGGTCGTACCTTGGTAACGCTGGGCTATTTAGGAAC +CTGCTTTGTCTTCGGTGTAGACTTGTTCACAACGTTGACCCGAAATTTAGTTCTCTCTAA +CTATTTAGCTCCAGTTTTGTATCCACGAAAGTTCAGTTGGTATTTTAGTCATTTTCTGAT +GAGCCGTACATGCAGCTATGTTTGTCCAACGGTATAACCGAATCAAACAAAGATCAGTCC +TAACATCGATGAGTGGAATTGGTTGTACACTGCGACGCTCCTAAGTGGGGATGATGCAAA +TAAAACGCCGGACAGCTCCGATCGCATCGTAAGTTACATTCGATAGAGCGAATATCAGCG +AGCTTCTTCGGTACCTTCTGTGCATCATGGAATAGCGTAGGAAGGTATTTCTCAAGAACG +TGCATCAAGTCAGAAATCTAGCATCACTCCGTCTACCGGTAATGTTCAACGGATAAAGCT +CGGAGTTCGAATCGGTAAATATGTAGGAACGCTAGAGATTCGAGCAGTACGGTAGTGTAG +CTATTCACTTAGGCAAGAACTATCGGGGACCACTCGCAGGATTCGATACATGATTCCTAT +AGCATGATTGCGATGCTGTTGCACTATACTCGACGACGCATGTATAGACAATCGCAGATA +GAATTTAGGTTGCCCCACTACACAAGTCTGTCTATTGTACACGTTGTGGCTTAGAATCGA +TTACGACCGGAAATAAATATTTTATCTTATTAGCTGTACCTATCTGGCATTTCTAAGGAC +AATTGATATGCCTACTTATCCAGTCCACCTCAGAATCCACGATCTTGGAATTACCTTTAA +ACCTGCTTGAAACAGGTCGTGATTCAATCAAATCTATCTGAAGTCCGTGGAGCATTTTCA +AAACGCTTTGATACCTTTCCGGTGACACAAAAGGAGGAACTAAAAGGGCACATACCCTAT +GATATAAAACTCAATGTGTCATTAAACAAAGGTATAAGTCTTTCAACTGACTATGAATGA +CCACTGCACGAGGAGGTTGTTAGAATGAAAAGCTGAGAAGGCAGTATCTCATCTTTTATC +TGTAGTAGGGTTCTTTCGTCTAACTGACTATTTGAGGCATTATTCTCAGGCTTTCAGTTG +TGTTTCGCTAACTAGACATACTACGTCTTATGTGAAGCTACGTCTGGTTGTTAAGTTTCA +ATCGAGTAAACTTTGAAAACGACCTACAGCCTTGACGAAGCTCCCACAACTGTGATAACT +AGTTCTTGCCCTGCACGCGCGGATTCTCACCTCTCAACAACCGCGTACCCTTCGCCCGTT +GCGTAAGGCATGTAATCCGCGCTTGAGCCATACCCACCGGCCAGATTAATCAGTCTGAGA +CGATACGCAGTTATAGCTGTAATGGGGAAATACCCCGGAAGTTTCTGATCCATTAAAACC +GCACGGATCTCGACGCAAAACTCCATGTTCCAACAATACGGCTTTAGGCAGGTGCCAACG 
+TCGACGCTGGCTAAGTAACTTACCACAGAGGATTCTGAGCTTCTTTGCGTTATTAGATGT +TTCTAACCTTAAAATAGTAAATAGAATACTGTGGACCAAGGCATAAATGCCGTGCTGGTT +AAAACCAGGTGCATTTAAAGCTCGATCAAGGCCGGTTTTGGGCTGTTTACTTTCTGAAAT +AACTGCGATGCCGGCCCGAGGAAGATCTAAACTACCAATGAAATTACAAGTGGCTTCAAG +GCCAAGCCATTTGAGTACTTGACTTATGTGAGTACTTTCCTAAACCATCAAGGGCAGGGT +TTGTTGCAATCGTATGGGCGTATATGGACAATTGAACGAGGCAATGTAGATGTCCCTCGT +GTAGGGGTATGCTAGCAACTTTTGTTATTTCTCCAAGAGCAATGCTCGTATAATCTTCAG +ACCACTATCTTTCGTGGGTTTTCTCGTATTCCGGCGTCGTATAGTATATCACAAGAGCTC +GTACATTCTAAAATATTAGTAATTTTCAAGGTGTAATTTTACACGATGTTAGACTCGTTC +TATCACACTGCTTGGTAGTTTAATATGCTGTAGTACTTGAGGATCGTCGGTGGAACGGTC +CTAGGATCTAAACTAGTGATTACGAACTCTTTGTGTAAAATATGAGCGTATTCGCACTCA +GTTGCAATTAAATAGCTAAATGATCGGTAAATATCCGGGGTAAATCAACTTGAGTTTAGA +GGATCCGTCGTTAAGAGATGATGTACATTCGTCGATTTAGGATCCTAACGTGGCGTTCGT +ATGAAAAGAGCTGAACTAAATAGGAAAACGTTAACCAGTGACTACGCCCCAACCATTGCA +AGATGTACCCCAATGATGGTTTTGGTATCGAAACTTCTCTTAATTGTGTTTCTTAAGTAC +TGGCAAAATTCGAGCCGGCATCGTTTGTTGATAGTTGGGTCTAGGATTTTACACCTTGTG +TTAGCACTGGGCCATTAATTCAATAGTAACAAGAATACTAATTACCAATGTGCGTGAAAA +TCTCCTTGACTGGTGCAACGTCATTCACAGTCGGATCTCAAGTTATTAGGTGCTAACTGT +ATACACCAAATTTAGGATAAGAGCCGGCTTAAGGCTAATCTAGACCCAATATTAATCAAT +ATTTTACGTAATGCATCCACGCGGCGTGCTCTTGGTGAGCAGCTGGGATTAAACGCGTAG +GTCGAACTATCGAGGGTTTACAAGAAAGCCAAGTGAAAATGAGACTATTGGCCATCGCGA +GATTTGAATAAATGTCCCTTGGTACTTATACGTTGGGCGAACGGGGATGAGCCAGGCTGC +TATCATCGTTTCGAGGTAGCTTCCAAGTGGATGAACTCAAAGACTGGCATTATGTGAAGA +GCATAGCGCTTTTCCCCGTATTATGGCAGCAGCTGGTTACCCATACTTGTGATCCCCGTA +ATTCTACTGTCATAGAAGGATGACCGAATCAATGAGCCGGGTGGTGTCCAAAAGCGATCC +TAATCCTTGCTGATTTACCTTGAGCGGTCACGTCTGTCTCAGCGACATTCGCCTTGCGTT +AGACTAGGCCGTAAGTAAGGAGTGCACTCCACAACGGCGTAATGCGTGCGGCGAGTAATG +TATTAGCATGTTAACCACATTCTTGGCAGCCAGATCAAAATCACTTTTCATCTGGTTGTC +TTAACAATCCGATAGAATCTAATGTAGCGATGCGTACTAGAAATAGTTACAATCTACAGT +CTTGCTGCACTTGCTGCTAATAATGAGCGAGGACCTATCCCTCCTTAAGCAAGTTCCTTG +TTCCGTGCGGGGAGCCCTGGCGCTAACTCTTTACATGATTAGTATCGCATGTTGTTACAT +ATATAATAGATTTACATCATTTCAAATGCAATGATTCGTGCTCCTAAAATGAGTCGTATG 
+AATAGCCACAGCGTACGGAAACCTGAATTGATTTGTAATTTAAAGATCAACTTAATCTGT +GTTGATCAGAGCGAGCATTGCAGAATACCCCTGCATCTAGGAATCGGTGCCAGTGTAAAA +GCCTGTTAGTAAAACCACGACTATGTAGTGTGTACCACACTCGGAGTGCGTCAAGCGAAG +TCAAACATGGAAATGAAACCATGCGTACGGAAAAGACCAGTGATTTATAAGGACATTCAC +ATAGACTCCAAAACTGACCCGATGGAGTCTACGCCGAACAGTTGGTATCAACATTTGTCT +CGATTTTCTGTTGGGAACATCCATCCCTACCCACAACGTACTGGACCATAATCAAGGGTT +TGGAACAGTACGCTCCTGTACTCAAGAAGTCCTTGCACGAAAGCAATAGGTTGAACTTCA +TCATATAGGCGATGACAGTGCTATCAGCCGGACTGGCTGTTCTCGTAGAAGTCACTCGAA +TCAATAAGATACGAATACTCCATCCTGTACGGGGACACTATATTATGCTAGCCGATTCTG +TAAATGTAGTCTTTACCGAGAATTGCTGACACTGATTTGAGTGTAGGAGGTCCGGTATAC +ACTTATCATCAACTTATTCCTACACTCGGTTTTCAATAGTTCGTAGCCCCAGGTTGCATG +AATATTATACCTCGGATAACACCTACTAATCCGTCCACAGCCTAGCACTTACTGGCGATC +AATGGAGCATGATGTACTTAGGGGACGGTATGAACATTCTTAACAGTTCCAAATGACCTG +TAGCAAATACAATAGCATCTTTGTTTAAGCATGGTCCTCTGCGGTTTGAAATGTCGCTAA +TCTAGTGATATTCCTTGTAAGCCACTGTTACTCTAATTTAGCCCACTCCAGAACGAGTTT +GTGTCCATGAAAATGTAACTCCCCAGACATGCAAATACGCCTTATTGCTGAATATCGGAA +CAAACAAAGTCGTTATCATCCTGAAATCGACGACAAGTACATATTAAAGGTTTGTTTGGC +AAAATAGGTAGCAAGTAGGATGTTCATAACAATTAAAGCGCGTAACTCCTAAATGTGCAT +TATGCGCCGAGGACCGATAGCTGACGCCGCTCTAGCTTCTATTGTTCCACTGTACGGTAC +AAAGATTGAATACGGAAACAGAATTCGTCAATTTGTTGAATTATGTTCTATTCGTTTTAT +CTGGTATATTTGTTACCTAACGTATTTAGGGAAAGTAGCTTCATGAAGAAATCTAATCCC +TCGCGTGACGAGTTTGCTGTGATTATTATGCGACCTGACTCTTGTAGTGTGGAGTTCGTT +GTCGTATCTGTACAAACTGCCGACACGTAGACAGGCCTGTCTAATAAACCAGGGACCTTT +AAGCGTCTTTGTAATTAAGTAAGTACCAGACCATCCTTAGATCAATATGATGCGCAACCG +GACCGGATCAAATGTTCCAAGCTCGGTAGGTTATCCTATAAGAGCCTCAGCAAAATGATG +TAAATTGTCAGCGTGTAGTACGGAAACAGATCACGGTATAATCAAGTCTAAATATTTAGC +CCCGGTCTTGGAATGGCCTTTTATGCAACCAATTTGTGGCGATTAATTTCTCAACAGTAA +GACAGAGAAAGCTAGAGAAGCTGGTATTATTCTGCATGTTGTCGAACCAGCTGTGTACAG +TCAACATTTTGCTATTTACTAAGTTGAAGCTTTCGGTTTCATGTGAAATATCTGGCCAAA +TCGAATGCACCCTTTGACCGGCAGTTTTCATAAGCCACGTGTTTGCATTTCTCTTTAACG +CATTGAAAATCACCGCGAACGACCTCACAACTGTCTAGCTTACCGATACGTTAGTGGTCT +CCTCGCAGAATCGAACGAACCCGAATAATATGGTGATATTCTTTAACGACTGATTAGGGT 
+CTTATTCGAGATTTTCAGTCTTTAAGCGTGAGCAGCGTGTTAATCACCTAGCAACATTAT +AGAAAGGAGAAAGGTACGAGCAGTTTAAAAGTTACTTCTAATTTTAACTATTGTCCAACT +AAGTGTAGATTATTTAGGCTTGTGTCCAAGTGAGATCATACTGTTTTCGTGTGATAGGTA +TCCGCATCATAACTAGTTATATTAGCACCGTGTATGAAGAAACGGTGGACCGTAGCACAA +CTCATTGTTATTTTGTCCCCTCTTGGTTTATTGGATCCTAGATTATATACGAATAGAGCC +CCTTTCGCAACAGCATCAGAATCAGACCTGCGCTCTCGACTGATAATAGCAATTTGTTAA +GAGCGGATAGACGCAGAAGAATAACATGATTTGTGCACTTAGTCCAGTCCAGATAAGAAG +TTGAGGCATTGACTTAACTTTTCATTGTCCGCTTGCTATCCCCACGATCCTGCTAAACTA +AAAGCTTTTGGCGCGGAAGAGCCGTTATGGAGGTTCGGCGAAATTGTATCACTAGCTAGA +CCATTTTCTGTAGGCTTTTAGCTTGATCGACGTAAATTCGATTCTATATGGTAGAAAGGT +ACGACCGTTATACGCTCACGTACAGCCTAAATTCACTTGTGGAGGCGATATAAGCTAATA +AGCGGTTCATTTTGAGGAACCGTTACTTTGAGATTCACTTACAGCAACTAAGGTTGTGTT +ACCGTTTCTTCTCAATTTACTGCTGGAGCGGCTATTATGCGTCCATCACCTTCATAGCCC +TAGTCATCAAGCCCATAGAGGTATGTTCGTGTGTAAACGAATTCCAAGACTAATTGGTGG +AAATTTCAGTTTGGATTGAATGAGGCTGATACTTCTATACACTTAAGGGTTCCCCGTAAG +TATATTGCCATAAGGGAGTAGTAACACTAAGGTTGTGAAAATATTGCACGACGTAGGTAT +TCTCAATTTCCTTCTAATTCTGTAGGATTTATGTAAGGCGACCGGGACTCTATTGTTTTG +TCTCCGAGAGTTTCTTAATCAATTGTCAGGCTAGTAGATCAAGTGTAATAAATGATTAGA +GGTCCTCATTTGGAGAATTTATCTATATCCTTGGTCGTCCACGCGGTATCGGAGTTGCTA +TACAATAAGTTGGTTCCAGAAAGCGTCTTAATTACATACTCTTGGTTTATCAACGAGATG +GTACCTAATACTCTCCTCTCAGTTCAGTAATAAGGACCGTTAACCGCACAATTGCATGTC +ACCATGTAACACATCCTAGGTTCAGTGGTGCAAACAAATCAAAGTCGTTCGATGTCACTA +AAACATTTTGCTTAGTAAGCTCACTTGGTTATGCAATATTCTTCACTTCCACAAGTGACT +CTACTTAAGGCGACGCACCTCCCTACAATTCGCATACGCCAGGTACACACAGCATGGAAT +AGTGTAGTACCTACTCATGCGCGAACGGTCGCCTGCAGAATTCCAACATGGAGGTCTTCT +GGCCTAGTGCTTGTGCTTCCGGGATACACCGCACTCATATCACAGTTTTCCCTGGCACAG +GTTATAGTCCGCTAGCGTGTTGAAGCTAGTTCACCCTTACTATGATCCAAGAAAAGCTTT +TCGGCCGGCCATCCTTCACCATACGTTTCGGGGTCTTAGTTCATTATCAGAGTCGGTGCC +ATTGTTCCATGTAGGTACGTGGAGGAAGTAACTCTTGATATGCTATACGTGTAGCATACT +ATACTCCAGAATCCGTCGCAACAATCCCTTTATCTGCCCCTTTATTTACATTCCCCGCAT +GTTTTGATTACTTAAATGTCGGGTACTGCTGGTATACACCGTATGCACCGAAAGACAGCA +ACCCCTCAAAGCTTCGACGAGTTACCTGGTGTGAGACTATCAGCTTATAACCCTTACTAA 
+CAGCAGTAGACGAATTCTCCTAGTATAAAGTCAATTACAGTTGACTAAATTCGAAGTAGC +CGAGTGGGTCTCATTAGACCCTACATGTATCTCTTGTTTTCAAAACGGCTGTGAAAGTCG +GAATATTATGTGAGTATGATTCACTCGGCGGAACACTCAAACTCGCTGAATCATTGATTC +GCCGATGATTAAGCCGACCCTCCCAATTACCGCTGCAGCACTACAATCTCAATTTAGGTA +TACGGATCTAGGTCCGTTCGTTACCAGTTACCAATACGCAACCGAGCTCGAAGAGAACAC +AAATTTACGAAGCAAAATTCGGAATCAGGGTATCGTGCAGAATGGCAGGAGAGCTGGAAC +TGTTGTCAGATTTCCCTCTAGTAATCGTACGAGAATATATTCTATGTCACACATTAACCT +ATAGGTAAAGCCTCATTATACTCCGTTTAATGCAGACTTATAGGATGCCATGCAACAAGT +CTAATCGTCGCGAGGACACTCAAAAGGATCAGTGGAAAGTAACACTTTGTGGTTCAATTC +AGAAAATCAGCTTGTTTGTACCTACAAGTACAAAACTTGGAGTGGTAGAGAGGTCAATCG +ATTAAGTTAAAAGGTTAACGCATGCGCCTAGTCATTAATTGGTTGCTGCGCAAAATAATG +CATGCGTAGTAAATCCCAGCCCCAAGTCGAATAGATTATTAACGCCGGAAGCAGCCATCT +GCGGAATCTTCGTTGTGTCGAGCGTCAAACGTTGCTCCATGGCTCCCTCCCTTTATCGGG +TTCTCTCATTGAGTCCAACTAAACATCTACAAAAGAACTTTGTTATGTGATATAGCTTAG +GTCTAATCTTAGGCTGACATGCATAACGCTTTGTCGAGGTCTATTAACATAGCCGAATGC +ATGCAAGCTTTGATGGATATTAACTTCCCAATGTCTAAGATTAAAGAAGAGGACACCCAT +TATGTCAATCATCTAGCTAAATCGAGCTGCGAGCCGGAGAGTAAACAGTTTCCTTTTCTT +CGGCGGTTATTTGAAAATTCCTTTCTTATGGCAGTGTTTCGAGCGAGCAGTATATTAGAC +CCAACCTCGATAATCGTTAATCACATAGCGACTATGATAGTATCATTACCAGCAGCATAC +ATAAAATTGTAAAGTGTGTTACTGTTTGCGTGGGTGATTATAGTACAGTCTTTTGCAAAT +CTACGGCCCTGACAGAACTTCACATTAAAGGCCATCCACAGAACAATGGACAACGTATAA +AACCTAAAAGGATATCGTTTTCCTGGGGTTTTCAGTTGTTTTAATGACCGGTAAATTTTC +TTACCCTATTGTGTTTCCTTACACAGAAATATCTGAATATTGAGGTACCTGTGAACATTA +TCATTCATACAACATATCCTATCGCCCATCCTGTGCGGCGACTACTCCAGCACTCACTAA +TTGTTAATCATCTCATACAACTCGTCAGAATTAACATTACCGCAAACTGCTTACTAGCGC +AATCAGGTCAAGAGGAGGACGGCTTTGTCACTTAAAAGAATAAGGTGTAGCTGCATAAAA +CAATGTGTATCTTCTGAGCTTCACAGCCGTGGGCTATCTATGGTTCCGGTCCTGTTGATT +GCTCCCGATGTTGAACAATACTTTCCACTTTCCGTGACAGAAACTTTAGAGCAAGAGGTC +AAACTTTACCCAAGCCCATAGGTAGAAGTTACGCGCGCATTGACGTTTGATCAAGGGACA +GCTGTGAATATCCGTCCCACGTAATCGTGACTTCTCATCAATATTATATTACTGCCGCTA +ATCAACAACTTCCTTGTTTCGACTGAAACGATTTTAGTCAAGTCGAAGACCTCATACGAT +AAGATTTGCAACATGTCTAAAAGAGAACGGGAACTGGCAAAAGGCTTGGTAGATCCGTCT 
+ATAGCGTAAAACTGATTAACCCATTAGGTCTGAATAACTTTACACAACCCTCCGCACTGT +TAAATGACGGGCTTTGCTCTGTTTTGACACATCAGCTAGAAACTCGCCACGAAGGCATAA +GGCTCCCATATAGCGTAGCTGACAAACATATGAGGTGGCTGCATAAACTAAATTGAGGCT +CGCGTTCGGATACTTGCCCATGTAGCAAGTCTTGGCAACCAACTATATAATCATCACGAA +TTGAGTGCTAAAGACATGCGAACAGTTGGGGCTGCTATATAGTATGACAGATATAGAAAT +TTTATAAAATGTCGTAGGAATCTGGAGGCCAAAATCATTAGACACTCTTGTAAAAGGTAT +GGTAATGTGTATGACCTCTTGGCATAGTGTCCAATTATTCTCGGTTTACTCTCAGAGACA +CAGTCATGTAAAAGTGGTGAGGAATTACCGCCGTGTTTTGCCAACCAAGAAGCATTGAAC +AGTAGATCAATAATGATATTCGGTAGCGTATTTACGCTTTGCGGTTTTCAGAAGAAACTA +TCACAATTGAAACTCTATTCTTCGCCTCATTCCGTACCGTTAGGAATGACTCGAATCGTA +CTGTCTGCCGCGGGGCATAGTGTATTGCTCCCCACCAGGTTCAGATAGTTCGAATCAGTG +CGCTGTACAATTGCCTTACGTGTAGATTTGCATCACCGCTTCACGTAGGCACCCAGAGTG +CTCACTAAAGCCACTAGAGAGATAGAGTTAGAAATTAAGTATCGGTTACGCCCCTCAGAC +GACATAACTCACTTCTACCGAATATCCTTTCTATCTTGGATACTACTAATGCTTCCGTTC +ACGCCGCAATCATGTGGATCCTCCAGTAAGCAGGGTGCTGTCATGACTATACAGTACGGA +TCCGTAAGCATTTTGAGGATGATAACATAGGGTCGGTTACTGTGGATTTCCGTTACTTAG +GAGAGCAGCTTTAGCTGACTTTGCTGAGGCTGCGCGTGTTAGACAGCAATTTACGAACGG +CGCACTCTATAGCAGGCACTCACAGTGGACCAGTAGTCCTATTGCAAGAGTTCATTATGG +AACATTTTAGTCCTCTATCACACGGACCATTGCAGTAGATAACTCTAATCCTATGTCTTT +ATTTGGTTGCCTGGAACCCCTTACCACTAGACACCCCAATAAGTAATCTTGCTTCCATGT +CGAATTGATACTCATCGAAAACATATAAAACTAATTATGCTTGTGTTCCTGTGGTCTGTT +ATATAGAGGCGCCCTATTGGCCGCGGGATAAGGATCATTTTGGCACACTAACGGGATCCT +AAAACTTTATCTTTCAACGACTCCTACATGCCTTTTAGGTTAGTACGCGAATCGCCTAAC +AAGCCAATGGGTATTGGAGAATTAGACAAAATGGTTGAGGAATAAAGTGGCGCAGGATTT +TGTCCGAGAAAGGGATAGCAAACGGTCGCAGGCAGGAGTAACAATTTTCAACCGACCTTA +ATAGAGCTCAAAAGCTACCGGAGAAAGCTTCGTCTATGCTTAATACATATGCTAACCTAT +GAATTTCGTAAGCGTAATATAAACTTATCAGATATTTTAAAAGCATCCTATTCAGTCGTA +CTTTTGGCAGGAAAGGTCAGGCGAAACAGAGTCTCCCTGCGGAGGCTTTTAAAATAAATA +GCGGGCCTAGCATCGATTCTAAAAGACGACCCCAGGTGCGTAACCGTGCCTCCCCAAGTC +TTCTTTTAACAATTACCTAGAGAACGGCGTCAGTCGCGAATGACCTTACGAACGTTTACG +CGGAGCCGAGTAAGATTAATAACTGCTTATTGATTTGCAATCGTTTGATACGGGTGGCCC +GAAGCTCAATATCAACATAAATAAAATTAGTCGGAATGGTCGCTTAAATCGCGCGCTGTC 
+ACTGTCTTCATATGAGGGAGTTGTGTAAGACTGCATTGATATATAGGTATGATTTCGGTT +TAGAACTTTGTCTGTTAGCAACTCCGCATGATTGAAGGAAATCCTCGTTGGTAAGATCTC +TTTAGCATTTGCACAGCTGACTCTAACAGCATAGTATGTGATCGTATTATGTCTGCAGTT +TGTAACACAGTGGGCGGCATGGATGGTACTTAATGGACGTAATGAGCAGTAGACCACCGG +TGTTACCTAACCATCATTAGAGTAGGCGAGATTGCGCTTGTACGACTTATATATAAGGGT +AACCGGAATACCGTTCCTCTTATCAACAACAGTTACTGGTCTTAATTCACATCGGATATT +GCGATCGCCAAGACTATCCCGTAAGTCGTAAGCTAACCAACTAGCGGTTAGGTTTATTGA +GGTTTTGATGGGAACTTCTCAGACACGTCGTCAACTACCTAATTTCTTGGATGGAGCTAG +GCTAACTGTCCCAGAACTTTCTGACACTCGAGATCCTCTAACTAATTGGAATCCAGGAAT +TCCCTTATTGCATCGCCACAAACGACCATAAATTACAGCATGTTTCATTGTCTAACGTGC +CTATCCACGAAATTGAATTCGGTTCACATTATATATCCCCTTCTACCGCTAATTTAATGT +TTAACGTTGATGGGGCAAAGCACATTCGAGAAGTACCGAAAAGTCTCAATCCAAAGACCG +GAGGAACTGGCTTCGGTAAGAATCGCGAGTATCCTTGGATGCCCTGCCTGATTATAACTT +GTTCCATGTAGATAGGCGTAGCTAATTCATAGCAATACAATAAACGAGTCAGAACTGTAG +TCTAACATAACAGCCTGCTCTCCAGGTAACAGCCCATTATTAGATATAGTATCACGATCG +TCGGTTGTATTAGTGGTGATAACTATCGATTCTGCCACTAATAGAATGTGCAGAAATAAA +GTATCTGAAAGAAAACGAAGTCACAGAGAATAAAGCTCACTTCATAAAAGTCGGTTGCAG +TAGACGCATATCAATTTTCCCTGCTGCATTTTAGAGTTCGGAATAGTTAAACATAATACT +GGAAGCGCTTCCGGCAATCAGGAATAACCCCATATAAACCAACCTTTGTTGCTATTGCCA +GCGCTATTCTCGTCAAAATTTCTCCCTATGGTCTTCACATCATGCATCACCGGACCCTTT +GATAGACGATGACCCAATTACAATCACTCCACGGATGAGCATCCCATTTTATACGAGGCC +CACTGGAAACAATTGCAATCGACGTGACCAAGTAGAGGAGCGTGCTCGAAAGGTGATGAT +TGCCGAATTCTAACAAGGATACTATAAGCCACGGAACGCTGACGTTGAACAGACCTGGTC +TCCTGGGCACTTCGCAGCACCTCAGTAGTAATTCCGGTAGATTAGGACTTAGCATTCCGT +TGATCTTACAGGATTTATAAATAAGGAGATCTGTCTTGTTTAATTAGGAGGACGCTTTTC +CCGCGTAAGTACGGGAAAACGTTCTTCTGATTTTGTTTGCCACTTGACATTGTAGCTGCT +AGGAGAAGGGATAATATCCGCGTTTTCTTTTACCGTAACGTCGGAGCATACCATGGTAAT +TGTCCGTGTCAAAACTAGATATCTAGGTTGCAAAATTCAGTCAGTAAGTCCTGAGGCCTT +CCGCATTATTAATTCTACAGACATATGAATTTGCTCCACCGGCTAGCACAGTCAACTCAA +CCCACGATAGGGGAACGAAATCACAAATAGGTTCACATGGTCAATACAAGGCAAACCATT +CCCCATAACTCACGCACTGACGGTAAGGCCATTTCAGGTCAAGCGGTGAATGCTGTGAAA +AGCAGCTCGACCACCTGCCGTGGATGGCAAACCGATAACAAAGGACTCCGATACTTCATT 
+TGTAAACGTTTGCAGTGCTGACGTAACTCATATCTACAGTCAAACCGAATGGTTTGATCG +GCATTATGTAAAGGAATCGACACACGTTGCGTCTTCTAGATTATTACACACCTGTCTGCG +ACGGATATAGGTAAATAAGTCAGCCTCCACTCTGCAGAAGATACTAGAAACGTATCAGTA +ATAGCTATCAGGATTTCGCCATCCTCGCACTGTGCCCGGATATCACAGCAAGATTCTAGG +ATGGCACTTGTGTGACTAGAGGTTTTACTCGTTGAGCCATTCTTACTATAGGCATGGGAT +TACAATGTGCATGTTTGTGATGTTATCCCATATCTTGCATGTATCAGCCTACCAATTAGA +CATATGACTAGATGTAGTCGATCAACGCAAGGGTGCGGACTTTGATTCCTTTTGAATTGA +AGTCAACTCAGATGCTCCTTAAGACGTTTTACAGTAGGTATTTTGTGGTACAAACCAGAA +CCAGTGCCAGTCGGTAGTTATTGTAGTGTGTTCTTAATACATATTTGGTATTGGAGTTTC +TAACATTTAAAAGGAGCCTATTACACTTACTTAATTTGCGTCTATATTTCTGTTACGATA +TGTCGTCTGTCGATTTTACGAGTTTCATACGTGCGGGTTCCCTGTTCGCAATGGGCCCCT +TGCTAATGTCCCGCATCTTTAGGATGCAAACTTACTCACGCCTCCTTTACCGAGACTTGG +TGGGAGAGAAGACTCCTGTAGAATCCCGATCTGAATGGTTTCAGTGTAAGGGTCCCTTCT +AGCCATATCATTGAATATTCTTGTACTTTAAGTAACTCGATCCTACCAGTACAATTCTAG +GTTTGCCTTATAGCCGGAATGAGTATCAGCGTCATTCACCCCGGCCGGATATTATTTGCA +ATGTCAGGGACACCCAAAATAGACCGGTTAGAAGGCATATGCGATGAGAGTTGGTGCCTA +AATTAAACGATACAATTGATATGACAAGGACTATACGATGAAATCCATGAGATAATTATC +GTAACTCGGCCAACCTAAAACCGTGCAAGATAGGAGCGGTCCTAGAAGTACTATCGACAC +CTTAAATACTCACTTGAGTTTTCCGATCCTATAGTGCCAATCATATGGCGCAGGAATATT +ACAAACTAAGAAAGTCAACAAAAGATGTAAATTGCAACACCTGGCATCGGTGGGGTTGTC +CCCTTAAACCCTGAAACCAACTGTTATGCTCAACATTATATCGAGGCTAAAACGCGTATC +GTGGCACATTAATAACGATCACATAAGCTTTGCGGCTAGCAATAATAATTTAGGACAGCT +TAGATTTTGACCCGTGCTAATCCTCAGTATGGAGTAATTTTACGGATCTCTCGTTGTAAC +CGTCCTCAGTCGTGTACATTTTAACCTTTGTAAACTAGTTTACGAACGAGTATTTAGAAG +GTCCGTACTCTCACCCAACTGACACATTGTACTAGCTCAAGATCGCAAACACTAAGGGTG +TGAGTCGCGGGATAGCGCTTAAATATGACTGCTAATGGTCAAGAGCACGCGCATAATATT +CCACTGGTTCTAGGTCACCACTACGGTCAGACGTTGACCTGCATGCCCTACATCCGGCAC +GGGCTACTAACGGCCTAATATTCTTTGAGCCATATCCATACTCGTCTATGCATATTCAGG +TATACGGCTATAGTGCGTTATTAACTTCGTCGTGATTAAATCCTTTAATTGTTCCATTAT +AAGTATACATGCTTAGATGCGTGAACTTGAGGGATATCGTTGCTCTAAAGTTGTCTTATA +GACTAAATCTAAACAAGCCGTGCAAGACTACTTAAATTACAAATCTTACAGACATCTCGC +CACTGCGCTAACACTAACAA diff --git 
a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai deleted file mode 120000 index 8158c3c..0000000 --- a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/chr_test.fa.fai \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai new file mode 100644 index 0000000..4e99d5b --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai @@ -0,0 +1 @@ +chr_test 20000 10 60 61 diff --git a/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py b/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py new file mode 100644 index 0000000..cb9d937 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +""" +Generate a realistic ~20kb non-repetitive reference sequence for ATAC-seq testing. + +Properties: + - ~42% GC content (human-like) + - No homopolymer runs > 5bp + - High k-mer uniqueness (>99% unique 20-mers) + - Deterministic (seeded RNG) + +Output: chr_test.fa with contig name 'chr_test' +""" + +import random +import sys + +SEED = 42 +LENGTH = 20000 +CONTIG = "chr_test" +LINE_WIDTH = 60 +MAX_HOMOPOLYMER = 5 + +# Target base frequencies for ~42% GC +# A=29%, T=29%, G=21%, C=21% +BASES = "ATGC" +WEIGHTS = [0.29, 0.29, 0.21, 0.21] + + +def generate_sequence(length, seed=SEED): + """Generate a non-repetitive sequence with controlled GC content.""" + rng = random.Random(seed) + + seq = [] + homopolymer_count = 0 + last_base = None + + for _ in range(length): + # Pick a base using weighted random + base = rng.choices(BASES, weights=WEIGHTS, k=1)[0] + + # Prevent long homopolymers + if base == last_base: + homopolymer_count += 1 + if homopolymer_count >= MAX_HOMOPOLYMER: + # Force a different base + alternatives = [b for b in BASES if b != base] + alt_weights = [WEIGHTS[BASES.index(b)] for b in 
alternatives] + total = sum(alt_weights) + alt_weights = [w / total for w in alt_weights] + base = rng.choices(alternatives, weights=alt_weights, k=1)[0] + homopolymer_count = 1 + else: + homopolymer_count = 1 + + seq.append(base) + last_base = base + + return "".join(seq) + + +def validate_sequence(seq): + """Validate sequence properties.""" + gc = sum(1 for b in seq if b in "GC") / len(seq) + + # Check k-mer uniqueness + kmers_20 = set() + for i in range(len(seq) - 19): + kmers_20.add(seq[i : i + 20]) + unique_20 = len(kmers_20) + total_20 = len(seq) - 19 + uniqueness = unique_20 / total_20 + + # Check max homopolymer + max_hp = 1 + current_hp = 1 + for i in range(1, len(seq)): + if seq[i] == seq[i - 1]: + current_hp += 1 + max_hp = max(max_hp, current_hp) + else: + current_hp = 1 + + return { + "length": len(seq), + "gc_content": gc, + "unique_20mers": unique_20, + "total_20mers": total_20, + "uniqueness_pct": uniqueness * 100, + "max_homopolymer": max_hp, + } + + +def write_fasta(seq, contig, filepath, line_width=LINE_WIDTH): + """Write sequence as FASTA.""" + with open(filepath, "w") as f: + f.write(f">{contig}\n") + for i in range(0, len(seq), line_width): + f.write(seq[i : i + line_width] + "\n") + + +def main(): + output = sys.argv[1] if len(sys.argv) > 1 else "chr_test.fa" + + print(f"Generating {LENGTH}bp non-repetitive reference sequence...") + seq = generate_sequence(LENGTH) + + stats = validate_sequence(seq) + print(f" Length: {stats['length']}bp") + print(f" GC content: {stats['gc_content']:.1%}") + print(f" Unique 20-mers: {stats['unique_20mers']}/{stats['total_20mers']} ({stats['uniqueness_pct']:.1f}%)") + print(f" Max homopolymer: {stats['max_homopolymer']}bp") + + # Validate + assert stats["gc_content"] > 0.38 and stats["gc_content"] < 0.46, f"GC content out of range: {stats['gc_content']}" + assert stats["uniqueness_pct"] > 99.0, f"Uniqueness too low: {stats['uniqueness_pct']}" + assert stats["max_homopolymer"] <= MAX_HOMOPOLYMER, f"Homopolymer 
too long: {stats['max_homopolymer']}" + + write_fasta(seq, CONTIG, output) + print(f" Wrote {output}") + + +if __name__ == "__main__": + main() diff --git a/pipelines/nf-atacseq/tests/data/generate_test_data.sh b/pipelines/nf-atacseq/tests/data/generate_test_data.sh index f5cb288..9cb2eb4 100755 --- a/pipelines/nf-atacseq/tests/data/generate_test_data.sh +++ b/pipelines/nf-atacseq/tests/data/generate_test_data.sh @@ -1,11 +1,18 @@ #!/bin/bash # ============================================================================= -# WASP2 nf-atacseq Test Data Generator +# WASP2 nf-atacseq Test Data Generator (v2 — realistic reference) # ============================================================================= -# Creates ATAC-seq-like test data by symlinking shared core data and generating -# pipeline-specific files (shorter fragment FASTQs, BWA index, samplesheet). +# Generates self-contained ATAC-seq test data with a non-repetitive reference +# so BWA alignment produces meaningful mapping rates (>80%). # -# Prerequisites: samtools, bgzip, tabix, wgsim, bwa (WASP2_dev2 conda env) +# Previous version used the shared chr_test.fa which is a repetitive ATGC +# pattern yielding ~0% mapping. This version generates its own reference. +# +# To produce non-zero allele counts, reads are simulated from BOTH haplotypes: +# half from the REF haplotype, half from an ALT haplotype with het SNPs applied. +# +# Prerequisites: python3, samtools, bgzip, tabix, wgsim, bwa +# (all available in WASP2_dev2 conda env or WASP2 micromamba env) # # Usage: # cd pipelines/nf-atacseq/tests/data @@ -17,106 +24,307 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" -SHARED_DATA="../../../../tests/shared_data" +# BWA may not be in PATH; check common conda/micromamba locations +if ! 
command -v bwa &>/dev/null; then + for candidate in \ + /usr/local/Cellar/micromamba/*/envs/WASP2/bin/bwa \ + /usr/local/Cellar/micromamba/*/envs/WASP2_dev2/bin/bwa \ + "${HOME}/miniforge3/envs/WASP2/bin/bwa" \ + "${HOME}/miniconda3/envs/WASP2/bin/bwa"; do + if [[ -x "$candidate" ]]; then + export PATH="$(dirname "$candidate"):$PATH" + break + fi + done +fi echo "===================================================================" -echo " WASP2 nf-atacseq Test Data Generator" +echo " WASP2 nf-atacseq Test Data Generator (v2)" echo "===================================================================" -# Validate shared core data exists -if [[ ! -f "$SHARED_DATA/chr_test.fa" ]]; then - echo "ERROR: Shared core data not found at $SHARED_DATA" - echo " Run: cd tests/shared_data && bash generate_core_data.sh" - exit 1 -fi - # ----------------------------------------------------------------------------- -# Symlink shared reference and variants +# Check prerequisites # ----------------------------------------------------------------------------- -echo "[1/4] Symlinking shared reference data..." +echo "[0/7] Checking prerequisites..." + +check_tool() { + if ! command -v "$1" &>/dev/null; then + echo "ERROR: $1 is required but not found in PATH" + echo " Try: conda activate WASP2_dev2" + exit 1 + fi + echo " OK: $1" +} + +check_tool python3 +check_tool samtools +check_tool bwa +check_tool wgsim +check_tool bgzip +check_tool tabix +echo "" +# ----------------------------------------------------------------------------- +# Clean stale symlinks and old data (one-time migration from v1) +# ----------------------------------------------------------------------------- +echo "[1/7] Cleaning stale data..." for f in chr_test.fa chr_test.fa.fai variants.vcf.gz variants.vcf.gz.tbi annotation.gtf regions.bed; do - if [[ ! 
-e "$f" ]]; then - ln -sf "$SHARED_DATA/$f" "$f" - echo " ✓ Linked $f" - else - echo " - $f already exists" + if [[ -L "$f" ]]; then + rm -f "$f" + echo " Removed symlink: $f" fi done +rm -rf bwa_index +rm -f sample1_R1.fq.gz sample1_R2.fq.gz +rm -f chr_test.fa chr_test.fa.fai variants.vcf variants.vcf.gz variants.vcf.gz.tbi regions.bed +echo " Cleaned previous outputs" +echo "" + +# ----------------------------------------------------------------------------- +# Generate realistic non-repetitive reference +# ----------------------------------------------------------------------------- +echo "[2/7] Generating realistic reference genome..." +python3 "${SCRIPT_DIR}/generate_realistic_reference.py" chr_test.fa +samtools faidx chr_test.fa +echo " Created chr_test.fa + .fai" +echo "" +# ----------------------------------------------------------------------------- +# Generate VCF with ~30 het SNPs + ALT haplotype reference +# ----------------------------------------------------------------------------- +echo "[3/7] Creating VCF with 30 het SNPs and ALT haplotype..." 
+ +python3 - <<'PYEOF' +import random + +# Read reference +with open("chr_test.fa") as f: + lines = f.readlines() +seq = "".join(l.strip() for l in lines[1:]) + +# Deterministic SNP positions spread across the reference +rng = random.Random(99) +positions = sorted(rng.sample(range(200, 19800), 30)) + +# Transition mapping for plausible variants +transitions = {"A": "G", "G": "A", "T": "C", "C": "T"} + +# --- Write VCF --- +vcf_lines = [] +vcf_lines.append("##fileformat=VCFv4.2") +vcf_lines.append("##source=WASP2_nf_atacseq_test_data_v2") +vcf_lines.append("##reference=chr_test.fa") +vcf_lines.append("##contig=") +vcf_lines.append('##INFO=') +vcf_lines.append('##FORMAT=') +vcf_lines.append('##FORMAT=') +vcf_lines.append("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample1") + +snp_map = {} # pos -> (ref, alt) +for i, pos in enumerate(positions): + ref = seq[pos - 1] # 1-based + alt = transitions[ref] + snp_id = f"snp{i+1:03d}" + vcf_lines.append( + f"chr_test\t{pos}\t{snp_id}\t{ref}\t{alt}\t100\tPASS\tDP=50\tGT:DP\t0|1:50" + ) + snp_map[pos] = (ref, alt) + +with open("variants.vcf", "w") as f: + f.write("\n".join(vcf_lines) + "\n") + +print(f" Created variants.vcf with {len(positions)} het SNPs") + +# --- Write ALT haplotype reference --- +alt_seq = list(seq) +for pos, (ref, alt) in snp_map.items(): + assert alt_seq[pos - 1] == ref, f"Mismatch at {pos}: expected {ref}, got {alt_seq[pos-1]}" + alt_seq[pos - 1] = alt + +with open("chr_test_alt.fa", "w") as f: + f.write(">chr_test\n") + alt_str = "".join(alt_seq) + for i in range(0, len(alt_str), 60): + f.write(alt_str[i:i+60] + "\n") + +print(f" Created chr_test_alt.fa (ALT haplotype with {len(snp_map)} substitutions)") +PYEOF + +# Compress and index +rm -f variants.vcf.gz variants.vcf.gz.tbi +bgzip -c variants.vcf > variants.vcf.gz +tabix -p vcf variants.vcf.gz +echo " Created variants.vcf.gz + .tbi" echo "" # ----------------------------------------------------------------------------- -# Simulate 
ATAC-seq-like reads (shorter fragments, 150-250bp) +# Create regions BED covering all SNP positions # ----------------------------------------------------------------------------- -echo "[2/4] Simulating ATAC-seq reads..." +echo "[4/7] Creating regions BED..." + +python3 - <<'PYEOF' +import random + +rng = random.Random(99) +positions = sorted(rng.sample(range(200, 19800), 30)) -NUM_READS=500 +# Create ~500bp regions centered on each SNP, merge overlapping +regions = [] +for pos in positions: + start = max(0, pos - 250) + end = min(20000, pos + 250) + regions.append((start, end)) + +# Merge overlapping regions +merged = [regions[0]] +for start, end in regions[1:]: + if start <= merged[-1][1]: + merged[-1] = (merged[-1][0], max(merged[-1][1], end)) + else: + merged.append((start, end)) + +with open("regions.bed", "w") as f: + for i, (start, end) in enumerate(merged): + f.write(f"chr_test\t{start}\t{end}\tpeak_{i+1}\n") + +print(f" Created regions.bed with {len(merged)} peaks covering {len(positions)} SNPs") +PYEOF +echo "" + +# ----------------------------------------------------------------------------- +# Simulate ATAC-seq reads from BOTH haplotypes (REF + ALT) +# ----------------------------------------------------------------------------- +echo "[5/7] Simulating ATAC-seq paired-end reads (dual haplotype)..." 
+ +# 20kb genome, 75bp reads, ~20x total coverage +# Split: ~1350 pairs from REF, ~1350 pairs from ALT +NUM_READS_PER_HAP=1350 READ_LEN=75 FRAG_SIZE=180 FRAG_STD=30 ERROR_RATE=0.001 -SEED=100 - -if [[ -f "sample1_R1.fq.gz" && -f "sample1_R2.fq.gz" ]]; then - echo " FASTQs already exist, skipping" -else - wgsim -N $NUM_READS \ - -1 $READ_LEN \ - -2 $READ_LEN \ - -r 0 -R 0 -X 0 \ - -e $ERROR_RATE \ - -S $SEED \ - -d $FRAG_SIZE \ - -s $FRAG_STD \ - "$SHARED_DATA/chr_test.fa" \ - sample1_R1.fq \ - sample1_R2.fq \ - > /dev/null 2>&1 - - gzip -f sample1_R1.fq - gzip -f sample1_R2.fq - echo " ✓ Created sample1_R{1,2}.fq.gz (${NUM_READS} pairs, ${READ_LEN}bp, ${FRAG_SIZE}bp frags)" -fi +# Simulate from REF haplotype +wgsim -N $NUM_READS_PER_HAP \ + -1 $READ_LEN \ + -2 $READ_LEN \ + -r 0 -R 0 -X 0 \ + -e $ERROR_RATE \ + -S 100 \ + -d $FRAG_SIZE \ + -s $FRAG_STD \ + chr_test.fa \ + ref_R1.fq \ + ref_R2.fq \ + > /dev/null 2>&1 +echo " Simulated ${NUM_READS_PER_HAP} pairs from REF haplotype" + +# Simulate from ALT haplotype +wgsim -N $NUM_READS_PER_HAP \ + -1 $READ_LEN \ + -2 $READ_LEN \ + -r 0 -R 0 -X 0 \ + -e $ERROR_RATE \ + -S 200 \ + -d $FRAG_SIZE \ + -s $FRAG_STD \ + chr_test_alt.fa \ + alt_R1.fq \ + alt_R2.fq \ + > /dev/null 2>&1 +echo " Simulated ${NUM_READS_PER_HAP} pairs from ALT haplotype" + +# Combine and compress +cat ref_R1.fq alt_R1.fq | gzip -c > sample1_R1.fq.gz +cat ref_R2.fq alt_R2.fq | gzip -c > sample1_R2.fq.gz +echo " Combined into sample1_R{1,2}.fq.gz ($((NUM_READS_PER_HAP * 2)) total pairs)" + +# Clean up temporary files +rm -f ref_R1.fq ref_R2.fq alt_R1.fq alt_R2.fq chr_test_alt.fa echo "" # ----------------------------------------------------------------------------- -# Build BWA index (for local testing) +# Build BWA index # ----------------------------------------------------------------------------- -echo "[3/4] Building BWA index..." +echo "[6/7] Building BWA index..." 
BWA_INDEX_DIR="bwa_index" -if [[ -f "${BWA_INDEX_DIR}/chr_test.fa.bwt" ]]; then - echo " BWA index already exists, skipping" -else - mkdir -p "$BWA_INDEX_DIR" - cp "$SHARED_DATA/chr_test.fa" "$BWA_INDEX_DIR/" - bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -2 - echo " ✓ Created BWA index ($(du -sh $BWA_INDEX_DIR | cut -f1))" -fi - +mkdir -p "$BWA_INDEX_DIR" +cp chr_test.fa "$BWA_INDEX_DIR/" +bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -2 +echo " Created BWA index" echo "" # ----------------------------------------------------------------------------- -# Create test samplesheet +# Create samplesheets (both test and local variants) # ----------------------------------------------------------------------------- -echo "[4/4] Creating test samplesheet..." +echo "[7/7] Creating samplesheets..." -SAMPLESHEET="samplesheet_test.csv" -if [[ -f "$SAMPLESHEET" ]]; then - echo " $SAMPLESHEET already exists, skipping" -else - cat > "$SAMPLESHEET" << EOF +# test samplesheet uses absolute paths +cat > samplesheet_test.csv << EOF sample,fastq_1,fastq_2,sample_name -test_sample1,${SCRIPT_DIR}/sample1_R1.fq.gz,${SCRIPT_DIR}/sample1_R2.fq.gz,SAMPLE1 +test_sample1,${SCRIPT_DIR}/sample1_R1.fq.gz,${SCRIPT_DIR}/sample1_R2.fq.gz,sample1 EOF - echo " ✓ Created $SAMPLESHEET" -fi +echo " Created samplesheet_test.csv" + +# local samplesheet uses ${projectDir} relative paths (for nextflow) +cat > samplesheet_local.csv << 'EOF' +sample,fastq_1,fastq_2,sample_name +test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1 +EOF +echo " Created samplesheet_local.csv" + +# ----------------------------------------------------------------------------- +# Quick validation +# ----------------------------------------------------------------------------- +echo "" +echo "===================================================================" +echo " Validation" +echo "===================================================================" + +# Check BWA 
alignment quality +echo "" +echo "--- Quick alignment test (first 100 pairs) ---" +bwa mem -t 2 \ + -R "@RG\tID:sample1\tSM:sample1\tPL:ILLUMINA\tLB:lib1" \ + "$BWA_INDEX_DIR/chr_test.fa" \ + <(gunzip -c sample1_R1.fq.gz | head -400) \ + <(gunzip -c sample1_R2.fq.gz | head -400) \ + 2>/dev/null \ +| samtools flagstat - 2>/dev/null + +echo "" + +# Check VCF REF alleles match reference +echo "--- VCF REF allele validation ---" +python3 - <<'PYEOF' +seq_lines = open("chr_test.fa").readlines() +seq = "".join(l.strip() for l in seq_lines[1:]) + +errors = 0 +total = 0 +with open("variants.vcf") as f: + for line in f: + if line.startswith("#"): + continue + fields = line.strip().split("\t") + pos = int(fields[1]) + ref = fields[3] + actual = seq[pos - 1] + total += 1 + if ref != actual: + print(f" MISMATCH at pos {pos}: VCF REF={ref}, actual={actual}") + errors += 1 + +if errors == 0: + print(f" All {total} REF alleles match reference") +else: + print(f" {errors}/{total} mismatches found!") +PYEOF echo "" echo "===================================================================" -echo " SUCCESS! nf-atacseq test data generated." +echo " SUCCESS! nf-atacseq test data generated (v2)." echo "===================================================================" echo "Total: $(du -sh . 
| cut -f1)" echo "" diff --git a/pipelines/nf-atacseq/tests/data/regions.bed b/pipelines/nf-atacseq/tests/data/regions.bed deleted file mode 120000 index da6c378..0000000 --- a/pipelines/nf-atacseq/tests/data/regions.bed +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/regions.bed \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/regions.bed b/pipelines/nf-atacseq/tests/data/regions.bed new file mode 100644 index 0000000..9b399eb --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/regions.bed @@ -0,0 +1,15 @@ +chr_test 2668 3393 peak_1 +chr_test 4316 4816 peak_2 +chr_test 4939 5439 peak_3 +chr_test 5808 6315 peak_4 +chr_test 6486 7995 peak_5 +chr_test 8090 8679 peak_6 +chr_test 11052 11552 peak_7 +chr_test 12204 13167 peak_8 +chr_test 13187 13687 peak_9 +chr_test 13766 14266 peak_10 +chr_test 15071 15819 peak_11 +chr_test 15982 16482 peak_12 +chr_test 17274 18099 peak_13 +chr_test 18526 19026 peak_14 +chr_test 19230 19730 peak_15 diff --git a/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz b/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz index 2d8e601..88debd7 100644 Binary files a/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz and b/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz differ diff --git a/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz b/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz index 76535bc..8767790 100644 Binary files a/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz and b/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz differ diff --git a/pipelines/nf-atacseq/tests/data/samplesheet_test.csv b/pipelines/nf-atacseq/tests/data/samplesheet_test.csv index d50c362..cf1884a 100644 --- a/pipelines/nf-atacseq/tests/data/samplesheet_test.csv +++ b/pipelines/nf-atacseq/tests/data/samplesheet_test.csv @@ -1,2 +1,2 @@ sample,fastq_1,fastq_2,sample_name -test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1 
+test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1 diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf b/pipelines/nf-atacseq/tests/data/variants.vcf new file mode 100644 index 0000000..e3e67f2 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/variants.vcf @@ -0,0 +1,38 @@ +##fileformat=VCFv4.2 +##source=WASP2_nf_atacseq_test_data_v2 +##reference=chr_test.fa +##contig=<ID=chr_test,length=20000> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 +chr_test 2918 snp001 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 3037 snp002 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 3077 snp003 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 3143 snp004 G A 100 PASS DP=50 GT:DP 0|1:50 +chr_test 4566 snp005 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 5189 snp006 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6058 snp007 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6065 snp008 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6736 snp009 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6756 snp010 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 7166 snp011 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 7300 snp012 G A 100 PASS DP=50 GT:DP 0|1:50 +chr_test 7745 snp013 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 8340 snp014 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 8429 snp015 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 11302 snp016 G A 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12454 snp017 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12676 snp018 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12752 snp019 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12917 snp020 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 13437 snp021 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 14016 snp022 G A 100 PASS DP=50 GT:DP 0|1:50 +chr_test 15321 snp023 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 15569 snp024 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 16232 snp025 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 17524 snp026 C T 100 PASS 
DP=50 GT:DP 0|1:50 +chr_test 17593 snp027 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 17849 snp028 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 18776 snp029 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 19480 snp030 T C 100 PASS DP=50 GT:DP 0|1:50 diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz b/pipelines/nf-atacseq/tests/data/variants.vcf.gz deleted file mode 120000 index 380b7aa..0000000 --- a/pipelines/nf-atacseq/tests/data/variants.vcf.gz +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/variants.vcf.gz \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz b/pipelines/nf-atacseq/tests/data/variants.vcf.gz new file mode 100644 index 0000000..d7a7f83 Binary files /dev/null and b/pipelines/nf-atacseq/tests/data/variants.vcf.gz differ diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi deleted file mode 120000 index 7a95bbe..0000000 --- a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/variants.vcf.gz.tbi \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi new file mode 100644 index 0000000..98e77de Binary files /dev/null and b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi differ diff --git a/pipelines/nf-modules/modules/star/align/main.nf b/pipelines/nf-modules/modules/star/align/main.nf index fa9dfa6..db6b356 100644 --- a/pipelines/nf-modules/modules/star/align/main.nf +++ b/pipelines/nf-modules/modules/star/align/main.nf @@ -32,7 +32,7 @@ process STAR_ALIGN { --runThreadN ${task.cpus} \\ --genomeDir ${star_index} \\ --readFilesIn ${read_files} \\ - --readFilesCommand zcat \\ + --readFilesCommand "gunzip -c" \\ --outFileNamePrefix ${prefix}. 
\\ --outSAMtype BAM SortedByCoordinate \\ --outSAMunmapped Within \\ diff --git a/pipelines/nf-outrider/.nf-core.yml b/pipelines/nf-outrider/.nf-core.yml index 109d1da..e8731b9 100644 --- a/pipelines/nf-outrider/.nf-core.yml +++ b/pipelines/nf-outrider/.nf-core.yml @@ -24,6 +24,12 @@ lint: - .github/workflows/ - .github/ISSUE_TEMPLATE/ - .github/PULL_REQUEST_TEMPLATE.md + - assets/email_template.html + - assets/nf-core-PIPELINE_logo_light.png + - assets/sendmail_template.txt + - lib/NfcoreTemplate.groovy + - lib/NfcoreSchema.groovy + - lib/WorkflowMain.groovy files_unchanged: - CODE_OF_CONDUCT.md - LICENSE diff --git a/pipelines/nf-outrider/assets/email_template.html b/pipelines/nf-outrider/assets/email_template.html new file mode 100644 index 0000000..161c1e4 --- /dev/null +++ b/pipelines/nf-outrider/assets/email_template.html @@ -0,0 +1,48 @@ + + + + + + ${workflow.manifest.name} Pipeline Report + + +
+ + + +

${workflow.manifest.name} v${workflow.manifest.version}

+

Run Name: $runName

+ +<% if (!success) { %> +
+

⚠️ ${workflow.manifest.name} execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+<% } else { %> +
+${workflow.manifest.name} execution completed successfully! +
+<% } %> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + + +<% if (summary.size() > 0) { %> +<% for (e in summary) { %> +<% } %> +<% } %> +
Nextflow Version$nextflowVersion
Run Name$runName
Session ID$sessionId
${e.key}${e.value}
+ +

--- ${workflow.manifest.name}

+ +
+ + diff --git a/pipelines/nf-outrider/assets/nf-core-pipeline_logo_light.png b/pipelines/nf-outrider/assets/nf-core-pipeline_logo_light.png new file mode 100644 index 0000000..95c23e9 Binary files /dev/null and b/pipelines/nf-outrider/assets/nf-core-pipeline_logo_light.png differ diff --git a/pipelines/nf-outrider/assets/schema_input.json b/pipelines/nf-outrider/assets/schema_input.json new file mode 100644 index 0000000..4593f2b --- /dev/null +++ b/pipelines/nf-outrider/assets/schema_input.json @@ -0,0 +1,33 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/mcvickerlab/WASP2/master/pipelines/nf-outrider/assets/schema_input.json", + "title": "wasp2/nf-outrider pipeline - params.input schema", + "description": "Schema for the samplesheet used as input to wasp2/nf-outrider", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "bam": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bam$", + "errorMessage": "BAM file must be provided, cannot contain spaces, and must have extension '.bam'" + }, + "bai": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bam\\.bai$", + "errorMessage": "BAM index file must be provided, cannot contain spaces, and must have extension '.bam.bai'" + } + }, + "required": ["sample", "bam", "bai"] + } +} diff --git a/pipelines/nf-outrider/conf/base.config b/pipelines/nf-outrider/conf/base.config index 8b076a3..b62c805 100644 --- a/pipelines/nf-outrider/conf/base.config +++ b/pipelines/nf-outrider/conf/base.config @@ -14,7 +14,7 @@ process { // Error handling errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } - maxRetries = 3 + maxRetries = 1 maxErrors = '-1' // Process-specific resource requirements diff --git a/pipelines/nf-outrider/environment.yml b/pipelines/nf-outrider/environment.yml new file mode 100644 index 0000000..008b98b --- /dev/null +++ b/pipelines/nf-outrider/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# Conda environment for nf-outrider local Python modules +# (aggregate_counts, merge_counts, mae_detect) +channels: + - conda-forge + - bioconda +dependencies: + - python=3.11.* + - pandas>=2.0 + - scipy>=1.10 + - polars>=0.19 + - numpy>=1.21,<2.0 diff --git a/pipelines/nf-outrider/modules.json b/pipelines/nf-outrider/modules.json index 78f330d..f4520d7 100644 --- a/pipelines/nf-outrider/modules.json +++ b/pipelines/nf-outrider/modules.json @@ -1,5 +1,5 @@ { "name": "wasp2/nf-outrider", - "homePage": "", + "homePage": "https://github.com/mcvickerlab/WASP2", "repos": {} } diff --git a/pipelines/nf-outrider/modules/local/merge_counts/main.nf b/pipelines/nf-outrider/modules/local/merge_counts/main.nf index 8c663b3..876e36e 100644 --- a/pipelines/nf-outrider/modules/local/merge_counts/main.nf +++ b/pipelines/nf-outrider/modules/local/merge_counts/main.nf @@ -1,5 +1,5 @@ process MERGE_COUNTS { - tag "merge_counts" + tag "$meta.id" label 'process_medium' conda "${moduleDir}/../../../environment.yml" @@ -8,11 +8,11 @@ process MERGE_COUNTS { 'ghcr.io/mcvickerlab/wasp2:1.4.0' }" input: - path gene_counts // Collection of gene count files + tuple val(meta), path(gene_counts) output: - path "count_matrix.tsv", emit: count_matrix - path "versions.yml" , emit: versions + tuple val(meta), path("count_matrix.tsv"), emit: count_matrix + tuple val(meta), path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -114,6 +114,7 @@ EOF """ stub: + def prefix = task.ext.prefix ?: "${meta.id}" """ cat <<-END_HEADER > 
count_matrix.tsv gene_id sample1 sample2 sample3 diff --git a/pipelines/nf-outrider/modules/local/outrider_fit/main.nf b/pipelines/nf-outrider/modules/local/outrider_fit/main.nf index db7f14b..f70d286 100644 --- a/pipelines/nf-outrider/modules/local/outrider_fit/main.nf +++ b/pipelines/nf-outrider/modules/local/outrider_fit/main.nf @@ -1,5 +1,5 @@ process OUTRIDER_FIT { - tag "outrider" + tag "$meta.id" label 'process_high' label 'process_high_memory' @@ -9,7 +9,7 @@ process OUTRIDER_FIT { 'quay.io/biocontainers/bioconductor-outrider:1.26.3--r44he5774e6_0' }" input: - path count_matrix + tuple val(meta), path(count_matrix) val padj_cutoff val zscore_cutoff val encoding_dim @@ -18,16 +18,16 @@ process OUTRIDER_FIT { val min_count output: - path "outrider_model.rds" , emit: model - path "outrider_results.tsv" , emit: results - path "outrider_summary.html", emit: summary, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("outrider_model.rds") , emit: model + tuple val(meta), path("outrider_results.tsv") , emit: results + tuple val(meta), path("outrider_summary.html"), emit: summary, optional: true + tuple val(meta), path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when script: - def q_arg = encoding_dim ? "q_val <- ${encoding_dim}" : "ods <- estimateBestQ(ods); q_val <- getBestQ(ods)" + def q_arg = encoding_dim ? 
"q_val <- as.integer(${encoding_dim})" : "q_val <- as.integer(round(max(2, min(ncol(ods) - 1, nrow(ods) - 1, 500, 3.7 + 0.16 * ncol(ods)))))" """ set -euo pipefail @@ -64,7 +64,7 @@ process OUTRIDER_FIT { # Filter low-expressed genes min_count_thresh <- ${min_count} - min_samples <- max(2, floor(ncol(counts) * 0.5)) + min_samples <- max(1, floor(ncol(counts) * 0.5)) row_sums <- rowSums(counts >= min_count_thresh) keep_genes <- row_sums >= min_samples counts_filtered <- counts[keep_genes, , drop = FALSE] @@ -77,8 +77,6 @@ process OUTRIDER_FIT { # Create OutriderDataSet ods <- OutriderDataSet(countData = as.matrix(counts_filtered)) - ods <- filterExpression(ods, minCounts = TRUE, filterGenes = FALSE) - ods <- estimateSizeFactors(ods) # Encoding dimension ${q_arg} @@ -116,6 +114,7 @@ REOF """ stub: + def prefix = task.ext.prefix ?: "${meta.id}" """ touch outrider_model.rds cat <<-END_HEADER > outrider_results.tsv diff --git a/pipelines/nf-outrider/nextflow.config b/pipelines/nf-outrider/nextflow.config index a70e9d3..fa09f12 100644 --- a/pipelines/nf-outrider/nextflow.config +++ b/pipelines/nf-outrider/nextflow.config @@ -6,10 +6,17 @@ ---------------------------------------------------------------------------------------- */ +// Plugin configuration +plugins { + id 'nf-validation@1.1.3' +} + // Pipeline metadata manifest { name = 'wasp2/nf-outrider' author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' description = 'WASP2 + OUTRIDER for aberrant expression and mono-allelic expression detection' mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' @@ -77,6 +84,7 @@ params { // Generic options help = false version = false + validate_params = true tracedir = "${params.outdir}/pipeline_info" } @@ -102,7 +110,6 @@ profiles { conda.enabled = true docker.enabled = false singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" } docker { docker.enabled = true @@ -158,15 +165,15 @@ 
profiles { def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.tracedir}/timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.tracedir}/report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.tracedir}/trace_${trace_timestamp}.txt" } dag { enabled = true @@ -201,27 +208,24 @@ def check_max(obj, type) { try { if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println "WARNING: Invalid max_memory '${params.max_memory}', using default" + else return obj + } catch (Exception e) { + log.warn "Invalid memory config (${obj}, max=${params.max_memory}): ${e.message}. Using ${obj}" return obj } } else if (type == 'time') { try { if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println "WARNING: Invalid max_time '${params.max_time}', using default" + else return obj + } catch (Exception e) { + log.warn "Invalid time config (${obj}, max=${params.max_time}): ${e.message}. Using ${obj}" return obj } } else if (type == 'cpus') { - try { - return Math.min(obj, params.max_cpus as int) - } catch (all) { - println "WARNING: Invalid max_cpus '${params.max_cpus}', using default" + try { return Math.min(obj, params.max_cpus as int) } + catch (Exception e) { + log.warn "Invalid CPU config (${obj}, max=${params.max_cpus}): ${e.message}. 
Using ${obj}" return obj } } diff --git a/pipelines/nf-outrider/subworkflows/local/aberrant_expression/main.nf b/pipelines/nf-outrider/subworkflows/local/aberrant_expression/main.nf index 8868377..e7eb046 100644 --- a/pipelines/nf-outrider/subworkflows/local/aberrant_expression/main.nf +++ b/pipelines/nf-outrider/subworkflows/local/aberrant_expression/main.nf @@ -22,6 +22,8 @@ workflow ABERRANT_EXPRESSION { encoding_dim // val: encoding dimension (null for auto) max_iterations // val: max OUTRIDER iterations convergence // val: convergence threshold + min_count // val: min count per gene for expression filter + min_samples // val: minimum samples for OUTRIDER fitting main: ch_versions = Channel.empty() @@ -48,8 +50,8 @@ workflow ABERRANT_EXPRESSION { ch_gene_counts .count() .map { sample_count -> - if (sample_count < 15) { - log.warn "WARNING: OUTRIDER requires >= 15 samples for reliable results. Found ${sample_count} samples." + if (sample_count < min_samples) { + log.warn "WARNING: OUTRIDER requires >= ${min_samples} samples for reliable results. Found ${sample_count} samples." 
} } @@ -70,7 +72,8 @@ workflow ABERRANT_EXPRESSION { zscore_cutoff, encoding_dim, max_iterations, - convergence + convergence, + min_count ) ch_versions = ch_versions.mix(OUTRIDER_FIT.out.versions) diff --git a/pipelines/nf-outrider/subworkflows/local/aberrant_expression/tests/main.nf.test b/pipelines/nf-outrider/subworkflows/local/aberrant_expression/tests/main.nf.test index 77d20f8..8bf9181 100644 --- a/pipelines/nf-outrider/subworkflows/local/aberrant_expression/tests/main.nf.test +++ b/pipelines/nf-outrider/subworkflows/local/aberrant_expression/tests/main.nf.test @@ -27,6 +27,8 @@ nextflow_workflow { input[3] = null // encoding_dim (auto) input[4] = 15 // max_iterations input[5] = 1e-5 // convergence + input[6] = 10 // min_count + input[7] = 10 // min_samples """ } } @@ -56,6 +58,8 @@ nextflow_workflow { input[3] = 5 // explicit encoding_dim input[4] = 10 input[5] = 1e-4 + input[6] = 10 // min_count + input[7] = 10 // min_samples """ } } @@ -84,6 +88,8 @@ nextflow_workflow { input[3] = null input[4] = 20 input[5] = 1e-6 + input[6] = 10 // min_count + input[7] = 10 // min_samples """ } } @@ -110,6 +116,8 @@ nextflow_workflow { input[3] = null input[4] = 15 input[5] = 1e-5 + input[6] = 10 // min_count + input[7] = 10 // min_samples """ } } diff --git a/pipelines/nf-outrider/workflows/outrider.nf b/pipelines/nf-outrider/workflows/outrider.nf index d20123d..2bb1aef 100644 --- a/pipelines/nf-outrider/workflows/outrider.nf +++ b/pipelines/nf-outrider/workflows/outrider.nf @@ -109,9 +109,12 @@ workflow OUTRIDER { // MERGE_COUNTS( - ch_gene_counts.map { meta, counts -> counts }.collect() + ch_gene_counts + .map { meta, counts -> counts } + .collect() + .map { counts -> [ [id: 'all_samples'], counts ] } ) - ch_versions = ch_versions.mix(MERGE_COUNTS.out.versions) + ch_versions = ch_versions.mix(MERGE_COUNTS.out.versions.map { meta, versions -> versions }) // // STEP 4: OUTRIDER Aberrant Expression Detection @@ -128,8 +131,8 @@ workflow OUTRIDER { 
params.outrider_convergence, params.outrider_min_count ?: 10 // min count per gene for expression filter ) - ch_outliers = OUTRIDER_FIT.out.results - ch_versions = ch_versions.mix(OUTRIDER_FIT.out.versions) + ch_outliers = OUTRIDER_FIT.out.results.map { meta, results -> results } + ch_versions = ch_versions.mix(OUTRIDER_FIT.out.versions.map { meta, versions -> versions }) // // STEP 5: MAE Detection (Optional) diff --git a/pipelines/nf-rnaseq/.nf-core.yml b/pipelines/nf-rnaseq/.nf-core.yml index 17b2a9d..bc3e967 100644 --- a/pipelines/nf-rnaseq/.nf-core.yml +++ b/pipelines/nf-rnaseq/.nf-core.yml @@ -24,6 +24,12 @@ lint: - .github/workflows/ - .github/ISSUE_TEMPLATE/ - .github/PULL_REQUEST_TEMPLATE.md + - assets/email_template.html + - assets/nf-core-PIPELINE_logo_light.png + - assets/sendmail_template.txt + - lib/NfcoreTemplate.groovy + - lib/NfcoreSchema.groovy + - lib/WorkflowMain.groovy files_unchanged: - CODE_OF_CONDUCT.md - LICENSE diff --git a/pipelines/nf-rnaseq/README.md b/pipelines/nf-rnaseq/README.md index 8d7f647..e5a516c 100644 --- a/pipelines/nf-rnaseq/README.md +++ b/pipelines/nf-rnaseq/README.md @@ -40,6 +40,9 @@ nextflow run pipelines/nf-rnaseq -profile test_stub,docker # Full test with minimal data nextflow run pipelines/nf-rnaseq -profile test,docker + +# Apple Silicon (M1/M2/M3/M4) — add the arm profile +nextflow run pipelines/nf-rnaseq -profile test,docker,arm ``` ## Samplesheet Format @@ -86,6 +89,10 @@ results/ - STAR genome index - Indexed VCF with heterozygous variants +> **Apple Silicon (ARM64):** STAR containers are x86_64-only. Use `-profile docker,arm` +> to enable Rosetta 2 emulation. Requires Docker Desktop 4.16+ with Rosetta enabled. +> See [Usage Guide](docs/usage.md#apple-silicon--arm64) for details. 
+ ## Example Commands ### Basic Analysis diff --git a/pipelines/nf-rnaseq/assets/email_template.html b/pipelines/nf-rnaseq/assets/email_template.html new file mode 100644 index 0000000..161c1e4 --- /dev/null +++ b/pipelines/nf-rnaseq/assets/email_template.html @@ -0,0 +1,48 @@ + + + + + + ${workflow.manifest.name} Pipeline Report + + +
+ + + +

${workflow.manifest.name} v${workflow.manifest.version}

+

Run Name: $runName

+ +<% if (!success) { %> +
+

⚠️ ${workflow.manifest.name} execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+<% } else { %> +
+${workflow.manifest.name} execution completed successfully! +
+<% } %> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + + +<% if (summary.size() > 0) { %> +<% for (e in summary) { %> +<% } %> +<% } %> +
Nextflow Version$nextflowVersion
Run Name$runName
Session ID$sessionId
${e.key}${e.value}
+ +

--- ${workflow.manifest.name}

+ +
+ + diff --git a/pipelines/nf-rnaseq/assets/nf-core-pipeline_logo_light.png b/pipelines/nf-rnaseq/assets/nf-core-pipeline_logo_light.png new file mode 100644 index 0000000..21778d2 Binary files /dev/null and b/pipelines/nf-rnaseq/assets/nf-core-pipeline_logo_light.png differ diff --git a/pipelines/nf-rnaseq/assets/samplesheet_schema.json b/pipelines/nf-rnaseq/assets/schema_input.json similarity index 96% rename from pipelines/nf-rnaseq/assets/samplesheet_schema.json rename to pipelines/nf-rnaseq/assets/schema_input.json index 4e9dea7..0339583 100644 --- a/pipelines/nf-rnaseq/assets/samplesheet_schema.json +++ b/pipelines/nf-rnaseq/assets/schema_input.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/mcvickerlab/WASP2/main/pipelines/nf-rnaseq/assets/samplesheet_schema.json", + "$id": "https://raw.githubusercontent.com/mcvickerlab/WASP2/main/pipelines/nf-rnaseq/assets/schema_input.json", "title": "WASP2 RNA-seq ASE Pipeline - Samplesheet Schema", "description": "Schema for the samplesheet used as input to the WASP2 RNA-seq ASE pipeline", "type": "array", diff --git a/pipelines/nf-rnaseq/conf/arm.config b/pipelines/nf-rnaseq/conf/arm.config new file mode 100644 index 0000000..2b1b01f --- /dev/null +++ b/pipelines/nf-rnaseq/conf/arm.config @@ -0,0 +1,21 @@ +/* +======================================================================================== + ARM / Apple Silicon configuration for WASP2 RNA-seq ASE Pipeline +======================================================================================== + STAR and other bioinformatics tools do not publish native ARM (aarch64) container + images. This config forces Docker to use linux/amd64 images via Rosetta 2 + emulation on Apple Silicon Macs. 
+ + Usage: + nextflow run main.nf -profile docker,arm [options] + + Prerequisites: + - Docker Desktop 4.16+ with Rosetta 2 emulation enabled + - macOS on Apple Silicon (M1/M2/M3/M4) + + Performance note: + Expect ~2-3x slower execution compared to native x86_64 due to emulation. +---------------------------------------------------------------------------------------- +*/ + +docker.runOptions = '-u $(id -u):$(id -g) --platform linux/amd64' diff --git a/pipelines/nf-rnaseq/conf/base.config b/pipelines/nf-rnaseq/conf/base.config index 9fefe96..7bb4b38 100644 --- a/pipelines/nf-rnaseq/conf/base.config +++ b/pipelines/nf-rnaseq/conf/base.config @@ -74,33 +74,18 @@ process { cpus = { check_max(4 * task.attempt, 'cpus') } memory = { check_max(8.GB * task.attempt, 'memory') } time = { check_max(2.h * task.attempt, 'time') } - publishDir = [ - path: { "${params.outdir}/wasp_filtered" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } withName: 'WASP2_COUNT_ALLELES' { cpus = { check_max(4 * task.attempt, 'cpus') } memory = { check_max(8.GB * task.attempt, 'memory') } time = { check_max(2.h * task.attempt, 'time') } - publishDir = [ - path: { "${params.outdir}/counts" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } withName: 'WASP2_ANALYZE_IMBALANCE' { cpus = { check_max(2 * task.attempt, 'cpus') } memory = { check_max(4.GB * task.attempt, 'memory') } time = { check_max(1.h * task.attempt, 'time') } - publishDir = [ - path: { "${params.outdir}/analysis" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] } // nf-core resource limits — replaces params.max_cpus/max_memory/max_time diff --git a/pipelines/nf-rnaseq/docs/usage.md b/pipelines/nf-rnaseq/docs/usage.md index 22b74c2..bff8623 100644 --- a/pipelines/nf-rnaseq/docs/usage.md +++ b/pipelines/nf-rnaseq/docs/usage.md @@ -158,6 +158,10 @@ STAR --runMode genomeGenerate \ --sjdbOverhang 100 ``` +> **Apple Silicon (ARM) note:** STAR genome index generation must be run on an +> x86_64 machine or under Rosetta 2 / Docker `--platform linux/amd64` emulation. +> See [Apple Silicon / ARM64](#apple-silicon--arm64) below for details. + ## Pipeline Workflow ``` @@ -198,6 +202,50 @@ STAR --runMode genomeGenerate \ ## Troubleshooting +### Apple Silicon / ARM64 + +STAR and several bioinformatics containers are only available as x86_64 (amd64) +images. On Apple Silicon Macs (M1/M2/M3/M4), Docker must run these containers +under Rosetta 2 emulation. The pipeline provides a dedicated `arm` profile for +this: + +```bash +# Docker on Apple Silicon — add the arm profile +nextflow run pipelines/nf-rnaseq -profile docker,arm \ + --input samplesheet.csv \ + --vcf variants.vcf.gz \ + --star_index /path/to/star_index + +# Local test on Apple Silicon +nextflow run pipelines/nf-rnaseq -profile test_local,docker,arm +``` + +The `arm` profile adds `--platform linux/amd64` to `docker.runOptions`, forcing +Docker Desktop to pull and execute amd64 images via Rosetta 2. + +**Prerequisites for ARM:** +- Docker Desktop 4.16+ with Rosetta 2 emulation enabled + (Settings > General > "Use Rosetta for x86_64/amd64 emulation on Apple Silicon") +- Expect ~2-3x slower execution compared to native x86_64 + +**STAR genome index on ARM:** +STAR genome index generation (`--runMode genomeGenerate`) must also be run under +amd64 emulation. 
You can generate the index inside a Docker container: + +```bash +docker run --platform linux/amd64 --rm -v $(pwd):/data -w /data \ + community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4 \ + STAR --runMode genomeGenerate \ + --runThreadN 4 \ + --genomeDir star_index \ + --genomeFastaFiles genome.fa \ + --sjdbGTFfile genes.gtf \ + --sjdbOverhang 100 +``` + +Alternatively, generate the index on an x86_64 machine and transfer the +`star_index/` directory to your ARM Mac. + ### Common Issues **"VCF index not found"** diff --git a/pipelines/nf-rnaseq/environment.yml b/pipelines/nf-rnaseq/environment.yml new file mode 100644 index 0000000..af043d2 --- /dev/null +++ b/pipelines/nf-rnaseq/environment.yml @@ -0,0 +1,24 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# Conda environment for nf-rnaseq local Python/WASP2 modules +# (wasp2_unified_make_reads, wasp2_filter_remapped, wasp2_count_alleles, wasp2_analyze_imbalance, wasp2_ml_output) +channels: + - conda-forge + - bioconda +dependencies: + - python>=3.10 + - numpy>=1.21,<2.0 + - pandas>=2.0 + - polars>=0.19 + - scipy>=1.10 + - pysam + - pybedtools + - samtools + - bcftools + - bedtools + - anndata>=0.8.0 + - typer + - rich + - pip + - pip: + - wasp2==1.2.1 diff --git a/pipelines/nf-rnaseq/main.nf b/pipelines/nf-rnaseq/main.nf index 2ed8540..62c774f 100644 --- a/pipelines/nf-rnaseq/main.nf +++ b/pipelines/nf-rnaseq/main.nf @@ -16,6 +16,26 @@ nextflow.enable.dsl = 2 +/* +======================================================================================== + IMPORT FUNCTIONS +======================================================================================== +*/ + +include { paramsHelp; paramsSummaryLog } from 'plugin/nf-validation' + +/* +======================================================================================== + PRINT HELP MESSAGE 
+======================================================================================== +*/ + +if (params.help) { + def help_string = paramsHelp("nextflow run nf-rnaseq --input samplesheet.csv --vcf variants.vcf.gz --star_index /path/to/star_index -profile docker") + log.info help_string + System.exit(0) +} + /* ======================================================================================== VALIDATE & PRINT PARAMETER SUMMARY diff --git a/pipelines/nf-rnaseq/modules/local/environment.yml b/pipelines/nf-rnaseq/modules/local/environment.yml new file mode 100644 index 0000000..ccdc432 --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/environment.yml @@ -0,0 +1,22 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# Conda environment for nf-rnaseq WASP2 local modules +channels: + - conda-forge + - bioconda +dependencies: + - python>=3.10 + - numpy + - pandas + - polars + - scipy + - pysam + - pybedtools + - samtools + - bcftools + - bedtools + - typer + - rich + - pip + - pip: + - wasp2==1.2.1 diff --git a/pipelines/nf-rnaseq/modules/local/star_align/meta.yml b/pipelines/nf-rnaseq/modules/local/star_align/meta.yml new file mode 100644 index 0000000..0c70b5f --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/star_align/meta.yml @@ -0,0 +1,99 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "star_align" +description: Align RNA-seq reads to a reference genome using STAR with two-pass mode +keywords: + - alignment + - rna-seq + - star + - bam + - spliced-alignment + +tools: + - star: + description: "STAR: Spliced Transcripts Alignment to a Reference" + homepage: "https://github.com/alexdobin/STAR" + documentation: "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf" + doi: "10.1093/bioinformatics/bts635" + licence: ["MIT"] + identifier: biotools:star + - samtools: + description: "Tools for 
manipulating next-generation sequencing data" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools.html" + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + identifier: biotools:samtools + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - reads: + type: file + description: FASTQ files (single-end or paired-end) + pattern: "*.{fq,fastq}{,.gz}" + - - star_index: + type: directory + description: STAR genome index directory + - - gtf: + type: file + description: GTF annotation file (optional) + pattern: "*.gtf" + +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*.Aligned.sortedByCoord.out.bam": + type: file + description: Coordinate-sorted BAM file + pattern: "*.Aligned.sortedByCoord.out.bam" + - "*.Aligned.sortedByCoord.out.bam.bai": + type: file + description: BAM index file + pattern: "*.Aligned.sortedByCoord.out.bam.bai" + - log_final: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*.Log.final.out": + type: file + description: STAR final alignment log with mapping statistics + pattern: "*.Log.final.out" + - log_out: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*.Log.out": + type: file + description: STAR main log file + pattern: "*.Log.out" + - sj_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - "*.SJ.out.tab": + type: file + description: Splice junction table + pattern: "*.SJ.out.tab" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Jaureguy760" +maintainers: + - "@Jaureguy760" diff --git a/pipelines/nf-rnaseq/modules/local/wasp2_analyze_imbalance/meta.yml b/pipelines/nf-rnaseq/modules/local/wasp2_analyze_imbalance/meta.yml new file mode 100644 index 0000000..6bf959b --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/wasp2_analyze_imbalance/meta.yml @@ -0,0 +1,56 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "wasp2_analyze_imbalance" +description: Perform statistical testing for allelic imbalance using likelihood ratio tests +keywords: + - wasp + - allelic-imbalance + - ase + - statistical-testing + - likelihood-ratio + - rna-seq + +tools: + - wasp2: + description: "WASP2: Allele-specific software for robust molecular QTL discovery" + homepage: "https://github.com/mcvickerlab/WASP2" + documentation: "https://github.com/mcvickerlab/WASP2/wiki" + doi: "10.1038/nmeth.3582" + licence: ["Apache-2.0"] + identifier: biotools:wasp + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - counts: + type: file + description: | + Tab-separated allele count file from WASP2_COUNT_ALLELES. + Must contain columns: chrom, pos, ref, alt, ref_count, alt_count + pattern: "*_counts.tsv" + +output: + - results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_ai_results.tsv": + type: file + description: | + Tab-separated file with allelic imbalance test results. 
+ Columns: region, snp_count, ref_sum, alt_sum, mu, null_ll, alt_ll, LRT, pvalue, fdr + pattern: "*_ai_results.tsv" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Jaureguy760" +maintainers: + - "@Jaureguy760" diff --git a/pipelines/nf-rnaseq/modules/local/wasp2_count_alleles/meta.yml b/pipelines/nf-rnaseq/modules/local/wasp2_count_alleles/meta.yml new file mode 100644 index 0000000..227a6a0 --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/wasp2_count_alleles/meta.yml @@ -0,0 +1,78 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "wasp2_count_alleles" +description: Count reference and alternate allele reads at heterozygous SNPs for ASE analysis +keywords: + - wasp + - allele-specific + - allele-counting + - ase + - heterozygous + - snp + - rna-seq + +tools: + - wasp2: + description: "WASP2: Allele-specific software for robust molecular QTL discovery" + homepage: "https://github.com/mcvickerlab/WASP2" + documentation: "https://github.com/mcvickerlab/WASP2/wiki" + doi: "10.1038/nmeth.3582" + licence: ["Apache-2.0"] + identifier: biotools:wasp + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false, sample:'NA12878' ] + - bam: + type: file + description: WASP-filtered BAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bam.bai" + - - meta2: + type: map + description: | + Groovy Map containing reference metadata + e.g. [ id:'reference' ] + - vcf: + type: file + description: VCF file containing variant genotypes + pattern: "*.{vcf,vcf.gz,bcf}" + - vcf_index: + type: file + description: VCF index file (tabix .tbi or .csi) + pattern: "*.{tbi,csi}" + - - gtf: + type: file + description: | + GTF annotation file for gene-level allele counting (optional). 
+ When provided, counts are annotated with gene features. + pattern: "*.gtf" + +output: + - counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_counts.tsv": + type: file + description: | + Tab-separated file with allele counts per variant. + Columns: chrom, pos, ref, alt, region, ref_count, alt_count, other_count, N + pattern: "*_counts.tsv" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Jaureguy760" +maintainers: + - "@Jaureguy760" diff --git a/pipelines/nf-rnaseq/modules/local/wasp2_filter_remapped/meta.yml b/pipelines/nf-rnaseq/modules/local/wasp2_filter_remapped/meta.yml new file mode 100644 index 0000000..019cdaf --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/wasp2_filter_remapped/meta.yml @@ -0,0 +1,98 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "wasp2_filter_remapped" +description: Filter remapped reads using the WASP algorithm and merge with non-variant reads +keywords: + - wasp + - mapping-bias + - filter + - remapping + - allele-specific + - bam + +tools: + - wasp2: + description: "WASP2: Allele-specific software for robust molecular QTL discovery" + homepage: "https://github.com/mcvickerlab/WASP2" + documentation: "https://github.com/mcvickerlab/WASP2/wiki" + doi: "10.1038/nmeth.3582" + licence: ["Apache-2.0"] + identifier: biotools:wasp + - samtools: + description: "Tools for manipulating next-generation sequencing data" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools.html" + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + identifier: biotools:samtools + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - remapped_bam: + type: file + description: BAM file from remapping allele-swapped reads + pattern: "*.bam" + - remapped_bai: + type: file + description: BAM index for remapped reads + pattern: "*.bam.bai" + - - _meta2: + type: map + description: Sample metadata (unused, joined by meta.id) + - to_remap_bam: + type: file + description: Original BAM with reads that overlapped variants (before remapping) + pattern: "*_to_remap.bam" + - - _meta3: + type: map + description: Sample metadata (unused, joined by meta.id) + - keep_bam: + type: file + description: BAM with reads not overlapping variants + pattern: "*_keep.bam" + - - _meta4: + type: map + description: Sample metadata (unused, joined by meta.id) + - wasp_json: + type: file + description: JSON file from make-reads tracking intermediate file metadata + pattern: "*_wasp_data.json" + +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_wasp_filt.bam": + type: file + description: WASP-filtered and merged BAM (sorted and indexed) + pattern: "*_wasp_filt.bam" + - "*_wasp_filt.bam.bai": + type: file + description: BAM index for WASP-filtered BAM + pattern: "*_wasp_filt.bam.bai" + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - "*.filter_stats.txt": + type: file + description: Text file with filtering statistics (read counts before/after) + pattern: "*.filter_stats.txt" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Jaureguy760" +maintainers: + - "@Jaureguy760" diff --git a/pipelines/nf-rnaseq/modules/local/wasp2_ml_output/environment.yml b/pipelines/nf-rnaseq/modules/local/wasp2_ml_output/environment.yml new file mode 100644 index 0000000..e35720b --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/wasp2_ml_output/environment.yml @@ -0,0 +1,19 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# Conda environment for WASP2 ML output module +channels: + - conda-forge + - bioconda +dependencies: + - python>=3.10 + - numpy + - pandas + - polars + - scipy + - pysam + - anndata>=0.8.0 + - typer + - rich + - pip + - pip: + - wasp2==1.2.1 diff --git a/pipelines/nf-rnaseq/modules/local/wasp2_ml_output/meta.yml b/pipelines/nf-rnaseq/modules/local/wasp2_ml_output/meta.yml new file mode 100644 index 0000000..e3b01ed --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/wasp2_ml_output/meta.yml @@ -0,0 +1,100 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "wasp2_ml_output" +description: Convert WASP2 allele count TSV to ML-ready formats (Zarr, Parquet, AnnData) +keywords: + - wasp + - machine-learning + - zarr + - parquet + - anndata + - h5ad + - format-conversion + - scverse + +tools: + - wasp2: + description: "WASP2: Allele-specific software for robust molecular QTL discovery" + homepage: "https://github.com/mcvickerlab/WASP2" + documentation: "https://github.com/mcvickerlab/WASP2/wiki" + doi: "10.1038/nmeth.3582" + licence: ["Apache-2.0"] + identifier: biotools:wasp + - pandas: + description: "Powerful data 
structures for data analysis" + homepage: "https://pandas.pydata.org/" + documentation: "https://pandas.pydata.org/docs/" + licence: ["BSD-3-Clause"] + identifier: "" + - zarr: + description: "Chunked, compressed, N-dimensional arrays" + homepage: "https://zarr.readthedocs.io/" + documentation: "https://zarr.readthedocs.io/" + licence: ["MIT"] + identifier: "" + - anndata: + description: "Annotated data matrices for single-cell and genomics data" + homepage: "https://anndata.readthedocs.io/" + documentation: "https://anndata.readthedocs.io/" + licence: ["BSD-3-Clause"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - counts: + type: file + description: | + Tab-separated allele count file from WASP2_COUNT_ALLELES. + Must contain columns: chrom, pos, ref, alt, ref_count, alt_count + pattern: "*_counts.tsv" + - - output_format: + type: string + description: | + Comma-separated list of output formats to generate. + Valid values: zarr, parquet, anndata (or h5ad) + e.g. "zarr,parquet,anndata" + +output: + - zarr: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*.zarr": + type: directory + description: Zarr store with allele counts (GenVarLoader compatible) + pattern: "*.zarr" + - parquet: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*.parquet": + type: file + description: Parquet file with allele counts (Polars/DuckDB compatible) + pattern: "*.parquet" + - anndata: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - "*.h5ad": + type: file + description: AnnData h5ad file with allele counts (scverse compatible) + pattern: "*.h5ad" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Jaureguy760" +maintainers: + - "@Jaureguy760" diff --git a/pipelines/nf-rnaseq/modules/local/wasp2_unified_make_reads/meta.yml b/pipelines/nf-rnaseq/modules/local/wasp2_unified_make_reads/meta.yml new file mode 100644 index 0000000..64fa70e --- /dev/null +++ b/pipelines/nf-rnaseq/modules/local/wasp2_unified_make_reads/meta.yml @@ -0,0 +1,106 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "wasp2_unified_make_reads" +description: Generate allele-swapped reads for WASP2 mapping bias correction in RNA-seq +keywords: + - wasp + - mapping-bias + - allele-swapping + - remapping + - variant-aware + - rna-seq + - ase + +tools: + - wasp2: + description: "WASP2: Allele-specific software for robust molecular QTL discovery" + homepage: "https://github.com/mcvicker-lab/WASP2" + documentation: "https://github.com/mcvicker-lab/WASP2/wiki" + doi: "10.1038/nmeth.3582" + licence: ["Apache-2.0"] + identifier: biotools:wasp + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false, sample:'NA12878' ] + - bam: + type: file + description: BAM file with aligned reads + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bam.bai" + - - meta2: + type: map + description: | + Groovy Map containing reference metadata + e.g. [ id:'reference' ] + - vcf: + type: file + description: | + VCF file containing variant genotypes. + For compressed VCF files (.vcf.gz), an index file (.tbi) should be co-located. 
+ pattern: "*.{vcf,vcf.gz,bcf}" + - vcf_index: + type: file + description: VCF index file (tabix .tbi or .csi) + pattern: "*.{tbi,csi}" + +output: + - remap_fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_remap_r1.fq.gz": + type: file + description: FASTQ R1 with allele-swapped reads for remapping + pattern: "*_remap_r1.fq.gz" + - "*_remap_r2.fq.gz": + type: file + description: FASTQ R2 with allele-swapped reads for remapping + pattern: "*_remap_r2.fq.gz" + - to_remap_bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_to_remap.bam": + type: file + description: Intermediate BAM with reads overlapping variants (before remapping) + pattern: "*_to_remap.bam" + - keep_bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_keep.bam": + type: file + description: Intermediate BAM with reads not overlapping variants (kept as-is) + pattern: "*_keep.bam" + - wasp_json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - "*_wasp_data.json": + type: file + description: JSON file tracking intermediate file paths and read metadata + pattern: "*_wasp_data.json" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Jaureguy760" +maintainers: + - "@Jaureguy760" diff --git a/pipelines/nf-rnaseq/nextflow.config b/pipelines/nf-rnaseq/nextflow.config index f26865d..20138ba 100644 --- a/pipelines/nf-rnaseq/nextflow.config +++ b/pipelines/nf-rnaseq/nextflow.config @@ -4,6 +4,22 @@ ======================================================================================== */ +plugins { + id 'nf-validation@1.1.3' +} + +// Pipeline manifest +manifest { + name = 'wasp2/nf-rnaseq' + author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' + description = 'RNA-seq Allele-Specific Expression (ASE) pipeline with WASP2' + mainScript = 'main.nf' + nextflowVersion = '!>=23.04.0' + version = '1.0.0' +} + // Global default params params { // Pipeline options @@ -39,6 +55,9 @@ params { // ML Output options output_format = null // ML output formats: zarr,parquet,anndata (comma-separated) + // Validation + validate_params = true + // Resource limits max_cpus = 16 max_memory = '128.GB' @@ -74,7 +93,6 @@ profiles { conda.enabled = true docker.enabled = false singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" } docker { @@ -84,6 +102,13 @@ profiles { singularity.enabled = false } + arm { + // Apple Silicon / ARM64 compatibility — forces linux/amd64 containers + // via Rosetta 2 emulation. 
Combine with a container profile: + // nextflow run main.nf -profile docker,arm [options] + includeConfig 'conf/arm.config' + } + singularity { singularity.enabled = true singularity.autoMounts = true @@ -136,37 +161,22 @@ profiles { } } -// Container overrides -def wasp2_container = 'ghcr.io/mcvickerlab/wasp2:1.4.0' -def star_container = 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4' -process { - withName: 'WASP2_UNIFIED_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_ALLELES|WASP2_ANALYZE_IMBALANCE|WASP2_ML_OUTPUT' { - container = wasp2_container - } - withName: 'STAR_ALIGN.*' { - container = star_container - } -} - -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] - // Execution reports def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.tracedir}/timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.tracedir}/report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.tracedir}/trace_${trace_timestamp}.txt" } dag { @@ -181,44 +191,45 @@ env { R_ENVIRON_USER = "/.Renviron" } -// Pipeline manifest -manifest { - name = 'wasp2/nf-rnaseq' - author = 'WASP2 Team' - homePage = 'https://github.com/mcvickerlab/WASP2' - description = 'RNA-seq Allele-Specific Expression (ASE) pipeline with WASP2' - mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.0.0' +// Container overrides +def wasp2_container = 'ghcr.io/mcvickerlab/wasp2:1.4.0' +def star_container = 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4' +process { + withName: 
'WASP2_UNIFIED_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_ALLELES|WASP2_ANALYZE_IMBALANCE|WASP2_ML_OUTPUT' { + container = wasp2_container + } + withName: 'STAR_ALIGN.*' { + container = star_container + } } +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] + // Function to check max resource limits def check_max(obj, type) { if (type == 'memory') { try { if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid!" + else return obj + } catch (Exception e) { + log.warn "Invalid memory config (${obj}, max=${params.max_memory}): ${e.message}. Using ${obj}" return obj } } else if (type == 'time') { try { if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid!" + else return obj + } catch (Exception e) { + log.warn "Invalid time config (${obj}, max=${params.max_time}): ${e.message}. Using ${obj}" return obj } } else if (type == 'cpus') { - try { - return Math.min(obj, params.max_cpus as int) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid!" + try { return Math.min(obj, params.max_cpus as int) } + catch (Exception e) { + log.warn "Invalid CPU config (${obj}, max=${params.max_cpus}): ${e.message}. 
Using ${obj}" return obj } } diff --git a/pipelines/nf-rnaseq/tests/data/integration/generate_test_data.sh b/pipelines/nf-rnaseq/tests/data/integration/generate_test_data.sh index 7fb8795..77e0b95 100755 --- a/pipelines/nf-rnaseq/tests/data/integration/generate_test_data.sh +++ b/pipelines/nf-rnaseq/tests/data/integration/generate_test_data.sh @@ -30,6 +30,17 @@ echo "===================================================================" echo "Working directory: $SCRIPT_DIR" echo "" +# Detect Apple Silicon / ARM64 and warn about STAR compatibility +ARCH=$(uname -m) +if [[ "$ARCH" == "arm64" || "$ARCH" == "aarch64" ]]; then + echo "WARNING: Detected ARM architecture ($ARCH)." + echo " STAR does not have native ARM builds. If STAR is not installed" + echo " via Rosetta 2 or an x86_64 emulation layer, genome generation" + echo " will fail. Consider running this script on an x86_64 machine" + echo " or inside a Docker container with --platform linux/amd64." + echo "" +fi + # ----------------------------------------------------------------------------- # Check prerequisites # ----------------------------------------------------------------------------- diff --git a/pipelines/nf-scatac/.nf-core.yml b/pipelines/nf-scatac/.nf-core.yml index 195c6e5..36b18a3 100644 --- a/pipelines/nf-scatac/.nf-core.yml +++ b/pipelines/nf-scatac/.nf-core.yml @@ -16,7 +16,6 @@ template: # Linting configuration lint: - # Skip checks that don't apply to this pipeline files_exist: - docs/README.md - docs/output.md @@ -24,6 +23,12 @@ lint: - .github/workflows/ - .github/ISSUE_TEMPLATE/ - .github/PULL_REQUEST_TEMPLATE.md + - assets/email_template.html + - assets/nf-core-PIPELINE_logo_light.png + - assets/sendmail_template.txt + - lib/NfcoreTemplate.groovy + - lib/NfcoreSchema.groovy + - lib/WorkflowMain.groovy files_unchanged: - CODE_OF_CONDUCT.md - LICENSE diff --git a/pipelines/nf-scatac/assets/email_template.html b/pipelines/nf-scatac/assets/email_template.html new file mode 100644 index 
0000000..161c1e4 --- /dev/null +++ b/pipelines/nf-scatac/assets/email_template.html @@ -0,0 +1,48 @@ + + + + + + ${workflow.manifest.name} Pipeline Report + + +
+ + + +

${workflow.manifest.name} v${workflow.manifest.version}

+

Run Name: $runName

+ +<% if (!success) { %> +
+

⚠️ ${workflow.manifest.name} execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+<% } else { %> +
+${workflow.manifest.name} execution completed successfully! +
+<% } %> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + + +<% if (summary.size() > 0) { %> +<% for (e in summary) { %> +<% } %> +<% } %> +
Nextflow Version$nextflowVersion
Run Name$runName
Session ID$sessionId
${e.key}${e.value}
+ +

--- ${workflow.manifest.name}

+ +
+ + diff --git a/pipelines/nf-scatac/assets/nf-core-pipeline_logo_light.png b/pipelines/nf-scatac/assets/nf-core-pipeline_logo_light.png new file mode 100644 index 0000000..1ed3fc1 Binary files /dev/null and b/pipelines/nf-scatac/assets/nf-core-pipeline_logo_light.png differ diff --git a/pipelines/nf-scatac/conf/base.config b/pipelines/nf-scatac/conf/base.config index 64419e3..c8e96d7 100644 --- a/pipelines/nf-scatac/conf/base.config +++ b/pipelines/nf-scatac/conf/base.config @@ -39,11 +39,6 @@ process { withLabel:process_high_memory { memory = { check_max( 128.GB * task.attempt, 'memory' ) } } - withLabel:process_wasp2 { - cpus = { check_max( 4, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } withLabel:error_ignore { errorStrategy = 'ignore' } diff --git a/pipelines/nf-scatac/environment.yml b/pipelines/nf-scatac/environment.yml new file mode 100644 index 0000000..1dfb786 --- /dev/null +++ b/pipelines/nf-scatac/environment.yml @@ -0,0 +1,25 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# Conda environment for nf-scatac local Python/WASP2 modules +# (scatac_count_alleles, scatac_create_anndata, scatac_add_haplotype_layers, scatac_pseudobulk) +channels: + - conda-forge + - bioconda +dependencies: + - python>=3.10 + - numpy>=1.21,<2.0 + - pandas>=2.0 + - polars>=0.19 + - scipy>=1.10 + - pysam + - pybedtools + - samtools + - bcftools + - bedtools + - anndata>=0.8.0 + - sinto>=0.9.0 + - typer + - rich + - pip + - pip: + - wasp2==1.2.1 diff --git a/pipelines/nf-scatac/modules.json b/pipelines/nf-scatac/modules.json index 54cdd84..beb1857 100644 --- a/pipelines/nf-scatac/modules.json +++ b/pipelines/nf-scatac/modules.json @@ -1,5 +1,5 @@ { "name": "wasp2/nf-scatac", - "homePage": "", + "homePage": "https://github.com/mcvickerlab/WASP2", "repos": {} } diff --git 
a/pipelines/nf-scatac/modules/local/scatac_add_haplotype_layers/meta.yml b/pipelines/nf-scatac/modules/local/scatac_add_haplotype_layers/meta.yml new file mode 100644 index 0000000..0b4e955 --- /dev/null +++ b/pipelines/nf-scatac/modules/local/scatac_add_haplotype_layers/meta.yml @@ -0,0 +1,81 @@ +name: scatac_add_haplotype_layers +description: Add haplotype-resolved count layers to AnnData using phased VCF genotypes +keywords: + - scatac + - single-cell + - haplotype + - phasing + - allelic imbalance + - anndata + - wasp2 +tools: + - python: + description: Python programming language + homepage: https://www.python.org/ + licence: ["PSF-2.0"] + - anndata: + description: Annotated multivariate observation data + homepage: https://anndata.readthedocs.io/ + licence: ["BSD-3-Clause"] + - pysam: + description: Python interface to samtools/htslib for reading VCF/BAM files + homepage: https://pysam.readthedocs.io/ + licence: ["MIT"] + - wasp2: + description: WASP2 allelic imbalance analysis tools + homepage: https://github.com/mcvickerlab/WASP2 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', sample:'donor1' ] + - anndata: + type: file + description: AnnData h5ad file with ref and alt count layers from allele counting + pattern: "*.h5ad" + - meta2: + type: map + description: | + Groovy Map containing VCF sample information + - vcf: + type: file + description: Phased VCF file containing genotype information for haplotype assignment + pattern: "*.vcf.gz" + - vcf_index: + type: file + description: Tabix index for the phased VCF file + pattern: "*.vcf.gz.tbi" + - create_zarr: + type: val + description: Boolean flag to optionally output a Zarr copy of the AnnData + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', sample:'donor1' ] + - anndata: + type: file + description: AnnData h5ad with haplotype layers (X, ref, alt, hap1, hap2) and phasing metadata in var/uns + pattern: "*_with_haplotypes.h5ad" + - zarr: + type: directory + description: Optional Zarr representation of the haplotype-resolved AnnData + pattern: "*.zarr" + - cell_qc: + type: file + description: Per-cell QC metrics including counts per layer (total, ref, alt, hap1, hap2) + pattern: "*_cell_qc.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Jaureguy760" +maintainers: + - "@Jaureguy760" diff --git a/pipelines/nf-scatac/modules/local/scatac_count_alleles/main.nf b/pipelines/nf-scatac/modules/local/scatac_count_alleles/main.nf index ef524bf..593070c 100644 --- a/pipelines/nf-scatac/modules/local/scatac_count_alleles/main.nf +++ b/pipelines/nf-scatac/modules/local/scatac_count_alleles/main.nf @@ -77,7 +77,7 @@ process SCATAC_COUNT_ALLELES { print "total_barcodes", length(bc) print "total_snps", length(snp) print "total_fragment_overlaps", tot - print "mean_snps_per_cell", length(bc) > 0 ? 
length(snp)/length(bc) : 0 + if (length(bc) > 0) print "mean_snps_per_cell", length(snp)/length(bc); else print "mean_snps_per_cell", 0 }' ${prefix}_allele_counts.tsv > ${prefix}_count_stats.tsv [ \$(wc -l < ${prefix}_allele_counts.tsv) -lt 2 ] && echo "WARNING: No overlaps found" >&2 || true @@ -85,7 +85,7 @@ process SCATAC_COUNT_ALLELES { cat <<-END_VERSIONS > versions.yml "${task.process}": bedtools: \$(bedtools --version | sed 's/bedtools v//') - awk: \$(awk --version | head -1 | sed 's/GNU Awk //' | cut -d',' -f1) + awk: \$(awk --version 2>&1 | head -1 | sed 's/GNU Awk //' | cut -d',' -f1 || echo "unknown") END_VERSIONS """ diff --git a/pipelines/nf-scatac/modules/local/scatac_pseudobulk/main.nf b/pipelines/nf-scatac/modules/local/scatac_pseudobulk/main.nf index 51fc1be..ec69d94 100644 --- a/pipelines/nf-scatac/modules/local/scatac_pseudobulk/main.nf +++ b/pipelines/nf-scatac/modules/local/scatac_pseudobulk/main.nf @@ -38,9 +38,9 @@ process SCATAC_PSEUDOBULK { set -euo pipefail # Aggregate per-cell counts to pseudo-bulk and generate stats in one pass - awk -v OFS='\\t' -v min_cells="${min_cells}" -v prefix="${prefix}" ' + awk -v OFS='\\t' -v min_cells="${min_cells}" -v pb_file="${prefix}_pseudobulk_counts.tsv" -v stats_file="${prefix}_aggregation_stats.tsv" ' BEGIN { - print "chrom", "pos", "ref", "alt", "ref_count", "alt_count" > prefix "_pseudobulk_counts.tsv" + print "chrom", "pos", "ref", "alt", "ref_count", "alt_count" > pb_file } NR > 1 { key = \$2 OFS \$3 OFS \$4 OFS \$5 @@ -54,21 +54,21 @@ process SCATAC_PSEUDOBULK { filtered_count = 0 for (key in total) { if (cells_per_snp[key] >= min_cells) { - print key, total[key], 0 >> prefix "_pseudobulk_counts.tsv" + print key, total[key], 0 >> pb_file filtered_count++ } } # Write aggregation stats - print "metric", "value" > prefix "_aggregation_stats.tsv" - print "total_cells_input", length(input_cells) >> prefix "_aggregation_stats.tsv" - print "total_snps_input", length(input_snps) >> prefix 
"_aggregation_stats.tsv" - print "snps_after_filtering", filtered_count >> prefix "_aggregation_stats.tsv" + print "metric", "value" > stats_file + print "total_cells_input", length(input_cells) >> stats_file + print "total_snps_input", length(input_snps) >> stats_file + print "snps_after_filtering", filtered_count >> stats_file }' ${cell_counts} cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(awk --version | head -1 | sed 's/GNU Awk //' | cut -d',' -f1) + awk: \$(awk --version 2>&1 | head -1 | sed 's/GNU Awk //' | cut -d',' -f1 || echo "unknown") END_VERSIONS """ diff --git a/pipelines/nf-scatac/nextflow.config b/pipelines/nf-scatac/nextflow.config index 679d953..ef07dca 100644 --- a/pipelines/nf-scatac/nextflow.config +++ b/pipelines/nf-scatac/nextflow.config @@ -4,9 +4,16 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// Plugin configuration +plugins { + id 'nf-validation@1.1.3' +} + manifest { name = 'wasp2/nf-scatac' author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' description = 'Single-Cell ATAC-seq Allelic Imbalance Pipeline' mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' @@ -77,6 +84,11 @@ profiles { process.beforeScript = 'echo $HOSTNAME' cleanup = false } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + } docker { docker.enabled = true conda.enabled = false @@ -92,12 +104,6 @@ profiles { conda.enabled = false docker.enabled = false } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" - } test { includeConfig 'conf/test.config' } @@ -145,6 +151,13 @@ dag { file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } +// Export these variables to prevent local Python/Perl libs from conflicting +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = 
"/.Renviron" +} + process.shell = ['/bin/bash', '-euo', 'pipefail'] // Resource limit checker with logging for configuration errors diff --git a/pipelines/nf-scatac/subworkflows/local/utils_nfscatac_pipeline.nf b/pipelines/nf-scatac/subworkflows/local/utils_nfscatac_pipeline.nf index 8c66031..799ea2b 100644 --- a/pipelines/nf-scatac/subworkflows/local/utils_nfscatac_pipeline.nf +++ b/pipelines/nf-scatac/subworkflows/local/utils_nfscatac_pipeline.nf @@ -35,6 +35,13 @@ workflow PIPELINE_INITIALISATION { error "Samplesheet error for '${row.sample}': provide 'fragments', 'cellranger_dir', or 'bam'" } + // Resolve Nextflow variables that don't interpolate inside CSV data + def resolvePath = { String p -> + p ? p.replace('${projectDir}', projectDir.toString()) + .replace('${launchDir}', launchDir.toString()) + : p + } + def meta = [ id: row.sample, single_end: false, @@ -47,10 +54,10 @@ workflow PIPELINE_INITIALISATION { def fragments = file('NO_FILE_FRAGS') def fragments_tbi = file('NO_FILE_FRAGS_TBI') if (row.fragments) { - fragments = file(row.fragments, checkIfExists: true) + fragments = file(resolvePath(row.fragments), checkIfExists: true) fragments_tbi = file("${fragments}.tbi", checkIfExists: true) } else if (row.cellranger_dir) { - def frag_path = "${row.cellranger_dir}/outs/fragments.tsv.gz" + def frag_path = "${resolvePath(row.cellranger_dir)}/outs/fragments.tsv.gz" if (file(frag_path).exists()) { fragments = file(frag_path, checkIfExists: true) fragments_tbi = file("${frag_path}.tbi", checkIfExists: true) @@ -61,7 +68,7 @@ workflow PIPELINE_INITIALISATION { def bam = file('NO_FILE_BAM') def bai = file('NO_FILE_BAI') if (row.bam && row.bam.trim()) { - bam = file(row.bam, checkIfExists: true) + bam = file(resolvePath(row.bam), checkIfExists: true) // Try common BAI naming conventions: .bam.bai and .bai def bai_path1 = file("${bam}.bai") def bai_path2 = file("${bam}".replaceAll(/\.bam$/, '.bai')) @@ -73,7 +80,7 @@ workflow PIPELINE_INITIALISATION { error 
"Samplesheet error for '${row.sample}': BAM index not found. Tried: ${bai_path1}, ${bai_path2}" } } else if (row.cellranger_dir) { - def bam_path = "${row.cellranger_dir}/outs/possorted_bam.bam" + def bam_path = "${resolvePath(row.cellranger_dir)}/outs/possorted_bam.bam" if (file(bam_path).exists()) { bam = file(bam_path, checkIfExists: true) bai = file("${bam_path}.bai", checkIfExists: true) @@ -82,12 +89,12 @@ workflow PIPELINE_INITIALISATION { // Optional: cell barcode whitelist file def barcodes = row.barcodes && row.barcodes.trim() - ? file(row.barcodes, checkIfExists: true) + ? file(resolvePath(row.barcodes), checkIfExists: true) : file('NO_FILE_BARCODES') // Optional: peak BED file for restricting analysis to peak regions def peaks = row.peaks && row.peaks.trim() - ? file(row.peaks, checkIfExists: true) + ? file(resolvePath(row.peaks), checkIfExists: true) : file('NO_FILE_PEAKS') [ meta, fragments, fragments_tbi, barcodes, peaks, bam, bai ] diff --git a/rust/src/bam_remapper.rs b/rust/src/bam_remapper.rs index 536536a..b3f9b9e 100644 --- a/rust/src/bam_remapper.rs +++ b/rust/src/bam_remapper.rs @@ -585,6 +585,20 @@ fn process_read_pair( // Only keep pairs where at least one read differs from original let mut haplotype_reads = Vec::new(); + // Pre-count how many haplotypes actually differ from the original. + // The filter expects exactly this many pairs to arrive; hardcoding 2 + // causes het-variant reads (where only 1 haplotype differs) to be + // discarded because the filter never sees the "missing" second pair. 
+ let total_seqs: usize = r1_haps + .iter() + .zip(r2_haps.iter()) + .filter(|((r1_seq, _), (r2_seq, _))| r1_seq != &r1_original || r2_seq != &r2_original) + .count(); + + if total_seqs == 0 { + return Ok(Some(haplotype_reads)); + } + for (hap_idx, ((r1_seq, r1_qual), (r2_seq, r2_qual))) in r1_haps.iter().zip(r2_haps.iter()).enumerate() { @@ -599,7 +613,7 @@ fn process_read_pair( let r1_pos = read1.pos() as u32; let r2_pos = read2.pos() as u32; let seq_num = hap_idx + 1; - let total_seqs = 2; // We're generating 2 haplotypes (hap1, hap2) + let total_seqs = total_seqs; // Actual count of emitted haplotypes let base_name = generate_wasp_name(read_name, r1_pos, r2_pos, seq_num, total_seqs); diff --git a/src/counting/filter_variant_data.py b/src/counting/filter_variant_data.py index 43bfc21..0d2c59c 100644 --- a/src/counting/filter_variant_data.py +++ b/src/counting/filter_variant_data.py @@ -174,6 +174,14 @@ def parse_intersect_region_new( vcf_ncols = len(vcf_cols) + # Guard against empty intersection file (0 variants in region) + intersect_path = Path(intersect_file) + if not intersect_path.exists() or intersect_path.stat().st_size == 0: + # Return empty DataFrame with the expected schema (skip pos0) + empty_cols = [vcf_cols[0], *vcf_cols[2:]] # skip pos0 + empty_schema = {vcf_cols[0]: vcf_schema[0], **dict(zip(vcf_cols[2:], vcf_schema[2:]))} + return pl.DataFrame(schema=empty_schema) + # Process with gt df = pl.scan_csv( intersect_file, @@ -181,13 +189,13 @@ def parse_intersect_region_new( has_header=False, infer_schema_length=0, new_columns=vcf_cols, - schema_overrides=dict(zip(vcf_cols, vcf_schema)), + dtypes=dict(zip(vcf_cols, vcf_schema)), ) # Check how many region columns subset_cols = [vcf_cols[0], *vcf_cols[2:]] # skip pos0 - schema = df.collect_schema() - intersect_ncols = len(schema.names()) + schema = df.schema # OrderedDict in polars <1.0, Schema in >=1.0 + intersect_ncols = len(list(schema.keys()) if hasattr(schema, 'keys') else schema.names()) # 
Intersected with peak, check if region col needs to be made if intersect_ncols > vcf_ncols: @@ -200,7 +208,7 @@ def parse_intersect_region_new( else: df = df.with_columns( pl.concat_str( - [pl.col(i) for i in schema.names()[vcf_ncols : vcf_ncols + 3]], separator="_" + [pl.col(i) for i in (list(schema.keys()) if hasattr(schema, 'keys') else schema.names())[vcf_ncols : vcf_ncols + 3]], separator="_" ).alias(region_col) ) @@ -236,6 +244,13 @@ def parse_intersect_region( ValueError If BED format is not recognized. """ + # Guard against empty intersection file (0 variants in region) + intersect_path = Path(intersect_file) + if not intersect_path.exists() or intersect_path.stat().st_size == 0: + return pl.DataFrame( + schema={"chrom": pl.Categorical, "pos": pl.UInt32, "ref": pl.Categorical, "alt": pl.Categorical} + ) + df = pl.scan_csv(intersect_file, separator="\t", has_header=False, infer_schema_length=0) # If we need to use coords as name diff --git a/src/counting/parse_gene_data.py b/src/counting/parse_gene_data.py index 6653ed1..8ccee49 100644 --- a/src/counting/parse_gene_data.py +++ b/src/counting/parse_gene_data.py @@ -244,6 +244,20 @@ def parse_intersect_genes( if parent_attribute is None: parent_attribute = "Parent" + # Guard against empty intersection file (0 variants in region) + intersect_path = Path(intersect_file) + if not intersect_path.exists() or intersect_path.stat().st_size == 0: + return pl.DataFrame( + schema={ + "chrom": pl.Categorical, + "pos": pl.UInt32, + "ref": pl.Categorical, + "alt": pl.Categorical, + attribute: pl.Utf8, + parent_attribute: pl.Utf8, + } + ) + # AFTER performing gtf_to_bed and intersecting! 
df = pl.scan_csv(intersect_file, separator="\t", has_header=False, infer_schema_length=0) @@ -287,6 +301,20 @@ def parse_intersect_genes_new( if parent_attribute is None: parent_attribute = "Parent" + # Guard against empty intersection file (0 variants in region) + intersect_path = Path(intersect_file) + if not intersect_path.exists() or intersect_path.stat().st_size == 0: + return pl.DataFrame( + schema={ + "chrom": pl.Categorical, + "pos": pl.UInt32, + "ref": pl.Categorical, + "alt": pl.Categorical, + attribute: pl.Utf8, + parent_attribute: pl.Utf8, + } + ) + # AFTER performing gtf_to_bed and intersecting! df = pl.scan_csv(intersect_file, separator="\t", has_header=False, infer_schema_length=0) diff --git a/src/counting/run_counting.py b/src/counting/run_counting.py index 651fa47..b023b0b 100644 --- a/src/counting/run_counting.py +++ b/src/counting/run_counting.py @@ -1,12 +1,15 @@ from __future__ import annotations import functools +import logging import re import tempfile from collections.abc import Callable from pathlib import Path from typing import ParamSpec, TypeVar +import polars as pl + from .count_alleles import make_count_df # local imports @@ -328,6 +331,14 @@ def run_count_variants( # Should I include a filt bam step??? + # Guard: if no variants survived intersection, write empty output and return + if df.is_empty(): + logging.getLogger(__name__).warning( + "No variants found after intersection — writing empty counts file." 
+ ) + df.write_csv(count_files.out_file, include_header=True, separator="\t") + return + # Count count_df = make_count_df(bam_file=count_files.bam_file, df=df, use_rust=use_rust) diff --git a/src/counting/run_counting_sc.py b/src/counting/run_counting_sc.py index 08a0240..db880ed 100644 --- a/src/counting/run_counting_sc.py +++ b/src/counting/run_counting_sc.py @@ -2,6 +2,7 @@ from __future__ import annotations +import logging import re from pathlib import Path @@ -206,6 +207,16 @@ def run_count_variants_sc( region_col=None, ) + # Guard: if no variants survived intersection, warn and write empty output + if df.is_empty(): + logging.getLogger(__name__).warning( + "No variants found after intersection — writing empty output file." + ) + import anndata as ad + + ad.AnnData().write_h5ad(count_files.out_file) + return + # TODO: handle case where barcode file contains multiple columns with open(count_files.barcode_file) as file: bc_dict = {line.rstrip(): i for i, line in enumerate(file)} diff --git a/tests/shared_data/bwa_index/chr_test.fa b/tests/shared_data/bwa_index/chr_test.fa index 923c055..74c112f 100644 --- a/tests/shared_data/bwa_index/chr_test.fa +++ b/tests/shared_data/bwa_index/chr_test.fa @@ -1,331 +1,331 @@ >chr_test -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT 
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA 
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC 
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC 
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC 
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT 
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC 
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC 
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA 
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG +TACTTATAATTATTCACTTAGATATCCGACCGAATCTGATGCCACACTAGTCAACCAATA +TAGACCGGTGGTAGTGTCAGGTAACAAATTAATAGCATTCCTACATTAGTATGTATAGAA 
+CTTTGGCATTTTGATGAGGTTAGTAATCGGCAGATATCGCATGAAACATAATCTTAAAAT +TCACATTGTTGATGGATAGCTCGATATCATAACAACTAATAAAGAGACCCCTCGAGCAAG +GTAAACATGGTGCCAACCTTGTAATTAGCTTGCGTGACCGAGATATAACATATGGTAGTA +ATGTGGCTCTTGCGCATGGCATTTATGGAGGAAAGCTTGTAAGTTTAGTGTCCATAGACA +TTCGCTAAAGGGGAAGATGGGCCTTAATGAAGGTAAACCACCCACCAATTACGTTAGGAC +TCTAATAATTTAAACAGTGCAGTTTAACGACAGGCTCGTAAATCGGTACGGTCGATTTCG +GTTATGTTCCACCCCAAGCAACCACTCAGAGAGATATATTGGTCTAGTCTATCTCTGAAA +TTTGAAAATCATAGTTTATAGCCTAGTTGCTTAAAACTATTCTTATAGTGTAATGTCCGT +GTACTCGTTGATTCCAATGTGCGGAGGAAGCGACTTCCCCAAGACGGCGCTATCGTAAAG +TTCTGTTTTAGTACCATGTTGGATCCCCGATGAAGGCGTAGCTTATGCTCGTAGATCCCA +CTACCCTACGCGCGCCTCTCTGATAATAAACAGCAGAATCCACATCCATTGCTCCAAATG +CGGGGTGCCGGATTTATCACATGGAAATATAATTTTATGATACATATCCGCACACCGGGA +CAAAGTTTGCCAAAAGAGAGAGGGAATCTATCAACTTTACCAGCTCACCAAAACGGGAAA +GATTATGAACGTAGCGTGTGTAGCTATACGTTATCGCACCCACCGTTCGTAAGTATTCTA +CGTCTTAGGCATTTACAGTTCATAGTGAGGACCCGTCAGCAGCATCAAGCCGACATGCTA +TTGATACATAATAATTCAAACGCATGTGACGGGTAATAGAGTGAAGCAACGGCTCCGCCT +AGTATCGCGAGACTCAGACCTTATATAGTTTCCCTAACTACATTATGTCAATGTACCGAG +GGGTTCCCTAAGCATTTTGCGCTAAAGATCTACCACCTTAAGTTTCCTAAGCAATCCCTA +ACTAAACTTATACCGTGATCCTTAACACATTAAAATCATTTAATCGTTGAGTATTATAAG +TTGCTTCATTTTGCTAGGATATAAAATTTCGACTATAACTCTTAAGGCTCCAGATAGAAG +GCCGCATTTTGAAGCAAACTCTAGAGCGCTAACGTATCACAGACTTAGCTCAAGAGAATT +AAATATATTAAGGATATCATCAGAAGCTTTATCTCTGTATTATCCCTATTCCGTTGCGCG +CGTCTCACGAGAAATCATTAGTACGATTGTGACGTCGTAGTCACCAAGATTTAACCAGCG +CGTTGGCATCGTTAAACATTGAAATTAAAGGACATTAAGGGCCTGGAGCGGACTTTGTCG +TCTTCTACGTCGTTCTATTCTTGGAGCGACATATTCATAACGTTTATAACGTACAACTTG +CTCATGCTGAGTCTACTACTAAACCAGTTGTTCGCGCGGTGCTGATTGTACTACTCAGGG +ATTTAGTCATGAAACGAAGAAAGAGAGGGATATTCATGAGCGCTGCTTCGTTGCTTTACG +GGCTTATGTAAATTTGTTATAATGTATGAACTTTTAGGCTCTCATGTTCAAAGCATTTCC +GTCTTTGAGTAGCCCACCCAGACAAGGTTAGATATGAGACTGTTAGATTTATCCTGGGGA +AGATAGAAAATGATTGGAACGATTATTGCACTGTTGGAATTTCTATTTTAGGGTAGCCGA +TGAATCAAGATGTGAGAGGAATCATCACATTGATCACAAATTTATAAGTTAGAGTCATAC +GTACTATTCAGCAGGGCTGGTATTTGCTTTGTCGGGAGCGCATGCAAAAGTCGGCCATGG 
+CATCAATAATGGCAACAGAGACTCGGAGGTTTCCCTAGCAAAATACCCTTGTTATTAATC +CGTTCAGGACACGTTCCATATGTTAATAGGTCTCGTGGACTACCGAAGGCCGCTTGTGCT +CGAATGGAGTTAGAACTCATGGTATCTTGTGTATAATTCATGTGTCATAGTCATATCAAT +AGGACAAATGGATCGGACAATGGTATTAATGCCTTCCCACGGACAGCCACCCCAAATACC +TAAAATGATCAGACTTCTTAATACCCTTAAATTATTATCGTAGACAAGAAGTTCCCTTGT +CACGAGGCTATCCTGAGGTAAATTAGAAAACCTTCGGCAGACACCACTCTCTAGCAGATG +GTCGAGAATTGTCTGGTTAATAAGTAGATAGTAGAAAGTACTACTTGTTACTGATTTGAG +GACGACATCGCTCACAGCTCGCATGGCATGTCTAGGGTTCGGCAGCGCTGGGTTCATACC +TTGGTCTATTAGTGCAAGGGAGAACACAACGCAGTATCTCCGAATCCGACGAGCATGGTT +CTATGACTAAACCTCAACGATATTGCCTTAGTGTTGAATGATTTAGGATCGATACGGAGA +ATGACGCATTTACAAACCTAATGAGGCCGCGTAACGCAGATTCGCATGTTCTATTAACGC +GTCGCCTATCTTAGACAAATGGTGAGGACGTATCGATCAGCGGAAAAGTAATTTTCCCCT +TCACCTATAATTGGCGTTTTGCTTATGGTATAACGCCATTTTACCGTGTTGATCGATTAG +GTTAATTCCGTATCGGCCACCGTCCCTCTTTGAACACCTCCTGATATCGTACCCACTATT +CATAGTTCGTCGCGGTAGCGCACAAAACTCTAAAGCTTTGCCTTTAACTAAATTCGTGAA +TGGCTTAAACGGATGATAGGGATTACTGTCATGAATTCGATCATTGGATCGCCTATTCAG +AAATCTGATTAACAGGAAGTTCTCTTCTGGGGCACCTACCGCATTAACTTTGGTATTAGC +TAACCTTAGCGGACTATGACGGCCTAGGATCAACAAATGAATATTACATACAGCAGACGG +ATGAGTACGATGGTGAAAGGGGTTTACAAGCAGACACAACTATCTTTATGAACTAGTACT +TTCTTTCCACGATGCGTTGGTAAAAGTTTAAGCATAACGCGATCTGCGACGAATTATTAC +GCGAATTCCCGTTAAGGAAATACATTGAGATACTATCACGGGATCGGGTCACCTGGAACA +TGGATATTTGTGATTGTGTATTCGACCGTCGGATCTCTTCTAGAAGTATACCATAGTTCT +TGAGCGCTTTATAGCATATTAAGTGTGGGTTATTCCGATGCCTTGTCTATTGTCATAACC +CTAAACTCTAGGGTGATTAGTACATAGAACTGGCTCTCAAGCTTTAGGTATTGAACTTGA +GCTTCAGTTGTGAGTAAGCCGCCTGAAAGCTCAGGATGTGTCGTCCGCGCTCTTGCCCGC +CAATTCCGATGCACCCGCGTCTCTGGAACATTCATCTGAGTCTTTAGATTCAACGTATAA +TTGCAAAAGTATTTGCCTGCCTCATTTTCTTCTTCATCTTAGGAATTGGAGTGGCATTTT +AATATTGATTCGCTTATTATTTCATTTTCCATGGGAGTTCACGGTTGTAAGATAGTGTCA +CGAATCTACAAGGCCAGGTGGTCGATCGCAATGAAGTACAGCGTAGACTTTCAGAAAGGA +TCATTGCCTCGTCATGGCCGTTCAAAACTTCACTCCTCTAGGCTCACAGCCTGTGAACTC +AAGTAGTAAGCTAGGGTCACCCTAGAGTCACTTGCCTTAGAATATTCTGTTTTATCTGTT +AATCAGAAAGATGGTCTAACATGTTAATGGATACTTGGAGAGTCCGTATCTGTATCCTAC 
+TTAAATTCGTCATCAAAATGCTCAAGCGTTCTCGAAGGTATGTAGACTTGTATCTACGTC +AGTAATGTCTTGTGCCACAGAATAGGGCATATTGGTGTTTCGTTTTACAACATAAAATCC +GTTATCTGTATTGATAGACTGTGGGCCTAAATAGGGATCGTACGTCCATGATCAGGCCGG +AACGATTTATTACAACCACCAGACTACTGTCCCTCTCGATGAGGGGTGTTTATTATAAAA +GGGCTGAAACGGCGCCATTTGGTGCAATACCAATAGCTTGACATGTATGTAGAAACTTGG +CAATAGTATAGGGATAGACCCGGTTTTAATCGCGGTGAAATAAACTACGTACAATGGATT +GGTGTAAAGAACCTATAATCAGAATCAGCTTTGGGGTCAATCTCTTGCTTCTGTGAGGCC +GTGAAGTTAATTCTTTACAGAAAATAAATAGTCAGACAACCCAAATCATTTCATGTTTAT +CAAGATGAAAGGCTTACGGACCAACTGCGAAGGCGTGGCCTTGTTAATTTGAAGTTCATA +CCCTTGGGCGAGTTGCGGCAGATATCTCGCGGTCAAACCAATAAACAGCATAATGACTCT +GGGTGAGGGTTAATCTCTAGATCCTAAAACTCCCTGGAGGATTTAAAGTTGGTCAAGTAG +ACTGTTTCTACGTTTACGCATGAGTATCTTATAAGCTCTTTCTTTTGTCATATCCTGAAA +GTCGAAGACTCATCATTCAGATTAGTGCACCTAAAAGACTAAGCCGTATCACGGTTGGTA +TAATTCTCATTAATCAACGGTATACAATATGTGACAGATTACGCAGGAAGTTCCAGAAAG +ATTAGAGGCAAGTGTACGCCAAAAGGAGTCCATACCGGATCTCTCCCCTAGCTTGCACCG +CTCGCCAGCTATGACATCAATAGCACCTTCAGCTATTTAACAGATATAATTTATGCTTAG +AAGTAAAACCTCTGGAACAGTTCGAAACGATAATTTCGCGCCTGAAATATGAGTATGAGC +ACATGATGTAGGATATCGGGTAAGTATCGTAGTAGGACTCAGTAAACCATGTGGAAGCCC +AGGATAGATTTCAGTACTGAACAGCAATACAAACGAATACGAAAATGTCCTTTCTTACCG +GTCAAGGTTTATCAATTGAGTGGGACTGTCGTCCTATAAAGTCATCCCCTCATTGGACTG +ACACGTGTCCATTGTTCTTAAAAGCTAACCCGTTTTATCTTTTCGGCGGCTGACCTGCTT +CTAAATAGAAAACATCCAACGCGGGGAAGCAATTCACTATTTCATTTTGTATATGAAAAT +AGAAGGAACGTATGATCTTTATACCCCAGGTGGAACCGTCCGGAATTTCCTCTGATATTC +ACAAGGAGTGTTAACACTGTTAAACACGAACTATCCTTGTATAATTAAACCGAAGTGTCT +GTAACCCGTTACGACGATAGCGTCTAGCCCTTGTCGCGAAAGCCGAGATTCCACGGTCGA +TAAAGTATCACGTACCGTGTAATAGTCCGGAAGCACTGCCCCGCCATAACATAATAGAAT +TTCCCTAGATAGACGTAATGTCTAAACCCCTTGCACGAAACTCGATAATGTTTGTAATGC +TCGCGCTAGGTAGGTTTTCATGTTACATTACCAAAAGAACCATAATTTTAATGCTACAAG +TAGAGAATCCCTCTTAGACTCTTGATTAAACATAATCTCACAAGATTCGGAATTCCCGAT +TGATAGACCACAGAATCGAACATGGATCTGTACTCTTTAGACGCTTGTCTCAAGTAAATT +TTAATCCCTTACGCAATATGCTGGGTGCAAGAGTCGTAAATGCGTGGAGAACTGTATTCT +AATATAAGTTGCACGGGCGCTAGGCCGCGGGATGAAAACAAGTATGAGAACTAGATAGGT 
+AGTCCAAAACTGGGTTAATAGAAAATCAGAAATGATTAAATCCTAAGTTACGTTATCGGT +TGTAATTGACAGGGCGATACTGCGCTCGTTGAGTTAGAACACTTACAGTAAAACACATAG +TAATAATGAGGGCCTTCCGCTTTGTACACCTCATAATAGTTTTCAGAGTAGAGTAACACA +GTGTTTGGTCCTGCGGATCGGAAAAGGACGCTTATGCGACCACCGACCACCAATGGTTTA +TAATTCTGAGTAGTAGCAGTCAGCAGTCACAGTAATCGCGTCGTGATTCTTACTCACGCT +TTTGCGAGCCGTAGTAATATTCAGTAAAAGTGAAGATTCTCCAAATAATAAATGTTTGAA +GAGCGTTTATAGTAATAATCGACATCATTGTCAGTGAGGAGCGAAACCATCTGCGGATAA +GGCATGTCTGATTACGCCACGAAAGATAAAACCAACTACTGCATTGTTCCCCATCTACTT +GCTTAAAAGGTGACCCGTGCAAAAGACGGTAACATAACTTGAATTTGAGATCTCTGCCAA +CTTGGGGTTGTTTTCGACAGCTTTTCCTCAGACAAGCGTCTAATGTTTGCTCAATATTAA +TCCATCGTTATTATAGCTGCTAAGCACACGCGCCCATTACTAAAACTAAGACTAAGGAAA +ATAAGTCAGAGAACAGTGGAACCAGTACAGAAAACGCCACTAGATGGGCGATTGGTCAAG +CAGGTCGTATTCTGCCAACAAATTACATATCCATAGGGTAACTGGCTTCTCGTATTAAAC +CGTACTCTTAGTATCATGGACACTGATTAATGAGGACGGCGATTTGAGATATAGTGAAAG +TCACCCAGACATTATGAAATCATTTCTTCAGGGTTAGTGGGTATTTTGGGGACGACCAAA +CAGTTTGCGTTTTCTTTGGTCATTCCTCCTGCAGAGGTGGTTACCTAACCGATTAAGCAT +CAAACTATTCATAGATTCCTAGACTATCCCGATCAGCTTAGTATTACCTCTTACAAGTCG +TCGTGTAGACAAGAAGTCCTAGTGATAAATACCAAACATATCGCGGTCGGACCGTTAGGT +TACATTTGATGGTCTTTAAGGTTTAACAATGCCGCTCTTCTTAGCTGGGTAGGGACATTT +TGCCATTGCCGAACCTGAGTCCGTCCTTACAGCGATATATGATAACCGAGTGCATTAGAA +ATCTTCCTGCGCTGTGAATTTCGGGTGCAAGTATCACGTGATTGCCCTCAACTACAGTGA +GCAATTGAGCAACTTTCCAGATGTTTGAAGTTCTTCTGTCCTGAATGCTTCCGAGTAACT +TGGTAATGCTTCCATAGCTGTAGGAGAGGCGACGCTGTCGTAAATCGACAATTCGTATAA +CTCGTGCCCTGGGGAGATAGTGGATTAAATCAATTATTAATGGGGACAATAGGCCGTTCT +CAGCAGTGGACCATCCAAGGTCACCACCGGCGACTTAGGCCAACTATATGTTCCCTTCGA +TTTCGGTGATACAAAACGGCAAGGGTAGGACGATCAGAATAGTACGAACAATCGAATTGA +GAAACGTGTTAGTTTAGGGTGAAACCTATGTGAGACGAACATACCGGTTACCTGTCAAAC +GAAAGCGCGTGCTTTTCCCATCTTTAACCATCTATACATGGGTAAAACTTAATTGGTAGG +TAGTCTATACTGCACTTTTCTAAAGACTTCCATCAATGTGTAATAAGATCACCATTCTAT +ATGCTTCGGTTGATGCTCTTATGGTACCTGGAAATATTAGCGGCACATGGCATGTGCCTC +GTATTCCTGTATCCGGAAGTGCATTTTCCTTCGCCGGATTATTACTGATTGATCGATATT +ATTGAAAATGTGTAGCCTTTCGTACTATTACGTACTTCAATACCGCGTATTTTGTATTAT 
+AATGAGGCTACAATGATGCTGTTTCAGGCTAGAAATCACGGCTCAACGGATTAGTATTAG +GCGGTAGCATACTCCGAATTGGTACAATACAGGGACACCAGTTTTAACAGTCTCGTATAC +CCCATAGAATAACGATGGGCGTAAATACTAATAAGAGACGTTGCCTAGTTATATAAAAGA +ACTGTCTGACATCTATATAATAATCACTGGTCCAGCTTTTAAATAGTGCCGCGCACGTTG +CGTCCAGACACTAAGAACGGGGTTCATGATAAGGATAACCCAACGGTCTGTAGGCTATAA +ACCGACCATTTTGGCGTGTGTGGGATCACAAACAGACCTGCATAATTGCGAAAATGTCTA +TGGCGACCAAGTCGAGCAAGGTCACCTCTTTTCCACTGTAGTAACAACGCCAAGGACCAG +GCATATGGGTAAACGTCATGTCGGATTCCCTTTGCGAACGTACGGTTTTCAACCAGAAAC +AACCAATTATGCGACTAGGATACAATACAAATGTATGTTGGGTTCCCTGGCCGCAAATAA +TGGTTTCAGATAGGTTAGAAACGGATGAAACTTTACAATTATATTATCGTTATGGGTTCC +TAAAGAATATCGTCATGGAAACTTAAATAGCATAATCAGCTGATGTAGCAGGTGACATAT +CTTTGCTTAGCACTATTTCGTTAACTATGACTGACGGTTTAACCGGCCGCAGGTGTCCTC +TCATTGAGGTAGCGCAATCTGGTTTGCTATATAAGATTTACATTTTCATTGCTTATAGTC +TAGTGACTGGGGAGCCGTGGATTTTAGTAAGAGGGCGTTTTCACTAGGCAGCGCAAGAAC +GGTTATACGTGAAGAATCCGTCTCGAGTATTACGGAAGGCTATCCAGTCGTGTTCTTATC +ATTATGTCCCAAACAGCGAGCTGGAATTTCTGTAAAGGACTGTGGACCCGTAACTCCAAA +ATCGGGCCAGTATAGAAATTTGTCGCGCGCTAATAGCCAATAATAAGTCCTGATTGACTC +CACCCTATTACCAGATGGTCATTGGTAATAATAGCCAACGGGATTGGAGCTTGGCCTTAA +ACCCCGTTTTGAGGCCAATCAGGTGGTGCTATCACCTAAAACGTTGCTAGCAGTTAAACA +TCGGCCCACCTTAGGCTTTGGTTGAGTACGACAAGATACTGATTGATCTTCCCAGCAACC +TCATAATGATAATCAGTTTGGCTAAAGTCTCATAGTCAAGGACTAACATTACTAGAACAT +GTCCTAACTTTGTGTATGCGTATATCGGTGCCATATAGGTCACGAACAGGTTACAAAGCG +ACCAATAAAACCACTTTTCCCGACCATTACGTTTGTATCCGAGTTCTCATTCAATGCAGT +TTCTTAAATAGTGTTCCGCTCAACTTATAAGTATATTTATGGCCAAAACATGTAATTACG +TCGTTATTAGTCCCATGAGTATCTTACTGCATTAAGTGATTCAACGACTAAGTCAGTGGA +AAATCAAATGCCACATAGAACCTATGAGTATGGACAAGATTTCGCAATTATTATTCTTGG +CTTTGTGCGCTTCATTAGTTCATACTTGACTTAGGGCCCAAAATGGACATGTCATCCGTC +TAACTCTTTTCAAAATAGTAATAGTGCTGAGGTTTCACTGGTGAGTGCCCATTTTCATAC +AGATTGCTAAGATGCTGTTCTGATGCGTTACGTATGTTATTAGCGAAGAATACGCTTAAT +CTCCCCACCATCGGAAGCCTTATACTGTTAAAAGGGTATTAAAATTAAGGCACCTCTGTC +TAGTTACAACCAACATACGACTTTATACATCTCCGTGTTGATAGATGTGCATGCGCAATC +TGATGTCTGAAGTAGTAGAAATACATATAGACGTAACCTTCGTCCTTCTGGTCCTGTACA 
+ACGGCCCCTTAGTTTCTTGCGGGTTGCCCCACCGATAACTTCAGTGTCGAGAGTATTCTT +GGAAATGTATAAGTCGTCACCACGATTCTCGGACCGCAGCGAGCTGAATGCTTCGTTTAG +GTTAGAGCATCGGTAGACGTCGCCCTATAGCGTGCCTATACCAAGGCGGATCAAGGCCTT +GAAGTTACAGTAGAGAGTAGGAAAATAGACCACAAATATTCCCATTTTAAATGTCTCAGA +CCAACGTCATTATCTTGTATGTAACATGCCTCCAAGTCTCTGGAGACGCTTGCGTGCTAT +TTTCAGTAACAACTAATTGCAAACATACTGTACTGCGATTCTTGCTAAGCGTCTGATTAG +CTAAACGACTCTGACGACTATGGCGGTACAGGCGCCCTAAAACGTTATATTGCGCCCAAC +ACGCATCACCACTCCTTGTCTTTGCTTTCTATACTTCAACACCAGTGTGGGGCTATAAAG +TGAGTTCCCCAACGTTTATCTATGGCCGGCTTACAATCTTGAATTTCAGCACATCTTAAG +TGGAATAAAATAAAGGTTGACCCGGCCTTCCGTGATTAGATATCCTCATTGCTCGCCGGT +CACGAATCTCTAAGCTATTCTGTATAACACTCAAACCTTTAGACTTGTGCACATAGGATA +CTTGAAACTTAATCGATCCATACCATAGAAGCCAGCTTTATAGTAGGGAAAGCCCATCCA +CTGGGTGAATACCAAGGTCGAACATTAGACTCCAATCATTCGAAAGCGTAAACTACCTGA +GCGATTGTTACTACTGTTTAAATAGTATCATTGGAAGAAAACTTTATTTCGCTTTTGAAA +ATGCATAGGGGCGAGCTTTACGCCATGCTTCCGGTATATCCAAAATTATTGTAAAGCGAC +TTTATTTCTCCCTAACATTCAGTGTCACACAGCCGTCACCGATCCATTACCTCCGACTAT +GCAAATGACATATAGTTTAGTTATAGACCCTACGATATAAACAAATTGGTCTATAGGAAG +TAACCATTTCGGCATTTGCACGGCCGCCGCAATGCCGAGCCACACTTATCGTATGGCAAA +TGTAAATTCGTATCCGAATCTGATAATGTCCAGTGGTCGACTATTGCGCCACAAAGCGAC +ACTCGGTACATCACAGGGCATCAACGTGACGAAAGTGTATTAAAGGAAATGTTTCCATAA +TACGTTGGCAACATGTATTTTAAAATCCCGCAGTATTACCATGGGTCTTGGCTATAAAAC +ATCAGAGATCTATTTATCCGCAATAGAGTTTGCCTTAGAAAATTACTGCAAGTGCATTCC +AAATAATAAGTGATTGATTCGAATGTTGAATTAATACGAATTCTTGGAAAGGGATTCTTA +CGAAGTGTATCTAAATAATTCATTGAAGTAGATCAACTTTGGGTCTACGCAGCTGTTGGT +CTAAGCTAAAATAGGCTTTGTTGTCGCCCACGTTACTGGGAACTATAGATTGAGTTAGTG +AAGTTCAATATGAAGGTATTCAGGAGCTTATAAAAGTCGGATTGTCCACAACTTGATCAG +AGCGTCGCAAATATTGTTACTCCCACAGGCGGAAGACATTAATCCCAATCCGTTAGGAAA +GACGTGCAACTATTTGGCTCCGATAGCTTCTAACTCATTAGGACTGGAGATGATCAGTGA +AGAAAATCCATTAAACACAAACCTAATGTCATGAACCGATGCCTGTCTTTAGTCCTATCT +TTATTATTAAAAGCGCTCGAATCGTCAACAAAGCCCTTCTACCCATATCGTGCGTCTACG +ACCGAGGAAAGGGATAGACAAAACACTGCAGGTCATAGTATACCTTTGACTGCGCTTCGT +CCCCTAAACCAGAAACTTAAATGGGTCACAAATTCAATAAATGTCGCTCGACGGGATATC 
+ACTTGTACTTCGACAAAGAAGAAATAACCTTAACATAGTTCTTATTCGACACCTATAAAC +TAAAAGTCGCCTACCTGATTTGGTTTCTTCTAATGAACTAGGAGCGGCCTGAGCCATCTA +TTAATCTAGCTATATGGAAGCGTGTGAGAATTACCCATCACTGCTAAATAGAATGCAACC +GCCAAGTTAGAATCGTTCCTAAAGATAAATAACGGAAACTCCGGGTCCAACCTCAGAAAA +GGCCTAGGTGCGGCTCTCCCGGAGGGTATCTCGTTCATTTATACCAGTCGATCAAGGCCC +AAACCTATGCATGTACTTGCAGAGCCCTTTCTTTCATCATTTGGGAGGAATAAGGGGTTT +GATAATACAATAAGGCATGAAACATCGACAATTTATTCATAACCAATCAATGGCGACCTT +ACGATGAGCATTGCAGACCACGGCTGATTCATATGTTGTTAATAAAATTTGCCGATTTTA +TTCGATATACTTAAGACCTAGAATCATCCCGTTTATGGACATTACGTCATTGTGGTGGTG +AGTAACCCGGATGCTAAATACCGTAAGCTTCTCAAGTTCTTTGAATCCATACATAGAATT +ACAGTCATAGTACATATGAAAACGTAAGTAGCCGTACGCTATGTGCAATGCGCGCCATAA +GCTTTTACGAGTGTAGCTTCGTCATTATTTACGCGCTGCAGGTTGTCGACAGAACTGCAC +CGTTAAACCCTTTAAAGATACCATTTGAACTCTTACTGTACGCTATGGTGAGTATAGTAT +ACCCAATTTTCATAGACACATCACTTTATTTTGGCGTCTTGTCTTTCATTGAATCACCTC +AACACCCTATACTTGTTTAGCGCTAATTCCCCGTAATCAGAGATGGACAAGTGTTCATTT +GATGTTCGAAACGAAATAATTCTCGTACCCTACAATAGAGTCAAGACTTCACGGTATAGT +TATGGTCGGCGCATATTGATTATCGATCTAGATATAACGCTTTATCCGTTGGTTACCTTT +CACTTCAGACAGTTTGAAAATATTGCGCGAAGGTCTTGATACAATAGAACCGTTGGAAAA +CTATCCGAGCAGTTAGTGGATCAATATGCGGTAACTCTAAAGGAAGGTTGCAAAACCTGA +GTTCTGTGGCGAGTTGGATACGCACTATTCCGGTAGACCGTTTACTCAGCCTAGATCTAC +AGAATCGCCCCTGCTTTCACGAAGATATCTTTATATTGATTTGATGGTGTATACCAAGGC +TGGTACTCTACGTCGTCGATTGCTAGGAACTGGTAATACATTATTTAGCCGGAGAGTCCG +TGTTGGCGACAAAGTTTACGTGGTAGTAGTGGCAGGGTTATCGTTCATTAAACCGAAGCC +CTCCTGATCCGCGCACCACGATCCTGTTGAGACAAGGAAGTGTCCCACTCACTTACGATA +AGAACTAACCGAATCTTCGACAGAATAACCACGATTTGCATTCTCTATTTGGCTACCGCA +ATGTGGTTTTCAAGTCAACCCGGCCCGTAACCCTTAGAGATCGACGGACATTATGGGCCA +GAGTCCCTTTCTTAGCTTCCTTTTGCGTAAATCCCGGCACTCTATGTCAGTGCAGAATCG +GTAATCACGTATGCAGACTCATATTTTCTCGGGTACTCCCTACGCCGCGAGCAGCCCCTG +CATTTCTGTACCAAACCGCAAGAGGTGGTTTCTTAATCACGTACGAGTAATTGAGCGAGT +GATTTACTGAACCGTATCTAATCTTACGTTATCATAAGAATTCAAAATTGTATGCCAATG +CAGATGATCTATGTACCCGTCACTGTGTATTCGGTGCCCTAACCGTTAACGTAATCAGGT +ACCTGTGAGTAGTATCATGGAACACAGCCAATCACTGGGAGCAGTTTAGTAAAAGCTAAG 
+GAAAAGCAAGTAACGAGATAACTTATTTTGATGGTGTTTAACCCCTTTAAGACGGTCATT +GATTCGAGACTGCGCATTCCCCACACTTGGTAACGAACATCTTGCCAGAAAGTAGTGCCC +ACACTTAGCGATCAGTTTAAAATGGAATTGCTGATCTCCGGAGTTTGTCTATAAGTTATT +CGAGAAATGCAAGATGGCTAATACGGTGAGTTACTTAGAACGACAATTAGCTTATTTCCC +CTGAGACGATTTGATATTTACAAGCCGATAACAAAAGACCAGTGAAAAGCGAAATTATGT +AGAATTGAAGGTCGAAAGTCGGTTTAGTGCGTCTTCTATGTACGGATAGCGTATGACGCC +TCTGAAATCCCTTAGTATCACATAAAGTTTTCCACTTACGATGACCAACGCAGGAATGCA +TCGGTCTTTTAACTACCCACTGACTATAAATTTGCGAATTCATATCCACGTAAGAATTAC +CACATTCTACAGACTAAAGGACGAATTTGTTAGTGTAGGCCCACCTGGTATGTGAAACCT +TCCCTGTTTCAAACTTGGGCATAAAGAGCACTCATACGAATCTAGGTTGTTACGTATTAT +GCTCCCTATCTAAAGTATTTGAGTGTTTGCATAAAGCTTATAAAGTACCTTTTACTTTCC +ATATCTTATATTCAGCGGCGCAATTACACCATACATATGAGGTTGAATTTAAGGGAGTAT +ACGAATGTAACTCCTATTTAAATAGACAATTTACTCATACTGGCTACGGGAAGACTCAAT +GAGTTGTTTGACTCGTTAAGCCGTTTTATACTAATTTACTACTAACACCCAGGAATCTGA +GATAACGTCACGGATCAGCGGCACCTGAAACTTTGTTATTACTTATATAAAGGCCTGTAA +ACGGCCATCCATCCGGGTTGCCGAACTTATTATTATTAACCGTAAAATCACTCATAAAAG +CTTGTGAAACATTGAACACTTTAGGCGGCACGCCCCGTTGATAAGAAAATTATAGAGGTG +CGGAGGGTTACTATTTTCAAAGTTGAAGCTATGCGCGCCAGCCACCATTAATCAAGTTGT +CCATCCATGGGATCTCAACGTGCAATGTTGGTGCCCGAGTATTCCATCCTGTTCGCACCG +ACAGCATAATTGAACTATGTCGAACCCTTAGCTAGTACACTGTGTCCATATACGGATTTA +ACAACTAATTGTCATAAAACTTTTGGCCTGGCCTATTTACAATAGTCTAGTTTACAATAC +TTTCCACGTTTGTCCTTAGAAACTATATAAGTGAGACCATCTAATATCCGCACTAACTGT +GTCGTACTCAAGTTGTACGGCAGACGGCGGCCCCGTGCGAAAGAAGAGCGATTTGCTAAT +AGTTTCCCATACATGTATAGATCACACAGACTTAAGAAGATGTCTTGATATAGAAAATAC +GTAACTGTGTATGATAAGACTTGCCGCCATTCTGCGTAGTTGAAAATAAAATTAGAGGAT +TTTAGTCTGTGCCCTAATTTATTTACCAATGCCAAGCTGCAAACGAGGTTGTAGGCTGTA +TCGCAACAAAAGCGATCTCTGGTGGTGTTCAGCTATCTTAGTACTACTTCTTAATCGGAC +ATACAATCTAATATAACGCACATCAAAACGGTGAAAGATAGCCACCCATAATTAATCTTC +TCGCTAGATGCTACGACTCCCTGCGTCGTAAATTATACTGGTGACAGTTGAATGGACCGT +GAATGACCCGCACGTCTATAACCAGGTCCGTGTAAACGCATGCCTATAGTTGCGTATTGC +GTCGGAGTAGCAGATCGACGTATGCTGCGAGACGATACCGAGTCAAATCATTCACCCCAC +ATACGTTTGAGATCCAAGTTATCTGGGTAAAGCCTAGGAAATGCGTTTCTAGGATATCAG 
+TAGAACTTTTATGGCGTTTATCTAGAGACTGGTTAGATGGCGTATAAACCTTCCGTTTTG +AAAGGAAACTATGAAAAGAGAGCATGTTGACAAAGAGAGAAGATCGGGTTTACATACATA +CTACCTCGTAATCATTTATTTAGTTCCCCATAATCTCCGTGGTGTGATACCTGGGTAATG +ATGACGTTGTCTTAATGCTAAAGCAATGTACAGGCCGTATTACTTAAAGGCAACGCATGA +GAGTGCGTTATTACATCAGGACTCCATGTTTTCTTTTATACGCGTGGGAACGGTCGACAC +GTCAAGACAGACTTAAGGGGTAAGGTAATCTCAATCGCCTACTTTCCTAAATTTCAGGAT +GCCATATAAGCATGCCACCCTTTATGTCGTTCACTTGCAGGGCATGCCAGCTTGTAGATA +ATAAATGGACTTTCTATTTGTTAGGGGAACATCAAGCAATCAAACTACAATAATATCCGA +TACGCACCAAGGGCTATTTAAGGCGCTGAGTGAGCTCAAATCAAAACTTTCTCCTACTTT +ATGAATTCTTAAATATTTAGAGAGGAAAGCTCTCATATCAGTCTATCGTGAGCCGTATGT +GTAACATTGAGTATATCTCGATTTCTTCCTCGAGCAAATTTGATTTTCCTGGTGTACCGG +ATATATACCTTGAACCACCGAAGCCAAAGTGCCTGTCTTTCACGATATGATTTCAAGCAG +CGCTCGCTCTAGCTACGCCTAACAGGACTATGACATGGTAAACAAAAGGATTTTGAGAAC +CAAACAAAATCAATCGGTAACTTTTCACCCTCATGGAACACTGAAGAGAATCTTAAGAAT +TAGTCGGGGTGCCGACTCGCAAAACCTGGAAGGTAAACACATCAGACTCTATCATGCAAA +CACGAAAATCAATTGGCTTGTTAACTTTGGGGTCACTTGTATCCCTATCTTTTGCGTCAT +CATCGAAAAGATTACGCCATGGCAAGTCTATAAACCATGACAGGAAAGATTGGGCCCTTA +GCAAATACGGATGGGTCTCACCGGTTGGGCAGTCGGATAAATTATCACACATGGTGCAAT +CTGTATCTGGGATTTTACCGAAATTGAGGTGGCGCTTTTACGCCGTATGATTTTCGCACT +CCTTCATCGCACTTAAATCCAGTGAACCTCTCATACAGTAATCATTAGGATTCGTGGACG +ACAAGCTCTTTTCAAACTAGCCGTCCAACAACAACCGTAAGGCAAAGTTGCTAGTCCGTA +CACTTACATGATTCTATTCACAGGGCTCCCCGATAGGATGCTGTGCTATGTTTAATATGG +ATAATCGGCATAGCGTAGCCAGCCCATCCAATCGGGTTGATAAACGAACCCAGCGGATAC +GTAGTATGCCAGGATTGTCAATTCTACAACCGTTTTGGCAGCCCAATGGTCGGCAGGTTA +AAATGGGTGATGGTATGAACATAGCTTTACGAGGCAAGGAAGGTTCATACCGTCTTCACT +CTCTAATCTCTTACATCAGGCCTCGACAATTGATAACGTCCATTCCTTTCTATAACACAG +GAGGTCGGAATTAAATCCGACTTAAAAGGTTAAAGTCTCTGAAATCATTCTAAAGCGGGA +CCGCTCGCCAAAATCTTCTATGATCATATGTTCTCGATGAACGGATGAATGCTATGACTC +ATATGCTGGTTCAGCTTAGAAGTGATGATCCGTGTGAGACAATTTTGGTTACATGCACTC +AAATCCTATAGGTTTTACTACGTAACAATAGATGCAGTTCGTATGTAGCTCGAACTGCAT +ATAACAAAATCAATCTGGCGTAGGAAAGGTGTACTATATCCGTGTATATATAATAGAATG +CTTCTCGTTTGTGTGAAGTGGGTATCTAATTAATACGGTTTCTGTGAGCCAATCGTGAAG 
+CCACCTATACAACGTAAGGAATATGTAGTGTAACTGTTTCCTTGATGTTTTAAATTAATT +ATTTTCTTGTTGATGATACTGATTCCTGTACGCATACTCAGTTAAATAATGCCGAGGATG +ATGCTTCATCGCTCGTATGTTCTTCGATTGATGAATACAGTAACTCATGACTCATCGAAG +TGATGTTACTAGAAAACGACCTCGCCCACCTCTGAGGCCACCATACTGGATATACTCAAT +TGAACCCACCACCCTCCTGATGATTCGTCACTCTATTGCCCTATCTGAGCAACTATCAAC +AGCGGTTTAACGAGGATTCGTTAATATCTTGCTGTTATAGGCAGTGGGAACATATCGAGC +ACTTTGATGGTTACATTATCGACTGATTCGGTCTGTACTTAGGATTCGTACTATAATGAA +ATCTTTTCAATTAGTTCGGTGTAAAGCGCCGTGTGAAGCTTACATGAATATTTTACTTCA +GTAAACTGCAGGTCCCTAATAGGCACTTCCGGGAGGTCGAATTAGCGTGTATTCTAAACT +ACGGCCCACTTTAGACGGCGATTGGGTGCTAAACAGAGGCTATCCGAGTAGCTAAAAGAT +TTTGTGCGCCTTCTATTGCGCTGGGCGAAGAATCGCGTACCTTTCTTGTGGTATAAGAAC +GAGAATCATATATCAATAGAGTTTGGTGGCGAATCCTGTCCTGATCACGGTAATAATTAG +GCCTGGAGTATCTAGTAGGTCTAAAGCGAGTTGCACTCGGCATAGGGTAAAGTGATAAAA +TTTAATTAATGATAGCTTAAAACTCGCAAAGTCGTACAGATTTTCACGCCATTTGAAATA +CACGGTAGTCATGTGGGTAAGTCTGAAAAGCCGGTTGATCAGTTCCATTGAAACTATATT +GCAGATACCTTATACTCATAATACTTTGGGATTGCGCTAATTATAGTATCATTTTAACGT +AATTGAATAATATTGTACATGTCAAATCGGTAGCGACTATTTTCGCAGCTCTTACGTAGT +TAAAATAACCCATCCCCAACCGAATAAGATCTGTAGTACTTCGCAGGCACGTCTTGACCG +TGTCGGGCCCATTGTATTACTAACATACTTGCCTTAATAAGATTAGACCACACAAGTGGT +ATCTTCATATGCTTTAGGCTTTTGATACGGCAAGCCTAAAAGGTGAATAAATTCATAAGT +GGACAACAACCTTCTTTAATCAAACTCTGGAGTAGCCAGATTCGTACACCAATTCATATA +TCATTTTCATTACGAGCAGTGAGGTAGGAAGAGACAGAGCCTAGTTTTGAAACCACGGGT +AGAGCTCACTAATTAGGACGTACTGGAACCTACGTGGGGAAAAGAGAAGTCGCAGTAGGA +CTCCAGAGCGATACACTTGGCTACGACTTGGATATGAAATACGAAACATTTAGCTTTAGG +AAACTCTCTAGCTCGCGTGTGTATTTCACTGAATGGCATCCGGCAGAAGAAAGGCTGAAA +TTTTAAAACGTAGTCACGTGCTGCCAGCCAATCATGGTGGGAGTAAGCGTCTCAGGTAAG +ACAGACCTGCCTCTGCCACCATTATGACAACCCGTAGAAAGATGTTACATAAGTACGCGC +CGCAATGTCATTAAATCCTCTATTCGTACCGACATGGTGAGTCAAGAAGAACAGATATGT +CAGGTTAGTTCCTTGCCTTACTAAACTAGCCGAAGTCTGGTTTTACATATTAGCATATGT +GATCTTGTTAGCATCAAGTCAAATTCTTGTGGATGCTGAAAAGAGCAAGCAACAAAGTCC +TATACAAGGATTGTTTTAAACCAATGAAGCAAACAGGCGGGTCGGAACCTCCATCATGAT +AAATGTGGTTTCCGTATATTATAAGACCATTTGTAAAAGGATCGTACTATTGTGGACACG 
+TTTAGCGTTTTATATGAATTCTTTGATGATTATCAGAGGCGTAGCGGGTAGTCTATATTA +TTAATTCCCATTCGTGTGACACAATATGTACAGAATATAGACTGAACAGGGCCCCGAATA +GCCTTATTTCTAGCAGATTTCAAAGTTACATGATCCGTTTCATATCTCGAGGGTCATTCG +GTGACCTACATCATATAACTCGGGGCTTAATACAATGGATCTGGTACTTTTATATGCTAT +TCCTTCCGCACGTGCTGCAGTTTAGCTAAACGACTAGCTAGATAAGAAAGGTACCTTAAC +GTTAATATTACTAGCAATAACAAGATTATTGTGGTGAAGACACATATTGCCCTAATGCTC +AAATCTTGAAAGTAGTAAAGGATTGCGTTAGATGGCCCGCTGATTGACCGTATGTAGGTG +AGTGCCATTACTGTGGGCTGGCAGGATGGTGGTGCAACTGGTGTCAATATTCATAGTGTC diff --git a/tests/shared_data/bwa_index/chr_test.fa.bwt b/tests/shared_data/bwa_index/chr_test.fa.bwt index 7b2e7ab..d028f6b 100644 Binary files a/tests/shared_data/bwa_index/chr_test.fa.bwt and b/tests/shared_data/bwa_index/chr_test.fa.bwt differ diff --git a/tests/shared_data/bwa_index/chr_test.fa.pac b/tests/shared_data/bwa_index/chr_test.fa.pac index dd39245..f528e0d 100644 Binary files a/tests/shared_data/bwa_index/chr_test.fa.pac and b/tests/shared_data/bwa_index/chr_test.fa.pac differ diff --git a/tests/shared_data/bwa_index/chr_test.fa.sa b/tests/shared_data/bwa_index/chr_test.fa.sa index 76e12a6..6cb1b03 100644 Binary files a/tests/shared_data/bwa_index/chr_test.fa.sa and b/tests/shared_data/bwa_index/chr_test.fa.sa differ diff --git a/tests/shared_data/chr_test.fa b/tests/shared_data/chr_test.fa index 923c055..74c112f 100644 --- a/tests/shared_data/chr_test.fa +++ b/tests/shared_data/chr_test.fa @@ -1,331 +1,331 @@ >chr_test -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC 
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT 
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC 
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT 
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA 
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT 
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC 
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA 
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG +TACTTATAATTATTCACTTAGATATCCGACCGAATCTGATGCCACACTAGTCAACCAATA +TAGACCGGTGGTAGTGTCAGGTAACAAATTAATAGCATTCCTACATTAGTATGTATAGAA +CTTTGGCATTTTGATGAGGTTAGTAATCGGCAGATATCGCATGAAACATAATCTTAAAAT +TCACATTGTTGATGGATAGCTCGATATCATAACAACTAATAAAGAGACCCCTCGAGCAAG +GTAAACATGGTGCCAACCTTGTAATTAGCTTGCGTGACCGAGATATAACATATGGTAGTA +ATGTGGCTCTTGCGCATGGCATTTATGGAGGAAAGCTTGTAAGTTTAGTGTCCATAGACA +TTCGCTAAAGGGGAAGATGGGCCTTAATGAAGGTAAACCACCCACCAATTACGTTAGGAC +TCTAATAATTTAAACAGTGCAGTTTAACGACAGGCTCGTAAATCGGTACGGTCGATTTCG +GTTATGTTCCACCCCAAGCAACCACTCAGAGAGATATATTGGTCTAGTCTATCTCTGAAA +TTTGAAAATCATAGTTTATAGCCTAGTTGCTTAAAACTATTCTTATAGTGTAATGTCCGT +GTACTCGTTGATTCCAATGTGCGGAGGAAGCGACTTCCCCAAGACGGCGCTATCGTAAAG +TTCTGTTTTAGTACCATGTTGGATCCCCGATGAAGGCGTAGCTTATGCTCGTAGATCCCA +CTACCCTACGCGCGCCTCTCTGATAATAAACAGCAGAATCCACATCCATTGCTCCAAATG +CGGGGTGCCGGATTTATCACATGGAAATATAATTTTATGATACATATCCGCACACCGGGA +CAAAGTTTGCCAAAAGAGAGAGGGAATCTATCAACTTTACCAGCTCACCAAAACGGGAAA +GATTATGAACGTAGCGTGTGTAGCTATACGTTATCGCACCCACCGTTCGTAAGTATTCTA +CGTCTTAGGCATTTACAGTTCATAGTGAGGACCCGTCAGCAGCATCAAGCCGACATGCTA +TTGATACATAATAATTCAAACGCATGTGACGGGTAATAGAGTGAAGCAACGGCTCCGCCT +AGTATCGCGAGACTCAGACCTTATATAGTTTCCCTAACTACATTATGTCAATGTACCGAG +GGGTTCCCTAAGCATTTTGCGCTAAAGATCTACCACCTTAAGTTTCCTAAGCAATCCCTA +ACTAAACTTATACCGTGATCCTTAACACATTAAAATCATTTAATCGTTGAGTATTATAAG +TTGCTTCATTTTGCTAGGATATAAAATTTCGACTATAACTCTTAAGGCTCCAGATAGAAG +GCCGCATTTTGAAGCAAACTCTAGAGCGCTAACGTATCACAGACTTAGCTCAAGAGAATT +AAATATATTAAGGATATCATCAGAAGCTTTATCTCTGTATTATCCCTATTCCGTTGCGCG +CGTCTCACGAGAAATCATTAGTACGATTGTGACGTCGTAGTCACCAAGATTTAACCAGCG +CGTTGGCATCGTTAAACATTGAAATTAAAGGACATTAAGGGCCTGGAGCGGACTTTGTCG +TCTTCTACGTCGTTCTATTCTTGGAGCGACATATTCATAACGTTTATAACGTACAACTTG +CTCATGCTGAGTCTACTACTAAACCAGTTGTTCGCGCGGTGCTGATTGTACTACTCAGGG +ATTTAGTCATGAAACGAAGAAAGAGAGGGATATTCATGAGCGCTGCTTCGTTGCTTTACG +GGCTTATGTAAATTTGTTATAATGTATGAACTTTTAGGCTCTCATGTTCAAAGCATTTCC 
+GTCTTTGAGTAGCCCACCCAGACAAGGTTAGATATGAGACTGTTAGATTTATCCTGGGGA +AGATAGAAAATGATTGGAACGATTATTGCACTGTTGGAATTTCTATTTTAGGGTAGCCGA +TGAATCAAGATGTGAGAGGAATCATCACATTGATCACAAATTTATAAGTTAGAGTCATAC +GTACTATTCAGCAGGGCTGGTATTTGCTTTGTCGGGAGCGCATGCAAAAGTCGGCCATGG +CATCAATAATGGCAACAGAGACTCGGAGGTTTCCCTAGCAAAATACCCTTGTTATTAATC +CGTTCAGGACACGTTCCATATGTTAATAGGTCTCGTGGACTACCGAAGGCCGCTTGTGCT +CGAATGGAGTTAGAACTCATGGTATCTTGTGTATAATTCATGTGTCATAGTCATATCAAT +AGGACAAATGGATCGGACAATGGTATTAATGCCTTCCCACGGACAGCCACCCCAAATACC +TAAAATGATCAGACTTCTTAATACCCTTAAATTATTATCGTAGACAAGAAGTTCCCTTGT +CACGAGGCTATCCTGAGGTAAATTAGAAAACCTTCGGCAGACACCACTCTCTAGCAGATG +GTCGAGAATTGTCTGGTTAATAAGTAGATAGTAGAAAGTACTACTTGTTACTGATTTGAG +GACGACATCGCTCACAGCTCGCATGGCATGTCTAGGGTTCGGCAGCGCTGGGTTCATACC +TTGGTCTATTAGTGCAAGGGAGAACACAACGCAGTATCTCCGAATCCGACGAGCATGGTT +CTATGACTAAACCTCAACGATATTGCCTTAGTGTTGAATGATTTAGGATCGATACGGAGA +ATGACGCATTTACAAACCTAATGAGGCCGCGTAACGCAGATTCGCATGTTCTATTAACGC +GTCGCCTATCTTAGACAAATGGTGAGGACGTATCGATCAGCGGAAAAGTAATTTTCCCCT +TCACCTATAATTGGCGTTTTGCTTATGGTATAACGCCATTTTACCGTGTTGATCGATTAG +GTTAATTCCGTATCGGCCACCGTCCCTCTTTGAACACCTCCTGATATCGTACCCACTATT +CATAGTTCGTCGCGGTAGCGCACAAAACTCTAAAGCTTTGCCTTTAACTAAATTCGTGAA +TGGCTTAAACGGATGATAGGGATTACTGTCATGAATTCGATCATTGGATCGCCTATTCAG +AAATCTGATTAACAGGAAGTTCTCTTCTGGGGCACCTACCGCATTAACTTTGGTATTAGC +TAACCTTAGCGGACTATGACGGCCTAGGATCAACAAATGAATATTACATACAGCAGACGG +ATGAGTACGATGGTGAAAGGGGTTTACAAGCAGACACAACTATCTTTATGAACTAGTACT +TTCTTTCCACGATGCGTTGGTAAAAGTTTAAGCATAACGCGATCTGCGACGAATTATTAC +GCGAATTCCCGTTAAGGAAATACATTGAGATACTATCACGGGATCGGGTCACCTGGAACA +TGGATATTTGTGATTGTGTATTCGACCGTCGGATCTCTTCTAGAAGTATACCATAGTTCT +TGAGCGCTTTATAGCATATTAAGTGTGGGTTATTCCGATGCCTTGTCTATTGTCATAACC +CTAAACTCTAGGGTGATTAGTACATAGAACTGGCTCTCAAGCTTTAGGTATTGAACTTGA +GCTTCAGTTGTGAGTAAGCCGCCTGAAAGCTCAGGATGTGTCGTCCGCGCTCTTGCCCGC +CAATTCCGATGCACCCGCGTCTCTGGAACATTCATCTGAGTCTTTAGATTCAACGTATAA +TTGCAAAAGTATTTGCCTGCCTCATTTTCTTCTTCATCTTAGGAATTGGAGTGGCATTTT +AATATTGATTCGCTTATTATTTCATTTTCCATGGGAGTTCACGGTTGTAAGATAGTGTCA 
+CGAATCTACAAGGCCAGGTGGTCGATCGCAATGAAGTACAGCGTAGACTTTCAGAAAGGA +TCATTGCCTCGTCATGGCCGTTCAAAACTTCACTCCTCTAGGCTCACAGCCTGTGAACTC +AAGTAGTAAGCTAGGGTCACCCTAGAGTCACTTGCCTTAGAATATTCTGTTTTATCTGTT +AATCAGAAAGATGGTCTAACATGTTAATGGATACTTGGAGAGTCCGTATCTGTATCCTAC +TTAAATTCGTCATCAAAATGCTCAAGCGTTCTCGAAGGTATGTAGACTTGTATCTACGTC +AGTAATGTCTTGTGCCACAGAATAGGGCATATTGGTGTTTCGTTTTACAACATAAAATCC +GTTATCTGTATTGATAGACTGTGGGCCTAAATAGGGATCGTACGTCCATGATCAGGCCGG +AACGATTTATTACAACCACCAGACTACTGTCCCTCTCGATGAGGGGTGTTTATTATAAAA +GGGCTGAAACGGCGCCATTTGGTGCAATACCAATAGCTTGACATGTATGTAGAAACTTGG +CAATAGTATAGGGATAGACCCGGTTTTAATCGCGGTGAAATAAACTACGTACAATGGATT +GGTGTAAAGAACCTATAATCAGAATCAGCTTTGGGGTCAATCTCTTGCTTCTGTGAGGCC +GTGAAGTTAATTCTTTACAGAAAATAAATAGTCAGACAACCCAAATCATTTCATGTTTAT +CAAGATGAAAGGCTTACGGACCAACTGCGAAGGCGTGGCCTTGTTAATTTGAAGTTCATA +CCCTTGGGCGAGTTGCGGCAGATATCTCGCGGTCAAACCAATAAACAGCATAATGACTCT +GGGTGAGGGTTAATCTCTAGATCCTAAAACTCCCTGGAGGATTTAAAGTTGGTCAAGTAG +ACTGTTTCTACGTTTACGCATGAGTATCTTATAAGCTCTTTCTTTTGTCATATCCTGAAA +GTCGAAGACTCATCATTCAGATTAGTGCACCTAAAAGACTAAGCCGTATCACGGTTGGTA +TAATTCTCATTAATCAACGGTATACAATATGTGACAGATTACGCAGGAAGTTCCAGAAAG +ATTAGAGGCAAGTGTACGCCAAAAGGAGTCCATACCGGATCTCTCCCCTAGCTTGCACCG +CTCGCCAGCTATGACATCAATAGCACCTTCAGCTATTTAACAGATATAATTTATGCTTAG +AAGTAAAACCTCTGGAACAGTTCGAAACGATAATTTCGCGCCTGAAATATGAGTATGAGC +ACATGATGTAGGATATCGGGTAAGTATCGTAGTAGGACTCAGTAAACCATGTGGAAGCCC +AGGATAGATTTCAGTACTGAACAGCAATACAAACGAATACGAAAATGTCCTTTCTTACCG +GTCAAGGTTTATCAATTGAGTGGGACTGTCGTCCTATAAAGTCATCCCCTCATTGGACTG +ACACGTGTCCATTGTTCTTAAAAGCTAACCCGTTTTATCTTTTCGGCGGCTGACCTGCTT +CTAAATAGAAAACATCCAACGCGGGGAAGCAATTCACTATTTCATTTTGTATATGAAAAT +AGAAGGAACGTATGATCTTTATACCCCAGGTGGAACCGTCCGGAATTTCCTCTGATATTC +ACAAGGAGTGTTAACACTGTTAAACACGAACTATCCTTGTATAATTAAACCGAAGTGTCT +GTAACCCGTTACGACGATAGCGTCTAGCCCTTGTCGCGAAAGCCGAGATTCCACGGTCGA +TAAAGTATCACGTACCGTGTAATAGTCCGGAAGCACTGCCCCGCCATAACATAATAGAAT +TTCCCTAGATAGACGTAATGTCTAAACCCCTTGCACGAAACTCGATAATGTTTGTAATGC +TCGCGCTAGGTAGGTTTTCATGTTACATTACCAAAAGAACCATAATTTTAATGCTACAAG 
+TAGAGAATCCCTCTTAGACTCTTGATTAAACATAATCTCACAAGATTCGGAATTCCCGAT +TGATAGACCACAGAATCGAACATGGATCTGTACTCTTTAGACGCTTGTCTCAAGTAAATT +TTAATCCCTTACGCAATATGCTGGGTGCAAGAGTCGTAAATGCGTGGAGAACTGTATTCT +AATATAAGTTGCACGGGCGCTAGGCCGCGGGATGAAAACAAGTATGAGAACTAGATAGGT +AGTCCAAAACTGGGTTAATAGAAAATCAGAAATGATTAAATCCTAAGTTACGTTATCGGT +TGTAATTGACAGGGCGATACTGCGCTCGTTGAGTTAGAACACTTACAGTAAAACACATAG +TAATAATGAGGGCCTTCCGCTTTGTACACCTCATAATAGTTTTCAGAGTAGAGTAACACA +GTGTTTGGTCCTGCGGATCGGAAAAGGACGCTTATGCGACCACCGACCACCAATGGTTTA +TAATTCTGAGTAGTAGCAGTCAGCAGTCACAGTAATCGCGTCGTGATTCTTACTCACGCT +TTTGCGAGCCGTAGTAATATTCAGTAAAAGTGAAGATTCTCCAAATAATAAATGTTTGAA +GAGCGTTTATAGTAATAATCGACATCATTGTCAGTGAGGAGCGAAACCATCTGCGGATAA +GGCATGTCTGATTACGCCACGAAAGATAAAACCAACTACTGCATTGTTCCCCATCTACTT +GCTTAAAAGGTGACCCGTGCAAAAGACGGTAACATAACTTGAATTTGAGATCTCTGCCAA +CTTGGGGTTGTTTTCGACAGCTTTTCCTCAGACAAGCGTCTAATGTTTGCTCAATATTAA +TCCATCGTTATTATAGCTGCTAAGCACACGCGCCCATTACTAAAACTAAGACTAAGGAAA +ATAAGTCAGAGAACAGTGGAACCAGTACAGAAAACGCCACTAGATGGGCGATTGGTCAAG +CAGGTCGTATTCTGCCAACAAATTACATATCCATAGGGTAACTGGCTTCTCGTATTAAAC +CGTACTCTTAGTATCATGGACACTGATTAATGAGGACGGCGATTTGAGATATAGTGAAAG +TCACCCAGACATTATGAAATCATTTCTTCAGGGTTAGTGGGTATTTTGGGGACGACCAAA +CAGTTTGCGTTTTCTTTGGTCATTCCTCCTGCAGAGGTGGTTACCTAACCGATTAAGCAT +CAAACTATTCATAGATTCCTAGACTATCCCGATCAGCTTAGTATTACCTCTTACAAGTCG +TCGTGTAGACAAGAAGTCCTAGTGATAAATACCAAACATATCGCGGTCGGACCGTTAGGT +TACATTTGATGGTCTTTAAGGTTTAACAATGCCGCTCTTCTTAGCTGGGTAGGGACATTT +TGCCATTGCCGAACCTGAGTCCGTCCTTACAGCGATATATGATAACCGAGTGCATTAGAA +ATCTTCCTGCGCTGTGAATTTCGGGTGCAAGTATCACGTGATTGCCCTCAACTACAGTGA +GCAATTGAGCAACTTTCCAGATGTTTGAAGTTCTTCTGTCCTGAATGCTTCCGAGTAACT +TGGTAATGCTTCCATAGCTGTAGGAGAGGCGACGCTGTCGTAAATCGACAATTCGTATAA +CTCGTGCCCTGGGGAGATAGTGGATTAAATCAATTATTAATGGGGACAATAGGCCGTTCT +CAGCAGTGGACCATCCAAGGTCACCACCGGCGACTTAGGCCAACTATATGTTCCCTTCGA +TTTCGGTGATACAAAACGGCAAGGGTAGGACGATCAGAATAGTACGAACAATCGAATTGA +GAAACGTGTTAGTTTAGGGTGAAACCTATGTGAGACGAACATACCGGTTACCTGTCAAAC +GAAAGCGCGTGCTTTTCCCATCTTTAACCATCTATACATGGGTAAAACTTAATTGGTAGG 
+TAGTCTATACTGCACTTTTCTAAAGACTTCCATCAATGTGTAATAAGATCACCATTCTAT +ATGCTTCGGTTGATGCTCTTATGGTACCTGGAAATATTAGCGGCACATGGCATGTGCCTC +GTATTCCTGTATCCGGAAGTGCATTTTCCTTCGCCGGATTATTACTGATTGATCGATATT +ATTGAAAATGTGTAGCCTTTCGTACTATTACGTACTTCAATACCGCGTATTTTGTATTAT +AATGAGGCTACAATGATGCTGTTTCAGGCTAGAAATCACGGCTCAACGGATTAGTATTAG +GCGGTAGCATACTCCGAATTGGTACAATACAGGGACACCAGTTTTAACAGTCTCGTATAC +CCCATAGAATAACGATGGGCGTAAATACTAATAAGAGACGTTGCCTAGTTATATAAAAGA +ACTGTCTGACATCTATATAATAATCACTGGTCCAGCTTTTAAATAGTGCCGCGCACGTTG +CGTCCAGACACTAAGAACGGGGTTCATGATAAGGATAACCCAACGGTCTGTAGGCTATAA +ACCGACCATTTTGGCGTGTGTGGGATCACAAACAGACCTGCATAATTGCGAAAATGTCTA +TGGCGACCAAGTCGAGCAAGGTCACCTCTTTTCCACTGTAGTAACAACGCCAAGGACCAG +GCATATGGGTAAACGTCATGTCGGATTCCCTTTGCGAACGTACGGTTTTCAACCAGAAAC +AACCAATTATGCGACTAGGATACAATACAAATGTATGTTGGGTTCCCTGGCCGCAAATAA +TGGTTTCAGATAGGTTAGAAACGGATGAAACTTTACAATTATATTATCGTTATGGGTTCC +TAAAGAATATCGTCATGGAAACTTAAATAGCATAATCAGCTGATGTAGCAGGTGACATAT +CTTTGCTTAGCACTATTTCGTTAACTATGACTGACGGTTTAACCGGCCGCAGGTGTCCTC +TCATTGAGGTAGCGCAATCTGGTTTGCTATATAAGATTTACATTTTCATTGCTTATAGTC +TAGTGACTGGGGAGCCGTGGATTTTAGTAAGAGGGCGTTTTCACTAGGCAGCGCAAGAAC +GGTTATACGTGAAGAATCCGTCTCGAGTATTACGGAAGGCTATCCAGTCGTGTTCTTATC +ATTATGTCCCAAACAGCGAGCTGGAATTTCTGTAAAGGACTGTGGACCCGTAACTCCAAA +ATCGGGCCAGTATAGAAATTTGTCGCGCGCTAATAGCCAATAATAAGTCCTGATTGACTC +CACCCTATTACCAGATGGTCATTGGTAATAATAGCCAACGGGATTGGAGCTTGGCCTTAA +ACCCCGTTTTGAGGCCAATCAGGTGGTGCTATCACCTAAAACGTTGCTAGCAGTTAAACA +TCGGCCCACCTTAGGCTTTGGTTGAGTACGACAAGATACTGATTGATCTTCCCAGCAACC +TCATAATGATAATCAGTTTGGCTAAAGTCTCATAGTCAAGGACTAACATTACTAGAACAT +GTCCTAACTTTGTGTATGCGTATATCGGTGCCATATAGGTCACGAACAGGTTACAAAGCG +ACCAATAAAACCACTTTTCCCGACCATTACGTTTGTATCCGAGTTCTCATTCAATGCAGT +TTCTTAAATAGTGTTCCGCTCAACTTATAAGTATATTTATGGCCAAAACATGTAATTACG +TCGTTATTAGTCCCATGAGTATCTTACTGCATTAAGTGATTCAACGACTAAGTCAGTGGA +AAATCAAATGCCACATAGAACCTATGAGTATGGACAAGATTTCGCAATTATTATTCTTGG +CTTTGTGCGCTTCATTAGTTCATACTTGACTTAGGGCCCAAAATGGACATGTCATCCGTC +TAACTCTTTTCAAAATAGTAATAGTGCTGAGGTTTCACTGGTGAGTGCCCATTTTCATAC 
+AGATTGCTAAGATGCTGTTCTGATGCGTTACGTATGTTATTAGCGAAGAATACGCTTAAT +CTCCCCACCATCGGAAGCCTTATACTGTTAAAAGGGTATTAAAATTAAGGCACCTCTGTC +TAGTTACAACCAACATACGACTTTATACATCTCCGTGTTGATAGATGTGCATGCGCAATC +TGATGTCTGAAGTAGTAGAAATACATATAGACGTAACCTTCGTCCTTCTGGTCCTGTACA +ACGGCCCCTTAGTTTCTTGCGGGTTGCCCCACCGATAACTTCAGTGTCGAGAGTATTCTT +GGAAATGTATAAGTCGTCACCACGATTCTCGGACCGCAGCGAGCTGAATGCTTCGTTTAG +GTTAGAGCATCGGTAGACGTCGCCCTATAGCGTGCCTATACCAAGGCGGATCAAGGCCTT +GAAGTTACAGTAGAGAGTAGGAAAATAGACCACAAATATTCCCATTTTAAATGTCTCAGA +CCAACGTCATTATCTTGTATGTAACATGCCTCCAAGTCTCTGGAGACGCTTGCGTGCTAT +TTTCAGTAACAACTAATTGCAAACATACTGTACTGCGATTCTTGCTAAGCGTCTGATTAG +CTAAACGACTCTGACGACTATGGCGGTACAGGCGCCCTAAAACGTTATATTGCGCCCAAC +ACGCATCACCACTCCTTGTCTTTGCTTTCTATACTTCAACACCAGTGTGGGGCTATAAAG +TGAGTTCCCCAACGTTTATCTATGGCCGGCTTACAATCTTGAATTTCAGCACATCTTAAG +TGGAATAAAATAAAGGTTGACCCGGCCTTCCGTGATTAGATATCCTCATTGCTCGCCGGT +CACGAATCTCTAAGCTATTCTGTATAACACTCAAACCTTTAGACTTGTGCACATAGGATA +CTTGAAACTTAATCGATCCATACCATAGAAGCCAGCTTTATAGTAGGGAAAGCCCATCCA +CTGGGTGAATACCAAGGTCGAACATTAGACTCCAATCATTCGAAAGCGTAAACTACCTGA +GCGATTGTTACTACTGTTTAAATAGTATCATTGGAAGAAAACTTTATTTCGCTTTTGAAA +ATGCATAGGGGCGAGCTTTACGCCATGCTTCCGGTATATCCAAAATTATTGTAAAGCGAC +TTTATTTCTCCCTAACATTCAGTGTCACACAGCCGTCACCGATCCATTACCTCCGACTAT +GCAAATGACATATAGTTTAGTTATAGACCCTACGATATAAACAAATTGGTCTATAGGAAG +TAACCATTTCGGCATTTGCACGGCCGCCGCAATGCCGAGCCACACTTATCGTATGGCAAA +TGTAAATTCGTATCCGAATCTGATAATGTCCAGTGGTCGACTATTGCGCCACAAAGCGAC +ACTCGGTACATCACAGGGCATCAACGTGACGAAAGTGTATTAAAGGAAATGTTTCCATAA +TACGTTGGCAACATGTATTTTAAAATCCCGCAGTATTACCATGGGTCTTGGCTATAAAAC +ATCAGAGATCTATTTATCCGCAATAGAGTTTGCCTTAGAAAATTACTGCAAGTGCATTCC +AAATAATAAGTGATTGATTCGAATGTTGAATTAATACGAATTCTTGGAAAGGGATTCTTA +CGAAGTGTATCTAAATAATTCATTGAAGTAGATCAACTTTGGGTCTACGCAGCTGTTGGT +CTAAGCTAAAATAGGCTTTGTTGTCGCCCACGTTACTGGGAACTATAGATTGAGTTAGTG +AAGTTCAATATGAAGGTATTCAGGAGCTTATAAAAGTCGGATTGTCCACAACTTGATCAG +AGCGTCGCAAATATTGTTACTCCCACAGGCGGAAGACATTAATCCCAATCCGTTAGGAAA +GACGTGCAACTATTTGGCTCCGATAGCTTCTAACTCATTAGGACTGGAGATGATCAGTGA 
+AGAAAATCCATTAAACACAAACCTAATGTCATGAACCGATGCCTGTCTTTAGTCCTATCT +TTATTATTAAAAGCGCTCGAATCGTCAACAAAGCCCTTCTACCCATATCGTGCGTCTACG +ACCGAGGAAAGGGATAGACAAAACACTGCAGGTCATAGTATACCTTTGACTGCGCTTCGT +CCCCTAAACCAGAAACTTAAATGGGTCACAAATTCAATAAATGTCGCTCGACGGGATATC +ACTTGTACTTCGACAAAGAAGAAATAACCTTAACATAGTTCTTATTCGACACCTATAAAC +TAAAAGTCGCCTACCTGATTTGGTTTCTTCTAATGAACTAGGAGCGGCCTGAGCCATCTA +TTAATCTAGCTATATGGAAGCGTGTGAGAATTACCCATCACTGCTAAATAGAATGCAACC +GCCAAGTTAGAATCGTTCCTAAAGATAAATAACGGAAACTCCGGGTCCAACCTCAGAAAA +GGCCTAGGTGCGGCTCTCCCGGAGGGTATCTCGTTCATTTATACCAGTCGATCAAGGCCC +AAACCTATGCATGTACTTGCAGAGCCCTTTCTTTCATCATTTGGGAGGAATAAGGGGTTT +GATAATACAATAAGGCATGAAACATCGACAATTTATTCATAACCAATCAATGGCGACCTT +ACGATGAGCATTGCAGACCACGGCTGATTCATATGTTGTTAATAAAATTTGCCGATTTTA +TTCGATATACTTAAGACCTAGAATCATCCCGTTTATGGACATTACGTCATTGTGGTGGTG +AGTAACCCGGATGCTAAATACCGTAAGCTTCTCAAGTTCTTTGAATCCATACATAGAATT +ACAGTCATAGTACATATGAAAACGTAAGTAGCCGTACGCTATGTGCAATGCGCGCCATAA +GCTTTTACGAGTGTAGCTTCGTCATTATTTACGCGCTGCAGGTTGTCGACAGAACTGCAC +CGTTAAACCCTTTAAAGATACCATTTGAACTCTTACTGTACGCTATGGTGAGTATAGTAT +ACCCAATTTTCATAGACACATCACTTTATTTTGGCGTCTTGTCTTTCATTGAATCACCTC +AACACCCTATACTTGTTTAGCGCTAATTCCCCGTAATCAGAGATGGACAAGTGTTCATTT +GATGTTCGAAACGAAATAATTCTCGTACCCTACAATAGAGTCAAGACTTCACGGTATAGT +TATGGTCGGCGCATATTGATTATCGATCTAGATATAACGCTTTATCCGTTGGTTACCTTT +CACTTCAGACAGTTTGAAAATATTGCGCGAAGGTCTTGATACAATAGAACCGTTGGAAAA +CTATCCGAGCAGTTAGTGGATCAATATGCGGTAACTCTAAAGGAAGGTTGCAAAACCTGA +GTTCTGTGGCGAGTTGGATACGCACTATTCCGGTAGACCGTTTACTCAGCCTAGATCTAC +AGAATCGCCCCTGCTTTCACGAAGATATCTTTATATTGATTTGATGGTGTATACCAAGGC +TGGTACTCTACGTCGTCGATTGCTAGGAACTGGTAATACATTATTTAGCCGGAGAGTCCG +TGTTGGCGACAAAGTTTACGTGGTAGTAGTGGCAGGGTTATCGTTCATTAAACCGAAGCC +CTCCTGATCCGCGCACCACGATCCTGTTGAGACAAGGAAGTGTCCCACTCACTTACGATA +AGAACTAACCGAATCTTCGACAGAATAACCACGATTTGCATTCTCTATTTGGCTACCGCA +ATGTGGTTTTCAAGTCAACCCGGCCCGTAACCCTTAGAGATCGACGGACATTATGGGCCA +GAGTCCCTTTCTTAGCTTCCTTTTGCGTAAATCCCGGCACTCTATGTCAGTGCAGAATCG +GTAATCACGTATGCAGACTCATATTTTCTCGGGTACTCCCTACGCCGCGAGCAGCCCCTG 
+CATTTCTGTACCAAACCGCAAGAGGTGGTTTCTTAATCACGTACGAGTAATTGAGCGAGT +GATTTACTGAACCGTATCTAATCTTACGTTATCATAAGAATTCAAAATTGTATGCCAATG +CAGATGATCTATGTACCCGTCACTGTGTATTCGGTGCCCTAACCGTTAACGTAATCAGGT +ACCTGTGAGTAGTATCATGGAACACAGCCAATCACTGGGAGCAGTTTAGTAAAAGCTAAG +GAAAAGCAAGTAACGAGATAACTTATTTTGATGGTGTTTAACCCCTTTAAGACGGTCATT +GATTCGAGACTGCGCATTCCCCACACTTGGTAACGAACATCTTGCCAGAAAGTAGTGCCC +ACACTTAGCGATCAGTTTAAAATGGAATTGCTGATCTCCGGAGTTTGTCTATAAGTTATT +CGAGAAATGCAAGATGGCTAATACGGTGAGTTACTTAGAACGACAATTAGCTTATTTCCC +CTGAGACGATTTGATATTTACAAGCCGATAACAAAAGACCAGTGAAAAGCGAAATTATGT +AGAATTGAAGGTCGAAAGTCGGTTTAGTGCGTCTTCTATGTACGGATAGCGTATGACGCC +TCTGAAATCCCTTAGTATCACATAAAGTTTTCCACTTACGATGACCAACGCAGGAATGCA +TCGGTCTTTTAACTACCCACTGACTATAAATTTGCGAATTCATATCCACGTAAGAATTAC +CACATTCTACAGACTAAAGGACGAATTTGTTAGTGTAGGCCCACCTGGTATGTGAAACCT +TCCCTGTTTCAAACTTGGGCATAAAGAGCACTCATACGAATCTAGGTTGTTACGTATTAT +GCTCCCTATCTAAAGTATTTGAGTGTTTGCATAAAGCTTATAAAGTACCTTTTACTTTCC +ATATCTTATATTCAGCGGCGCAATTACACCATACATATGAGGTTGAATTTAAGGGAGTAT +ACGAATGTAACTCCTATTTAAATAGACAATTTACTCATACTGGCTACGGGAAGACTCAAT +GAGTTGTTTGACTCGTTAAGCCGTTTTATACTAATTTACTACTAACACCCAGGAATCTGA +GATAACGTCACGGATCAGCGGCACCTGAAACTTTGTTATTACTTATATAAAGGCCTGTAA +ACGGCCATCCATCCGGGTTGCCGAACTTATTATTATTAACCGTAAAATCACTCATAAAAG +CTTGTGAAACATTGAACACTTTAGGCGGCACGCCCCGTTGATAAGAAAATTATAGAGGTG +CGGAGGGTTACTATTTTCAAAGTTGAAGCTATGCGCGCCAGCCACCATTAATCAAGTTGT +CCATCCATGGGATCTCAACGTGCAATGTTGGTGCCCGAGTATTCCATCCTGTTCGCACCG +ACAGCATAATTGAACTATGTCGAACCCTTAGCTAGTACACTGTGTCCATATACGGATTTA +ACAACTAATTGTCATAAAACTTTTGGCCTGGCCTATTTACAATAGTCTAGTTTACAATAC +TTTCCACGTTTGTCCTTAGAAACTATATAAGTGAGACCATCTAATATCCGCACTAACTGT +GTCGTACTCAAGTTGTACGGCAGACGGCGGCCCCGTGCGAAAGAAGAGCGATTTGCTAAT +AGTTTCCCATACATGTATAGATCACACAGACTTAAGAAGATGTCTTGATATAGAAAATAC +GTAACTGTGTATGATAAGACTTGCCGCCATTCTGCGTAGTTGAAAATAAAATTAGAGGAT +TTTAGTCTGTGCCCTAATTTATTTACCAATGCCAAGCTGCAAACGAGGTTGTAGGCTGTA +TCGCAACAAAAGCGATCTCTGGTGGTGTTCAGCTATCTTAGTACTACTTCTTAATCGGAC +ATACAATCTAATATAACGCACATCAAAACGGTGAAAGATAGCCACCCATAATTAATCTTC 
+TCGCTAGATGCTACGACTCCCTGCGTCGTAAATTATACTGGTGACAGTTGAATGGACCGT +GAATGACCCGCACGTCTATAACCAGGTCCGTGTAAACGCATGCCTATAGTTGCGTATTGC +GTCGGAGTAGCAGATCGACGTATGCTGCGAGACGATACCGAGTCAAATCATTCACCCCAC +ATACGTTTGAGATCCAAGTTATCTGGGTAAAGCCTAGGAAATGCGTTTCTAGGATATCAG +TAGAACTTTTATGGCGTTTATCTAGAGACTGGTTAGATGGCGTATAAACCTTCCGTTTTG +AAAGGAAACTATGAAAAGAGAGCATGTTGACAAAGAGAGAAGATCGGGTTTACATACATA +CTACCTCGTAATCATTTATTTAGTTCCCCATAATCTCCGTGGTGTGATACCTGGGTAATG +ATGACGTTGTCTTAATGCTAAAGCAATGTACAGGCCGTATTACTTAAAGGCAACGCATGA +GAGTGCGTTATTACATCAGGACTCCATGTTTTCTTTTATACGCGTGGGAACGGTCGACAC +GTCAAGACAGACTTAAGGGGTAAGGTAATCTCAATCGCCTACTTTCCTAAATTTCAGGAT +GCCATATAAGCATGCCACCCTTTATGTCGTTCACTTGCAGGGCATGCCAGCTTGTAGATA +ATAAATGGACTTTCTATTTGTTAGGGGAACATCAAGCAATCAAACTACAATAATATCCGA +TACGCACCAAGGGCTATTTAAGGCGCTGAGTGAGCTCAAATCAAAACTTTCTCCTACTTT +ATGAATTCTTAAATATTTAGAGAGGAAAGCTCTCATATCAGTCTATCGTGAGCCGTATGT +GTAACATTGAGTATATCTCGATTTCTTCCTCGAGCAAATTTGATTTTCCTGGTGTACCGG +ATATATACCTTGAACCACCGAAGCCAAAGTGCCTGTCTTTCACGATATGATTTCAAGCAG +CGCTCGCTCTAGCTACGCCTAACAGGACTATGACATGGTAAACAAAAGGATTTTGAGAAC +CAAACAAAATCAATCGGTAACTTTTCACCCTCATGGAACACTGAAGAGAATCTTAAGAAT +TAGTCGGGGTGCCGACTCGCAAAACCTGGAAGGTAAACACATCAGACTCTATCATGCAAA +CACGAAAATCAATTGGCTTGTTAACTTTGGGGTCACTTGTATCCCTATCTTTTGCGTCAT +CATCGAAAAGATTACGCCATGGCAAGTCTATAAACCATGACAGGAAAGATTGGGCCCTTA +GCAAATACGGATGGGTCTCACCGGTTGGGCAGTCGGATAAATTATCACACATGGTGCAAT +CTGTATCTGGGATTTTACCGAAATTGAGGTGGCGCTTTTACGCCGTATGATTTTCGCACT +CCTTCATCGCACTTAAATCCAGTGAACCTCTCATACAGTAATCATTAGGATTCGTGGACG +ACAAGCTCTTTTCAAACTAGCCGTCCAACAACAACCGTAAGGCAAAGTTGCTAGTCCGTA +CACTTACATGATTCTATTCACAGGGCTCCCCGATAGGATGCTGTGCTATGTTTAATATGG +ATAATCGGCATAGCGTAGCCAGCCCATCCAATCGGGTTGATAAACGAACCCAGCGGATAC +GTAGTATGCCAGGATTGTCAATTCTACAACCGTTTTGGCAGCCCAATGGTCGGCAGGTTA +AAATGGGTGATGGTATGAACATAGCTTTACGAGGCAAGGAAGGTTCATACCGTCTTCACT +CTCTAATCTCTTACATCAGGCCTCGACAATTGATAACGTCCATTCCTTTCTATAACACAG +GAGGTCGGAATTAAATCCGACTTAAAAGGTTAAAGTCTCTGAAATCATTCTAAAGCGGGA +CCGCTCGCCAAAATCTTCTATGATCATATGTTCTCGATGAACGGATGAATGCTATGACTC 
+ATATGCTGGTTCAGCTTAGAAGTGATGATCCGTGTGAGACAATTTTGGTTACATGCACTC +AAATCCTATAGGTTTTACTACGTAACAATAGATGCAGTTCGTATGTAGCTCGAACTGCAT +ATAACAAAATCAATCTGGCGTAGGAAAGGTGTACTATATCCGTGTATATATAATAGAATG +CTTCTCGTTTGTGTGAAGTGGGTATCTAATTAATACGGTTTCTGTGAGCCAATCGTGAAG +CCACCTATACAACGTAAGGAATATGTAGTGTAACTGTTTCCTTGATGTTTTAAATTAATT +ATTTTCTTGTTGATGATACTGATTCCTGTACGCATACTCAGTTAAATAATGCCGAGGATG +ATGCTTCATCGCTCGTATGTTCTTCGATTGATGAATACAGTAACTCATGACTCATCGAAG +TGATGTTACTAGAAAACGACCTCGCCCACCTCTGAGGCCACCATACTGGATATACTCAAT +TGAACCCACCACCCTCCTGATGATTCGTCACTCTATTGCCCTATCTGAGCAACTATCAAC +AGCGGTTTAACGAGGATTCGTTAATATCTTGCTGTTATAGGCAGTGGGAACATATCGAGC +ACTTTGATGGTTACATTATCGACTGATTCGGTCTGTACTTAGGATTCGTACTATAATGAA +ATCTTTTCAATTAGTTCGGTGTAAAGCGCCGTGTGAAGCTTACATGAATATTTTACTTCA +GTAAACTGCAGGTCCCTAATAGGCACTTCCGGGAGGTCGAATTAGCGTGTATTCTAAACT +ACGGCCCACTTTAGACGGCGATTGGGTGCTAAACAGAGGCTATCCGAGTAGCTAAAAGAT +TTTGTGCGCCTTCTATTGCGCTGGGCGAAGAATCGCGTACCTTTCTTGTGGTATAAGAAC +GAGAATCATATATCAATAGAGTTTGGTGGCGAATCCTGTCCTGATCACGGTAATAATTAG +GCCTGGAGTATCTAGTAGGTCTAAAGCGAGTTGCACTCGGCATAGGGTAAAGTGATAAAA +TTTAATTAATGATAGCTTAAAACTCGCAAAGTCGTACAGATTTTCACGCCATTTGAAATA +CACGGTAGTCATGTGGGTAAGTCTGAAAAGCCGGTTGATCAGTTCCATTGAAACTATATT +GCAGATACCTTATACTCATAATACTTTGGGATTGCGCTAATTATAGTATCATTTTAACGT +AATTGAATAATATTGTACATGTCAAATCGGTAGCGACTATTTTCGCAGCTCTTACGTAGT +TAAAATAACCCATCCCCAACCGAATAAGATCTGTAGTACTTCGCAGGCACGTCTTGACCG +TGTCGGGCCCATTGTATTACTAACATACTTGCCTTAATAAGATTAGACCACACAAGTGGT +ATCTTCATATGCTTTAGGCTTTTGATACGGCAAGCCTAAAAGGTGAATAAATTCATAAGT +GGACAACAACCTTCTTTAATCAAACTCTGGAGTAGCCAGATTCGTACACCAATTCATATA +TCATTTTCATTACGAGCAGTGAGGTAGGAAGAGACAGAGCCTAGTTTTGAAACCACGGGT +AGAGCTCACTAATTAGGACGTACTGGAACCTACGTGGGGAAAAGAGAAGTCGCAGTAGGA +CTCCAGAGCGATACACTTGGCTACGACTTGGATATGAAATACGAAACATTTAGCTTTAGG +AAACTCTCTAGCTCGCGTGTGTATTTCACTGAATGGCATCCGGCAGAAGAAAGGCTGAAA +TTTTAAAACGTAGTCACGTGCTGCCAGCCAATCATGGTGGGAGTAAGCGTCTCAGGTAAG +ACAGACCTGCCTCTGCCACCATTATGACAACCCGTAGAAAGATGTTACATAAGTACGCGC +CGCAATGTCATTAAATCCTCTATTCGTACCGACATGGTGAGTCAAGAAGAACAGATATGT 
+CAGGTTAGTTCCTTGCCTTACTAAACTAGCCGAAGTCTGGTTTTACATATTAGCATATGT +GATCTTGTTAGCATCAAGTCAAATTCTTGTGGATGCTGAAAAGAGCAAGCAACAAAGTCC +TATACAAGGATTGTTTTAAACCAATGAAGCAAACAGGCGGGTCGGAACCTCCATCATGAT +AAATGTGGTTTCCGTATATTATAAGACCATTTGTAAAAGGATCGTACTATTGTGGACACG +TTTAGCGTTTTATATGAATTCTTTGATGATTATCAGAGGCGTAGCGGGTAGTCTATATTA +TTAATTCCCATTCGTGTGACACAATATGTACAGAATATAGACTGAACAGGGCCCCGAATA +GCCTTATTTCTAGCAGATTTCAAAGTTACATGATCCGTTTCATATCTCGAGGGTCATTCG +GTGACCTACATCATATAACTCGGGGCTTAATACAATGGATCTGGTACTTTTATATGCTAT +TCCTTCCGCACGTGCTGCAGTTTAGCTAAACGACTAGCTAGATAAGAAAGGTACCTTAAC +GTTAATATTACTAGCAATAACAAGATTATTGTGGTGAAGACACATATTGCCCTAATGCTC +AAATCTTGAAAGTAGTAAAGGATTGCGTTAGATGGCCCGCTGATTGACCGTATGTAGGTG +AGTGCCATTACTGTGGGCTGGCAGGATGGTGGTGCAACTGGTGTCAATATTCATAGTGTC diff --git a/tests/shared_data/generate_core_data.sh b/tests/shared_data/generate_core_data.sh index d526ab0..b775069 100755 --- a/tests/shared_data/generate_core_data.sh +++ b/tests/shared_data/generate_core_data.sh @@ -6,17 +6,16 @@ # CLI smoke tests, and container validation. 
# # Outputs (all committed to git, ~700K total): -# chr_test.fa + .fai - 20kb synthetic reference genome (2 gene regions) -# variants.vcf + .gz + .tbi - 10 het SNPs across 2 samples -# annotation.gtf - 2 genes, 6 exons +# chr_test.fa + .fai - 20kb random reference genome (high complexity) +# variants.vcf + .gz + .tbi - 30 het SNPs across 3 samples (phased) +# annotation.gtf - 12 genes, 16 exons # regions.bed - Peak/region file from exon coordinates # sample{1,2,3}.bam + .bai - Aligned reads (wgsim + bwa) # bwa_index/ - BWA index for chr_test.fa # expected_counts.tsv - WASP2 counting output baseline # expected_analysis.tsv - WASP2 analysis output baseline (placeholder) # -# Prerequisites: samtools, bgzip, tabix, wgsim, bwa, bcftools -# Conda env: conda activate WASP2_dev2 +# Prerequisites: python3, samtools, bgzip, tabix, wgsim, bwa, bcftools # # Usage: # cd tests/shared_data @@ -45,9 +44,10 @@ check_tool() { echo " Try: conda activate WASP2_dev2" exit 1 fi - echo " ✓ $1 found: $(which $1)" + echo " OK $1 found: $(which $1)" } +check_tool python3 check_tool samtools check_tool bgzip check_tool tabix @@ -58,45 +58,35 @@ check_tool bcftools echo "" # ----------------------------------------------------------------------------- -# Reference genome (reuse nf-rnaseq integration chr_test.fa) +# Clean up old generated files (force full regeneration) # ----------------------------------------------------------------------------- -echo "[2/8] Creating reference genome..." +echo "[2/8] Generating random reference genome via Python..." 
-INTEGRATION_FA="../../pipelines/nf-rnaseq/tests/data/integration/chr_test.fa" +# Remove old reference and index to force regeneration +rm -f chr_test.fa chr_test.fa.fai -if [[ -f "chr_test.fa" ]]; then - echo " chr_test.fa already exists, skipping" -else - if [[ -f "$INTEGRATION_FA" ]]; then - cp "$INTEGRATION_FA" chr_test.fa - echo " ✓ Copied chr_test.fa from nf-rnaseq integration ($(du -h chr_test.fa | cut -f1))" - else - echo "ERROR: Could not find source genome at $INTEGRATION_FA" - exit 1 - fi -fi +# Generate a realistic random reference with the Python script +python3 generate_reference.py > chr_test.fa # Index FASTA -if [[ ! -f "chr_test.fa.fai" ]]; then - samtools faidx chr_test.fa - echo " ✓ Created chr_test.fa.fai" -fi +samtools faidx chr_test.fa +echo " OK Created chr_test.fa ($(du -h chr_test.fa | cut -f1)) + .fai" echo "" # ----------------------------------------------------------------------------- -# Annotation GTF (reuse from nf-rnaseq integration) +# Annotation GTF (keep existing if present, otherwise copy from integration) # ----------------------------------------------------------------------------- -echo "[3/8] Creating annotation GTF..." +echo "[3/8] Checking annotation GTF..." 
INTEGRATION_GTF="../../pipelines/nf-rnaseq/tests/data/integration/integration.gtf" if [[ -f "annotation.gtf" ]]; then - echo " annotation.gtf already exists, skipping" + echo " annotation.gtf already exists, keeping" else if [[ -f "$INTEGRATION_GTF" ]]; then cp "$INTEGRATION_GTF" annotation.gtf - echo " ✓ Copied annotation.gtf from nf-rnaseq integration" + echo " OK Copied annotation.gtf from nf-rnaseq integration" else echo "ERROR: Could not find source GTF at $INTEGRATION_GTF" exit 1 @@ -106,52 +96,146 @@ fi echo "" # ----------------------------------------------------------------------------- -# VCF with 10 het SNPs across 2 samples +# VCF with 30 het SNPs across 3 samples (REF alleles from actual reference) # ----------------------------------------------------------------------------- -echo "[4/8] Creating VCF with 10 het SNPs..." - -if [[ -f "variants.vcf" ]]; then - echo " variants.vcf already exists, skipping" -else - # Gene 1 (INTGENE001): exons at 500-1500, 2500-3500, 4500-5500 (+ strand) - # Gene 2 (INTGENE002): exons at 10500-11500, 12500-13500, 14500-15500 (- strand) - # Place 5 SNPs in each gene's exonic regions - # - # Three samples matching BAM filenames: sample1 has all 10 het, - # sample2 has 8 het + 2 hom-ref, sample3 has 6 het + 4 hom-ref - # Sample names MUST be lowercase to match BAM SM tags and samplesheet - - cat > variants.vcf << 'EOVCF' -##fileformat=VCFv4.2 -##fileDate=20260218 -##source=WASP2SharedTestData -##reference=chr_test.fa -##contig= -##INFO= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 sample3 -chr_test 750 snp001 C T 100 PASS DP=50 GT:DP 0/1:50 0/1:50 0/1:50 -chr_test 1200 snp002 T G 100 PASS DP=50 GT:DP 0/1:50 0/1:50 0/1:50 -chr_test 2800 snp003 A C 100 PASS DP=50 GT:DP 0/1:50 0/1:50 0/0:50 -chr_test 3200 snp004 G A 100 PASS DP=50 GT:DP 0/1:50 0/0:50 0/0:50 -chr_test 5000 snp005 G T 100 PASS DP=50 GT:DP 0/1:50 0/1:50 0/1:50 -chr_test 10800 snp006 T C 100 PASS DP=50 GT:DP 0/1:50 0/1:50 
0/0:50 -chr_test 11200 snp007 A G 100 PASS DP=50 GT:DP 0/1:50 0/1:50 0/1:50 -chr_test 12800 snp008 C A 100 PASS DP=50 GT:DP 0/1:50 0/0:50 0/0:50 -chr_test 13200 snp009 G T 100 PASS DP=50 GT:DP 0/1:50 0/1:50 0/1:50 -chr_test 15000 snp010 A C 100 PASS DP=50 GT:DP 0/1:50 0/1:50 0/0:50 -EOVCF - echo " ✓ Created variants.vcf (10 het SNPs, 3 samples)" -fi +echo "[4/8] Creating VCF with 30 phased het SNPs..." + +# Read actual reference bases at SNP positions using Python +# This ensures REF alleles match the generated reference exactly +rm -f variants.vcf variants.vcf.gz variants.vcf.gz.tbi + +python3 - << 'PYEOF' +import sys + +# Read the reference sequence +with open("chr_test.fa") as f: + lines = f.readlines() +seq = ''.join(line.strip() for line in lines if not line.startswith('>')) + +# Deterministic ALT allele mapping (always different from REF) +alt_map = {'A': 'C', 'T': 'G', 'G': 'T', 'C': 'A'} + +# SNP positions (1-based) spread across gene regions in annotation.gtf: +# Gene1 (500-5500): 750, 1200, 2800, 3200, 5000 +# Gene3 (5800-6300): 6000, 6100 +# Gene4 (6500-7000): 6700, 6800 +# Gene5 (7200-7700): 7400, 7500 +# Gene6 (7900-8400): 8100, 8200 +# Gene7 (8600-9100): 8800, 8900 +# Gene2 (10500-15500): 10800, 11200, 12800, 13200, 15000 +# Gene8 (15800-16300): 16000, 16100 +# Gene9 (16500-17000): 16700, 16800 +# Gene10 (17200-17700): 17400, 17500 +# Gene11 (17900-18400): 18100, 18200 +# Gene12 (18600-19100): 18800, 18900 +snps = [ + (750, "snp001"), + (1200, "snp002"), + (2800, "snp003"), + (3200, "snp004"), + (5000, "snp005"), + (6000, "snp011"), + (6100, "snp012"), + (6700, "snp013"), + (6800, "snp014"), + (7400, "snp015"), + (7500, "snp016"), + (8100, "snp017"), + (8200, "snp018"), + (8800, "snp019"), + (8900, "snp020"), + (10800, "snp006"), + (11200, "snp007"), + (12800, "snp008"), + (13200, "snp009"), + (15000, "snp010"), + (16000, "snp021"), + (16100, "snp022"), + (16700, "snp023"), + (16800, "snp024"), + (17400, "snp025"), + (17500, "snp026"), + (18100, 
"snp027"), + (18200, "snp028"), + (18800, "snp029"), + (18900, "snp030"), +] + +# Genotype patterns for 3 samples (phased, 0|1) +# sample1: 26 het, 4 hom-ref (high het) +# sample2: 22 het, 8 hom-ref (medium het) +# sample3: 21 het, 9 hom-ref (medium het, mostly different SNPs from sample2) +genotypes = [ + # snp001-005 (Gene1) + ("0|1", "0|1", "0|1"), + ("0|1", "0|1", "0|1"), + ("0|1", "0|1", "0|0"), + ("0|1", "0|0", "0|0"), + ("0|1", "0|1", "0|1"), + # snp011-012 (Gene3) + ("0|1", "0|1", "0|0"), + ("0|1", "0|0", "0|1"), + # snp013-014 (Gene4) + ("0|1", "0|1", "0|1"), + ("0|0", "0|1", "0|1"), + # snp015-016 (Gene5) + ("0|1", "0|1", "0|0"), + ("0|1", "0|0", "0|1"), + # snp017-018 (Gene6) + ("0|1", "0|1", "0|1"), + ("0|0", "0|1", "0|1"), + # snp019-020 (Gene7) + ("0|1", "0|1", "0|1"), + ("0|1", "0|0", "0|1"), + # snp006-010 (Gene2) + ("0|1", "0|1", "0|0"), + ("0|1", "0|1", "0|1"), + ("0|1", "0|0", "0|0"), + ("0|1", "0|1", "0|1"), + ("0|1", "0|1", "0|0"), + # snp021-022 (Gene8) + ("0|1", "0|1", "0|1"), + ("0|1", "0|0", "0|1"), + # snp023-024 (Gene9) + ("0|1", "0|1", "0|0"), + ("0|0", "0|1", "0|1"), + # snp025-026 (Gene10) + ("0|1", "0|1", "0|1"), + ("0|1", "0|0", "0|1"), + # snp027-028 (Gene11) + ("0|1", "0|1", "0|0"), + ("0|0", "0|1", "0|1"), + # snp029-030 (Gene12) + ("0|1", "0|1", "0|1"), + ("0|1", "0|0", "0|1"), +] + +with open("variants.vcf", "w") as f: + f.write("##fileformat=VCFv4.2\n") + f.write("##fileDate=20260306\n") + f.write("##source=WASP2SharedTestData\n") + f.write("##reference=chr_test.fa\n") + f.write(f"##contig=\n") + f.write('##INFO=\n') + f.write('##FORMAT=\n') + f.write('##FORMAT=\n') + f.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample1\tsample2\tsample3\n") + + for i, (pos, snp_id) in enumerate(snps): + ref = seq[pos - 1] # 0-based index + alt = alt_map[ref] + gt1, gt2, gt3 = genotypes[i] + f.write(f"chr_test\t{pos}\t{snp_id}\t{ref}\t{alt}\t100\tPASS\tDP=50\tGT:DP\t{gt1}:50\t{gt2}:50\t{gt3}:50\n") + +print(f"Created variants.vcf 
with {len(snps)} SNPs, REF alleles verified against reference", file=sys.stderr) +PYEOF + +echo " OK Created variants.vcf (30 het SNPs, 3 samples, phased)" # Compress and index VCF -if [[ ! -f "variants.vcf.gz" || ! -f "variants.vcf.gz.tbi" ]]; then - rm -f variants.vcf.gz variants.vcf.gz.tbi - bgzip -c variants.vcf > variants.vcf.gz - tabix -p vcf variants.vcf.gz - echo " ✓ Created variants.vcf.gz + .tbi" -fi +bgzip -c variants.vcf > variants.vcf.gz +tabix -p vcf variants.vcf.gz +echo " OK Created variants.vcf.gz + .tbi" echo "" @@ -161,16 +245,8 @@ echo "" echo "[5/8] Creating regions BED..." if [[ -f "regions.bed" ]]; then - echo " regions.bed already exists, skipping" + echo " regions.bed already exists, keeping" else - # Extract exon coordinates from GTF → BED format - # GTF exons from annotation.gtf: - # chr_test 500-1500 (exon 1, gene 1) - # chr_test 2500-3500 (exon 2, gene 1) - # chr_test 4500-5500 (exon 3, gene 1) - # chr_test 10500-11500 (exon 1, gene 2) - # chr_test 12500-13500 (exon 2, gene 2) - # chr_test 14500-15500 (exon 3, gene 2) cat > regions.bed << 'EOBED' chr_test 499 1500 INTEXON001 chr_test 2499 3500 INTEXON002 @@ -179,7 +255,7 @@ chr_test 10499 11500 INTEXON004 chr_test 12499 13500 INTEXON005 chr_test 14499 15500 INTEXON006 EOBED - echo " ✓ Created regions.bed (6 exonic regions)" + echo " OK Created regions.bed (6 exonic regions)" fi echo "" @@ -190,14 +266,12 @@ echo "" echo "[6/8] Building BWA index..." 
BWA_INDEX_DIR="bwa_index" -if [[ -f "${BWA_INDEX_DIR}/chr_test.fa.bwt" ]]; then - echo " BWA index already exists, skipping" -else - mkdir -p "$BWA_INDEX_DIR" - cp chr_test.fa "$BWA_INDEX_DIR/" - bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -3 - echo " ✓ Created BWA index ($(du -sh $BWA_INDEX_DIR | cut -f1))" -fi +# Always rebuild BWA index when reference changes +rm -rf "$BWA_INDEX_DIR" +mkdir -p "$BWA_INDEX_DIR" +cp chr_test.fa "$BWA_INDEX_DIR/" +bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -3 +echo " OK Created BWA index ($(du -sh $BWA_INDEX_DIR | cut -f1))" echo "" @@ -216,10 +290,10 @@ simulate_and_align() { local frag_size=$3 local frag_std=$4 - if [[ -f "${sample_name}.bam" && -f "${sample_name}.bam.bai" ]]; then - echo " ${sample_name}.bam already exists, skipping" - return - fi + # Always regenerate when reference changes + rm -f "${sample_name}.bam" "${sample_name}.bam.bai" + rm -f "${sample_name}_R1.fq" "${sample_name}_R2.fq" + rm -f "${sample_name}_R1.fq.gz" "${sample_name}_R2.fq.gz" echo " Simulating ${sample_name} (seed=${seed}, frags=${frag_size}bp)..." 
@@ -255,7 +329,7 @@ simulate_and_align() { gzip -f "${sample_name}_R2.fq" local read_count=$(samtools view -c "${sample_name}.bam") - echo " ✓ ${sample_name}.bam: ${read_count} aligned reads ($(du -h ${sample_name}.bam | cut -f1))" + echo " OK ${sample_name}.bam: ${read_count} aligned reads ($(du -h ${sample_name}.bam | cut -f1))" } # Sample1: standard RNA-seq-like fragments (seed 42) @@ -283,13 +357,13 @@ validate_file() { if [[ -f "$filepath" ]]; then local size=$(stat -c%s "$filepath" 2>/dev/null || stat -f%z "$filepath" 2>/dev/null) if [[ $size -ge $min_size ]]; then - echo " ✓ $filepath ($(du -h "$filepath" | cut -f1))" + echo " OK $filepath ($(du -h "$filepath" | cut -f1))" else - echo " ✗ $filepath exists but too small (${size} bytes, expected >= ${min_size})" + echo " FAIL $filepath exists but too small (${size} bytes, expected >= ${min_size})" ERRORS=$((ERRORS + 1)) fi else - echo " ✗ $filepath NOT FOUND" + echo " FAIL $filepath NOT FOUND" ERRORS=$((ERRORS + 1)) fi } @@ -298,9 +372,9 @@ validate_bam() { local bam=$1 if samtools quickcheck "$bam" 2>/dev/null; then local count=$(samtools view -c "$bam") - echo " ✓ $bam passes quickcheck (${count} reads)" + echo " OK $bam passes quickcheck (${count} reads)" else - echo " ✗ $bam FAILS quickcheck" + echo " FAIL $bam FAILS quickcheck" ERRORS=$((ERRORS + 1)) fi } @@ -346,6 +420,20 @@ validate_file "sample2_R2.fq.gz" 1000 validate_file "sample3_R1.fq.gz" 1000 validate_file "sample3_R2.fq.gz" 1000 +# ----------------------------------------------------------------------------- +# Quality report: MAPQ + proper pairing +# ----------------------------------------------------------------------------- +echo "" +echo " --- Alignment quality report ---" +for sample in sample1 sample2 sample3; do + total=$(samtools view -c "${sample}.bam") + mapq_gt0=$(samtools view -c -q 1 "${sample}.bam") + mapq_pct=$(python3 -c "print(f'{100*${mapq_gt0}/${total}:.1f}')") + proper=$(samtools view -c -f 2 "${sample}.bam") + 
proper_pct=$(python3 -c "print(f'{100*${proper}/${total}:.1f}')") + echo " ${sample}: ${total} total, ${mapq_gt0} MAPQ>0 (${mapq_pct}%), ${proper} properly paired (${proper_pct}%)" +done + echo "" if [[ $ERRORS -eq 0 ]]; then echo "===================================================================" diff --git a/tests/shared_data/generate_reference.py b/tests/shared_data/generate_reference.py new file mode 100644 index 0000000..24bbc86 --- /dev/null +++ b/tests/shared_data/generate_reference.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +"""Generate a realistic ~20kb synthetic reference genome for WASP2 tests. + +Properties: +- Single contig named 'chr_test' +- ~40-45% GC content (realistic for mammalian genomes) +- No homopolymer runs longer than 5bp +- High sequence complexity for unique k-mer mapping +- Deterministic output (fixed seed) +""" + +import random +import sys + + +def generate_reference(length=19800, seed=12345, max_homopolymer=5, line_width=60): + """Generate a random reference sequence with realistic properties.""" + rng = random.Random(seed) + + # Weighted nucleotide frequencies for ~42% GC content + # A=29%, T=29%, G=21%, C=21% + bases = ['A', 'T', 'G', 'C'] + weights = [0.29, 0.29, 0.21, 0.21] + + sequence = [] + run_count = 0 + last_base = None + + for _ in range(length): + # Pick a base with the desired frequency distribution + base = rng.choices(bases, weights=weights, k=1)[0] + + # Enforce max homopolymer constraint + if base == last_base: + run_count += 1 + if run_count >= max_homopolymer: + # Force a different base + other_bases = [b for b in bases if b != base] + other_weights = [w for b, w in zip(bases, weights) if b != base] + total = sum(other_weights) + other_weights = [w / total for w in other_weights] + base = rng.choices(other_bases, weights=other_weights, k=1)[0] + run_count = 1 + else: + run_count = 1 + + last_base = base + sequence.append(base) + + seq_str = ''.join(sequence) + + # Verify properties + gc_count = seq_str.count('G') 
+ seq_str.count('C') + gc_pct = gc_count / len(seq_str) * 100 + + # Check max homopolymer + max_run = 0 + current_run = 1 + for i in range(1, len(seq_str)): + if seq_str[i] == seq_str[i-1]: + current_run += 1 + max_run = max(max_run, current_run) + else: + current_run = 1 + + print(f"Reference stats:", file=sys.stderr) + print(f" Length: {len(seq_str)} bp", file=sys.stderr) + print(f" GC content: {gc_pct:.1f}%", file=sys.stderr) + print(f" Max homopolymer: {max_run} bp", file=sys.stderr) + + # Write FASTA + print(">chr_test") + for i in range(0, len(seq_str), line_width): + print(seq_str[i:i+line_width]) + + return seq_str + + +def extract_bases_at_positions(seq_str, positions): + """Print the base at each 1-based position (for VCF REF allele verification).""" + print("\nBases at SNP positions (1-based):", file=sys.stderr) + for pos in sorted(positions): + if 1 <= pos <= len(seq_str): + base = seq_str[pos - 1] # Convert to 0-based + print(f" pos {pos}: {base}", file=sys.stderr) + + +if __name__ == '__main__': + # VCF SNP positions from the test data + snp_positions = [ + 750, 1200, 2800, 3200, 5000, + 6000, 6100, 6700, 6800, 7400, 7500, + 8100, 8200, 8800, 8900, + 10800, 11200, 12800, 13200, 15000, + 16000, 16100, 16700, 16800, 17400, 17500, + 18100, 18200, 18800, 18900, + ] + + seq = generate_reference() + extract_bases_at_positions(seq, snp_positions) diff --git a/tests/shared_data/sample1.bam b/tests/shared_data/sample1.bam index e8a76d8..94cc68c 100644 Binary files a/tests/shared_data/sample1.bam and b/tests/shared_data/sample1.bam differ diff --git a/tests/shared_data/sample1.bam.bai b/tests/shared_data/sample1.bam.bai index ef1ea4f..669bf8b 100644 Binary files a/tests/shared_data/sample1.bam.bai and b/tests/shared_data/sample1.bam.bai differ diff --git a/tests/shared_data/sample1_R1.fq.gz b/tests/shared_data/sample1_R1.fq.gz index 09a9ab4..2e5a60e 100644 Binary files a/tests/shared_data/sample1_R1.fq.gz and b/tests/shared_data/sample1_R1.fq.gz differ diff 
--git a/tests/shared_data/sample1_R2.fq.gz b/tests/shared_data/sample1_R2.fq.gz index c5fa951..024a191 100644 Binary files a/tests/shared_data/sample1_R2.fq.gz and b/tests/shared_data/sample1_R2.fq.gz differ diff --git a/tests/shared_data/sample2.bam b/tests/shared_data/sample2.bam index 8a5cce9..5aa9090 100644 Binary files a/tests/shared_data/sample2.bam and b/tests/shared_data/sample2.bam differ diff --git a/tests/shared_data/sample2.bam.bai b/tests/shared_data/sample2.bam.bai index 352f643..a17f47e 100644 Binary files a/tests/shared_data/sample2.bam.bai and b/tests/shared_data/sample2.bam.bai differ diff --git a/tests/shared_data/sample2_R1.fq.gz b/tests/shared_data/sample2_R1.fq.gz index 9aae881..b9a8354 100644 Binary files a/tests/shared_data/sample2_R1.fq.gz and b/tests/shared_data/sample2_R1.fq.gz differ diff --git a/tests/shared_data/sample2_R2.fq.gz b/tests/shared_data/sample2_R2.fq.gz index 682a76f..828e8ac 100644 Binary files a/tests/shared_data/sample2_R2.fq.gz and b/tests/shared_data/sample2_R2.fq.gz differ diff --git a/tests/shared_data/sample3.bam b/tests/shared_data/sample3.bam index db2f260..7f087bd 100644 Binary files a/tests/shared_data/sample3.bam and b/tests/shared_data/sample3.bam differ diff --git a/tests/shared_data/sample3.bam.bai b/tests/shared_data/sample3.bam.bai index 0e80bb4..13d427f 100644 Binary files a/tests/shared_data/sample3.bam.bai and b/tests/shared_data/sample3.bam.bai differ diff --git a/tests/shared_data/sample3_R1.fq.gz b/tests/shared_data/sample3_R1.fq.gz index 15cce61..4a05a75 100644 Binary files a/tests/shared_data/sample3_R1.fq.gz and b/tests/shared_data/sample3_R1.fq.gz differ diff --git a/tests/shared_data/sample3_R2.fq.gz b/tests/shared_data/sample3_R2.fq.gz index 80db9bd..38d299a 100644 Binary files a/tests/shared_data/sample3_R2.fq.gz and b/tests/shared_data/sample3_R2.fq.gz differ diff --git a/tests/shared_data/variants.vcf b/tests/shared_data/variants.vcf index d637777..a37fe38 100644 --- 
a/tests/shared_data/variants.vcf +++ b/tests/shared_data/variants.vcf @@ -1,5 +1,5 @@ ##fileformat=VCFv4.2 -##fileDate=20260218 +##fileDate=20260306 ##source=WASP2SharedTestData ##reference=chr_test.fa ##contig= @@ -7,33 +7,33 @@ ##FORMAT= ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 sample3 -chr_test 750 snp001 C T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 1200 snp002 T G 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 2800 snp003 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 -chr_test 3200 snp004 G A 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|0:50 +chr_test 750 snp001 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 1200 snp002 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 2800 snp003 T G 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 +chr_test 3200 snp004 G T 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|0:50 chr_test 5000 snp005 G T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 6000 snp011 T A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 -chr_test 6100 snp012 T C 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 -chr_test 6700 snp013 C T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 6800 snp014 C G 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 +chr_test 6000 snp011 G T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 +chr_test 6100 snp012 C A 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 +chr_test 6700 snp013 C A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 6800 snp014 T G 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 chr_test 7400 snp015 C A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 -chr_test 7500 snp016 C T 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 -chr_test 8100 snp017 G A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 8200 snp018 G C 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 -chr_test 8800 snp019 T G 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 8900 snp020 T C 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 -chr_test 10800 snp006 T C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 -chr_test 11200 snp007 A G 100 PASS DP=50 GT:DP 
0|1:50 0|1:50 0|1:50 -chr_test 12800 snp008 C A 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|0:50 -chr_test 13200 snp009 G T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 15000 snp010 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 -chr_test 16000 snp021 G A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 16100 snp022 C T 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 -chr_test 16700 snp023 C A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 -chr_test 16800 snp024 C G 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 -chr_test 17400 snp025 T C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 17500 snp026 T A 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 -chr_test 18100 snp027 A G 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 -chr_test 18200 snp028 T C 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 -chr_test 18800 snp029 C T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 -chr_test 18900 snp030 C A 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 +chr_test 7500 snp016 C A 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 +chr_test 8100 snp017 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 8200 snp018 A C 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 +chr_test 8800 snp019 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 8900 snp020 C A 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 +chr_test 10800 snp006 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 +chr_test 11200 snp007 G T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 12800 snp008 A C 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|0:50 +chr_test 13200 snp009 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 15000 snp010 C A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 +chr_test 16000 snp021 A C 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 16100 snp022 G T 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 +chr_test 16700 snp023 G T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 +chr_test 16800 snp024 G T 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 +chr_test 17400 snp025 G T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 17500 snp026 A C 100 PASS DP=50 GT:DP 
0|1:50 0|0:50 0|1:50 +chr_test 18100 snp027 C A 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|0:50 +chr_test 18200 snp028 A C 100 PASS DP=50 GT:DP 0|0:50 0|1:50 0|1:50 +chr_test 18800 snp029 G T 100 PASS DP=50 GT:DP 0|1:50 0|1:50 0|1:50 +chr_test 18900 snp030 A C 100 PASS DP=50 GT:DP 0|1:50 0|0:50 0|1:50 diff --git a/tests/shared_data/variants.vcf.gz b/tests/shared_data/variants.vcf.gz index 768f018..161431f 100644 Binary files a/tests/shared_data/variants.vcf.gz and b/tests/shared_data/variants.vcf.gz differ diff --git a/tests/shared_data/variants.vcf.gz.tbi b/tests/shared_data/variants.vcf.gz.tbi index e5ff87e..52a19a6 100644 Binary files a/tests/shared_data/variants.vcf.gz.tbi and b/tests/shared_data/variants.vcf.gz.tbi differ