You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
$ head $HP_ODIR/count.tab
Run all mapped chrM M
chrM.A 851537886 848029490 396766 181.7
chrM.B 884383716 882213718 506597 223.01
chrM.C 786560467 785208588 503241 248.9
...
directory structure
$ ls $HP_ADIR/*
bams/chrM.A.bam
bams/chrM.A.bam.bai
bams/chrM.A.idxstats
bams/chrM.A.count
...
after running the pipeline
output directories : 1/sample
$ ls out/
chrM.A/
chrM.B/
chrM.C/
...
vcf files : concat & merge
# 1st iteration : homoplasmies(AF=1) and heteroplasmies(AF<1)
$ cat mutect2.03.concat.vcf
...
##INFO=<ID=SM,Number=1,Type=String,Description="Sample">
##INFO=<ID=HP,Number=0,Type=Flag,Description="Homoloplymer">
##INFO=<ID=HS,Number=0,Type=Flag,Description="Hot spot">
##INFO=<ID=CDS,Number=0,Type=Flag,Description="coding region">
##INFO=<ID=RNR,Number=0,Type=Flag,Description="rRNA">
##INFO=<ID=TRN,Number=0,Type=Flag,Description="tRNA">
##INFO=<ID=DLOOP,Number=0,Type=Flag,Description="D-loop">
##INFO=<ID=HG,Number=1,Type=String,Description="haplogroup">
##INFO=<ID=NUMT,Number=0,Type=Flag,Description="NUMT like">
##INFO=<ID=HV,Number=0,Type=Flag,Description="Hypervariable">
##INFO=<ID=SM,Number=1,Type=String,Description="Sample">
##INFO=<ID=CR,Number=1,Type=String,Description="CADD Raw score">
##INFO=<ID=CP,Number=1,Type=String,Description="CADD PHREDD score">
...
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
...
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
chrM 64 . C T . clustered_events;haplotype SM=chrM.A;HV;DLOOP;CR=1.140555;CP=11.43 GT:DP:AF 0|1:67:1
chrM 73 . A G . PASS SM=chrM.B;HV;DLOOP;NUMT;CR=1.039800;CP=10.89 GT:DP:AF 0/1:52:1
chrM 73 . A G . PASS SM=chrM.C;HV;DLOOP;NUMT;CR=1.039800;CP=10.89 GT:DP:AF 0/1:43:1
chrM 73 . A G . clustered_events;haplotype SM=chrM.A;HV;DLOOP;NUMT;CR=1.039800;CP=10.89 GT:DP:AF 0|1:70:1
chrM 146 . T C . clustered_events;haplotype SM=chrM.A;HV;DLOOP;CR=0.949142;CP=10.37 GT:DP:AF 0|1:88:1
...
chrM 374 . A G . clustered_events SM=chrM.A;DLOOP;CR=0.998787;CP=10.66 GT:DP:AF 0/1:76:0.139
chrM 375 . C T . clustered_events;strand_bias SM=chrM.B;DLOOP;CR=1.074472;CP=11.08 GT:DP:AF 0/1:65:0.162
chrM 378 . C T . clustered_events;strand_bias SM=chrM.C;DLOOP;CR=1.043995;CP=10.91 GT:DP:AF 0/1:82:0.128
...
chrM 1037 . A C . PASS SM=chrM.B;RNR;CR=0.682748;CP=8.714 GT:DP:AF 0/1:70:0.178
chrM 1038 . C G . PASS SM=chrM.A;RNR;CR=0.822718;CP=9.603 GT:DP:AF 0/1:70:0.165
chrM 1042 . T A . PASS SM=chrM.C;RNR;CR=1.104043;CP=11.24 GT:DP:AF 0/1:60:0.24
...
$ cat mutect2.03.merge.vcf
...
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT chrM.A chrM.B chrM.C
chrM 64 . C T . . AC=1;AN=2;HV;DLOOP;CR=1.140555;CP=11.43 GT:DP:AF 0|1:67:1 .:.:. .:.:.
chrM 73 . A G . . AC=3;AN=6;HV;DLOOP;NUMT;CR=1.039800;CP=10.89 GT:DP:AF 0|1:70:1 0/1:52:1 0/1:43:1
chrM 146 . T C . . AC=1;AN=2;HV;DLOOP;CR=0.949142;CP=10.37 GT:DP:AF 0|1:88:1 .:.:. .:.:.
...
$ cat mutect2.03.merge.sitesOnly.vcf
...
#CHROM POS ID REF ALT QUAL FILTER INFO
chrM 64 . C T . . AC=1;AN=2;HV;DLOOP;CR=1.140555;CP=11.43
chrM 73 . A G . . AC=3;AN=6;HV;DLOOP;NUMT;CR=1.039800;CP=10.89
chrM 146 . T C . . AC=1;AN=2;HV;DLOOP;CR=0.949142;CP=10.37
...
# 2nd iteration ; mostly heteroplasmies(AF<1)
$ cat mutect2.mutect2.03.concat.vcf
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
chrM 374 . A G . PASS SM=chrM.A;DLOOP;CR=0.998787;CP=10.66 GT:DP:AF 0/1:62:0.138
chrM 375 . C T . PASS SM=chrM.B;DLOOP;CR=1.074472;CP=11.08 GT:DP:AF 0/1:48:0.178
chrM 378 . C T . PASS SM=chrM.C;DLOOP;CR=1.043995;CP=10.91 GT:DP:AF 0/1:71:0.121
...
chrM 1037 . A C . PASS SM=chrM.B;RNR;CR=0.682748;CP=8.714 GT:DP:AF 0/1:70:0.178
chrM 1038 . C G . PASS SM=chrM.A;RNR;CR=0.822718;CP=9.603 GT:DP:AF 0/1:70:0.165
chrM 1042 . T A . PASS SM=chrM.C;RNR;CR=1.104043;CP=11.24 GT:DP:AF 0/1:60:0.24
...
SNV counts
# 1st iteration
$ cat mutect2.03.tab
Run H h S s I i Hp hp Sp sp Ip ip A
chrM.A 31 43 28 35 3 8 28 43 28 35 0 8 74
chrM.B 27 43 25 35 2 8 22 41 22 33 0 8 70
chrM.C 40 42 36 34 4 8 38 41 36 33 2 8 82
...
# 2nd iteration ; no homoplamies(H=S=I=0)
$ cat mutect2.mutect2.03.tab
Run H h S s I i Hp hp Sp sp Ip ip A
chrM.A 0 43 0 35 0 8 0 43 0 35 0 8 43
chrM.B 0 43 0 35 0 8 0 41 0 33 0 8 43
chrM.C 0 42 0 34 0 8 0 41 0 33 0 8 42
...
Summaries
# 1st iteration
$ cat mutect2.03.summary | column -t
id count nonZero min max median mean sum
H 3 3 27 39 31 32.33 97
h 3 3 42 44 43 43 129
S 3 3 25 35 28 29.33 88
s 3 3 34 36 35 35 105
I 3 3 2 4 3 3 9
i 3 3 8 8 8 8 24
Hp 3 3 22 37 28 29 87
hp 3 3 42 44 43 43 129
Sp 3 3 22 35 28 28.33 85
sp 3 3 34 36 35 35 105
Ip 3 1 0 2 0 0.67 2
ip 3 3 8 8 8 8 24
A 3 3 69 82 75 75.33 226
Haplogroups
$ cat mutect2.haplogroup.tab
Run haplogroup
chrM.A A2+(64)
chrM.B B2
chrM.C C
...
Contamination
$ cat mutect2.haplocheck.tab | column -t
Run ContaminationStatus ContaminationLevel Distance SampleCoverage
chrM.A NO ND 14 78
chrM.B NO ND 14 79
chrM.C NO ND 14 78
...
Run # Sample name
all # number of reads in the sample reads
mapped # number of aligned reads in the sample
chrM # number of reads aligned to chrM
filter # number of chrM used (subsample ~2000x cvg based on name)
Gcvg # recomputed genome coverage: Bases/3217346917
Mcvg # mitochondrion covearge: chrM*rdLen/16569
M # mtDNA copy number ; = 2*Mcvg/Gcvg
haplogroup # sample haplogroup
H # number of homozygous SNVs (the multiallelic SNVs counted once)
h # number of heterozygous SNVs
S # number of homozygous SNVs (excluding INDELs)
s # number of heterozygous SNVs (excluding INDELs)
I # number of homozygous INDELs
i # number of heterozygous INDELs
...
?p # "p" suffix stands for non homopolimeric
...
A # total number of SNVs (H+h)