-
Notifications
You must be signed in to change notification settings - Fork 4
polyA size
zhou-ran edited this page Jan 5, 2022
·
4 revisions
Followed snakemake pipeline in this repo with your own dataset, or using the prepared bam in example_data/example.bam
snakemake -s Snakefile.smk
Using R (4.0.1) to extract the length of polyA on cDNA.
sub_bams <- GenomicAlignments::readGAlignments(
'example.bam',
use.names = T,
param = Rsamtools::ScanBamParam(
which = GenomicRanges::GRanges(),
tag = c("DT"),
flag = Rsamtools::scanBamFlag(
isPaired = TRUE,
isFirstMateRead=TRUE,
isSupplementaryAlignment = FALSE,
isSecondaryAlignment = FALSE
)
)
)
dt_length <- nchar(mcols(sub_bams)$DT)
plot(density(dt_length[dt_length >= 10]))
Prepare the la_dis_arr and pmf_la_dis_arr for apamix
empDis <- fitur::fit_empirical(dt_length[dt_length >= 10])
la_dis_arr <- c(10, 30, 50, 70, 90, 110, 130)
pmf_la_dis_arr <- rep(0, length(la_dis_arr))
names(pmf_la_dis_arr) <- c(10, 30, 50, 70, 90, 110, 130)
tmp_val <- unlist(empDis$dempDis(1:130)[c(10, 30, 50, 70, 90, 110, 130)])
pmf_la_dis_arr[names(tmp_val)] <- unname(tmp_val)
print(la_dis_arr)
# [1] 10 30 50 70 90 110 130
pmf_la_dis_arr <- table(cut(dt_length, c(0, la_dis_arr)))
names(pmf_la_dis_arr) <- la_dis_arr
print(pmf_la_dis_arr)
# 10 30 50 70 90 110 130
# 19586 132252 97910 205 27 2 0
Then call APA with estimated pmf_la_dis_arr
python SCAPE/main.py apamix \
... \
--la_dis_arr '[10, 30, 50, 70, 90, 110, 130]' \
--pmf_la_dis_arr '[19586,132252,97910,205,27,2,0]'