% bibliography.bib -- references cited by the KoesGroup/Snakemake_ChIPseq_PE pipeline.

% Software citation: Zenodo-archived release of the Snakemake_ChIPseq_PE pipeline.
% Underscores in the title are escaped so the entry typesets under LaTeX, and only
% the repository name is brace-protected so styles can still recase the rest.
@misc{zenodo,
  author       = {Chouaref, Jihed and Galland, Marc and Bliek, Tijs},
  title        = {{KoesGroup/Snakemake\_ChIPseq\_PE}: Clean first release},
  howpublished = {Zenodo},
  month        = dec,
  year         = 2018,
  doi          = {10.5281/zenodo.2025836},
  url          = {https://doi.org/10.5281/zenodo.2025836}
}


% Snakemake workflow engine paper (Bioinformatics, 2012).
% The title uses an em-dash (---); a double hyphen would typeset as an en-dash.
@article{Koster:2012,
	author = {Köster, Johannes and Rahmann, Sven},
	title = {{Snakemake}---a scalable bioinformatics workflow engine},
	journal = {Bioinformatics},
	year = {2012},
	month = oct,
	volume = {28},
	number = {19},
	pages = {2520--2522},
	issn = {1367-4803, 1460-2059},
	doi = {10.1093/bioinformatics/bts480},
	url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/bts480},
	urldate = {2018-12-18},
	language = {en}
}

% fastp FASTQ preprocessor paper (Bioinformatics, 2018).
% Journal name is normalised to the plain title used by the other Bioinformatics
% entries; the tool name is brace-protected so styles do not recase it.
@article{Chen:2018,
	author = {Chen, Shifu and Zhou, Yanqing and Chen, Yaru and Gu, Jia},
	title = {{fastp}: an ultra-fast all-in-one {FASTQ} preprocessor},
	shorttitle = {fastp},
	journal = {Bioinformatics},
	year = {2018},
	month = sep,
	volume = {34},
	number = {17},
	pages = {i884--i890},
	issn = {1367-4811},
	doi = {10.1093/bioinformatics/bty560},
	pmid = {30423086},
	pmcid = {PMC6129281},
	language = {eng},
	abstract = {Motivation: Quality control and preprocessing of FASTQ files are essential to providing clean data for downstream analysis. Traditionally, a different tool is used for each operation, such as quality control, adapter trimming and quality filtering. These tools are often insufficiently fast as most are developed using high-level programming languages (e.g. Python and Java) and provide limited multi-threading support. Reading and loading data multiple times also renders preprocessing slow and I/O inefficient. Results: We developed fastp as an ultra-fast FASTQ preprocessor with useful quality control and data-filtering features. It can perform quality control, adapter trimming, quality filtering, per-read quality pruning and many other operations with a single scan of the FASTQ data. This tool is developed in C++ and has multi-threading support. Based on our evaluation, fastp is 2-5 times faster than other FASTQ preprocessing tools such as Trimmomatic or Cutadapt despite performing far more operations than similar tools.
Availability and implementation: The open-source code and corresponding instructions are available at https://github.com/OpenGene/fastp.}
}

% Bowtie 2 read aligner paper (Nature Methods, 2012).
@article{Langmead:2012,
	author   = {Langmead, Ben and Salzberg, Steven L.},
	title    = {Fast gapped-read alignment with {Bowtie} 2},
	journal  = {Nature Methods},
	year     = {2012},
	month    = mar,
	volume   = {9},
	number   = {4},
	pages    = {357--359},
	issn     = {1548-7105},
	doi      = {10.1038/nmeth.1923},
	pmid     = {22388286},
	pmcid    = {PMC3322381},
	language = {eng},
	keywords = {Algorithms, Computational Biology, Databases, Genetic, Genome, Human, Humans, Sequence Alignment, Sequence Analysis, DNA},
	abstract = {As the rate of sequencing increases, greater throughput is demanded from read aligners. The full-text minute index is often used to make alignment very fast and memory-efficient, but the approach is ill-suited to finding longer, gapped alignments. Bowtie 2 combines the strengths of the full-text minute index with the flexibility and speed of hardware-accelerated dynamic programming algorithms to achieve a combination of high speed, sensitivity and accuracy.}
}

% SAM format and SAMtools paper (Bioinformatics, 2009).
% Journal name is normalised to the plain title used by the other Bioinformatics
% entries. The consortium author is kept in its own brace group so it is treated
% as a single name rather than split into first/last parts.
@article{Li:2009,
	author = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard and {1000 Genome Project Data Processing Subgroup}},
	title = {The {Sequence} {Alignment}/{Map} format and {SAMtools}},
	journal = {Bioinformatics},
	year = {2009},
	month = aug,
	volume = {25},
	number = {16},
	pages = {2078--2079},
	issn = {1367-4811},
	doi = {10.1093/bioinformatics/btp352},
	pmid = {19505943},
	pmcid = {PMC2723002},
	language = {eng},
	keywords = {Algorithms, Base Sequence, Computational Biology, Genome, Genomics, Molecular Sequence Data, Sequence Alignment, Sequence Analysis, DNA, Software},
	abstract = {SUMMARY: The Sequence Alignment/Map (SAM) format is a generic alignment format for storing read alignments against reference sequences, supporting short and long reads (up to 128 Mbp) produced by different sequencing platforms. It is flexible in style, compact in size, efficient in random access and is the format in which alignments from the 1000 Genomes Project are released. SAMtools implements various utilities for post-processing alignments in the SAM format, such as indexing, variant caller and alignment viewer, and thus provides universal tools for processing read alignments.
AVAILABILITY: http://samtools.sourceforge.net.}
}

% Bioconda software distribution paper (Nature Methods, 2018).
@article{Gruning:2018a,
	author     = {Grüning, Björn and Dale, Ryan and Sjödin, Andreas and Chapman, Brad A. and Rowe, Jillian and Tomkins-Tinch, Christopher H. and Valieris, Renan and Köster, Johannes and {Bioconda Team}},
	title      = {Bioconda: sustainable and comprehensive software distribution for the life sciences},
	shorttitle = {Bioconda},
	journal    = {Nature Methods},
	year       = {2018},
	month      = jul,
	volume     = {15},
	number     = {7},
	pages      = {475--476},
	issn       = {1548-7105},
	doi        = {10.1038/s41592-018-0046-7},
	pmid       = {29967506},
	language   = {eng}
}

% deepTools2 paper (Nucleic Acids Research web server issue, 2016).
% The page range is written out in full (W160--W165) instead of the truncated
% PubMed form, and the publication month is supplied.
@article{Ramirez:2016,
	author = {Ramírez, Fidel and Ryan, Devon P. and Grüning, Björn and Bhardwaj, Vivek and Kilpert, Fabian and Richter, Andreas S. and Heyne, Steffen and Dündar, Friederike and Manke, Thomas},
	title = {{deepTools}2: a next generation web server for deep-sequencing data analysis},
	shorttitle = {{deepTools}2},
	journal = {Nucleic Acids Research},
	year = {2016},
	month = jul,
	volume = {44},
	number = {W1},
	pages = {W160--W165},
	issn = {1362-4962},
	doi = {10.1093/nar/gkw257},
	pmid = {27079975},
	pmcid = {PMC4987876},
	language = {eng},
	keywords = {Animals, Base Sequence, Computational Biology, Computer Graphics, Drosophila melanogaster, High-Throughput Nucleotide Sequencing, Humans, Information Storage and Retrieval, Internet, Sequence Alignment, Sequence Analysis, DNA, Software},
	abstract = {We present an update to our Galaxy-based web server for processing and visualizing deeply sequenced data. Its core tool set, deepTools, allows users to perform complete bioinformatic workflows ranging from quality controls and normalizations of aligned reads to integrative analyses, including clustering and visualization approaches. Since we first described our deepTools Galaxy server in 2014, we have implemented new solutions for many requests from the community and our users. Here, we introduce significant enhancements and new tools to further improve data visualization and interpretation. deepTools continue to be open to all users and freely available as a web service at deeptools.ie-freiburg.mpg.de The new deepTools2 suite can be easily deployed within any Galaxy framework via the toolshed repository, and we also provide source code for command line usage under Linux and Mac OS X. A public and documented API for access to deepTools functionality is also available.}
}

% Anaconda software distribution. This is a software citation, not a journal
% article, so it is typed as @misc and the medium/version text lives in
% howpublished rather than journal. The corporate author is double-braced so it
% is treated as one indivisible name.
@misc{Anaconda:2016,
 author = {{Anaconda}},
 title = {{Anaconda} Software Distribution},
 howpublished = {Computer software. Vers. 2-2.4.0},
 year = {2016},
 url = {https://anaconda.com}
}

% Trimmomatic read trimmer paper (Bioinformatics, 2014).
% The raw PubMed export record is kept in annote, which standard styles do not
% print, so it no longer appears in the typeset reference. Journal, month, full
% author names and PMID are taken from that record and the PubMed URL.
@article{Bolger:2014,
  author   = {Bolger, Anthony M. and Lohse, Marc and Usadel, Bjoern},
  title    = {{Trimmomatic}: a flexible trimmer for {Illumina} sequence data},
  journal  = {Bioinformatics},
  year     = {2014},
  month    = aug,
  volume   = {30},
  number   = {15},
  pages    = {2114--2120},
  doi      = {10.1093/bioinformatics/btu170},
  pmid     = {24695404},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/24695404},
  keywords = {Computational Biology Databases, Genetic High-Throughput Nucleotide Sequencing/*methods *Software},
  abstract = {MOTIVATION: Although many next-generation sequencing (NGS) read preprocessing tools already existed, we could not find any tool or combination of tools that met our requirements in terms of flexibility, correct handling of paired-end data and high performance. We have developed Trimmomatic as a more flexible and efficient preprocessing tool, which could correctly handle paired-end data. RESULTS: The value of NGS read preprocessing is demonstrated for both reference-based and reference-free tasks. Trimmomatic is shown to produce output that is at least competitive with, and in many cases superior to, that produced by other tools, in all scenarios tested. AVAILABILITY AND IMPLEMENTATION: Trimmomatic is licensed under GPL V3. It is cross-platform (Java 1.5+ required) and available at http://www.usadellab.org/cms/index.php?page=trimmomatic CONTACT: usadel@bio1.rwth-aachen.de SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
  annote   = {Bolger, Anthony M Lohse, Marc Usadel, Bjoern eng Research Support, Non-U.S. Gov't England 2014/04/04 06:00 Bioinformatics. 2014 Aug 1;30(15):2114-20. doi: 10.1093/bioinformatics/btu170. Epub 2014 Apr 1.}
}

% MACS peak caller paper (Genome Biology, 2008).
% The raw PubMed export record is kept in annote, which standard styles do not
% print, so it no longer appears in the typeset reference. Journal, full author
% names and PMID are taken from that record and the PubMed URL. Acronyms in the
% title are brace-protected against sentence-casing.
@article{Zhang:2008,
  author   = {Zhang, Yong
              and Liu, Tao
              and Meyer, Clifford A.
              and Eeckhoute, Jerome
              and Johnson, David S.
              and Bernstein, Bradley E.
              and Nusbaum, Chad
              and Myers, Richard M.
              and Brown, Myles
              and Li, Wei
              and Liu, X. Shirley},
  title    = {Model-based analysis of {ChIP-Seq} ({MACS})},
  journal  = {Genome Biology},
  year     = {2008},
  volume   = {9},
  number   = {9},
  pages    = {R137},
  doi      = {10.1186/gb-2008-9-9-r137},
  pmid     = {18798982},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/18798982},
  keywords = {*Algorithms Cell Line, Tumor Chromatin Immunoprecipitation/*methods Hepatocyte Nuclear Factor 3-alpha/analysis/*genetics Humans Models, Genetic Oligonucleotide Array Sequence Analysis/*methods},
  abstract = {We present Model-based Analysis of ChIP-Seq data, MACS, which analyzes data generated by short read sequencers such as Solexa's Genome Analyzer. MACS empirically models the shift size of ChIP-Seq tags, and uses it to improve the spatial resolution of predicted binding sites. MACS also uses a dynamic Poisson distribution to effectively capture local biases in the genome, allowing for more robust predictions. MACS compares favorably to existing ChIP-Seq peak-finding algorithms, and is freely available.},
  annote   = {Zhang, Yong Liu, Tao Meyer, Clifford A Eeckhoute, Jerome Johnson, David S Bernstein, Bradley E Nusbaum, Chad Myers, Richard M Brown, Myles Li, Wei Liu, X Shirley eng DK074967/DK/NIDDK NIH HHS/ HG004270/HG/NHGRI NIH HHS/ HG004069/HG/NHGRI NIH HHS/ R01 HG004069-02/HG/NHGRI NIH HHS/ R01 HG004069-03/HG/NHGRI NIH HHS/ R01 HG004069/HG/NHGRI NIH HHS/ Comparative Study Research Support, N.I.H., Extramural England 2008/09/19 09:00 Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17.}
}

% Computational reproducibility paper (Cell Systems, 2018).
% The raw PubMed export record is kept in annote, which standard styles do not
% print, so it no longer appears in the typeset reference. Journal, month, full
% author names and PMID are taken from that record and the PubMed URL. Author
% spellings follow the accented forms used for the same people in Gruning:2018a.
@article{Gruning:2018b,
  author   = {Grüning, Björn
              and Chilton, John
              and Köster, Johannes
              and Dale, Ryan
              and Soranzo, Nicola
              and van den Beek, Marius
              and Goecks, Jeremy
              and Backofen, Rolf
              and Nekrutenko, Anton
              and Taylor, James},
  title    = {Practical Computational Reproducibility in the Life Sciences},
  journal  = {Cell Systems},
  year     = {2018},
  month    = jun,
  volume   = {6},
  number   = {6},
  pages    = {631--635},
  doi      = {10.1016/j.cels.2018.03.014},
  pmid     = {29953862},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/29953862},
  abstract = {Many areas of research suffer from poor reproducibility, particularly in computationally intensive domains where results rely on a series of complex methodological decisions that are not well captured by traditional publication approaches. Various guidelines have emerged for achieving reproducibility, but implementation of these practices remains difficult due to the challenge of assembling software tools plus associated libraries, connecting tools together into pipelines, and specifying parameters. Here, we discuss a suite of cutting-edge technologies that make computational reproducibility not just possible, but practical in both time and effort. This suite combines three well-tested components-a system for building highly portable packages of bioinformatics software, containerization and virtualization technologies for isolating reusable execution environments for these packages, and workflow systems that automatically orchestrate the composition of these packages for entire pipelines-to achieve an unprecedented level of computational reproducibility. We also provide a practical implementation and five recommendations to help set a typical researcher on the path to performing data analyses reproducibly.},
  annote   = {Gruning, Bjorn Chilton, John Koster, Johannes Dale, Ryan Soranzo, Nicola van den Beek, Marius Goecks, Jeremy Backofen, Rolf Nekrutenko, Anton Taylor, James eng R01 AI134384/AI/NIAID NIH HHS/ U41 HG006620/HG/NHGRI NIH HHS/ 2018/06/29 06:00 Cell Syst. 2018 Jun 27;6(6):631-635. doi: 10.1016/j.cels.2018.03.014.}
}

% JBrowse genome browser paper (Genome Biology, 2016).
% month uses the predefined apr macro so styles can localise or abbreviate it;
% field values are brace-delimited like the rest of the file. pages holds the
% article number, as this journal has no page ranges.
@article{Buels:2016,
  author   = {Buels, Robert
              and Yao, Eric
              and Diesh, Colin M.
              and Hayes, Richard D.
              and Munoz-Torres, Monica
              and Helt, Gregg
              and Goodstein, David M.
              and Elsik, Christine G.
              and Lewis, Suzanna E.
              and Stein, Lincoln
              and Holmes, Ian H.},
  title    = {{JBrowse}: a dynamic web platform for genome visualization and analysis},
  journal  = {Genome Biology},
  year     = {2016},
  month    = apr,
  day      = {12},
  volume   = {17},
  number   = {1},
  pages    = {66},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-016-0924-1},
  url      = {https://doi.org/10.1186/s13059-016-0924-1},
  abstract = {JBrowse is a fast and full-featured genome browser built with JavaScript and HTML5. It is easily embedded into websites or apps but can also be served as a standalone web page.}
}