From fae1580a2f35a6365be5918086f0bea9f9b00015 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:05:02 +0000 Subject: [PATCH 01/36] add in new reference run --- inst/analyses/new_ref/Makefile | 46 ++++++++++++++++++++++++++++++++++ inst/analyses/new_ref/sub.sh | 12 +++++++++ 2 files changed, 58 insertions(+) create mode 100644 inst/analyses/new_ref/Makefile create mode 100644 inst/analyses/new_ref/sub.sh diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile new file mode 100644 index 0000000..8ec18f9 --- /dev/null +++ b/inst/analyses/new_ref/Makefile @@ -0,0 +1,46 @@ +## This analysis is centered around using a new reference panel with more SNPs +# hopefully attenuating the increase in false positives that we have been +# observing in the previous analyses. +# +# This is going hopefully towards publication eventually. + +# Set up some variables first. +home=/scratch/hpc2862/CAMH/new_corge +hapgen2=$(home)/bin/hapgen2 + +# The default rule +# For now just update the git repo. +.all: $(home)/sim/.done + +# Submission +# +sub: + qsub -N "new_run" sub.sh + +# Git add rule +# +git: + cd ../../.. && git add -A + git commit -am "update coRge for new analysis see changelog" + +# Download and unpack the reference files. +$(home)/ref/.done: + cd $(home)/ref && wget https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.tgz + cd $(home)/ref && tar -xvzf 1000GP_Phase3.tgz + cd $(home)/ref/1000GP_Phase3 && mv * ../ + cd $(home)/ref && rm -rf 1000GP_Phase3.tgz 1000GP_Phase3 + touch $(home)/ref/.done # Done + +# This is the simulation protocol +# Note that the target is a placeholder generated at the end of the simulation +# as opposed to any of the actual files for the sake of simplicity. +$(home)/sim/.done: $(home)/ref/.done + mkdir -p $(home)/sim + cd $(home)/ref && $(hapgen2) -h 1000GP_Phase3_chr1.hap \ + -l 1000GP_Phase3_chr1.legend \ + -m genetic_map_chr1_combined_b37.txt \ + -dl 723891 1 1.0 1.0 \ + -n 1000 \ + -int 0 500000000 \ + -o ../sim/chr1_sim1 + diff --git a/inst/analyses/new_ref/sub.sh b/inst/analyses/new_ref/sub.sh new file mode 100644 index 0000000..f61a9ae --- /dev/null +++ b/inst/analyses/new_ref/sub.sh @@ -0,0 +1,12 @@ +#!/bin/bash +#$ -S /bin/bash +#$ -q abaqus.q +#$ -l qname=abaqus.q +#$ -cwd +#$ -V +#$ -l hostname=sw0050 +#$ -j y +#$ -o /home/hpc2862/repos/coR-ge/inst/logs/$JOB_NAME.txt + +cd /home/hpc2862/repos/coR-ge/inst/analysis/new_ref +make .all From 3a77abf57b4c789378cdf67046a8c13067741f5d Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:14:57 +0000 Subject: [PATCH 02/36] add in target for summary stats --- inst/analyses/new_ref/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 8ec18f9..dde6c29 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -44,3 +44,9 @@ $(home)/sim/.done: $(home)/ref/.done -int 0 500000000 \ -o ../sim/chr1_sim1 +$(home)/summary/stats.txt: $(home)/ref/.done + mkdir -p $(home)/summary + $(home)/bin/snptest -summary_stats_only \ + -data $(home)/sim/chr1_sim1.controls.gen $(home)/sim/chr1_sim1.controls.sample \ + -o $(home)/summary/stats.txt + From 81c4e52f90f73b4e4dc493c17e962767386e9f3a Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:19:52 +0000 Subject: [PATCH 03/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index dde6c29..1ca9d88 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -10,7 +10,8 @@ hapgen2=$(home)/bin/hapgen2 # The default rule # For now just update the git repo. -.all: $(home)/sim/.done +.PHONY: all +all: $(home)/sim/.done # Submission # From 3984e9191b2c6ee3a8c1a0a4580123edd05b406c Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:20:49 +0000 Subject: [PATCH 04/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 1ca9d88..e83b1a5 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -23,6 +23,8 @@ sub: git: cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" + git push + ssh hpcvl 'cd repos/coR-ge; git pull' # Download and unpack the reference files. $(home)/ref/.done: From dfa000d4daa51cd67e8427b2ab9610ca643576c6 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:22:34 +0000 Subject: [PATCH 05/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index e83b1a5..bab98ba 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -24,7 +24,7 @@ git: cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" git push - ssh hpcvl 'cd repos/coR-ge; git pull' + ssh hpcvl 'cd repos/coR-ge; /usr/bin/git pull' # Download and unpack the reference files. $(home)/ref/.done: From 30f0a51a8167513a72c15c50893e855b89a7837e Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:22:59 +0000 Subject: [PATCH 06/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index bab98ba..6c75642 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -24,7 +24,7 @@ git: cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" git push - ssh hpcvl 'cd repos/coR-ge; /usr/bin/git pull' + ssh hpcvl '/usr/bin/git pull' # Download and unpack the reference files. $(home)/ref/.done: From 33f0702a4fedceeb27f54f458236fa43708e2caa Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:23:34 +0000 Subject: [PATCH 07/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 6c75642..902cedd 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -24,7 +24,7 @@ git: cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" git push - ssh hpcvl '/usr/bin/git pull' + ssh -t hpcvl 'ssh swlogin1; cd repos/coR-ge; /usr/bin/git pull' # Download and unpack the reference files. $(home)/ref/.done: From 4e0590ae71a074a3a5f959ed8cd65361c5480be9 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:24:01 +0000 Subject: [PATCH 08/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 902cedd..6f2ff13 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -24,7 +24,7 @@ git: cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" git push - ssh -t hpcvl 'ssh swlogin1; cd repos/coR-ge; /usr/bin/git pull' + ssh hpcvl 'ssh swlogin1; cd repos/coR-ge; /usr/bin/git pull' # Download and unpack the reference files. $(home)/ref/.done: From 22cfd715a11ed84565bdd1a44f3934946156dfb3 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:24:39 +0000 Subject: [PATCH 09/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 6f2ff13..57f6f95 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -23,9 +23,8 @@ sub: git: cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" - git push - ssh hpcvl 'ssh swlogin1; cd repos/coR-ge; /usr/bin/git pull' - + #git push + # Download and unpack the reference files. $(home)/ref/.done: cd $(home)/ref && wget https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.tgz From a55bf048ae900e0492b5ca0a801fce908c6c9e5d Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:25:14 +0000 Subject: [PATCH 10/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 57f6f95..6d55626 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -11,7 +11,7 @@ hapgen2=$(home)/bin/hapgen2 # The default rule # For now just update the git repo. .PHONY: all -all: $(home)/sim/.done +all: $(home)/summary/stats.txt # Submission # From 47dd84b7936431610f0485a7867cef6d0780e3a2 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:26:10 +0000 Subject: [PATCH 11/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/sub.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/sub.sh b/inst/analyses/new_ref/sub.sh index f61a9ae..70d692b 100644 --- a/inst/analyses/new_ref/sub.sh +++ b/inst/analyses/new_ref/sub.sh @@ -9,4 +9,4 @@ #$ -o /home/hpc2862/repos/coR-ge/inst/logs/$JOB_NAME.txt cd /home/hpc2862/repos/coR-ge/inst/analysis/new_ref -make .all +make From 28fc968cdeef6bc091f22b7fc6e1dd1d329185c6 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:45:35 +0000 Subject: [PATCH 12/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/summary.Rmd | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 inst/analyses/new_ref/summary.Rmd diff --git a/inst/analyses/new_ref/summary.Rmd b/inst/analyses/new_ref/summary.Rmd new file mode 100644 index 0000000..60c566a --- /dev/null +++ b/inst/analyses/new_ref/summary.Rmd @@ -0,0 +1,9 @@ +--- +title: "An Example Using the Tufte Style" +author: "John Smith" +output: + tufte::tufte_handout: default + tufte::tufte_html: default +--- + +Test From 86c2b619fe931d686bfc256225e4b859adc68e83 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:53:47 -0500 Subject: [PATCH 13/36] update coRge for new analysis see changelog --- inst/analyses/Makefile | 1 + inst/analyses/new_ref/summary.html | 62 ++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 inst/analyses/new_ref/summary.html diff --git a/inst/analyses/Makefile b/inst/analyses/Makefile index e69de29..3694d87 100644 --- a/inst/analyses/Makefile +++ b/inst/analyses/Makefile @@ -0,0 +1 @@ +.PHONY: sim_gen diff --git a/inst/analyses/new_ref/summary.html b/inst/analyses/new_ref/summary.html new file mode 100644 index 0000000..2969333 --- /dev/null +++ b/inst/analyses/new_ref/summary.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + +An Example Using the Tufte Style + + + + + + + + + + + + + + + + + + + +

An Example Using the Tufte Style

+

John Smith

+ + + +

Test

+ + + + + + + + From 2d1c6e833b455dca5db5421b237222c16705fcdd Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 14:58:11 +0000 Subject: [PATCH 14/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/summary.Rmd | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/inst/analyses/new_ref/summary.Rmd b/inst/analyses/new_ref/summary.Rmd index 60c566a..39226fd 100644 --- a/inst/analyses/new_ref/summary.Rmd +++ b/inst/analyses/new_ref/summary.Rmd @@ -1,9 +1,16 @@ --- -title: "An Example Using the Tufte Style" -author: "John Smith" +title: "Looking at the new panel" +author: "" output: tufte::tufte_handout: default tufte::tufte_html: default --- -Test +First we might want to look at a simple summary of the MAF to see if there are any super weird bins that might be affecing the generation of scores to make them abnormally high or low. + +```{r} +table <- data.table::fread("/scratch/hpc2862/CAMH/new_corge/summary/stats.txt", h = T) + +hist(table$all_maf) +hist(table$missing_data_proportion) +``` From d56bbf6ccaa4f5f963be0cae5de4d47a7d4bd151 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 15:00:54 +0000 Subject: [PATCH 15/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 6d55626..03106d4 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -52,3 +52,5 @@ $(home)/summary/stats.txt: $(home)/ref/.done -data $(home)/sim/chr1_sim1.controls.gen $(home)/sim/chr1_sim1.controls.sample \ -o $(home)/summary/stats.txt +summary.html: summary.Rmd + Rscript -e 'rmarkdown::render("$<", "tufte_html")' From 1515fc8b68b0c562f79bb632369c2f95f6bd1823 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 15:02:31 +0000 Subject: [PATCH 16/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analyses/new_ref/Makefile b/inst/analyses/new_ref/Makefile index 03106d4..af18265 100644 --- a/inst/analyses/new_ref/Makefile +++ b/inst/analyses/new_ref/Makefile @@ -53,4 +53,4 @@ $(home)/summary/stats.txt: $(home)/ref/.done -o $(home)/summary/stats.txt summary.html: summary.Rmd - Rscript -e 'rmarkdown::render("$<", "tufte_html")' + Rscript -e 'rmarkdown::render("$<")' From beb4643aa7887fd68eebd23bc82c7de5487bc727 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 15:06:47 +0000 Subject: [PATCH 17/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/summary.Rmd | 3 --- 1 file changed, 3 deletions(-) diff --git a/inst/analyses/new_ref/summary.Rmd b/inst/analyses/new_ref/summary.Rmd index 39226fd..77d04c1 100644 --- a/inst/analyses/new_ref/summary.Rmd +++ b/inst/analyses/new_ref/summary.Rmd @@ -1,9 +1,6 @@ --- title: "Looking at the new panel" author: "" -output: - tufte::tufte_handout: default - tufte::tufte_html: default --- First we might want to look at a simple summary of the MAF to see if there are any super weird bins that might be affecing the generation of scores to make them abnormally high or low. From 5996fc374f720fd541eb285cddb421733f1842f2 Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Sat, 7 Jan 2017 15:09:49 -0500 Subject: [PATCH 18/36] update coRge for new analysis see changelog --- inst/analyses/new_ref/summary.html | 136 +++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 7 deletions(-) diff --git a/inst/analyses/new_ref/summary.html b/inst/analyses/new_ref/summary.html index 2969333..05a98dc 100644 --- a/inst/analyses/new_ref/summary.html +++ b/inst/analyses/new_ref/summary.html @@ -8,18 +8,27 @@ - - -An Example Using the Tufte Style +Looking at the new panel - + + + + + + + + + + -

An Example Using the Tufte Style

-

John Smith

-

Test

+ + + +

First we might want to look at a simple summary of the MAF to see if there are any super weird bins that might be affecing the generation of scores to make them abnormally high or low.

+
table <- data.table::fread("/scratch/hpc2862/CAMH/new_corge/summary/stats.txt", h = T)
+
## 
+Read 0.0% of 6461692 rows
+Read 4.5% of 6461692 rows
+Read 8.4% of 6461692 rows
+Read 11.0% of 6461692 rows
+Read 13.9% of 6461692 rows
+Read 17.5% of 6461692 rows
+Read 22.0% of 6461692 rows
+Read 27.2% of 6461692 rows
+Read 33.4% of 6461692 rows
+Read 37.1% of 6461692 rows
+Read 41.0% of 6461692 rows
+Read 47.4% of 6461692 rows
+Read 50.1% of 6461692 rows
+Read 59.3% of 6461692 rows
+Read 61.1% of 6461692 rows
+Read 66.9% of 6461692 rows
+Read 74.1% of 6461692 rows
+Read 84.0% of 6461692 rows
+Read 89.9% of 6461692 rows
+Read 99.2% of 6461692 rows
+Read 6461692 rows and 21 (of 21) columns from 0.583 GB file in 00:00:40
+
## Warning in data.table::fread("/scratch/hpc2862/CAMH/new_corge/summary/
+## stats.txt", : Stopped reading at empty line 6461702 but text exists
+## afterwards (discarded): # Completed successfully at 2017-01-07 15:07:30
+
hist(table$all_maf)
+

+
hist(table$missing_data_proportion)
+

+ + + + + + +