From e5be8e0c937bc30e3fba39ed54a89b7ac41deb77 Mon Sep 17 00:00:00 2001 From: Chris C Date: Fri, 23 Sep 2016 13:23:27 +0100 Subject: [PATCH 01/11] report estimated coefficients along with P values --- R/correct.R | 17 +++++++++++++---- R/gen_phen_df.R | 6 ++++-- inst/data | 2 +- src/lm.cpp | 7 ++++--- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/R/correct.R b/R/correct.R index d644a95..15b5b29 100644 --- a/R/correct.R +++ b/R/correct.R @@ -1,9 +1,9 @@ -#' Correct strata by group +#' @title Correct strata by group #' -#' Note that group name must be a factor +#' @description Note that group name must be a factor #' -#' @param strata Strata -#' @param n_strata Number of strata +#' @param strata Strata object returned from \code{stratify}. +#' @param n_strata Number of strata you wish to consider. Note: Only 2 currently supported. #' @param assoc Assoc file path #' #' @importFrom data.table fread @@ -15,6 +15,15 @@ correct <- function(strata = NULL, n_strata = NULL, assoc = NULL, group = FALSE, group_name = NULL, mode = "default"){ + if(mode == "odp"){ + + flog.info("Using ODP for correction.") + + + + } + + if(mode == "default"){ strata <- as.data.frame(strata) diff --git a/R/gen_phen_df.R b/R/gen_phen_df.R index a06fee1..d13534c 100644 --- a/R/gen_phen_df.R +++ b/R/gen_phen_df.R @@ -10,10 +10,12 @@ assoc_wrapper <- function(gen, samp){ phen <- samp$Z[-1] %>% as.vector %>% as.double - t <- assoc(as.matrix(gen[,-c(1:5)]), phen) + out <- assoc(as.matrix(gen[,-c(1:5)]), phen) + t <- out[,1] / out[,2] + P <- pt(t, df = length(phen) -2, lower = FALSE)*2 - return(data.frame(rsid = gen[,2], P = P)) + return(data.frame(rsid = gen[,2], out[,1], out[,2], P = P)) diff --git a/inst/data b/inst/data index 26da8db..51ed963 160000 --- a/inst/data +++ b/inst/data @@ -1 +1 @@ -Subproject commit 26da8db5dfc54ac8168d37704d8a9cc39461c6b3 +Subproject commit 51ed963918c6b985635e7c33be2ef62e7e29867e diff --git a/src/lm.cpp b/src/lm.cpp index 2cb1600..9dc2f3a 100644 --- a/src/lm.cpp +++ b/src/lm.cpp @@ -24,10 +24,10 @@ using namespace arma; //' @export // [[Rcpp::export]] -arma::vec assoc(arma::mat gen, arma::colvec y){ +arma::mat assoc(arma::mat gen, arma::colvec y){ // Initialize output as a vector. - arma::vec outputList(gen.n_rows); + arma::mat outputList(gen.n_rows, 2); // Split regression into each row of the gen file // meaning each SNP is regressed seperately. @@ -87,7 +87,8 @@ arma::vec assoc(arma::mat gen, arma::colvec y){ std::string name = patch::to_string(i); - outputList[i] = t; + outputList[i,0] = coef; + outputList[i,1] = stderrest; } return outputList; From 3d0c653dd2649a20e5f0d81411221e5d47cf7066 Mon Sep 17 00:00:00 2001 From: Chris C Date: Fri, 23 Sep 2016 13:29:04 +0100 Subject: [PATCH 02/11] compile --- R/RcppExports.R | 2 +- src/RcppExports.cpp | 44 ++++++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index dab9c08..81e2ac6 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,4 +1,4 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #' @export diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 26ad132..dc40ea9 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -10,61 +10,61 @@ using namespace Rcpp; arma::mat gen_cor(arma::vec causal, arma::mat all); RcppExport SEXP coRge_gen_cor(SEXP causalSEXP, SEXP allSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::vec >::type causal(causalSEXP); Rcpp::traits::input_parameter< arma::mat >::type all(allSEXP); - __result = Rcpp::wrap(gen_cor(causal, all)); - return __result; + rcpp_result_gen = Rcpp::wrap(gen_cor(causal, all)); + return rcpp_result_gen; END_RCPP } // returnLD Rcpp::List returnLD(arma::uvec cIndex, arma::mat gen, arma::vec bpVec); RcppExport SEXP coRge_returnLD(SEXP cIndexSEXP, SEXP genSEXP, SEXP bpVecSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::uvec >::type cIndex(cIndexSEXP); Rcpp::traits::input_parameter< arma::mat >::type gen(genSEXP); Rcpp::traits::input_parameter< arma::vec >::type bpVec(bpVecSEXP); - __result = Rcpp::wrap(returnLD(cIndex, gen, bpVec)); - return __result; + rcpp_result_gen = Rcpp::wrap(returnLD(cIndex, gen, bpVec)); + return rcpp_result_gen; END_RCPP } // assoc -arma::vec assoc(arma::mat gen, arma::colvec y); +arma::mat assoc(arma::mat gen, arma::colvec y); RcppExport SEXP coRge_assoc(SEXP genSEXP, SEXP ySEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type gen(genSEXP); Rcpp::traits::input_parameter< arma::colvec >::type y(ySEXP); - __result = Rcpp::wrap(assoc(gen, y)); - return __result; + rcpp_result_gen = Rcpp::wrap(assoc(gen, y)); + return rcpp_result_gen; END_RCPP } // fun arma::uvec fun(arma::field input_field, std::string id); RcppExport SEXP coRge_fun(SEXP input_fieldSEXP, SEXP idSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::field >::type input_field(input_fieldSEXP); Rcpp::traits::input_parameter< std::string >::type id(idSEXP); - __result = Rcpp::wrap(fun(input_field, id)); - return __result; + rcpp_result_gen = Rcpp::wrap(fun(input_field, id)); + return rcpp_result_gen; END_RCPP } // th arma::vec th(Rcpp::StringVector strata_rsid, Rcpp::StringVector rsid, Rcpp::NumericVector r2); RcppExport SEXP coRge_th(SEXP strata_rsidSEXP, SEXP rsidSEXP, SEXP r2SEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::StringVector >::type strata_rsid(strata_rsidSEXP); Rcpp::traits::input_parameter< Rcpp::StringVector >::type rsid(rsidSEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type r2(r2SEXP); - __result = Rcpp::wrap(th(strata_rsid, rsid, r2)); - return __result; + rcpp_result_gen = Rcpp::wrap(th(strata_rsid, rsid, r2)); + return rcpp_result_gen; END_RCPP } From 721bf245330c6aa4718381fa4a9ce961f95f15ca Mon Sep 17 00:00:00 2001 From: Chris C Date: Tue, 13 Dec 2016 11:21:43 -0500 Subject: [PATCH 03/11] update coRge for new analysis see changelog --- inst/analysis/new_ref/makefile | 22 ++++++++++++++++++++++ inst/data | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 inst/analysis/new_ref/makefile diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile new file mode 100644 index 0000000..21d1aa4 --- /dev/null +++ b/inst/analysis/new_ref/makefile @@ -0,0 +1,22 @@ +## This analysis is centered around using a new reference panel with more SNPs +# hopefully attenuating the increase in false positives that we have been +# observing in the previous analyses. +# +# This is going hopefully towards publication eventually. + +# Set up some variables first. +hapgen2= +home=/scratch/hpc2862/CAMH/new_corge + + +# The default rule +# For now just update the git repo. +.all: git + +# Git add rule +# +git: + git add -A + git commit -am "update coRge for new analysis see changelog" + + diff --git a/inst/data b/inst/data index 51ed963..4dfd1c7 160000 --- a/inst/data +++ b/inst/data @@ -1 +1 @@ -Subproject commit 51ed963918c6b985635e7c33be2ef62e7e29867e +Subproject commit 4dfd1c72f2fc29beb4364d39834498ff65f2aabe From 51f0edc267b7369133f846ab6b4ea743e75d085f Mon Sep 17 00:00:00 2001 From: Chris C Date: Tue, 13 Dec 2016 11:22:12 -0500 Subject: [PATCH 04/11] update coRge for new analysis see changelog --- TODO.md | 4 ++++ inst/analysis/new_ref/makefile | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..38837f0 --- /dev/null +++ b/TODO.md @@ -0,0 +1,4 @@ +## TODO + +- Simulate one and look at MAF and other things +- Change the referene panel. diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile index 21d1aa4..b823989 100644 --- a/inst/analysis/new_ref/makefile +++ b/inst/analysis/new_ref/makefile @@ -16,7 +16,7 @@ home=/scratch/hpc2862/CAMH/new_corge # Git add rule # git: - git add -A + cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" From e8bdc5cf7a55f91172ae5fd508456c1fb91fa40f Mon Sep 17 00:00:00 2001 From: Chris C Date: Tue, 13 Dec 2016 12:01:06 -0500 Subject: [PATCH 05/11] update coRge for new analysis see changelog --- inst/analysis/new_ref/makefile | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile index b823989..07a2d46 100644 --- a/inst/analysis/new_ref/makefile +++ b/inst/analysis/new_ref/makefile @@ -19,4 +19,20 @@ git: cd ../../.. && git add -A git commit -am "update coRge for new analysis see changelog" +# Download and unpack the reference files. +$(home)/ref/.done: + cd $(home)/ref && wget https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.tgz + cd $(home)/ref && tar -xvzf 1000GP_Phase3.tgz + cd $(home)/ref/1000GP_Phase3 && mv * ../ + cd $(home)/ref && rm -rf 1000GP_Phase3.tgz 1000GP_Phase3 + touch $(home)/ref/.done # Done + +# This is the simulation protocol +# Note that the target is a placeholder generated at the end of the simulation +# as opposed to any of the actual files for the sake of simplicity. +$(home)/sim/.done: $(home)/ref/.done + mkdir -p $(home)/sim + $(hapgen2) -h + /home/hpc2862/Programs/hapgen2 -h pilot1.jun2010.b36.CEU.chr1.snpfilt.haps -l pilot1.jun2010.b36.CEU.chr1.snpfilt.legend -m genetic_map_chr1_combined_b36.txt -dl 77053 1 1.0 1.0 -n 1000 0 -int 0 500000000 -o /scratch/hpc2862/CAMH/perm_container/container_${i2}_${SGE_TASK_ID}/chr1_block_${i2}_perm_${j2}_k_${SGE_TASK_ID} + From 76bfb3ce559fdabc9a573b5c3509bf6993c1c5d2 Mon Sep 17 00:00:00 2001 From: Chris C Date: Wed, 14 Dec 2016 11:32:03 -0500 Subject: [PATCH 06/11] blah --- inst/analysis/new_ref/makefile | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile index 07a2d46..85bb193 100644 --- a/inst/analysis/new_ref/makefile +++ b/inst/analysis/new_ref/makefile @@ -5,9 +5,8 @@ # This is going hopefully towards publication eventually. # Set up some variables first. -hapgen2= home=/scratch/hpc2862/CAMH/new_corge - +hapgen2=$(home)/bin/hapgen2 # The default rule # For now just update the git repo. @@ -32,7 +31,10 @@ $(home)/ref/.done: # as opposed to any of the actual files for the sake of simplicity. $(home)/sim/.done: $(home)/ref/.done mkdir -p $(home)/sim - $(hapgen2) -h - /home/hpc2862/Programs/hapgen2 -h pilot1.jun2010.b36.CEU.chr1.snpfilt.haps -l pilot1.jun2010.b36.CEU.chr1.snpfilt.legend -m genetic_map_chr1_combined_b36.txt -dl 77053 1 1.0 1.0 -n 1000 0 -int 0 500000000 -o /scratch/hpc2862/CAMH/perm_container/container_${i2}_${SGE_TASK_ID}/chr1_block_${i2}_perm_${j2}_k_${SGE_TASK_ID} - - + cd $(home)/ref && $(hapgen2) -h 1000GP_Phase3_chr1.hap.gz \ + -l 1000GP_Phase3_chr1.legend.gz \ + -m genetic_map_chr1_combined_b37.txt \ + -dl 77053 1 1.0 1.0 \ + -n 1000 \ + -int 0 500000000 \ + -o ../sim/chr1_sim1 From 3f9dc59c4cd647d8b121041da447c341f5f4870c Mon Sep 17 00:00:00 2001 From: Chris1221 Date: Wed, 14 Dec 2016 11:34:34 -0500 Subject: [PATCH 07/11] update coRge for new analysis see changelog --- inst/bash/test.txt | 12 ++++++++++++ inst/bash/test2.txt | 0 2 files changed, 12 insertions(+) create mode 100644 inst/bash/test.txt create mode 100644 inst/bash/test2.txt diff --git a/inst/bash/test.txt b/inst/bash/test.txt new file mode 100644 index 0000000..bc9006f --- /dev/null +++ b/inst/bash/test.txt @@ -0,0 +1,12 @@ +job-ID prior name user state submit/start at queue slots ja-task-ID +----------------------------------------------------------------------------------------------------------------- +1841411 0.50500 hi hpc2862 r 10/26/2016 15:42:22 abaqus.q@sw0050.hpcvl.org 1 2 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:22 abaqus.q@sw0050.hpcvl.org 1 3 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:23 abaqus.q@sw0050.hpcvl.org 1 4 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:23 abaqus.q@sw0050.hpcvl.org 1 5 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:23 abaqus.q@sw0050.hpcvl.org 1 6 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:23 abaqus.q@sw0050.hpcvl.org 1 7 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:23 abaqus.q@sw0050.hpcvl.org 1 8 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:24 abaqus.q@sw0050.hpcvl.org 1 9 +1841411 0.50500 hi hpc2862 t 10/26/2016 15:42:24 abaqus.q@sw0050.hpcvl.org 1 10 +1841411 0.50500 hi hpc2862 r 10/26/2016 15:42:22 abaqus.q@sw0053.hpcvl.org 1 1 diff --git a/inst/bash/test2.txt b/inst/bash/test2.txt new file mode 100644 index 0000000..e69de29 From a8de8fcfd2899395239fc6d4ff4ce12ddd70f53d Mon Sep 17 00:00:00 2001 From: Chris C Date: Wed, 14 Dec 2016 11:36:08 -0500 Subject: [PATCH 08/11] update coRge for new analysis see changelog --- inst/analysis/new_ref/makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile index 85bb193..f2824f1 100644 --- a/inst/analysis/new_ref/makefile +++ b/inst/analysis/new_ref/makefile @@ -10,7 +10,7 @@ hapgen2=$(home)/bin/hapgen2 # The default rule # For now just update the git repo. -.all: git +.all: $(home)/sim/.done # Git add rule # From c402197cafa09cd4d9b80ecb1104b897bd30983b Mon Sep 17 00:00:00 2001 From: Chris C Date: Wed, 14 Dec 2016 11:56:54 -0500 Subject: [PATCH 09/11] add --- inst/analysis/new_ref/makefile | 5 +++++ inst/analysis/new_ref/sub.sh | 12 ++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 inst/analysis/new_ref/sub.sh diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile index f2824f1..53bd2fc 100644 --- a/inst/analysis/new_ref/makefile +++ b/inst/analysis/new_ref/makefile @@ -12,6 +12,11 @@ hapgen2=$(home)/bin/hapgen2 # For now just update the git repo. .all: $(home)/sim/.done +# Submission +# +sub: + qsub -N "new_run" sub.sh + # Git add rule # git: diff --git a/inst/analysis/new_ref/sub.sh b/inst/analysis/new_ref/sub.sh new file mode 100644 index 0000000..f61a9ae --- /dev/null +++ b/inst/analysis/new_ref/sub.sh @@ -0,0 +1,12 @@ +#!/bin/bash +#$ -S /bin/bash +#$ -q abaqus.q +#$ -l qname=abaqus.q +#$ -cwd +#$ -V +#$ -l hostname=sw0050 +#$ -j y +#$ -o /home/hpc2862/repos/coR-ge/inst/logs/$JOB_NAME.txt + +cd /home/hpc2862/repos/coR-ge/inst/analysis/new_ref +make .all From 99385c7e42b04ea671fc3d928a3234c8c028d3ef Mon Sep 17 00:00:00 2001 From: Chris C Date: Wed, 14 Dec 2016 16:57:53 -0500 Subject: [PATCH 10/11] use not compressed --- inst/analysis/new_ref/makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile index 53bd2fc..a7fdf8b 100644 --- a/inst/analysis/new_ref/makefile +++ b/inst/analysis/new_ref/makefile @@ -36,8 +36,8 @@ $(home)/ref/.done: # as opposed to any of the actual files for the sake of simplicity. $(home)/sim/.done: $(home)/ref/.done mkdir -p $(home)/sim - cd $(home)/ref && $(hapgen2) -h 1000GP_Phase3_chr1.hap.gz \ - -l 1000GP_Phase3_chr1.legend.gz \ + cd $(home)/ref && $(hapgen2) -h 1000GP_Phase3_chr1.hap \ + -l 1000GP_Phase3_chr1.legend \ -m genetic_map_chr1_combined_b37.txt \ -dl 77053 1 1.0 1.0 \ -n 1000 \ From c1c3fc9674ef61c0cc278f479bf9702e7aa4f140 Mon Sep 17 00:00:00 2001 From: Chris C Date: Wed, 14 Dec 2016 19:54:37 -0500 Subject: [PATCH 11/11] fix dl --- inst/analysis/new_ref/makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/analysis/new_ref/makefile b/inst/analysis/new_ref/makefile index a7fdf8b..7960d89 100644 --- a/inst/analysis/new_ref/makefile +++ b/inst/analysis/new_ref/makefile @@ -39,7 +39,7 @@ $(home)/sim/.done: $(home)/ref/.done cd $(home)/ref && $(hapgen2) -h 1000GP_Phase3_chr1.hap \ -l 1000GP_Phase3_chr1.legend \ -m genetic_map_chr1_combined_b37.txt \ - -dl 77053 1 1.0 1.0 \ + -dl 723891 1 1.0 1.0 \ -n 1000 \ -int 0 500000000 \ -o ../sim/chr1_sim1