Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions code/SoS/data_preprocessing/phenotype/gene_annotation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
"| `input/rnaseq/protocol_example.leafcutter.intron_count.tsv` | Toy leafcutter intron-count table (IDs `chr:start:end:clu_N_strand`). |\n",
"| `input/rnaseq/protocol_example.leafcutter.phenotype.bed.gz` | Toy leafcutter intron-excision phenotype matrix. |\n",
"| `input/rnaseq/protocol_example.psichomics.phenotype.tsv` | Toy psichomics phenotype matrix (IDs end in `_<gene_id>`). |\n",
"| `reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf` | Collapsed gene-model GTF (gene/protein annotation). |\n",
"| `reference_data/Homo_sapiens.GRCh38.103.chr.gtf` | Full gene-model GTF with exons (leafcutter/psichomics). |\n"
"| `input/reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf` | Collapsed gene-model GTF (gene/protein annotation). |\n",
"| `input/reference_data/Homo_sapiens.GRCh38.103.chr.gtf` | Full gene-model GTF with exons (leafcutter/psichomics). |\n"
]
},
{
Expand Down Expand Up @@ -132,7 +132,7 @@
"sos run pipeline/gene_annotation.ipynb annotate_coord \\\n",
" --cwd output/gene_annotation \\\n",
" --phenoFile input/rnaseq/protocol_example.rnaseq.bed.gz \\\n",
" --coordinate-annotation reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf \\\n",
" --coordinate-annotation input/reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf \\\n",
" --phenotype-id-column gene_id\n"
]
},
Expand Down Expand Up @@ -183,7 +183,7 @@
"sos run pipeline/gene_annotation.ipynb annotate_coord \\\n",
" --cwd output/gene_annotation \\\n",
" --phenoFile input/proteomics/protocol_example.protein.no_coord.tsv \\\n",
" --coordinate-annotation reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf \\\n",
" --coordinate-annotation input/reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf \\\n",
" --phenotype-id-column gene_id \\\n",
" --molecular-trait-type protein\n"
]
Expand Down Expand Up @@ -227,7 +227,7 @@
" --cwd output/gene_annotation \\\n",
" --phenoFile input/rnaseq/protocol_example.leafcutter.phenotype.bed.gz \\\n",
" --intron-count input/rnaseq/protocol_example.leafcutter.intron_count.tsv \\\n",
" --coordinate-annotation reference_data/Homo_sapiens.GRCh38.103.chr.gtf \\\n",
" --coordinate-annotation input/reference_data/Homo_sapiens.GRCh38.103.chr.gtf \\\n",
" --map-stra site\n"
]
},
Expand Down Expand Up @@ -293,7 +293,7 @@
" --cwd output/gene_annotation \\\n",
" --phenoFile input/rnaseq/protocol_example.leafcutter.phenotype.bed.gz \\\n",
" --intron-count input/rnaseq/protocol_example.leafcutter.intron_count.tsv \\\n",
" --coordinate-annotation reference_data/Homo_sapiens.GRCh38.103.chr.gtf \\\n",
" --coordinate-annotation input/reference_data/Homo_sapiens.GRCh38.103.chr.gtf \\\n",
" --map-stra site\n"
]
},
Expand Down Expand Up @@ -335,7 +335,7 @@
"sos run pipeline/gene_annotation.ipynb annotate_psichomics_isoforms \\\n",
" --cwd output/gene_annotation \\\n",
" --phenoFile input/rnaseq/protocol_example.psichomics.phenotype.tsv \\\n",
" --coordinate-annotation reference_data/Homo_sapiens.GRCh38.103.chr.gtf\n"
" --coordinate-annotation input/reference_data/Homo_sapiens.GRCh38.103.chr.gtf\n"
]
},
{
Expand Down Expand Up @@ -622,9 +622,7 @@
"metadata": {
"kernel": "SoS"
},
"source": [
"The gtf used here should be the collapsed gtf, i.e. the final output of reference_data gtf processing and the one used to called rnaseq."
]
"source": "The gtf used here should be the collapsed gtf, i.e. the final output of reference_data gtf processing and the one used to call rnaseq."
},
{
"cell_type": "code",
Expand Down
7 changes: 3 additions & 4 deletions code/script/molecular_phenotypes/QC/bulk_expression_QC.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@ if (is.null(opt[["tpm-gct"]])) stop("--tpm-gct is required")

dir.create(opt$cwd, showWarnings = FALSE, recursive = TRUE)

normalize_output_prefix <- function(path) {
stem <- sub("\\.(gct|GCT)(\\.gz)?$", "", basename(path))
stem <- sub("\\.gene_tpm$", "", stem)
stem <- sub("\\.tpm$", "", stem)
# Derive an output prefix from a file path.
#
# Takes the base name of `path` and removes up to `n_ext` trailing
# dot-delimited suffixes (".something" at the end of the name), one per pass.
# A pass that finds no remaining suffix leaves the name unchanged, so names
# with fewer than `n_ext` suffixes are simply stripped of all of them.
#
# Args:
#   path:  File path(s); only the base name is used.
#   n_ext: Maximum number of trailing suffixes to remove (default 3).
#          NOTE(review): presumably sized for names such as
#          "<prefix>.gene_tpm.gct.gz" -- confirm against callers.
#
# Returns:
#   The base name with the trailing suffixes removed.
normalize_output_prefix <- function(path, n_ext = 3) {
  # Remove the final ".suffix" of a name; `pass` is the fold index and is
  # intentionally unused.
  drop_last_suffix <- function(name, pass) {
    sub("\\.[^.]+$", "", name)
  }

  file_name <- basename(path)
  passes <- seq_len(n_ext)

  # Fold the suffix-stripping step over the passes, starting from the base
  # name; with zero passes the base name is returned untouched.
  Reduce(drop_last_suffix, passes, file_name)
}

Expand Down
Loading