Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@

> Tremblay, B. J. (2024). universalmotif: An R package for biological motif analysis. Journal of Open Source Software, 9(100), 7012

- [TFLink](https://doi.org/10.1093/database/baac083)

> Liska O, Bohár B, Hidas A, Korcsmáros T, Papp B, Fazekas D, Ari E. TFLink: an integrated gateway to access transcription factor-target gene interactions for multiple species. Database (Oxford). 2022;2022:baac083.

- [SNEEP](https://doi.org/10.1016/j.isci.2024.109765)

> Baumgarten N, Ebert P, Schmidt F, Kern F, Schulz MH. A statistical approach for identifying single nucleotide variants that affect transcription factor binding. iScience, Volume 27, Issue 5, 109765
Expand Down
18 changes: 18 additions & 0 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ params {
gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
mito_name = "MT"
taxon_id = 9606
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz"
blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed"
}
GRCh38 {
fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf"
mito_name = "chrM"
taxon_id = 9606
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz"
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
sneep_scale_file = "${projectDir}/assets/sneep_scale_human_817.txt"
sneep_motif_file = "${projectDir}/assets/sneep_transfac_human_817.txt"
Expand All @@ -32,12 +34,14 @@ params {
gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf"
mito_name = "chrM"
taxon_id = 9606
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz"
}
GRCm38 {
fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa"
gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf"
mito_name = "MT"
taxon_id = 10090
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Mus_musculus_interactions_All_simpleFormat_v1.0.tsv.gz"
blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed"
mito_name = "Mt"
}
Expand All @@ -58,6 +62,7 @@ params {
mito_name = "MtDNA"
macs_gsize = "9e7"
taxon_id = 6239
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Caenorhabditis_elegans_interactions_All_simpleFormat_v1.0.tsv"
}
'CanFam3.1' {
fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -70,13 +75,15 @@ params {
gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf"
mito_name = "MT"
taxon_id = 7955
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Danio_rerio_interactions_All_simpleFormat_v1.0.tsv"
}
BDGP6 {
fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa"
gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf"
mito_name = "M"
macs_gsize = "1.2e8"
taxon_id = 7227
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Drosophila_melanogaster_interactions_All_simpleFormat_v1.0.tsv"
}
EquCab2 {
fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa"
Expand Down Expand Up @@ -123,18 +130,21 @@ params {
gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf"
mito_name = "MT"
taxon_id = 10116
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Rattus_norvegicus_interactions_All_simpleFormat_v1.0.tsv"
}
'Rnor_6.0' {
fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa"
gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf"
mito_name = "MT"
taxon_id = 10116
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Rattus_norvegicus_interactions_All_simpleFormat_v1.0.tsv"
}
'R64-1-1' {
fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa"
gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf"
mito_name = "MT"
taxon_id = 559292
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Saccharomyces_cerevisiae_interactions_All_simpleFormat_v1.0.tsv"
}
EF2 {
fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa"
Expand Down Expand Up @@ -164,6 +174,7 @@ params {
gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
mito_name = "chrM"
taxon_id = 9606
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz"
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
snps = "https://zenodo.org/records/15090556/files/dbSNP_hg38.bed.gz"
sneep_scale_file = "${projectDir}/assets/sneep_scale_human_817.txt"
Expand All @@ -174,13 +185,15 @@ params {
gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf"
mito_name = "chrM"
taxon_id = 9606
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz"
blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed"
}
mm10 {
fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa"
gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf"
mito_name = "chrM"
taxon_id = 10090
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Mus_musculus_interactions_All_simpleFormat_v1.0.tsv.gz"
blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed"
snps = "https://zenodo.org/records/15090556/files/dbSNP_mm10.bed.gz"
sneep_scale_file = "${projectDir}/assets/sneep_scale_mouse_218.txt"
Expand All @@ -198,6 +211,7 @@ params {
mito_name = "chrM"
macs_gsize = "9e7"
taxon_id = 6239
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Caenorhabditis_elegans_interactions_All_simpleFormat_v1.0.tsv"
}
canFam3 {
fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -211,12 +225,14 @@ params {
mito_name = "chrM"
macs_gsize = "1.37e9"
taxon_id = 7955
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Danio_rerio_interactions_All_simpleFormat_v1.0.tsv"
}
dm6 {
fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa"
gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf"
mito_name = "chrM"
taxon_id = 7227
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Drosophila_melanogaster_interactions_All_simpleFormat_v1.0.tsv"
}
equCab2 {
fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -241,12 +257,14 @@ params {
gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf"
mito_name = "chrM"
taxon_id = 10116
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Rattus_norvegicus_interactions_All_simpleFormat_v1.0.tsv"
}
sacCer3 {
fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa"
mito_name = "chrM"
macs_gsize = "1.2e7"
taxon_id = 559292
tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Saccharomyces_cerevisiae_interactions_All_simpleFormat_v1.0.tsv"
}
susScr3 {
fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa"
Expand Down
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,14 @@ process {
]
}

// Publishing rules for the TFLINK_ANNOTATE process of the TFACTIVITY workflow.
// Outputs are published to <outdir>/05_ranking/07_tflink_annotation using the
// pipeline-wide publish mode (params.publish_dir_mode). The saveAs closure
// returns null for 'versions.yml' and the unchanged filename for every other
// file, so versions.yml is the only output not published here.
withName: ".*:TFACTIVITY:TFLINK_ANNOTATE" {
publishDir = [
path: { "${params.outdir}/05_ranking/07_tflink_annotation" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

/*
FIMO
*/
Expand Down
6 changes: 6 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,11 @@ The ranking outputs provide the primary results of the pipeline: prioritized lis
- `<assay>.tg_ranking.tsv`: TG ranking matrices per assay (COMBINE_TGS_PER_ASSAY).
- `06_combined_tgs_across_assays/`
- `all.tsv`: TG ranking matrices combined across assays (COMBINE_TGS_ACROSS_ASSAYS).
- `07_tflink_annotation/` (only if `--tflink_file` is provided directly or via `--genome`)
- `<assay>.tflink.tf_ranking.tsv`: TF rankings with TFLink support columns (`tflink_supported`, `tflink_supported_edges`, `tflink_total_edges`, `tflink_support_rate`).
- `<assay>.tflink.tg_ranking.tsv`: Unchanged TG ranking matrix copied for traceable side-by-side usage with TFLink annotations.
- `<assay>.tflink_edges.tsv`: Edge-level TFLink support table with per TF-target support status and evidence metadata.
- `<assay>.tflink_summary.tsv`: Per-assay support summary for annotated edges.

</details>

Expand Down Expand Up @@ -387,6 +392,7 @@ This final step generates a comprehensive, interactive HTML report that consolid
- Binding affinity predictions from STARE
- Regulatory coefficients from DYNAMITE analysis
- Motif information and binding site predictions
- TFLink evidence summaries and per-TF support metrics when TFLink annotation is enabled

**Distribution Formats**: Results are provided in two convenient formats:

Expand Down
3 changes: 3 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ params.taxon_id = getGenomeAttribute('taxon_id')
params.snps = getGenomeAttribute('snps')
params.sneep_scale_file = getGenomeAttribute('sneep_scale_file')
params.sneep_motif_file = getGenomeAttribute('sneep_motif_file')
params.tflink_file = getGenomeAttribute('tflink_file')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -63,6 +64,7 @@ workflow NFCORE_TFACTIVITY {
snps = params.snps ? file(params.snps, checkIfExists: true) : null
sneep_scale_file = params.sneep_scale_file ? file(params.sneep_scale_file, checkIfExists: true) : null
sneep_motif_file = params.sneep_motif_file ? file(params.sneep_motif_file, checkIfExists: true) : null
tflink_file = params.tflink_file ? file(params.tflink_file, checkIfExists: true) : null

//
// SUBWORKFLOW: Prepare genome
Expand Down Expand Up @@ -113,6 +115,7 @@ workflow NFCORE_TFACTIVITY {
params.dynamite_randomize,
params.alpha,
snps,
tflink_file,
ch_versions,
params.skip_fimo,
params.skip_sneep,
Expand Down
64 changes: 60 additions & 4 deletions modules/local/report/preprocess/templates/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
from gtfparse import read_gtf

# Constants
OVERVIEW_TEMPLATE = {"dcg": {}, "regression_coefficients": {}, "differential_expression": {}, "tpm": {}}
OVERVIEW_TEMPLATE = {
"dcg": {},
"regression_coefficients": {},
"differential_expression": {},
"tpm": {},
"tflink": {},
}
TF_TEMPLATE = {
"target_genes": {},
"differential_expression": {},
Expand All @@ -20,6 +26,7 @@
"tpm": {},
"counts": {},
"fimo_binding_sites": {},
"tflink": {},
}

def remove_motif_id(tf):
Expand Down Expand Up @@ -72,11 +79,30 @@ def init_tf_data(tf, tfs):

def process_ranking_data(paths, overview, tfs):
"""Process TF ranking and target gene data."""
tflink_summary = {}

for file in paths['tf_ranking_dir'].glob("*.tf_ranking.tsv"):
assay = file.stem.split(".")[0]
file_name = file.name
if file_name.endswith(".tflink.tf_ranking.tsv"):
assay = file_name.removesuffix(".tflink.tf_ranking.tsv")
tg_filename = file_name.replace(".tflink.tf_ranking.tsv", ".tflink.tg_ranking.tsv")
else:
assay = file_name.removesuffix(".tf_ranking.tsv")
tg_filename = file_name.replace(".tf_ranking.tsv", ".tg_ranking.tsv")

df_tf = pd.read_csv(file, sep="\\t", index_col=0)
df_tg = pd.read_csv(paths['tg_ranking_dir'] / f"{assay}.tg_ranking.tsv", sep="\\t", index_col=0)
df_tg = pd.read_csv(paths['tg_ranking_dir'] / tg_filename, sep="\\t", index_col=0)
has_tflink_columns = all(
column in df_tf.columns
for column in ["tflink_supported_edges", "tflink_total_edges", "tflink_support_rate"]
)

if has_tflink_columns and assay not in tflink_summary:
tflink_summary[assay] = {
"supported_edges": 0,
"total_edges": 0,
"support_rate": 0.0,
}

# Process all TFs from this assay
for tf, dcg_score in df_tf["dcg"].items():
Expand All @@ -90,6 +116,29 @@ def process_ranking_data(paths, overview, tfs):
# Store target genes
tfs[tf]["target_genes"][assay] = df_tg[tf].to_dict()

if has_tflink_columns:
supported_edges = int(df_tf.loc[tf, "tflink_supported_edges"]) if pd.notna(df_tf.loc[tf, "tflink_supported_edges"]) else 0
total_edges = int(df_tf.loc[tf, "tflink_total_edges"]) if pd.notna(df_tf.loc[tf, "tflink_total_edges"]) else 0
support_rate = float(df_tf.loc[tf, "tflink_support_rate"]) if pd.notna(df_tf.loc[tf, "tflink_support_rate"]) else 0.0

tflink_data = {
"supported_edges": supported_edges,
"total_edges": total_edges,
"support_rate": support_rate,
}
overview[tf]["tflink"][assay] = tflink_data
tfs[tf]["tflink"][assay] = tflink_data

tflink_summary[assay]["supported_edges"] += supported_edges
tflink_summary[assay]["total_edges"] += total_edges

for assay in tflink_summary:
total_edges = tflink_summary[assay]["total_edges"]
supported_edges = tflink_summary[assay]["supported_edges"]
tflink_summary[assay]["support_rate"] = (float(supported_edges) / float(total_edges)) if total_edges else 0.0

return tflink_summary

def process_differential_expression(paths, overview, tfs):
"""Process differential expression data."""
pairings = set()
Expand Down Expand Up @@ -335,6 +384,9 @@ def clean_empty_data(overview, tfs):
# Remove empty target_genes
if not tfs[tf]["target_genes"]:
del tfs[tf]["target_genes"]
# Remove empty TFLink annotations
if "tflink" in tfs[tf] and not tfs[tf]["tflink"]:
del tfs[tf]["tflink"]

def merge_overview_data(overview, tfs):
"""Merge overview data into individual TF structures."""
Expand Down Expand Up @@ -491,12 +543,16 @@ def main():
clean_params_data(params)

# Process core data
process_ranking_data(paths, overview, tfs)
tflink_summary = process_ranking_data(paths, overview, tfs)
pairings = process_differential_expression(paths, overview, tfs)

# Get assays from overview
assays = list(set([assay for tf_data in overview.values() for assay in tf_data["dcg"].keys()]))
metadata["assays"] = assays
metadata["tflink"] = {
"enabled": bool(tflink_summary),
"assays": tflink_summary,
}

# Process remaining data types
process_regression_coefficients(paths, overview, pairings, assays)
Expand Down
6 changes: 6 additions & 0 deletions modules/local/tflink/annotate/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the local tflink/annotate module (referenced from its
# main.nf via `conda "${moduleDir}/environment.yml"`).
# Channels are listed in priority order; both pinned packages below are
# explicitly taken from conda-forge.
channels:
- conda-forge
- bioconda
dependencies:
# Exact version pins; the module's versions.yml reports the pandas and yaml
# versions found at runtime.
- conda-forge::pandas=2.3.0
- conda-forge::pyyaml=6.0.2
45 changes: 45 additions & 0 deletions modules/local/tflink/annotate/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
 * TFLINK_ANNOTATE
 *
 * Takes one TF ranking / TG ranking pair plus a TFLink interaction file and
 * emits four per-sample tables (TF ranking, TG ranking, edge annotations,
 * summary) together with a versions.yml.
 *
 * The real work is done by the `annotate.py` template, which is not part of
 * this file; the exact output file names and column layout are therefore only
 * visible here through the output globs and the stub section.
 * NOTE(review): the stub names its ranking outputs `<meta.id>.tflink.*` —
 * presumably the template does the same; confirm against annotate.py.
 */
process TFLINK_ANNOTATE {
// Tag each task with the sample/assay identifier carried in the meta map.
tag "${meta.id}"
label "process_single"

// Software environment: conda spec shipped with the module, or a container.
// The Singularity image URL is used only when the container engine is
// singularity AND task.ext.singularity_pull_docker_container is not set;
// in every other case the Wave Docker image is used.
conda "${moduleDir}/environment.yml"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7c/7c256e63e08633ac420692d3ceec1f554fe4fcc794e5bdd331994f743096a46d/data'
: 'community.wave.seqera.io/library/pandas_pyyaml:c0acbb47d05e4f9c'}"

// Inputs:
//   meta        - map with at least an `id` key (used for tagging and stub file names)
//   tf_ranking  - TF ranking table for this meta
//   tg_ranking  - TG ranking table for this meta
//   tflink_file - TFLink interaction file, passed as a separate path input
input:
tuple val(meta), path(tf_ranking), path(tg_ranking)
path tflink_file

// Outputs are collected by suffix glob, each paired with the incoming meta map;
// versions.yml is emitted on its own channel.
output:
tuple val(meta), path("*.tf_ranking.tsv"), emit: tf_ranking
tuple val(meta), path("*.tg_ranking.tsv"), emit: tg_ranking
tuple val(meta), path("*.tflink_edges.tsv"), emit: edge_annotations
tuple val(meta), path("*.tflink_summary.tsv"), emit: summary
path "versions.yml", emit: versions

// The task script is rendered from the module's annotate.py template.
script:
template("annotate.py")

// Stub: produces placeholder outputs without running the annotation.
//  - copies both input rankings to `<meta.id>.tflink.{tf,tg}_ranking.tsv`
//  - writes an edges table containing only the header row
//  - writes a summary table with a single all-zero row for this meta.id
//  - writes versions.yml with the python, pandas and yaml versions
// Inside the string, `\t` is a Groovy escape (a literal tab reaches the shell)
// and `\$` defers the `$( ... )` command substitution to the shell.
stub:
"""
cp ${tf_ranking} ${meta.id}.tflink.tf_ranking.tsv
cp ${tg_ranking} ${meta.id}.tflink.tg_ranking.tsv

cat <<-END_EDGES > ${meta.id}.tflink_edges.tsv
tf\ttarget_gene\tscore\ttflink_supported\ttflink_match_type\ttflink_evidence_scope\ttflink_source_count\ttflink_sources
END_EDGES

cat <<-END_SUMMARY > ${meta.id}.tflink_summary.tsv
assay\ttflink_total_edges\ttflink_supported_edges\ttflink_support_rate
${meta.id}\t0\t0\t0.0
END_SUMMARY

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python3 --version | cut -f 2 -d " ")
pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
yaml: \$(python3 -c "import yaml; print(yaml.__version__)")
END_VERSIONS
"""
}
Loading
Loading