diff --git a/modules/nf-core/tetranscripts/environment.yml b/modules/nf-core/tetranscripts/environment.yml new file mode 100644 index 000000000000..5cdf78c89ae6 --- /dev/null +++ b/modules/nf-core/tetranscripts/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::tetranscripts=2.2.3" diff --git a/modules/nf-core/tetranscripts/main.nf b/modules/nf-core/tetranscripts/main.nf new file mode 100644 index 000000000000..f1ae631991f8 --- /dev/null +++ b/modules/nf-core/tetranscripts/main.nf @@ -0,0 +1,55 @@ +process TETRANSCRIPTS { + tag "$meta_c.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tetranscripts:2.2.3--pyh7cba7a3_0': + 'biocontainers/tetranscripts:2.2.3--pyh7cba7a3_0' }" + + input: + tuple val(meta_t), path(bam_t) // treatment BAM file(s), passed to -t + tuple val(meta_c), path(bam_c) // control BAM file(s), passed to -c + tuple val(meta_ggtf), path(g_gtf) // GTF passed to --GTF + tuple val(meta_tegtf), path(te_gtf) // GTF passed to --TE + + output: + tuple val(meta_t), path("*.cntTable"), emit: countTable + tuple val(meta_t), path("*.R"), emit: log2fc + tuple val(meta_t), path("*_analysis.txt"), emit: analysis, optional: true + tuple val(meta_t), path("*_gene_TE.txt"), emit: sigdiff, optional: true + tuple val("${task.process}"), val('tetranscripts'), eval("TEtranscripts --version | sed '1!d;s/.* //'"), emit: versions_tetranscripts, topic: versions // executable name is case-sensitive: TEtranscripts + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta_c.id}" // output files are named after the control sample id unless ext.prefix is set + // Join multiple BAM files with spaces for -t and -c arguments + def treatment_bams = [bam_t].flatten().join(' ') + def control_bams = [bam_c].flatten().join(' ') + """ + TEtranscripts \\ + -t ${treatment_bams} \\ + -c ${control_bams} \\ + --GTF 
$g_gtf \\ + --TE $te_gtf \\ + --project ${prefix} \\ + $args + + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta_c.id}" + """ + echo $args + + touch ${prefix}_DESeq2.R + touch ${prefix}.cntTable + touch ${prefix}_gene_TE_analysis.txt + touch ${prefix}_sigdiff_gene_TE.txt + + """ +} diff --git a/modules/nf-core/tetranscripts/meta.yml b/modules/nf-core/tetranscripts/meta.yml new file mode 100644 index 000000000000..0345dd07407c --- /dev/null +++ b/modules/nf-core/tetranscripts/meta.yml @@ -0,0 +1,150 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tetranscripts" +description: Runs TEtranscripts, which quantifies gene and transposable element + expression in treatment and control BAM files. +keywords: + - transposable + - TE + - transcriptomics +tools: + - "tetranscripts": + description: A package for including transposable elements in differential + enrichment analysis of sequencing datasets. + homepage: https://github.com/mhammell-laboratory/TEtranscripts + documentation: https://hammelllab.labsites.cshl.edu/software/#TEtranscripts + tool_dev_url: https://github.com/mhammell-laboratory/TEtranscripts + doi: 10.1093/bioinformatics/btv422 + licence: ["GPL v3"] + identifier: biotools:tetranscripts + +input: + # Treatment BAM + - - meta_t: + type: map + description: | + Groovy Map containing treatment sample information. + e.g. `[ id:'sample1' ]` + - bam_t: + type: file + description: One or more BAM files for the treatment condition + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" + + # Control BAM + - - meta_c: + type: map + description: | + Groovy Map containing control sample information, + e.g. 
`[ id:'control1' ]` + - bam_c: + type: file + description: One or more BAM files for the control condition + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" + + # Genome GTF + - - meta_ggtf: + type: map + description: | + Groovy map containing gene annotation GTF information + e.g. `[ id:'genome_gtf' ]` + - g_gtf: + type: file + description: A GTF file of gene annotations, passed to --GTF + pattern: "*.{gtf}" + ontologies: + - edam: "http://edamontology.org/format_2306" + + # TE GTF + - - meta_tegtf: + type: map + description: | + Groovy map containing TE GTF information + e.g. `[ id:'te_gtf' ]` + + - te_gtf: + type: file + description: A curated GTF file of transposable element annotations, passed to --TE + pattern: "*.{gtf}" + ontologies: + - edam: "http://edamontology.org/format_2306" +output: + countTable: + - - meta_t: + type: map + description: | + Groovy Map containing treatment sample information. + e.g. `[ id:'sample1' ]` + - "*.cntTable": + type: file + description: Counts table of transposable element families + pattern: "*.cntTable" + ontologies: [] + log2fc: + - - meta_t: + type: map + description: | + Groovy Map containing treatment sample information. + e.g. `[ id:'sample1' ]` + - "*.R": + type: file + description: R script generated for the DESeq2 differential expression analysis + pattern: "*.R" + + ontologies: + - edam: http://edamontology.org/format_3999 # R script + + analysis: + - - meta_t: + type: map + description: | + Groovy map containing treatment sample information. + e.g. `[ id:'sample1' ]` + - "*_analysis.txt": + type: file + description: DESeq2 differential expression results for genes and TEs + pattern: "*_analysis.txt" + ontologies: + - edam: http://edamontology.org/format_2330 # text format + + sigdiff: + - - meta_t: + type: map + description: | + Groovy map containing treatment sample information. + e.g. 
`[ id:'sample1' ]` + - "*_gene_TE.txt": + type: file + description: Subset of the DESeq2 results with significantly differential genes and TEs + pattern: "*_gene_TE.txt" + ontologies: + - edam: http://edamontology.org/format_2330 # text format + + versions_tetranscripts: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tetranscripts: + type: string + description: The tool name + - "TEtranscripts --version | sed '1!d;s/.* //'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tetranscripts: + type: string + description: The tool name + - "TEtranscripts --version | sed '1!d;s/.* //'": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@hanalysis" +maintainers: + - "@hanalysis" diff --git a/modules/nf-core/tetranscripts/tests/main.nf.test b/modules/nf-core/tetranscripts/tests/main.nf.test new file mode 100644 index 000000000000..d8b44da9bbed --- /dev/null +++ b/modules/nf-core/tetranscripts/tests/main.nf.test @@ -0,0 +1,152 @@ +// nf-core modules test tetranscripts +nextflow_process { + + name "Test Process TETRANSCRIPTS" + script "../main.nf" + process "TETRANSCRIPTS" + + tag "modules" + tag "modules_nfcore" + tag "tetranscripts" + + test("single file c and t - bam") { + config "./nextflow.config" + + when { + process { + """ + + input[0] = [ + [ id:'ctrl_bam' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + + input[1] = [ + [ id:'test_bam' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + ] + input[2] = [ + [ id:'genome_gtf' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true), + ] + input[3] = [ + [ id:'te_gtf' ], + 
file('https://raw.githubusercontent.com/hanalysis/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/chr21/sequence/GRCh38_GENCODE_rmsk_TE_chr21.gtf', checkIfExists: true), + ] + """ + + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("multiple files t - bam") { + config "./nextflow.config" + + when { + process { + """ + + input[0] = [ + [ id:'ctrl_bam' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + + input[1] = [ + [ id:'test_bam' ], + [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.chr6.bam', checkIfExists: true),], + ] + input[2] = [ + [ id:'genome_gtf' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true), + ] + input[3] = [ + [ id:'te_gtf' ], + file('https://raw.githubusercontent.com/hanalysis/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/chr21/sequence/GRCh38_GENCODE_rmsk_TE_chr21.gtf', checkIfExists: true), + ] + """ + + } + } + + then { + assert process.success + + def outputFile = path(process.out.countTable[0][1]) // [0] is first sample, [1] is file (not meta) + assert outputFile.exists() + + def lines = outputFile.readLines() + // take first 5 lines to check + def firstLines = lines.take(5) + + // Check 4 columns (row names + 3 samples) + firstLines.eachWithIndex { line, idx -> + def columns = line.split(/\s+/) + assert columns.size() == 4: "Line ${idx + 1}: Expected 4 columns but got ${columns.size()} in line: '${line}'" + } + + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> 
key.startsWith('versions') } + ).match() } + ) + } + + } + + test("single file c and t - bam - stub") { + + options "-stub" + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'ctrl_bam' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + + input[1] = [ + [ id:'test_bam' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + ] + input[2] = [ + [ id:'genome_gtf' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true), + ] + input[3] = [ + [ id:'te_gtf' ], + file('https://raw.githubusercontent.com/hanalysis/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/chr21/sequence/GRCh38_GENCODE_rmsk_TE_chr21.gtf', checkIfExists: true), + ] + + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/tetranscripts/tests/main.nf.test.snap b/modules/nf-core/tetranscripts/tests/main.nf.test.snap new file mode 100644 index 000000000000..7ecb52cb0c9c --- /dev/null +++ b/modules/nf-core/tetranscripts/tests/main.nf.test.snap @@ -0,0 +1,256 @@ +{ + "single file c and t - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam.cntTable:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_DESeq2.R:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_gene_TE_analysis.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_sigdiff_gene_TE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ], + "analysis": [ + [ + { + "id": 
"ctrl_bam" + }, + "test_bam_gene_TE_analysis.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "countTable": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam.cntTable:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log2fc": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_DESeq2.R:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sigdiff": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_sigdiff_gene_TE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_tetranscripts": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ] + }, + { + "versions_tetranscripts": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-13T15:07:58.338450261" + }, + "multiple files t - bam": { + "content": [ + { + "0": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam.cntTable:md5,9c5e3941a10964abbd251a498467181e" + ] + ], + "1": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_DESeq2.R:md5,b409dbe3a0ef5289d893a532d41c2622" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ], + "analysis": [ + + ], + "countTable": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam.cntTable:md5,9c5e3941a10964abbd251a498467181e" + ] + ], + "log2fc": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_DESeq2.R:md5,b409dbe3a0ef5289d893a532d41c2622" + ] + ], + "sigdiff": [ + + ], + "versions_tetranscripts": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ] + }, + { + "versions_tetranscripts": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-13T15:07:49.638351518" + }, + "single file c and t - bam": { + "content": [ + { + "0": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam.cntTable:md5,475cf6862ede5e7f871780133cedc97a" + ] + ], + "1": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_DESeq2.R:md5,3b059d1a81771742f535977b49be952d" + ] + ], + 
"2": [ + + ], + "3": [ + + ], + 
"4": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ], + "analysis": [ + + ], + "countTable": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam.cntTable:md5,475cf6862ede5e7f871780133cedc97a" + ] + ], + "log2fc": [ + [ + { + "id": "ctrl_bam" + }, + "test_bam_DESeq2.R:md5,3b059d1a81771742f535977b49be952d" + ] + ], + "sigdiff": [ + + ], + "versions_tetranscripts": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ] + }, + { + "versions_tetranscripts": [ + [ + "TETRANSCRIPTS", + "tetranscripts", + "2.2.3" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-13T15:07:29.760971276" + } +} \ No newline at end of file diff --git a/modules/nf-core/tetranscripts/tests/nextflow.config b/modules/nf-core/tetranscripts/tests/nextflow.config new file mode 100644 index 000000000000..f9bca2ce332a --- /dev/null +++ b/modules/nf-core/tetranscripts/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: TETRANSCRIPTS { + ext.args = '--sortByPos' + } +}