diff --git a/modules/nf-core/bracken/build/main.nf b/modules/nf-core/bracken/build/main.nf index 351fed679b1e..b219a583cf97 100644 --- a/modules/nf-core/bracken/build/main.nf +++ b/modules/nf-core/bracken/build/main.nf @@ -1,30 +1,33 @@ process BRACKEN_BUILD { - tag "$meta.id" + tag "${meta.id}" label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f3/f30aa99d8d4f6ff1104f56dbacac95c1dc0905578fb250c80f145b6e80703bd1/data': - 'community.wave.seqera.io/library/bracken:3.1--22a4e66ce04c5e01' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f3/f30aa99d8d4f6ff1104f56dbacac95c1dc0905578fb250c80f145b6e80703bd1/data' + : 'community.wave.seqera.io/library/bracken:3.1--22a4e66ce04c5e01'}" input: - tuple val(meta), path(kraken2db) + tuple val(meta), path(k2d, stageAs: "kraken2db_forbuilding/"), path(map, stageAs: "kraken2db_forbuilding/"), path(library, stageAs: "kraken2db_forbuilding/library/added/"), path(taxonomy, stageAs: "kraken2db_forbuilding/taxonomy/") output: - tuple val(meta), path(kraken2db , includeInputs: true), emit: db - tuple val(meta), path("${kraken2db}/database*", includeInputs: true), emit: bracken_files - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}/", includeInputs: true), emit: db + tuple val(meta), path("${prefix}/database*", includeInputs: true), emit: bracken_files + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" """ bracken-build \\ - $args \\ - -t $task.cpus \\ - -d $kraken2db + ${args} \\ + -t ${task.cpus} \\ + -d kraken2db_forbuilding/ + + mv kraken2db_forbuilding/ ${prefix}/ cat <<-END_VERSIONS > 
versions.yml "${task.process}": @@ -36,9 +39,11 @@ process BRACKEN_BUILD { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${kraken2db}/database100mers.kmer_distrib - touch ${kraken2db}/database100mers.kraken - touch ${kraken2db}/database.kraken + echo ${args} + mkdir -p ${prefix}/ + touch ${prefix}/database100mers.kmer_distrib + touch ${prefix}/database100mers.kraken + touch ${prefix}/database.kraken cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bracken/build/meta.yml b/modules/nf-core/bracken/build/meta.yml index b49fae21500d..6a59e114d08f 100644 --- a/modules/nf-core/bracken/build/meta.yml +++ b/modules/nf-core/bracken/build/meta.yml @@ -23,10 +23,22 @@ input: description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - kraken2db: - type: directory - description: A Kraken2 database directory - pattern: "*/" + - k2d: + type: file + description: Kraken2 k2d binary database files + pattern: "*.k2d" + - map: + type: file + description: Kraken2 sequence ID to taxonomy ID mapping file + pattern: "*.map" + - library: + type: file + description: Kraken2 masked FASTA files used to build the database + pattern: "*.fasta" + - taxonomy: + type: file + description: Kraken2 nodes.dmp, names.dmp, and .accession2taxid taxonomy files + pattern: "*.{dmp,accession2taxid}" output: db: - - meta: @@ -34,9 +46,9 @@ output: description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - kraken2db: + - ${prefix}/: type: directory - description: A Kraken2 database directory with required bracken files inside + description: Bracken compatible Kraken2 database directory containing kraken2 database files and the additional bracken kmer distribution files pattern: "*/" bracken_files: - - meta: @@ -44,10 +56,10 @@ output: description: | Groovy Map containing sample information e.g. 
`[ id:'sample1', single_end:false ]` - - ${kraken2db}/database*: + - ${prefix}/database*: type: directory - description: Bracken files required to extend the Kraken2 database - pattern: "*/" + description: Bracken kmer distribution files + pattern: "database*" versions: - versions.yml: type: file diff --git a/modules/nf-core/bracken/build/tests/main.nf.test b/modules/nf-core/bracken/build/tests/main.nf.test index 781c1518436a..267ae733c6cf 100644 --- a/modules/nf-core/bracken/build/tests/main.nf.test +++ b/modules/nf-core/bracken/build/tests/main.nf.test @@ -8,25 +8,69 @@ nextflow_process { tag "modules_nfcore" tag "bracken" tag "bracken/build" - tag "untar" + tag "gunzip" + tag "kraken2" + tag "kraken2/add" + tag "kraken2/build" + + setup { - test("kraken2 - db") { + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = Channel.of([ + [], + file( + params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot.accession2taxid.gz", + checkIfExists: true + ) + ]) + """ + } + } - setup { - run ("UNTAR") { - script "../../../untar/main.nf" - process { - """ - input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)] - """ - } + run("KRAKEN2_ADD") { + script "modules/nf-core/kraken2/add/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/proteome.fasta", checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_names.dmp", checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_nodes.dmp", checkIfExists: true) + input[3] = GUNZIP.out.gunzip.map{ it[1] } + input[4] = [] + """ } } + run("KRAKEN2_BUILD") { + script 
"modules/nf-core/kraken2/build/main.nf" + process { + """ + ch_seqid2taxid = KRAKEN2_ADD.out.seqid2taxid_map.ifEmpty([[:],[]]) + + input[0] = KRAKEN2_ADD.out.library_added_files + input[1] = ch_seqid2taxid + input[2] = KRAKEN2_ADD.out.taxonomy_files + input[3] = false // Do not clean up otherwise Bracken fails + """ + } + } + + } + + test("kraken2 - db") { + when { process { """ - input[0] = UNTAR.out.untar + input[0] = KRAKEN2_BUILD.out.db_separated """ } } @@ -53,21 +97,10 @@ nextflow_process { options "-stub" - setup { - run ("UNTAR") { - script "../../../untar/main.nf" - process { - """ - input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)] - """ - } - } - } - when { process { """ - input[0] = UNTAR.out.untar + input[0] = [[id: 'kraken2db_forbuilding'], [], [], [], []] """ } } diff --git a/modules/nf-core/bracken/build/tests/main.nf.test.snap b/modules/nf-core/bracken/build/tests/main.nf.test.snap index d3fa92819945..c550004eeca3 100644 --- a/modules/nf-core/bracken/build/tests/main.nf.test.snap +++ b/modules/nf-core/bracken/build/tests/main.nf.test.snap @@ -23,41 +23,19 @@ "0": [ [ { - "id": "db" + "id": "kraken2db_forbuilding" }, [ "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e", - "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", - "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - [ - [ - "G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e", - "G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e", - "z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e", - "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - [ - "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", - "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", - "nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] ], "1": [ [ { - "id": "db" + "id": "kraken2db_forbuilding" }, [ "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -72,7 +50,7 @@ "bracken_files": [ [ { - "id": "db" + "id": "kraken2db_forbuilding" }, [ "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -84,34 +62,12 @@ "db": [ [ { - "id": "db" + "id": "kraken2db_forbuilding" }, [ "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e", - "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e", - "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - [ - [ - "G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e", - "G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e", - "z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - "seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e", - "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", - [ - "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", - "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", - "nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e", - "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + 
"database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] ], @@ -122,8 +78,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nextflow": "25.04.7" }, - "timestamp": "2025-06-02T13:19:12.806972" + "timestamp": "2025-10-29T10:47:45.239034306" } } \ No newline at end of file diff --git a/modules/nf-core/kraken2/build/main.nf b/modules/nf-core/kraken2/build/main.nf index 92b63c41ab49..f62294eab44f 100644 --- a/modules/nf-core/kraken2/build/main.nf +++ b/modules/nf-core/kraken2/build/main.nf @@ -14,6 +14,7 @@ process KRAKEN2_BUILD { output: tuple val(meta), path("${prefix}"), emit: db + tuple val(meta), path("${prefix}/*k2d"), path("${prefix}/*map"), path("${prefix}/library/added/*"), path("${prefix}/taxonomy/*"), optional: true, emit: db_separated path "versions.yml", emit: versions when: diff --git a/modules/nf-core/kraken2/build/meta.yml b/modules/nf-core/kraken2/build/meta.yml index df86ca3873a8..92fa1650c6da 100644 --- a/modules/nf-core/kraken2/build/meta.yml +++ b/modules/nf-core/kraken2/build/meta.yml @@ -37,8 +37,8 @@ input: e.g. `[ id:'sample1', single_end:false ]` - seqid2taxid_map: type: file - description: File mapping sequence IDs to taxonomy IDs, either generated - or premade. + description: File mapping sequence IDs to taxonomy IDs, either generated or + premade. pattern: "seqid2taxid.map" ontologies: - edam: http://edamontology.org/data_3028 # Taxonomy @@ -49,8 +49,8 @@ input: e.g. `[ id:'sample1', single_end:false ]` - taxonomy_files: type: file - description: Files present in the /taxonomy/ directory, including - nodes.dmp, names.dmp, and .accession2taxid files. + description: Files present in the /taxonomy/ directory, including nodes.dmp, + names.dmp, and .accession2taxid files. 
pattern: "*" ontologies: - edam: http://edamontology.org/data_3028 # Taxonomy @@ -70,6 +70,28 @@ output: pattern: "*/" ontologies: - edam: http://edamontology.org/data_1049 # Directory + db_separated: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}/*k2d: + type: file + description: Kraken2 k2d binary database files + pattern: "*.k2d" + - ${prefix}/*map: + type: file + description: Kraken2 sequence ID to taxonomy ID mapping file + pattern: "*.map" + - ${prefix}/library/added/*: + type: file + description: Kraken2 masked FASTA files used to build the database + pattern: "*.fasta" + - ${prefix}/taxonomy/*: + type: file + description: Kraken2 nodes.dmp, names.dmp, and .accession2taxid taxonomy files + pattern: "*.{dmp,accession2taxid}" versions: - versions.yml: type: file diff --git a/modules/nf-core/kraken2/build/tests/main.nf.test.snap b/modules/nf-core/kraken2/build/tests/main.nf.test.snap index 9b81c0155eea..edad464cda2e 100644 --- a/modules/nf-core/kraken2/build/tests/main.nf.test.snap +++ b/modules/nf-core/kraken2/build/tests/main.nf.test.snap @@ -41,6 +41,9 @@ ] ], "1": [ + + ], + "2": [ "versions.yml:md5,3036edb85a7e1048bb8236d1e9ee910a" ], "db": [ @@ -54,6 +57,9 @@ "tax.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] + ], + "db_separated": [ + ], "versions": [ "versions.yml:md5,3036edb85a7e1048bb8236d1e9ee910a" @@ -64,6 +70,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.7" }, - "timestamp": "2025-10-01T07:16:12.844220381" + "timestamp": "2025-10-29T08:39:52.857076709" } } \ No newline at end of file diff --git a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf index 415f4a1a73c0..4034372dec8a 100644 --- a/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf +++ b/subworkflows/nf-core/fasta_build_add_kraken2_bracken/main.nf @@ -28,7 +28,7 @@ workflow 
FASTA_BUILD_ADD_KRAKEN2_BRACKEN { ch_versions = ch_versions.mix(KRAKEN2_BUILD.out.versions.first()) if (val_runbrackenbuild) { - BRACKEN_BUILD(KRAKEN2_BUILD.out.db) + BRACKEN_BUILD(KRAKEN2_BUILD.out.db_separated) ch_final_db = BRACKEN_BUILD.out.db ch_versions = ch_versions.mix(BRACKEN_BUILD.out.versions.first()) }