nf-core · jfy133 · Oct 1, 2025 · Oct 1, 2025 · Oct 29, 2025 · Oct 29, 2025
diff --git a/modules/nf-core/bracken/build/main.nf b/modules/nf-core/bracken/build/main.nf
@@ -1,30 +1,33 @@
 process BRACKEN_BUILD {
-    tag "$meta.id"
+    tag "${meta.id}"
     label 'process_high'
 
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f3/f30aa99d8d4f6ff1104f56dbacac95c1dc0905578fb250c80f145b6e80703bd1/data':
-        'community.wave.seqera.io/library/bracken:3.1--22a4e66ce04c5e01' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f3/f30aa99d8d4f6ff1104f56dbacac95c1dc0905578fb250c80f145b6e80703bd1/data'
+        : 'community.wave.seqera.io/library/bracken:3.1--22a4e66ce04c5e01'}"
 
     input:
-    tuple val(meta), path(kraken2db)
+    tuple val(meta), path(k2d, stageAs: "kraken2db_forbuilding/"), path(map, stageAs: "kraken2db_forbuilding/"), path(library, stageAs: "kraken2db_forbuilding/library/added/"), path(taxonomy, stageAs: "kraken2db_forbuilding/taxonomy/")
 
     output:
-    tuple val(meta), path(kraken2db               , includeInputs: true), emit: db
-    tuple val(meta), path("${kraken2db}/database*", includeInputs: true), emit: bracken_files
-    path "versions.yml"                                  , emit: versions
+    tuple val(meta), path("${prefix}/", includeInputs: true), emit: db
+    tuple val(meta), path("${prefix}/database*", includeInputs: true), emit: bracken_files
+    path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
     """
     bracken-build \\
-        $args \\
-        -t $task.cpus \\
-        -d $kraken2db
+        ${args} \\
+        -t ${task.cpus} \\
+        -d kraken2db_forbuilding/
+
+    [ "${prefix}" = "kraken2db_forbuilding" ] || mv kraken2db_forbuilding/ ${prefix}/  # skip the rename when prefix already equals the staging directory
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -36,9 +39,11 @@ process BRACKEN_BUILD {
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${kraken2db}/database100mers.kmer_distrib
-    touch ${kraken2db}/database100mers.kraken
-    touch ${kraken2db}/database.kraken
+    echo ${args}
+    mkdir -p ${prefix}/  # stub files must be created under the declared output directory
+    touch ${prefix}/database100mers.kmer_distrib
+    touch ${prefix}/database100mers.kraken
+    touch ${prefix}/database.kraken
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/nf-core/bracken/build/meta.yml b/modules/nf-core/bracken/build/meta.yml
@@ -23,31 +23,43 @@ input:
         description: |
           Groovy Map containing sample information
           e.g. `[ id:'sample1', single_end:false ]`
-    - kraken2db:
-        type: directory
-        description: A Kraken2 database directory
-        pattern: "*/"
+    - k2d:
+        type: file
+        description: Kraken2 k2d binary database files
+        pattern: "*.k2d"
+    - map:
+        type: file
+        description: Kraken2 sequence ID to taxonomy ID mapping file (e.g. seqid2taxid.map)
+        pattern: "*.map"
+    - library:
+        type: file
+        description: Kraken2 masked FASTA files used to build the database
+        pattern: "*.fasta"
+    - taxonomy:
+        type: file
+        description: Kraken2 nodes.dmp, names.dmp, and .accession2taxid taxonomy files
+        pattern: "*.{dmp,accession2taxid}"
 output:
   db:
     - - meta:
           type: map
           description: |
             Groovy Map containing sample information
             e.g. `[ id:'sample1', single_end:false ]`
-      - kraken2db:
+      - ${prefix}/:
           type: directory
-          description: A Kraken2 database directory with required bracken files inside
+          description: Bracken compatible Kraken2 database directory containing kraken2 database files and the additional bracken kmer distribution files
           pattern: "*/"
   bracken_files:
     - - meta:
           type: map
           description: |
             Groovy Map containing sample information
             e.g. `[ id:'sample1', single_end:false ]`
-      - ${kraken2db}/database*:
-          type: directory
-          description: Bracken files required to extend the Kraken2 database
-          pattern: "*/"
+      - ${prefix}/database*:
+          type: file
+          description: Bracken kmer distribution files
+          pattern: "database*"
   versions:
     - versions.yml:
         type: file

diff --git a/modules/nf-core/bracken/build/tests/main.nf.test b/modules/nf-core/bracken/build/tests/main.nf.test
@@ -8,25 +8,69 @@ nextflow_process {
     tag "modules_nfcore"
     tag "bracken"
     tag "bracken/build"
-    tag "untar"
+    tag "gunzip"
+    tag "kraken2"
+    tag "kraken2/add"
+    tag "kraken2/build"
+
+    setup {
 
-    test("kraken2 - db") {
+        run("GUNZIP") {
+            script "modules/nf-core/gunzip/main.nf"
+            process {
+                """
+                input[0] = Channel.of([
+                    [],
+                    file(
+                        params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot.accession2taxid.gz",
+                        checkIfExists: true
+                    )
+                ])
+                """
+            }
+        }
 
-        setup {
-            run ("UNTAR") {
-                script "../../../untar/main.nf"
-                process {
-                    """
-                    input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)]
-                    """
-                }
+        run("KRAKEN2_ADD") {
+            script "modules/nf-core/kraken2/add/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                        [
+                        file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/sarscov2/genome/proteome.fasta", checkIfExists: true)
+                        ]
+                    ]
+                input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_names.dmp", checkIfExists: true)
+                input[2] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_nodes.dmp", checkIfExists: true)
+                input[3] = GUNZIP.out.gunzip.map{ it[1] }
+                input[4] = []
+                """
             }
         }
 
+        run("KRAKEN2_BUILD") {
+            script "modules/nf-core/kraken2/build/main.nf"
+            process {
+                """
+                ch_seqid2taxid = KRAKEN2_ADD.out.seqid2taxid_map.ifEmpty([[:],[]])
+
+                input[0] = KRAKEN2_ADD.out.library_added_files
+                input[1] = ch_seqid2taxid
+                input[2] = KRAKEN2_ADD.out.taxonomy_files
+                input[3] = false // Do not clean up otherwise Bracken fails
+                """
+            }
+        }
+
+    }
+
+    test("kraken2 - db") {
+
         when {
             process {
                 """
-                input[0] = UNTAR.out.untar
+                input[0] = KRAKEN2_BUILD.out.db_separated
                 """
             }
         }
@@ -53,21 +97,10 @@ nextflow_process {
 
         options "-stub"
 
-        setup {
-            run ("UNTAR") {
-                script "../../../untar/main.nf"
-                process {
-                    """
-                    input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)]
-                    """
-                }
-            }
-        }
-
         when {
             process {
                 """
-                input[0] = UNTAR.out.untar
+                input[0] = [[id: 'kraken2db_forbuilding'], [], [], [], []]
                 """
             }
         }

diff --git a/modules/nf-core/bracken/build/tests/main.nf.test.snap b/modules/nf-core/bracken/build/tests/main.nf.test.snap
@@ -23,41 +23,19 @@
                 "0": [
                     [
                         {
-                            "id": "db"
+                            "id": "kraken2db_forbuilding"
                         },
                         [
                             "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
                             "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            [
-                                [
-                                    "G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e"
-                                ]
-                            ],
-                            "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            [
-                                "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                "nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
-                            ],
-                            "unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
                 "1": [
                     [
                         {
-                            "id": "db"
+                            "id": "kraken2db_forbuilding"
                         },
                         [
                             "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
@@ -72,7 +50,7 @@
                 "bracken_files": [
                     [
                         {
-                            "id": "db"
+                            "id": "kraken2db_forbuilding"
                         },
                         [
                             "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
@@ -84,34 +62,12 @@
                 "db": [
                     [
                         {
-                            "id": "db"
+                            "id": "kraken2db_forbuilding"
                         },
                         [
                             "database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
                             "database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            [
-                                [
-                                    "G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                    "z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e"
-                                ]
-                            ],
-                            "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            [
-                                "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                "nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e",
-                                "prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
-                            ],
-                            "unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                            "database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
@@ -122,8 +78,8 @@
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "24.10.2"
+            "nextflow": "25.04.7"
         },
-        "timestamp": "2025-06-02T13:19:12.806972"
+        "timestamp": "2025-10-29T10:47:45.239034306"
     }
 }
diff --git a/modules/nf-core/kraken2/build/main.nf b/modules/nf-core/kraken2/build/main.nf
@@ -14,6 +14,7 @@ process KRAKEN2_BUILD {
 
     output:
     tuple val(meta), path("${prefix}"), emit: db
+    tuple val(meta), path("${prefix}/*k2d"), path("${prefix}/*map"), path("${prefix}/library/added/*"), path("${prefix}/taxonomy/*"), optional: true, emit: db_separated
     path "versions.yml", emit: versions
 
     when:

diff --git a/modules/nf-core/kraken2/build/meta.yml b/modules/nf-core/kraken2/build/meta.yml
@@ -37,8 +37,8 @@ input:
           e.g. `[ id:'sample1', single_end:false ]`
     - seqid2taxid_map:
         type: file
-        description: File mapping sequence IDs to taxonomy IDs, either generated
-          or premade.
+        description: File mapping sequence IDs to taxonomy IDs, either generated or
+          premade.
         pattern: "seqid2taxid.map"
         ontologies:
           - edam: http://edamontology.org/data_3028 # Taxonomy
@@ -49,8 +49,8 @@ input:
           e.g. `[ id:'sample1', single_end:false ]`
     - taxonomy_files:
         type: file
-        description: Files present in the <name>/taxonomy/ directory, including
-          nodes.dmp, names.dmp, and .accession2taxid files.
+        description: Files present in the <name>/taxonomy/ directory, including nodes.dmp,
+          names.dmp, and .accession2taxid files.
         pattern: "*"
         ontologies:
           - edam: http://edamontology.org/data_3028 # Taxonomy
@@ -70,6 +70,28 @@ output:
           pattern: "*/"
           ontologies:
             - edam: http://edamontology.org/data_1049 # Directory
+  db_separated:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - ${prefix}/*k2d:
+          type: file
+          description: Kraken2 k2d binary database files
+          pattern: "*.k2d"
+      - ${prefix}/*map:
+          type: file
+          description: Kraken2 sequence ID to taxonomy ID mapping file (e.g. seqid2taxid.map)
+          pattern: "*.map"
+      - ${prefix}/library/added/*:
+          type: file
+          description: Kraken2 masked FASTA files used to build the database
+          pattern: "*.fasta"
+      - ${prefix}/taxonomy/*:
+          type: file
+          description: Kraken2 nodes.dmp, names.dmp, and .accession2taxid taxonomy files
+          pattern: "*.{dmp,accession2taxid}"
   versions:
     - versions.yml:
         type: file
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,7 @@ process KRAKEN2_BUILD { @@
         output:
         tuple val(meta), path("${prefix}"), emit: db
+        tuple val(meta), path("${prefix}/*k2d"), path("${prefix}/*map"), path("${prefix}/library/added/*"), path("${prefix}/taxonomy/*"), optional: true, emit: db_separated
         path "versions.yml", emit: versions
         when:
@@ Expand Down @@