Skip to content
33 changes: 19 additions & 14 deletions modules/nf-core/bracken/build/main.nf
Original file line number Diff line number Diff line change
@@ -1,30 +1,33 @@
process BRACKEN_BUILD {
tag "$meta.id"
tag "${meta.id}"
label 'process_high'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f3/f30aa99d8d4f6ff1104f56dbacac95c1dc0905578fb250c80f145b6e80703bd1/data':
'community.wave.seqera.io/library/bracken:3.1--22a4e66ce04c5e01' }"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f3/f30aa99d8d4f6ff1104f56dbacac95c1dc0905578fb250c80f145b6e80703bd1/data'
: 'community.wave.seqera.io/library/bracken:3.1--22a4e66ce04c5e01'}"

input:
tuple val(meta), path(kraken2db)
tuple val(meta), path(k2d, stageAs: "kraken2db_forbuilding/"), path(map, stageAs: "kraken2db_forbuilding/"), path(library, stageAs: "kraken2db_forbuilding/library/added/"), path(taxonomy, stageAs: "kraken2db_forbuilding/taxonomy/")

output:
tuple val(meta), path(kraken2db , includeInputs: true), emit: db
tuple val(meta), path("${kraken2db}/database*", includeInputs: true), emit: bracken_files
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}/", includeInputs: true), emit: db
tuple val(meta), path("${prefix}/database*", includeInputs: true), emit: bracken_files
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
bracken-build \\
$args \\
-t $task.cpus \\
-d $kraken2db
${args} \\
-t ${task.cpus} \\
-d kraken2db_forbuilding/

mv kraken2db_forbuilding/ ${prefix}/

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand All @@ -36,9 +39,11 @@ process BRACKEN_BUILD {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${kraken2db}/database100mers.kmer_distrib
touch ${kraken2db}/database100mers.kraken
touch ${kraken2db}/database.kraken
echo ${args}
mkdir kraken2db_forbuilding/
touch kraken2db_forbuilding/database100mers.kmer_distrib
touch kraken2db_forbuilding/database100mers.kraken
touch kraken2db_forbuilding/database.kraken

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
30 changes: 21 additions & 9 deletions modules/nf-core/bracken/build/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,43 @@ input:
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- kraken2db:
type: directory
description: A Kraken2 database directory
pattern: "*/"
- k2d:
type: file
description: Kraken2 k2d binary database files
pattern: "*.k2d"
- map:
type: file
description: Kraken2 sequence ID to taxonomy ID mapping file (e.g. seqid2taxid.map)
pattern: "*.map"
- library:
type: file
description: Kraken2 masked FASTA files used to build the database
pattern: "*.fasta"
- taxonomy:
type: file
description: Kraken2 nodes.dmp, names.dmp, and .accession2taxid taxonomy files
pattern: "*.{dmp,accession2taxid}"
output:
db:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- kraken2db:
- ${prefix}/:
type: directory
description: A Kraken2 database directory with required bracken files inside
description: Bracken-compatible Kraken2 database directory containing the Kraken2 database files and the additional Bracken k-mer distribution files
pattern: "*/"
bracken_files:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- ${kraken2db}/database*:
- ${prefix}/database*:
type: file
description: Bracken files required to extend the Kraken2 database
pattern: "*/"
description: Bracken kmer distribution files
pattern: "database*"
versions:
- versions.yml:
type: file
Expand Down
79 changes: 56 additions & 23 deletions modules/nf-core/bracken/build/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,69 @@ nextflow_process {
tag "modules_nfcore"
tag "bracken"
tag "bracken/build"
tag "untar"
tag "gunzip"
tag "kraken2"
tag "kraken2/add"
tag "kraken2/build"

setup {

test("kraken2 - db") {
run("GUNZIP") {
script "modules/nf-core/gunzip/main.nf"
process {
"""
input[0] = Channel.of([
[],
file(
params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot.accession2taxid.gz",
checkIfExists: true
)
])
"""
}
}

setup {
run ("UNTAR") {
script "../../../untar/main.nf"
process {
"""
input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)]
"""
}
run("KRAKEN2_ADD") {
script "modules/nf-core/kraken2/add/main.nf"
process {
"""
input[0] = [
[ id:'test' ],
[
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/proteome.fasta", checkIfExists: true)
]
]
input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_names.dmp", checkIfExists: true)
input[2] = file(params.modules_testdata_base_path + "genomics/sarscov2/metagenome/prot_nodes.dmp", checkIfExists: true)
input[3] = GUNZIP.out.gunzip.map{ it[1] }
input[4] = []
"""
}
}

run("KRAKEN2_BUILD") {
script "modules/nf-core/kraken2/build/main.nf"
process {
"""
ch_seqid2taxid = KRAKEN2_ADD.out.seqid2taxid_map.ifEmpty([[:],[]])

input[0] = KRAKEN2_ADD.out.library_added_files
input[1] = ch_seqid2taxid
input[2] = KRAKEN2_ADD.out.taxonomy_files
input[3] = false // Do not clean up otherwise Bracken fails
"""
}
}

}

test("kraken2 - db") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[0] = KRAKEN2_BUILD.out.db_separated
"""
}
}
Expand All @@ -53,21 +97,10 @@ nextflow_process {

options "-stub"

setup {
run ("UNTAR") {
script "../../../untar/main.nf"
process {
"""
input[0] = [[id: 'db'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2_intermediate.tar.gz', checkIfExists: true)]
"""
}
}
}

when {
process {
"""
input[0] = UNTAR.out.untar
input[0] = [[id: 'kraken2db_forbuilding'], [], [], [], []]
"""
}
}
Expand Down
60 changes: 8 additions & 52 deletions modules/nf-core/bracken/build/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -23,41 +23,19 @@
"0": [
[
{
"id": "db"
"id": "kraken2db_forbuilding"
},
[
"database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
"database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
"database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
"hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
[
[
"G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
"G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
"z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
"seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e",
"taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
[
"names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
"nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
"nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
"database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
"1": [
[
{
"id": "db"
"id": "kraken2db_forbuilding"
},
[
"database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
Expand All @@ -72,7 +50,7 @@
"bracken_files": [
[
{
"id": "db"
"id": "kraken2db_forbuilding"
},
[
"database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
Expand All @@ -84,34 +62,12 @@
"db": [
[
{
"id": "db"
"id": "kraken2db_forbuilding"
},
[
"database.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
"database100mers.kmer_distrib:md5,d41d8cd98f00b204e9800998ecf8427e",
"database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e",
"hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
[
[
"G46z5ZvKEd.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
"G46z5ZvKEd.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map_MtGz4nUfR3.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map_eNakvrOVZm.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
"z_4A5lulyr.fna:md5,d41d8cd98f00b204e9800998ecf8427e",
"z_4A5lulyr.fna.masked:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
"seqid2taxid.map:md5,d41d8cd98f00b204e9800998ecf8427e",
"taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
[
"names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
"nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
"nucl_gb.accession2taxid:md5,d41d8cd98f00b204e9800998ecf8427e",
"prelim_map.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
"database100mers.kraken:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
Expand All @@ -122,8 +78,8 @@
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.2"
"nextflow": "25.04.7"
},
"timestamp": "2025-06-02T13:19:12.806972"
"timestamp": "2025-10-29T10:47:45.239034306"
}
}
1 change: 1 addition & 0 deletions modules/nf-core/kraken2/build/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ process KRAKEN2_BUILD {

output:
tuple val(meta), path("${prefix}"), emit: db
tuple val(meta), path("${prefix}/*k2d"), path("${prefix}/*map"), path("${prefix}/library/added/*"), path("${prefix}/taxonomy/*"), optional: true, emit: db_separated
path "versions.yml", emit: versions

when:
Expand Down
30 changes: 26 additions & 4 deletions modules/nf-core/kraken2/build/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ input:
e.g. `[ id:'sample1', single_end:false ]`
- seqid2taxid_map:
type: file
description: File mapping sequence IDs to taxonomy IDs, either generated
or premade.
description: File mapping sequence IDs to taxonomy IDs, either generated or
premade.
pattern: "seqid2taxid.map"
ontologies:
- edam: http://edamontology.org/data_3028 # Taxonomy
Expand All @@ -49,8 +49,8 @@ input:
e.g. `[ id:'sample1', single_end:false ]`
- taxonomy_files:
type: file
description: Files present in the <name>/taxonomy/ directory, including
nodes.dmp, names.dmp, and .accession2taxid files.
description: Files present in the <name>/taxonomy/ directory, including nodes.dmp,
names.dmp, and .accession2taxid files.
pattern: "*"
ontologies:
- edam: http://edamontology.org/data_3028 # Taxonomy
Expand All @@ -70,6 +70,28 @@ output:
pattern: "*/"
ontologies:
- edam: http://edamontology.org/data_1049 # Directory
db_separated:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- ${prefix}/*k2d:
type: file
description: Kraken2 k2d binary database files
pattern: "*.k2d"
- ${prefix}/*map:
type: file
description: Kraken2 sequence ID to taxonomy ID mapping file (e.g. seqid2taxid.map)
pattern: "*.map"
- ${prefix}/library/added/*:
type: file
description: Kraken2 masked FASTA files used to build the database
pattern: "*.fasta"
- ${prefix}/taxonomy/*:
type: file
description: Kraken2 nodes.dmp, names.dmp, and .accession2taxid taxonomy files
pattern: "*.{dmp,accession2taxid}"
versions:
- versions.yml:
type: file
Expand Down
Loading
Loading