nf-core
diff --git a/‎subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf‎
Lines changed: 90 additions & 0 deletions b/‎subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf‎
Lines changed: 90 additions & 0 deletions
diff --git a/‎subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml‎
Lines changed: 96 additions & 0 deletions b/‎subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎subworkflows/nf-core/fastq_align_dedup_bwamem/nextflow.config‎
Lines changed: 14 additions & 0 deletions b/‎subworkflows/nf-core/fastq_align_dedup_bwamem/nextflow.config‎
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,90 @@
+include { FASTQ_ALIGN_BWA                                   } from '../fastq_align_bwa/main'
+include { PICARD_ADDORREPLACEREADGROUPS                     } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
+include { PICARD_MARKDUPLICATES                             } from '../../../modules/nf-core/picard/markduplicates/main'  
+include { SAMTOOLS_INDEX                                    } from '../../../modules/nf-core/samtools/index/main'
+
+/*
+ * Align FASTQ reads with the FASTQ_ALIGN_BWA subworkflow (BAM sorting enabled;
+ * its bam, bai, stats, flagstat and idxstats outputs are collected) and, unless
+ * `skip_deduplication` is true, add read groups with Picard
+ * AddOrReplaceReadGroups, run Picard MarkDuplicates and index the resulting
+ * BAM with samtools index.
+ */
+workflow FASTQ_ALIGN_DEDUP_BWAMEM {
+
+    take:
+    ch_reads             // channel: [ val(meta), [ reads ] ]
+    ch_fasta             // channel: [ val(meta), [ fasta ] ]
+    ch_fasta_index       // channel: [ val(meta), [ fasta index ] ]
+    ch_bwamem_index      // channel: [ val(meta), [ bwamem index ] ]
+    skip_deduplication   // boolean: whether to skip deduplication of alignments
+    use_gpu              // boolean: whether to use GPU or CPU for bwamem alignment (not referenced in this workflow body)
+
+    main:
+
+    ch_alignment                     = Channel.empty()
+    ch_alignment_index               = Channel.empty()
+    ch_flagstat                      = Channel.empty()
+    ch_stats                         = Channel.empty()
+    ch_idxstats                      = Channel.empty()
+    ch_picard_metrics                = Channel.empty() // stays empty when deduplication is skipped
+    ch_multiqc_files                 = Channel.empty()
+    ch_versions                      = Channel.empty()
+
+    FASTQ_ALIGN_BWA (
+        ch_reads,
+        ch_bwamem_index,
+        true, // val_sort_bam hardcoded to true
+        ch_fasta
+    )
+    // Mix every FASTQ_ALIGN_BWA output into its OWN channel. Mixing into
+    // ch_alignment would leak BAM tuples into the index/stats channels and,
+    // from there, into the MultiQC inputs.
+    ch_alignment        = ch_alignment.mix(FASTQ_ALIGN_BWA.out.bam)            // channel: [ val(meta), path(bam) ]
+    ch_alignment_index  = ch_alignment_index.mix(FASTQ_ALIGN_BWA.out.bai)      // channel: [ val(meta), path(bai) ]
+    ch_stats            = ch_stats.mix(FASTQ_ALIGN_BWA.out.stats)              // channel: [ val(meta), path(stats) ]
+    ch_flagstat         = ch_flagstat.mix(FASTQ_ALIGN_BWA.out.flagstat)        // channel: [ val(meta), path(flagstat) ]
+    ch_idxstats         = ch_idxstats.mix(FASTQ_ALIGN_BWA.out.idxstats)        // channel: [ val(meta), path(idxstats) ]
+    ch_versions         = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first())
+
+    if (!skip_deduplication) {
+        /*
+         * Run Picard AddOrReplaceReadGroups to add read group (RG) to reads in bam file
+         */
+        PICARD_ADDORREPLACEREADGROUPS (
+            ch_alignment,
+            ch_fasta,
+            ch_fasta_index
+        )
+        ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS.out.versions.first())
+
+        /*
+         * Run Picard MarkDuplicates (the --REMOVE_DUPLICATES true flag is supplied
+         * through ext.args in the subworkflow's nextflow.config)
+         */
+        PICARD_MARKDUPLICATES (
+            PICARD_ADDORREPLACEREADGROUPS.out.bam,
+            ch_fasta,
+            ch_fasta_index
+        )
+        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first())
+
+        /*
+         * Run samtools index on deduplicated alignment
+         */
+        SAMTOOLS_INDEX (
+            PICARD_MARKDUPLICATES.out.bam
+        )
+        // The deduplicated BAM and its index replace the raw alignment outputs;
+        // the stats/flagstat/idxstats channels still describe the pre-dedup BAM.
+        ch_alignment       = PICARD_MARKDUPLICATES.out.bam
+        ch_alignment_index = SAMTOOLS_INDEX.out.bai
+        ch_picard_metrics  = PICARD_MARKDUPLICATES.out.metrics
+        ch_versions        = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+    }
+
+    /*
+     * Collect MultiQC inputs (meta is dropped, only the report files are kept)
+     */
+    ch_multiqc_files = ch_picard_metrics.collect{ meta, metrics -> metrics }
+                        .mix(ch_flagstat.collect{ meta, flagstat -> flagstat })
+                        .mix(ch_stats.collect{ meta, stats -> stats  })
+                        .mix(ch_idxstats.collect{ meta, stats -> stats  })
+
+    emit:
+    bam                           = ch_alignment                     // channel: [ val(meta), [ bam ]       ]
+    bai                           = ch_alignment_index               // channel: [ val(meta), [ bai ]       ]
+    samtools_flagstat             = ch_flagstat                      // channel: [ val(meta), [ flagstat ]  ]
+    samtools_stats                = ch_stats                         // channel: [ val(meta), [ stats ]     ]
+    samtools_index_stats          = ch_idxstats                      // channel: [ val(meta), [ idxstats ]  ]
+    picard_metrics                = ch_picard_metrics                // channel: [ val(meta), [ metrics ]   ]
+    multiqc                       = ch_multiqc_files                 // channel: [ *{html,txt}              ]
+    versions                      = ch_versions                      // channel: [ versions.yml             ]
+}
@@ -0,0 +1,96 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_align_dedup_bwamem"
+description: Aligns DNA or TAPS-treated reads using bwamem, sorts the alignments and optionally deduplicates them
+keywords:
+  - bwamem
+  - alignment
+  - map
+  - 5mC
+  - methylseq
+  - DNA
+  - fastq
+  - bam
+components:
+  - samtools/index
+  - picard/addorreplacereadgroups
+  - picard/markduplicates
+  - fastq_align_bwa
+input:
+  - ch_reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+        Structure: [ val(meta), [ path(reads) ] ]
+      pattern: "*.{fastq,fastq.gz}"
+  - ch_fasta:
+      type: file
+      description: |
+        Reference genome FASTA file
+        Structure: [ val(meta), path(fasta) ]
+      pattern: "*.{fa,fa.gz}"
+  - ch_fasta_index:
+      type: file
+      description: |
+        Index file of the reference genome FASTA
+        Structure: [ val(meta), path(fasta index) ]
+  - ch_bwamem_index:
+      type: directory
+      description: |
+        Bwa-mem genome index files
+        Structure: [ val(meta), path(index) ]
+      pattern: "Bwa-memIndex"
+  - skip_deduplication:
+      type: boolean
+      description: |
+        Skip deduplication of aligned reads
+  - use_gpu:
+      type: boolean
+      description: |
+        Whether to use GPU or CPU for bwamem alignment
+output:
+  - bam:
+      type: file
+      description: |
+        Channel containing BAM files
+        Structure: [ val(meta), path(bam) ]
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: |
+        Channel containing indexed BAM (BAI) files
+        Structure: [ val(meta), path(bai) ]
+      pattern: "*.bai"
+  - samtools_flagstat:
+      type: file
+      description: |
+        File containing samtools flagstat output
+        Structure: [ val(meta), path(flagstat) ]
+      pattern: "*.flagstat"
+  # Key matches the `samtools_index_stats` emit of the workflow
+  - samtools_index_stats:
+      type: file
+      description: |
+        File containing samtools idxstats output
+        Structure: [ val(meta), path(idxstats) ]
+      pattern: "*.idxstats"
+  - samtools_stats:
+      type: file
+      description: |
+        File containing samtools stats output
+        Structure: [ val(meta), path(stats) ]
+      pattern: "*.stats"
+  - picard_metrics:
+      type: file
+      description: |
+        Duplicate metrics file generated by picard
+        Structure: [ val(meta), path(metrics) ]
+      pattern: "*.metrics.txt"
+  - multiqc:
+      type: file
+      description: |
+        Channel containing the files to be passed to MultiQC: picard duplicate
+        metrics and samtools flagstat, stats and idxstats outputs (without meta).
+        Structure: [ path(files) ]
+      pattern: "*.{metrics.txt,flagstat,stats,idxstats}"
+  - versions:
+      type: file
+      description: |
+        File containing software versions
+        Structure: [ path(versions.yml) ]
+      pattern: "versions.yml"
+authors:
+  - "@eduard-watchmaker"
+maintainers:
+  - "@eduard-watchmaker"
@@ -0,0 +1,14 @@
+// IMPORTANT: This config file should be included to ensure that the subworkflow works properly.
+process {
+    // Name the SAMTOOLS_SORT outputs "<meta.id>.sorted"
+    withName: 'SAMTOOLS_SORT' {
+        ext.prefix = { "${meta.id}.sorted" }
+    }
+    // --REMOVE_DUPLICATES true makes MarkDuplicates drop duplicate reads from
+    // the output BAM instead of only flagging them
+    withName: 'PICARD_MARKDUPLICATES' {
+        ext.args = "--ASSUME_SORTED true --REMOVE_DUPLICATES true --VALIDATION_STRINGENCY LENIENT --PROGRAM_RECORD_ID 'null' --TMP_DIR tmp"
+        ext.prefix = { "${meta.id}.deduped.sorted" }
+    }
+    // ext.args is a closure so the read-group ID and sample name follow each
+    // sample's meta.id rather than being the same constant in every BAM
+    withName: 'PICARD_ADDORREPLACEREADGROUPS' {
+        ext.args = { "--RGID ${meta.id} --RGLB lib1 --RGPL illumina --RGPU unit1 --RGSM ${meta.id}" }
+        ext.prefix = { "${meta.id}.RG.sorted" }
+    }
+}