Skip to content

Commit b25dc59

Browse files
eduard-watchmakerrootrootvagkaratzas
authored
New align dedup bwamem (#9197)
* tests * tests * confs * confs * confs * confs * confs * confs * confs * gpu * gpu * last snap * snap * rm snap * removed snapshot * new snap * lint * lint * channel * channel * channel * snap test * last snapshot * last lint * gpu snap * PR feedback * samtools sort prefix * samtools sort prefix * samtools sort prefix * samtools sort prefix * samtools sort prefix * samtools sort prefix * new snap * new snap * Update subworkflows/nf-core/fastq_align_dedup_bwamem/meta.yml Co-authored-by: Evangelos Karatzas <[email protected]> * changed tests again * new cpu snapshot * low memory to fq2bam * conf revert * rm nftest utils line and add lowmem to fq2bam * revert lowmem to fq2bam * index file * path to index * path to index * revert nft utils * test * test * removed parabricks * new snapshot * revert test data fw2bam * revert test data fw2bam * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/main.nf Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/nextflow.config Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * 
Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * Update subworkflows/nf-core/fastq_align_dedup_bwamem/tests/main.nf.test Co-authored-by: Evangelos Karatzas <[email protected]> * added new tag at test * added new tag at test * new snapshot --------- Co-authored-by: root <[email protected]> Co-authored-by: root <[email protected]> Co-authored-by: Evangelos Karatzas <[email protected]>
1 parent 1c8c773 commit b25dc59

File tree

6 files changed

+889
-0
lines changed

6 files changed

+889
-0
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
include { FASTQ_ALIGN_BWA } from '../fastq_align_bwa/main'
2+
include { PICARD_ADDORREPLACEREADGROUPS } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
3+
include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main'
4+
include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'
5+
6+
workflow FASTQ_ALIGN_DEDUP_BWAMEM {
    //
    // Align reads with FASTQ_ALIGN_BWA (sorted output), then optionally add read
    // groups, run Picard MarkDuplicates and re-index the resulting BAM.
    // Per-sample QC files are gathered into a single channel for MultiQC.
    //

    take:
    ch_reads           // channel: [ val(meta), [ reads ] ]
    ch_fasta           // channel: [ val(meta), [ fasta ] ]
    ch_fasta_index     // channel: [ val(meta), [ fasta index ] ]
    ch_bwamem_index    // channel: [ val(meta), [ bwamem index ] ]
    skip_deduplication // boolean: whether to skip deduplication of the alignments
    use_gpu            // boolean: whether to use GPU or CPU for bwamem alignment
                       // NOTE: not referenced anywhere in this workflow body; kept so
                       // that existing callers passing six arguments keep working.

    main:

    ch_alignment       = Channel.empty()
    ch_alignment_index = Channel.empty()
    ch_flagstat        = Channel.empty()
    ch_stats           = Channel.empty()
    ch_idxstats        = Channel.empty()
    ch_picard_metrics  = Channel.empty()
    ch_multiqc_files   = Channel.empty()
    ch_versions        = Channel.empty()

    FASTQ_ALIGN_BWA (
        ch_reads,
        ch_bwamem_index,
        true, // val_sort_bam hardcoded to true
        ch_fasta
    )
    // Each output is mixed into its OWN channel. Mixing into ch_alignment here
    // would push the BAM tuples into the index/stats channels as well.
    ch_alignment       = ch_alignment.mix(FASTQ_ALIGN_BWA.out.bam)
    ch_alignment_index = ch_alignment_index.mix(FASTQ_ALIGN_BWA.out.bai)
    ch_stats           = ch_stats.mix(FASTQ_ALIGN_BWA.out.stats)       // channel: [ val(meta), path(stats) ]
    ch_flagstat        = ch_flagstat.mix(FASTQ_ALIGN_BWA.out.flagstat) // channel: [ val(meta), path(flagstat) ]
    ch_idxstats        = ch_idxstats.mix(FASTQ_ALIGN_BWA.out.idxstats) // channel: [ val(meta), path(idxstats) ]
    ch_versions        = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first())

    if (!skip_deduplication) {
        /*
         * Run Picard AddOrReplaceReadGroups to add read group (RG) to reads in bam file
         */
        PICARD_ADDORREPLACEREADGROUPS (
            ch_alignment,
            ch_fasta,
            ch_fasta_index
        )
        ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS.out.versions.first())

        /*
         * Run Picard MarkDuplicates with the --REMOVE_DUPLICATES true flag
         * (the flag itself is supplied through ext.args in the subworkflow's nextflow.config)
         */
        PICARD_MARKDUPLICATES (
            PICARD_ADDORREPLACEREADGROUPS.out.bam,
            ch_fasta,
            ch_fasta_index
        )
        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first())

        /*
         * Run samtools index on deduplicated alignment
         */
        SAMTOOLS_INDEX (
            PICARD_MARKDUPLICATES.out.bam
        )
        // The deduplicated BAM and its index replace the raw alignment outputs.
        ch_alignment       = PICARD_MARKDUPLICATES.out.bam
        ch_alignment_index = SAMTOOLS_INDEX.out.bai
        ch_picard_metrics  = PICARD_MARKDUPLICATES.out.metrics
        ch_versions        = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
    }

    /*
     * Collect MultiQC inputs (meta is dropped, only the report files are kept)
     */
    ch_multiqc_files = ch_picard_metrics.collect{ meta, metrics -> metrics }
                        .mix(ch_flagstat.collect{ meta, flagstat -> flagstat })
                        .mix(ch_stats.collect{ meta, stats -> stats })
                        .mix(ch_idxstats.collect{ meta, stats -> stats })

    emit:
    bam                  = ch_alignment       // channel: [ val(meta), [ bam ] ]
    bai                  = ch_alignment_index // channel: [ val(meta), [ bai ] ]
    samtools_flagstat    = ch_flagstat        // channel: [ val(meta), [ flagstat ] ]
    samtools_stats       = ch_stats           // channel: [ val(meta), [ stats ] ]
    samtools_index_stats = ch_idxstats        // channel: [ val(meta), [ idxstats ] ]
    picard_metrics       = ch_picard_metrics  // channel: [ val(meta), [ metrics ] ]
    multiqc              = ch_multiqc_files   // channel: [ *{html,txt} ]
    versions             = ch_versions        // channel: [ versions.yml ]
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "fastq_align_dedup_bwamem"
description: Aligns DNA or TAPS-treated reads with bwamem, then sorts and optionally deduplicates the alignments
keywords:
  - bwamem
  - alignment
  - map
  - 5mC
  - methylseq
  - DNA
  - fastq
  - bam
components:
  - samtools/index
  - picard/addorreplacereadgroups
  - picard/markduplicates
  - fastq_align_bwa
input:
  - ch_reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
        Structure: [ val(meta), [ path(reads) ] ]
      pattern: "*.{fastq,fastq.gz}"
  - ch_fasta:
      type: file
      description: |
        Reference genome FASTA file
        Structure: [ val(meta), path(fasta) ]
      pattern: "*.{fa,fa.gz}"
  - ch_fasta_index:
      type: file
      description: |
        Index of the reference genome FASTA file
        Structure: [ val(meta), path(fasta index) ]
  - ch_bwamem_index:
      type: directory
      description: |
        Bwa-mem genome index files
        Structure: [ val(meta), path(index) ]
      pattern: "Bwa-memIndex"
  - skip_deduplication:
      type: boolean
      description: |
        Skip deduplication of aligned reads
  # use_gpu is part of the subworkflow's take: block but is not referenced in its body.
  - use_gpu:
      type: boolean
      description: |
        Whether to use GPU or CPU for bwamem alignment.
        Currently accepted for interface compatibility only; alignment always runs
        through the fastq_align_bwa subworkflow.
output:
  - bam:
      type: file
      description: |
        Channel containing BAM files
        Structure: [ val(meta), path(bam) ]
      pattern: "*.bam"
  - bai:
      type: file
      description: |
        Channel containing indexed BAM (BAI) files
        Structure: [ val(meta), path(bai) ]
      pattern: "*.bai"
  - samtools_flagstat:
      type: file
      description: |
        File containing samtools flagstat output
        Structure: [ val(meta), path(flagstat) ]
      pattern: "*.flagstat"
  # Key matches the emit name used in main.nf (samtools_index_stats).
  - samtools_index_stats:
      type: file
      description: |
        File containing samtools idxstats output
        Structure: [ val(meta), path(idxstats) ]
      pattern: "*.idxstats"
  - samtools_stats:
      type: file
      description: |
        File containing samtools stats output
        Structure: [ val(meta), path(stats) ]
      pattern: "*.{stats}"
  - picard_metrics:
      type: file
      description: |
        Duplicate metrics file generated by picard
        Structure: [ val(meta), path(metrics) ]
      pattern: "*.{metrics.txt}"
  - multiqc:
      type: file
      description: |
        Channel containing the files to be passed to MultiQC: picard duplicate
        metrics and samtools flagstat, stats and idxstats outputs, collected
        across samples (meta is dropped).
        Structure: [ path(files) ]
      pattern: "*.{metrics.txt,flagstat,stats,idxstats}"
  - versions:
      type: file
      description: |
        File containing software versions
        Structure: [ path(versions.yml) ]
      pattern: "versions.yml"
authors:
  - "@eduard-watchmaker"
maintainers:
  - "@eduard-watchmaker"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// IMPORTANT: This config file should be included to ensure that the subworkflow works properly.
2+
process {

    // Output prefix for the sorted alignment: <meta.id>.sorted
    withName: 'SAMTOOLS_SORT' {
        ext.prefix = { "${meta.id}.sorted" }
    }

    // Read-group tagging: fixed RG fields, output prefix <meta.id>.RG.sorted
    withName: 'PICARD_ADDORREPLACEREADGROUPS' {
        ext.prefix = { "${meta.id}.RG.sorted" }
        ext.args   = [
            "--RGID 1",
            "--RGLB lib1",
            "--RGPL illumina",
            "--RGPU unit1",
            "--RGSM sample1"
        ].join(' ')
    }

    // Duplicate removal: output prefix <meta.id>.deduped.sorted
    withName: 'PICARD_MARKDUPLICATES' {
        ext.prefix = { "${meta.id}.deduped.sorted" }
        ext.args   = [
            "--ASSUME_SORTED true",
            "--REMOVE_DUPLICATES true",
            "--VALIDATION_STRINGENCY LENIENT",
            "--PROGRAM_RECORD_ID 'null'",
            "--TMP_DIR tmp"
        ].join(' ')
    }
}

0 commit comments

Comments
 (0)