Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/regenie/step1/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for this module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Exact version pin of the regenie package from the bioconda channel.
  - bioconda::regenie=4.1.2
55 changes: 55 additions & 0 deletions modules/nf-core/regenie/step1/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Runs `regenie --step 1` for one phenotype column against a PLINK genotype
// bundle and emits the prediction list, the gzipped LOCO files and the log.
//
// Inputs:
//   meta / plink_*  - genotype map plus the three staged PLINK files
//                     (genotype, variant, sample); they must share one basename
//   meta2 / pheno   - phenotype map (`pheno_col`, optional `is_binary`) and the
//                     phenotype file
//   meta3 / covar   - covariate map and optional covariate file (`[]` when absent)
//   bsize           - block size for `--bsize`; falls back to 1000 when falsy
process REGENIE_STEP1 {
    tag "${meta.id}:${meta2.pheno_col}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890'
        : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}"

    input:
    tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file)
    tuple val(meta2), path(pheno)
    tuple val(meta3), path(covar)
    val(bsize)

    output:
    tuple val(meta2), path("*_pred.list"), path("*.loco.gz"), emit: predictions
    tuple val(meta2), path("*.log"), emit: log
    tuple val("${task.process}"), val('regenie'), eval("regenie --version 2>&1 | head -n 1"), topic: versions, emit: versions_regenie

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Output prefix: honours `ext.prefix` and defaults to the genotype id.
    def prefix = task.ext.prefix ?: "${meta.id}"
    // `--bed` / `--pgen` take the basename shared by the PLINK bundle. Derive it
    // from the staged genotype file so the run does not depend on the files
    // being named after `meta.id`.
    def genotype_prefix = plink_genotype_file.baseName
    def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed'
    def pheno_col = meta2.pheno_col
    // `--bt` is only added when the phenotype map flags the trait as binary.
    def binary_arg = meta2.is_binary ? '--bt' : ''
    // An empty `covar` (e.g. `[]`) is falsy, which drops the option entirely.
    def covar_arg = covar ? "--covarFile ${covar}" : ''
    def bsize_arg = bsize ?: 1000

    """
    regenie \\
        --step 1 \\
        ${genotype_flag} ${genotype_prefix} \\
        --phenoFile ${pheno} \\
        --phenoColList ${pheno_col} \\
        ${covar_arg} \\
        ${binary_arg} \\
        --bsize ${bsize_arg} \\
        --gz \\
        --threads ${task.cpus} \\
        ${args} \\
        --out ${prefix}
    """

    stub:
    // Same prefix rule as the real script so stub outputs carry the same names.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_pred.list
    printf '' | gzip > ${prefix}_1.loco.gz
    touch ${prefix}.log
    """
}
141 changes: 141 additions & 0 deletions modules/nf-core/regenie/step1/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module identity, search keywords and upstream tool details.
name: regenie_step1
description: Run REGENIE step 1 to fit whole-genome regression models and emit LOCO predictions
keywords:
  - regenie
  - gwas
  - association
  - burden test
  - genomics
tools:
  - regenie:
      description: >-
        Regenie is a C++ program for whole genome regression modelling of
        large genome-wide association studies (GWAS).
      homepage: https://rgcgithub.github.io/regenie/
      documentation: https://rgcgithub.github.io/regenie/options/
      tool_dev_url: https://github.com/rgcgithub/regenie
      # Quoted so the DOI is always read as a string.
      doi: "10.1038/s41588-021-00870-7"
      licence: [MIT]
      identifier: "biotools:regenie"

# Input channels: three tuples (genotype bundle, phenotype, covariate)
# followed by the plain `bsize` value.
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing genotype information
          Keep only the genotype analysis identifier in this map
          The PLINK bundle must already be staged with basename `meta.id`
          e.g. `[ id:'cohort' ]`
    - plink_genotype_file:
        type: file
        description: PLINK primary genotype file in BED or PGEN format
        pattern: "*.{bed,pgen}"
        # No EDAM term is attached: format_3003 is the genome-browser BED
        # annotation format, not the PLINK binary genotype table.
        ontologies: []
    - plink_variant_file:
        type: file
        description: PLINK variant metadata file in BIM or PVAR format
        pattern: "*.{bim,pvar,zst}"
        ontologies: []
    - plink_sample_file:
        type: file
        description: PLINK sample metadata file in FAM or PSAM format
        pattern: "*.{fam,psam}"
        ontologies: []
  - - meta2:
        type: map
        description: |
          Groovy Map containing phenotype file information and phenotype selector
          Keep `id` for the phenotype file identity and use `pheno_col` for the phenotype column passed to `--phenoColList`
          Set `is_binary` to `true` for a binary trait so that `--bt` is passed
          e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ]`
    - pheno:
        type: file
        description: Phenotype file passed to `--phenoFile`
        pattern: "*.{phe,pheno,txt,tsv}"
        ontologies:
          - edam: "http://edamontology.org/format_3475" # TSV
  - - meta3:
        type: map
        description: |
          Groovy Map containing covariate file information
          e.g. `[ id:'covariates' ]`
    - covar:
        type: file
        optional: true
        description: Optional covariate file passed to `--covarFile`; provide `[]` when absent
        pattern: "*.{covar,cov,txt,tsv}"
        ontologies:
          - edam: "http://edamontology.org/format_3475" # TSV
  - bsize:
      type: integer
      description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000`

# Output channels; `predictions` and `log` are keyed by the phenotype map
# (`meta2`), `versions_regenie` carries the tool version tuple.
output:
  predictions:
    - - meta2:
          type: map
          description: |
            Groovy Map containing phenotype file information and phenotype selector
            e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ]`
      - "*_pred.list":
          type: file
          description: REGENIE prediction list file
          pattern: "*_pred.list"
          ontologies: []
      - "*.loco.gz":
          type: file
          description: REGENIE LOCO prediction files
          pattern: "*.loco.gz"
          ontologies:
            - edam: "http://edamontology.org/format_3987" # GZIP
  log:
    - - meta2:
          type: map
          description: |
            Groovy Map containing phenotype file information and phenotype selector
            e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ]`
      - "*.log":
          type: file
          description: REGENIE step 1 log file
          pattern: "*.log"
          ontologies:
            - edam: "http://edamontology.org/format_2330" # Text
  versions_regenie:
    - - "${task.process}":
          type: string
          description: The process the versions were collected from
      - "regenie":
          type: string
          description: The tool name
      - "regenie --version 2>&1 | head -n 1":
          type: eval
          description: The command used to generate the version of the tool

# Topic channels; the entries describe the version tuple
# (process name, tool name, version command).
topics:
  versions:
    # Keys are quoted because they contain `$`, `{`, `&` and `|`.
    - - "${task.process}":
          type: string
          description: The process the versions were collected from
      - "regenie":
          type: string
          description: The tool name
      - "regenie --version 2>&1 | head -n 1":
          type: eval
          description: The command used to generate the version of the tool
# GitHub handles; quoted because a plain scalar cannot start with `@`.
authors:
  - '@andongni'
maintainers:
  - '@andongni'
# Build artefacts for each supported software-packaging backend.
containers:
  conda:
    linux_amd64:
      lock_file: https://wave.seqera.io/v1alpha1/builds/bd-5d361f9fcb2f85cf_1/condalock
  docker:
    linux_amd64:
      build_id: bd-5d361f9fcb2f85cf_1
      name: community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf
      scanId: sc-cc9eb5ed5eb381dd_2
  singularity:
    linux_amd64:
      build_id: bd-7c121fb4ecd57890_1
      name: oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890
      # Direct HTTPS download location for the image blob.
      https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data
Loading
Loading