nf-core · lyh970817 · Mar 13, 2026 · Mar 13, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/modules/nf-core/regenie/step1/environment.yml b/modules/nf-core/regenie/step1/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels: # conda-forge is listed ahead of bioconda
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::regenie=4.1.2" # exact version pin; the container tags in main.nf carry the same 4.1.2 version
diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf
@@ -0,0 +1,64 @@
+// Runs REGENIE step 1 on a PLINK genotype bundle for the phenotype column named in
+// meta2.pheno_col and emits the prediction list, the gzipped LOCO files and the log.
+process REGENIE_STEP1 {
+    tag "${meta.id}:${meta2.pheno_col}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890'
+        : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}"
+
+    input:
+    tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file)
+    tuple val(meta2), path(pheno)
+    tuple val(meta3), path(covar)
+    val(bsize)
+
+    output:
+    tuple val(meta2), path("*_pred.list"), path("*.loco.gz"), emit: predictions
+    tuple val(meta2), path("*.log"), emit: log
+    tuple val("${task.process}"), val('regenie'), eval("regenie --version 2>&1 | head -n 1"), topic: versions, emit: versions_regenie
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Output prefix for --out; defaults to meta.id and can be overridden with ext.prefix
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // REGENIE takes the PLINK bundle as a path prefix without extension; read it from the
+    // staged genotype file so the inputs do not have to be named after meta.id
+    def genotype_prefix = plink_genotype_file.baseName
+    def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed'
+    def pheno_col = meta2.pheno_col
+    // --bt is only added when meta2.is_binary is truthy
+    def binary_arg = meta2.is_binary ? '--bt' : ''
+    // covar is falsy (e.g. []) when no covariate file is supplied
+    def covar_arg = covar ? "--covarFile ${covar}" : ''
+    // Falls back to a block size of 1000 when bsize is falsy (e.g. [])
+    def bsize_arg = bsize ?: 1000
+
+    """
+    regenie \\
+        --step 1 \\
+        ${genotype_flag} ${genotype_prefix} \\
+        --phenoFile ${pheno} \\
+        --phenoColList ${pheno_col} \\
+        ${covar_arg} \\
+        ${binary_arg} \\
+        --bsize ${bsize_arg} \\
+        --gz \\
+        --threads ${task.cpus} \\
+        ${args} \\
+        --out ${prefix}
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_pred.list
+    printf '' | gzip > ${prefix}_1.loco.gz
+    touch ${prefix}.log
+    """
+}
diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml
@@ -0,0 +1,141 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "regenie_step1"
+description: Run REGENIE step 1 to fit whole-genome regression models and emit LOCO predictions
+keywords:
+  - regenie
+  - gwas
+  - association
+  - whole genome regression # step 1 fits the regression model; it runs no burden tests
+  - genomics
+tools:
+  - "regenie":
+      description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)."
+      homepage: "https://rgcgithub.github.io/regenie/"
+      documentation: "https://rgcgithub.github.io/regenie/options/"
+      tool_dev_url: "https://github.com/rgcgithub/regenie"
+      doi: "10.1038/s41588-021-00870-7"
+      licence: ["MIT"]
+      identifier: "biotools:regenie"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing genotype information
+          Keep only the genotype analysis identifier in this map
+          The PLINK bundle must already be staged with basename `meta.id`
+          e.g. `[ id:'cohort' ]`
+    - plink_genotype_file:
+        type: file
+        description: PLINK primary genotype file in BED or PGEN format
+        pattern: "*.{bed,pgen}"
+        # PLINK .bed/.pgen are binary genotype tables; EDAM format_3003 is the UCSC BED annotation format
+        ontologies: []
+    - plink_variant_file:
+        type: file
+        description: PLINK variant metadata file in BIM or PVAR format
+        pattern: "*.{bim,pvar,pvar.zst}"
+        ontologies: []
+    - plink_sample_file:
+        type: file
+        description: PLINK sample metadata file in FAM or PSAM format
+        pattern: "*.{fam,psam}"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing phenotype file information and phenotype selector
+          Keep `id` for the phenotype file identity, use `pheno_col` for the phenotype column passed to `--phenoColList` and set `is_binary` to true to add `--bt`
+          e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ]`
+    - pheno:
+        type: file
+        description: Phenotype file passed to `--phenoFile`
+        pattern: "*.{phe,pheno,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475" # TSV
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing covariate file information
+          e.g. `[ id:'covariates' ]`
+    - covar:
+        type: file
+        optional: true
+        description: Optional covariate file passed to `--covarFile`; provide `[]` when absent
+        pattern: "*.{covar,cov,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475" # TSV
+  - bsize:
+      type: integer
+      description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000`
+
+output:
+  predictions:
+    - - meta2:
+          type: map
+          description: |
+            Groovy Map containing phenotype file information and phenotype selector
+            e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ]`
+      - "*_pred.list":
+          type: file
+          description: REGENIE prediction list file
+          pattern: "*_pred.list"
+          ontologies: []
+      - "*.loco.gz":
+          type: file
+          description: REGENIE LOCO prediction files
+          pattern: "*.loco.gz"
+          ontologies:
+            - edam: "http://edamontology.org/format_3989" # GZIP format
+  log:
+    - - meta2:
+          type: map
+          description: |
+            Groovy Map containing phenotype file information and phenotype selector
+            e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ]`
+      - "*.log":
+          type: file
+          description: REGENIE step 1 log file
+          pattern: "*.log"
+          ontologies:
+            - edam: "http://edamontology.org/format_2330" # Text
+  versions_regenie:
+    - - "${task.process}":
+          type: string
+          description: The process the versions were collected from
+      - "regenie":
+          type: string
+          description: The tool name
+      - "regenie --version 2>&1 | head -n 1":
+          type: eval
+          description: The command used to generate the version of the tool
+
+topics:
+  versions: # keys are quoted the same way as under output.versions_regenie
+    - - "${task.process}":
+          type: string
+          description: The process the versions were collected from
+      - "regenie":
+          type: string
+          description: The tool name
+      - "regenie --version 2>&1 | head -n 1":
+          type: eval
+          description: The command used to generate the version of the tool
+authors:
+  - "@andongni"
+maintainers:
+  - "@andongni"
+containers: # image references per backend; the docker and singularity names match the container directive in main.nf
+  conda:
+    linux_amd64:
+      lock_file: "https://wave.seqera.io/v1alpha1/builds/bd-5d361f9fcb2f85cf_1/condalock"
+  docker:
+    linux_amd64:
+      build_id: "bd-5d361f9fcb2f85cf_1" # same build id that appears in the conda lock_file URL
+      name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf"
+      scanId: "sc-cc9eb5ed5eb381dd_2"
+  singularity:
+    linux_amd64:
+      build_id: "bd-7c121fb4ecd57890_1"
+      name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890"
+      https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" # NOTE(review): presumably the direct-download blob for the same singularity image; confirm